[llvm-branch-commits] [libc] fea10f7 - Revert "[libc] Add strftime_l (#127708)"

2025-02-19 Thread via llvm-branch-commits

Author: Petr Hosek
Date: 2025-02-19T00:39:14-08:00
New Revision: fea10f7ff16ddd019841a69697194d373325ff6b

URL: 
https://github.com/llvm/llvm-project/commit/fea10f7ff16ddd019841a69697194d373325ff6b
DIFF: 
https://github.com/llvm/llvm-project/commit/fea10f7ff16ddd019841a69697194d373325ff6b.diff

LOG: Revert "[libc] Add strftime_l (#127708)"

This reverts commit 9072ba71cac6d518b4164615c609e358d49c4ed2.

Added: 


Modified: 
libc/config/baremetal/aarch64/entrypoints.txt
libc/config/baremetal/arm/entrypoints.txt
libc/config/baremetal/riscv/entrypoints.txt
libc/config/linux/x86_64/entrypoints.txt
libc/include/time.yaml
libc/src/time/CMakeLists.txt
libc/src/time/strftime.cpp

Removed: 
libc/src/time/strftime_l.cpp
libc/src/time/strftime_l.h



diff  --git a/libc/config/baremetal/aarch64/entrypoints.txt 
b/libc/config/baremetal/aarch64/entrypoints.txt
index 2c226ef176c08..44c4ab49e5c58 100644
--- a/libc/config/baremetal/aarch64/entrypoints.txt
+++ b/libc/config/baremetal/aarch64/entrypoints.txt
@@ -248,8 +248,6 @@ set(TARGET_LIBC_ENTRYPOINTS
 libc.src.time.gmtime
 libc.src.time.gmtime_r
 libc.src.time.mktime
-libc.src.time.strftime
-libc.src.time.strftime_l
 libc.src.time.timespec_get
 
 # internal entrypoints

diff  --git a/libc/config/baremetal/arm/entrypoints.txt 
b/libc/config/baremetal/arm/entrypoints.txt
index 6fd1fce3ab245..370b5462fe9e8 100644
--- a/libc/config/baremetal/arm/entrypoints.txt
+++ b/libc/config/baremetal/arm/entrypoints.txt
@@ -248,8 +248,6 @@ set(TARGET_LIBC_ENTRYPOINTS
 libc.src.time.gmtime
 libc.src.time.gmtime_r
 libc.src.time.mktime
-libc.src.time.strftime
-libc.src.time.strftime_l
 libc.src.time.timespec_get
 
 # internal entrypoints

diff  --git a/libc/config/baremetal/riscv/entrypoints.txt 
b/libc/config/baremetal/riscv/entrypoints.txt
index 5985c495bdaf2..07311a60a17a2 100644
--- a/libc/config/baremetal/riscv/entrypoints.txt
+++ b/libc/config/baremetal/riscv/entrypoints.txt
@@ -244,8 +244,6 @@ set(TARGET_LIBC_ENTRYPOINTS
 libc.src.time.gmtime
 libc.src.time.gmtime_r
 libc.src.time.mktime
-libc.src.time.strftime
-libc.src.time.strftime_l
 libc.src.time.timespec_get
 
 # internal entrypoints

diff  --git a/libc/config/linux/x86_64/entrypoints.txt 
b/libc/config/linux/x86_64/entrypoints.txt
index 2e3af00ec303d..a4f6671a59789 100644
--- a/libc/config/linux/x86_64/entrypoints.txt
+++ b/libc/config/linux/x86_64/entrypoints.txt
@@ -1128,7 +1128,6 @@ if(LLVM_LIBC_FULL_BUILD)
 libc.src.time.mktime
 libc.src.time.nanosleep
 libc.src.time.strftime
-libc.src.time.strftime_l
 libc.src.time.time
 libc.src.time.timespec_get
 

diff  --git a/libc/include/time.yaml b/libc/include/time.yaml
index 7bb25dbe85ac4..37ee824678cda 100644
--- a/libc/include/time.yaml
+++ b/libc/include/time.yaml
@@ -9,7 +9,6 @@ types:
   - type_name: time_t
   - type_name: clock_t
   - type_name: size_t
-  - type_name: locale_t
 enums: []
 objects: []
 functions:
@@ -101,16 +100,6 @@ functions:
   - type: size_t
   - type: const char *__restrict
   - type: const struct tm *__restrict
-  - name: strftime_l
-standard:
-  - stdc
-return_type: size_t
-arguments:
-  - type: char *__restrict
-  - type: size_t
-  - type: const char *__restrict
-  - type: const struct tm *__restrict
-  - type: locale_t
   - name: time
 standard:
   - stdc

diff  --git a/libc/src/time/CMakeLists.txt b/libc/src/time/CMakeLists.txt
index 3b951df810011..8332e8ab66f97 100644
--- a/libc/src/time/CMakeLists.txt
+++ b/libc/src/time/CMakeLists.txt
@@ -150,20 +150,6 @@ add_entrypoint_object(
 libc.src.time.strftime_core.strftime_main
 )
 
-add_entrypoint_object(
-  strftime_l
-  SRCS
-strftime_l.cpp
-  HDRS
-strftime_l.h
-  DEPENDS
-libc.hdr.types.locale_t
-libc.hdr.types.size_t
-libc.hdr.types.struct_tm
-libc.src.stdio.printf_core.writer
-libc.src.time.strftime_core.strftime_main
-)
-
 add_entrypoint_object(
   time
   SRCS

diff  --git a/libc/src/time/strftime.cpp b/libc/src/time/strftime.cpp
index c19e58fbadf71..4b89bf2ea3a70 100644
--- a/libc/src/time/strftime.cpp
+++ b/libc/src/time/strftime.cpp
@@ -19,6 +19,7 @@ namespace LIBC_NAMESPACE_DECL {
 LLVM_LIBC_FUNCTION(size_t, strftime,
(char *__restrict buffer, size_t buffsz,
 const char *__restrict format, const tm *timeptr)) {
+
   printf_core::WriteBuffer wb(buffer, (buffsz > 0 ? buffsz - 1 : 0));
   printf_core::Writer writer(&wb);
   int ret = strftime_core::strftime_main(&writer, format, timeptr);

diff  --git a/libc/src/time/strftime_l.cpp b/libc/src/time/strftime_l.cpp
deleted file mode 100644
index 4203136af4cba..0
--- a/libc/src/time/strftime_l.cpp
+++ /dev/null
@@ -1,33 +0,0 @@
-//===-- Implementation of strftime_l function 

[llvm-branch-commits] [clang] release/20.x: Revert "Do not use `private` as the default AS for when `generic` is available (#112442)" (PR #127771)

2025-02-19 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-clang-codegen

Author: Matt Arsenault (arsenm)


Changes

This reverts commit 6e0b0038cd65ce726ce404305a06e1cf33e36cca.

This breaks the rocm-device-libs build, so it should not ship in the release.

---

Patch is 214.51 KiB, truncated to 20.00 KiB below, full version: 
https://github.com/llvm/llvm-project/pull/127771.diff


21 Files Affected:

- (modified) clang/lib/Basic/Targets/AMDGPU.cpp (+3-3) 
- (modified) clang/lib/CodeGen/CGBlocks.cpp (+1-2) 
- (modified) clang/lib/CodeGen/CGBuiltin.cpp (+2-9) 
- (modified) clang/test/CodeGen/scoped-fence-ops.c (+120-61) 
- (modified) clang/test/CodeGenOpenCL/addr-space-struct-arg.cl (+70-99) 
- (modified) clang/test/CodeGenOpenCL/amdgcn-automatic-variable.cl (+16-20) 
- (modified) clang/test/CodeGenOpenCL/amdgpu-abi-struct-arg-byref.cl (+118-164) 
- (modified) clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl (+220-275) 
- (modified) clang/test/CodeGenOpenCL/amdgpu-nullptr.cl (+14-14) 
- (modified) clang/test/CodeGenOpenCL/atomic-ops.cl (+4-7) 
- (modified) clang/test/CodeGenOpenCL/atomics-unsafe-hw-remarks-gfx90a.cl 
(+3-3) 
- (modified) clang/test/CodeGenOpenCL/blocks.cl (+12-11) 
- (modified) clang/test/CodeGenOpenCL/builtins-alloca.cl (+4-428) 
- (modified) clang/test/CodeGenOpenCL/builtins-amdgcn-gfx12.cl (+56-87) 
- (modified) clang/test/CodeGenOpenCL/builtins-amdgcn-gfx940.cl (+12-18) 
- (modified) clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx12.cl (+2-2) 
- (modified) clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx8.cl (+1-1) 
- (modified) clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx90a.cl (+1-1) 
- (modified) clang/test/CodeGenOpenCL/enqueue-kernel-non-entry-block.cl (+1-1) 
- (modified) clang/test/CodeGenOpenCL/opencl_types.cl (+1-1) 
- (modified) clang/test/Index/pipe-size.cl (+2-2) 


``diff
diff --git a/clang/lib/Basic/Targets/AMDGPU.cpp 
b/clang/lib/Basic/Targets/AMDGPU.cpp
index 0d308cb6af969..9ea366af56a52 100644
--- a/clang/lib/Basic/Targets/AMDGPU.cpp
+++ b/clang/lib/Basic/Targets/AMDGPU.cpp
@@ -261,9 +261,9 @@ AMDGPUTargetInfo::AMDGPUTargetInfo(const llvm::Triple 
&Triple,
 void AMDGPUTargetInfo::adjust(DiagnosticsEngine &Diags, LangOptions &Opts) {
   TargetInfo::adjust(Diags, Opts);
   // ToDo: There are still a few places using default address space as private
-  // address space in OpenCL, which needs to be cleaned up, then the references
-  // to OpenCL can be removed from the following line.
-  setAddressSpaceMap((Opts.OpenCL && !Opts.OpenCLGenericAddressSpace) ||
+  // address space in OpenCL, which needs to be cleaned up, then Opts.OpenCL
+  // can be removed from the following line.
+  setAddressSpaceMap(/*DefaultIsPrivate=*/Opts.OpenCL ||
  !isAMDGCN(getTriple()));
 }
 
diff --git a/clang/lib/CodeGen/CGBlocks.cpp b/clang/lib/CodeGen/CGBlocks.cpp
index a7584a95c8ca7..f38f86c792f69 100644
--- a/clang/lib/CodeGen/CGBlocks.cpp
+++ b/clang/lib/CodeGen/CGBlocks.cpp
@@ -1396,8 +1396,7 @@ void CodeGenFunction::setBlockContextParameter(const 
ImplicitParamDecl *D,
   DI->setLocation(D->getLocation());
   DI->EmitDeclareOfBlockLiteralArgVariable(
   *BlockInfo, D->getName(), argNum,
-  cast(alloc.getPointer()->stripPointerCasts()),
-  Builder);
+  cast(alloc.getPointer()), Builder);
 }
   }
 
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 7ec9d59bfed5c..5237533364294 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -6092,13 +6092,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl 
GD, unsigned BuiltinID,
   /*IndexTypeQuals=*/0);
   auto Tmp = CreateMemTemp(SizeArrayTy, "block_sizes");
   llvm::Value *TmpPtr = Tmp.getPointer();
-  // The EmitLifetime* pair expect a naked Alloca as their last argument,
-  // however for cases where the default AS is not the Alloca AS, Tmp is
-  // actually the Alloca ascasted to the default AS, hence the
-  // stripPointerCasts()
-  llvm::Value *Alloca = TmpPtr->stripPointerCasts();
   llvm::Value *TmpSize = EmitLifetimeStart(
-  CGM.getDataLayout().getTypeAllocSize(Tmp.getElementType()), Alloca);
+  CGM.getDataLayout().getTypeAllocSize(Tmp.getElementType()), TmpPtr);
   llvm::Value *ElemPtr;
   // Each of the following arguments specifies the size of the 
corresponding
   // argument passed to the enqueued block.
@@ -6114,9 +6109,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl 
GD, unsigned BuiltinID,
 Builder.CreateAlignedStore(
 V, GEP, CGM.getDataLayout().getPrefTypeAlign(SizeTy));
   }
-  // Return the Alloca itself rather than a potential ascast as this is 
only
-  // used by the paired EmitLifetimeEnd.
-  return std::tie(ElemPtr, TmpSize, Alloca);
+  return std::tie(ElemPtr, TmpSize, TmpPtr);
 };
 
 // Could have events and/or varargs.
diff --git a/clang/test/CodeGen/scoped-f

[llvm-branch-commits] [clang] release/20.x: [clang][CodeGen] `sret` args should always point to the `alloca` AS, so use that (#114062) (PR #127552)

2025-02-19 Thread Matt Arsenault via llvm-branch-commits

arsenm wrote:

#127771 is the revert alternative 

https://github.com/llvm/llvm-project/pull/127552
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [SHT_LLVM_FUNC_MAP][llvm-readobj]Introduce function address map section and emit dynamic instruction count(readobj part) (PR #124333)

2025-02-19 Thread James Henderson via llvm-branch-commits


@@ -7922,6 +7928,59 @@ void LLVMELFDumper::printBBAddrMaps(bool 
PrettyPGOAnalysis) {
   }
 }
 
+template  void LLVMELFDumper::printFuncMaps() {
+  bool IsRelocatable = this->Obj.getHeader().e_type == ELF::ET_REL;
+  using Elf_Shdr = typename ELFT::Shdr;
+  auto IsMatch = [](const Elf_Shdr &Sec) -> bool {
+return Sec.sh_type == ELF::SHT_LLVM_FUNC_MAP;
+  };

jh7370 wrote:

Please define this inline, since it's only used once, and delete the trailing 
return type (since that is automatically derived from the result of the return 
expression, of which there is only one).

https://github.com/llvm/llvm-project/pull/124333
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [SHT_LLVM_FUNC_MAP][llvm-readobj]Introduce function address map section and emit dynamic instruction count(readobj part) (PR #124333)

2025-02-19 Thread James Henderson via llvm-branch-commits


@@ -940,6 +940,92 @@ ELFFile::decodeBBAddrMap(const Elf_Shdr &Sec, const 
Elf_Shdr *RelaSec,
   return std::move(AddrMapsOrErr);
 }
 
+template 
+Expected>
+ELFFile::decodeFuncMap(const Elf_Shdr &Sec,
+ const Elf_Shdr *RelaSec) const {
+  bool IsRelocatable = this->getHeader().e_type == ELF::ET_REL;
+
+  // This DenseMap maps the offset of each function (the location of the
+  // reference to the function in the SHT_LLVM_FUNC_MAP section) to the
+  // addend (the location of the function in the text section).
+  llvm::DenseMap FunctionOffsetTranslations;
+  if (IsRelocatable && RelaSec) {
+assert(RelaSec &&
+   "Can't read a SHT_LLVM_FUNC_ADDR_MAP section in a relocatable "
+   "object file without providing a relocation section.");
+Expected::Elf_Rela_Range> Relas =
+this->relas(*RelaSec);
+if (!Relas)
+  return createError("unable to read relocations for section " +
+ describe(*this, Sec) + ": " +
+ toString(Relas.takeError()));
+for (typename ELFFile::Elf_Rela Rela : *Relas)
+  FunctionOffsetTranslations[Rela.r_offset] = Rela.r_addend;
+  }
+  auto GetAddressForRelocation =
+  [&](unsigned RelocationOffsetInSection) -> Expected {
+auto FOTIterator =
+FunctionOffsetTranslations.find(RelocationOffsetInSection);
+if (FOTIterator == FunctionOffsetTranslations.end()) {
+  return createError("failed to get relocation data for offset: " +
+ Twine::utohexstr(RelocationOffsetInSection) +
+ " in section " + describe(*this, Sec));
+}
+return FOTIterator->second;
+  };
+  Expected> ContentsOrErr = this->getSectionContents(Sec);
+  if (!ContentsOrErr)
+return ContentsOrErr.takeError();
+  ArrayRef Content = *ContentsOrErr;
+  DataExtractor Data(Content, this->isLE(), ELFT::Is64Bits ? 8 : 4);
+  std::vector FunctionEntries;
+
+  DataExtractor::Cursor Cur(0);
+
+  // Helper lambda to extract the (possibly relocatable) address stored at 
Cur.
+  auto ExtractAddress = [&]() -> Expected::uintX_t> {
+uint64_t RelocationOffsetInSection = Cur.tell();
+auto Address =
+static_cast::uintX_t>(Data.getAddress(Cur));
+if (!Cur)
+  return Cur.takeError();
+if (!IsRelocatable)
+  return Address;
+assert(Address == 0);
+Expected AddressOrErr =
+GetAddressForRelocation(RelocationOffsetInSection);
+if (!AddressOrErr)
+  return AddressOrErr.takeError();
+return *AddressOrErr;
+  };
+
+  uint8_t Version = 0;
+  while (Cur && Cur.tell() < Content.size()) {
+if (Sec.sh_type == ELF::SHT_LLVM_FUNC_MAP) {

jh7370 wrote:

Why is this check needed?

https://github.com/llvm/llvm-project/pull/124333
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [SHT_LLVM_FUNC_MAP][llvm-readobj]Introduce function address map section and emit dynamic instruction count(readobj part) (PR #124333)

2025-02-19 Thread James Henderson via llvm-branch-commits


@@ -7922,6 +7928,59 @@ void LLVMELFDumper::printBBAddrMaps(bool 
PrettyPGOAnalysis) {
   }
 }
 
+template  void LLVMELFDumper::printFuncMaps() {
+  bool IsRelocatable = this->Obj.getHeader().e_type == ELF::ET_REL;
+  using Elf_Shdr = typename ELFT::Shdr;
+  auto IsMatch = [](const Elf_Shdr &Sec) -> bool {
+return Sec.sh_type == ELF::SHT_LLVM_FUNC_MAP;
+  };
+  Expected> SecRelocMapOrErr =
+  this->Obj.getSectionAndRelocations(IsMatch);
+  if (!SecRelocMapOrErr) {
+this->reportUniqueWarning("failed to get SHT_LLVM_FUNC_MAP section(s): " +
+  toString(SecRelocMapOrErr.takeError()));
+return;
+  }
+
+  for (auto const &[Sec, RelocSec] : *SecRelocMapOrErr) {
+std::optional FunctionSec;
+if (IsRelocatable)
+  FunctionSec =
+  unwrapOrError(this->FileName, this->Obj.getSection(Sec->sh_link));

jh7370 wrote:

Do not use `unwrapOrError` in new code. The dumper should be tolerant of 
slightly dodgy looking object files, as the dumper is often the only way of 
finding out what's gone wrong (short of decoding the bytes by hand).

A warning is fine here and you can then either continue as if the relocation 
section didn't exist or bail out. See my comments elsewhere about the 
requirement for a relocation section.

https://github.com/llvm/llvm-project/pull/124333
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] release/20.x: Revert "Do not use `private` as the default AS for when `generic` is available (#112442)" (PR #127771)

2025-02-19 Thread via llvm-branch-commits

llvmbot wrote:



@llvm/pr-subscribers-clang

@llvm/pr-subscribers-backend-amdgpu

Author: Matt Arsenault (arsenm)


Changes

This reverts commit 6e0b0038cd65ce726ce404305a06e1cf33e36cca.

This breaks the rocm-device-libs build, so it should not ship in the release.

---

Patch is 214.51 KiB, truncated to 20.00 KiB below, full version: 
https://github.com/llvm/llvm-project/pull/127771.diff


21 Files Affected:

- (modified) clang/lib/Basic/Targets/AMDGPU.cpp (+3-3) 
- (modified) clang/lib/CodeGen/CGBlocks.cpp (+1-2) 
- (modified) clang/lib/CodeGen/CGBuiltin.cpp (+2-9) 
- (modified) clang/test/CodeGen/scoped-fence-ops.c (+120-61) 
- (modified) clang/test/CodeGenOpenCL/addr-space-struct-arg.cl (+70-99) 
- (modified) clang/test/CodeGenOpenCL/amdgcn-automatic-variable.cl (+16-20) 
- (modified) clang/test/CodeGenOpenCL/amdgpu-abi-struct-arg-byref.cl (+118-164) 
- (modified) clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl (+220-275) 
- (modified) clang/test/CodeGenOpenCL/amdgpu-nullptr.cl (+14-14) 
- (modified) clang/test/CodeGenOpenCL/atomic-ops.cl (+4-7) 
- (modified) clang/test/CodeGenOpenCL/atomics-unsafe-hw-remarks-gfx90a.cl 
(+3-3) 
- (modified) clang/test/CodeGenOpenCL/blocks.cl (+12-11) 
- (modified) clang/test/CodeGenOpenCL/builtins-alloca.cl (+4-428) 
- (modified) clang/test/CodeGenOpenCL/builtins-amdgcn-gfx12.cl (+56-87) 
- (modified) clang/test/CodeGenOpenCL/builtins-amdgcn-gfx940.cl (+12-18) 
- (modified) clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx12.cl (+2-2) 
- (modified) clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx8.cl (+1-1) 
- (modified) clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx90a.cl (+1-1) 
- (modified) clang/test/CodeGenOpenCL/enqueue-kernel-non-entry-block.cl (+1-1) 
- (modified) clang/test/CodeGenOpenCL/opencl_types.cl (+1-1) 
- (modified) clang/test/Index/pipe-size.cl (+2-2) 


``diff
diff --git a/clang/lib/Basic/Targets/AMDGPU.cpp 
b/clang/lib/Basic/Targets/AMDGPU.cpp
index 0d308cb6af969..9ea366af56a52 100644
--- a/clang/lib/Basic/Targets/AMDGPU.cpp
+++ b/clang/lib/Basic/Targets/AMDGPU.cpp
@@ -261,9 +261,9 @@ AMDGPUTargetInfo::AMDGPUTargetInfo(const llvm::Triple 
&Triple,
 void AMDGPUTargetInfo::adjust(DiagnosticsEngine &Diags, LangOptions &Opts) {
   TargetInfo::adjust(Diags, Opts);
   // ToDo: There are still a few places using default address space as private
-  // address space in OpenCL, which needs to be cleaned up, then the references
-  // to OpenCL can be removed from the following line.
-  setAddressSpaceMap((Opts.OpenCL && !Opts.OpenCLGenericAddressSpace) ||
+  // address space in OpenCL, which needs to be cleaned up, then Opts.OpenCL
+  // can be removed from the following line.
+  setAddressSpaceMap(/*DefaultIsPrivate=*/Opts.OpenCL ||
  !isAMDGCN(getTriple()));
 }
 
diff --git a/clang/lib/CodeGen/CGBlocks.cpp b/clang/lib/CodeGen/CGBlocks.cpp
index a7584a95c8ca7..f38f86c792f69 100644
--- a/clang/lib/CodeGen/CGBlocks.cpp
+++ b/clang/lib/CodeGen/CGBlocks.cpp
@@ -1396,8 +1396,7 @@ void CodeGenFunction::setBlockContextParameter(const 
ImplicitParamDecl *D,
   DI->setLocation(D->getLocation());
   DI->EmitDeclareOfBlockLiteralArgVariable(
   *BlockInfo, D->getName(), argNum,
-  cast(alloc.getPointer()->stripPointerCasts()),
-  Builder);
+  cast(alloc.getPointer()), Builder);
 }
   }
 
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 7ec9d59bfed5c..5237533364294 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -6092,13 +6092,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl 
GD, unsigned BuiltinID,
   /*IndexTypeQuals=*/0);
   auto Tmp = CreateMemTemp(SizeArrayTy, "block_sizes");
   llvm::Value *TmpPtr = Tmp.getPointer();
-  // The EmitLifetime* pair expect a naked Alloca as their last argument,
-  // however for cases where the default AS is not the Alloca AS, Tmp is
-  // actually the Alloca ascasted to the default AS, hence the
-  // stripPointerCasts()
-  llvm::Value *Alloca = TmpPtr->stripPointerCasts();
   llvm::Value *TmpSize = EmitLifetimeStart(
-  CGM.getDataLayout().getTypeAllocSize(Tmp.getElementType()), Alloca);
+  CGM.getDataLayout().getTypeAllocSize(Tmp.getElementType()), TmpPtr);
   llvm::Value *ElemPtr;
   // Each of the following arguments specifies the size of the 
corresponding
   // argument passed to the enqueued block.
@@ -6114,9 +6109,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl 
GD, unsigned BuiltinID,
 Builder.CreateAlignedStore(
 V, GEP, CGM.getDataLayout().getPrefTypeAlign(SizeTy));
   }
-  // Return the Alloca itself rather than a potential ascast as this is 
only
-  // used by the paired EmitLifetimeEnd.
-  return std::tie(ElemPtr, TmpSize, Alloca);
+  return std::tie(ElemPtr, TmpSize, TmpPtr);
 };
 
 // Could have events and/or varargs.
diff --git a

[llvm-branch-commits] [clang] release/20.x: Revert "Do not use `private` as the default AS for when `generic` is available (#112442)" (PR #127771)

2025-02-19 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm milestoned 
https://github.com/llvm/llvm-project/pull/127771
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] release/20.x: Revert "Do not use `private` as the default AS for when `generic` is available (#112442)" (PR #127771)

2025-02-19 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm edited 
https://github.com/llvm/llvm-project/pull/127771
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [SHT_LLVM_FUNC_MAP][llvm-readobj]Introduce function address map section and emit dynamic instruction count(readobj part) (PR #124333)

2025-02-19 Thread James Henderson via llvm-branch-commits


@@ -940,6 +940,92 @@ ELFFile::decodeBBAddrMap(const Elf_Shdr &Sec, const 
Elf_Shdr *RelaSec,
   return std::move(AddrMapsOrErr);
 }
 
+template 
+Expected>
+ELFFile::decodeFuncMap(const Elf_Shdr &Sec,
+ const Elf_Shdr *RelaSec) const {
+  bool IsRelocatable = this->getHeader().e_type == ELF::ET_REL;

jh7370 wrote:

Do we really need this check? Would not the value of `RelaSec` (`nullptr` or 
otherwise) be sufficient?

https://github.com/llvm/llvm-project/pull/124333
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [SHT_LLVM_FUNC_MAP][llvm-readobj]Introduce function address map section and emit dynamic instruction count(readobj part) (PR #124333)

2025-02-19 Thread James Henderson via llvm-branch-commits


@@ -940,6 +940,92 @@ ELFFile::decodeBBAddrMap(const Elf_Shdr &Sec, const 
Elf_Shdr *RelaSec,
   return std::move(AddrMapsOrErr);
 }
 
+template 
+Expected>
+ELFFile::decodeFuncMap(const Elf_Shdr &Sec,
+ const Elf_Shdr *RelaSec) const {
+  bool IsRelocatable = this->getHeader().e_type == ELF::ET_REL;
+
+  // This DenseMap maps the offset of each function (the location of the
+  // reference to the function in the SHT_LLVM_FUNC_MAP section) to the
+  // addend (the location of the function in the text section).
+  llvm::DenseMap FunctionOffsetTranslations;
+  if (IsRelocatable && RelaSec) {
+assert(RelaSec &&
+   "Can't read a SHT_LLVM_FUNC_ADDR_MAP section in a relocatable "
+   "object file without providing a relocation section.");
+Expected::Elf_Rela_Range> Relas =
+this->relas(*RelaSec);
+if (!Relas)
+  return createError("unable to read relocations for section " +
+ describe(*this, Sec) + ": " +
+ toString(Relas.takeError()));
+for (typename ELFFile::Elf_Rela Rela : *Relas)
+  FunctionOffsetTranslations[Rela.r_offset] = Rela.r_addend;

jh7370 wrote:

Not all relocations function in the same way. Naively assuming that the 
`r_addend` and `r_offset` work like this is not going to be correct in some 
cases. The ELF gABI only describes `r_addend` as a "constant addend used to 
compute the value".

Have you looked into the Object/RelocationResolver.h? It's used elsewhere by 
llvm-readobj to calculate the values of relocations and may be of some use (see 
`printRelocatableStackSizes` for an example usage).

https://github.com/llvm/llvm-project/pull/124333
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] release/20.x: [clang][CodeGen] `sret` args should always point to the `alloca` AS, so use that (#114062) (PR #127552)

2025-02-19 Thread Matt Arsenault via llvm-branch-commits

arsenm wrote:

> Sans this patch DeviceLibs on our side will be broken when using the vanilla 
> upstream.

6e0b0038 should have been reverted upstream right away, and not internally. 
We're now in this terrible situation 



https://github.com/llvm/llvm-project/pull/127552
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] release/20.x: [clang][CodeGen] `sret` args should always point to the `alloca` AS, so use that (#114062) (PR #127552)

2025-02-19 Thread Matt Arsenault via llvm-branch-commits

arsenm wrote:

> @arsenm This is a pretty large change, what bug does it fix?

The rocm device libs build crashes without this. Alternatively we need to 
revert 6e0b0038cd65ce726ce404305a06e1cf33e36cca

https://github.com/llvm/llvm-project/pull/127552
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [AMDGPU][docs] Replace gfx940 and gfx941 with gfx942 in llvm/docs (PR #126887)

2025-02-19 Thread Fabian Ritter via llvm-branch-commits

ritter-x2a wrote:

### Merge activity

* **Feb 19, 3:48 AM EST**: A user started a stack merge that includes this pull 
request via 
[Graphite](https://app.graphite.dev/github/pr/llvm/llvm-project/126887).


https://github.com/llvm/llvm-project/pull/126887
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [AMDGPU] Remove FeatureForceStoreSC0SC1 (PR #126878)

2025-02-19 Thread Fabian Ritter via llvm-branch-commits

ritter-x2a wrote:

### Merge activity

* **Feb 19, 3:48 AM EST**: A user started a stack merge that includes this pull 
request via 
[Graphite](https://app.graphite.dev/github/pr/llvm/llvm-project/126878).


https://github.com/llvm/llvm-project/pull/126878
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [mlir] [AMDGPU][MLIR] Replace gfx940 and gfx941 with gfx942 in MLIR (PR #125836)

2025-02-19 Thread Fabian Ritter via llvm-branch-commits

ritter-x2a wrote:

### Merge activity

* **Feb 19, 3:48 AM EST**: A user started a stack merge that includes this pull 
request via 
[Graphite](https://app.graphite.dev/github/pr/llvm/llvm-project/125836).


https://github.com/llvm/llvm-project/pull/125836
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [flang] [AMDGPU] Add missing gfx architectures to AddFlangOffloadRuntime.cmake (PR #125827)

2025-02-19 Thread Fabian Ritter via llvm-branch-commits

ritter-x2a wrote:

### Merge activity

* **Feb 19, 3:48 AM EST**: A user started a stack merge that includes this pull 
request via 
[Graphite](https://app.graphite.dev/github/pr/llvm/llvm-project/125827).


https://github.com/llvm/llvm-project/pull/125827
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [AMDGPU][docs][NFC] Replace gfx940 with gfx942 in the gfx940 ISA doc (PR #126906)

2025-02-19 Thread Fabian Ritter via llvm-branch-commits

ritter-x2a wrote:

### Merge activity

* **Feb 19, 3:48 AM EST**: A user started a stack merge that includes this pull 
request via 
[Graphite](https://app.graphite.dev/github/pr/llvm/llvm-project/126906).


https://github.com/llvm/llvm-project/pull/126906
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] [AMDGPU][clang] Replace gfx940 and gfx941 with gfx942 in clang (PR #126762)

2025-02-19 Thread Fabian Ritter via llvm-branch-commits

ritter-x2a wrote:

### Merge activity

* **Feb 19, 3:48 AM EST**: A user started a stack merge that includes this pull 
request via 
[Graphite](https://app.graphite.dev/github/pr/llvm/llvm-project/126762).


https://github.com/llvm/llvm-project/pull/126762
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] [llvm] [AMDGPU] Replace gfx940 and gfx941 with gfx942 in llvm (PR #126763)

2025-02-19 Thread Fabian Ritter via llvm-branch-commits

ritter-x2a wrote:

### Merge activity

* **Feb 19, 3:48 AM EST**: A user started a stack merge that includes this pull 
request via 
[Graphite](https://app.graphite.dev/github/pr/llvm/llvm-project/126763).


https://github.com/llvm/llvm-project/pull/126763
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [lld] [lld][LoongArch] Relax TLSDESC code sequence (PR #123677)

2025-02-19 Thread Lu Weining via llvm-branch-commits

SixWeining wrote:

cc @xen0n for review

https://github.com/llvm/llvm-project/pull/123677
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [lld] [lld][LoongArch] Relax TLSDESC code sequence (PR #123677)

2025-02-19 Thread Lu Weining via llvm-branch-commits

https://github.com/SixWeining commented:

Actually the `FIXME` in the commit message is `TODO`, right?

https://github.com/llvm/llvm-project/pull/123677
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [lld] [lld][LoongArch] Support TLSDESC GD/LD to IE/LE (PR #123715)

2025-02-19 Thread Lu Weining via llvm-branch-commits

SixWeining wrote:

cc @xen0n 

https://github.com/llvm/llvm-project/pull/123715
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] Reland: [clang] Track function template instantiation from definition… (PR #127777)

2025-02-19 Thread Matheus Izvekov via llvm-branch-commits

https://github.com/mizvekov milestoned 
https://github.com/llvm/llvm-project/pull/127777
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] Reland: [clang] Track function template instantiation from definition… (PR #127777)

2025-02-19 Thread Matheus Izvekov via llvm-branch-commits

https://github.com/mizvekov created 
https://github.com/llvm/llvm-project/pull/127777

… (#125266)

This fixes instantiation of definition for friend function templates, when the 
declaration found and the one containing the definition have different template 
contexts.

In these cases, the function declaration corresponding to the definition is 
not available; it may not even be instantiated at all.

So this patch adds a bit which tracks which function template declaration was 
instantiated from the member template. It's used to find which primary template 
serves as a context for the purpose of obtaining the template arguments 
needed to instantiate the definition.

Fixes #55509

>From c0f86c988617ab5104d5a95fbcac38fd0a8ee4d7 Mon Sep 17 00:00:00 2001
From: Matheus Izvekov 
Date: Wed, 5 Feb 2025 14:12:12 -0300
Subject: [PATCH] Reland: [clang] Track function template instantiation from
 definition (#125266)

This fixes instantiation of definition for friend function templates,
when the declaration found and the one containing the definition
have different template contexts.

In these cases, the function declaration corresponding to the
definition is not available; it may not even be instantiated at all.

So this patch adds a bit which tracks which function template
declaration was instantiated from the member template.
It's used to find which primary template serves as a context
for the purpose of obtaining the template arguments needed
to instantiate the definition.

Fixes #55509
---
 clang/docs/ReleaseNotes.rst   |   1 +
 clang/include/clang/AST/Decl.h|   7 ++
 clang/include/clang/AST/DeclBase.h|  10 +-
 clang/include/clang/AST/DeclTemplate.h|  20 
 clang/lib/AST/Decl.cpp|   1 +
 clang/lib/Sema/SemaTemplateDeduction.cpp  |  17 +--
 clang/lib/Sema/SemaTemplateInstantiate.cpp|   9 +-
 .../lib/Sema/SemaTemplateInstantiateDecl.cpp  |  27 -
 clang/lib/Serialization/ASTReaderDecl.cpp |   1 +
 clang/lib/Serialization/ASTWriterDecl.cpp |   3 +-
 clang/test/SemaTemplate/GH55509.cpp   | 112 ++
 11 files changed, 180 insertions(+), 28 deletions(-)
 create mode 100644 clang/test/SemaTemplate/GH55509.cpp

diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index ad1a5e7ae282e..ee161515fe68b 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -1053,6 +1053,7 @@ Bug Fixes to C++ Support
   template parameter. Now, such expression can be used with ``static_assert`` 
and ``constexpr``. (#GH123498)
 - Correctly determine the implicit constexprness of lambdas in dependent 
contexts. (#GH97958) (#GH114234)
 - Fix that some dependent immediate expressions did not cause immediate 
escalation (#GH119046)
+- Clang is now better at keeping track of friend function template instance 
contexts. (#GH55509)
 
 Bug Fixes to AST Handling
 ^^^^^^^^^^^^^^^^^^^^^^^^^
diff --git a/clang/include/clang/AST/Decl.h b/clang/include/clang/AST/Decl.h
index 9593bab576412..362a2741a0cdd 100644
--- a/clang/include/clang/AST/Decl.h
+++ b/clang/include/clang/AST/Decl.h
@@ -2298,6 +2298,13 @@ class FunctionDecl : public DeclaratorDecl,
 FunctionDeclBits.IsLateTemplateParsed = ILT;
   }
 
+  bool isInstantiatedFromMemberTemplate() const {
+return FunctionDeclBits.IsInstantiatedFromMemberTemplate;
+  }
+  void setInstantiatedFromMemberTemplate(bool Val = true) {
+FunctionDeclBits.IsInstantiatedFromMemberTemplate = Val;
+  }
+
   /// Whether this function is "trivial" in some specialized C++ senses.
   /// Can only be true for default constructors, copy constructors,
   /// copy assignment operators, and destructors.  Not meaningful until
diff --git a/clang/include/clang/AST/DeclBase.h 
b/clang/include/clang/AST/DeclBase.h
index 3bb82c1572ef9..648dae2838e03 100644
--- a/clang/include/clang/AST/DeclBase.h
+++ b/clang/include/clang/AST/DeclBase.h
@@ -1780,6 +1780,8 @@ class DeclContext {
 uint64_t HasImplicitReturnZero : 1;
 LLVM_PREFERRED_TYPE(bool)
 uint64_t IsLateTemplateParsed : 1;
+LLVM_PREFERRED_TYPE(bool)
+uint64_t IsInstantiatedFromMemberTemplate : 1;
 
 /// Kind of contexpr specifier as defined by ConstexprSpecKind.
 LLVM_PREFERRED_TYPE(ConstexprSpecKind)
@@ -1830,7 +1832,7 @@ class DeclContext {
   };
 
   /// Number of inherited and non-inherited bits in FunctionDeclBitfields.
-  enum { NumFunctionDeclBits = NumDeclContextBits + 31 };
+  enum { NumFunctionDeclBits = NumDeclContextBits + 32 };
 
   /// Stores the bits used by CXXConstructorDecl. If modified
   /// NumCXXConstructorDeclBits and the accessor
@@ -1841,12 +1843,12 @@ class DeclContext {
 LLVM_PREFERRED_TYPE(FunctionDeclBitfields)
 uint64_t : NumFunctionDeclBits;
 
-/// 20 bits to fit in the remaining available space.
+/// 19 bits to fit in the remaining available space.
 /// Note that this makes CXXConstructorDeclBitfields take
 /// exactly 6

[llvm-branch-commits] [lld] [lld][LoongArch] Support relaxation during TLSDESC GD/LD to IE/LE conversion (PR #123730)

2025-02-19 Thread Lu Weining via llvm-branch-commits

SixWeining wrote:

cc @xen0n 

https://github.com/llvm/llvm-project/pull/123730
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] Reland: [clang] Track function template instantiation from definition… (PR #127777)

2025-02-19 Thread via llvm-branch-commits

llvmbot wrote:



@llvm/pr-subscribers-clang

@llvm/pr-subscribers-clang-modules

Author: Matheus Izvekov (mizvekov)


Changes

… (#125266)

This fixes instantiation of definition for friend function templates, when the 
declaration found and the one containing the definition have different template 
contexts.

In these cases, the function declaration corresponding to the definition is 
not available; it may not even be instantiated at all.

So this patch adds a bit which tracks which function template declaration was 
instantiated from the member template. It's used to find which primary template 
serves as a context for the purpose of obtaining the template arguments 
needed to instantiate the definition.

Fixes #55509

---
Full diff: https://github.com/llvm/llvm-project/pull/127777.diff


11 Files Affected:

- (modified) clang/docs/ReleaseNotes.rst (+1) 
- (modified) clang/include/clang/AST/Decl.h (+7) 
- (modified) clang/include/clang/AST/DeclBase.h (+6-4) 
- (modified) clang/include/clang/AST/DeclTemplate.h (+20) 
- (modified) clang/lib/AST/Decl.cpp (+1) 
- (modified) clang/lib/Sema/SemaTemplateDeduction.cpp (+1-16) 
- (modified) clang/lib/Sema/SemaTemplateInstantiate.cpp (+4-5) 
- (modified) clang/lib/Sema/SemaTemplateInstantiateDecl.cpp (+25-2) 
- (modified) clang/lib/Serialization/ASTReaderDecl.cpp (+1) 
- (modified) clang/lib/Serialization/ASTWriterDecl.cpp (+2-1) 
- (added) clang/test/SemaTemplate/GH55509.cpp (+112) 


```diff
diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index ad1a5e7ae282e..ee161515fe68b 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -1053,6 +1053,7 @@ Bug Fixes to C++ Support
   template parameter. Now, such expression can be used with ``static_assert`` 
and ``constexpr``. (#GH123498)
 - Correctly determine the implicit constexprness of lambdas in dependent 
contexts. (#GH97958) (#GH114234)
 - Fix that some dependent immediate expressions did not cause immediate 
escalation (#GH119046)
+- Clang is now better at keeping track of friend function template instance 
contexts. (#GH55509)
 
 Bug Fixes to AST Handling
 ^^^^^^^^^^^^^^^^^^^^^^^^^
diff --git a/clang/include/clang/AST/Decl.h b/clang/include/clang/AST/Decl.h
index 9593bab576412..362a2741a0cdd 100644
--- a/clang/include/clang/AST/Decl.h
+++ b/clang/include/clang/AST/Decl.h
@@ -2298,6 +2298,13 @@ class FunctionDecl : public DeclaratorDecl,
 FunctionDeclBits.IsLateTemplateParsed = ILT;
   }
 
+  bool isInstantiatedFromMemberTemplate() const {
+return FunctionDeclBits.IsInstantiatedFromMemberTemplate;
+  }
+  void setInstantiatedFromMemberTemplate(bool Val = true) {
+FunctionDeclBits.IsInstantiatedFromMemberTemplate = Val;
+  }
+
   /// Whether this function is "trivial" in some specialized C++ senses.
   /// Can only be true for default constructors, copy constructors,
   /// copy assignment operators, and destructors.  Not meaningful until
diff --git a/clang/include/clang/AST/DeclBase.h 
b/clang/include/clang/AST/DeclBase.h
index 3bb82c1572ef9..648dae2838e03 100644
--- a/clang/include/clang/AST/DeclBase.h
+++ b/clang/include/clang/AST/DeclBase.h
@@ -1780,6 +1780,8 @@ class DeclContext {
 uint64_t HasImplicitReturnZero : 1;
 LLVM_PREFERRED_TYPE(bool)
 uint64_t IsLateTemplateParsed : 1;
+LLVM_PREFERRED_TYPE(bool)
+uint64_t IsInstantiatedFromMemberTemplate : 1;
 
 /// Kind of contexpr specifier as defined by ConstexprSpecKind.
 LLVM_PREFERRED_TYPE(ConstexprSpecKind)
@@ -1830,7 +1832,7 @@ class DeclContext {
   };
 
   /// Number of inherited and non-inherited bits in FunctionDeclBitfields.
-  enum { NumFunctionDeclBits = NumDeclContextBits + 31 };
+  enum { NumFunctionDeclBits = NumDeclContextBits + 32 };
 
   /// Stores the bits used by CXXConstructorDecl. If modified
   /// NumCXXConstructorDeclBits and the accessor
@@ -1841,12 +1843,12 @@ class DeclContext {
 LLVM_PREFERRED_TYPE(FunctionDeclBitfields)
 uint64_t : NumFunctionDeclBits;
 
-/// 20 bits to fit in the remaining available space.
+/// 19 bits to fit in the remaining available space.
 /// Note that this makes CXXConstructorDeclBitfields take
 /// exactly 64 bits and thus the width of NumCtorInitializers
 /// will need to be shrunk if some bit is added to NumDeclContextBitfields,
 /// NumFunctionDeclBitfields or CXXConstructorDeclBitfields.
-uint64_t NumCtorInitializers : 17;
+uint64_t NumCtorInitializers : 16;
 LLVM_PREFERRED_TYPE(bool)
 uint64_t IsInheritingConstructor : 1;
 
@@ -1860,7 +1862,7 @@ class DeclContext {
   };
 
   /// Number of inherited and non-inherited bits in 
CXXConstructorDeclBitfields.
-  enum { NumCXXConstructorDeclBits = NumFunctionDeclBits + 20 };
+  enum { NumCXXConstructorDeclBits = NumFunctionDeclBits + 19 };
 
   /// Stores the bits used by ObjCMethodDecl.
   /// If modified NumObjCMethodDeclBits and the accessor
diff --git a/clang/include/clang/AST/DeclTemplate.h 
b/clang/include/c

[llvm-branch-commits] [clang] Reland: [clang] Track function template instantiation from definition… (PR #127777)

2025-02-19 Thread Matheus Izvekov via llvm-branch-commits

https://github.com/mizvekov edited 
https://github.com/llvm/llvm-project/pull/127777
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] Reland: [clang] Track function template instantiation from definition… (PR #127777)

2025-02-19 Thread Matheus Izvekov via llvm-branch-commits

mizvekov wrote:

This is a cherry-pick of https://github.com/llvm/llvm-project/pull/125266 into 
the 20.x release branch.

https://github.com/llvm/llvm-project/pull/127777
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [lld] [lld][LoongArch] Convert TLS IE to LE in the normal or medium code model (PR #123680)

2025-02-19 Thread Lu Weining via llvm-branch-commits

SixWeining wrote:

cc @xen0n 

https://github.com/llvm/llvm-project/pull/123680
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [lld] [lld][LoongArch] Support relaxation during IE to LE conversion (PR #123702)

2025-02-19 Thread Lu Weining via llvm-branch-commits

SixWeining wrote:

cc @xen0n 

https://github.com/llvm/llvm-project/pull/123702
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [lld] [lld][LoongArch] GOT indirection to PC relative optimization (PR #123743)

2025-02-19 Thread Lu Weining via llvm-branch-commits

SixWeining wrote:

cc @xen0n 

https://github.com/llvm/llvm-project/pull/123743
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] [lldb] release/20.x: Reland: [clang] fix P3310 overload resolution flag propagation (#125791) (PR #127779)

2025-02-19 Thread via llvm-branch-commits

llvmbot wrote:

@erichkeane What do you think about merging this PR to the release branch?

https://github.com/llvm/llvm-project/pull/127779
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] [lldb] release/20.x: Reland: [clang] fix P3310 overload resolution flag propagation (#125791) (PR #127779)

2025-02-19 Thread via llvm-branch-commits

https://github.com/llvmbot milestoned 
https://github.com/llvm/llvm-project/pull/127779
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] [lldb] release/20.x: Reland: [clang] fix P3310 overload resolution flag propagation (#125791) (PR #127779)

2025-02-19 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-clang

Author: None (llvmbot)


Changes

Backport 08bda1cc6b0d2f1d31a89a76b5c154a11086c420

Requested by: @mizvekov

---

Patch is 232.51 KiB, truncated to 20.00 KiB below, full version: 
https://github.com/llvm/llvm-project/pull/127779.diff


16 Files Affected:

- (modified) clang/include/clang/AST/DeclTemplate.h (+14-2) 
- (modified) clang/include/clang/Sema/Sema.h (+2-2) 
- (modified) clang/lib/AST/ASTImporter.cpp (+3-3) 
- (modified) clang/lib/AST/DeclTemplate.cpp (+24-23) 
- (modified) clang/lib/AST/JSONNodeDumper.cpp (+5) 
- (modified) clang/lib/AST/TextNodeDumper.cpp (+4-1) 
- (modified) clang/lib/Sema/SemaTemplate.cpp (+5-3) 
- (modified) clang/lib/Sema/SemaTemplateDeduction.cpp (-2) 
- (modified) clang/lib/Sema/SemaTemplateInstantiateDecl.cpp (+1-1) 
- (modified) clang/lib/Sema/SemaType.cpp (+2-1) 
- (modified) clang/lib/Serialization/ASTReaderDecl.cpp (+1) 
- (modified) clang/lib/Serialization/ASTWriterDecl.cpp (+1) 
- (modified) clang/test/AST/ast-dump-templates.cpp (+6045-2) 
- (modified) clang/test/AST/gen_ast_dump_json_test.py (+17-4) 
- (modified) clang/test/SemaTemplate/cwg2398.cpp (+17) 
- (modified) lldb/source/Plugins/ExpressionParser/Clang/CxxModuleHandler.cpp 
(+2-1) 


```diff
diff --git a/clang/include/clang/AST/DeclTemplate.h 
b/clang/include/clang/AST/DeclTemplate.h
index 9ecff2c898acd..03c43765206b1 100644
--- a/clang/include/clang/AST/DeclTemplate.h
+++ b/clang/include/clang/AST/DeclTemplate.h
@@ -1841,15 +1841,23 @@ class ClassTemplateSpecializationDecl : public 
CXXRecordDecl,
   LLVM_PREFERRED_TYPE(TemplateSpecializationKind)
   unsigned SpecializationKind : 3;
 
+  /// Indicate that we have matched a parameter pack with a non pack
+  /// argument, when the opposite match is also allowed (strict pack match).
+  /// This needs to be cached as deduction is performed during declaration,
+  /// and we need the information to be preserved so that it is consistent
+  /// during instantiation.
+  bool MatchedPackOnParmToNonPackOnArg : 1;
+
 protected:
   ClassTemplateSpecializationDecl(ASTContext &Context, Kind DK, TagKind TK,
   DeclContext *DC, SourceLocation StartLoc,
   SourceLocation IdLoc,
   ClassTemplateDecl *SpecializedTemplate,
   ArrayRef<TemplateArgument> Args,
+  bool MatchedPackOnParmToNonPackOnArg,
   ClassTemplateSpecializationDecl *PrevDecl);
 
-  explicit ClassTemplateSpecializationDecl(ASTContext &C, Kind DK);
+  ClassTemplateSpecializationDecl(ASTContext &C, Kind DK);
 
 public:
   friend class ASTDeclReader;
@@ -1859,7 +1867,7 @@ class ClassTemplateSpecializationDecl : public 
CXXRecordDecl,
   Create(ASTContext &Context, TagKind TK, DeclContext *DC,
  SourceLocation StartLoc, SourceLocation IdLoc,
  ClassTemplateDecl *SpecializedTemplate,
- ArrayRef<TemplateArgument> Args,
+ ArrayRef<TemplateArgument> Args, bool MatchedPackOnParmToNonPackOnArg,
  ClassTemplateSpecializationDecl *PrevDecl);
   static ClassTemplateSpecializationDecl *CreateDeserialized(ASTContext &C,
  GlobalDeclID ID);
@@ -1930,6 +1938,10 @@ class ClassTemplateSpecializationDecl : public 
CXXRecordDecl,
 SpecializationKind = TSK;
   }
 
+  bool hasMatchedPackOnParmToNonPackOnArg() const {
+return MatchedPackOnParmToNonPackOnArg;
+  }
+
   /// Get the point of instantiation (if any), or null if none.
   SourceLocation getPointOfInstantiation() const {
 return PointOfInstantiation;
diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h
index eb82d1b978e94..a30a7076ea5d4 100644
--- a/clang/include/clang/Sema/Sema.h
+++ b/clang/include/clang/Sema/Sema.h
@@ -13491,8 +13491,8 @@ class Sema final : public SemaBase {
   bool InstantiateClassTemplateSpecialization(
   SourceLocation PointOfInstantiation,
   ClassTemplateSpecializationDecl *ClassTemplateSpec,
-  TemplateSpecializationKind TSK, bool Complain = true,
-  bool PrimaryHasMatchedPackOnParmToNonPackOnArg = false);
+  TemplateSpecializationKind TSK, bool Complain,
+  bool PrimaryHasMatchedPackOnParmToNonPackOnArg);
 
   /// Instantiates the definitions of all of the member
   /// of the given class, which is an instantiation of a class template
diff --git a/clang/lib/AST/ASTImporter.cpp b/clang/lib/AST/ASTImporter.cpp
index 09fa10f716ec1..13e7f93233a7f 100644
--- a/clang/lib/AST/ASTImporter.cpp
+++ b/clang/lib/AST/ASTImporter.cpp
@@ -6321,9 +6321,9 @@ ExpectedDecl 
ASTNodeImporter::VisitClassTemplateSpecializationDecl(
 updateLookupTableForTemplateParameters(*ToTPList);
   } else { // Not a partial specialization.
 if (GetImportedOrCreateDecl(
-D2, D, Importer.getToContext(), D->getTagKind(), DC,
-*BeginLocOrErr, *IdLocOrErr, ClassTemplate, TemplateArgs,
-PrevDecl)

[llvm-branch-commits] [clang] [lldb] release/20.x: Reland: [clang] fix P3310 overload resolution flag propagation (#125791) (PR #127779)

2025-02-19 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-clang-modules

Author: None (llvmbot)


Changes

Backport 08bda1cc6b0d2f1d31a89a76b5c154a11086c420

Requested by: @mizvekov

---

Patch is 232.51 KiB, truncated to 20.00 KiB below, full version: 
https://github.com/llvm/llvm-project/pull/127779.diff


16 Files Affected:

- (modified) clang/include/clang/AST/DeclTemplate.h (+14-2) 
- (modified) clang/include/clang/Sema/Sema.h (+2-2) 
- (modified) clang/lib/AST/ASTImporter.cpp (+3-3) 
- (modified) clang/lib/AST/DeclTemplate.cpp (+24-23) 
- (modified) clang/lib/AST/JSONNodeDumper.cpp (+5) 
- (modified) clang/lib/AST/TextNodeDumper.cpp (+4-1) 
- (modified) clang/lib/Sema/SemaTemplate.cpp (+5-3) 
- (modified) clang/lib/Sema/SemaTemplateDeduction.cpp (-2) 
- (modified) clang/lib/Sema/SemaTemplateInstantiateDecl.cpp (+1-1) 
- (modified) clang/lib/Sema/SemaType.cpp (+2-1) 
- (modified) clang/lib/Serialization/ASTReaderDecl.cpp (+1) 
- (modified) clang/lib/Serialization/ASTWriterDecl.cpp (+1) 
- (modified) clang/test/AST/ast-dump-templates.cpp (+6045-2) 
- (modified) clang/test/AST/gen_ast_dump_json_test.py (+17-4) 
- (modified) clang/test/SemaTemplate/cwg2398.cpp (+17) 
- (modified) lldb/source/Plugins/ExpressionParser/Clang/CxxModuleHandler.cpp 
(+2-1) 


```diff
diff --git a/clang/include/clang/AST/DeclTemplate.h 
b/clang/include/clang/AST/DeclTemplate.h
index 9ecff2c898acd..03c43765206b1 100644
--- a/clang/include/clang/AST/DeclTemplate.h
+++ b/clang/include/clang/AST/DeclTemplate.h
@@ -1841,15 +1841,23 @@ class ClassTemplateSpecializationDecl : public 
CXXRecordDecl,
   LLVM_PREFERRED_TYPE(TemplateSpecializationKind)
   unsigned SpecializationKind : 3;
 
+  /// Indicate that we have matched a parameter pack with a non pack
+  /// argument, when the opposite match is also allowed (strict pack match).
+  /// This needs to be cached as deduction is performed during declaration,
+  /// and we need the information to be preserved so that it is consistent
+  /// during instantiation.
+  bool MatchedPackOnParmToNonPackOnArg : 1;
+
 protected:
   ClassTemplateSpecializationDecl(ASTContext &Context, Kind DK, TagKind TK,
   DeclContext *DC, SourceLocation StartLoc,
   SourceLocation IdLoc,
   ClassTemplateDecl *SpecializedTemplate,
   ArrayRef<TemplateArgument> Args,
+  bool MatchedPackOnParmToNonPackOnArg,
   ClassTemplateSpecializationDecl *PrevDecl);
 
-  explicit ClassTemplateSpecializationDecl(ASTContext &C, Kind DK);
+  ClassTemplateSpecializationDecl(ASTContext &C, Kind DK);
 
 public:
   friend class ASTDeclReader;
@@ -1859,7 +1867,7 @@ class ClassTemplateSpecializationDecl : public 
CXXRecordDecl,
   Create(ASTContext &Context, TagKind TK, DeclContext *DC,
  SourceLocation StartLoc, SourceLocation IdLoc,
  ClassTemplateDecl *SpecializedTemplate,
- ArrayRef<TemplateArgument> Args,
+ ArrayRef<TemplateArgument> Args, bool MatchedPackOnParmToNonPackOnArg,
  ClassTemplateSpecializationDecl *PrevDecl);
   static ClassTemplateSpecializationDecl *CreateDeserialized(ASTContext &C,
  GlobalDeclID ID);
@@ -1930,6 +1938,10 @@ class ClassTemplateSpecializationDecl : public 
CXXRecordDecl,
 SpecializationKind = TSK;
   }
 
+  bool hasMatchedPackOnParmToNonPackOnArg() const {
+return MatchedPackOnParmToNonPackOnArg;
+  }
+
   /// Get the point of instantiation (if any), or null if none.
   SourceLocation getPointOfInstantiation() const {
 return PointOfInstantiation;
diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h
index eb82d1b978e94..a30a7076ea5d4 100644
--- a/clang/include/clang/Sema/Sema.h
+++ b/clang/include/clang/Sema/Sema.h
@@ -13491,8 +13491,8 @@ class Sema final : public SemaBase {
   bool InstantiateClassTemplateSpecialization(
   SourceLocation PointOfInstantiation,
   ClassTemplateSpecializationDecl *ClassTemplateSpec,
-  TemplateSpecializationKind TSK, bool Complain = true,
-  bool PrimaryHasMatchedPackOnParmToNonPackOnArg = false);
+  TemplateSpecializationKind TSK, bool Complain,
+  bool PrimaryHasMatchedPackOnParmToNonPackOnArg);
 
   /// Instantiates the definitions of all of the member
   /// of the given class, which is an instantiation of a class template
diff --git a/clang/lib/AST/ASTImporter.cpp b/clang/lib/AST/ASTImporter.cpp
index 09fa10f716ec1..13e7f93233a7f 100644
--- a/clang/lib/AST/ASTImporter.cpp
+++ b/clang/lib/AST/ASTImporter.cpp
@@ -6321,9 +6321,9 @@ ExpectedDecl 
ASTNodeImporter::VisitClassTemplateSpecializationDecl(
 updateLookupTableForTemplateParameters(*ToTPList);
   } else { // Not a partial specialization.
 if (GetImportedOrCreateDecl(
-D2, D, Importer.getToContext(), D->getTagKind(), DC,
-*BeginLocOrErr, *IdLocOrErr, ClassTemplate, TemplateArgs,
-P

[llvm-branch-commits] [libcxx] release/20.x: [libc++][TZDB] Fixes mapping of nonexisting time. (#127330) (PR #127531)

2025-02-19 Thread Louis Dionne via llvm-branch-commits

https://github.com/ldionne approved this pull request.


https://github.com/llvm/llvm-project/pull/127531
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] [clang] Implement instantiation context note for checking template parameters (PR #126088)

2025-02-19 Thread Erich Keane via llvm-branch-commits


@@ -1251,12 +1261,18 @@ void Sema::PrintInstantiationStack(DiagFuncRef 
DiagFunc) {
 case CodeSynthesisContext::PartialOrderingTTP:
   DiagFunc(Active->PointOfInstantiation,
PDiag(diag::note_template_arg_template_params_mismatch));
-  if (SourceLocation ParamLoc = Active->Entity->getLocation();
-  ParamLoc.isValid())
-DiagFunc(ParamLoc, PDiag(diag::note_template_prev_declaration)
-   << /*isTemplateTemplateParam=*/true
-   << Active->InstantiationRange);
   break;
+case CodeSynthesisContext::CheckTemplateParameter: {
+  auto &ND = *cast<NamedDecl>(Active->Entity);

erichkeane wrote:

```suggestion
  const auto &ND = *cast<NamedDecl>(Active->Entity);
```
??

https://github.com/llvm/llvm-project/pull/126088
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] [clang] Implement instantiation context note for checking template parameters (PR #126088)

2025-02-19 Thread Erich Keane via llvm-branch-commits

https://github.com/erichkeane edited 
https://github.com/llvm/llvm-project/pull/126088
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] [clang] Implement instantiation context note for checking template parameters (PR #126088)

2025-02-19 Thread Erich Keane via llvm-branch-commits


@@ -1909,7 +1909,22 @@ class Sema final : public SemaBase {
   /// '\#pragma clang attribute push' directives to the given declaration.
   void AddPragmaAttributes(Scope *S, Decl *D);
 
-  void PrintPragmaAttributeInstantiationPoint();
+  using DiagFuncRef =
+  llvm::function_ref<void(SourceLocation, PartialDiagnostic)>;
+  auto getDefaultDiagFunc() {
+return [this](SourceLocation Loc, PartialDiagnostic PD) {
+  // This bypasses a lof of the filters in the diag engine, as it's

erichkeane wrote:

```suggestion
  // This bypasses a lot of the filters in the diag engine, as it's
```

https://github.com/llvm/llvm-project/pull/126088
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] [clang] Implement instantiation context note for checking template parameters (PR #126088)

2025-02-19 Thread Erich Keane via llvm-branch-commits


@@ -11802,9 +11817,10 @@ class Sema final : public SemaBase {
  bool PartialOrdering,
  bool *StrictPackMatch);
 
+  SmallString<128> toTerseString(const NamedDecl &D) const;

erichkeane wrote:

would love a comment that tells what 'terse string' means here.  'terse' is 
actually a pretty loaded word in C++, so explanations need to be pretty 
sizable here.

https://github.com/llvm/llvm-project/pull/126088
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [libcxx] 876a5c9 - [libc++] Avoid including <features.h> on arbitrary platforms (#125587)

2025-02-19 Thread Tom Stellard via llvm-branch-commits

Author: Louis Dionne
Date: 2025-02-19T06:26:51-08:00
New Revision: 876a5c9e5905a9666748632afba1ff83200ed95b

URL: 
https://github.com/llvm/llvm-project/commit/876a5c9e5905a9666748632afba1ff83200ed95b
DIFF: 
https://github.com/llvm/llvm-project/commit/876a5c9e5905a9666748632afba1ff83200ed95b.diff

LOG: [libc++] Avoid including <features.h> on arbitrary platforms (#125587)

This partially reverts commit 5f2389d4. That commit started checking
whether <features.h> was a valid include unconditionally, however codebases
are free to have such a header on their search path, which breaks compilation.
LLVM libc now provides a more standard way of getting configuration macros
like __LLVM_LIBC__.

After this patch, we only include <features.h> when we're on Linux or
when we're compiling for GPUs.

(cherry picked from commit cffc1ac3491c891ef4f80bcbfa685710e477eeac)

Added: 


Modified: 
libcxx/include/__configuration/platform.h

Removed: 




diff  --git a/libcxx/include/__configuration/platform.h 
b/libcxx/include/__configuration/platform.h
index 2a92ce209b91f..cff99376ee24b 100644
--- a/libcxx/include/__configuration/platform.h
+++ b/libcxx/include/__configuration/platform.h
@@ -30,12 +30,9 @@
 // ... add new file formats here ...
 #endif
 
-// To detect which libc we're using
-#if __has_include(<features.h>)
+// Need to detect which libc we're using if we're on Linux.
+#if defined(__linux__) || defined(__AMDGPU__) || defined(__NVPTX__)
 #  include <features.h>
-#endif
-
-#if defined(__linux__)
 #  if defined(__GLIBC_PREREQ)
 #define _LIBCPP_GLIBC_PREREQ(a, b) __GLIBC_PREREQ(a, b)
 #  else



___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] [clang] Implement instantiation context note for checking template parameters (PR #126088)

2025-02-19 Thread Erich Keane via llvm-branch-commits

https://github.com/erichkeane approved this pull request.

A few quick comments, else the source changes LGTM.  Note that @endill's 
suggestion to use 'bookmarks' for notes (or something like that) are good ones 
that I agree with.

https://github.com/llvm/llvm-project/pull/126088
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [libcxx] release/20.x: [libc++] Avoid including <features.h> on arbitrary platforms (#125587) (PR #127310)

2025-02-19 Thread Tom Stellard via llvm-branch-commits

https://github.com/tstellar closed 
https://github.com/llvm/llvm-project/pull/127310
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [libcxx] release/20.x: [libc++] Avoid including <features.h> on arbitrary platforms (#125587) (PR #127310)

2025-02-19 Thread Louis Dionne via llvm-branch-commits

ldionne wrote:

@tstellar Can we merge this one? I have another fix I want to cherry-pick which 
depends on this one.

https://github.com/llvm/llvm-project/pull/127310
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [flang] release/20.x: flang: Fix build with latest libc++ (#127362) (PR #127805)

2025-02-19 Thread via llvm-branch-commits

https://github.com/llvmbot milestoned 
https://github.com/llvm/llvm-project/pull/127805
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [flang] release/20.x: flang: Fix build with latest libc++ (#127362) (PR #127805)

2025-02-19 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-flang-runtime

Author: None (llvmbot)


Changes

Backport 2b340c10a611d929fee25e6222909c8915e3d6b6

Requested by: @tstellar

---
Full diff: https://github.com/llvm/llvm-project/pull/127805.diff


1 Files Affected:

- (modified) flang/runtime/io-api-minimal.cpp (+2-1) 


```diff
diff --git a/flang/runtime/io-api-minimal.cpp b/flang/runtime/io-api-minimal.cpp
index 68768427be0c2..93ac82248aa4c 100644
--- a/flang/runtime/io-api-minimal.cpp
+++ b/flang/runtime/io-api-minimal.cpp
@@ -150,7 +150,8 @@ bool IODEF(OutputLogical)(Cookie cookie, bool truth) {
 // Provide own definition for `std::__libcpp_verbose_abort` to avoid dependency
 // on the version provided by libc++.
 
-void std::__libcpp_verbose_abort(char const *format, ...) {
+void std::__libcpp_verbose_abort(char const *format, ...) noexcept(
+noexcept(std::__libcpp_verbose_abort(""))) {
   va_list list;
   va_start(list, format);
   std::vfprintf(stderr, format, list);

```




https://github.com/llvm/llvm-project/pull/127805
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [flang] release/20.x: flang: Fix build with latest libc++ (#127362) (PR #127805)

2025-02-19 Thread via llvm-branch-commits

llvmbot wrote:

@ldionne What do you think about merging this PR to the release branch?

https://github.com/llvm/llvm-project/pull/127805
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [libcxx] release/20.x: [libc++] Avoid including <features.h> on arbitrary platforms (#125587) (PR #127310)

2025-02-19 Thread via llvm-branch-commits

https://github.com/llvmbot updated 
https://github.com/llvm/llvm-project/pull/127310

>From 876a5c9e5905a9666748632afba1ff83200ed95b Mon Sep 17 00:00:00 2001
From: Louis Dionne 
Date: Sat, 15 Feb 2025 10:54:00 +0100
Subject: [PATCH] [libc++] Avoid including <features.h> on arbitrary platforms
 (#125587)

This partially reverts commit 5f2389d4. That commit started checking
whether <features.h> was a valid include unconditionally, however codebases
are free to have such a header on their search path, which breaks compilation.
LLVM libc now provides a more standard way of getting configuration macros
like __LLVM_LIBC__.

After this patch, we only include <features.h> when we're on Linux or
when we're compiling for GPUs.

(cherry picked from commit cffc1ac3491c891ef4f80bcbfa685710e477eeac)
---
 libcxx/include/__configuration/platform.h | 7 ++-
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/libcxx/include/__configuration/platform.h 
b/libcxx/include/__configuration/platform.h
index 2a92ce209b91f..cff99376ee24b 100644
--- a/libcxx/include/__configuration/platform.h
+++ b/libcxx/include/__configuration/platform.h
@@ -30,12 +30,9 @@
 // ... add new file formats here ...
 #endif
 
-// To detect which libc we're using
-#if __has_include(<features.h>)
+// Need to detect which libc we're using if we're on Linux.
+#if defined(__linux__) || defined(__AMDGPU__) || defined(__NVPTX__)
 #  include <features.h>
-#endif
-
-#if defined(__linux__)
 #  if defined(__GLIBC_PREREQ)
 #define _LIBCPP_GLIBC_PREREQ(a, b) __GLIBC_PREREQ(a, b)
 #  else

___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [libcxx] release/20.x: [libc++] Avoid including <features.h> on arbitrary platforms (#125587) (PR #127310)

2025-02-19 Thread via llvm-branch-commits

github-actions[bot] wrote:

@ldionne (or anyone else): if you would like to add a note about this fix in 
the release notes (completely optional), please reply to this comment with a 
one- or two-sentence description of the fix. When you are done, please add the 
release:note label to this PR. 

https://github.com/llvm/llvm-project/pull/127310
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [mlir] [MLIR][OpenMP] Host lowering of standalone distribute (PR #127817)

2025-02-19 Thread Sergio Afonso via llvm-branch-commits

https://github.com/skatrak created 
https://github.com/llvm/llvm-project/pull/127817

This patch adds MLIR to LLVM IR translation support for standalone 
`omp.distribute` operations, as well as `distribute simd` through ignoring SIMD 
information (similarly to `do/for simd`).

>From 8ecbf3579bcff069548f6e4484cb546a1b54511e Mon Sep 17 00:00:00 2001
From: Sergio Afonso 
Date: Tue, 18 Feb 2025 11:22:43 +
Subject: [PATCH] [MLIR][OpenMP] Host lowering of standalone distribute

This patch adds MLIR to LLVM IR translation support for standalone
`omp.distribute` operations, as well as `distribute simd` through ignoring
SIMD information (similarly to `do/for simd`).

Co-authored-by: Dominik Adamski 
---
 .../OpenMP/OpenMPToLLVMIRTranslation.cpp  | 78 +++
 mlir/test/Target/LLVMIR/openmp-llvm.mlir  | 37 +
 mlir/test/Target/LLVMIR/openmp-todo.mlir  | 66 +++-
 3 files changed, 178 insertions(+), 3 deletions(-)

diff --git 
a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp 
b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index a5ff3eff6439f..c8221a9f9854a 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -164,6 +164,10 @@ static LogicalResult checkImplementationStatus(Operation 
&op) {
 if (op.getDevice())
   result = todo("device");
   };
+  auto checkDistSchedule = [&todo](auto op, LogicalResult &result) {
+if (op.getDistScheduleChunkSize())
+  result = todo("dist_schedule with chunk_size");
+  };
   auto checkHasDeviceAddr = [&todo](auto op, LogicalResult &result) {
 if (!op.getHasDeviceAddrVars().empty())
   result = todo("has_device_addr");
@@ -255,6 +259,16 @@ static LogicalResult checkImplementationStatus(Operation 
&op) {
 
   LogicalResult result = success();
   llvm::TypeSwitch(op)
+  .Case([&](omp::DistributeOp op) {
+if (op.isComposite() &&
+isa_and_present(op.getNestedWrapper()))
+  result = op.emitError() << "not yet implemented: "
+ "composite omp.distribute + omp.wsloop";
+checkAllocate(op, result);
+checkDistSchedule(op, result);
+checkOrder(op, result);
+checkPrivate(op, result);
+  })
   .Case([&](omp::OrderedRegionOp op) { checkParLevelSimd(op, result); })
   .Case([&](omp::SectionsOp op) {
 checkAllocate(op, result);
@@ -3755,6 +3769,67 @@ convertOmpTargetData(Operation *op, llvm::IRBuilderBase 
&builder,
   return success();
 }
 
+static LogicalResult
+convertOmpDistribute(Operation &opInst, llvm::IRBuilderBase &builder,
+ LLVM::ModuleTranslation &moduleTranslation) {
+  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
+  auto distributeOp = cast(opInst);
+  if (failed(checkImplementationStatus(opInst)))
+return failure();
+
+  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
+  auto bodyGenCB = [&](InsertPointTy allocaIP,
+   InsertPointTy codeGenIP) -> llvm::Error {
+// DistributeOp has only one region associated with it.
+builder.restoreIP(codeGenIP);
+
+llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
+llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
+llvm::Expected regionBlock =
+convertOmpOpRegions(distributeOp.getRegion(), "omp.distribute.region",
+builder, moduleTranslation);
+if (!regionBlock)
+  return regionBlock.takeError();
+builder.SetInsertPoint(*regionBlock, (*regionBlock)->begin());
+
+// TODO: Add support for clauses which are valid for DISTRIBUTE constructs.
+// Static schedule is the default.
+auto schedule = omp::ClauseScheduleKind::Static;
+bool isOrdered = false;
+std::optional scheduleMod;
+bool isSimd = false;
+llvm::omp::WorksharingLoopType workshareLoopType =
+llvm::omp::WorksharingLoopType::DistributeStaticLoop;
+bool loopNeedsBarrier = false;
+llvm::Value *chunk = nullptr;
+
+llvm::CanonicalLoopInfo *loopInfo = 
*findCurrentLoopInfo(moduleTranslation);
+llvm::OpenMPIRBuilder::InsertPointOrErrorTy wsloopIP =
+ompBuilder->applyWorkshareLoop(
+ompLoc.DL, loopInfo, allocaIP, loopNeedsBarrier,
+convertToScheduleKind(schedule), chunk, isSimd,
+scheduleMod == omp::ScheduleModifier::monotonic,
+scheduleMod == omp::ScheduleModifier::nonmonotonic, isOrdered,
+workshareLoopType);
+
+if (!wsloopIP)
+  return wsloopIP.takeError();
+return llvm::Error::success();
+  };
+
+  llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
+  findAllocaInsertPoint(builder, moduleTranslation);
+  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
+  llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
+  ompBuilder->createDistribute(om

[llvm-branch-commits] [llvm] [OpenMPIRBuilder] Split calculation of canonical loop trip count, NFC (PR #127820)

2025-02-19 Thread Sergio Afonso via llvm-branch-commits

https://github.com/skatrak created 
https://github.com/llvm/llvm-project/pull/127820

This patch splits off the calculation of canonical loop trip counts from the 
creation of canonical loops. This makes it possible to reuse this logic to, for 
instance, populate the `__tgt_target_kernel` runtime call for SPMD kernels.

This feature is used to simplify one of the existing OpenMPIRBuilder tests.

>From 5153e0d8ebcad5dacebe2dd00f4e2e96831ef5cf Mon Sep 17 00:00:00 2001
From: Sergio Afonso 
Date: Tue, 18 Feb 2025 14:19:30 +
Subject: [PATCH] [OpenMPIRBuilder] Split calculation of canonical loop trip
 count, NFC

This patch splits off the calculation of canonical loop trip counts from the
creation of canonical loops. This makes it possible to reuse this logic to, for
instance, populate the `__tgt_target_kernel` runtime call for SPMD kernels.

This feature is used to simplify one of the existing OpenMPIRBuilder tests.
---
 .../llvm/Frontend/OpenMP/OMPIRBuilder.h   | 38 +++
 llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp | 27 -
 .../Frontend/OpenMPIRBuilderTest.cpp  | 16 ++--
 3 files changed, 52 insertions(+), 29 deletions(-)

diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h 
b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
index 9ad85413acd34..207ca7fb05f62 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
@@ -728,13 +728,12 @@ class OpenMPIRBuilder {
   LoopBodyGenCallbackTy BodyGenCB, Value *TripCount,
   const Twine &Name = "loop");
 
-  /// Generator for the control flow structure of an OpenMP canonical loop.
+  /// Calculate the trip count of a canonical loop.
   ///
-  /// Instead of a logical iteration space, this allows specifying user-defined
-  /// loop counter values using increment, upper- and lower bounds. To
-  /// disambiguate the terminology when counting downwards, instead of lower
-  /// bounds we use \p Start for the loop counter value in the first body
-  /// iteration.
+  /// This allows specifying user-defined loop counter values using increment,
+  /// upper- and lower bounds. To disambiguate the terminology when counting
+  /// downwards, instead of lower bounds we use \p Start for the loop counter
+  /// value in the first body iteration.
   ///
   /// Consider the following limitations:
   ///
@@ -758,7 +757,32 @@ class OpenMPIRBuilder {
   ///
   ///  for (int i = 0; i < 42; i -= 1u)
   ///
-  //
+  /// \param Loc   The insert and source location description.
+  /// \param Start Value of the loop counter for the first iterations.
+  /// \param Stop  Loop counter values past this will stop the loop.
+  /// \param Step  Loop counter increment after each iteration; negative
+  ///  means counting down.
+  /// \param IsSigned  Whether Start, Stop and Step are signed integers.
+  /// \param InclusiveStop Whether \p Stop itself is a valid value for the loop
+  ///  counter.
+  /// \param Name  Base name used to derive instruction names.
+  ///
+  /// \returns The value holding the calculated trip count.
+  Value *calculateCanonicalLoopTripCount(const LocationDescription &Loc,
+ Value *Start, Value *Stop, Value 
*Step,
+ bool IsSigned, bool InclusiveStop,
+ const Twine &Name = "loop");
+
+  /// Generator for the control flow structure of an OpenMP canonical loop.
+  ///
+  /// Instead of a logical iteration space, this allows specifying user-defined
+  /// loop counter values using increment, upper- and lower bounds. To
+  /// disambiguate the terminology when counting downwards, instead of lower
+  /// bounds we use \p Start for the loop counter value in the first body
+  ///
+  /// It calls \see calculateCanonicalLoopTripCount for trip count 
calculations,
+  /// so limitations of that method apply here as well.
+  ///
   /// \param Loc   The insert and source location description.
   /// \param BodyGenCB Callback that will generate the loop body code.
   /// \param Start Value of the loop counter for the first iterations.
diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp 
b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
index 7788897fc0795..eee6e3e54d615 100644
--- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -4059,10 +4059,9 @@ OpenMPIRBuilder::createCanonicalLoop(const 
LocationDescription &Loc,
   return CL;
 }
 
-Expected OpenMPIRBuilder::createCanonicalLoop(
-const LocationDescription &Loc, LoopBodyGenCallbackTy BodyGenCB,
-Value *Start, Value *Stop, Value *Step, bool IsSigned, bool InclusiveStop,
-InsertPointTy ComputeIP, const Twine &Name) {
+Value *OpenMPIRBuilder::calculateCanonicalLoopTripCount(
+const LocationDescription &Loc, Value *Start, Value *Stop, Value *St

[llvm-branch-commits] [flang] [Flang][OpenMP] Allow host evaluation of loop bounds for distribute (PR #127822)

2025-02-19 Thread Sergio Afonso via llvm-branch-commits

https://github.com/skatrak created 
https://github.com/llvm/llvm-project/pull/127822

This patch adds `target teams distribute [simd]` and equivalent construct nests 
to the list of cases where loop bounds can be evaluated in the host, as they 
represent kernels for which the trip count must also be evaluated in advance of 
the kernel call.

>From 0e96e97bb5405904522d1bd54b458fb92d11f7fb Mon Sep 17 00:00:00 2001
From: Sergio Afonso 
Date: Wed, 19 Feb 2025 15:15:01 +
Subject: [PATCH] [Flang][OpenMP] Allow host evaluation of loop bounds for
 distribute

This patch adds `target teams distribute [simd]` and equivalent construct nests
to the list of cases where loop bounds can be evaluated in the host, as they
represent Generic-SPMD kernels for which the trip count must also be evaluated
in advance of the kernel call.
---
 flang/lib/Lower/OpenMP/OpenMP.cpp |  12 +--
 flang/test/Lower/OpenMP/host-eval.f90 | 103 ++
 2 files changed, 110 insertions(+), 5 deletions(-)

diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp 
b/flang/lib/Lower/OpenMP/OpenMP.cpp
index bd794033cdf11..8c80453610473 100644
--- a/flang/lib/Lower/OpenMP/OpenMP.cpp
+++ b/flang/lib/Lower/OpenMP/OpenMP.cpp
@@ -562,8 +562,11 @@ static void 
processHostEvalClauses(lower::AbstractConverter &converter,
   [[fallthrough]];
 case OMPD_distribute_parallel_do:
 case OMPD_distribute_parallel_do_simd:
-  cp.processCollapse(loc, eval, hostInfo.ops, hostInfo.iv);
   cp.processNumThreads(stmtCtx, hostInfo.ops);
+  [[fallthrough]];
+case OMPD_distribute:
+case OMPD_distribute_simd:
+  cp.processCollapse(loc, eval, hostInfo.ops, hostInfo.iv);
   break;
 
 // Cases where 'teams' clauses might be present, and target SPMD is
@@ -573,10 +576,8 @@ static void 
processHostEvalClauses(lower::AbstractConverter &converter,
   [[fallthrough]];
 case OMPD_target_teams:
   cp.processNumTeams(stmtCtx, hostInfo.ops);
-  processSingleNestedIf([](Directive nestedDir) {
-return nestedDir == OMPD_distribute_parallel_do ||
-   nestedDir == OMPD_distribute_parallel_do_simd;
-  });
+  processSingleNestedIf(
+  [](Directive nestedDir) { return topDistributeSet.test(nestedDir); 
});
   break;
 
 // Cases where only 'teams' host-evaluated clauses might be present.
@@ -586,6 +587,7 @@ static void processHostEvalClauses(lower::AbstractConverter 
&converter,
   [[fallthrough]];
 case OMPD_target_teams_distribute:
 case OMPD_target_teams_distribute_simd:
+  cp.processCollapse(loc, eval, hostInfo.ops, hostInfo.iv);
   cp.processNumTeams(stmtCtx, hostInfo.ops);
   break;
 
diff --git a/flang/test/Lower/OpenMP/host-eval.f90 
b/flang/test/Lower/OpenMP/host-eval.f90
index 32c52462b86a7..65258c91e5daf 100644
--- a/flang/test/Lower/OpenMP/host-eval.f90
+++ b/flang/test/Lower/OpenMP/host-eval.f90
@@ -155,3 +155,106 @@ subroutine distribute_parallel_do_simd()
   !$omp end distribute parallel do simd
   !$omp end teams
 end subroutine distribute_parallel_do_simd
+
+! BOTH-LABEL: func.func @_QPdistribute
+subroutine distribute()
+  ! BOTH: omp.target
+  
+  ! HOST-SAME: host_eval(%{{.*}} -> %[[LB:.*]], %{{.*}} -> %[[UB:.*]], %{{.*}} 
-> %[[STEP:.*]] : i32, i32, i32)
+  
+  ! DEVICE-NOT: host_eval({{.*}})
+  ! DEVICE-SAME: {
+
+  ! BOTH: omp.teams
+  !$omp target teams
+
+  ! BOTH: omp.distribute
+  ! BOTH-NEXT: omp.loop_nest
+
+  ! HOST-SAME: (%{{.*}}) : i32 = (%[[LB]]) to (%[[UB]]) inclusive step 
(%[[STEP]])
+  !$omp distribute
+  do i=1,10
+call foo()
+  end do
+  !$omp end distribute
+  !$omp end target teams
+
+  ! BOTH: omp.target
+  ! BOTH-NOT: host_eval({{.*}})
+  ! BOTH-SAME: {
+  ! BOTH: omp.teams
+  !$omp target teams
+  call foo() !< Prevents this from being Generic-SPMD.
+
+  ! BOTH: omp.distribute
+  !$omp distribute
+  do i=1,10
+call foo()
+  end do
+  !$omp end distribute
+  !$omp end target teams
+
+  ! BOTH: omp.teams
+  !$omp teams
+
+  ! BOTH: omp.distribute
+  !$omp distribute
+  do i=1,10
+call foo()
+  end do
+  !$omp end distribute
+  !$omp end teams
+end subroutine distribute
+
+! BOTH-LABEL: func.func @_QPdistribute_simd
+subroutine distribute_simd()
+  ! BOTH: omp.target
+  
+  ! HOST-SAME: host_eval(%{{.*}} -> %[[LB:.*]], %{{.*}} -> %[[UB:.*]], %{{.*}} 
-> %[[STEP:.*]] : i32, i32, i32)
+  
+  ! DEVICE-NOT: host_eval({{.*}})
+  ! DEVICE-SAME: {
+
+  ! BOTH: omp.teams
+  !$omp target teams
+
+  ! BOTH: omp.distribute
+  ! BOTH-NEXT: omp.simd
+  ! BOTH-NEXT: omp.loop_nest
+
+  ! HOST-SAME: (%{{.*}}) : i32 = (%[[LB]]) to (%[[UB]]) inclusive step 
(%[[STEP]])
+  !$omp distribute simd
+  do i=1,10
+call foo()
+  end do
+  !$omp end distribute simd
+  !$omp end target teams
+
+  ! BOTH: omp.target
+  ! BOTH-NOT: host_eval({{.*}})
+  ! BOTH-SAME: {
+  ! BOTH: omp.teams
+  !$omp target teams
+  call foo() !< Prevents this from being Generic-SPMD.
+
+  ! BOTH: omp.distribute
+  ! BOTH-NEXT: omp.simd
+  !$omp di

[llvm-branch-commits] [mlir] [MLIR][OpenMP] Host lowering of distribute-parallel-do/for (PR #127819)

2025-02-19 Thread Sergio Afonso via llvm-branch-commits

https://github.com/skatrak created 
https://github.com/llvm/llvm-project/pull/127819

This patch adds support for translating composite `omp.parallel` + 
`omp.distribute` + `omp.wsloop` loops to LLVM IR on the host. This is done by 
passing an updated `WorksharingLoopType` to the call to `applyWorkshareLoop` 
associated to the lowering of the `omp.wsloop` operation, so that 
`__kmpc_dist_for_static_init` is called at runtime in place of 
`__kmpc_for_static_init`.

Existing translation rules take care of creating a parallel region to hold the 
workshared and workdistributed loop.

>From 38ba269f0681b8d962841c4471a242fe382a6106 Mon Sep 17 00:00:00 2001
From: Sergio Afonso 
Date: Tue, 18 Feb 2025 13:07:51 +
Subject: [PATCH] [MLIR][OpenMP] Host lowering of distribute-parallel-do/for

This patch adds support for translating composite `omp.parallel` +
`omp.distribute` + `omp.wsloop` loops to LLVM IR on the host. This is done by
passing an updated `WorksharingLoopType` to the call to `applyWorkshareLoop`
associated to the lowering of the `omp.wsloop` operation, so that
`__kmpc_dist_for_static_init` is called at runtime in place of
`__kmpc_for_static_init`.

Existing translation rules take care of creating a parallel region to hold the
workshared and workdistributed loop.
---
 .../OpenMP/OpenMPToLLVMIRTranslation.cpp  | 21 --
 mlir/test/Target/LLVMIR/openmp-llvm.mlir  | 65 +++
 mlir/test/Target/LLVMIR/openmp-todo.mlir  | 19 --
 3 files changed, 81 insertions(+), 24 deletions(-)

diff --git 
a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp 
b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index c8221a9f9854a..7e8a9bdb5b133 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -260,10 +260,6 @@ static LogicalResult checkImplementationStatus(Operation 
&op) {
   LogicalResult result = success();
   llvm::TypeSwitch(op)
   .Case([&](omp::DistributeOp op) {
-if (op.isComposite() &&
-isa_and_present(op.getNestedWrapper()))
-  result = op.emitError() << "not yet implemented: "
- "composite omp.distribute + omp.wsloop";
 checkAllocate(op, result);
 checkDistSchedule(op, result);
 checkOrder(op, result);
@@ -1993,6 +1989,14 @@ convertOmpWsloop(Operation &opInst, llvm::IRBuilderBase 
&builder,
   bool isSimd = wsloopOp.getScheduleSimd();
   bool loopNeedsBarrier = !wsloopOp.getNowait();
 
+  // The only legal way for the direct parent to be omp.distribute is that this
+  // represents 'distribute parallel do'. Otherwise, this is a regular
+  // worksharing loop.
+  llvm::omp::WorksharingLoopType workshareLoopType =
+  llvm::isa_and_present(opInst.getParentOp())
+  ? llvm::omp::WorksharingLoopType::DistributeForStaticLoop
+  : llvm::omp::WorksharingLoopType::ForStaticLoop;
+
   llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
   llvm::Expected regionBlock = convertOmpOpRegions(
   wsloopOp.getRegion(), "omp.wsloop.region", builder, moduleTranslation);
@@ -2008,7 +2012,8 @@ convertOmpWsloop(Operation &opInst, llvm::IRBuilderBase 
&builder,
   ompLoc.DL, loopInfo, allocaIP, loopNeedsBarrier,
   convertToScheduleKind(schedule), chunk, isSimd,
   scheduleMod == omp::ScheduleModifier::monotonic,
-  scheduleMod == omp::ScheduleModifier::nonmonotonic, isOrdered);
+  scheduleMod == omp::ScheduleModifier::nonmonotonic, isOrdered,
+  workshareLoopType);
 
   if (failed(handleError(wsloopIP, opInst)))
 return failure();
@@ -3792,6 +3797,12 @@ convertOmpDistribute(Operation &opInst, 
llvm::IRBuilderBase &builder,
   return regionBlock.takeError();
 builder.SetInsertPoint(*regionBlock, (*regionBlock)->begin());
 
+// Skip applying a workshare loop below when translating 'distribute
+// parallel do' (it's been already handled by this point while translating
+// the nested omp.wsloop).
+if (isa_and_present(distributeOp.getNestedWrapper()))
+  return llvm::Error::success();
+
 // TODO: Add support for clauses which are valid for DISTRIBUTE constructs.
 // Static schedule is the default.
 auto schedule = omp::ClauseScheduleKind::Static;
diff --git a/mlir/test/Target/LLVMIR/openmp-llvm.mlir 
b/mlir/test/Target/LLVMIR/openmp-llvm.mlir
index a5a490e527d79..d85b149c66811 100644
--- a/mlir/test/Target/LLVMIR/openmp-llvm.mlir
+++ b/mlir/test/Target/LLVMIR/openmp-llvm.mlir
@@ -3307,3 +3307,68 @@ llvm.func @distribute() {
 // CHECK: store i64 1, ptr %[[STRIDE]]
 // CHECK: %[[TID:.*]] = call i32 @__kmpc_global_thread_num({{.*}})
 // CHECK: call void @__kmpc_for_static_init_{{.*}}(ptr @{{.*}}, i32 
%[[TID]], i32 92, ptr %[[LASTITER]], ptr %[[LB]], ptr %[[UB]], ptr %[[STRIDE]], 
i64 1, i64 0)
+
+// -
+
+llvm.func @distribute_w

[llvm-branch-commits] [llvm] [OpenMPIRBuilder] Add support for distribute constructs (PR #127816)

2025-02-19 Thread Sergio Afonso via llvm-branch-commits

https://github.com/skatrak created 
https://github.com/llvm/llvm-project/pull/127816

This patch adds the `OpenMPIRBuilder::createDistribute()` function and updates 
`OpenMPIRBuilder::applyStaticWorkshareLoop()` in preparation for adding 
`distribute` support to flang.

>From a79b7a2d6a443ef26bf4beaf73ec3c8042d968d1 Mon Sep 17 00:00:00 2001
From: Dominik Adamski 
Date: Mon, 17 Feb 2025 14:25:40 +
Subject: [PATCH] [OpenMPIRBuilder] Add support for distribute constructs

This patch adds the `OpenMPIRBuilder::createDistribute()` function and updates
`OpenMPIRBuilder::applyStaticWorkshareLoop()` in preparation for adding
`distribute` support to flang.

Co-authored-by: Sergio Afonso 
---
 .../llvm/Frontend/OpenMP/OMPIRBuilder.h   | 17 --
 llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp | 59 ---
 2 files changed, 64 insertions(+), 12 deletions(-)

diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h 
b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
index d25077cae63e4..9ad85413acd34 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
@@ -1004,12 +1004,12 @@ class OpenMPIRBuilder {
   /// preheader of the loop.
   /// \param NeedsBarrier Indicates whether a barrier must be inserted after
   /// the loop.
+  /// \param LoopType Type of workshare loop.
   ///
   /// \returns Point where to insert code after the workshare construct.
-  InsertPointOrErrorTy applyStaticWorkshareLoop(DebugLoc DL,
-CanonicalLoopInfo *CLI,
-InsertPointTy AllocaIP,
-bool NeedsBarrier);
+  InsertPointOrErrorTy applyStaticWorkshareLoop(
+  DebugLoc DL, CanonicalLoopInfo *CLI, InsertPointTy AllocaIP,
+  omp::WorksharingLoopType LoopType, bool NeedsBarrier);
 
   /// Modifies the canonical loop a statically-scheduled workshare loop with a
   /// user-specified chunk size.
@@ -2660,6 +2660,15 @@ class OpenMPIRBuilder {
   Value *NumTeamsLower = nullptr, Value *NumTeamsUpper = nullptr,
   Value *ThreadLimit = nullptr, Value *IfExpr = nullptr);
 
+  /// Generator for `#omp distribute`
+  ///
+  /// \param Loc The location where the distribute construct was encountered.
+  /// \param AllocaIP The insertion points to be used for alloca instructions.
+  /// \param BodyGenCB Callback that will generate the region code.
+  InsertPointOrErrorTy createDistribute(const LocationDescription &Loc,
+InsertPointTy AllocaIP,
+BodyGenCallbackTy BodyGenCB);
+
   /// Generate conditional branch and relevant BasicBlocks through which 
private
   /// threads copy the 'copyin' variables from Master copy to threadprivate
   /// copies.
diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp 
b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
index 04acab1e5765e..9e380bf2d3dbe 100644
--- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -2295,7 +2295,8 @@ OpenMPIRBuilder::InsertPointOrErrorTy 
OpenMPIRBuilder::createSections(
 return LoopInfo.takeError();
 
   InsertPointOrErrorTy WsloopIP =
-  applyStaticWorkshareLoop(Loc.DL, *LoopInfo, AllocaIP, !IsNowait);
+  applyStaticWorkshareLoop(Loc.DL, *LoopInfo, AllocaIP,
+   WorksharingLoopType::ForStaticLoop, !IsNowait);
   if (!WsloopIP)
 return WsloopIP.takeError();
   InsertPointTy AfterIP = *WsloopIP;
@@ -4145,10 +4146,9 @@ static FunctionCallee getKmpcForStaticInitForType(Type 
*Ty, Module &M,
   llvm_unreachable("unknown OpenMP loop iterator bitwidth");
 }
 
-OpenMPIRBuilder::InsertPointOrErrorTy
-OpenMPIRBuilder::applyStaticWorkshareLoop(DebugLoc DL, CanonicalLoopInfo *CLI,
-  InsertPointTy AllocaIP,
-  bool NeedsBarrier) {
+OpenMPIRBuilder::InsertPointOrErrorTy 
OpenMPIRBuilder::applyStaticWorkshareLoop(
+DebugLoc DL, CanonicalLoopInfo *CLI, InsertPointTy AllocaIP,
+WorksharingLoopType LoopType, bool NeedsBarrier) {
   assert(CLI->isValid() && "Requires a valid canonical loop");
   assert(!isConflictIP(AllocaIP, CLI->getPreheaderIP()) &&
  "Require dedicated allocate IP");
@@ -4191,8 +4191,12 @@ OpenMPIRBuilder::applyStaticWorkshareLoop(DebugLoc DL, 
CanonicalLoopInfo *CLI,
 
   Value *ThreadNum = getOrCreateThreadID(SrcLoc);
 
-  Constant *SchedulingType = ConstantInt::get(
-  I32Type, static_cast(OMPScheduleType::UnorderedStatic));
+  OMPScheduleType SchedType =
+  (LoopType == WorksharingLoopType::DistributeStaticLoop)
+  ? OMPScheduleType::OrderedDistribute
+  : OMPScheduleType::UnorderedStatic;
+  Constant *SchedulingType =
+  ConstantInt::get(I32Type, static_cast(SchedType));
 
   // Call the "init" function and update 

[llvm-branch-commits] [llvm] [OpenMPIRBuilder] Split calculation of canonical loop trip count, NFC (PR #127820)

2025-02-19 Thread Tom Eccles via llvm-branch-commits

https://github.com/tblah approved this pull request.

LGTM, thanks!

https://github.com/llvm/llvm-project/pull/127820
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] [lldb] release/20.x: Reland: [clang] fix P3310 overload resolution flag propagation (#125791) (PR #127779)

2025-02-19 Thread Matheus Izvekov via llvm-branch-commits

https://github.com/mizvekov closed 
https://github.com/llvm/llvm-project/pull/127779
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] [lldb] Backport: [clang] fix P3310 overload resolution flag propagation (#125791) (PR #127831)

2025-02-19 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-lldb

Author: Matheus Izvekov (mizvekov)


Changes

Class templates might be only instantiated when they are required to be 
complete, but checking the template args against the primary template is 
immediate.

This result is cached so that later when the class is instantiated, checking 
against the primary template is not repeated.

The 'MatchedPackOnParmToNonPackOnArg' flag is also produced upon checking 
against the primary template, so it needs to be cached in the specialization as 
well.

This fixes a bug which has not been in any release, so there are no release 
notes.

Fixes #125290

---

Patch is 232.71 KiB, truncated to 20.00 KiB below, full version: 
https://github.com/llvm/llvm-project/pull/127831.diff


16 Files Affected:

- (modified) clang/include/clang/AST/DeclTemplate.h (+14-2) 
- (modified) clang/include/clang/Sema/Sema.h (+2-2) 
- (modified) clang/lib/AST/ASTImporter.cpp (+3-3) 
- (modified) clang/lib/AST/DeclTemplate.cpp (+24-23) 
- (modified) clang/lib/AST/JSONNodeDumper.cpp (+5) 
- (modified) clang/lib/AST/TextNodeDumper.cpp (+4-1) 
- (modified) clang/lib/Sema/SemaTemplate.cpp (+5-3) 
- (modified) clang/lib/Sema/SemaTemplateDeduction.cpp (-2) 
- (modified) clang/lib/Sema/SemaTemplateInstantiateDecl.cpp (+1-1) 
- (modified) clang/lib/Sema/SemaType.cpp (+2-1) 
- (modified) clang/lib/Serialization/ASTReaderDecl.cpp (+1) 
- (modified) clang/lib/Serialization/ASTWriterDecl.cpp (+1) 
- (modified) clang/test/AST/ast-dump-templates.cpp (+6045-2) 
- (modified) clang/test/AST/gen_ast_dump_json_test.py (+17-4) 
- (modified) clang/test/SemaTemplate/cwg2398.cpp (+20) 
- (modified) lldb/source/Plugins/ExpressionParser/Clang/CxxModuleHandler.cpp 
(+2-1) 


``diff
diff --git a/clang/include/clang/AST/DeclTemplate.h 
b/clang/include/clang/AST/DeclTemplate.h
index 9ecff2c898acd..03c43765206b1 100644
--- a/clang/include/clang/AST/DeclTemplate.h
+++ b/clang/include/clang/AST/DeclTemplate.h
@@ -1841,15 +1841,23 @@ class ClassTemplateSpecializationDecl : public 
CXXRecordDecl,
   LLVM_PREFERRED_TYPE(TemplateSpecializationKind)
   unsigned SpecializationKind : 3;
 
+  /// Indicate that we have matched a parameter pack with a non pack
+  /// argument, when the opposite match is also allowed (strict pack match).
+  /// This needs to be cached as deduction is performed during declaration,
+  /// and we need the information to be preserved so that it is consistent
+  /// during instantiation.
+  bool MatchedPackOnParmToNonPackOnArg : 1;
+
 protected:
   ClassTemplateSpecializationDecl(ASTContext &Context, Kind DK, TagKind TK,
   DeclContext *DC, SourceLocation StartLoc,
   SourceLocation IdLoc,
   ClassTemplateDecl *SpecializedTemplate,
   ArrayRef Args,
+  bool MatchedPackOnParmToNonPackOnArg,
   ClassTemplateSpecializationDecl *PrevDecl);
 
-  explicit ClassTemplateSpecializationDecl(ASTContext &C, Kind DK);
+  ClassTemplateSpecializationDecl(ASTContext &C, Kind DK);
 
 public:
   friend class ASTDeclReader;
@@ -1859,7 +1867,7 @@ class ClassTemplateSpecializationDecl : public 
CXXRecordDecl,
   Create(ASTContext &Context, TagKind TK, DeclContext *DC,
  SourceLocation StartLoc, SourceLocation IdLoc,
  ClassTemplateDecl *SpecializedTemplate,
- ArrayRef Args,
+ ArrayRef Args, bool MatchedPackOnParmToNonPackOnArg,
  ClassTemplateSpecializationDecl *PrevDecl);
   static ClassTemplateSpecializationDecl *CreateDeserialized(ASTContext &C,
  GlobalDeclID ID);
@@ -1930,6 +1938,10 @@ class ClassTemplateSpecializationDecl : public 
CXXRecordDecl,
 SpecializationKind = TSK;
   }
 
+  bool hasMatchedPackOnParmToNonPackOnArg() const {
+return MatchedPackOnParmToNonPackOnArg;
+  }
+
   /// Get the point of instantiation (if any), or null if none.
   SourceLocation getPointOfInstantiation() const {
 return PointOfInstantiation;
diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h
index eb82d1b978e94..a30a7076ea5d4 100644
--- a/clang/include/clang/Sema/Sema.h
+++ b/clang/include/clang/Sema/Sema.h
@@ -13491,8 +13491,8 @@ class Sema final : public SemaBase {
   bool InstantiateClassTemplateSpecialization(
   SourceLocation PointOfInstantiation,
   ClassTemplateSpecializationDecl *ClassTemplateSpec,
-  TemplateSpecializationKind TSK, bool Complain = true,
-  bool PrimaryHasMatchedPackOnParmToNonPackOnArg = false);
+  TemplateSpecializationKind TSK, bool Complain,
+  bool PrimaryHasMatchedPackOnParmToNonPackOnArg);
 
   /// Instantiates the definitions of all of the member
   /// of the given class, which is an instantiation of a class template
diff --git a/clang/lib/AST/ASTImporter.cpp b/clang/lib/AST/ASTImporter.cpp
index 09fa10f716e

[llvm-branch-commits] [clang] [lldb] release/20.x: Reland: [clang] fix P3310 overload resolution flag propagation (#125791) (PR #127779)

2025-02-19 Thread Matheus Izvekov via llvm-branch-commits

mizvekov wrote:

Closing as this needs manual rebase.

https://github.com/llvm/llvm-project/pull/127779
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] [lldb] Backport: [clang] fix P3310 overload resolution flag propagation (#125791) (PR #127831)

2025-02-19 Thread Matheus Izvekov via llvm-branch-commits

https://github.com/mizvekov milestoned 
https://github.com/llvm/llvm-project/pull/127831
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [flang] [Flang][OpenMP] Allow host evaluation of loop bounds for distribute (PR #127822)

2025-02-19 Thread Sergio Afonso via llvm-branch-commits

skatrak wrote:

PR stack:
- #115475
- #127217
- #127816
- #127817
- #127818
- #127819
- #127820
- #127821
- :arrow_right: #127822

https://github.com/llvm/llvm-project/pull/127822
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] [clang] Implement instantiation context note for checking template parameters (PR #126088)

2025-02-19 Thread Matheus Izvekov via llvm-branch-commits


@@ -1909,7 +1909,22 @@ class Sema final : public SemaBase {
   /// '\#pragma clang attribute push' directives to the given declaration.
   void AddPragmaAttributes(Scope *S, Decl *D);
 
-  void PrintPragmaAttributeInstantiationPoint();
+  using DiagFuncRef =
+  llvm::function_ref;
+  auto getDefaultDiagFunc() {
+return [this](SourceLocation Loc, PartialDiagnostic PD) {
+  // This bypasses a lof of the filters in the diag engine, as it's

mizvekov wrote:

Not part of this PR, this is currently not rebased.
This has been fixed in the parent PR, and will disappear when rebased.

https://github.com/llvm/llvm-project/pull/126088
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] [lldb] Backport: [clang] fix P3310 overload resolution flag propagation (#125791) (PR #127831)

2025-02-19 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-clang-modules

Author: Matheus Izvekov (mizvekov)


Changes

Class templates might only be instantiated when they are required to be 
complete, but checking the template args against the primary template is 
immediate.

This result is cached so that later when the class is instantiated, checking 
against the primary template is not repeated.

The 'MatchedPackOnParmToNonPackOnArg' flag is also produced upon checking 
against the primary template, so it needs to be cached in the specialization as 
well.

This fixes a bug which has not been in any release, so there are no release 
notes.

Fixes #125290

---

Patch is 232.71 KiB, truncated to 20.00 KiB below, full version: 
https://github.com/llvm/llvm-project/pull/127831.diff


16 Files Affected:

- (modified) clang/include/clang/AST/DeclTemplate.h (+14-2) 
- (modified) clang/include/clang/Sema/Sema.h (+2-2) 
- (modified) clang/lib/AST/ASTImporter.cpp (+3-3) 
- (modified) clang/lib/AST/DeclTemplate.cpp (+24-23) 
- (modified) clang/lib/AST/JSONNodeDumper.cpp (+5) 
- (modified) clang/lib/AST/TextNodeDumper.cpp (+4-1) 
- (modified) clang/lib/Sema/SemaTemplate.cpp (+5-3) 
- (modified) clang/lib/Sema/SemaTemplateDeduction.cpp (-2) 
- (modified) clang/lib/Sema/SemaTemplateInstantiateDecl.cpp (+1-1) 
- (modified) clang/lib/Sema/SemaType.cpp (+2-1) 
- (modified) clang/lib/Serialization/ASTReaderDecl.cpp (+1) 
- (modified) clang/lib/Serialization/ASTWriterDecl.cpp (+1) 
- (modified) clang/test/AST/ast-dump-templates.cpp (+6045-2) 
- (modified) clang/test/AST/gen_ast_dump_json_test.py (+17-4) 
- (modified) clang/test/SemaTemplate/cwg2398.cpp (+20) 
- (modified) lldb/source/Plugins/ExpressionParser/Clang/CxxModuleHandler.cpp 
(+2-1) 


``diff
diff --git a/clang/include/clang/AST/DeclTemplate.h 
b/clang/include/clang/AST/DeclTemplate.h
index 9ecff2c898acd..03c43765206b1 100644
--- a/clang/include/clang/AST/DeclTemplate.h
+++ b/clang/include/clang/AST/DeclTemplate.h
@@ -1841,15 +1841,23 @@ class ClassTemplateSpecializationDecl : public 
CXXRecordDecl,
   LLVM_PREFERRED_TYPE(TemplateSpecializationKind)
   unsigned SpecializationKind : 3;
 
+  /// Indicate that we have matched a parameter pack with a non pack
+  /// argument, when the opposite match is also allowed (strict pack match).
+  /// This needs to be cached as deduction is performed during declaration,
+  /// and we need the information to be preserved so that it is consistent
+  /// during instantiation.
+  bool MatchedPackOnParmToNonPackOnArg : 1;
+
 protected:
   ClassTemplateSpecializationDecl(ASTContext &Context, Kind DK, TagKind TK,
   DeclContext *DC, SourceLocation StartLoc,
   SourceLocation IdLoc,
   ClassTemplateDecl *SpecializedTemplate,
   ArrayRef Args,
+  bool MatchedPackOnParmToNonPackOnArg,
   ClassTemplateSpecializationDecl *PrevDecl);
 
-  explicit ClassTemplateSpecializationDecl(ASTContext &C, Kind DK);
+  ClassTemplateSpecializationDecl(ASTContext &C, Kind DK);
 
 public:
   friend class ASTDeclReader;
@@ -1859,7 +1867,7 @@ class ClassTemplateSpecializationDecl : public 
CXXRecordDecl,
   Create(ASTContext &Context, TagKind TK, DeclContext *DC,
  SourceLocation StartLoc, SourceLocation IdLoc,
  ClassTemplateDecl *SpecializedTemplate,
- ArrayRef Args,
+ ArrayRef Args, bool MatchedPackOnParmToNonPackOnArg,
  ClassTemplateSpecializationDecl *PrevDecl);
   static ClassTemplateSpecializationDecl *CreateDeserialized(ASTContext &C,
  GlobalDeclID ID);
@@ -1930,6 +1938,10 @@ class ClassTemplateSpecializationDecl : public 
CXXRecordDecl,
 SpecializationKind = TSK;
   }
 
+  bool hasMatchedPackOnParmToNonPackOnArg() const {
+return MatchedPackOnParmToNonPackOnArg;
+  }
+
   /// Get the point of instantiation (if any), or null if none.
   SourceLocation getPointOfInstantiation() const {
 return PointOfInstantiation;
diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h
index eb82d1b978e94..a30a7076ea5d4 100644
--- a/clang/include/clang/Sema/Sema.h
+++ b/clang/include/clang/Sema/Sema.h
@@ -13491,8 +13491,8 @@ class Sema final : public SemaBase {
   bool InstantiateClassTemplateSpecialization(
   SourceLocation PointOfInstantiation,
   ClassTemplateSpecializationDecl *ClassTemplateSpec,
-  TemplateSpecializationKind TSK, bool Complain = true,
-  bool PrimaryHasMatchedPackOnParmToNonPackOnArg = false);
+  TemplateSpecializationKind TSK, bool Complain,
+  bool PrimaryHasMatchedPackOnParmToNonPackOnArg);
 
   /// Instantiates the definitions of all of the member
   /// of the given class, which is an instantiation of a class template
diff --git a/clang/lib/AST/ASTImporter.cpp b/clang/lib/AST/ASTImporter.cpp
index 09

[llvm-branch-commits] [mlir] [MLIR][OpenMP] Support target SPMD (PR #127821)

2025-02-19 Thread Michael Kruse via llvm-branch-commits

https://github.com/Meinersbur edited 
https://github.com/llvm/llvm-project/pull/127821
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [OpenMPIRBuilder] Add support for distribute constructs (PR #127816)

2025-02-19 Thread Sergio Afonso via llvm-branch-commits

skatrak wrote:

PR stack:
- #115475
- #127217
- :arrow_right: #127816
- #127817
- #127818
- #127819
- #127820
- #127821
- #127822

https://github.com/llvm/llvm-project/pull/127816
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [mlir] [MLIR][OpenMP] Host lowering of standalone distribute (PR #127817)

2025-02-19 Thread Sergio Afonso via llvm-branch-commits

skatrak wrote:

PR stack:
- #115475
- #127217
- #127816
- :arrow_right: #127817
- #127818
- #127819
- #127820
- #127821
- #127822

https://github.com/llvm/llvm-project/pull/127817
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [mlir] [MLIR][OpenMP] Support target SPMD (PR #127821)

2025-02-19 Thread Sergio Afonso via llvm-branch-commits

skatrak wrote:

PR stack:
- #115475
- #127217
- #127816
- #127817
- #127818
- #127819
- #127820
- :arrow_right: #127821
- #127822

https://github.com/llvm/llvm-project/pull/127821
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [mlir] [MLIR][OpenMP] Host lowering of distribute-parallel-do/for (PR #127819)

2025-02-19 Thread Sergio Afonso via llvm-branch-commits

skatrak wrote:

PR stack:
- #115475
- #127217
- #127816
- #127817
- #127818
- :arrow_right: #127819
- #127820
- #127821
- #127822

https://github.com/llvm/llvm-project/pull/127819
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [flang] [Flang][OpenMP] Allow host evaluation of loop bounds for distribute (PR #127822)

2025-02-19 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-flang-fir-hlfir

Author: Sergio Afonso (skatrak)


Changes

This patch adds `target teams distribute [simd]` and equivalent construct nests 
to the list of cases where loop bounds can be evaluated in the host, as they 
represent kernels for which the trip count must also be evaluated in advance of 
the kernel call.

---
Full diff: https://github.com/llvm/llvm-project/pull/127822.diff


2 Files Affected:

- (modified) flang/lib/Lower/OpenMP/OpenMP.cpp (+7-5) 
- (modified) flang/test/Lower/OpenMP/host-eval.f90 (+103) 


``diff
diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp 
b/flang/lib/Lower/OpenMP/OpenMP.cpp
index bd794033cdf11..8c80453610473 100644
--- a/flang/lib/Lower/OpenMP/OpenMP.cpp
+++ b/flang/lib/Lower/OpenMP/OpenMP.cpp
@@ -562,8 +562,11 @@ static void 
processHostEvalClauses(lower::AbstractConverter &converter,
   [[fallthrough]];
 case OMPD_distribute_parallel_do:
 case OMPD_distribute_parallel_do_simd:
-  cp.processCollapse(loc, eval, hostInfo.ops, hostInfo.iv);
   cp.processNumThreads(stmtCtx, hostInfo.ops);
+  [[fallthrough]];
+case OMPD_distribute:
+case OMPD_distribute_simd:
+  cp.processCollapse(loc, eval, hostInfo.ops, hostInfo.iv);
   break;
 
 // Cases where 'teams' clauses might be present, and target SPMD is
@@ -573,10 +576,8 @@ static void 
processHostEvalClauses(lower::AbstractConverter &converter,
   [[fallthrough]];
 case OMPD_target_teams:
   cp.processNumTeams(stmtCtx, hostInfo.ops);
-  processSingleNestedIf([](Directive nestedDir) {
-return nestedDir == OMPD_distribute_parallel_do ||
-   nestedDir == OMPD_distribute_parallel_do_simd;
-  });
+  processSingleNestedIf(
+  [](Directive nestedDir) { return topDistributeSet.test(nestedDir); 
});
   break;
 
 // Cases where only 'teams' host-evaluated clauses might be present.
@@ -586,6 +587,7 @@ static void processHostEvalClauses(lower::AbstractConverter 
&converter,
   [[fallthrough]];
 case OMPD_target_teams_distribute:
 case OMPD_target_teams_distribute_simd:
+  cp.processCollapse(loc, eval, hostInfo.ops, hostInfo.iv);
   cp.processNumTeams(stmtCtx, hostInfo.ops);
   break;
 
diff --git a/flang/test/Lower/OpenMP/host-eval.f90 
b/flang/test/Lower/OpenMP/host-eval.f90
index 32c52462b86a7..65258c91e5daf 100644
--- a/flang/test/Lower/OpenMP/host-eval.f90
+++ b/flang/test/Lower/OpenMP/host-eval.f90
@@ -155,3 +155,106 @@ subroutine distribute_parallel_do_simd()
   !$omp end distribute parallel do simd
   !$omp end teams
 end subroutine distribute_parallel_do_simd
+
+! BOTH-LABEL: func.func @_QPdistribute
+subroutine distribute()
+  ! BOTH: omp.target
+  
+  ! HOST-SAME: host_eval(%{{.*}} -> %[[LB:.*]], %{{.*}} -> %[[UB:.*]], %{{.*}} 
-> %[[STEP:.*]] : i32, i32, i32)
+  
+  ! DEVICE-NOT: host_eval({{.*}})
+  ! DEVICE-SAME: {
+
+  ! BOTH: omp.teams
+  !$omp target teams
+
+  ! BOTH: omp.distribute
+  ! BOTH-NEXT: omp.loop_nest
+
+  ! HOST-SAME: (%{{.*}}) : i32 = (%[[LB]]) to (%[[UB]]) inclusive step 
(%[[STEP]])
+  !$omp distribute
+  do i=1,10
+call foo()
+  end do
+  !$omp end distribute
+  !$omp end target teams
+
+  ! BOTH: omp.target
+  ! BOTH-NOT: host_eval({{.*}})
+  ! BOTH-SAME: {
+  ! BOTH: omp.teams
+  !$omp target teams
+  call foo() !< Prevents this from being Generic-SPMD.
+
+  ! BOTH: omp.distribute
+  !$omp distribute
+  do i=1,10
+call foo()
+  end do
+  !$omp end distribute
+  !$omp end target teams
+
+  ! BOTH: omp.teams
+  !$omp teams
+
+  ! BOTH: omp.distribute
+  !$omp distribute
+  do i=1,10
+call foo()
+  end do
+  !$omp end distribute
+  !$omp end teams
+end subroutine distribute
+
+! BOTH-LABEL: func.func @_QPdistribute_simd
+subroutine distribute_simd()
+  ! BOTH: omp.target
+  
+  ! HOST-SAME: host_eval(%{{.*}} -> %[[LB:.*]], %{{.*}} -> %[[UB:.*]], %{{.*}} 
-> %[[STEP:.*]] : i32, i32, i32)
+  
+  ! DEVICE-NOT: host_eval({{.*}})
+  ! DEVICE-SAME: {
+
+  ! BOTH: omp.teams
+  !$omp target teams
+
+  ! BOTH: omp.distribute
+  ! BOTH-NEXT: omp.simd
+  ! BOTH-NEXT: omp.loop_nest
+
+  ! HOST-SAME: (%{{.*}}) : i32 = (%[[LB]]) to (%[[UB]]) inclusive step 
(%[[STEP]])
+  !$omp distribute simd
+  do i=1,10
+call foo()
+  end do
+  !$omp end distribute simd
+  !$omp end target teams
+
+  ! BOTH: omp.target
+  ! BOTH-NOT: host_eval({{.*}})
+  ! BOTH-SAME: {
+  ! BOTH: omp.teams
+  !$omp target teams
+  call foo() !< Prevents this from being Generic-SPMD.
+
+  ! BOTH: omp.distribute
+  ! BOTH-NEXT: omp.simd
+  !$omp distribute simd
+  do i=1,10
+call foo()
+  end do
+  !$omp end distribute simd
+  !$omp end target teams
+
+  ! BOTH: omp.teams
+  !$omp teams
+
+  ! BOTH: omp.distribute
+  ! BOTH-NEXT: omp.simd
+  !$omp distribute simd
+  do i=1,10
+call foo()
+  end do
+  !$omp end distribute simd
+  !$omp end teams
+end subroutine distribute_simd

``




https://github.com/llvm/llvm-project/pull/127822

[llvm-branch-commits] [llvm] [OpenMPIRBuilder] Split calculation of canonical loop trip count, NFC (PR #127820)

2025-02-19 Thread Sergio Afonso via llvm-branch-commits

skatrak wrote:

PR stack:
- #115475
- #127217
- #127816
- #127817
- #127818
- #127819
- :arrow_right: #127820
- #127821
- #127822

https://github.com/llvm/llvm-project/pull/127820
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [OpenMPIRBuilder] Add support for distribute constructs (PR #127816)

2025-02-19 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-flang-openmp

Author: Sergio Afonso (skatrak)


Changes

This patch adds the `OpenMPIRBuilder::createDistribute()` function and updates 
`OpenMPIRBuilder::applyStaticWorkshareLoop()` in preparation for adding 
`distribute` support to flang.

---
Full diff: https://github.com/llvm/llvm-project/pull/127816.diff


2 Files Affected:

- (modified) llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h (+13-4) 
- (modified) llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp (+51-8) 


``diff
diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h 
b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
index d25077cae63e4..9ad85413acd34 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
@@ -1004,12 +1004,12 @@ class OpenMPIRBuilder {
   /// preheader of the loop.
   /// \param NeedsBarrier Indicates whether a barrier must be inserted after
   /// the loop.
+  /// \param LoopType Type of workshare loop.
   ///
   /// \returns Point where to insert code after the workshare construct.
-  InsertPointOrErrorTy applyStaticWorkshareLoop(DebugLoc DL,
-CanonicalLoopInfo *CLI,
-InsertPointTy AllocaIP,
-bool NeedsBarrier);
+  InsertPointOrErrorTy applyStaticWorkshareLoop(
+  DebugLoc DL, CanonicalLoopInfo *CLI, InsertPointTy AllocaIP,
+  omp::WorksharingLoopType LoopType, bool NeedsBarrier);
 
   /// Modifies the canonical loop a statically-scheduled workshare loop with a
   /// user-specified chunk size.
@@ -2660,6 +2660,15 @@ class OpenMPIRBuilder {
   Value *NumTeamsLower = nullptr, Value *NumTeamsUpper = nullptr,
   Value *ThreadLimit = nullptr, Value *IfExpr = nullptr);
 
+  /// Generator for `#omp distribute`
+  ///
+  /// \param Loc The location where the distribute construct was encountered.
+  /// \param AllocaIP The insertion points to be used for alloca instructions.
+  /// \param BodyGenCB Callback that will generate the region code.
+  InsertPointOrErrorTy createDistribute(const LocationDescription &Loc,
+InsertPointTy AllocaIP,
+BodyGenCallbackTy BodyGenCB);
+
   /// Generate conditional branch and relevant BasicBlocks through which 
private
   /// threads copy the 'copyin' variables from Master copy to threadprivate
   /// copies.
diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp 
b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
index 04acab1e5765e..9e380bf2d3dbe 100644
--- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -2295,7 +2295,8 @@ OpenMPIRBuilder::InsertPointOrErrorTy 
OpenMPIRBuilder::createSections(
 return LoopInfo.takeError();
 
   InsertPointOrErrorTy WsloopIP =
-  applyStaticWorkshareLoop(Loc.DL, *LoopInfo, AllocaIP, !IsNowait);
+  applyStaticWorkshareLoop(Loc.DL, *LoopInfo, AllocaIP,
+   WorksharingLoopType::ForStaticLoop, !IsNowait);
   if (!WsloopIP)
 return WsloopIP.takeError();
   InsertPointTy AfterIP = *WsloopIP;
@@ -4145,10 +4146,9 @@ static FunctionCallee getKmpcForStaticInitForType(Type 
*Ty, Module &M,
   llvm_unreachable("unknown OpenMP loop iterator bitwidth");
 }
 
-OpenMPIRBuilder::InsertPointOrErrorTy
-OpenMPIRBuilder::applyStaticWorkshareLoop(DebugLoc DL, CanonicalLoopInfo *CLI,
-  InsertPointTy AllocaIP,
-  bool NeedsBarrier) {
+OpenMPIRBuilder::InsertPointOrErrorTy 
OpenMPIRBuilder::applyStaticWorkshareLoop(
+DebugLoc DL, CanonicalLoopInfo *CLI, InsertPointTy AllocaIP,
+WorksharingLoopType LoopType, bool NeedsBarrier) {
   assert(CLI->isValid() && "Requires a valid canonical loop");
   assert(!isConflictIP(AllocaIP, CLI->getPreheaderIP()) &&
  "Require dedicated allocate IP");
@@ -4191,8 +4191,12 @@ OpenMPIRBuilder::applyStaticWorkshareLoop(DebugLoc DL, 
CanonicalLoopInfo *CLI,
 
   Value *ThreadNum = getOrCreateThreadID(SrcLoc);
 
-  Constant *SchedulingType = ConstantInt::get(
-  I32Type, static_cast(OMPScheduleType::UnorderedStatic));
+  OMPScheduleType SchedType =
+  (LoopType == WorksharingLoopType::DistributeStaticLoop)
+  ? OMPScheduleType::OrderedDistribute
+  : OMPScheduleType::UnorderedStatic;
+  Constant *SchedulingType =
+  ConstantInt::get(I32Type, static_cast(SchedType));
 
   // Call the "init" function and update the trip count of the loop with the
   // value it produced.
@@ -4452,6 +4456,7 @@ static void createTargetLoopWorkshareCall(
   RealArgs.push_back(TripCount);
   if (LoopType == WorksharingLoopType::DistributeStaticLoop) {
 RealArgs.push_back(ConstantInt::get(TripCountTy, 0));
+Builder.restoreIP({InsertBlock, std::prev(InsertBlock->end())});
   

[llvm-branch-commits] [mlir] [MLIR][OpenMP] Support target SPMD (PR #127821)

2025-02-19 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-mlir-llvm

Author: Sergio Afonso (skatrak)


Changes

This patch implements MLIR to LLVM IR translation of host-evaluated loop 
bounds, completing initial support for `target teams distribute parallel do 
[simd]` and `target teams distribute [simd]`.

---
Full diff: https://github.com/llvm/llvm-project/pull/127821.diff


3 Files Affected:

- (modified) 
mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp (+63-20) 
- (added) mlir/test/Target/LLVMIR/openmp-target-spmd.mlir (+96) 
- (modified) mlir/test/Target/LLVMIR/openmp-todo.mlir (-24) 


``diff
diff --git 
a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp 
b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index 7e8a9bdb5b133..93a88c89162d6 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -176,15 +176,6 @@ static LogicalResult checkImplementationStatus(Operation 
&op) {
 if (op.getHint())
   op.emitWarning("hint clause discarded");
   };
-  auto checkHostEval = [](auto op, LogicalResult &result) {
-// Host evaluated clauses are supported, except for loop bounds.
-for (BlockArgument arg :
- cast(*op).getHostEvalBlockArgs())
-  for (Operation *user : arg.getUsers())
-if (isa(user))
-  result = op.emitError("not yet implemented: host evaluation of loop "
-"bounds in omp.target operation");
-  };
   auto checkInReduction = [&todo](auto op, LogicalResult &result) {
 if (!op.getInReductionVars().empty() || op.getInReductionByref() ||
 op.getInReductionSyms())
@@ -321,7 +312,6 @@ static LogicalResult checkImplementationStatus(Operation 
&op) {
 checkBare(op, result);
 checkDevice(op, result);
 checkHasDeviceAddr(op, result);
-checkHostEval(op, result);
 checkInReduction(op, result);
 checkIsDevicePtr(op, result);
 checkPrivate(op, result);
@@ -4054,9 +4044,13 @@ createDeviceArgumentAccessor(MapInfoData &mapData, 
llvm::Argument &arg,
 ///
 /// Loop bounds and steps are only optionally populated, if output vectors are
 /// provided.
-static void extractHostEvalClauses(omp::TargetOp targetOp, Value &numThreads,
-   Value &numTeamsLower, Value &numTeamsUpper,
-   Value &threadLimit) {
+static void
+extractHostEvalClauses(omp::TargetOp targetOp, Value &numThreads,
+   Value &numTeamsLower, Value &numTeamsUpper,
+   Value &threadLimit,
+   llvm::SmallVectorImpl *lowerBounds = nullptr,
+   llvm::SmallVectorImpl *upperBounds = nullptr,
+   llvm::SmallVectorImpl *steps = nullptr) {
   auto blockArgIface = llvm::cast(*targetOp);
   for (auto item : llvm::zip_equal(targetOp.getHostEvalVars(),
blockArgIface.getHostEvalBlockArgs())) {
@@ -4081,11 +4075,26 @@ static void extractHostEvalClauses(omp::TargetOp 
targetOp, Value &numThreads,
   llvm_unreachable("unsupported host_eval use");
   })
   .Case([&](omp::LoopNestOp loopOp) {
-// TODO: Extract bounds and step values. Currently, this cannot be
-// reached because translation would have been stopped earlier as a
-// result of `checkImplementationStatus` detecting and reporting
-// this situation.
-llvm_unreachable("unsupported host_eval use");
+auto processBounds =
+[&](OperandRange opBounds,
+llvm::SmallVectorImpl *outBounds) -> bool {
+  bool found = false;
+  for (auto [i, lb] : llvm::enumerate(opBounds)) {
+if (lb == blockArg) {
+  found = true;
+  if (outBounds)
+(*outBounds)[i] = hostEvalVar;
+}
+  }
+  return found;
+};
+bool found =
+processBounds(loopOp.getLoopLowerBounds(), lowerBounds);
+found = processBounds(loopOp.getLoopUpperBounds(), upperBounds) ||
+found;
+found = processBounds(loopOp.getLoopSteps(), steps) || found;
+if (!found)
+  llvm_unreachable("unsupported host_eval use");
   })
   .Default([](Operation *) {
 llvm_unreachable("unsupported host_eval use");
@@ -4222,6 +4231,7 @@ initTargetDefaultAttrs(omp::TargetOp targetOp,
 combinedMaxThreadsVal = maxThreadsVal;
 
   // Update kernel bounds structure for the `OpenMPIRBuilder` to use.
+  attrs.ExecFlags = targetOp.getKernelExecFlags();
   attrs.MinTeams = minTeamsVal;
   attrs.MaxTeams.front() = maxTeamsVal;
   attrs.MinThreads = 1;
@@ -4239,9 +4249,15 @@ initTargetRuntimeAttrs(llvm::IRBuilderBase &build

[llvm-branch-commits] [mlir] [MLIR][OpenMP] Host lowering of standalone distribute (PR #127817)

2025-02-19 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-flang-openmp

Author: Sergio Afonso (skatrak)


Changes

This patch adds MLIR to LLVM IR translation support for standalone 
`omp.distribute` operations, as well as `distribute simd` by ignoring SIMD 
information (similarly to `do/for simd`).

---
Full diff: https://github.com/llvm/llvm-project/pull/127817.diff


3 Files Affected:

- (modified) 
mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp (+78) 
- (modified) mlir/test/Target/LLVMIR/openmp-llvm.mlir (+37) 
- (modified) mlir/test/Target/LLVMIR/openmp-todo.mlir (+63-3) 


``diff
diff --git 
a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp 
b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index a5ff3eff6439f..c8221a9f9854a 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -164,6 +164,10 @@ static LogicalResult checkImplementationStatus(Operation 
&op) {
 if (op.getDevice())
   result = todo("device");
   };
+  auto checkDistSchedule = [&todo](auto op, LogicalResult &result) {
+if (op.getDistScheduleChunkSize())
+  result = todo("dist_schedule with chunk_size");
+  };
   auto checkHasDeviceAddr = [&todo](auto op, LogicalResult &result) {
 if (!op.getHasDeviceAddrVars().empty())
   result = todo("has_device_addr");
@@ -255,6 +259,16 @@ static LogicalResult checkImplementationStatus(Operation 
&op) {
 
   LogicalResult result = success();
   llvm::TypeSwitch(op)
+  .Case([&](omp::DistributeOp op) {
+if (op.isComposite() &&
+isa_and_present(op.getNestedWrapper()))
+  result = op.emitError() << "not yet implemented: "
+ "composite omp.distribute + omp.wsloop";
+checkAllocate(op, result);
+checkDistSchedule(op, result);
+checkOrder(op, result);
+checkPrivate(op, result);
+  })
   .Case([&](omp::OrderedRegionOp op) { checkParLevelSimd(op, result); })
   .Case([&](omp::SectionsOp op) {
 checkAllocate(op, result);
@@ -3755,6 +3769,67 @@ convertOmpTargetData(Operation *op, llvm::IRBuilderBase 
&builder,
   return success();
 }
 
+static LogicalResult
+convertOmpDistribute(Operation &opInst, llvm::IRBuilderBase &builder,
+ LLVM::ModuleTranslation &moduleTranslation) {
+  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
+  auto distributeOp = cast(opInst);
+  if (failed(checkImplementationStatus(opInst)))
+return failure();
+
+  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
+  auto bodyGenCB = [&](InsertPointTy allocaIP,
+   InsertPointTy codeGenIP) -> llvm::Error {
+// DistributeOp has only one region associated with it.
+builder.restoreIP(codeGenIP);
+
+llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
+llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
+llvm::Expected regionBlock =
+convertOmpOpRegions(distributeOp.getRegion(), "omp.distribute.region",
+builder, moduleTranslation);
+if (!regionBlock)
+  return regionBlock.takeError();
+builder.SetInsertPoint(*regionBlock, (*regionBlock)->begin());
+
+// TODO: Add support for clauses which are valid for DISTRIBUTE constructs.
+// Static schedule is the default.
+auto schedule = omp::ClauseScheduleKind::Static;
+bool isOrdered = false;
+std::optional scheduleMod;
+bool isSimd = false;
+llvm::omp::WorksharingLoopType workshareLoopType =
+llvm::omp::WorksharingLoopType::DistributeStaticLoop;
+bool loopNeedsBarrier = false;
+llvm::Value *chunk = nullptr;
+
+llvm::CanonicalLoopInfo *loopInfo = 
*findCurrentLoopInfo(moduleTranslation);
+llvm::OpenMPIRBuilder::InsertPointOrErrorTy wsloopIP =
+ompBuilder->applyWorkshareLoop(
+ompLoc.DL, loopInfo, allocaIP, loopNeedsBarrier,
+convertToScheduleKind(schedule), chunk, isSimd,
+scheduleMod == omp::ScheduleModifier::monotonic,
+scheduleMod == omp::ScheduleModifier::nonmonotonic, isOrdered,
+workshareLoopType);
+
+if (!wsloopIP)
+  return wsloopIP.takeError();
+return llvm::Error::success();
+  };
+
+  llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
+  findAllocaInsertPoint(builder, moduleTranslation);
+  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
+  llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
+  ompBuilder->createDistribute(ompLoc, allocaIP, bodyGenCB);
+
+  if (failed(handleError(afterIP, opInst)))
+return failure();
+
+  builder.restoreIP(*afterIP);
+  return success();
+}
+
 /// Lowers the FlagsAttr which is applied to the module on the device
 /// pass when offloading, this attribute contains OpenMP RTL globals that can
 /// be passed as flags to the frontend, otherw

[llvm-branch-commits] [llvm] [OpenMPIRBuilder] Add support for distribute-parallel-for/do constructs (PR #127818)

2025-02-19 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-flang-openmp

Author: Sergio Afonso (skatrak)


Changes

This patch adds codegen for `kmpc_dist_for_static_init` runtime calls, used to 
support worksharing a single loop across teams and threads. This can be used to 
implement `distribute parallel for/do` support.

---
Full diff: https://github.com/llvm/llvm-project/pull/127818.diff


1 Files Affected:

- (modified) llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp (+30-4) 


``diff
diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp 
b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
index 9e380bf2d3dbe..7788897fc0795 100644
--- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -4130,6 +4130,23 @@ Expected 
OpenMPIRBuilder::createCanonicalLoop(
   return createCanonicalLoop(LoopLoc, BodyGen, TripCount, Name);
 }
 
+// Returns an LLVM function to call for initializing loop bounds using OpenMP
+// static scheduling for composite `distribute parallel for` depending on
+// `type`. Only i32 and i64 are supported by the runtime. Always interpret
+// integers as unsigned similarly to CanonicalLoopInfo.
+static FunctionCallee
+getKmpcDistForStaticInitForType(Type *Ty, Module &M,
+OpenMPIRBuilder &OMPBuilder) {
+  unsigned Bitwidth = Ty->getIntegerBitWidth();
+  if (Bitwidth == 32)
+return OMPBuilder.getOrCreateRuntimeFunction(
+M, omp::RuntimeFunction::OMPRTL___kmpc_dist_for_static_init_4u);
+  if (Bitwidth == 64)
+return OMPBuilder.getOrCreateRuntimeFunction(
+M, omp::RuntimeFunction::OMPRTL___kmpc_dist_for_static_init_8u);
+  llvm_unreachable("unknown OpenMP loop iterator bitwidth");
+}
+
 // Returns an LLVM function to call for initializing loop bounds using OpenMP
 // static scheduling depending on `type`. Only i32 and i64 are supported by the
 // runtime. Always interpret integers as unsigned similarly to
@@ -4164,7 +4181,10 @@ OpenMPIRBuilder::InsertPointOrErrorTy 
OpenMPIRBuilder::applyStaticWorkshareLoop(
   // Declare useful OpenMP runtime functions.
   Value *IV = CLI->getIndVar();
   Type *IVTy = IV->getType();
-  FunctionCallee StaticInit = getKmpcForStaticInitForType(IVTy, M, *this);
+  FunctionCallee StaticInit =
+  LoopType == WorksharingLoopType::DistributeForStaticLoop
+  ? getKmpcDistForStaticInitForType(IVTy, M, *this)
+  : getKmpcForStaticInitForType(IVTy, M, *this);
   FunctionCallee StaticFini =
   getOrCreateRuntimeFunction(M, omp::OMPRTL___kmpc_for_static_fini);
 
@@ -4200,9 +4220,15 @@ OpenMPIRBuilder::InsertPointOrErrorTy 
OpenMPIRBuilder::applyStaticWorkshareLoop(
 
   // Call the "init" function and update the trip count of the loop with the
   // value it produced.
-  Builder.CreateCall(StaticInit,
- {SrcLoc, ThreadNum, SchedulingType, PLastIter, 
PLowerBound,
-  PUpperBound, PStride, One, Zero});
+  SmallVector Args(
+  {SrcLoc, ThreadNum, SchedulingType, PLastIter, PLowerBound, 
PUpperBound});
+  if (LoopType == WorksharingLoopType::DistributeForStaticLoop) {
+Value *PDistUpperBound =
+Builder.CreateAlloca(IVTy, nullptr, "p.distupperbound");
+Args.push_back(PDistUpperBound);
+  }
+  Args.append({PStride, One, Zero});
+  Builder.CreateCall(StaticInit, Args);
   Value *LowerBound = Builder.CreateLoad(IVTy, PLowerBound);
   Value *InclusiveUpperBound = Builder.CreateLoad(IVTy, PUpperBound);
   Value *TripCountMinusOne = Builder.CreateSub(InclusiveUpperBound, 
LowerBound);

``




https://github.com/llvm/llvm-project/pull/127818
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [OpenMPIRBuilder] Split calculation of canonical loop trip count, NFC (PR #127820)

2025-02-19 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-flang-openmp

Author: Sergio Afonso (skatrak)


Changes

This patch splits off the calculation of canonical loop trip counts from the 
creation of canonical loops. This makes it possible to reuse this logic to, for 
instance, populate the `__tgt_target_kernel` runtime call for SPMD kernels.

This feature is used to simplify one of the existing OpenMPIRBuilder tests.

---
Full diff: https://github.com/llvm/llvm-project/pull/127820.diff


3 Files Affected:

- (modified) llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h (+31-7) 
- (modified) llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp (+18-9) 
- (modified) llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp (+3-13) 


``diff
diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h 
b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
index 9ad85413acd34..207ca7fb05f62 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
@@ -728,13 +728,12 @@ class OpenMPIRBuilder {
   LoopBodyGenCallbackTy BodyGenCB, Value *TripCount,
   const Twine &Name = "loop");
 
-  /// Generator for the control flow structure of an OpenMP canonical loop.
+  /// Calculate the trip count of a canonical loop.
   ///
-  /// Instead of a logical iteration space, this allows specifying user-defined
-  /// loop counter values using increment, upper- and lower bounds. To
-  /// disambiguate the terminology when counting downwards, instead of lower
-  /// bounds we use \p Start for the loop counter value in the first body
-  /// iteration.
+  /// This allows specifying user-defined loop counter values using increment,
+  /// upper- and lower bounds. To disambiguate the terminology when counting
+  /// downwards, instead of lower bounds we use \p Start for the loop counter
+  /// value in the first body iteration.
   ///
   /// Consider the following limitations:
   ///
@@ -758,7 +757,32 @@ class OpenMPIRBuilder {
   ///
   ///  for (int i = 0; i < 42; i -= 1u)
   ///
-  //
+  /// \param Loc   The insert and source location description.
+  /// \param Start Value of the loop counter for the first iterations.
+  /// \param Stop  Loop counter values past this will stop the loop.
+  /// \param Step  Loop counter increment after each iteration; negative
+  ///  means counting down.
+  /// \param IsSigned  Whether Start, Stop and Step are signed integers.
+  /// \param InclusiveStop Whether \p Stop itself is a valid value for the loop
+  ///  counter.
+  /// \param Name  Base name used to derive instruction names.
+  ///
+  /// \returns The value holding the calculated trip count.
+  Value *calculateCanonicalLoopTripCount(const LocationDescription &Loc,
+ Value *Start, Value *Stop, Value 
*Step,
+ bool IsSigned, bool InclusiveStop,
+ const Twine &Name = "loop");
+
+  /// Generator for the control flow structure of an OpenMP canonical loop.
+  ///
+  /// Instead of a logical iteration space, this allows specifying user-defined
+  /// loop counter values using increment, upper- and lower bounds. To
+  /// disambiguate the terminology when counting downwards, instead of lower
+  /// bounds we use \p Start for the loop counter value in the first body
+  ///
+  /// It calls \see calculateCanonicalLoopTripCount for trip count 
calculations,
+  /// so limitations of that method apply here as well.
+  ///
   /// \param Loc   The insert and source location description.
   /// \param BodyGenCB Callback that will generate the loop body code.
   /// \param Start Value of the loop counter for the first iterations.
diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp 
b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
index 7788897fc0795..eee6e3e54d615 100644
--- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -4059,10 +4059,9 @@ OpenMPIRBuilder::createCanonicalLoop(const 
LocationDescription &Loc,
   return CL;
 }
 
-Expected OpenMPIRBuilder::createCanonicalLoop(
-const LocationDescription &Loc, LoopBodyGenCallbackTy BodyGenCB,
-Value *Start, Value *Stop, Value *Step, bool IsSigned, bool InclusiveStop,
-InsertPointTy ComputeIP, const Twine &Name) {
+Value *OpenMPIRBuilder::calculateCanonicalLoopTripCount(
+const LocationDescription &Loc, Value *Start, Value *Stop, Value *Step,
+bool IsSigned, bool InclusiveStop, const Twine &Name) {
 
   // Consider the following difficulties (assuming 8-bit signed integers):
   //  * Adding \p Step to the loop counter which passes \p Stop may overflow:
@@ -4075,9 +4074,7 @@ Expected 
OpenMPIRBuilder::createCanonicalLoop(
   assert(IndVarTy == Stop->getType() && "Stop type mismatch");
   assert(IndVarTy == Step->getType() && "Step type mismatch");
 
-  LocationDescription ComputeLoc =
-

[llvm-branch-commits] [mlir] [MLIR][OpenMP] Support target SPMD (PR #127821)

2025-02-19 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-mlir

Author: Sergio Afonso (skatrak)


Changes

This patch implements MLIR to LLVM IR translation of host-evaluated loop 
bounds, completing initial support for `target teams distribute parallel do 
[simd]` and `target teams distribute [simd]`.

---
Full diff: https://github.com/llvm/llvm-project/pull/127821.diff


3 Files Affected:

- (modified) 
mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp (+63-20) 
- (added) mlir/test/Target/LLVMIR/openmp-target-spmd.mlir (+96) 
- (modified) mlir/test/Target/LLVMIR/openmp-todo.mlir (-24) 


``diff
diff --git 
a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp 
b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index 7e8a9bdb5b133..93a88c89162d6 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -176,15 +176,6 @@ static LogicalResult checkImplementationStatus(Operation 
&op) {
 if (op.getHint())
   op.emitWarning("hint clause discarded");
   };
-  auto checkHostEval = [](auto op, LogicalResult &result) {
-// Host evaluated clauses are supported, except for loop bounds.
-for (BlockArgument arg :
- cast(*op).getHostEvalBlockArgs())
-  for (Operation *user : arg.getUsers())
-if (isa(user))
-  result = op.emitError("not yet implemented: host evaluation of loop "
-"bounds in omp.target operation");
-  };
   auto checkInReduction = [&todo](auto op, LogicalResult &result) {
 if (!op.getInReductionVars().empty() || op.getInReductionByref() ||
 op.getInReductionSyms())
@@ -321,7 +312,6 @@ static LogicalResult checkImplementationStatus(Operation 
&op) {
 checkBare(op, result);
 checkDevice(op, result);
 checkHasDeviceAddr(op, result);
-checkHostEval(op, result);
 checkInReduction(op, result);
 checkIsDevicePtr(op, result);
 checkPrivate(op, result);
@@ -4054,9 +4044,13 @@ createDeviceArgumentAccessor(MapInfoData &mapData, 
llvm::Argument &arg,
 ///
 /// Loop bounds and steps are only optionally populated, if output vectors are
 /// provided.
-static void extractHostEvalClauses(omp::TargetOp targetOp, Value &numThreads,
-   Value &numTeamsLower, Value &numTeamsUpper,
-   Value &threadLimit) {
+static void
+extractHostEvalClauses(omp::TargetOp targetOp, Value &numThreads,
+   Value &numTeamsLower, Value &numTeamsUpper,
+   Value &threadLimit,
+   llvm::SmallVectorImpl *lowerBounds = nullptr,
+   llvm::SmallVectorImpl *upperBounds = nullptr,
+   llvm::SmallVectorImpl *steps = nullptr) {
   auto blockArgIface = llvm::cast(*targetOp);
   for (auto item : llvm::zip_equal(targetOp.getHostEvalVars(),
blockArgIface.getHostEvalBlockArgs())) {
@@ -4081,11 +4075,26 @@ static void extractHostEvalClauses(omp::TargetOp 
targetOp, Value &numThreads,
   llvm_unreachable("unsupported host_eval use");
   })
   .Case([&](omp::LoopNestOp loopOp) {
-// TODO: Extract bounds and step values. Currently, this cannot be
-// reached because translation would have been stopped earlier as a
-// result of `checkImplementationStatus` detecting and reporting
-// this situation.
-llvm_unreachable("unsupported host_eval use");
+auto processBounds =
+[&](OperandRange opBounds,
+llvm::SmallVectorImpl *outBounds) -> bool {
+  bool found = false;
+  for (auto [i, lb] : llvm::enumerate(opBounds)) {
+if (lb == blockArg) {
+  found = true;
+  if (outBounds)
+(*outBounds)[i] = hostEvalVar;
+}
+  }
+  return found;
+};
+bool found =
+processBounds(loopOp.getLoopLowerBounds(), lowerBounds);
+found = processBounds(loopOp.getLoopUpperBounds(), upperBounds) ||
+found;
+found = processBounds(loopOp.getLoopSteps(), steps) || found;
+if (!found)
+  llvm_unreachable("unsupported host_eval use");
   })
   .Default([](Operation *) {
 llvm_unreachable("unsupported host_eval use");
@@ -4222,6 +4231,7 @@ initTargetDefaultAttrs(omp::TargetOp targetOp,
 combinedMaxThreadsVal = maxThreadsVal;
 
   // Update kernel bounds structure for the `OpenMPIRBuilder` to use.
+  attrs.ExecFlags = targetOp.getKernelExecFlags();
   attrs.MinTeams = minTeamsVal;
   attrs.MaxTeams.front() = maxTeamsVal;
   attrs.MinThreads = 1;
@@ -4239,9 +4249,15 @@ initTargetRuntimeAttrs(llvm::IRBuilderBase &builder,
 

[llvm-branch-commits] [llvm] [OpenMPIRBuilder] Add support for distribute-parallel-for/do constructs (PR #127818)

2025-02-19 Thread Sergio Afonso via llvm-branch-commits

https://github.com/skatrak created 
https://github.com/llvm/llvm-project/pull/127818

This patch adds codegen for `kmpc_dist_for_static_init` runtime calls, used to 
support worksharing a single loop across teams and threads. This can be used to 
implement `distribute parallel for/do` support.

>From cb7ae2d2aa19a0bdb46e38943eab629d74c8de2c Mon Sep 17 00:00:00 2001
From: Sergio Afonso 
Date: Tue, 18 Feb 2025 12:04:53 +0000
Subject: [PATCH] [OpenMPIRBuilder] Add support for distribute-parallel-for/do
 constructs

This patch adds codegen for `kmpc_dist_for_static_init` runtime calls, used to
support worksharing a single loop across teams and threads. This can be used to
implement `distribute parallel for/do` support.
---
 llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp | 34 ---
 1 file changed, 30 insertions(+), 4 deletions(-)

diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp 
b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
index 9e380bf2d3dbe..7788897fc0795 100644
--- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -4130,6 +4130,23 @@ Expected 
OpenMPIRBuilder::createCanonicalLoop(
   return createCanonicalLoop(LoopLoc, BodyGen, TripCount, Name);
 }
 
+// Returns an LLVM function to call for initializing loop bounds using OpenMP
+// static scheduling for composite `distribute parallel for` depending on
+// `type`. Only i32 and i64 are supported by the runtime. Always interpret
+// integers as unsigned similarly to CanonicalLoopInfo.
+static FunctionCallee
+getKmpcDistForStaticInitForType(Type *Ty, Module &M,
+OpenMPIRBuilder &OMPBuilder) {
+  unsigned Bitwidth = Ty->getIntegerBitWidth();
+  if (Bitwidth == 32)
+return OMPBuilder.getOrCreateRuntimeFunction(
+M, omp::RuntimeFunction::OMPRTL___kmpc_dist_for_static_init_4u);
+  if (Bitwidth == 64)
+return OMPBuilder.getOrCreateRuntimeFunction(
+M, omp::RuntimeFunction::OMPRTL___kmpc_dist_for_static_init_8u);
+  llvm_unreachable("unknown OpenMP loop iterator bitwidth");
+}
+
 // Returns an LLVM function to call for initializing loop bounds using OpenMP
 // static scheduling depending on `type`. Only i32 and i64 are supported by the
 // runtime. Always interpret integers as unsigned similarly to
@@ -4164,7 +4181,10 @@ OpenMPIRBuilder::InsertPointOrErrorTy 
OpenMPIRBuilder::applyStaticWorkshareLoop(
   // Declare useful OpenMP runtime functions.
   Value *IV = CLI->getIndVar();
   Type *IVTy = IV->getType();
-  FunctionCallee StaticInit = getKmpcForStaticInitForType(IVTy, M, *this);
+  FunctionCallee StaticInit =
+  LoopType == WorksharingLoopType::DistributeForStaticLoop
+  ? getKmpcDistForStaticInitForType(IVTy, M, *this)
+  : getKmpcForStaticInitForType(IVTy, M, *this);
   FunctionCallee StaticFini =
   getOrCreateRuntimeFunction(M, omp::OMPRTL___kmpc_for_static_fini);
 
@@ -4200,9 +4220,15 @@ OpenMPIRBuilder::InsertPointOrErrorTy 
OpenMPIRBuilder::applyStaticWorkshareLoop(
 
   // Call the "init" function and update the trip count of the loop with the
   // value it produced.
-  Builder.CreateCall(StaticInit,
- {SrcLoc, ThreadNum, SchedulingType, PLastIter, 
PLowerBound,
-  PUpperBound, PStride, One, Zero});
+  SmallVector Args(
+  {SrcLoc, ThreadNum, SchedulingType, PLastIter, PLowerBound, 
PUpperBound});
+  if (LoopType == WorksharingLoopType::DistributeForStaticLoop) {
+Value *PDistUpperBound =
+Builder.CreateAlloca(IVTy, nullptr, "p.distupperbound");
+Args.push_back(PDistUpperBound);
+  }
+  Args.append({PStride, One, Zero});
+  Builder.CreateCall(StaticInit, Args);
   Value *LowerBound = Builder.CreateLoad(IVTy, PLowerBound);
   Value *InclusiveUpperBound = Builder.CreateLoad(IVTy, PUpperBound);
   Value *TripCountMinusOne = Builder.CreateSub(InclusiveUpperBound, 
LowerBound);

___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [mlir] [MLIR][OpenMP] Support target SPMD (PR #127821)

2025-02-19 Thread Michael Kruse via llvm-branch-commits

https://github.com/Meinersbur approved this pull request.

LGTM

https://github.com/llvm/llvm-project/pull/127821
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [mlir] [MLIR][OpenMP] Support target SPMD (PR #127821)

2025-02-19 Thread Michael Kruse via llvm-branch-commits


@@ -4081,11 +4075,26 @@ static void extractHostEvalClauses(omp::TargetOp 
targetOp, Value &numThreads,
   llvm_unreachable("unsupported host_eval use");
   })
   .Case([&](omp::LoopNestOp loopOp) {
-// TODO: Extract bounds and step values. Currently, this cannot be
-// reached because translation would have been stopped earlier as a
-// result of `checkImplementationStatus` detecting and reporting
-// this situation.
-llvm_unreachable("unsupported host_eval use");
+auto processBounds =
+[&](OperandRange opBounds,
+llvm::SmallVectorImpl *outBounds) -> bool {
+  bool found = false;
+  for (auto [i, lb] : llvm::enumerate(opBounds)) {
+if (lb == blockArg) {
+  found = true;
+  if (outBounds)
+(*outBounds)[i] = hostEvalVar;
+}
+  }
+  return found;
+};
+bool found =
+processBounds(loopOp.getLoopLowerBounds(), lowerBounds);
+found = processBounds(loopOp.getLoopUpperBounds(), upperBounds) ||
+found;
+found = processBounds(loopOp.getLoopSteps(), steps) || found;
+if (!found)
+  llvm_unreachable("unsupported host_eval use");

Meinersbur wrote:

```suggestion
(void)found;
assert(found && "unsupported host_eval use");
```

https://github.com/llvm/llvm-project/pull/127821
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [mlir] [MLIR][OpenMP] Support target SPMD (PR #127821)

2025-02-19 Thread Sergio Afonso via llvm-branch-commits

https://github.com/skatrak created 
https://github.com/llvm/llvm-project/pull/127821

This patch implements MLIR to LLVM IR translation of host-evaluated loop 
bounds, completing initial support for `target teams distribute parallel do 
[simd]` and `target teams distribute [simd]`.

>From 33409d2b52bfb4c69f67bbde001de5ce48feb073 Mon Sep 17 00:00:00 2001
From: Sergio Afonso 
Date: Wed, 19 Feb 2025 14:41:12 +0000
Subject: [PATCH] [MLIR][OpenMP] Support target SPMD

This patch implements MLIR to LLVM IR translation of host-evaluated loop
bounds, completing initial support for `target teams distribute parallel do
[simd]` and `target teams distribute [simd]`.
---
 .../OpenMP/OpenMPToLLVMIRTranslation.cpp  | 83 
 .../Target/LLVMIR/openmp-target-spmd.mlir | 96 +++
 mlir/test/Target/LLVMIR/openmp-todo.mlir  | 24 -
 3 files changed, 159 insertions(+), 44 deletions(-)
 create mode 100644 mlir/test/Target/LLVMIR/openmp-target-spmd.mlir

diff --git 
a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp 
b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index 7e8a9bdb5b133..93a88c89162d6 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -176,15 +176,6 @@ static LogicalResult checkImplementationStatus(Operation 
&op) {
 if (op.getHint())
   op.emitWarning("hint clause discarded");
   };
-  auto checkHostEval = [](auto op, LogicalResult &result) {
-// Host evaluated clauses are supported, except for loop bounds.
-for (BlockArgument arg :
- cast(*op).getHostEvalBlockArgs())
-  for (Operation *user : arg.getUsers())
-if (isa(user))
-  result = op.emitError("not yet implemented: host evaluation of loop "
-"bounds in omp.target operation");
-  };
   auto checkInReduction = [&todo](auto op, LogicalResult &result) {
 if (!op.getInReductionVars().empty() || op.getInReductionByref() ||
 op.getInReductionSyms())
@@ -321,7 +312,6 @@ static LogicalResult checkImplementationStatus(Operation 
&op) {
 checkBare(op, result);
 checkDevice(op, result);
 checkHasDeviceAddr(op, result);
-checkHostEval(op, result);
 checkInReduction(op, result);
 checkIsDevicePtr(op, result);
 checkPrivate(op, result);
@@ -4054,9 +4044,13 @@ createDeviceArgumentAccessor(MapInfoData &mapData, 
llvm::Argument &arg,
 ///
 /// Loop bounds and steps are only optionally populated, if output vectors are
 /// provided.
-static void extractHostEvalClauses(omp::TargetOp targetOp, Value &numThreads,
-   Value &numTeamsLower, Value &numTeamsUpper,
-   Value &threadLimit) {
+static void
+extractHostEvalClauses(omp::TargetOp targetOp, Value &numThreads,
+   Value &numTeamsLower, Value &numTeamsUpper,
+   Value &threadLimit,
+   llvm::SmallVectorImpl *lowerBounds = nullptr,
+   llvm::SmallVectorImpl *upperBounds = nullptr,
+   llvm::SmallVectorImpl *steps = nullptr) {
   auto blockArgIface = llvm::cast(*targetOp);
   for (auto item : llvm::zip_equal(targetOp.getHostEvalVars(),
blockArgIface.getHostEvalBlockArgs())) {
@@ -4081,11 +4075,26 @@ static void extractHostEvalClauses(omp::TargetOp 
targetOp, Value &numThreads,
   llvm_unreachable("unsupported host_eval use");
   })
   .Case([&](omp::LoopNestOp loopOp) {
-// TODO: Extract bounds and step values. Currently, this cannot be
-// reached because translation would have been stopped earlier as a
-// result of `checkImplementationStatus` detecting and reporting
-// this situation.
-llvm_unreachable("unsupported host_eval use");
+auto processBounds =
+[&](OperandRange opBounds,
+llvm::SmallVectorImpl *outBounds) -> bool {
+  bool found = false;
+  for (auto [i, lb] : llvm::enumerate(opBounds)) {
+if (lb == blockArg) {
+  found = true;
+  if (outBounds)
+(*outBounds)[i] = hostEvalVar;
+}
+  }
+  return found;
+};
+bool found =
+processBounds(loopOp.getLoopLowerBounds(), lowerBounds);
+found = processBounds(loopOp.getLoopUpperBounds(), upperBounds) ||
+found;
+found = processBounds(loopOp.getLoopSteps(), steps) || found;
+if (!found)
+  llvm_unreachable("unsupported host_eval use");
   })
   .Default([](Operation *) {
 llvm_unreachable("unsupported host_eval use");
@@ -4222,6 +4231,7 @@ initTargetDefaul

[llvm-branch-commits] [mlir] [MLIR][OpenMP] Host lowering of distribute-parallel-do/for (PR #127819)

2025-02-19 Thread via llvm-branch-commits

llvmbot wrote:



@llvm/pr-subscribers-mlir-llvm

@llvm/pr-subscribers-mlir-openmp

Author: Sergio Afonso (skatrak)


Changes

This patch adds support for translating composite `omp.parallel` + 
`omp.distribute` + `omp.wsloop` loops to LLVM IR on the host. This is done by 
passing an updated `WorksharingLoopType` to the call to `applyWorkshareLoop` 
associated to the lowering of the `omp.wsloop` operation, so that 
`__kmpc_dist_for_static_init` is called at runtime in place of 
`__kmpc_for_static_init`.

Existing translation rules take care of creating a parallel region to hold the 
workshared and workdistributed loop.

---
Full diff: https://github.com/llvm/llvm-project/pull/127819.diff


3 Files Affected:

- (modified) 
mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp (+16-5) 
- (modified) mlir/test/Target/LLVMIR/openmp-llvm.mlir (+65) 
- (modified) mlir/test/Target/LLVMIR/openmp-todo.mlir (-19) 


``diff
diff --git 
a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp 
b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index c8221a9f9854a..7e8a9bdb5b133 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -260,10 +260,6 @@ static LogicalResult checkImplementationStatus(Operation 
&op) {
   LogicalResult result = success();
   llvm::TypeSwitch(op)
   .Case([&](omp::DistributeOp op) {
-if (op.isComposite() &&
-isa_and_present(op.getNestedWrapper()))
-  result = op.emitError() << "not yet implemented: "
- "composite omp.distribute + omp.wsloop";
 checkAllocate(op, result);
 checkDistSchedule(op, result);
 checkOrder(op, result);
@@ -1993,6 +1989,14 @@ convertOmpWsloop(Operation &opInst, llvm::IRBuilderBase 
&builder,
   bool isSimd = wsloopOp.getScheduleSimd();
   bool loopNeedsBarrier = !wsloopOp.getNowait();
 
+  // The only legal way for the direct parent to be omp.distribute is that this
+  // represents 'distribute parallel do'. Otherwise, this is a regular
+  // worksharing loop.
+  llvm::omp::WorksharingLoopType workshareLoopType =
+  llvm::isa_and_present(opInst.getParentOp())
+  ? llvm::omp::WorksharingLoopType::DistributeForStaticLoop
+  : llvm::omp::WorksharingLoopType::ForStaticLoop;
+
   llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
   llvm::Expected regionBlock = convertOmpOpRegions(
   wsloopOp.getRegion(), "omp.wsloop.region", builder, moduleTranslation);
@@ -2008,7 +2012,8 @@ convertOmpWsloop(Operation &opInst, llvm::IRBuilderBase 
&builder,
   ompLoc.DL, loopInfo, allocaIP, loopNeedsBarrier,
   convertToScheduleKind(schedule), chunk, isSimd,
   scheduleMod == omp::ScheduleModifier::monotonic,
-  scheduleMod == omp::ScheduleModifier::nonmonotonic, isOrdered);
+  scheduleMod == omp::ScheduleModifier::nonmonotonic, isOrdered,
+  workshareLoopType);
 
   if (failed(handleError(wsloopIP, opInst)))
 return failure();
@@ -3792,6 +3797,12 @@ convertOmpDistribute(Operation &opInst, 
llvm::IRBuilderBase &builder,
   return regionBlock.takeError();
 builder.SetInsertPoint(*regionBlock, (*regionBlock)->begin());
 
+// Skip applying a workshare loop below when translating 'distribute
+// parallel do' (it's been already handled by this point while translating
+// the nested omp.wsloop).
+if (isa_and_present(distributeOp.getNestedWrapper()))
+  return llvm::Error::success();
+
 // TODO: Add support for clauses which are valid for DISTRIBUTE constructs.
 // Static schedule is the default.
 auto schedule = omp::ClauseScheduleKind::Static;
diff --git a/mlir/test/Target/LLVMIR/openmp-llvm.mlir 
b/mlir/test/Target/LLVMIR/openmp-llvm.mlir
index a5a490e527d79..d85b149c66811 100644
--- a/mlir/test/Target/LLVMIR/openmp-llvm.mlir
+++ b/mlir/test/Target/LLVMIR/openmp-llvm.mlir
@@ -3307,3 +3307,68 @@ llvm.func @distribute() {
 // CHECK: store i64 1, ptr %[[STRIDE]]
 // CHECK: %[[TID:.*]] = call i32 @__kmpc_global_thread_num({{.*}})
 // CHECK: call void @__kmpc_for_static_init_{{.*}}(ptr @{{.*}}, i32 
%[[TID]], i32 92, ptr %[[LASTITER]], ptr %[[LB]], ptr %[[UB]], ptr %[[STRIDE]], 
i64 1, i64 0)
+
+// -
+
+llvm.func @distribute_wsloop(%lb : i32, %ub : i32, %step : i32) {
+  omp.parallel {
+omp.distribute {
+  omp.wsloop {
+omp.loop_nest (%iv) : i32 = (%lb) to (%ub) step (%step) {
+  omp.yield
+}
+  } {omp.composite}
+} {omp.composite}
+omp.terminator
+  } {omp.composite}
+  llvm.return
+}
+
+// CHECK-LABEL: define void @distribute_wsloop
+// CHECK: call void{{.*}}@__kmpc_fork_call({{.*}}, ptr 
@[[OUTLINED_PARALLEL:.*]],
+
+// CHECK:   define internal void @[[OUTLINED_PARALLEL]]({{.*}})
+// CHECK: %[[ARGS:.*]] = alloca { i32, i32, i32, ptr, ptr

[llvm-branch-commits] [mlir] [MLIR][OpenMP] Host lowering of distribute-parallel-do/for (PR #127819)

2025-02-19 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-mlir

Author: Sergio Afonso (skatrak)


Changes

This patch adds support for translating composite `omp.parallel` + 
`omp.distribute` + `omp.wsloop` loops to LLVM IR on the host. This is done by 
passing an updated `WorksharingLoopType` to the call to `applyWorkshareLoop` 
associated to the lowering of the `omp.wsloop` operation, so that 
`__kmpc_dist_for_static_init` is called at runtime in place of 
`__kmpc_for_static_init`.

Existing translation rules take care of creating a parallel region to hold the 
workshared and workdistributed loop.

---
Full diff: https://github.com/llvm/llvm-project/pull/127819.diff


3 Files Affected:

- (modified) 
mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp (+16-5) 
- (modified) mlir/test/Target/LLVMIR/openmp-llvm.mlir (+65) 
- (modified) mlir/test/Target/LLVMIR/openmp-todo.mlir (-19) 


``diff
diff --git 
a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp 
b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index c8221a9f9854a..7e8a9bdb5b133 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -260,10 +260,6 @@ static LogicalResult checkImplementationStatus(Operation 
&op) {
   LogicalResult result = success();
   llvm::TypeSwitch(op)
   .Case([&](omp::DistributeOp op) {
-if (op.isComposite() &&
-isa_and_present(op.getNestedWrapper()))
-  result = op.emitError() << "not yet implemented: "
- "composite omp.distribute + omp.wsloop";
 checkAllocate(op, result);
 checkDistSchedule(op, result);
 checkOrder(op, result);
@@ -1993,6 +1989,14 @@ convertOmpWsloop(Operation &opInst, llvm::IRBuilderBase 
&builder,
   bool isSimd = wsloopOp.getScheduleSimd();
   bool loopNeedsBarrier = !wsloopOp.getNowait();
 
+  // The only legal way for the direct parent to be omp.distribute is that this
+  // represents 'distribute parallel do'. Otherwise, this is a regular
+  // worksharing loop.
+  llvm::omp::WorksharingLoopType workshareLoopType =
+  llvm::isa_and_present(opInst.getParentOp())
+  ? llvm::omp::WorksharingLoopType::DistributeForStaticLoop
+  : llvm::omp::WorksharingLoopType::ForStaticLoop;
+
   llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
   llvm::Expected regionBlock = convertOmpOpRegions(
   wsloopOp.getRegion(), "omp.wsloop.region", builder, moduleTranslation);
@@ -2008,7 +2012,8 @@ convertOmpWsloop(Operation &opInst, llvm::IRBuilderBase 
&builder,
   ompLoc.DL, loopInfo, allocaIP, loopNeedsBarrier,
   convertToScheduleKind(schedule), chunk, isSimd,
   scheduleMod == omp::ScheduleModifier::monotonic,
-  scheduleMod == omp::ScheduleModifier::nonmonotonic, isOrdered);
+  scheduleMod == omp::ScheduleModifier::nonmonotonic, isOrdered,
+  workshareLoopType);
 
   if (failed(handleError(wsloopIP, opInst)))
 return failure();
@@ -3792,6 +3797,12 @@ convertOmpDistribute(Operation &opInst, 
llvm::IRBuilderBase &builder,
   return regionBlock.takeError();
 builder.SetInsertPoint(*regionBlock, (*regionBlock)->begin());
 
+// Skip applying a workshare loop below when translating 'distribute
+// parallel do' (it's been already handled by this point while translating
+// the nested omp.wsloop).
+if (isa_and_present(distributeOp.getNestedWrapper()))
+  return llvm::Error::success();
+
 // TODO: Add support for clauses which are valid for DISTRIBUTE constructs.
 // Static schedule is the default.
 auto schedule = omp::ClauseScheduleKind::Static;
diff --git a/mlir/test/Target/LLVMIR/openmp-llvm.mlir 
b/mlir/test/Target/LLVMIR/openmp-llvm.mlir
index a5a490e527d79..d85b149c66811 100644
--- a/mlir/test/Target/LLVMIR/openmp-llvm.mlir
+++ b/mlir/test/Target/LLVMIR/openmp-llvm.mlir
@@ -3307,3 +3307,68 @@ llvm.func @distribute() {
 // CHECK: store i64 1, ptr %[[STRIDE]]
 // CHECK: %[[TID:.*]] = call i32 @__kmpc_global_thread_num({{.*}})
 // CHECK: call void @__kmpc_for_static_init_{{.*}}(ptr @{{.*}}, i32 
%[[TID]], i32 92, ptr %[[LASTITER]], ptr %[[LB]], ptr %[[UB]], ptr %[[STRIDE]], 
i64 1, i64 0)
+
+// -
+
+llvm.func @distribute_wsloop(%lb : i32, %ub : i32, %step : i32) {
+  omp.parallel {
+omp.distribute {
+  omp.wsloop {
+omp.loop_nest (%iv) : i32 = (%lb) to (%ub) step (%step) {
+  omp.yield
+}
+  } {omp.composite}
+} {omp.composite}
+omp.terminator
+  } {omp.composite}
+  llvm.return
+}
+
+// CHECK-LABEL: define void @distribute_wsloop
+// CHECK: call void{{.*}}@__kmpc_fork_call({{.*}}, ptr 
@[[OUTLINED_PARALLEL:.*]],
+
+// CHECK:   define internal void @[[OUTLINED_PARALLEL]]({{.*}})
+// CHECK: %[[ARGS:.*]] = alloca { i32, i32, i32, ptr, ptr, ptr, ptr }
+// CHECK: %[[LAS

[llvm-branch-commits] [flang] release/20.x: flang: Fix build with latest libc++ (#127362) (PR #127805)

2025-02-19 Thread via llvm-branch-commits

https://github.com/llvmbot created 
https://github.com/llvm/llvm-project/pull/127805

Backport 2b340c10a611d929fee25e6222909c8915e3d6b6

Requested by: @tstellar

>From 2b70b17d30744ee6720eb2645ef8b61e043ed295 Mon Sep 17 00:00:00 2001
From: Tom Stellard 
Date: Wed, 19 Feb 2025 06:53:30 -0800
Subject: [PATCH] flang: Fix build with latest libc++ (#127362)

I think this first stopped working with
954836634abb446f18719b14120c386a929a42d1. This patch fixes the following
error:

/home/runner/work/llvm-project/llvm-project/flang/runtime/io-api-minimal.cpp:153:11:
error: '__libcpp_verbose_abort' is missing exception specification
'noexcept'
   153 | void std::__libcpp_verbose_abort(char const *format, ...) {
   |   ^
| noexcept
/mnt/build/bin/../include/c++/v1/__verbose_abort:30:28: note: previous
declaration is here
30 | __printf__, 1, 2) void __libcpp_verbose_abort(const char* __format,
...) _LIBCPP_VERBOSE_ABORT_NOEXCEPT;
   |^
1 error generated.

(cherry picked from commit 2b340c10a611d929fee25e6222909c8915e3d6b6)
---
 flang/runtime/io-api-minimal.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/flang/runtime/io-api-minimal.cpp b/flang/runtime/io-api-minimal.cpp
index 68768427be0c2..93ac82248aa4c 100644
--- a/flang/runtime/io-api-minimal.cpp
+++ b/flang/runtime/io-api-minimal.cpp
@@ -150,7 +150,8 @@ bool IODEF(OutputLogical)(Cookie cookie, bool truth) {
 // Provide own definition for `std::__libcpp_verbose_abort` to avoid dependency
 // on the version provided by libc++.
 
-void std::__libcpp_verbose_abort(char const *format, ...) {
+void std::__libcpp_verbose_abort(char const *format, ...) noexcept(
+noexcept(std::__libcpp_verbose_abort(""))) {
   va_list list;
   va_start(list, format);
   std::vfprintf(stderr, format, list);

___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] [clang] Implement instantiation context note for checking template parameters (PR #126088)

2025-02-19 Thread Matheus Izvekov via llvm-branch-commits


@@ -1018,9 +1019,9 @@ namespace cwg62 { // cwg62: 2.9
   struct A {
 struct { int n; } b;
   };
-  template struct X {};
-  template T get() { return get(); }
-  template int take(T) { return 0; }
+  template struct X {}; // cxx98-note 6{{template parameter is 
declared here}}

mizvekov wrote:

Why though? These notes are not particularly relevant for a DR test.

This makes these tests very cumbersome to update.

I don't quite understand all these special rules for these DR tests, as I think 
they popped up while I was on a break from the project, and I probably missed 
discussions here.

But this goes around the design intent of the diagnostic verifier, which 
encourages these sorts of matches, while at the same time it does not support 
matching on a diagnostic sequence at all, so this can give people the 
illusion that the sequence is actually verified.

I think this makes the verifier unsuited for this kind of test. FileCheck on 
the raw clang output and a generator/updater script would be a superior 
solution, which would actually support matching on a sequence of diagnostics.

https://github.com/llvm/llvm-project/pull/126088
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] Backport: [clang] Track function template instantiation from definition… (PR #127777)

2025-02-19 Thread Matheus Izvekov via llvm-branch-commits

https://github.com/mizvekov edited 
https://github.com/llvm/llvm-project/pull/127777
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] Backport: [clang] Track function template instantiation from definition… (PR #127777)

2025-02-19 Thread Erich Keane via llvm-branch-commits

https://github.com/erichkeane approved this pull request.


https://github.com/llvm/llvm-project/pull/127777
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] [lldb] Backport: [clang] fix P3310 overload resolution flag propagation (#125791) (PR #127831)

2025-02-19 Thread Erich Keane via llvm-branch-commits

https://github.com/erichkeane approved this pull request.


https://github.com/llvm/llvm-project/pull/127831
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] Backport: [clang] Track function template instantiation from definition (#125266) (PR #127777)

2025-02-19 Thread Matheus Izvekov via llvm-branch-commits

https://github.com/mizvekov edited 
https://github.com/llvm/llvm-project/pull/127777
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [flang] release/20.x: flang: Fix build with latest libc++ (#127362) (PR #127805)

2025-02-19 Thread Louis Dionne via llvm-branch-commits

https://github.com/ldionne approved this pull request.


https://github.com/llvm/llvm-project/pull/127805
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [libcxx] release/20.x: [libc++] Guard include of <features.h> with __has_include (#127691) (PR #127842)

2025-02-19 Thread via llvm-branch-commits

https://github.com/llvmbot milestoned 
https://github.com/llvm/llvm-project/pull/127842
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [libcxx] release/20.x: [libc++] Guard include of <features.h> with __has_include (#127691) (PR #127842)

2025-02-19 Thread via llvm-branch-commits

https://github.com/llvmbot created 
https://github.com/llvm/llvm-project/pull/127842

Backport 2c8b1248513624e89b510397224f0f405116f3d3

Requested by: @ldionne

>From 315226cf7b7751303615984ec3d84664d156 Mon Sep 17 00:00:00 2001
From: Louis Dionne 
Date: Wed, 19 Feb 2025 08:21:56 -0500
Subject: [PATCH] [libc++] Guard include of <features.h> with __has_include
 (#127691)

Some configurations define __AMDGPU__ or __NVPTX__ on platforms that
don't provide <features.h>, such as CUDA on Mac.

(cherry picked from commit 2c8b1248513624e89b510397224f0f405116f3d3)
---
 libcxx/include/__configuration/platform.h | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/libcxx/include/__configuration/platform.h 
b/libcxx/include/__configuration/platform.h
index cff99376ee24b..8d0f8f63f5213 100644
--- a/libcxx/include/__configuration/platform.h
+++ b/libcxx/include/__configuration/platform.h
@@ -32,12 +32,14 @@
 
 // Need to detect which libc we're using if we're on Linux.
 #if defined(__linux__) || defined(__AMDGPU__) || defined(__NVPTX__)
-#  include <features.h>
-#  if defined(__GLIBC_PREREQ)
-#define _LIBCPP_GLIBC_PREREQ(a, b) __GLIBC_PREREQ(a, b)
-#  else
-#define _LIBCPP_GLIBC_PREREQ(a, b) 0
-#  endif // defined(__GLIBC_PREREQ)
+#  if __has_include(<features.h>)
+#include <features.h>
+#if defined(__GLIBC_PREREQ)
+#  define _LIBCPP_GLIBC_PREREQ(a, b) __GLIBC_PREREQ(a, b)
+#else
+#  define _LIBCPP_GLIBC_PREREQ(a, b) 0
+#endif // defined(__GLIBC_PREREQ)
+#  endif
 #endif
 
 #ifndef __BYTE_ORDER__

___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] [clang] Implement instantiation context note for checking template parameters (PR #126088)

2025-02-19 Thread Matheus Izvekov via llvm-branch-commits


@@ -1018,9 +1019,9 @@ namespace cwg62 { // cwg62: 2.9
   struct A {
 struct { int n; } b;
   };
-  template struct X {};
-  template T get() { return get(); }
-  template int take(T) { return 0; }
+  template struct X {}; // cxx98-note 6{{template parameter is 
declared here}}

mizvekov wrote:

I support the bookmarks, they are fine, sure let's use them more.

My concern is not about the bookmarks though, it is about the way the tests are 
written here, checking the whole diagnostic sequence, but without actually 
checking it, and without any kind of 
automated update.

I would be strongly against adopting this for the other tests, as if this were 
a thing a while ago, this would have made many of my PRs unviable due to the 
amount of manual test rework required.

That is not to say that I don't agree with the overall goal, I think we do need 
to test for duplicated notes and things like that, but we should implement this 
in tooling, being actually verified and not manually updated.

https://github.com/llvm/llvm-project/pull/126088
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] [clang] Implement instantiation context note for checking template parameters (PR #126088)

2025-02-19 Thread Matheus Izvekov via llvm-branch-commits


@@ -1018,9 +1019,9 @@ namespace cwg62 { // cwg62: 2.9
   struct A {
 struct { int n; } b;
   };
-  template struct X {};
-  template T get() { return get(); }
-  template int take(T) { return 0; }
+  template struct X {}; // cxx98-note 6{{template parameter is 
declared here}}

mizvekov wrote:

> I'm not certain I understand the concern then. The request is to do something 
> like:
> so either way it is actually checking the whole diagnostic sequence, just 
> that with bookmarks it's easier to tell which warnings/errors generate what 
> notes.

No, it's not actually checking the sequence, because the notes can be attached 
to random diagnostics which are not actually the preceding ones, and the tests 
will still pass.

The diagnostic verifier does not currently check the order diagnostics appear 
in, writing the test expectations like this only gives you the illusion that it 
does :)



https://github.com/llvm/llvm-project/pull/126088
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] release/20.x: Revert "[C++20][Modules][Serialization] Delay marking pending incompl… (#127136) (PR #127252)

2025-02-19 Thread Aaron Ballman via llvm-branch-commits

AaronBallman wrote:

> Not super familiar with the release cherry-picking workflow. How should I 
> move this forward?

You've done it correctly (though you should set your email to public per 
https://github.com/llvm/llvm-project/pull/127252#issuecomment-2660155014). It's 
just waiting on review before it gets merged into the release branch.

https://github.com/llvm/llvm-project/pull/127252
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] [clang] Implement instantiation context note for checking template parameters (PR #126088)

2025-02-19 Thread Aaron Ballman via llvm-branch-commits


@@ -1018,9 +1019,9 @@ namespace cwg62 { // cwg62: 2.9
   struct A {
 struct { int n; } b;
   };
-  template struct X {};
-  template T get() { return get(); }
-  template int take(T) { return 0; }
+  template struct X {}; // cxx98-note 6{{template parameter is 
declared here}}

AaronBallman wrote:

> My concern is not about the bookmarks though, is about the way the tests are 
> written here, checking the whole
diagnostic sequence, but without actually checking it, and without any kind of 
automated update.

I'm not certain I understand the concern then. The request is to do something 
like:
```
template struct X {}; // #template_struct_X
...
... // expected-warning {{yada yada}}
// expected-note@#template_struct_X {{declared here}}
...
... // expected-warning {{yada yada}} \
// expected-note@#template_struct_X {{declared here}} \
// expected-error {{a different yada on the same line}} \
// expected-note@#template_struct_X {{declared here}}
```
instead of doing:
```
template struct X {}; // expected-note 3 {{declared here}}
...
... // expected-warning {{yada yada}}
...
... // expected-warning {{yada yada}} \
// expected-error {{a different yada on the same line}}
```

so either way it is actually checking the whole diagnostic sequence, just that 
with bookmarks it's easier to tell which warnings/errors generate what notes. 
And we've never had a way to automatically update `-verify` tests. We do for 
some kinds of `FileCheck` tests, but that can be contentious because of how 
easy it is to generate the test updates, bugs and all. (I'm not opposed to 
having tooling which helps with diagnostic changes; upgrading warnings to 
errors is a prime example of something that could hopefully be made easier.)

https://github.com/llvm/llvm-project/pull/126088
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] [clang] Implement instantiation context note for checking template parameters (PR #126088)

2025-02-19 Thread Matheus Izvekov via llvm-branch-commits


@@ -1018,9 +1019,9 @@ namespace cwg62 { // cwg62: 2.9
   struct A {
 struct { int n; } b;
   };
-  template struct X {};
-  template T get() { return get(); }
-  template int take(T) { return 0; }
+  template struct X {}; // cxx98-note 6{{template parameter is 
declared here}}

mizvekov wrote:

No worries, I'll do the update here on this PR.

I think it would be good to post on discourse to get some design feedback on 
that.

If this were actually checked, this would significantly reduce the amount of 
work required when manually updating tests, to the point that it would be fine 
for me to not also need to support automatic generation and updates of test 
expectations for now.

https://github.com/llvm/llvm-project/pull/126088
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] release/20.x: [CUDA] Add support for sm101 and sm120 target architectures (#127187) (PR #127918)

2025-02-19 Thread via llvm-branch-commits

https://github.com/llvmbot created 
https://github.com/llvm/llvm-project/pull/127918

Backport 0127f169dc8e0b5b6c2a24f74cd42d9d277916f6

Requested by: @Artem-B

>From c3f0998ff02643c3811cfa1af46ba6b0ed2c24c6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sebastian=20Jod=C5=82owski?= 
Date: Wed, 19 Feb 2025 14:41:07 -0800
Subject: [PATCH] [CUDA] Add support for sm101 and sm120 target architectures
 (#127187)

Add support for sm101 and sm120 target architectures. It requires CUDA
12.8.

-

Co-authored-by: Sebastian Jodlowski 
(cherry picked from commit 0127f169dc8e0b5b6c2a24f74cd42d9d277916f6)
---
 clang/include/clang/Basic/BuiltinsNVPTX.td    |  8 +++++---
 clang/include/clang/Basic/Cuda.h              |  4 ++++
 clang/lib/Basic/Cuda.cpp                      |  8 ++++++++
 clang/lib/Basic/Targets/NVPTX.cpp             | 23 ++++++++++++++++++-----
 clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp      |  4 ++++
 .../test/Misc/target-invalid-cpu-note/nvptx.c |  4 ++++
 6 files changed, 43 insertions(+), 8 deletions(-)

diff --git a/clang/include/clang/Basic/BuiltinsNVPTX.td 
b/clang/include/clang/Basic/BuiltinsNVPTX.td
index 9d24a992563a4..b550fff8567df 100644
--- a/clang/include/clang/Basic/BuiltinsNVPTX.td
+++ b/clang/include/clang/Basic/BuiltinsNVPTX.td
@@ -21,12 +21,14 @@ class SM newer_list> : 
SMFeatures {
 !strconcat(f, "|", newer.Features));
 }
 
+let Features = "sm_120a" in def SM_120a : SMFeatures;
+let Features = "sm_101a" in def SM_101a : SMFeatures;
 let Features = "sm_100a" in def SM_100a : SMFeatures;
-
-def SM_100 : SM<"100", [SM_100a]>;
-
 let Features = "sm_90a" in def SM_90a : SMFeatures;
 
+def SM_120 : SM<"120", [SM_120a]>;
+def SM_101 : SM<"101", [SM_101a, SM_120]>;
+def SM_100 : SM<"100", [SM_100a, SM_101]>;
 def SM_90 : SM<"90", [SM_90a, SM_100]>;
 def SM_89 : SM<"89", [SM_90]>;
 def SM_87 : SM<"87", [SM_89]>;
diff --git a/clang/include/clang/Basic/Cuda.h b/clang/include/clang/Basic/Cuda.h
index f33ba46233a7a..5c909a8e9ca11 100644
--- a/clang/include/clang/Basic/Cuda.h
+++ b/clang/include/clang/Basic/Cuda.h
@@ -82,6 +82,10 @@ enum class OffloadArch {
   SM_90a,
   SM_100,
   SM_100a,
+  SM_101,
+  SM_101a,
+  SM_120,
+  SM_120a,
   GFX600,
   GFX601,
   GFX602,
diff --git a/clang/lib/Basic/Cuda.cpp b/clang/lib/Basic/Cuda.cpp
index 1bfec0b37c5ee..79cac0ec119dd 100644
--- a/clang/lib/Basic/Cuda.cpp
+++ b/clang/lib/Basic/Cuda.cpp
@@ -100,6 +100,10 @@ static const OffloadArchToStringMap arch_names[] = {
 SM(90a), // Hopper
 SM(100), // Blackwell
 SM(100a),// Blackwell
+SM(101), // Blackwell
+SM(101a),// Blackwell
+SM(120), // Blackwell
+SM(120a),// Blackwell
 GFX(600),  // gfx600
 GFX(601),  // gfx601
 GFX(602),  // gfx602
@@ -230,6 +234,10 @@ CudaVersion MinVersionForOffloadArch(OffloadArch A) {
 return CudaVersion::CUDA_120;
   case OffloadArch::SM_100:
   case OffloadArch::SM_100a:
+  case OffloadArch::SM_101:
+  case OffloadArch::SM_101a:
+  case OffloadArch::SM_120:
+  case OffloadArch::SM_120a:
 return CudaVersion::CUDA_128;
   default:
 llvm_unreachable("invalid enum");
diff --git a/clang/lib/Basic/Targets/NVPTX.cpp 
b/clang/lib/Basic/Targets/NVPTX.cpp
index a03f4983b9d03..9be12cbe7ac19 100644
--- a/clang/lib/Basic/Targets/NVPTX.cpp
+++ b/clang/lib/Basic/Targets/NVPTX.cpp
@@ -176,7 +176,7 @@ void NVPTXTargetInfo::getTargetDefines(const LangOptions 
&Opts,
 
   if (Opts.CUDAIsDevice || Opts.OpenMPIsTargetDevice || !HostTarget) {
 // Set __CUDA_ARCH__ for the GPU specified.
-std::string CUDAArchCode = [this] {
+llvm::StringRef CUDAArchCode = [this] {
   switch (GPU) {
   case OffloadArch::GFX600:
   case OffloadArch::GFX601:
@@ -283,14 +283,27 @@ void NVPTXTargetInfo::getTargetDefines(const LangOptions 
&Opts,
   case OffloadArch::SM_100:
   case OffloadArch::SM_100a:
 return "1000";
+  case OffloadArch::SM_101:
+  case OffloadArch::SM_101a:
+ return "1010";
+  case OffloadArch::SM_120:
+  case OffloadArch::SM_120a:
+ return "1200";
   }
   llvm_unreachable("unhandled OffloadArch");
 }();
 Builder.defineMacro("__CUDA_ARCH__", CUDAArchCode);
-if (GPU == OffloadArch::SM_90a)
-  Builder.defineMacro("__CUDA_ARCH_FEAT_SM90_ALL", "1");
-if (GPU == OffloadArch::SM_100a)
-  Builder.defineMacro("__CUDA_ARCH_FEAT_SM100_ALL", "1");
+switch(GPU) {
+  case OffloadArch::SM_90a:
+  case OffloadArch::SM_100a:
+  case OffloadArch::SM_101a:
+  case OffloadArch::SM_120a:
+Builder.defineMacro("__CUDA_ARCH_FEAT_SM" + CUDAArchCode.drop_back() + 
"_ALL", "1");
+break;
+  default:
+// Do nothing if this is not an enhanced architecture.
+break;
+}
   }
 }
 
diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp 
b/clang/

[llvm-branch-commits] [clang] release/20.x: [CUDA] Add support for sm101 and sm120 target architectures (#127187) (PR #127918)

2025-02-19 Thread via llvm-branch-commits

https://github.com/llvmbot milestoned 
https://github.com/llvm/llvm-project/pull/127918
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] release/20.x: [CUDA] Add support for sm101 and sm120 target architectures (#127187) (PR #127918)

2025-02-19 Thread via llvm-branch-commits

llvmbot wrote:

@Artem-B What do you think about merging this PR to the release branch?

https://github.com/llvm/llvm-project/pull/127918
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] release/20.x: [CUDA] Add support for sm101 and sm120 target architectures (#127187) (PR #127918)

2025-02-19 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-clang-codegen

Author: None (llvmbot)


Changes

Backport 0127f169dc8e0b5b6c2a24f74cd42d9d277916f6

Requested by: @Artem-B

---
Full diff: https://github.com/llvm/llvm-project/pull/127918.diff


6 Files Affected:

- (modified) clang/include/clang/Basic/BuiltinsNVPTX.td (+5-3) 
- (modified) clang/include/clang/Basic/Cuda.h (+4) 
- (modified) clang/lib/Basic/Cuda.cpp (+8) 
- (modified) clang/lib/Basic/Targets/NVPTX.cpp (+18-5) 
- (modified) clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp (+4) 
- (modified) clang/test/Misc/target-invalid-cpu-note/nvptx.c (+4) 


```diff
diff --git a/clang/include/clang/Basic/BuiltinsNVPTX.td 
b/clang/include/clang/Basic/BuiltinsNVPTX.td
index 9d24a992563a4..b550fff8567df 100644
--- a/clang/include/clang/Basic/BuiltinsNVPTX.td
+++ b/clang/include/clang/Basic/BuiltinsNVPTX.td
@@ -21,12 +21,14 @@ class SM newer_list> : 
SMFeatures {
 !strconcat(f, "|", newer.Features));
 }
 
+let Features = "sm_120a" in def SM_120a : SMFeatures;
+let Features = "sm_101a" in def SM_101a : SMFeatures;
 let Features = "sm_100a" in def SM_100a : SMFeatures;
-
-def SM_100 : SM<"100", [SM_100a]>;
-
 let Features = "sm_90a" in def SM_90a : SMFeatures;
 
+def SM_120 : SM<"120", [SM_120a]>;
+def SM_101 : SM<"101", [SM_101a, SM_120]>;
+def SM_100 : SM<"100", [SM_100a, SM_101]>;
 def SM_90 : SM<"90", [SM_90a, SM_100]>;
 def SM_89 : SM<"89", [SM_90]>;
 def SM_87 : SM<"87", [SM_89]>;
diff --git a/clang/include/clang/Basic/Cuda.h b/clang/include/clang/Basic/Cuda.h
index f33ba46233a7a..5c909a8e9ca11 100644
--- a/clang/include/clang/Basic/Cuda.h
+++ b/clang/include/clang/Basic/Cuda.h
@@ -82,6 +82,10 @@ enum class OffloadArch {
   SM_90a,
   SM_100,
   SM_100a,
+  SM_101,
+  SM_101a,
+  SM_120,
+  SM_120a,
   GFX600,
   GFX601,
   GFX602,
diff --git a/clang/lib/Basic/Cuda.cpp b/clang/lib/Basic/Cuda.cpp
index 1bfec0b37c5ee..79cac0ec119dd 100644
--- a/clang/lib/Basic/Cuda.cpp
+++ b/clang/lib/Basic/Cuda.cpp
@@ -100,6 +100,10 @@ static const OffloadArchToStringMap arch_names[] = {
 SM(90a), // Hopper
 SM(100), // Blackwell
 SM(100a),// Blackwell
+SM(101), // Blackwell
+SM(101a),// Blackwell
+SM(120), // Blackwell
+SM(120a),// Blackwell
 GFX(600),  // gfx600
 GFX(601),  // gfx601
 GFX(602),  // gfx602
@@ -230,6 +234,10 @@ CudaVersion MinVersionForOffloadArch(OffloadArch A) {
 return CudaVersion::CUDA_120;
   case OffloadArch::SM_100:
   case OffloadArch::SM_100a:
+  case OffloadArch::SM_101:
+  case OffloadArch::SM_101a:
+  case OffloadArch::SM_120:
+  case OffloadArch::SM_120a:
 return CudaVersion::CUDA_128;
   default:
 llvm_unreachable("invalid enum");
diff --git a/clang/lib/Basic/Targets/NVPTX.cpp 
b/clang/lib/Basic/Targets/NVPTX.cpp
index a03f4983b9d03..9be12cbe7ac19 100644
--- a/clang/lib/Basic/Targets/NVPTX.cpp
+++ b/clang/lib/Basic/Targets/NVPTX.cpp
@@ -176,7 +176,7 @@ void NVPTXTargetInfo::getTargetDefines(const LangOptions 
&Opts,
 
   if (Opts.CUDAIsDevice || Opts.OpenMPIsTargetDevice || !HostTarget) {
 // Set __CUDA_ARCH__ for the GPU specified.
-std::string CUDAArchCode = [this] {
+llvm::StringRef CUDAArchCode = [this] {
   switch (GPU) {
   case OffloadArch::GFX600:
   case OffloadArch::GFX601:
@@ -283,14 +283,27 @@ void NVPTXTargetInfo::getTargetDefines(const LangOptions 
&Opts,
   case OffloadArch::SM_100:
   case OffloadArch::SM_100a:
 return "1000";
+  case OffloadArch::SM_101:
+  case OffloadArch::SM_101a:
+ return "1010";
+  case OffloadArch::SM_120:
+  case OffloadArch::SM_120a:
+ return "1200";
   }
   llvm_unreachable("unhandled OffloadArch");
 }();
 Builder.defineMacro("__CUDA_ARCH__", CUDAArchCode);
-if (GPU == OffloadArch::SM_90a)
-  Builder.defineMacro("__CUDA_ARCH_FEAT_SM90_ALL", "1");
-if (GPU == OffloadArch::SM_100a)
-  Builder.defineMacro("__CUDA_ARCH_FEAT_SM100_ALL", "1");
+switch(GPU) {
+  case OffloadArch::SM_90a:
+  case OffloadArch::SM_100a:
+  case OffloadArch::SM_101a:
+  case OffloadArch::SM_120a:
+Builder.defineMacro("__CUDA_ARCH_FEAT_SM" + CUDAArchCode.drop_back() + 
"_ALL", "1");
+break;
+  default:
+// Do nothing if this is not an enhanced architecture.
+break;
+}
   }
 }
 
diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp 
b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
index c13928f61a748..dc417880a50e9 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
@@ -2278,6 +2278,10 @@ void CGOpenMPRuntimeGPU::processRequiresDirective(const 
OMPRequiresDecl *D) {
   case OffloadArch::SM_90a:
   case OffloadArch::SM_100:
   case OffloadArch::SM_100a:
+  case OffloadArch::SM_101:

  1   2   >