[llvm-branch-commits] [libc] fea10f7 - Revert "[libc] Add strftime_l (#127708)"
Author: Petr Hosek Date: 2025-02-19T00:39:14-08:00 New Revision: fea10f7ff16ddd019841a69697194d373325ff6b URL: https://github.com/llvm/llvm-project/commit/fea10f7ff16ddd019841a69697194d373325ff6b DIFF: https://github.com/llvm/llvm-project/commit/fea10f7ff16ddd019841a69697194d373325ff6b.diff LOG: Revert "[libc] Add strftime_l (#127708)" This reverts commit 9072ba71cac6d518b4164615c609e358d49c4ed2. Added: Modified: libc/config/baremetal/aarch64/entrypoints.txt libc/config/baremetal/arm/entrypoints.txt libc/config/baremetal/riscv/entrypoints.txt libc/config/linux/x86_64/entrypoints.txt libc/include/time.yaml libc/src/time/CMakeLists.txt libc/src/time/strftime.cpp Removed: libc/src/time/strftime_l.cpp libc/src/time/strftime_l.h diff --git a/libc/config/baremetal/aarch64/entrypoints.txt b/libc/config/baremetal/aarch64/entrypoints.txt index 2c226ef176c08..44c4ab49e5c58 100644 --- a/libc/config/baremetal/aarch64/entrypoints.txt +++ b/libc/config/baremetal/aarch64/entrypoints.txt @@ -248,8 +248,6 @@ set(TARGET_LIBC_ENTRYPOINTS libc.src.time.gmtime libc.src.time.gmtime_r libc.src.time.mktime -libc.src.time.strftime -libc.src.time.strftime_l libc.src.time.timespec_get # internal entrypoints diff --git a/libc/config/baremetal/arm/entrypoints.txt b/libc/config/baremetal/arm/entrypoints.txt index 6fd1fce3ab245..370b5462fe9e8 100644 --- a/libc/config/baremetal/arm/entrypoints.txt +++ b/libc/config/baremetal/arm/entrypoints.txt @@ -248,8 +248,6 @@ set(TARGET_LIBC_ENTRYPOINTS libc.src.time.gmtime libc.src.time.gmtime_r libc.src.time.mktime -libc.src.time.strftime -libc.src.time.strftime_l libc.src.time.timespec_get # internal entrypoints diff --git a/libc/config/baremetal/riscv/entrypoints.txt b/libc/config/baremetal/riscv/entrypoints.txt index 5985c495bdaf2..07311a60a17a2 100644 --- a/libc/config/baremetal/riscv/entrypoints.txt +++ b/libc/config/baremetal/riscv/entrypoints.txt @@ -244,8 +244,6 @@ set(TARGET_LIBC_ENTRYPOINTS libc.src.time.gmtime libc.src.time.gmtime_r 
libc.src.time.mktime -libc.src.time.strftime -libc.src.time.strftime_l libc.src.time.timespec_get # internal entrypoints diff --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt index 2e3af00ec303d..a4f6671a59789 100644 --- a/libc/config/linux/x86_64/entrypoints.txt +++ b/libc/config/linux/x86_64/entrypoints.txt @@ -1128,7 +1128,6 @@ if(LLVM_LIBC_FULL_BUILD) libc.src.time.mktime libc.src.time.nanosleep libc.src.time.strftime -libc.src.time.strftime_l libc.src.time.time libc.src.time.timespec_get diff --git a/libc/include/time.yaml b/libc/include/time.yaml index 7bb25dbe85ac4..37ee824678cda 100644 --- a/libc/include/time.yaml +++ b/libc/include/time.yaml @@ -9,7 +9,6 @@ types: - type_name: time_t - type_name: clock_t - type_name: size_t - - type_name: locale_t enums: [] objects: [] functions: @@ -101,16 +100,6 @@ functions: - type: size_t - type: const char *__restrict - type: const struct tm *__restrict - - name: strftime_l -standard: - - stdc -return_type: size_t -arguments: - - type: char *__restrict - - type: size_t - - type: const char *__restrict - - type: const struct tm *__restrict - - type: locale_t - name: time standard: - stdc diff --git a/libc/src/time/CMakeLists.txt b/libc/src/time/CMakeLists.txt index 3b951df810011..8332e8ab66f97 100644 --- a/libc/src/time/CMakeLists.txt +++ b/libc/src/time/CMakeLists.txt @@ -150,20 +150,6 @@ add_entrypoint_object( libc.src.time.strftime_core.strftime_main ) -add_entrypoint_object( - strftime_l - SRCS -strftime_l.cpp - HDRS -strftime_l.h - DEPENDS -libc.hdr.types.locale_t -libc.hdr.types.size_t -libc.hdr.types.struct_tm -libc.src.stdio.printf_core.writer -libc.src.time.strftime_core.strftime_main -) - add_entrypoint_object( time SRCS diff --git a/libc/src/time/strftime.cpp b/libc/src/time/strftime.cpp index c19e58fbadf71..4b89bf2ea3a70 100644 --- a/libc/src/time/strftime.cpp +++ b/libc/src/time/strftime.cpp @@ -19,6 +19,7 @@ namespace LIBC_NAMESPACE_DECL { 
LLVM_LIBC_FUNCTION(size_t, strftime, (char *__restrict buffer, size_t buffsz, const char *__restrict format, const tm *timeptr)) { + printf_core::WriteBuffer wb(buffer, (buffsz > 0 ? buffsz - 1 : 0)); printf_core::Writer writer(&wb); int ret = strftime_core::strftime_main(&writer, format, timeptr); diff --git a/libc/src/time/strftime_l.cpp b/libc/src/time/strftime_l.cpp deleted file mode 100644 index 4203136af4cba..0 --- a/libc/src/time/strftime_l.cpp +++ /dev/null @@ -1,33 +0,0 @@ -//===-- Implementation of strftime_l function
[llvm-branch-commits] [clang] release/20.x: Revert Do not use `private` as the default AS for when `generic` is available (#112442)" (PR #127771)
llvmbot wrote: @llvm/pr-subscribers-clang-codegen Author: Matt Arsenault (arsenm) Changes This reverts commit 6e0b0038cd65ce726ce404305a06e1cf33e36cca. This breaks the rocm-device-libs build, so it should not ship in the release. --- Patch is 214.51 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/127771.diff 21 Files Affected: - (modified) clang/lib/Basic/Targets/AMDGPU.cpp (+3-3) - (modified) clang/lib/CodeGen/CGBlocks.cpp (+1-2) - (modified) clang/lib/CodeGen/CGBuiltin.cpp (+2-9) - (modified) clang/test/CodeGen/scoped-fence-ops.c (+120-61) - (modified) clang/test/CodeGenOpenCL/addr-space-struct-arg.cl (+70-99) - (modified) clang/test/CodeGenOpenCL/amdgcn-automatic-variable.cl (+16-20) - (modified) clang/test/CodeGenOpenCL/amdgpu-abi-struct-arg-byref.cl (+118-164) - (modified) clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl (+220-275) - (modified) clang/test/CodeGenOpenCL/amdgpu-nullptr.cl (+14-14) - (modified) clang/test/CodeGenOpenCL/atomic-ops.cl (+4-7) - (modified) clang/test/CodeGenOpenCL/atomics-unsafe-hw-remarks-gfx90a.cl (+3-3) - (modified) clang/test/CodeGenOpenCL/blocks.cl (+12-11) - (modified) clang/test/CodeGenOpenCL/builtins-alloca.cl (+4-428) - (modified) clang/test/CodeGenOpenCL/builtins-amdgcn-gfx12.cl (+56-87) - (modified) clang/test/CodeGenOpenCL/builtins-amdgcn-gfx940.cl (+12-18) - (modified) clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx12.cl (+2-2) - (modified) clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx8.cl (+1-1) - (modified) clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx90a.cl (+1-1) - (modified) clang/test/CodeGenOpenCL/enqueue-kernel-non-entry-block.cl (+1-1) - (modified) clang/test/CodeGenOpenCL/opencl_types.cl (+1-1) - (modified) clang/test/Index/pipe-size.cl (+2-2) ``diff diff --git a/clang/lib/Basic/Targets/AMDGPU.cpp b/clang/lib/Basic/Targets/AMDGPU.cpp index 0d308cb6af969..9ea366af56a52 100644 --- a/clang/lib/Basic/Targets/AMDGPU.cpp +++ b/clang/lib/Basic/Targets/AMDGPU.cpp @@ 
-261,9 +261,9 @@ AMDGPUTargetInfo::AMDGPUTargetInfo(const llvm::Triple &Triple, void AMDGPUTargetInfo::adjust(DiagnosticsEngine &Diags, LangOptions &Opts) { TargetInfo::adjust(Diags, Opts); // ToDo: There are still a few places using default address space as private - // address space in OpenCL, which needs to be cleaned up, then the references - // to OpenCL can be removed from the following line. - setAddressSpaceMap((Opts.OpenCL && !Opts.OpenCLGenericAddressSpace) || + // address space in OpenCL, which needs to be cleaned up, then Opts.OpenCL + // can be removed from the following line. + setAddressSpaceMap(/*DefaultIsPrivate=*/Opts.OpenCL || !isAMDGCN(getTriple())); } diff --git a/clang/lib/CodeGen/CGBlocks.cpp b/clang/lib/CodeGen/CGBlocks.cpp index a7584a95c8ca7..f38f86c792f69 100644 --- a/clang/lib/CodeGen/CGBlocks.cpp +++ b/clang/lib/CodeGen/CGBlocks.cpp @@ -1396,8 +1396,7 @@ void CodeGenFunction::setBlockContextParameter(const ImplicitParamDecl *D, DI->setLocation(D->getLocation()); DI->EmitDeclareOfBlockLiteralArgVariable( *BlockInfo, D->getName(), argNum, - cast(alloc.getPointer()->stripPointerCasts()), - Builder); + cast(alloc.getPointer()), Builder); } } diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 7ec9d59bfed5c..5237533364294 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -6092,13 +6092,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, /*IndexTypeQuals=*/0); auto Tmp = CreateMemTemp(SizeArrayTy, "block_sizes"); llvm::Value *TmpPtr = Tmp.getPointer(); - // The EmitLifetime* pair expect a naked Alloca as their last argument, - // however for cases where the default AS is not the Alloca AS, Tmp is - // actually the Alloca ascasted to the default AS, hence the - // stripPointerCasts() - llvm::Value *Alloca = TmpPtr->stripPointerCasts(); llvm::Value *TmpSize = EmitLifetimeStart( - CGM.getDataLayout().getTypeAllocSize(Tmp.getElementType()), 
Alloca); + CGM.getDataLayout().getTypeAllocSize(Tmp.getElementType()), TmpPtr); llvm::Value *ElemPtr; // Each of the following arguments specifies the size of the corresponding // argument passed to the enqueued block. @@ -6114,9 +6109,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, Builder.CreateAlignedStore( V, GEP, CGM.getDataLayout().getPrefTypeAlign(SizeTy)); } - // Return the Alloca itself rather than a potential ascast as this is only - // used by the paired EmitLifetimeEnd. - return std::tie(ElemPtr, TmpSize, Alloca); + return std::tie(ElemPtr, TmpSize, TmpPtr); }; // Could have events and/or varargs. diff --git a/clang/test/CodeGen/scoped-f
[llvm-branch-commits] [clang] release/20.x: [clang][CodeGen] `sret` args should always point to the `alloca` AS, so use that (#114062) (PR #127552)
arsenm wrote: #127771 is the revert alternative https://github.com/llvm/llvm-project/pull/127552 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [SHT_LLVM_FUNC_MAP][llvm-readobj]Introduce function address map section and emit dynamic instruction count(readobj part) (PR #124333)
@@ -7922,6 +7928,59 @@ void LLVMELFDumper::printBBAddrMaps(bool PrettyPGOAnalysis) { } } +template void LLVMELFDumper::printFuncMaps() { + bool IsRelocatable = this->Obj.getHeader().e_type == ELF::ET_REL; + using Elf_Shdr = typename ELFT::Shdr; + auto IsMatch = [](const Elf_Shdr &Sec) -> bool { +return Sec.sh_type == ELF::SHT_LLVM_FUNC_MAP; + }; jh7370 wrote: Please define this inline, since it's only used once, and delete the trailing return type (since that is automatically derived from the result of the return expression, of which there is only one). https://github.com/llvm/llvm-project/pull/124333 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [SHT_LLVM_FUNC_MAP][llvm-readobj]Introduce function address map section and emit dynamic instruction count(readobj part) (PR #124333)
@@ -940,6 +940,92 @@ ELFFile::decodeBBAddrMap(const Elf_Shdr &Sec, const Elf_Shdr *RelaSec, return std::move(AddrMapsOrErr); } +template +Expected> +ELFFile::decodeFuncMap(const Elf_Shdr &Sec, + const Elf_Shdr *RelaSec) const { + bool IsRelocatable = this->getHeader().e_type == ELF::ET_REL; + + // This DenseMap maps the offset of each function (the location of the + // reference to the function in the SHT_LLVM_FUNC_MAP section) to the + // addend (the location of the function in the text section). + llvm::DenseMap FunctionOffsetTranslations; + if (IsRelocatable && RelaSec) { +assert(RelaSec && + "Can't read a SHT_LLVM_FUNC_ADDR_MAP section in a relocatable " + "object file without providing a relocation section."); +Expected::Elf_Rela_Range> Relas = +this->relas(*RelaSec); +if (!Relas) + return createError("unable to read relocations for section " + + describe(*this, Sec) + ": " + + toString(Relas.takeError())); +for (typename ELFFile::Elf_Rela Rela : *Relas) + FunctionOffsetTranslations[Rela.r_offset] = Rela.r_addend; + } + auto GetAddressForRelocation = + [&](unsigned RelocationOffsetInSection) -> Expected { +auto FOTIterator = +FunctionOffsetTranslations.find(RelocationOffsetInSection); +if (FOTIterator == FunctionOffsetTranslations.end()) { + return createError("failed to get relocation data for offset: " + + Twine::utohexstr(RelocationOffsetInSection) + + " in section " + describe(*this, Sec)); +} +return FOTIterator->second; + }; + Expected> ContentsOrErr = this->getSectionContents(Sec); + if (!ContentsOrErr) +return ContentsOrErr.takeError(); + ArrayRef Content = *ContentsOrErr; + DataExtractor Data(Content, this->isLE(), ELFT::Is64Bits ? 8 : 4); + std::vector FunctionEntries; + + DataExtractor::Cursor Cur(0); + + // Helper lampda to extract the (possiblly relocatable) address stored at Cur. 
+ auto ExtractAddress = [&]() -> Expected::uintX_t> { +uint64_t RelocationOffsetInSection = Cur.tell(); +auto Address = +static_cast::uintX_t>(Data.getAddress(Cur)); +if (!Cur) + return Cur.takeError(); +if (!IsRelocatable) + return Address; +assert(Address == 0); +Expected AddressOrErr = +GetAddressForRelocation(RelocationOffsetInSection); +if (!AddressOrErr) + return AddressOrErr.takeError(); +return *AddressOrErr; + }; + + uint8_t Version = 0; + while (Cur && Cur.tell() < Content.size()) { +if (Sec.sh_type == ELF::SHT_LLVM_FUNC_MAP) { jh7370 wrote: Why is this check needed? https://github.com/llvm/llvm-project/pull/124333 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [SHT_LLVM_FUNC_MAP][llvm-readobj]Introduce function address map section and emit dynamic instruction count(readobj part) (PR #124333)
@@ -7922,6 +7928,59 @@ void LLVMELFDumper::printBBAddrMaps(bool PrettyPGOAnalysis) { } } +template void LLVMELFDumper::printFuncMaps() { + bool IsRelocatable = this->Obj.getHeader().e_type == ELF::ET_REL; + using Elf_Shdr = typename ELFT::Shdr; + auto IsMatch = [](const Elf_Shdr &Sec) -> bool { +return Sec.sh_type == ELF::SHT_LLVM_FUNC_MAP; + }; + Expected> SecRelocMapOrErr = + this->Obj.getSectionAndRelocations(IsMatch); + if (!SecRelocMapOrErr) { +this->reportUniqueWarning("failed to get SHT_LLVM_FUNC_MAP section(s): " + + toString(SecRelocMapOrErr.takeError())); +return; + } + + for (auto const &[Sec, RelocSec] : *SecRelocMapOrErr) { +std::optional FunctionSec; +if (IsRelocatable) + FunctionSec = + unwrapOrError(this->FileName, this->Obj.getSection(Sec->sh_link)); jh7370 wrote: Do not use `unwrapOrError` in new code. The dumper should be tolerant of slightly dodgy looking object files, as the dumper is often the only way of finding out what's gone wrong (short of decoding the bytes by hand). A warning is fine here and you can then either continue as if the relocation section didn't exist or bail out. See my comments elsewhere about the requirement for a relocation section. https://github.com/llvm/llvm-project/pull/124333 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] release/20.x: Revert Do not use `private` as the default AS for when `generic` is available (#112442)" (PR #127771)
llvmbot wrote: @llvm/pr-subscribers-clang @llvm/pr-subscribers-backend-amdgpu Author: Matt Arsenault (arsenm) Changes This reverts commit 6e0b0038cd65ce726ce404305a06e1cf33e36cca. This breaks the rocm-device-libs build, so it should not ship in the release. --- Patch is 214.51 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/127771.diff 21 Files Affected: - (modified) clang/lib/Basic/Targets/AMDGPU.cpp (+3-3) - (modified) clang/lib/CodeGen/CGBlocks.cpp (+1-2) - (modified) clang/lib/CodeGen/CGBuiltin.cpp (+2-9) - (modified) clang/test/CodeGen/scoped-fence-ops.c (+120-61) - (modified) clang/test/CodeGenOpenCL/addr-space-struct-arg.cl (+70-99) - (modified) clang/test/CodeGenOpenCL/amdgcn-automatic-variable.cl (+16-20) - (modified) clang/test/CodeGenOpenCL/amdgpu-abi-struct-arg-byref.cl (+118-164) - (modified) clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl (+220-275) - (modified) clang/test/CodeGenOpenCL/amdgpu-nullptr.cl (+14-14) - (modified) clang/test/CodeGenOpenCL/atomic-ops.cl (+4-7) - (modified) clang/test/CodeGenOpenCL/atomics-unsafe-hw-remarks-gfx90a.cl (+3-3) - (modified) clang/test/CodeGenOpenCL/blocks.cl (+12-11) - (modified) clang/test/CodeGenOpenCL/builtins-alloca.cl (+4-428) - (modified) clang/test/CodeGenOpenCL/builtins-amdgcn-gfx12.cl (+56-87) - (modified) clang/test/CodeGenOpenCL/builtins-amdgcn-gfx940.cl (+12-18) - (modified) clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx12.cl (+2-2) - (modified) clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx8.cl (+1-1) - (modified) clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx90a.cl (+1-1) - (modified) clang/test/CodeGenOpenCL/enqueue-kernel-non-entry-block.cl (+1-1) - (modified) clang/test/CodeGenOpenCL/opencl_types.cl (+1-1) - (modified) clang/test/Index/pipe-size.cl (+2-2) ``diff diff --git a/clang/lib/Basic/Targets/AMDGPU.cpp b/clang/lib/Basic/Targets/AMDGPU.cpp index 0d308cb6af969..9ea366af56a52 100644 --- a/clang/lib/Basic/Targets/AMDGPU.cpp +++ 
b/clang/lib/Basic/Targets/AMDGPU.cpp @@ -261,9 +261,9 @@ AMDGPUTargetInfo::AMDGPUTargetInfo(const llvm::Triple &Triple, void AMDGPUTargetInfo::adjust(DiagnosticsEngine &Diags, LangOptions &Opts) { TargetInfo::adjust(Diags, Opts); // ToDo: There are still a few places using default address space as private - // address space in OpenCL, which needs to be cleaned up, then the references - // to OpenCL can be removed from the following line. - setAddressSpaceMap((Opts.OpenCL && !Opts.OpenCLGenericAddressSpace) || + // address space in OpenCL, which needs to be cleaned up, then Opts.OpenCL + // can be removed from the following line. + setAddressSpaceMap(/*DefaultIsPrivate=*/Opts.OpenCL || !isAMDGCN(getTriple())); } diff --git a/clang/lib/CodeGen/CGBlocks.cpp b/clang/lib/CodeGen/CGBlocks.cpp index a7584a95c8ca7..f38f86c792f69 100644 --- a/clang/lib/CodeGen/CGBlocks.cpp +++ b/clang/lib/CodeGen/CGBlocks.cpp @@ -1396,8 +1396,7 @@ void CodeGenFunction::setBlockContextParameter(const ImplicitParamDecl *D, DI->setLocation(D->getLocation()); DI->EmitDeclareOfBlockLiteralArgVariable( *BlockInfo, D->getName(), argNum, - cast(alloc.getPointer()->stripPointerCasts()), - Builder); + cast(alloc.getPointer()), Builder); } } diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 7ec9d59bfed5c..5237533364294 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -6092,13 +6092,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, /*IndexTypeQuals=*/0); auto Tmp = CreateMemTemp(SizeArrayTy, "block_sizes"); llvm::Value *TmpPtr = Tmp.getPointer(); - // The EmitLifetime* pair expect a naked Alloca as their last argument, - // however for cases where the default AS is not the Alloca AS, Tmp is - // actually the Alloca ascasted to the default AS, hence the - // stripPointerCasts() - llvm::Value *Alloca = TmpPtr->stripPointerCasts(); llvm::Value *TmpSize = EmitLifetimeStart( - 
CGM.getDataLayout().getTypeAllocSize(Tmp.getElementType()), Alloca); + CGM.getDataLayout().getTypeAllocSize(Tmp.getElementType()), TmpPtr); llvm::Value *ElemPtr; // Each of the following arguments specifies the size of the corresponding // argument passed to the enqueued block. @@ -6114,9 +6109,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, Builder.CreateAlignedStore( V, GEP, CGM.getDataLayout().getPrefTypeAlign(SizeTy)); } - // Return the Alloca itself rather than a potential ascast as this is only - // used by the paired EmitLifetimeEnd. - return std::tie(ElemPtr, TmpSize, Alloca); + return std::tie(ElemPtr, TmpSize, TmpPtr); }; // Could have events and/or varargs. diff --git a
[llvm-branch-commits] [clang] release/20.x: Revert Do not use `private` as the default AS for when `generic` is available (#112442)" (PR #127771)
https://github.com/arsenm milestoned https://github.com/llvm/llvm-project/pull/127771 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] release/20.x: Revert Do not use `private` as the default AS for when `generic` is available (#112442)" (PR #127771)
https://github.com/arsenm edited https://github.com/llvm/llvm-project/pull/127771 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [SHT_LLVM_FUNC_MAP][llvm-readobj]Introduce function address map section and emit dynamic instruction count(readobj part) (PR #124333)
@@ -940,6 +940,92 @@ ELFFile::decodeBBAddrMap(const Elf_Shdr &Sec, const Elf_Shdr *RelaSec, return std::move(AddrMapsOrErr); } +template +Expected> +ELFFile::decodeFuncMap(const Elf_Shdr &Sec, + const Elf_Shdr *RelaSec) const { + bool IsRelocatable = this->getHeader().e_type == ELF::ET_REL; jh7370 wrote: Do we really need this check? Would not the value of `RelaSec` (`nullptr` or otherwise) be sufficient? https://github.com/llvm/llvm-project/pull/124333 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [SHT_LLVM_FUNC_MAP][llvm-readobj]Introduce function address map section and emit dynamic instruction count(readobj part) (PR #124333)
@@ -940,6 +940,92 @@ ELFFile::decodeBBAddrMap(const Elf_Shdr &Sec, const Elf_Shdr *RelaSec, return std::move(AddrMapsOrErr); } +template +Expected> +ELFFile::decodeFuncMap(const Elf_Shdr &Sec, + const Elf_Shdr *RelaSec) const { + bool IsRelocatable = this->getHeader().e_type == ELF::ET_REL; + + // This DenseMap maps the offset of each function (the location of the + // reference to the function in the SHT_LLVM_FUNC_MAP section) to the + // addend (the location of the function in the text section). + llvm::DenseMap FunctionOffsetTranslations; + if (IsRelocatable && RelaSec) { +assert(RelaSec && + "Can't read a SHT_LLVM_FUNC_ADDR_MAP section in a relocatable " + "object file without providing a relocation section."); +Expected::Elf_Rela_Range> Relas = +this->relas(*RelaSec); +if (!Relas) + return createError("unable to read relocations for section " + + describe(*this, Sec) + ": " + + toString(Relas.takeError())); +for (typename ELFFile::Elf_Rela Rela : *Relas) + FunctionOffsetTranslations[Rela.r_offset] = Rela.r_addend; jh7370 wrote: Not all relocations function in the same way. Naively assuming that the `r_addend` and `r_offset` work like this is not going to be correct in some cases. The ELF gABI only describes `r_addend` as a "constant addend used to compute the value". Have you looked into the Object/RelocationResolver.h? It's used elsewhere by llvm-readobj to calculate the values of relocations and may be of some use (see `printRelocatableStackSizes` for an example usage). https://github.com/llvm/llvm-project/pull/124333 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] release/20.x: [clang][CodeGen] `sret` args should always point to the `alloca` AS, so use that (#114062) (PR #127552)
arsenm wrote: > Sans this patch DeviceLibs on our side will be broken when using the vanilla > upstream. 6e0b0038 should have been reverted upstream right away, and not internally. We're now in this terrible situation https://github.com/llvm/llvm-project/pull/127552 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] release/20.x: [clang][CodeGen] `sret` args should always point to the `alloca` AS, so use that (#114062) (PR #127552)
arsenm wrote: > @arsenm This is a pretty large change, what bug does it fix? The rocm device libs build crashes without this. Alternatively we need to revert 6e0b0038cd65ce726ce404305a06e1cf33e36cca https://github.com/llvm/llvm-project/pull/127552 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [AMDGPU][docs] Replace gfx940 and gfx941 with gfx942 in llvm/docs (PR #126887)
ritter-x2a wrote: ### Merge activity * **Feb 19, 3:48 AM EST**: A user started a stack merge that includes this pull request via [Graphite](https://app.graphite.dev/github/pr/llvm/llvm-project/126887). https://github.com/llvm/llvm-project/pull/126887 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [AMDGPU] Remove FeatureForceStoreSC0SC1 (PR #126878)
ritter-x2a wrote: ### Merge activity * **Feb 19, 3:48 AM EST**: A user started a stack merge that includes this pull request via [Graphite](https://app.graphite.dev/github/pr/llvm/llvm-project/126878). https://github.com/llvm/llvm-project/pull/126878 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [mlir] [AMDGPU][MLIR] Replace gfx940 and gfx941 with gfx942 in MLIR (PR #125836)
ritter-x2a wrote: ### Merge activity * **Feb 19, 3:48 AM EST**: A user started a stack merge that includes this pull request via [Graphite](https://app.graphite.dev/github/pr/llvm/llvm-project/125836). https://github.com/llvm/llvm-project/pull/125836 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [flang] [AMDGPU] Add missing gfx architectures to AddFlangOffloadRuntime.cmake (PR #125827)
ritter-x2a wrote: ### Merge activity * **Feb 19, 3:48 AM EST**: A user started a stack merge that includes this pull request via [Graphite](https://app.graphite.dev/github/pr/llvm/llvm-project/125827). https://github.com/llvm/llvm-project/pull/125827 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [AMDGPU][docs][NFC] Replace gfx940 with gfx942 in the gfx940 ISA doc (PR #126906)
ritter-x2a wrote: ### Merge activity * **Feb 19, 3:48 AM EST**: A user started a stack merge that includes this pull request via [Graphite](https://app.graphite.dev/github/pr/llvm/llvm-project/126906). https://github.com/llvm/llvm-project/pull/126906 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [AMDGPU][clang] Replace gfx940 and gfx941 with gfx942 in clang (PR #126762)
ritter-x2a wrote: ### Merge activity * **Feb 19, 3:48 AM EST**: A user started a stack merge that includes this pull request via [Graphite](https://app.graphite.dev/github/pr/llvm/llvm-project/126762). https://github.com/llvm/llvm-project/pull/126762 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [llvm] [AMDGPU] Replace gfx940 and gfx941 with gfx942 in llvm (PR #126763)
ritter-x2a wrote: ### Merge activity * **Feb 19, 3:48 AM EST**: A user started a stack merge that includes this pull request via [Graphite](https://app.graphite.dev/github/pr/llvm/llvm-project/126763). https://github.com/llvm/llvm-project/pull/126763 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [lld] [lld][LoongArch] Relax TLSDESC code sequence (PR #123677)
SixWeining wrote: cc @xen0n for review https://github.com/llvm/llvm-project/pull/123677 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [lld] [lld][LoongArch] Relax TLSDESC code sequence (PR #123677)
https://github.com/SixWeining commented: Actually the `FIXME` in commit message is `TODO`, right? https://github.com/llvm/llvm-project/pull/123677 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [lld] [lld][LoongArch] Support TLSDESC GD/LD to IE/LE (PR #123715)
SixWeining wrote: cc @xen0n https://github.com/llvm/llvm-project/pull/123715 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] Reland: [clang] Track function template instantiation from definition… (PR #127777)
https://github.com/mizvekov milestoned https://github.com/llvm/llvm-project/pull/127777 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] Reland: [clang] Track function template instantiation from definition… (PR #127777)
https://github.com/mizvekov created https://github.com/llvm/llvm-project/pull/127777 … (#125266) This fixes instantiation of definition for friend function templates, when the declaration found and the one containing the definition have different template contexts. In these cases, the function declaration corresponding to the definition is not available; it may not even be instantiated at all. So this patch adds a bit which tracks which function template declaration was instantiated from the member template. It's used to find which primary template serves as a context for the purpose of obtaining the template arguments needed to instantiate the definition. Fixes #55509 >From c0f86c988617ab5104d5a95fbcac38fd0a8ee4d7 Mon Sep 17 00:00:00 2001 From: Matheus Izvekov Date: Wed, 5 Feb 2025 14:12:12 -0300 Subject: [PATCH] Reland: [clang] Track function template instantiation from definition (#125266) This fixes instantiation of definition for friend function templates, when the declaration found and the one containing the definition have different template contexts. In these cases, the function declaration corresponding to the definition is not available; it may not even be instantiated at all. So this patch adds a bit which tracks which function template declaration was instantiated from the member template. It's used to find which primary template serves as a context for the purpose of obtaining the template arguments needed to instantiate the definition. 
Fixes #55509 --- clang/docs/ReleaseNotes.rst | 1 + clang/include/clang/AST/Decl.h| 7 ++ clang/include/clang/AST/DeclBase.h| 10 +- clang/include/clang/AST/DeclTemplate.h| 20 clang/lib/AST/Decl.cpp| 1 + clang/lib/Sema/SemaTemplateDeduction.cpp | 17 +-- clang/lib/Sema/SemaTemplateInstantiate.cpp| 9 +- .../lib/Sema/SemaTemplateInstantiateDecl.cpp | 27 - clang/lib/Serialization/ASTReaderDecl.cpp | 1 + clang/lib/Serialization/ASTWriterDecl.cpp | 3 +- clang/test/SemaTemplate/GH55509.cpp | 112 ++ 11 files changed, 180 insertions(+), 28 deletions(-) create mode 100644 clang/test/SemaTemplate/GH55509.cpp diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index ad1a5e7ae282e..ee161515fe68b 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -1053,6 +1053,7 @@ Bug Fixes to C++ Support template parameter. Now, such expression can be used with ``static_assert`` and ``constexpr``. (#GH123498) - Correctly determine the implicit constexprness of lambdas in dependent contexts. (#GH97958) (#GH114234) - Fix that some dependent immediate expressions did not cause immediate escalation (#GH119046) +- Clang is now better at keeping track of friend function template instance contexts. (#GH55509) Bug Fixes to AST Handling ^ diff --git a/clang/include/clang/AST/Decl.h b/clang/include/clang/AST/Decl.h index 9593bab576412..362a2741a0cdd 100644 --- a/clang/include/clang/AST/Decl.h +++ b/clang/include/clang/AST/Decl.h @@ -2298,6 +2298,13 @@ class FunctionDecl : public DeclaratorDecl, FunctionDeclBits.IsLateTemplateParsed = ILT; } + bool isInstantiatedFromMemberTemplate() const { +return FunctionDeclBits.IsInstantiatedFromMemberTemplate; + } + void setInstantiatedFromMemberTemplate(bool Val = true) { +FunctionDeclBits.IsInstantiatedFromMemberTemplate = Val; + } + /// Whether this function is "trivial" in some specialized C++ senses. /// Can only be true for default constructors, copy constructors, /// copy assignment operators, and destructors. 
Not meaningful until diff --git a/clang/include/clang/AST/DeclBase.h b/clang/include/clang/AST/DeclBase.h index 3bb82c1572ef9..648dae2838e03 100644 --- a/clang/include/clang/AST/DeclBase.h +++ b/clang/include/clang/AST/DeclBase.h @@ -1780,6 +1780,8 @@ class DeclContext { uint64_t HasImplicitReturnZero : 1; LLVM_PREFERRED_TYPE(bool) uint64_t IsLateTemplateParsed : 1; +LLVM_PREFERRED_TYPE(bool) +uint64_t IsInstantiatedFromMemberTemplate : 1; /// Kind of contexpr specifier as defined by ConstexprSpecKind. LLVM_PREFERRED_TYPE(ConstexprSpecKind) @@ -1830,7 +1832,7 @@ class DeclContext { }; /// Number of inherited and non-inherited bits in FunctionDeclBitfields. - enum { NumFunctionDeclBits = NumDeclContextBits + 31 }; + enum { NumFunctionDeclBits = NumDeclContextBits + 32 }; /// Stores the bits used by CXXConstructorDecl. If modified /// NumCXXConstructorDeclBits and the accessor @@ -1841,12 +1843,12 @@ class DeclContext { LLVM_PREFERRED_TYPE(FunctionDeclBitfields) uint64_t : NumFunctionDeclBits; -/// 20 bits to fit in the remaining available space. +/// 19 bits to fit in the remaining available space. /// Note that this makes CXXConstructorDeclBitfields take /// exactly 6
[llvm-branch-commits] [lld] [lld][LoongArch] Support relaxation during TLSDESC GD/LD to IE/LE conversion (PR #123730)
SixWeining wrote: cc @xen0n https://github.com/llvm/llvm-project/pull/123730 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] Reland: [clang] Track function template instantiation from definition… (PR #127777)
llvmbot wrote: @llvm/pr-subscribers-clang @llvm/pr-subscribers-clang-modules Author: Matheus Izvekov (mizvekov) Changes … (#125266) This fixes instantiation of definition for friend function templates, when the declaration found and the one containing the definition have different template contexts. In these cases, the the function declaration corresponding to the definition is not available; it may not even be instantiated at all. So this patch adds a bit which tracks which function template declaration was instantiated from the member template. It's used to find which primary template serves as a context for the purpose of obtainining the template arguments needed to instantiate the definition. Fixes #55509 --- Full diff: https://github.com/llvm/llvm-project/pull/12.diff 11 Files Affected: - (modified) clang/docs/ReleaseNotes.rst (+1) - (modified) clang/include/clang/AST/Decl.h (+7) - (modified) clang/include/clang/AST/DeclBase.h (+6-4) - (modified) clang/include/clang/AST/DeclTemplate.h (+20) - (modified) clang/lib/AST/Decl.cpp (+1) - (modified) clang/lib/Sema/SemaTemplateDeduction.cpp (+1-16) - (modified) clang/lib/Sema/SemaTemplateInstantiate.cpp (+4-5) - (modified) clang/lib/Sema/SemaTemplateInstantiateDecl.cpp (+25-2) - (modified) clang/lib/Serialization/ASTReaderDecl.cpp (+1) - (modified) clang/lib/Serialization/ASTWriterDecl.cpp (+2-1) - (added) clang/test/SemaTemplate/GH55509.cpp (+112) ``diff diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index ad1a5e7ae282e..ee161515fe68b 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -1053,6 +1053,7 @@ Bug Fixes to C++ Support template parameter. Now, such expression can be used with ``static_assert`` and ``constexpr``. (#GH123498) - Correctly determine the implicit constexprness of lambdas in dependent contexts. 
(#GH97958) (#GH114234) - Fix that some dependent immediate expressions did not cause immediate escalation (#GH119046) +- Clang is now better at keeping track of friend function template instance contexts. (#GH55509) Bug Fixes to AST Handling ^ diff --git a/clang/include/clang/AST/Decl.h b/clang/include/clang/AST/Decl.h index 9593bab576412..362a2741a0cdd 100644 --- a/clang/include/clang/AST/Decl.h +++ b/clang/include/clang/AST/Decl.h @@ -2298,6 +2298,13 @@ class FunctionDecl : public DeclaratorDecl, FunctionDeclBits.IsLateTemplateParsed = ILT; } + bool isInstantiatedFromMemberTemplate() const { +return FunctionDeclBits.IsInstantiatedFromMemberTemplate; + } + void setInstantiatedFromMemberTemplate(bool Val = true) { +FunctionDeclBits.IsInstantiatedFromMemberTemplate = Val; + } + /// Whether this function is "trivial" in some specialized C++ senses. /// Can only be true for default constructors, copy constructors, /// copy assignment operators, and destructors. Not meaningful until diff --git a/clang/include/clang/AST/DeclBase.h b/clang/include/clang/AST/DeclBase.h index 3bb82c1572ef9..648dae2838e03 100644 --- a/clang/include/clang/AST/DeclBase.h +++ b/clang/include/clang/AST/DeclBase.h @@ -1780,6 +1780,8 @@ class DeclContext { uint64_t HasImplicitReturnZero : 1; LLVM_PREFERRED_TYPE(bool) uint64_t IsLateTemplateParsed : 1; +LLVM_PREFERRED_TYPE(bool) +uint64_t IsInstantiatedFromMemberTemplate : 1; /// Kind of contexpr specifier as defined by ConstexprSpecKind. LLVM_PREFERRED_TYPE(ConstexprSpecKind) @@ -1830,7 +1832,7 @@ class DeclContext { }; /// Number of inherited and non-inherited bits in FunctionDeclBitfields. - enum { NumFunctionDeclBits = NumDeclContextBits + 31 }; + enum { NumFunctionDeclBits = NumDeclContextBits + 32 }; /// Stores the bits used by CXXConstructorDecl. 
If modified /// NumCXXConstructorDeclBits and the accessor @@ -1841,12 +1843,12 @@ class DeclContext { LLVM_PREFERRED_TYPE(FunctionDeclBitfields) uint64_t : NumFunctionDeclBits; -/// 20 bits to fit in the remaining available space. +/// 19 bits to fit in the remaining available space. /// Note that this makes CXXConstructorDeclBitfields take /// exactly 64 bits and thus the width of NumCtorInitializers /// will need to be shrunk if some bit is added to NumDeclContextBitfields, /// NumFunctionDeclBitfields or CXXConstructorDeclBitfields. -uint64_t NumCtorInitializers : 17; +uint64_t NumCtorInitializers : 16; LLVM_PREFERRED_TYPE(bool) uint64_t IsInheritingConstructor : 1; @@ -1860,7 +1862,7 @@ class DeclContext { }; /// Number of inherited and non-inherited bits in CXXConstructorDeclBitfields. - enum { NumCXXConstructorDeclBits = NumFunctionDeclBits + 20 }; + enum { NumCXXConstructorDeclBits = NumFunctionDeclBits + 19 }; /// Stores the bits used by ObjCMethodDecl. /// If modified NumObjCMethodDeclBits and the accessor diff --git a/clang/include/clang/AST/DeclTemplate.h b/clang/include/c
[llvm-branch-commits] [clang] Reland: [clang] Track function template instantiation from definition… (PR #127777)
https://github.com/mizvekov edited https://github.com/llvm/llvm-project/pull/127777 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] Reland: [clang] Track function template instantiation from definition… (PR #127777)
mizvekov wrote: This is a cherry-pick of https://github.com/llvm/llvm-project/pull/125266 into the 20.x release branch. https://github.com/llvm/llvm-project/pull/127777 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [lld] [lld][LoongArch] Convert TLS IE to LE in the normal or medium code model (PR #123680)
SixWeining wrote: cc @xen0n https://github.com/llvm/llvm-project/pull/123680 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [lld] [lld][LoongArch] Support relaxation during IE to LE conversion (PR #123702)
SixWeining wrote: cc @xen0n https://github.com/llvm/llvm-project/pull/123702 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [lld] [lld][LoongArch] GOT indirection to PC relative optimization (PR #123743)
SixWeining wrote: cc @xen0n https://github.com/llvm/llvm-project/pull/123743 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [lldb] release/20.x: Reland: [clang] fix P3310 overload resolution flag propagation (#125791) (PR #127779)
llvmbot wrote: @erichkeane What do you think about merging this PR to the release branch? https://github.com/llvm/llvm-project/pull/127779 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [lldb] release/20.x: Reland: [clang] fix P3310 overload resolution flag propagation (#125791) (PR #127779)
https://github.com/llvmbot milestoned https://github.com/llvm/llvm-project/pull/127779 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [lldb] release/20.x: Reland: [clang] fix P3310 overload resolution flag propagation (#125791) (PR #127779)
llvmbot wrote: @llvm/pr-subscribers-clang Author: None (llvmbot) Changes Backport 08bda1cc6b0d2f1d31a89a76b5c154a11086c420 Requested by: @mizvekov --- Patch is 232.51 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/127779.diff 16 Files Affected: - (modified) clang/include/clang/AST/DeclTemplate.h (+14-2) - (modified) clang/include/clang/Sema/Sema.h (+2-2) - (modified) clang/lib/AST/ASTImporter.cpp (+3-3) - (modified) clang/lib/AST/DeclTemplate.cpp (+24-23) - (modified) clang/lib/AST/JSONNodeDumper.cpp (+5) - (modified) clang/lib/AST/TextNodeDumper.cpp (+4-1) - (modified) clang/lib/Sema/SemaTemplate.cpp (+5-3) - (modified) clang/lib/Sema/SemaTemplateDeduction.cpp (-2) - (modified) clang/lib/Sema/SemaTemplateInstantiateDecl.cpp (+1-1) - (modified) clang/lib/Sema/SemaType.cpp (+2-1) - (modified) clang/lib/Serialization/ASTReaderDecl.cpp (+1) - (modified) clang/lib/Serialization/ASTWriterDecl.cpp (+1) - (modified) clang/test/AST/ast-dump-templates.cpp (+6045-2) - (modified) clang/test/AST/gen_ast_dump_json_test.py (+17-4) - (modified) clang/test/SemaTemplate/cwg2398.cpp (+17) - (modified) lldb/source/Plugins/ExpressionParser/Clang/CxxModuleHandler.cpp (+2-1) ``diff diff --git a/clang/include/clang/AST/DeclTemplate.h b/clang/include/clang/AST/DeclTemplate.h index 9ecff2c898acd..03c43765206b1 100644 --- a/clang/include/clang/AST/DeclTemplate.h +++ b/clang/include/clang/AST/DeclTemplate.h @@ -1841,15 +1841,23 @@ class ClassTemplateSpecializationDecl : public CXXRecordDecl, LLVM_PREFERRED_TYPE(TemplateSpecializationKind) unsigned SpecializationKind : 3; + /// Indicate that we have matched a parameter pack with a non pack + /// argument, when the opposite match is also allowed (strict pack match). + /// This needs to be cached as deduction is performed during declaration, + /// and we need the information to be preserved so that it is consistent + /// during instantiation. 
+ bool MatchedPackOnParmToNonPackOnArg : 1; + protected: ClassTemplateSpecializationDecl(ASTContext &Context, Kind DK, TagKind TK, DeclContext *DC, SourceLocation StartLoc, SourceLocation IdLoc, ClassTemplateDecl *SpecializedTemplate, ArrayRef Args, + bool MatchedPackOnParmToNonPackOnArg, ClassTemplateSpecializationDecl *PrevDecl); - explicit ClassTemplateSpecializationDecl(ASTContext &C, Kind DK); + ClassTemplateSpecializationDecl(ASTContext &C, Kind DK); public: friend class ASTDeclReader; @@ -1859,7 +1867,7 @@ class ClassTemplateSpecializationDecl : public CXXRecordDecl, Create(ASTContext &Context, TagKind TK, DeclContext *DC, SourceLocation StartLoc, SourceLocation IdLoc, ClassTemplateDecl *SpecializedTemplate, - ArrayRef Args, + ArrayRef Args, bool MatchedPackOnParmToNonPackOnArg, ClassTemplateSpecializationDecl *PrevDecl); static ClassTemplateSpecializationDecl *CreateDeserialized(ASTContext &C, GlobalDeclID ID); @@ -1930,6 +1938,10 @@ class ClassTemplateSpecializationDecl : public CXXRecordDecl, SpecializationKind = TSK; } + bool hasMatchedPackOnParmToNonPackOnArg() const { +return MatchedPackOnParmToNonPackOnArg; + } + /// Get the point of instantiation (if any), or null if none. 
SourceLocation getPointOfInstantiation() const { return PointOfInstantiation; diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h index eb82d1b978e94..a30a7076ea5d4 100644 --- a/clang/include/clang/Sema/Sema.h +++ b/clang/include/clang/Sema/Sema.h @@ -13491,8 +13491,8 @@ class Sema final : public SemaBase { bool InstantiateClassTemplateSpecialization( SourceLocation PointOfInstantiation, ClassTemplateSpecializationDecl *ClassTemplateSpec, - TemplateSpecializationKind TSK, bool Complain = true, - bool PrimaryHasMatchedPackOnParmToNonPackOnArg = false); + TemplateSpecializationKind TSK, bool Complain, + bool PrimaryHasMatchedPackOnParmToNonPackOnArg); /// Instantiates the definitions of all of the member /// of the given class, which is an instantiation of a class template diff --git a/clang/lib/AST/ASTImporter.cpp b/clang/lib/AST/ASTImporter.cpp index 09fa10f716ec1..13e7f93233a7f 100644 --- a/clang/lib/AST/ASTImporter.cpp +++ b/clang/lib/AST/ASTImporter.cpp @@ -6321,9 +6321,9 @@ ExpectedDecl ASTNodeImporter::VisitClassTemplateSpecializationDecl( updateLookupTableForTemplateParameters(*ToTPList); } else { // Not a partial specialization. if (GetImportedOrCreateDecl( -D2, D, Importer.getToContext(), D->getTagKind(), DC, -*BeginLocOrErr, *IdLocOrErr, ClassTemplate, TemplateArgs, -PrevDecl)
[llvm-branch-commits] [clang] [lldb] release/20.x: Reland: [clang] fix P3310 overload resolution flag propagation (#125791) (PR #127779)
llvmbot wrote: @llvm/pr-subscribers-clang-modules Author: None (llvmbot) Changes Backport 08bda1cc6b0d2f1d31a89a76b5c154a11086c420 Requested by: @mizvekov --- Patch is 232.51 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/127779.diff 16 Files Affected: - (modified) clang/include/clang/AST/DeclTemplate.h (+14-2) - (modified) clang/include/clang/Sema/Sema.h (+2-2) - (modified) clang/lib/AST/ASTImporter.cpp (+3-3) - (modified) clang/lib/AST/DeclTemplate.cpp (+24-23) - (modified) clang/lib/AST/JSONNodeDumper.cpp (+5) - (modified) clang/lib/AST/TextNodeDumper.cpp (+4-1) - (modified) clang/lib/Sema/SemaTemplate.cpp (+5-3) - (modified) clang/lib/Sema/SemaTemplateDeduction.cpp (-2) - (modified) clang/lib/Sema/SemaTemplateInstantiateDecl.cpp (+1-1) - (modified) clang/lib/Sema/SemaType.cpp (+2-1) - (modified) clang/lib/Serialization/ASTReaderDecl.cpp (+1) - (modified) clang/lib/Serialization/ASTWriterDecl.cpp (+1) - (modified) clang/test/AST/ast-dump-templates.cpp (+6045-2) - (modified) clang/test/AST/gen_ast_dump_json_test.py (+17-4) - (modified) clang/test/SemaTemplate/cwg2398.cpp (+17) - (modified) lldb/source/Plugins/ExpressionParser/Clang/CxxModuleHandler.cpp (+2-1) ``diff diff --git a/clang/include/clang/AST/DeclTemplate.h b/clang/include/clang/AST/DeclTemplate.h index 9ecff2c898acd..03c43765206b1 100644 --- a/clang/include/clang/AST/DeclTemplate.h +++ b/clang/include/clang/AST/DeclTemplate.h @@ -1841,15 +1841,23 @@ class ClassTemplateSpecializationDecl : public CXXRecordDecl, LLVM_PREFERRED_TYPE(TemplateSpecializationKind) unsigned SpecializationKind : 3; + /// Indicate that we have matched a parameter pack with a non pack + /// argument, when the opposite match is also allowed (strict pack match). + /// This needs to be cached as deduction is performed during declaration, + /// and we need the information to be preserved so that it is consistent + /// during instantiation. 
+ bool MatchedPackOnParmToNonPackOnArg : 1; + protected: ClassTemplateSpecializationDecl(ASTContext &Context, Kind DK, TagKind TK, DeclContext *DC, SourceLocation StartLoc, SourceLocation IdLoc, ClassTemplateDecl *SpecializedTemplate, ArrayRef Args, + bool MatchedPackOnParmToNonPackOnArg, ClassTemplateSpecializationDecl *PrevDecl); - explicit ClassTemplateSpecializationDecl(ASTContext &C, Kind DK); + ClassTemplateSpecializationDecl(ASTContext &C, Kind DK); public: friend class ASTDeclReader; @@ -1859,7 +1867,7 @@ class ClassTemplateSpecializationDecl : public CXXRecordDecl, Create(ASTContext &Context, TagKind TK, DeclContext *DC, SourceLocation StartLoc, SourceLocation IdLoc, ClassTemplateDecl *SpecializedTemplate, - ArrayRef Args, + ArrayRef Args, bool MatchedPackOnParmToNonPackOnArg, ClassTemplateSpecializationDecl *PrevDecl); static ClassTemplateSpecializationDecl *CreateDeserialized(ASTContext &C, GlobalDeclID ID); @@ -1930,6 +1938,10 @@ class ClassTemplateSpecializationDecl : public CXXRecordDecl, SpecializationKind = TSK; } + bool hasMatchedPackOnParmToNonPackOnArg() const { +return MatchedPackOnParmToNonPackOnArg; + } + /// Get the point of instantiation (if any), or null if none. 
SourceLocation getPointOfInstantiation() const { return PointOfInstantiation; diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h index eb82d1b978e94..a30a7076ea5d4 100644 --- a/clang/include/clang/Sema/Sema.h +++ b/clang/include/clang/Sema/Sema.h @@ -13491,8 +13491,8 @@ class Sema final : public SemaBase { bool InstantiateClassTemplateSpecialization( SourceLocation PointOfInstantiation, ClassTemplateSpecializationDecl *ClassTemplateSpec, - TemplateSpecializationKind TSK, bool Complain = true, - bool PrimaryHasMatchedPackOnParmToNonPackOnArg = false); + TemplateSpecializationKind TSK, bool Complain, + bool PrimaryHasMatchedPackOnParmToNonPackOnArg); /// Instantiates the definitions of all of the member /// of the given class, which is an instantiation of a class template diff --git a/clang/lib/AST/ASTImporter.cpp b/clang/lib/AST/ASTImporter.cpp index 09fa10f716ec1..13e7f93233a7f 100644 --- a/clang/lib/AST/ASTImporter.cpp +++ b/clang/lib/AST/ASTImporter.cpp @@ -6321,9 +6321,9 @@ ExpectedDecl ASTNodeImporter::VisitClassTemplateSpecializationDecl( updateLookupTableForTemplateParameters(*ToTPList); } else { // Not a partial specialization. if (GetImportedOrCreateDecl( -D2, D, Importer.getToContext(), D->getTagKind(), DC, -*BeginLocOrErr, *IdLocOrErr, ClassTemplate, TemplateArgs, -P
[llvm-branch-commits] [libcxx] release/20.x: [libc++][TZDB] Fixes mapping of nonexisting time. (#127330) (PR #127531)
https://github.com/ldionne approved this pull request. https://github.com/llvm/llvm-project/pull/127531 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [clang] Implement instantiation context note for checking template parameters (PR #126088)
@@ -1251,12 +1261,18 @@ void Sema::PrintInstantiationStack(DiagFuncRef DiagFunc) { case CodeSynthesisContext::PartialOrderingTTP: DiagFunc(Active->PointOfInstantiation, PDiag(diag::note_template_arg_template_params_mismatch)); - if (SourceLocation ParamLoc = Active->Entity->getLocation(); - ParamLoc.isValid()) -DiagFunc(ParamLoc, PDiag(diag::note_template_prev_declaration) - << /*isTemplateTemplateParam=*/true - << Active->InstantiationRange); break; +case CodeSynthesisContext::CheckTemplateParameter: { + auto &ND = *cast(Active->Entity); erichkeane wrote: ```suggestion const auto &ND = *cast(Active->Entity); ``` ?? https://github.com/llvm/llvm-project/pull/126088 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [clang] Implement instantiation context note for checking template parameters (PR #126088)
https://github.com/erichkeane edited https://github.com/llvm/llvm-project/pull/126088 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [clang] Implement instantiation context note for checking template parameters (PR #126088)
@@ -1909,7 +1909,22 @@ class Sema final : public SemaBase { /// '\#pragma clang attribute push' directives to the given declaration. void AddPragmaAttributes(Scope *S, Decl *D); - void PrintPragmaAttributeInstantiationPoint(); + using DiagFuncRef = + llvm::function_ref; + auto getDefaultDiagFunc() { +return [this](SourceLocation Loc, PartialDiagnostic PD) { + // This bypasses a lof of the filters in the diag engine, as it's erichkeane wrote: ```suggestion // This bypasses a lot of the filters in the diag engine, as it's ``` https://github.com/llvm/llvm-project/pull/126088 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [clang] Implement instantiation context note for checking template parameters (PR #126088)
@@ -11802,9 +11817,10 @@ class Sema final : public SemaBase { bool PartialOrdering, bool *StrictPackMatch); + SmallString<128> toTerseString(const NamedDecl &D) const; erichkeane wrote: would love a comment that tells what 'terse string' means here. 'terse' is actually a pretty loaded word in C++, so explanations need to be pretty sizable here. https://github.com/llvm/llvm-project/pull/126088 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [libcxx] 876a5c9 - [libc++] Avoid including on arbitrary platforms (#125587)
Author: Louis Dionne Date: 2025-02-19T06:26:51-08:00 New Revision: 876a5c9e5905a9666748632afba1ff83200ed95b URL: https://github.com/llvm/llvm-project/commit/876a5c9e5905a9666748632afba1ff83200ed95b DIFF: https://github.com/llvm/llvm-project/commit/876a5c9e5905a9666748632afba1ff83200ed95b.diff LOG: [libc++] Avoid including <features.h> on arbitrary platforms (#125587) This partially reverts commit 5f2389d4. That commit started checking whether <features.h> was a valid include unconditionally, however codebases are free to have such a header on their search path, which breaks compilation. LLVM libc now provides a more standard way of getting configuration macros like __LLVM_LIBC__. After this patch, we only include <features.h> when we're on Linux or when we're compiling for GPUs. (cherry picked from commit cffc1ac3491c891ef4f80bcbfa685710e477eeac) Added: Modified: libcxx/include/__configuration/platform.h Removed: diff --git a/libcxx/include/__configuration/platform.h b/libcxx/include/__configuration/platform.h index 2a92ce209b91f..cff99376ee24b 100644 --- a/libcxx/include/__configuration/platform.h +++ b/libcxx/include/__configuration/platform.h @@ -30,12 +30,9 @@ // ... add new file formats here ... #endif -// To detect which libc we're using -#if __has_include(<features.h>) +// Need to detect which libc we're using if we're on Linux. +#if defined(__linux__) || defined(__AMDGPU__) || defined(__NVPTX__) # include <features.h> -#endif - -#if defined(__linux__) # if defined(__GLIBC_PREREQ) #define _LIBCPP_GLIBC_PREREQ(a, b) __GLIBC_PREREQ(a, b) # else ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [clang] Implement instantiation context note for checking template parameters (PR #126088)
https://github.com/erichkeane approved this pull request. A few quick comments, else the source changes LGTM. Note that @endill's suggestions to use 'bookmarks' for notes (or something like that) are good ones that I agree with. https://github.com/llvm/llvm-project/pull/126088 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [libcxx] release/20.x: [libc++] Avoid including on arbitrary platforms (#125587) (PR #127310)
https://github.com/tstellar closed https://github.com/llvm/llvm-project/pull/127310 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [libcxx] release/20.x: [libc++] Avoid including on arbitrary platforms (#125587) (PR #127310)
ldionne wrote: @tstellar Can we merge this one? I have another fix I want to cherry-pick which depends on this one. https://github.com/llvm/llvm-project/pull/127310 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [flang] release/20.x: flang: Fix build with latest libc++ (#127362) (PR #127805)
https://github.com/llvmbot milestoned https://github.com/llvm/llvm-project/pull/127805 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [flang] release/20.x: flang: Fix build with latest libc++ (#127362) (PR #127805)
llvmbot wrote: @llvm/pr-subscribers-flang-runtime Author: None (llvmbot) Changes Backport 2b340c10a611d929fee25e6222909c8915e3d6b6 Requested by: @tstellar --- Full diff: https://github.com/llvm/llvm-project/pull/127805.diff 1 Files Affected: - (modified) flang/runtime/io-api-minimal.cpp (+2-1) ``diff diff --git a/flang/runtime/io-api-minimal.cpp b/flang/runtime/io-api-minimal.cpp index 68768427be0c2..93ac82248aa4c 100644 --- a/flang/runtime/io-api-minimal.cpp +++ b/flang/runtime/io-api-minimal.cpp @@ -150,7 +150,8 @@ bool IODEF(OutputLogical)(Cookie cookie, bool truth) { // Provide own definition for `std::__libcpp_verbose_abort` to avoid dependency // on the version provided by libc++. -void std::__libcpp_verbose_abort(char const *format, ...) { +void std::__libcpp_verbose_abort(char const *format, ...) noexcept( +noexcept(std::__libcpp_verbose_abort(""))) { va_list list; va_start(list, format); std::vfprintf(stderr, format, list); `` https://github.com/llvm/llvm-project/pull/127805 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [flang] release/20.x: flang: Fix build with latest libc++ (#127362) (PR #127805)
llvmbot wrote: @ldionne What do you think about merging this PR to the release branch? https://github.com/llvm/llvm-project/pull/127805 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [libcxx] release/20.x: [libc++] Avoid including on arbitrary platforms (#125587) (PR #127310)
https://github.com/llvmbot updated https://github.com/llvm/llvm-project/pull/127310 >From 876a5c9e5905a9666748632afba1ff83200ed95b Mon Sep 17 00:00:00 2001 From: Louis Dionne Date: Sat, 15 Feb 2025 10:54:00 +0100 Subject: [PATCH] [libc++] Avoid including <features.h> on arbitrary platforms (#125587) This partially reverts commit 5f2389d4. That commit started checking whether <features.h> was a valid include unconditionally, however codebases are free to have such a header on their search path, which breaks compilation. LLVM libc now provides a more standard way of getting configuration macros like __LLVM_LIBC__. After this patch, we only include <features.h> when we're on Linux or when we're compiling for GPUs. (cherry picked from commit cffc1ac3491c891ef4f80bcbfa685710e477eeac) --- libcxx/include/__configuration/platform.h | 7 ++- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/libcxx/include/__configuration/platform.h b/libcxx/include/__configuration/platform.h index 2a92ce209b91f..cff99376ee24b 100644 --- a/libcxx/include/__configuration/platform.h +++ b/libcxx/include/__configuration/platform.h @@ -30,12 +30,9 @@ // ... add new file formats here ... #endif -// To detect which libc we're using -#if __has_include(<features.h>) +// Need to detect which libc we're using if we're on Linux. +#if defined(__linux__) || defined(__AMDGPU__) || defined(__NVPTX__) # include <features.h> -#endif - -#if defined(__linux__) # if defined(__GLIBC_PREREQ) #define _LIBCPP_GLIBC_PREREQ(a, b) __GLIBC_PREREQ(a, b) # else ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [libcxx] release/20.x: [libc++] Avoid including on arbitrary platforms (#125587) (PR #127310)
github-actions[bot] wrote: @ldionne (or anyone else): if you would like to add a note about this fix in the release notes (completely optional), please reply to this comment with a one- or two-sentence description of the fix. When you are done, please add the release:note label to this PR. https://github.com/llvm/llvm-project/pull/127310 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [mlir] [MLIR][OpenMP] Host lowering of standalone distribute (PR #127817)
https://github.com/skatrak created https://github.com/llvm/llvm-project/pull/127817 This patch adds MLIR to LLVM IR translation support for standalone `omp.distribute` operations, as well as `distribute simd` through ignoring SIMD information (similarly to `do/for simd`). >From 8ecbf3579bcff069548f6e4484cb546a1b54511e Mon Sep 17 00:00:00 2001 From: Sergio Afonso Date: Tue, 18 Feb 2025 11:22:43 + Subject: [PATCH] [MLIR][OpenMP] Host lowering of standalone distribute This patch adds MLIR to LLVM IR translation support for standalone `omp.distribute` operations, as well as `distribute simd` through ignoring SIMD information (similarly to `do/for simd`). Co-authored-by: Dominik Adamski --- .../OpenMP/OpenMPToLLVMIRTranslation.cpp | 78 +++ mlir/test/Target/LLVMIR/openmp-llvm.mlir | 37 + mlir/test/Target/LLVMIR/openmp-todo.mlir | 66 +++- 3 files changed, 178 insertions(+), 3 deletions(-) diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp index a5ff3eff6439f..c8221a9f9854a 100644 --- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp @@ -164,6 +164,10 @@ static LogicalResult checkImplementationStatus(Operation &op) { if (op.getDevice()) result = todo("device"); }; + auto checkDistSchedule = [&todo](auto op, LogicalResult &result) { +if (op.getDistScheduleChunkSize()) + result = todo("dist_schedule with chunk_size"); + }; auto checkHasDeviceAddr = [&todo](auto op, LogicalResult &result) { if (!op.getHasDeviceAddrVars().empty()) result = todo("has_device_addr"); @@ -255,6 +259,16 @@ static LogicalResult checkImplementationStatus(Operation &op) { LogicalResult result = success(); llvm::TypeSwitch(op) + .Case([&](omp::DistributeOp op) { +if (op.isComposite() && +isa_and_present(op.getNestedWrapper())) + result = op.emitError() << "not yet implemented: " + "composite omp.distribute + 
omp.wsloop"; +checkAllocate(op, result); +checkDistSchedule(op, result); +checkOrder(op, result); +checkPrivate(op, result); + }) .Case([&](omp::OrderedRegionOp op) { checkParLevelSimd(op, result); }) .Case([&](omp::SectionsOp op) { checkAllocate(op, result); @@ -3755,6 +3769,67 @@ convertOmpTargetData(Operation *op, llvm::IRBuilderBase &builder, return success(); } +static LogicalResult +convertOmpDistribute(Operation &opInst, llvm::IRBuilderBase &builder, + LLVM::ModuleTranslation &moduleTranslation) { + llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder(); + auto distributeOp = cast(opInst); + if (failed(checkImplementationStatus(opInst))) +return failure(); + + using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; + auto bodyGenCB = [&](InsertPointTy allocaIP, + InsertPointTy codeGenIP) -> llvm::Error { +// DistributeOp has only one region associated with it. +builder.restoreIP(codeGenIP); + +llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder(); +llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder); +llvm::Expected regionBlock = +convertOmpOpRegions(distributeOp.getRegion(), "omp.distribute.region", +builder, moduleTranslation); +if (!regionBlock) + return regionBlock.takeError(); +builder.SetInsertPoint(*regionBlock, (*regionBlock)->begin()); + +// TODO: Add support for clauses which are valid for DISTRIBUTE constructs. +// Static schedule is the default. 
+auto schedule = omp::ClauseScheduleKind::Static; +bool isOrdered = false; +std::optional scheduleMod; +bool isSimd = false; +llvm::omp::WorksharingLoopType workshareLoopType = +llvm::omp::WorksharingLoopType::DistributeStaticLoop; +bool loopNeedsBarrier = false; +llvm::Value *chunk = nullptr; + +llvm::CanonicalLoopInfo *loopInfo = *findCurrentLoopInfo(moduleTranslation); +llvm::OpenMPIRBuilder::InsertPointOrErrorTy wsloopIP = +ompBuilder->applyWorkshareLoop( +ompLoc.DL, loopInfo, allocaIP, loopNeedsBarrier, +convertToScheduleKind(schedule), chunk, isSimd, +scheduleMod == omp::ScheduleModifier::monotonic, +scheduleMod == omp::ScheduleModifier::nonmonotonic, isOrdered, +workshareLoopType); + +if (!wsloopIP) + return wsloopIP.takeError(); +return llvm::Error::success(); + }; + + llvm::OpenMPIRBuilder::InsertPointTy allocaIP = + findAllocaInsertPoint(builder, moduleTranslation); + llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder); + llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP = + ompBuilder->createDistribute(om
[llvm-branch-commits] [llvm] [OpenMPIRBuilder] Split calculation of canonical loop trip count, NFC (PR #127820)
https://github.com/skatrak created https://github.com/llvm/llvm-project/pull/127820 This patch splits off the calculation of canonical loop trip counts from the creation of canonical loops. This makes it possible to reuse this logic to, for instance, populate the `__tgt_target_kernel` runtime call for SPMD kernels. This feature is used to simplify one of the existing OpenMPIRBuilder tests. >From 5153e0d8ebcad5dacebe2dd00f4e2e96831ef5cf Mon Sep 17 00:00:00 2001 From: Sergio Afonso Date: Tue, 18 Feb 2025 14:19:30 + Subject: [PATCH] [OpenMPIRBuilder] Split calculation of canonical loop trip count, NFC This patch splits off the calculation of canonical loop trip counts from the creation of canonical loops. This makes it possible to reuse this logic to, for instance, populate the `__tgt_target_kernel` runtime call for SPMD kernels. This feature is used to simplify one of the existing OpenMPIRBuilder tests. --- .../llvm/Frontend/OpenMP/OMPIRBuilder.h | 38 +++ llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp | 27 - .../Frontend/OpenMPIRBuilderTest.cpp | 16 ++-- 3 files changed, 52 insertions(+), 29 deletions(-) diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h index 9ad85413acd34..207ca7fb05f62 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h +++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h @@ -728,13 +728,12 @@ class OpenMPIRBuilder { LoopBodyGenCallbackTy BodyGenCB, Value *TripCount, const Twine &Name = "loop"); - /// Generator for the control flow structure of an OpenMP canonical loop. + /// Calculate the trip count of a canonical loop. /// - /// Instead of a logical iteration space, this allows specifying user-defined - /// loop counter values using increment, upper- and lower bounds. To - /// disambiguate the terminology when counting downwards, instead of lower - /// bounds we use \p Start for the loop counter value in the first body - /// iteration. 
+ /// This allows specifying user-defined loop counter values using increment, + /// upper- and lower bounds. To disambiguate the terminology when counting + /// downwards, instead of lower bounds we use \p Start for the loop counter + /// value in the first body iteration. /// /// Consider the following limitations: /// @@ -758,7 +757,32 @@ class OpenMPIRBuilder { /// /// for (int i = 0; i < 42; i -= 1u) /// - // + /// \param Loc The insert and source location description. + /// \param Start Value of the loop counter for the first iterations. + /// \param Stop Loop counter values past this will stop the loop. + /// \param Step Loop counter increment after each iteration; negative + /// means counting down. + /// \param IsSigned Whether Start, Stop and Step are signed integers. + /// \param InclusiveStop Whether \p Stop itself is a valid value for the loop + /// counter. + /// \param Name Base name used to derive instruction names. + /// + /// \returns The value holding the calculated trip count. + Value *calculateCanonicalLoopTripCount(const LocationDescription &Loc, + Value *Start, Value *Stop, Value *Step, + bool IsSigned, bool InclusiveStop, + const Twine &Name = "loop"); + + /// Generator for the control flow structure of an OpenMP canonical loop. + /// + /// Instead of a logical iteration space, this allows specifying user-defined + /// loop counter values using increment, upper- and lower bounds. To + /// disambiguate the terminology when counting downwards, instead of lower + /// bounds we use \p Start for the loop counter value in the first body + /// + /// It calls \see calculateCanonicalLoopTripCount for trip count calculations, + /// so limitations of that method apply here as well. + /// /// \param Loc The insert and source location description. /// \param BodyGenCB Callback that will generate the loop body code. /// \param Start Value of the loop counter for the first iterations. 
diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp index 7788897fc0795..eee6e3e54d615 100644 --- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp +++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp @@ -4059,10 +4059,9 @@ OpenMPIRBuilder::createCanonicalLoop(const LocationDescription &Loc, return CL; } -Expected OpenMPIRBuilder::createCanonicalLoop( -const LocationDescription &Loc, LoopBodyGenCallbackTy BodyGenCB, -Value *Start, Value *Stop, Value *Step, bool IsSigned, bool InclusiveStop, -InsertPointTy ComputeIP, const Twine &Name) { +Value *OpenMPIRBuilder::calculateCanonicalLoopTripCount( +const LocationDescription &Loc, Value *Start, Value *Stop, Value *St
[llvm-branch-commits] [flang] [Flang][OpenMP] Allow host evaluation of loop bounds for distribute (PR #127822)
https://github.com/skatrak created https://github.com/llvm/llvm-project/pull/127822 This patch adds `target teams distribute [simd]` and equivalent construct nests to the list of cases where loop bounds can be evaluated in the host, as they represent kernels for which the trip count must also be evaluated in advance of the kernel call. >From 0e96e97bb5405904522d1bd54b458fb92d11f7fb Mon Sep 17 00:00:00 2001 From: Sergio Afonso Date: Wed, 19 Feb 2025 15:15:01 + Subject: [PATCH] [Flang][OpenMP] Allow host evaluation of loop bounds for distribute This patch adds `target teams distribute [simd]` and equivalent construct nests to the list of cases where loop bounds can be evaluated in the host, as they represent Generic-SPMD kernels for which the trip count must also be evaluated in advance of the kernel call. --- flang/lib/Lower/OpenMP/OpenMP.cpp | 12 +-- flang/test/Lower/OpenMP/host-eval.f90 | 103 ++ 2 files changed, 110 insertions(+), 5 deletions(-) diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp index bd794033cdf11..8c80453610473 100644 --- a/flang/lib/Lower/OpenMP/OpenMP.cpp +++ b/flang/lib/Lower/OpenMP/OpenMP.cpp @@ -562,8 +562,11 @@ static void processHostEvalClauses(lower::AbstractConverter &converter, [[fallthrough]]; case OMPD_distribute_parallel_do: case OMPD_distribute_parallel_do_simd: - cp.processCollapse(loc, eval, hostInfo.ops, hostInfo.iv); cp.processNumThreads(stmtCtx, hostInfo.ops); + [[fallthrough]]; +case OMPD_distribute: +case OMPD_distribute_simd: + cp.processCollapse(loc, eval, hostInfo.ops, hostInfo.iv); break; // Cases where 'teams' clauses might be present, and target SPMD is @@ -573,10 +576,8 @@ static void processHostEvalClauses(lower::AbstractConverter &converter, [[fallthrough]]; case OMPD_target_teams: cp.processNumTeams(stmtCtx, hostInfo.ops); - processSingleNestedIf([](Directive nestedDir) { -return nestedDir == OMPD_distribute_parallel_do || - nestedDir == OMPD_distribute_parallel_do_simd; - }); + 
processSingleNestedIf( + [](Directive nestedDir) { return topDistributeSet.test(nestedDir); }); break; // Cases where only 'teams' host-evaluated clauses might be present. @@ -586,6 +587,7 @@ static void processHostEvalClauses(lower::AbstractConverter &converter, [[fallthrough]]; case OMPD_target_teams_distribute: case OMPD_target_teams_distribute_simd: + cp.processCollapse(loc, eval, hostInfo.ops, hostInfo.iv); cp.processNumTeams(stmtCtx, hostInfo.ops); break; diff --git a/flang/test/Lower/OpenMP/host-eval.f90 b/flang/test/Lower/OpenMP/host-eval.f90 index 32c52462b86a7..65258c91e5daf 100644 --- a/flang/test/Lower/OpenMP/host-eval.f90 +++ b/flang/test/Lower/OpenMP/host-eval.f90 @@ -155,3 +155,106 @@ subroutine distribute_parallel_do_simd() !$omp end distribute parallel do simd !$omp end teams end subroutine distribute_parallel_do_simd + +! BOTH-LABEL: func.func @_QPdistribute +subroutine distribute() + ! BOTH: omp.target + + ! HOST-SAME: host_eval(%{{.*}} -> %[[LB:.*]], %{{.*}} -> %[[UB:.*]], %{{.*}} -> %[[STEP:.*]] : i32, i32, i32) + + ! DEVICE-NOT: host_eval({{.*}}) + ! DEVICE-SAME: { + + ! BOTH: omp.teams + !$omp target teams + + ! BOTH: omp.distribute + ! BOTH-NEXT: omp.loop_nest + + ! HOST-SAME: (%{{.*}}) : i32 = (%[[LB]]) to (%[[UB]]) inclusive step (%[[STEP]]) + !$omp distribute + do i=1,10 +call foo() + end do + !$omp end distribute + !$omp end target teams + + ! BOTH: omp.target + ! BOTH-NOT: host_eval({{.*}}) + ! BOTH-SAME: { + ! BOTH: omp.teams + !$omp target teams + call foo() !< Prevents this from being Generic-SPMD. + + ! BOTH: omp.distribute + !$omp distribute + do i=1,10 +call foo() + end do + !$omp end distribute + !$omp end target teams + + ! BOTH: omp.teams + !$omp teams + + ! BOTH: omp.distribute + !$omp distribute + do i=1,10 +call foo() + end do + !$omp end distribute + !$omp end teams +end subroutine distribute + +! BOTH-LABEL: func.func @_QPdistribute_simd +subroutine distribute_simd() + ! BOTH: omp.target + + ! 
HOST-SAME: host_eval(%{{.*}} -> %[[LB:.*]], %{{.*}} -> %[[UB:.*]], %{{.*}} -> %[[STEP:.*]] : i32, i32, i32) + + ! DEVICE-NOT: host_eval({{.*}}) + ! DEVICE-SAME: { + + ! BOTH: omp.teams + !$omp target teams + + ! BOTH: omp.distribute + ! BOTH-NEXT: omp.simd + ! BOTH-NEXT: omp.loop_nest + + ! HOST-SAME: (%{{.*}}) : i32 = (%[[LB]]) to (%[[UB]]) inclusive step (%[[STEP]]) + !$omp distribute simd + do i=1,10 +call foo() + end do + !$omp end distribute simd + !$omp end target teams + + ! BOTH: omp.target + ! BOTH-NOT: host_eval({{.*}}) + ! BOTH-SAME: { + ! BOTH: omp.teams + !$omp target teams + call foo() !< Prevents this from being Generic-SPMD. + + ! BOTH: omp.distribute + ! BOTH-NEXT: omp.simd + !$omp di
[llvm-branch-commits] [mlir] [MLIR][OpenMP] Host lowering of distribute-parallel-do/for (PR #127819)
https://github.com/skatrak created https://github.com/llvm/llvm-project/pull/127819 This patch adds support for translating composite `omp.parallel` + `omp.distribute` + `omp.wsloop` loops to LLVM IR on the host. This is done by passing an updated `WorksharingLoopType` to the call to `applyWorkshareLoop` associated to the lowering of the `omp.wsloop` operation, so that `__kmpc_dist_for_static_init` is called at runtime in place of `__kmpc_for_static_init`. Existing translation rules take care of creating a parallel region to hold the workshared and workdistributed loop. >From 38ba269f0681b8d962841c4471a242fe382a6106 Mon Sep 17 00:00:00 2001 From: Sergio Afonso Date: Tue, 18 Feb 2025 13:07:51 + Subject: [PATCH] [MLIR][OpenMP] Host lowering of distribute-parallel-do/for This patch adds support for translating composite `omp.parallel` + `omp.distribute` + `omp.wsloop` loops to LLVM IR on the host. This is done by passing an updated `WorksharingLoopType` to the call to `applyWorkshareLoop` associated to the lowering of the `omp.wsloop` operation, so that `__kmpc_dist_for_static_init` is called at runtime in place of `__kmpc_for_static_init`. Existing translation rules take care of creating a parallel region to hold the workshared and workdistributed loop. 
--- .../OpenMP/OpenMPToLLVMIRTranslation.cpp | 21 -- mlir/test/Target/LLVMIR/openmp-llvm.mlir | 65 +++ mlir/test/Target/LLVMIR/openmp-todo.mlir | 19 -- 3 files changed, 81 insertions(+), 24 deletions(-) diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp index c8221a9f9854a..7e8a9bdb5b133 100644 --- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp @@ -260,10 +260,6 @@ static LogicalResult checkImplementationStatus(Operation &op) { LogicalResult result = success(); llvm::TypeSwitch(op) .Case([&](omp::DistributeOp op) { -if (op.isComposite() && -isa_and_present(op.getNestedWrapper())) - result = op.emitError() << "not yet implemented: " - "composite omp.distribute + omp.wsloop"; checkAllocate(op, result); checkDistSchedule(op, result); checkOrder(op, result); @@ -1993,6 +1989,14 @@ convertOmpWsloop(Operation &opInst, llvm::IRBuilderBase &builder, bool isSimd = wsloopOp.getScheduleSimd(); bool loopNeedsBarrier = !wsloopOp.getNowait(); + // The only legal way for the direct parent to be omp.distribute is that this + // represents 'distribute parallel do'. Otherwise, this is a regular + // worksharing loop. + llvm::omp::WorksharingLoopType workshareLoopType = + llvm::isa_and_present(opInst.getParentOp()) + ? 
llvm::omp::WorksharingLoopType::DistributeForStaticLoop + : llvm::omp::WorksharingLoopType::ForStaticLoop; + llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder); llvm::Expected regionBlock = convertOmpOpRegions( wsloopOp.getRegion(), "omp.wsloop.region", builder, moduleTranslation); @@ -2008,7 +2012,8 @@ convertOmpWsloop(Operation &opInst, llvm::IRBuilderBase &builder, ompLoc.DL, loopInfo, allocaIP, loopNeedsBarrier, convertToScheduleKind(schedule), chunk, isSimd, scheduleMod == omp::ScheduleModifier::monotonic, - scheduleMod == omp::ScheduleModifier::nonmonotonic, isOrdered); + scheduleMod == omp::ScheduleModifier::nonmonotonic, isOrdered, + workshareLoopType); if (failed(handleError(wsloopIP, opInst))) return failure(); @@ -3792,6 +3797,12 @@ convertOmpDistribute(Operation &opInst, llvm::IRBuilderBase &builder, return regionBlock.takeError(); builder.SetInsertPoint(*regionBlock, (*regionBlock)->begin()); +// Skip applying a workshare loop below when translating 'distribute +// parallel do' (it's been already handled by this point while translating +// the nested omp.wsloop). +if (isa_and_present(distributeOp.getNestedWrapper())) + return llvm::Error::success(); + // TODO: Add support for clauses which are valid for DISTRIBUTE constructs. // Static schedule is the default. auto schedule = omp::ClauseScheduleKind::Static; diff --git a/mlir/test/Target/LLVMIR/openmp-llvm.mlir b/mlir/test/Target/LLVMIR/openmp-llvm.mlir index a5a490e527d79..d85b149c66811 100644 --- a/mlir/test/Target/LLVMIR/openmp-llvm.mlir +++ b/mlir/test/Target/LLVMIR/openmp-llvm.mlir @@ -3307,3 +3307,68 @@ llvm.func @distribute() { // CHECK: store i64 1, ptr %[[STRIDE]] // CHECK: %[[TID:.*]] = call i32 @__kmpc_global_thread_num({{.*}}) // CHECK: call void @__kmpc_for_static_init_{{.*}}(ptr @{{.*}}, i32 %[[TID]], i32 92, ptr %[[LASTITER]], ptr %[[LB]], ptr %[[UB]], ptr %[[STRIDE]], i64 1, i64 0) + +// - + +llvm.func @distribute_w
[llvm-branch-commits] [llvm] [OpenMPIRBuilder] Add support for distribute constructs (PR #127816)
https://github.com/skatrak created https://github.com/llvm/llvm-project/pull/127816 This patch adds the `OpenMPIRBuilder::createDistribute()` function and updates `OpenMPIRBuilder::applyStaticWorkshareLoop()` in preparation for adding `distribute` support to flang. >From a79b7a2d6a443ef26bf4beaf73ec3c8042d968d1 Mon Sep 17 00:00:00 2001 From: Dominik Adamski Date: Mon, 17 Feb 2025 14:25:40 + Subject: [PATCH] [OpenMPIRBuilder] Add support for distribute constructs This patch adds the `OpenMPIRBuilder::createDistribute()` function and updates `OpenMPIRBuilder::applyStaticWorkshareLoop()` in preparation for adding `distribute` support to flang. Co-authored-by: Sergio Afonso --- .../llvm/Frontend/OpenMP/OMPIRBuilder.h | 17 -- llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp | 59 --- 2 files changed, 64 insertions(+), 12 deletions(-) diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h index d25077cae63e4..9ad85413acd34 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h +++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h @@ -1004,12 +1004,12 @@ class OpenMPIRBuilder { /// preheader of the loop. /// \param NeedsBarrier Indicates whether a barrier must be inserted after /// the loop. + /// \param LoopType Type of workshare loop. /// /// \returns Point where to insert code after the workshare construct. - InsertPointOrErrorTy applyStaticWorkshareLoop(DebugLoc DL, -CanonicalLoopInfo *CLI, -InsertPointTy AllocaIP, -bool NeedsBarrier); + InsertPointOrErrorTy applyStaticWorkshareLoop( + DebugLoc DL, CanonicalLoopInfo *CLI, InsertPointTy AllocaIP, + omp::WorksharingLoopType LoopType, bool NeedsBarrier); /// Modifies the canonical loop a statically-scheduled workshare loop with a /// user-specified chunk size. 
@@ -2660,6 +2660,15 @@ class OpenMPIRBuilder { Value *NumTeamsLower = nullptr, Value *NumTeamsUpper = nullptr, Value *ThreadLimit = nullptr, Value *IfExpr = nullptr); + /// Generator for `#omp distribute` + /// + /// \param Loc The location where the distribute construct was encountered. + /// \param AllocaIP The insertion points to be used for alloca instructions. + /// \param BodyGenCB Callback that will generate the region code. + InsertPointOrErrorTy createDistribute(const LocationDescription &Loc, +InsertPointTy AllocaIP, +BodyGenCallbackTy BodyGenCB); + /// Generate conditional branch and relevant BasicBlocks through which private /// threads copy the 'copyin' variables from Master copy to threadprivate /// copies. diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp index 04acab1e5765e..9e380bf2d3dbe 100644 --- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp +++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp @@ -2295,7 +2295,8 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createSections( return LoopInfo.takeError(); InsertPointOrErrorTy WsloopIP = - applyStaticWorkshareLoop(Loc.DL, *LoopInfo, AllocaIP, !IsNowait); + applyStaticWorkshareLoop(Loc.DL, *LoopInfo, AllocaIP, + WorksharingLoopType::ForStaticLoop, !IsNowait); if (!WsloopIP) return WsloopIP.takeError(); InsertPointTy AfterIP = *WsloopIP; @@ -4145,10 +4146,9 @@ static FunctionCallee getKmpcForStaticInitForType(Type *Ty, Module &M, llvm_unreachable("unknown OpenMP loop iterator bitwidth"); } -OpenMPIRBuilder::InsertPointOrErrorTy -OpenMPIRBuilder::applyStaticWorkshareLoop(DebugLoc DL, CanonicalLoopInfo *CLI, - InsertPointTy AllocaIP, - bool NeedsBarrier) { +OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::applyStaticWorkshareLoop( +DebugLoc DL, CanonicalLoopInfo *CLI, InsertPointTy AllocaIP, +WorksharingLoopType LoopType, bool NeedsBarrier) { assert(CLI->isValid() && "Requires a valid canonical loop"); assert(!isConflictIP(AllocaIP, 
CLI->getPreheaderIP()) && "Require dedicated allocate IP"); @@ -4191,8 +4191,12 @@ OpenMPIRBuilder::applyStaticWorkshareLoop(DebugLoc DL, CanonicalLoopInfo *CLI, Value *ThreadNum = getOrCreateThreadID(SrcLoc); - Constant *SchedulingType = ConstantInt::get( - I32Type, static_cast(OMPScheduleType::UnorderedStatic)); + OMPScheduleType SchedType = + (LoopType == WorksharingLoopType::DistributeStaticLoop) + ? OMPScheduleType::OrderedDistribute + : OMPScheduleType::UnorderedStatic; + Constant *SchedulingType = + ConstantInt::get(I32Type, static_cast(SchedType)); // Call the "init" function and update
[llvm-branch-commits] [llvm] [OpenMPIRBuilder] Split calculation of canonical loop trip count, NFC (PR #127820)
https://github.com/tblah approved this pull request. LGTM, thanks! https://github.com/llvm/llvm-project/pull/127820 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [lldb] release/20.x: Reland: [clang] fix P3310 overload resolution flag propagation (#125791) (PR #127779)
https://github.com/mizvekov closed https://github.com/llvm/llvm-project/pull/127779 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [lldb] Backport: [clang] fix P3310 overload resolution flag propagation (#125791) (PR #127831)
llvmbot wrote: @llvm/pr-subscribers-lldb Author: Matheus Izvekov (mizvekov) Changes Class templates might be only instantiated when they are required to be complete, but checking the template args against the primary template is immediate. This result is cached so that later when the class is instantiated, checking against the primary template is not repeated. The 'MatchedPackOnParmToNonPackOnArg' flag is also produced upon checking against the primary template, so it needs to be cached in the specialization as well. This fixes a bug which has not been in any release, so there are no release notes. Fixes #125290 --- Patch is 232.71 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/127831.diff 16 Files Affected: - (modified) clang/include/clang/AST/DeclTemplate.h (+14-2) - (modified) clang/include/clang/Sema/Sema.h (+2-2) - (modified) clang/lib/AST/ASTImporter.cpp (+3-3) - (modified) clang/lib/AST/DeclTemplate.cpp (+24-23) - (modified) clang/lib/AST/JSONNodeDumper.cpp (+5) - (modified) clang/lib/AST/TextNodeDumper.cpp (+4-1) - (modified) clang/lib/Sema/SemaTemplate.cpp (+5-3) - (modified) clang/lib/Sema/SemaTemplateDeduction.cpp (-2) - (modified) clang/lib/Sema/SemaTemplateInstantiateDecl.cpp (+1-1) - (modified) clang/lib/Sema/SemaType.cpp (+2-1) - (modified) clang/lib/Serialization/ASTReaderDecl.cpp (+1) - (modified) clang/lib/Serialization/ASTWriterDecl.cpp (+1) - (modified) clang/test/AST/ast-dump-templates.cpp (+6045-2) - (modified) clang/test/AST/gen_ast_dump_json_test.py (+17-4) - (modified) clang/test/SemaTemplate/cwg2398.cpp (+20) - (modified) lldb/source/Plugins/ExpressionParser/Clang/CxxModuleHandler.cpp (+2-1) ``diff diff --git a/clang/include/clang/AST/DeclTemplate.h b/clang/include/clang/AST/DeclTemplate.h index 9ecff2c898acd..03c43765206b1 100644 --- a/clang/include/clang/AST/DeclTemplate.h +++ b/clang/include/clang/AST/DeclTemplate.h @@ -1841,15 +1841,23 @@ class ClassTemplateSpecializationDecl : public 
CXXRecordDecl, LLVM_PREFERRED_TYPE(TemplateSpecializationKind) unsigned SpecializationKind : 3; + /// Indicate that we have matched a parameter pack with a non pack + /// argument, when the opposite match is also allowed (strict pack match). + /// This needs to be cached as deduction is performed during declaration, + /// and we need the information to be preserved so that it is consistent + /// during instantiation. + bool MatchedPackOnParmToNonPackOnArg : 1; + protected: ClassTemplateSpecializationDecl(ASTContext &Context, Kind DK, TagKind TK, DeclContext *DC, SourceLocation StartLoc, SourceLocation IdLoc, ClassTemplateDecl *SpecializedTemplate, ArrayRef Args, + bool MatchedPackOnParmToNonPackOnArg, ClassTemplateSpecializationDecl *PrevDecl); - explicit ClassTemplateSpecializationDecl(ASTContext &C, Kind DK); + ClassTemplateSpecializationDecl(ASTContext &C, Kind DK); public: friend class ASTDeclReader; @@ -1859,7 +1867,7 @@ class ClassTemplateSpecializationDecl : public CXXRecordDecl, Create(ASTContext &Context, TagKind TK, DeclContext *DC, SourceLocation StartLoc, SourceLocation IdLoc, ClassTemplateDecl *SpecializedTemplate, - ArrayRef Args, + ArrayRef Args, bool MatchedPackOnParmToNonPackOnArg, ClassTemplateSpecializationDecl *PrevDecl); static ClassTemplateSpecializationDecl *CreateDeserialized(ASTContext &C, GlobalDeclID ID); @@ -1930,6 +1938,10 @@ class ClassTemplateSpecializationDecl : public CXXRecordDecl, SpecializationKind = TSK; } + bool hasMatchedPackOnParmToNonPackOnArg() const { +return MatchedPackOnParmToNonPackOnArg; + } + /// Get the point of instantiation (if any), or null if none. 
SourceLocation getPointOfInstantiation() const { return PointOfInstantiation; diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h index eb82d1b978e94..a30a7076ea5d4 100644 --- a/clang/include/clang/Sema/Sema.h +++ b/clang/include/clang/Sema/Sema.h @@ -13491,8 +13491,8 @@ class Sema final : public SemaBase { bool InstantiateClassTemplateSpecialization( SourceLocation PointOfInstantiation, ClassTemplateSpecializationDecl *ClassTemplateSpec, - TemplateSpecializationKind TSK, bool Complain = true, - bool PrimaryHasMatchedPackOnParmToNonPackOnArg = false); + TemplateSpecializationKind TSK, bool Complain, + bool PrimaryHasMatchedPackOnParmToNonPackOnArg); /// Instantiates the definitions of all of the member /// of the given class, which is an instantiation of a class template diff --git a/clang/lib/AST/ASTImporter.cpp b/clang/lib/AST/ASTImporter.cpp index 09fa10f716e
[llvm-branch-commits] [clang] [lldb] release/20.x: Reland: [clang] fix P3310 overload resolution flag propagation (#125791) (PR #127779)
mizvekov wrote: Closing as this needs manual rebase. https://github.com/llvm/llvm-project/pull/127779 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [lldb] Backport: [clang] fix P3310 overload resolution flag propagation (#125791) (PR #127831)
https://github.com/mizvekov milestoned https://github.com/llvm/llvm-project/pull/127831 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [flang] [Flang][OpenMP] Allow host evaluation of loop bounds for distribute (PR #127822)
skatrak wrote: PR stack: - #115475 - #127217 - #127816 - #127817 - #127818 - #127819 - #127820 - #127821 - :arrow_right: #127822 https://github.com/llvm/llvm-project/pull/127822 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [clang] Implement instantiation context note for checking template parameters (PR #126088)
@@ -1909,7 +1909,22 @@ class Sema final : public SemaBase { /// '\#pragma clang attribute push' directives to the given declaration. void AddPragmaAttributes(Scope *S, Decl *D); - void PrintPragmaAttributeInstantiationPoint(); + using DiagFuncRef = + llvm::function_ref; + auto getDefaultDiagFunc() { +return [this](SourceLocation Loc, PartialDiagnostic PD) { + // This bypasses a lof of the filters in the diag engine, as it's mizvekov wrote: Not part of this PR, this is currently not rebased. This has been fixed in the parent PR, and will disappear when rebased. https://github.com/llvm/llvm-project/pull/126088 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [lldb] Backport: [clang] fix P3310 overload resolution flag propagation (#125791) (PR #127831)
llvmbot wrote: @llvm/pr-subscribers-clang-modules Author: Matheus Izvekov (mizvekov) Changes Class templates might be only instantiated when they are required to be complete, but checking the template args against the primary template is immediate. This result is cached so that later when the class is instantiated, checking against the primary template is not repeated. The 'MatchedPackOnParmToNonPackOnArg' flag is also produced upon checking against the primary template, so it needs to be cached in the specialization as well. This fixes a bug which has not been in any release, so there are no release notes. Fixes #125290 --- Patch is 232.71 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/127831.diff 16 Files Affected: - (modified) clang/include/clang/AST/DeclTemplate.h (+14-2) - (modified) clang/include/clang/Sema/Sema.h (+2-2) - (modified) clang/lib/AST/ASTImporter.cpp (+3-3) - (modified) clang/lib/AST/DeclTemplate.cpp (+24-23) - (modified) clang/lib/AST/JSONNodeDumper.cpp (+5) - (modified) clang/lib/AST/TextNodeDumper.cpp (+4-1) - (modified) clang/lib/Sema/SemaTemplate.cpp (+5-3) - (modified) clang/lib/Sema/SemaTemplateDeduction.cpp (-2) - (modified) clang/lib/Sema/SemaTemplateInstantiateDecl.cpp (+1-1) - (modified) clang/lib/Sema/SemaType.cpp (+2-1) - (modified) clang/lib/Serialization/ASTReaderDecl.cpp (+1) - (modified) clang/lib/Serialization/ASTWriterDecl.cpp (+1) - (modified) clang/test/AST/ast-dump-templates.cpp (+6045-2) - (modified) clang/test/AST/gen_ast_dump_json_test.py (+17-4) - (modified) clang/test/SemaTemplate/cwg2398.cpp (+20) - (modified) lldb/source/Plugins/ExpressionParser/Clang/CxxModuleHandler.cpp (+2-1) ``diff diff --git a/clang/include/clang/AST/DeclTemplate.h b/clang/include/clang/AST/DeclTemplate.h index 9ecff2c898acd..03c43765206b1 100644 --- a/clang/include/clang/AST/DeclTemplate.h +++ b/clang/include/clang/AST/DeclTemplate.h @@ -1841,15 +1841,23 @@ class ClassTemplateSpecializationDecl : 
public CXXRecordDecl, LLVM_PREFERRED_TYPE(TemplateSpecializationKind) unsigned SpecializationKind : 3; + /// Indicate that we have matched a parameter pack with a non pack + /// argument, when the opposite match is also allowed (strict pack match). + /// This needs to be cached as deduction is performed during declaration, + /// and we need the information to be preserved so that it is consistent + /// during instantiation. + bool MatchedPackOnParmToNonPackOnArg : 1; + protected: ClassTemplateSpecializationDecl(ASTContext &Context, Kind DK, TagKind TK, DeclContext *DC, SourceLocation StartLoc, SourceLocation IdLoc, ClassTemplateDecl *SpecializedTemplate, ArrayRef Args, + bool MatchedPackOnParmToNonPackOnArg, ClassTemplateSpecializationDecl *PrevDecl); - explicit ClassTemplateSpecializationDecl(ASTContext &C, Kind DK); + ClassTemplateSpecializationDecl(ASTContext &C, Kind DK); public: friend class ASTDeclReader; @@ -1859,7 +1867,7 @@ class ClassTemplateSpecializationDecl : public CXXRecordDecl, Create(ASTContext &Context, TagKind TK, DeclContext *DC, SourceLocation StartLoc, SourceLocation IdLoc, ClassTemplateDecl *SpecializedTemplate, - ArrayRef Args, + ArrayRef Args, bool MatchedPackOnParmToNonPackOnArg, ClassTemplateSpecializationDecl *PrevDecl); static ClassTemplateSpecializationDecl *CreateDeserialized(ASTContext &C, GlobalDeclID ID); @@ -1930,6 +1938,10 @@ class ClassTemplateSpecializationDecl : public CXXRecordDecl, SpecializationKind = TSK; } + bool hasMatchedPackOnParmToNonPackOnArg() const { +return MatchedPackOnParmToNonPackOnArg; + } + /// Get the point of instantiation (if any), or null if none. 
SourceLocation getPointOfInstantiation() const { return PointOfInstantiation; diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h index eb82d1b978e94..a30a7076ea5d4 100644 --- a/clang/include/clang/Sema/Sema.h +++ b/clang/include/clang/Sema/Sema.h @@ -13491,8 +13491,8 @@ class Sema final : public SemaBase { bool InstantiateClassTemplateSpecialization( SourceLocation PointOfInstantiation, ClassTemplateSpecializationDecl *ClassTemplateSpec, - TemplateSpecializationKind TSK, bool Complain = true, - bool PrimaryHasMatchedPackOnParmToNonPackOnArg = false); + TemplateSpecializationKind TSK, bool Complain, + bool PrimaryHasMatchedPackOnParmToNonPackOnArg); /// Instantiates the definitions of all of the member /// of the given class, which is an instantiation of a class template diff --git a/clang/lib/AST/ASTImporter.cpp b/clang/lib/AST/ASTImporter.cpp index 09
[llvm-branch-commits] [mlir] [MLIR][OpenMP] Support target SPMD (PR #127821)
https://github.com/Meinersbur edited https://github.com/llvm/llvm-project/pull/127821 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [OpenMPIRBuilder] Add support for distribute constructs (PR #127816)
skatrak wrote: PR stack: - #115475 - #127217 - :arrow_right: #127816 - #127817 - #127818 - #127819 - #127820 - #127821 - #127822 https://github.com/llvm/llvm-project/pull/127816 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [mlir] [MLIR][OpenMP] Host lowering of standalone distribute (PR #127817)
skatrak wrote: PR stack: - #115475 - #127217 - #127816 - :arrow_right: #127817 - #127818 - #127819 - #127820 - #127821 - #127822 https://github.com/llvm/llvm-project/pull/127817 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [mlir] [MLIR][OpenMP] Support target SPMD (PR #127821)
skatrak wrote: PR stack: - #115475 - #127217 - #127816 - #127817 - #127818 - #127819 - #127820 - :arrow_right: #127821 - #127822 https://github.com/llvm/llvm-project/pull/127821 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [mlir] [MLIR][OpenMP] Host lowering of distribute-parallel-do/for (PR #127819)
skatrak wrote: PR stack: - #115475 - #127217 - #127816 - #127817 - #127818 - :arrow_right: #127819 - #127820 - #127821 - #127822 https://github.com/llvm/llvm-project/pull/127819 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [flang] [Flang][OpenMP] Allow host evaluation of loop bounds for distribute (PR #127822)
llvmbot wrote: @llvm/pr-subscribers-flang-fir-hlfir Author: Sergio Afonso (skatrak) Changes This patch adds `target teams distribute [simd]` and equivalent construct nests to the list of cases where loop bounds can be evaluated in the host, as they represent kernels for which the trip count must also be evaluated in advance to the kernel call. --- Full diff: https://github.com/llvm/llvm-project/pull/127822.diff 2 Files Affected: - (modified) flang/lib/Lower/OpenMP/OpenMP.cpp (+7-5) - (modified) flang/test/Lower/OpenMP/host-eval.f90 (+103) ``diff diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp index bd794033cdf11..8c80453610473 100644 --- a/flang/lib/Lower/OpenMP/OpenMP.cpp +++ b/flang/lib/Lower/OpenMP/OpenMP.cpp @@ -562,8 +562,11 @@ static void processHostEvalClauses(lower::AbstractConverter &converter, [[fallthrough]]; case OMPD_distribute_parallel_do: case OMPD_distribute_parallel_do_simd: - cp.processCollapse(loc, eval, hostInfo.ops, hostInfo.iv); cp.processNumThreads(stmtCtx, hostInfo.ops); + [[fallthrough]]; +case OMPD_distribute: +case OMPD_distribute_simd: + cp.processCollapse(loc, eval, hostInfo.ops, hostInfo.iv); break; // Cases where 'teams' clauses might be present, and target SPMD is @@ -573,10 +576,8 @@ static void processHostEvalClauses(lower::AbstractConverter &converter, [[fallthrough]]; case OMPD_target_teams: cp.processNumTeams(stmtCtx, hostInfo.ops); - processSingleNestedIf([](Directive nestedDir) { -return nestedDir == OMPD_distribute_parallel_do || - nestedDir == OMPD_distribute_parallel_do_simd; - }); + processSingleNestedIf( + [](Directive nestedDir) { return topDistributeSet.test(nestedDir); }); break; // Cases where only 'teams' host-evaluated clauses might be present. 
@@ -586,6 +587,7 @@ static void processHostEvalClauses(lower::AbstractConverter &converter, [[fallthrough]]; case OMPD_target_teams_distribute: case OMPD_target_teams_distribute_simd: + cp.processCollapse(loc, eval, hostInfo.ops, hostInfo.iv); cp.processNumTeams(stmtCtx, hostInfo.ops); break; diff --git a/flang/test/Lower/OpenMP/host-eval.f90 b/flang/test/Lower/OpenMP/host-eval.f90 index 32c52462b86a7..65258c91e5daf 100644 --- a/flang/test/Lower/OpenMP/host-eval.f90 +++ b/flang/test/Lower/OpenMP/host-eval.f90 @@ -155,3 +155,106 @@ subroutine distribute_parallel_do_simd() !$omp end distribute parallel do simd !$omp end teams end subroutine distribute_parallel_do_simd + +! BOTH-LABEL: func.func @_QPdistribute +subroutine distribute() + ! BOTH: omp.target + + ! HOST-SAME: host_eval(%{{.*}} -> %[[LB:.*]], %{{.*}} -> %[[UB:.*]], %{{.*}} -> %[[STEP:.*]] : i32, i32, i32) + + ! DEVICE-NOT: host_eval({{.*}}) + ! DEVICE-SAME: { + + ! BOTH: omp.teams + !$omp target teams + + ! BOTH: omp.distribute + ! BOTH-NEXT: omp.loop_nest + + ! HOST-SAME: (%{{.*}}) : i32 = (%[[LB]]) to (%[[UB]]) inclusive step (%[[STEP]]) + !$omp distribute + do i=1,10 +call foo() + end do + !$omp end distribute + !$omp end target teams + + ! BOTH: omp.target + ! BOTH-NOT: host_eval({{.*}}) + ! BOTH-SAME: { + ! BOTH: omp.teams + !$omp target teams + call foo() !< Prevents this from being Generic-SPMD. + + ! BOTH: omp.distribute + !$omp distribute + do i=1,10 +call foo() + end do + !$omp end distribute + !$omp end target teams + + ! BOTH: omp.teams + !$omp teams + + ! BOTH: omp.distribute + !$omp distribute + do i=1,10 +call foo() + end do + !$omp end distribute + !$omp end teams +end subroutine distribute + +! BOTH-LABEL: func.func @_QPdistribute_simd +subroutine distribute_simd() + ! BOTH: omp.target + + ! HOST-SAME: host_eval(%{{.*}} -> %[[LB:.*]], %{{.*}} -> %[[UB:.*]], %{{.*}} -> %[[STEP:.*]] : i32, i32, i32) + + ! DEVICE-NOT: host_eval({{.*}}) + ! DEVICE-SAME: { + + ! 
BOTH: omp.teams + !$omp target teams + + ! BOTH: omp.distribute + ! BOTH-NEXT: omp.simd + ! BOTH-NEXT: omp.loop_nest + + ! HOST-SAME: (%{{.*}}) : i32 = (%[[LB]]) to (%[[UB]]) inclusive step (%[[STEP]]) + !$omp distribute simd + do i=1,10 +call foo() + end do + !$omp end distribute simd + !$omp end target teams + + ! BOTH: omp.target + ! BOTH-NOT: host_eval({{.*}}) + ! BOTH-SAME: { + ! BOTH: omp.teams + !$omp target teams + call foo() !< Prevents this from being Generic-SPMD. + + ! BOTH: omp.distribute + ! BOTH-NEXT: omp.simd + !$omp distribute simd + do i=1,10 +call foo() + end do + !$omp end distribute simd + !$omp end target teams + + ! BOTH: omp.teams + !$omp teams + + ! BOTH: omp.distribute + ! BOTH-NEXT: omp.simd + !$omp distribute simd + do i=1,10 +call foo() + end do + !$omp end distribute simd + !$omp end teams +end subroutine distribute_simd `` https://github.com/llvm/llvm-project/pull/127822
[llvm-branch-commits] [llvm] [OpenMPIRBuilder] Split calculation of canonical loop trip count, NFC (PR #127820)
skatrak wrote: PR stack: - #115475 - #127217 - #127816 - #127817 - #127818 - #127819 - :arrow_right: #127820 - #127821 - #127822 https://github.com/llvm/llvm-project/pull/127820 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [OpenMPIRBuilder] Add support for distribute constructs (PR #127816)
llvmbot wrote: @llvm/pr-subscribers-flang-openmp Author: Sergio Afonso (skatrak) Changes This patch adds the `OpenMPIRBuilder::createDistribute()` function and updates `OpenMPIRBuilder::applyStaticWorkshareLoop()` in preparation for adding `distribute` support to flang. --- Full diff: https://github.com/llvm/llvm-project/pull/127816.diff 2 Files Affected: - (modified) llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h (+13-4) - (modified) llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp (+51-8) ``diff diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h index d25077cae63e4..9ad85413acd34 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h +++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h @@ -1004,12 +1004,12 @@ class OpenMPIRBuilder { /// preheader of the loop. /// \param NeedsBarrier Indicates whether a barrier must be inserted after /// the loop. + /// \param LoopType Type of workshare loop. /// /// \returns Point where to insert code after the workshare construct. - InsertPointOrErrorTy applyStaticWorkshareLoop(DebugLoc DL, -CanonicalLoopInfo *CLI, -InsertPointTy AllocaIP, -bool NeedsBarrier); + InsertPointOrErrorTy applyStaticWorkshareLoop( + DebugLoc DL, CanonicalLoopInfo *CLI, InsertPointTy AllocaIP, + omp::WorksharingLoopType LoopType, bool NeedsBarrier); /// Modifies the canonical loop a statically-scheduled workshare loop with a /// user-specified chunk size. @@ -2660,6 +2660,15 @@ class OpenMPIRBuilder { Value *NumTeamsLower = nullptr, Value *NumTeamsUpper = nullptr, Value *ThreadLimit = nullptr, Value *IfExpr = nullptr); + /// Generator for `#omp distribute` + /// + /// \param Loc The location where the distribute construct was encountered. + /// \param AllocaIP The insertion points to be used for alloca instructions. + /// \param BodyGenCB Callback that will generate the region code. 
+ InsertPointOrErrorTy createDistribute(const LocationDescription &Loc, +InsertPointTy AllocaIP, +BodyGenCallbackTy BodyGenCB); + /// Generate conditional branch and relevant BasicBlocks through which private /// threads copy the 'copyin' variables from Master copy to threadprivate /// copies. diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp index 04acab1e5765e..9e380bf2d3dbe 100644 --- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp +++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp @@ -2295,7 +2295,8 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createSections( return LoopInfo.takeError(); InsertPointOrErrorTy WsloopIP = - applyStaticWorkshareLoop(Loc.DL, *LoopInfo, AllocaIP, !IsNowait); + applyStaticWorkshareLoop(Loc.DL, *LoopInfo, AllocaIP, + WorksharingLoopType::ForStaticLoop, !IsNowait); if (!WsloopIP) return WsloopIP.takeError(); InsertPointTy AfterIP = *WsloopIP; @@ -4145,10 +4146,9 @@ static FunctionCallee getKmpcForStaticInitForType(Type *Ty, Module &M, llvm_unreachable("unknown OpenMP loop iterator bitwidth"); } -OpenMPIRBuilder::InsertPointOrErrorTy -OpenMPIRBuilder::applyStaticWorkshareLoop(DebugLoc DL, CanonicalLoopInfo *CLI, - InsertPointTy AllocaIP, - bool NeedsBarrier) { +OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::applyStaticWorkshareLoop( +DebugLoc DL, CanonicalLoopInfo *CLI, InsertPointTy AllocaIP, +WorksharingLoopType LoopType, bool NeedsBarrier) { assert(CLI->isValid() && "Requires a valid canonical loop"); assert(!isConflictIP(AllocaIP, CLI->getPreheaderIP()) && "Require dedicated allocate IP"); @@ -4191,8 +4191,12 @@ OpenMPIRBuilder::applyStaticWorkshareLoop(DebugLoc DL, CanonicalLoopInfo *CLI, Value *ThreadNum = getOrCreateThreadID(SrcLoc); - Constant *SchedulingType = ConstantInt::get( - I32Type, static_cast(OMPScheduleType::UnorderedStatic)); + OMPScheduleType SchedType = + (LoopType == WorksharingLoopType::DistributeStaticLoop) + ? 
OMPScheduleType::OrderedDistribute + : OMPScheduleType::UnorderedStatic; + Constant *SchedulingType = + ConstantInt::get(I32Type, static_cast(SchedType)); // Call the "init" function and update the trip count of the loop with the // value it produced. @@ -4452,6 +4456,7 @@ static void createTargetLoopWorkshareCall( RealArgs.push_back(TripCount); if (LoopType == WorksharingLoopType::DistributeStaticLoop) { RealArgs.push_back(ConstantInt::get(TripCountTy, 0)); +Builder.restoreIP({InsertBlock, std::prev(InsertBlock->end())});
[llvm-branch-commits] [mlir] [MLIR][OpenMP] Support target SPMD (PR #127821)
llvmbot wrote: @llvm/pr-subscribers-mlir-llvm Author: Sergio Afonso (skatrak) Changes This patch implements MLIR to LLVM IR translation of host-evaluated loop bounds, completing initial support for `target teams distribute parallel do [simd]` and `target teams distribute [simd]`. --- Full diff: https://github.com/llvm/llvm-project/pull/127821.diff 3 Files Affected: - (modified) mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp (+63-20) - (added) mlir/test/Target/LLVMIR/openmp-target-spmd.mlir (+96) - (modified) mlir/test/Target/LLVMIR/openmp-todo.mlir (-24) ``diff diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp index 7e8a9bdb5b133..93a88c89162d6 100644 --- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp @@ -176,15 +176,6 @@ static LogicalResult checkImplementationStatus(Operation &op) { if (op.getHint()) op.emitWarning("hint clause discarded"); }; - auto checkHostEval = [](auto op, LogicalResult &result) { -// Host evaluated clauses are supported, except for loop bounds. 
-for (BlockArgument arg : - cast(*op).getHostEvalBlockArgs()) - for (Operation *user : arg.getUsers()) -if (isa(user)) - result = op.emitError("not yet implemented: host evaluation of loop " -"bounds in omp.target operation"); - }; auto checkInReduction = [&todo](auto op, LogicalResult &result) { if (!op.getInReductionVars().empty() || op.getInReductionByref() || op.getInReductionSyms()) @@ -321,7 +312,6 @@ static LogicalResult checkImplementationStatus(Operation &op) { checkBare(op, result); checkDevice(op, result); checkHasDeviceAddr(op, result); -checkHostEval(op, result); checkInReduction(op, result); checkIsDevicePtr(op, result); checkPrivate(op, result); @@ -4054,9 +4044,13 @@ createDeviceArgumentAccessor(MapInfoData &mapData, llvm::Argument &arg, /// /// Loop bounds and steps are only optionally populated, if output vectors are /// provided. -static void extractHostEvalClauses(omp::TargetOp targetOp, Value &numThreads, - Value &numTeamsLower, Value &numTeamsUpper, - Value &threadLimit) { +static void +extractHostEvalClauses(omp::TargetOp targetOp, Value &numThreads, + Value &numTeamsLower, Value &numTeamsUpper, + Value &threadLimit, + llvm::SmallVectorImpl *lowerBounds = nullptr, + llvm::SmallVectorImpl *upperBounds = nullptr, + llvm::SmallVectorImpl *steps = nullptr) { auto blockArgIface = llvm::cast(*targetOp); for (auto item : llvm::zip_equal(targetOp.getHostEvalVars(), blockArgIface.getHostEvalBlockArgs())) { @@ -4081,11 +4075,26 @@ static void extractHostEvalClauses(omp::TargetOp targetOp, Value &numThreads, llvm_unreachable("unsupported host_eval use"); }) .Case([&](omp::LoopNestOp loopOp) { -// TODO: Extract bounds and step values. Currently, this cannot be -// reached because translation would have been stopped earlier as a -// result of `checkImplementationStatus` detecting and reporting -// this situation. 
-llvm_unreachable("unsupported host_eval use"); +auto processBounds = +[&](OperandRange opBounds, +llvm::SmallVectorImpl *outBounds) -> bool { + bool found = false; + for (auto [i, lb] : llvm::enumerate(opBounds)) { +if (lb == blockArg) { + found = true; + if (outBounds) +(*outBounds)[i] = hostEvalVar; +} + } + return found; +}; +bool found = +processBounds(loopOp.getLoopLowerBounds(), lowerBounds); +found = processBounds(loopOp.getLoopUpperBounds(), upperBounds) || +found; +found = processBounds(loopOp.getLoopSteps(), steps) || found; +if (!found) + llvm_unreachable("unsupported host_eval use"); }) .Default([](Operation *) { llvm_unreachable("unsupported host_eval use"); @@ -4222,6 +4231,7 @@ initTargetDefaultAttrs(omp::TargetOp targetOp, combinedMaxThreadsVal = maxThreadsVal; // Update kernel bounds structure for the `OpenMPIRBuilder` to use. + attrs.ExecFlags = targetOp.getKernelExecFlags(); attrs.MinTeams = minTeamsVal; attrs.MaxTeams.front() = maxTeamsVal; attrs.MinThreads = 1; @@ -4239,9 +4249,15 @@ initTargetRuntimeAttrs(llvm::IRBuilderBase &build
[llvm-branch-commits] [mlir] [MLIR][OpenMP] Host lowering of standalone distribute (PR #127817)
llvmbot wrote: @llvm/pr-subscribers-flang-openmp Author: Sergio Afonso (skatrak) Changes This patch adds MLIR to LLVM IR translation support for standalone `omp.distribute` operations, as well as `distribute simd` through ignoring SIMD information (similarly to `do/for simd`). --- Full diff: https://github.com/llvm/llvm-project/pull/127817.diff 3 Files Affected: - (modified) mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp (+78) - (modified) mlir/test/Target/LLVMIR/openmp-llvm.mlir (+37) - (modified) mlir/test/Target/LLVMIR/openmp-todo.mlir (+63-3) ``diff diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp index a5ff3eff6439f..c8221a9f9854a 100644 --- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp @@ -164,6 +164,10 @@ static LogicalResult checkImplementationStatus(Operation &op) { if (op.getDevice()) result = todo("device"); }; + auto checkDistSchedule = [&todo](auto op, LogicalResult &result) { +if (op.getDistScheduleChunkSize()) + result = todo("dist_schedule with chunk_size"); + }; auto checkHasDeviceAddr = [&todo](auto op, LogicalResult &result) { if (!op.getHasDeviceAddrVars().empty()) result = todo("has_device_addr"); @@ -255,6 +259,16 @@ static LogicalResult checkImplementationStatus(Operation &op) { LogicalResult result = success(); llvm::TypeSwitch(op) + .Case([&](omp::DistributeOp op) { +if (op.isComposite() && +isa_and_present(op.getNestedWrapper())) + result = op.emitError() << "not yet implemented: " + "composite omp.distribute + omp.wsloop"; +checkAllocate(op, result); +checkDistSchedule(op, result); +checkOrder(op, result); +checkPrivate(op, result); + }) .Case([&](omp::OrderedRegionOp op) { checkParLevelSimd(op, result); }) .Case([&](omp::SectionsOp op) { checkAllocate(op, result); @@ -3755,6 +3769,67 @@ convertOmpTargetData(Operation 
*op, llvm::IRBuilderBase &builder, return success(); } +static LogicalResult +convertOmpDistribute(Operation &opInst, llvm::IRBuilderBase &builder, + LLVM::ModuleTranslation &moduleTranslation) { + llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder(); + auto distributeOp = cast(opInst); + if (failed(checkImplementationStatus(opInst))) +return failure(); + + using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; + auto bodyGenCB = [&](InsertPointTy allocaIP, + InsertPointTy codeGenIP) -> llvm::Error { +// DistributeOp has only one region associated with it. +builder.restoreIP(codeGenIP); + +llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder(); +llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder); +llvm::Expected regionBlock = +convertOmpOpRegions(distributeOp.getRegion(), "omp.distribute.region", +builder, moduleTranslation); +if (!regionBlock) + return regionBlock.takeError(); +builder.SetInsertPoint(*regionBlock, (*regionBlock)->begin()); + +// TODO: Add support for clauses which are valid for DISTRIBUTE constructs. +// Static schedule is the default. 
+auto schedule = omp::ClauseScheduleKind::Static; +bool isOrdered = false; +std::optional scheduleMod; +bool isSimd = false; +llvm::omp::WorksharingLoopType workshareLoopType = +llvm::omp::WorksharingLoopType::DistributeStaticLoop; +bool loopNeedsBarrier = false; +llvm::Value *chunk = nullptr; + +llvm::CanonicalLoopInfo *loopInfo = *findCurrentLoopInfo(moduleTranslation); +llvm::OpenMPIRBuilder::InsertPointOrErrorTy wsloopIP = +ompBuilder->applyWorkshareLoop( +ompLoc.DL, loopInfo, allocaIP, loopNeedsBarrier, +convertToScheduleKind(schedule), chunk, isSimd, +scheduleMod == omp::ScheduleModifier::monotonic, +scheduleMod == omp::ScheduleModifier::nonmonotonic, isOrdered, +workshareLoopType); + +if (!wsloopIP) + return wsloopIP.takeError(); +return llvm::Error::success(); + }; + + llvm::OpenMPIRBuilder::InsertPointTy allocaIP = + findAllocaInsertPoint(builder, moduleTranslation); + llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder); + llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP = + ompBuilder->createDistribute(ompLoc, allocaIP, bodyGenCB); + + if (failed(handleError(afterIP, opInst))) +return failure(); + + builder.restoreIP(*afterIP); + return success(); +} + /// Lowers the FlagsAttr which is applied to the module on the device /// pass when offloading, this attribute contains OpenMP RTL globals that can /// be passed as flags to the frontend, otherw
[llvm-branch-commits] [llvm] [OpenMPIRBuilder] Add support for distribute-parallel-for/do constructs (PR #127818)
llvmbot wrote: @llvm/pr-subscribers-flang-openmp Author: Sergio Afonso (skatrak) Changes This patch adds codegen for `kmpc_dist_for_static_init` runtime calls, used to support worksharing a single loop across teams and threads. This can be used to implement `distribute parallel for/do` support. --- Full diff: https://github.com/llvm/llvm-project/pull/127818.diff 1 Files Affected: - (modified) llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp (+30-4) ``diff diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp index 9e380bf2d3dbe..7788897fc0795 100644 --- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp +++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp @@ -4130,6 +4130,23 @@ Expected OpenMPIRBuilder::createCanonicalLoop( return createCanonicalLoop(LoopLoc, BodyGen, TripCount, Name); } +// Returns an LLVM function to call for initializing loop bounds using OpenMP +// static scheduling for composite `distribute parallel for` depending on +// `type`. Only i32 and i64 are supported by the runtime. Always interpret +// integers as unsigned similarly to CanonicalLoopInfo. +static FunctionCallee +getKmpcDistForStaticInitForType(Type *Ty, Module &M, +OpenMPIRBuilder &OMPBuilder) { + unsigned Bitwidth = Ty->getIntegerBitWidth(); + if (Bitwidth == 32) +return OMPBuilder.getOrCreateRuntimeFunction( +M, omp::RuntimeFunction::OMPRTL___kmpc_dist_for_static_init_4u); + if (Bitwidth == 64) +return OMPBuilder.getOrCreateRuntimeFunction( +M, omp::RuntimeFunction::OMPRTL___kmpc_dist_for_static_init_8u); + llvm_unreachable("unknown OpenMP loop iterator bitwidth"); +} + // Returns an LLVM function to call for initializing loop bounds using OpenMP // static scheduling depending on `type`. Only i32 and i64 are supported by the // runtime. Always interpret integers as unsigned similarly to @@ -4164,7 +4181,10 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::applyStaticWorkshareLoop( // Declare useful OpenMP runtime functions. 
Value *IV = CLI->getIndVar(); Type *IVTy = IV->getType(); - FunctionCallee StaticInit = getKmpcForStaticInitForType(IVTy, M, *this); + FunctionCallee StaticInit = + LoopType == WorksharingLoopType::DistributeForStaticLoop + ? getKmpcDistForStaticInitForType(IVTy, M, *this) + : getKmpcForStaticInitForType(IVTy, M, *this); FunctionCallee StaticFini = getOrCreateRuntimeFunction(M, omp::OMPRTL___kmpc_for_static_fini); @@ -4200,9 +4220,15 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::applyStaticWorkshareLoop( // Call the "init" function and update the trip count of the loop with the // value it produced. - Builder.CreateCall(StaticInit, - {SrcLoc, ThreadNum, SchedulingType, PLastIter, PLowerBound, - PUpperBound, PStride, One, Zero}); + SmallVector Args( + {SrcLoc, ThreadNum, SchedulingType, PLastIter, PLowerBound, PUpperBound}); + if (LoopType == WorksharingLoopType::DistributeForStaticLoop) { +Value *PDistUpperBound = +Builder.CreateAlloca(IVTy, nullptr, "p.distupperbound"); +Args.push_back(PDistUpperBound); + } + Args.append({PStride, One, Zero}); + Builder.CreateCall(StaticInit, Args); Value *LowerBound = Builder.CreateLoad(IVTy, PLowerBound); Value *InclusiveUpperBound = Builder.CreateLoad(IVTy, PUpperBound); Value *TripCountMinusOne = Builder.CreateSub(InclusiveUpperBound, LowerBound); `` https://github.com/llvm/llvm-project/pull/127818 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [OpenMPIRBuilder] Split calculation of canonical loop trip count, NFC (PR #127820)
llvmbot wrote: @llvm/pr-subscribers-flang-openmp Author: Sergio Afonso (skatrak) Changes This patch splits off the calculation of canonical loop trip counts from the creation of canonical loops. This makes it possible to reuse this logic to, for instance, populate the `__tgt_target_kernel` runtime call for SPMD kernels. This feature is used to simplify one of the existing OpenMPIRBuilder tests. --- Full diff: https://github.com/llvm/llvm-project/pull/127820.diff 3 Files Affected: - (modified) llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h (+31-7) - (modified) llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp (+18-9) - (modified) llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp (+3-13) ``diff diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h index 9ad85413acd34..207ca7fb05f62 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h +++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h @@ -728,13 +728,12 @@ class OpenMPIRBuilder { LoopBodyGenCallbackTy BodyGenCB, Value *TripCount, const Twine &Name = "loop"); - /// Generator for the control flow structure of an OpenMP canonical loop. + /// Calculate the trip count of a canonical loop. /// - /// Instead of a logical iteration space, this allows specifying user-defined - /// loop counter values using increment, upper- and lower bounds. To - /// disambiguate the terminology when counting downwards, instead of lower - /// bounds we use \p Start for the loop counter value in the first body - /// iteration. + /// This allows specifying user-defined loop counter values using increment, + /// upper- and lower bounds. To disambiguate the terminology when counting + /// downwards, instead of lower bounds we use \p Start for the loop counter + /// value in the first body iteration. 
/// /// Consider the following limitations: /// @@ -758,7 +757,32 @@ class OpenMPIRBuilder { /// /// for (int i = 0; i < 42; i -= 1u) /// - // + /// \param Loc The insert and source location description. + /// \param Start Value of the loop counter for the first iterations. + /// \param Stop Loop counter values past this will stop the loop. + /// \param Step Loop counter increment after each iteration; negative + /// means counting down. + /// \param IsSigned Whether Start, Stop and Step are signed integers. + /// \param InclusiveStop Whether \p Stop itself is a valid value for the loop + /// counter. + /// \param Name Base name used to derive instruction names. + /// + /// \returns The value holding the calculated trip count. + Value *calculateCanonicalLoopTripCount(const LocationDescription &Loc, + Value *Start, Value *Stop, Value *Step, + bool IsSigned, bool InclusiveStop, + const Twine &Name = "loop"); + + /// Generator for the control flow structure of an OpenMP canonical loop. + /// + /// Instead of a logical iteration space, this allows specifying user-defined + /// loop counter values using increment, upper- and lower bounds. To + /// disambiguate the terminology when counting downwards, instead of lower + /// bounds we use \p Start for the loop counter value in the first body + /// + /// It calls \see calculateCanonicalLoopTripCount for trip count calculations, + /// so limitations of that method apply here as well. + /// /// \param Loc The insert and source location description. /// \param BodyGenCB Callback that will generate the loop body code. /// \param Start Value of the loop counter for the first iterations. 
diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp index 7788897fc0795..eee6e3e54d615 100644 --- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp +++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp @@ -4059,10 +4059,9 @@ OpenMPIRBuilder::createCanonicalLoop(const LocationDescription &Loc, return CL; } -Expected OpenMPIRBuilder::createCanonicalLoop( -const LocationDescription &Loc, LoopBodyGenCallbackTy BodyGenCB, -Value *Start, Value *Stop, Value *Step, bool IsSigned, bool InclusiveStop, -InsertPointTy ComputeIP, const Twine &Name) { +Value *OpenMPIRBuilder::calculateCanonicalLoopTripCount( +const LocationDescription &Loc, Value *Start, Value *Stop, Value *Step, +bool IsSigned, bool InclusiveStop, const Twine &Name) { // Consider the following difficulties (assuming 8-bit signed integers): // * Adding \p Step to the loop counter which passes \p Stop may overflow: @@ -4075,9 +4074,7 @@ Expected OpenMPIRBuilder::createCanonicalLoop( assert(IndVarTy == Stop->getType() && "Stop type mismatch"); assert(IndVarTy == Step->getType() && "Step type mismatch"); - LocationDescription ComputeLoc = -
[llvm-branch-commits] [mlir] [MLIR][OpenMP] Support target SPMD (PR #127821)
llvmbot wrote: @llvm/pr-subscribers-mlir Author: Sergio Afonso (skatrak) Changes This patch implements MLIR to LLVM IR translation of host-evaluated loop bounds, completing initial support for `target teams distribute parallel do [simd]` and `target teams distribute [simd]`. --- Full diff: https://github.com/llvm/llvm-project/pull/127821.diff 3 Files Affected: - (modified) mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp (+63-20) - (added) mlir/test/Target/LLVMIR/openmp-target-spmd.mlir (+96) - (modified) mlir/test/Target/LLVMIR/openmp-todo.mlir (-24) ``diff diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp index 7e8a9bdb5b133..93a88c89162d6 100644 --- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp @@ -176,15 +176,6 @@ static LogicalResult checkImplementationStatus(Operation &op) { if (op.getHint()) op.emitWarning("hint clause discarded"); }; - auto checkHostEval = [](auto op, LogicalResult &result) { -// Host evaluated clauses are supported, except for loop bounds. 
-for (BlockArgument arg : - cast(*op).getHostEvalBlockArgs()) - for (Operation *user : arg.getUsers()) -if (isa(user)) - result = op.emitError("not yet implemented: host evaluation of loop " -"bounds in omp.target operation"); - }; auto checkInReduction = [&todo](auto op, LogicalResult &result) { if (!op.getInReductionVars().empty() || op.getInReductionByref() || op.getInReductionSyms()) @@ -321,7 +312,6 @@ static LogicalResult checkImplementationStatus(Operation &op) { checkBare(op, result); checkDevice(op, result); checkHasDeviceAddr(op, result); -checkHostEval(op, result); checkInReduction(op, result); checkIsDevicePtr(op, result); checkPrivate(op, result); @@ -4054,9 +4044,13 @@ createDeviceArgumentAccessor(MapInfoData &mapData, llvm::Argument &arg, /// /// Loop bounds and steps are only optionally populated, if output vectors are /// provided. -static void extractHostEvalClauses(omp::TargetOp targetOp, Value &numThreads, - Value &numTeamsLower, Value &numTeamsUpper, - Value &threadLimit) { +static void +extractHostEvalClauses(omp::TargetOp targetOp, Value &numThreads, + Value &numTeamsLower, Value &numTeamsUpper, + Value &threadLimit, + llvm::SmallVectorImpl *lowerBounds = nullptr, + llvm::SmallVectorImpl *upperBounds = nullptr, + llvm::SmallVectorImpl *steps = nullptr) { auto blockArgIface = llvm::cast(*targetOp); for (auto item : llvm::zip_equal(targetOp.getHostEvalVars(), blockArgIface.getHostEvalBlockArgs())) { @@ -4081,11 +4075,26 @@ static void extractHostEvalClauses(omp::TargetOp targetOp, Value &numThreads, llvm_unreachable("unsupported host_eval use"); }) .Case([&](omp::LoopNestOp loopOp) { -// TODO: Extract bounds and step values. Currently, this cannot be -// reached because translation would have been stopped earlier as a -// result of `checkImplementationStatus` detecting and reporting -// this situation. 
-llvm_unreachable("unsupported host_eval use"); +auto processBounds = +[&](OperandRange opBounds, +llvm::SmallVectorImpl *outBounds) -> bool { + bool found = false; + for (auto [i, lb] : llvm::enumerate(opBounds)) { +if (lb == blockArg) { + found = true; + if (outBounds) +(*outBounds)[i] = hostEvalVar; +} + } + return found; +}; +bool found = +processBounds(loopOp.getLoopLowerBounds(), lowerBounds); +found = processBounds(loopOp.getLoopUpperBounds(), upperBounds) || +found; +found = processBounds(loopOp.getLoopSteps(), steps) || found; +if (!found) + llvm_unreachable("unsupported host_eval use"); }) .Default([](Operation *) { llvm_unreachable("unsupported host_eval use"); @@ -4222,6 +4231,7 @@ initTargetDefaultAttrs(omp::TargetOp targetOp, combinedMaxThreadsVal = maxThreadsVal; // Update kernel bounds structure for the `OpenMPIRBuilder` to use. + attrs.ExecFlags = targetOp.getKernelExecFlags(); attrs.MinTeams = minTeamsVal; attrs.MaxTeams.front() = maxTeamsVal; attrs.MinThreads = 1; @@ -4239,9 +4249,15 @@ initTargetRuntimeAttrs(llvm::IRBuilderBase &builder,
[llvm-branch-commits] [llvm] [OpenMPIRBuilder] Add support for distribute-parallel-for/do constructs (PR #127818)
https://github.com/skatrak created https://github.com/llvm/llvm-project/pull/127818 This patch adds codegen for `kmpc_dist_for_static_init` runtime calls, used to support worksharing a single loop across teams and threads. This can be used to implement `distribute parallel for/do` support. >From cb7ae2d2aa19a0bdb46e38943eab629d74c8de2c Mon Sep 17 00:00:00 2001 From: Sergio Afonso Date: Tue, 18 Feb 2025 12:04:53 + Subject: [PATCH] [OpenMPIRBuilder] Add support for distribute-parallel-for/do constructs This patch adds codegen for `kmpc_dist_for_static_init` runtime calls, used to support worksharing a single loop across teams and threads. This can be used to implement `distribute parallel for/do` support. --- llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp | 34 --- 1 file changed, 30 insertions(+), 4 deletions(-) diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp index 9e380bf2d3dbe..7788897fc0795 100644 --- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp +++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp @@ -4130,6 +4130,23 @@ Expected OpenMPIRBuilder::createCanonicalLoop( return createCanonicalLoop(LoopLoc, BodyGen, TripCount, Name); } +// Returns an LLVM function to call for initializing loop bounds using OpenMP +// static scheduling for composite `distribute parallel for` depending on +// `type`. Only i32 and i64 are supported by the runtime. Always interpret +// integers as unsigned similarly to CanonicalLoopInfo. 
+static FunctionCallee +getKmpcDistForStaticInitForType(Type *Ty, Module &M, +OpenMPIRBuilder &OMPBuilder) { + unsigned Bitwidth = Ty->getIntegerBitWidth(); + if (Bitwidth == 32) +return OMPBuilder.getOrCreateRuntimeFunction( +M, omp::RuntimeFunction::OMPRTL___kmpc_dist_for_static_init_4u); + if (Bitwidth == 64) +return OMPBuilder.getOrCreateRuntimeFunction( +M, omp::RuntimeFunction::OMPRTL___kmpc_dist_for_static_init_8u); + llvm_unreachable("unknown OpenMP loop iterator bitwidth"); +} + // Returns an LLVM function to call for initializing loop bounds using OpenMP // static scheduling depending on `type`. Only i32 and i64 are supported by the // runtime. Always interpret integers as unsigned similarly to @@ -4164,7 +4181,10 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::applyStaticWorkshareLoop( // Declare useful OpenMP runtime functions. Value *IV = CLI->getIndVar(); Type *IVTy = IV->getType(); - FunctionCallee StaticInit = getKmpcForStaticInitForType(IVTy, M, *this); + FunctionCallee StaticInit = + LoopType == WorksharingLoopType::DistributeForStaticLoop + ? getKmpcDistForStaticInitForType(IVTy, M, *this) + : getKmpcForStaticInitForType(IVTy, M, *this); FunctionCallee StaticFini = getOrCreateRuntimeFunction(M, omp::OMPRTL___kmpc_for_static_fini); @@ -4200,9 +4220,15 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::applyStaticWorkshareLoop( // Call the "init" function and update the trip count of the loop with the // value it produced. 
- Builder.CreateCall(StaticInit, - {SrcLoc, ThreadNum, SchedulingType, PLastIter, PLowerBound, - PUpperBound, PStride, One, Zero}); + SmallVector Args( + {SrcLoc, ThreadNum, SchedulingType, PLastIter, PLowerBound, PUpperBound}); + if (LoopType == WorksharingLoopType::DistributeForStaticLoop) { +Value *PDistUpperBound = +Builder.CreateAlloca(IVTy, nullptr, "p.distupperbound"); +Args.push_back(PDistUpperBound); + } + Args.append({PStride, One, Zero}); + Builder.CreateCall(StaticInit, Args); Value *LowerBound = Builder.CreateLoad(IVTy, PLowerBound); Value *InclusiveUpperBound = Builder.CreateLoad(IVTy, PUpperBound); Value *TripCountMinusOne = Builder.CreateSub(InclusiveUpperBound, LowerBound); ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [mlir] [MLIR][OpenMP] Support target SPMD (PR #127821)
https://github.com/Meinersbur approved this pull request. LGTM https://github.com/llvm/llvm-project/pull/127821 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [mlir] [MLIR][OpenMP] Support target SPMD (PR #127821)
@@ -4081,11 +4075,26 @@ static void extractHostEvalClauses(omp::TargetOp targetOp, Value &numThreads, llvm_unreachable("unsupported host_eval use"); }) .Case([&](omp::LoopNestOp loopOp) { -// TODO: Extract bounds and step values. Currently, this cannot be -// reached because translation would have been stopped earlier as a -// result of `checkImplementationStatus` detecting and reporting -// this situation. -llvm_unreachable("unsupported host_eval use"); +auto processBounds = +[&](OperandRange opBounds, +llvm::SmallVectorImpl *outBounds) -> bool { + bool found = false; + for (auto [i, lb] : llvm::enumerate(opBounds)) { +if (lb == blockArg) { + found = true; + if (outBounds) +(*outBounds)[i] = hostEvalVar; +} + } + return found; +}; +bool found = +processBounds(loopOp.getLoopLowerBounds(), lowerBounds); +found = processBounds(loopOp.getLoopUpperBounds(), upperBounds) || +found; +found = processBounds(loopOp.getLoopSteps(), steps) || found; +if (!found) + llvm_unreachable("unsupported host_eval use"); Meinersbur wrote: ```suggestion (void)found; assert(found && "unsupported host_eval use"); ``` https://github.com/llvm/llvm-project/pull/127821 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [mlir] [MLIR][OpenMP] Support target SPMD (PR #127821)
https://github.com/skatrak created https://github.com/llvm/llvm-project/pull/127821 This patch implements MLIR to LLVM IR translation of host-evaluated loop bounds, completing initial support for `target teams distribute parallel do [simd]` and `target teams distribute [simd]`. >From 33409d2b52bfb4c69f67bbde001de5ce48feb073 Mon Sep 17 00:00:00 2001 From: Sergio Afonso Date: Wed, 19 Feb 2025 14:41:12 + Subject: [PATCH] [MLIR][OpenMP] Support target SPMD This patch implements MLIR to LLVM IR translation of host-evaluated loop bounds, completing initial support for `target teams distribute parallel do [simd]` and `target teams distribute [simd]`. --- .../OpenMP/OpenMPToLLVMIRTranslation.cpp | 83 .../Target/LLVMIR/openmp-target-spmd.mlir | 96 +++ mlir/test/Target/LLVMIR/openmp-todo.mlir | 24 - 3 files changed, 159 insertions(+), 44 deletions(-) create mode 100644 mlir/test/Target/LLVMIR/openmp-target-spmd.mlir diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp index 7e8a9bdb5b133..93a88c89162d6 100644 --- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp @@ -176,15 +176,6 @@ static LogicalResult checkImplementationStatus(Operation &op) { if (op.getHint()) op.emitWarning("hint clause discarded"); }; - auto checkHostEval = [](auto op, LogicalResult &result) { -// Host evaluated clauses are supported, except for loop bounds. 
-for (BlockArgument arg : - cast(*op).getHostEvalBlockArgs()) - for (Operation *user : arg.getUsers()) -if (isa(user)) - result = op.emitError("not yet implemented: host evaluation of loop " -"bounds in omp.target operation"); - }; auto checkInReduction = [&todo](auto op, LogicalResult &result) { if (!op.getInReductionVars().empty() || op.getInReductionByref() || op.getInReductionSyms()) @@ -321,7 +312,6 @@ static LogicalResult checkImplementationStatus(Operation &op) { checkBare(op, result); checkDevice(op, result); checkHasDeviceAddr(op, result); -checkHostEval(op, result); checkInReduction(op, result); checkIsDevicePtr(op, result); checkPrivate(op, result); @@ -4054,9 +4044,13 @@ createDeviceArgumentAccessor(MapInfoData &mapData, llvm::Argument &arg, /// /// Loop bounds and steps are only optionally populated, if output vectors are /// provided. -static void extractHostEvalClauses(omp::TargetOp targetOp, Value &numThreads, - Value &numTeamsLower, Value &numTeamsUpper, - Value &threadLimit) { +static void +extractHostEvalClauses(omp::TargetOp targetOp, Value &numThreads, + Value &numTeamsLower, Value &numTeamsUpper, + Value &threadLimit, + llvm::SmallVectorImpl *lowerBounds = nullptr, + llvm::SmallVectorImpl *upperBounds = nullptr, + llvm::SmallVectorImpl *steps = nullptr) { auto blockArgIface = llvm::cast(*targetOp); for (auto item : llvm::zip_equal(targetOp.getHostEvalVars(), blockArgIface.getHostEvalBlockArgs())) { @@ -4081,11 +4075,26 @@ static void extractHostEvalClauses(omp::TargetOp targetOp, Value &numThreads, llvm_unreachable("unsupported host_eval use"); }) .Case([&](omp::LoopNestOp loopOp) { -// TODO: Extract bounds and step values. Currently, this cannot be -// reached because translation would have been stopped earlier as a -// result of `checkImplementationStatus` detecting and reporting -// this situation. 
-llvm_unreachable("unsupported host_eval use"); +auto processBounds = +[&](OperandRange opBounds, +llvm::SmallVectorImpl *outBounds) -> bool { + bool found = false; + for (auto [i, lb] : llvm::enumerate(opBounds)) { +if (lb == blockArg) { + found = true; + if (outBounds) +(*outBounds)[i] = hostEvalVar; +} + } + return found; +}; +bool found = +processBounds(loopOp.getLoopLowerBounds(), lowerBounds); +found = processBounds(loopOp.getLoopUpperBounds(), upperBounds) || +found; +found = processBounds(loopOp.getLoopSteps(), steps) || found; +if (!found) + llvm_unreachable("unsupported host_eval use"); }) .Default([](Operation *) { llvm_unreachable("unsupported host_eval use"); @@ -4222,6 +4231,7 @@ initTargetDefaul
[llvm-branch-commits] [mlir] [MLIR][OpenMP] Host lowering of distribute-parallel-do/for (PR #127819)
llvmbot wrote: @llvm/pr-subscribers-mlir-llvm @llvm/pr-subscribers-mlir-openmp Author: Sergio Afonso (skatrak) Changes This patch adds support for translating composite `omp.parallel` + `omp.distribute` + `omp.wsloop` loops to LLVM IR on the host. This is done by passing an updated `WorksharingLoopType` to the call to `applyWorkshareLoop` associated to the lowering of the `omp.wsloop` operation, so that `__kmpc_dist_for_static_init` is called at runtime in place of `__kmpc_for_static_init`. Existing translation rules take care of creating a parallel region to hold the workshared and workdistributed loop. --- Full diff: https://github.com/llvm/llvm-project/pull/127819.diff 3 Files Affected: - (modified) mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp (+16-5) - (modified) mlir/test/Target/LLVMIR/openmp-llvm.mlir (+65) - (modified) mlir/test/Target/LLVMIR/openmp-todo.mlir (-19) ``diff diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp index c8221a9f9854a..7e8a9bdb5b133 100644 --- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp @@ -260,10 +260,6 @@ static LogicalResult checkImplementationStatus(Operation &op) { LogicalResult result = success(); llvm::TypeSwitch(op) .Case([&](omp::DistributeOp op) { -if (op.isComposite() && -isa_and_present(op.getNestedWrapper())) - result = op.emitError() << "not yet implemented: " - "composite omp.distribute + omp.wsloop"; checkAllocate(op, result); checkDistSchedule(op, result); checkOrder(op, result); @@ -1993,6 +1989,14 @@ convertOmpWsloop(Operation &opInst, llvm::IRBuilderBase &builder, bool isSimd = wsloopOp.getScheduleSimd(); bool loopNeedsBarrier = !wsloopOp.getNowait(); + // The only legal way for the direct parent to be omp.distribute is that this + // represents 'distribute parallel do'. 
Otherwise, this is a regular + // worksharing loop. + llvm::omp::WorksharingLoopType workshareLoopType = + llvm::isa_and_present(opInst.getParentOp()) + ? llvm::omp::WorksharingLoopType::DistributeForStaticLoop + : llvm::omp::WorksharingLoopType::ForStaticLoop; + llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder); llvm::Expected regionBlock = convertOmpOpRegions( wsloopOp.getRegion(), "omp.wsloop.region", builder, moduleTranslation); @@ -2008,7 +2012,8 @@ convertOmpWsloop(Operation &opInst, llvm::IRBuilderBase &builder, ompLoc.DL, loopInfo, allocaIP, loopNeedsBarrier, convertToScheduleKind(schedule), chunk, isSimd, scheduleMod == omp::ScheduleModifier::monotonic, - scheduleMod == omp::ScheduleModifier::nonmonotonic, isOrdered); + scheduleMod == omp::ScheduleModifier::nonmonotonic, isOrdered, + workshareLoopType); if (failed(handleError(wsloopIP, opInst))) return failure(); @@ -3792,6 +3797,12 @@ convertOmpDistribute(Operation &opInst, llvm::IRBuilderBase &builder, return regionBlock.takeError(); builder.SetInsertPoint(*regionBlock, (*regionBlock)->begin()); +// Skip applying a workshare loop below when translating 'distribute +// parallel do' (it's been already handled by this point while translating +// the nested omp.wsloop). +if (isa_and_present(distributeOp.getNestedWrapper())) + return llvm::Error::success(); + // TODO: Add support for clauses which are valid for DISTRIBUTE constructs. // Static schedule is the default. 
auto schedule = omp::ClauseScheduleKind::Static; diff --git a/mlir/test/Target/LLVMIR/openmp-llvm.mlir b/mlir/test/Target/LLVMIR/openmp-llvm.mlir index a5a490e527d79..d85b149c66811 100644 --- a/mlir/test/Target/LLVMIR/openmp-llvm.mlir +++ b/mlir/test/Target/LLVMIR/openmp-llvm.mlir @@ -3307,3 +3307,68 @@ llvm.func @distribute() { // CHECK: store i64 1, ptr %[[STRIDE]] // CHECK: %[[TID:.*]] = call i32 @__kmpc_global_thread_num({{.*}}) // CHECK: call void @__kmpc_for_static_init_{{.*}}(ptr @{{.*}}, i32 %[[TID]], i32 92, ptr %[[LASTITER]], ptr %[[LB]], ptr %[[UB]], ptr %[[STRIDE]], i64 1, i64 0) + +// - + +llvm.func @distribute_wsloop(%lb : i32, %ub : i32, %step : i32) { + omp.parallel { +omp.distribute { + omp.wsloop { +omp.loop_nest (%iv) : i32 = (%lb) to (%ub) step (%step) { + omp.yield +} + } {omp.composite} +} {omp.composite} +omp.terminator + } {omp.composite} + llvm.return +} + +// CHECK-LABEL: define void @distribute_wsloop +// CHECK: call void{{.*}}@__kmpc_fork_call({{.*}}, ptr @[[OUTLINED_PARALLEL:.*]], + +// CHECK: define internal void @[[OUTLINED_PARALLEL]]({{.*}}) +// CHECK: %[[ARGS:.*]] = alloca { i32, i32, i32, ptr, ptr
[llvm-branch-commits] [mlir] [MLIR][OpenMP] Host lowering of distribute-parallel-do/for (PR #127819)
llvmbot wrote: @llvm/pr-subscribers-mlir Author: Sergio Afonso (skatrak) Changes This patch adds support for translating composite `omp.parallel` + `omp.distribute` + `omp.wsloop` loops to LLVM IR on the host. This is done by passing an updated `WorksharingLoopType` to the call to `applyWorkshareLoop` associated to the lowering of the `omp.wsloop` operation, so that `__kmpc_dist_for_static_init` is called at runtime in place of `__kmpc_for_static_init`. Existing translation rules take care of creating a parallel region to hold the workshared and workdistributed loop. --- Full diff: https://github.com/llvm/llvm-project/pull/127819.diff 3 Files Affected: - (modified) mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp (+16-5) - (modified) mlir/test/Target/LLVMIR/openmp-llvm.mlir (+65) - (modified) mlir/test/Target/LLVMIR/openmp-todo.mlir (-19) ``diff diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp index c8221a9f9854a..7e8a9bdb5b133 100644 --- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp @@ -260,10 +260,6 @@ static LogicalResult checkImplementationStatus(Operation &op) { LogicalResult result = success(); llvm::TypeSwitch(op) .Case([&](omp::DistributeOp op) { -if (op.isComposite() && -isa_and_present(op.getNestedWrapper())) - result = op.emitError() << "not yet implemented: " - "composite omp.distribute + omp.wsloop"; checkAllocate(op, result); checkDistSchedule(op, result); checkOrder(op, result); @@ -1993,6 +1989,14 @@ convertOmpWsloop(Operation &opInst, llvm::IRBuilderBase &builder, bool isSimd = wsloopOp.getScheduleSimd(); bool loopNeedsBarrier = !wsloopOp.getNowait(); + // The only legal way for the direct parent to be omp.distribute is that this + // represents 'distribute parallel do'. Otherwise, this is a regular + // worksharing loop. 
+ llvm::omp::WorksharingLoopType workshareLoopType = + llvm::isa_and_present(opInst.getParentOp()) + ? llvm::omp::WorksharingLoopType::DistributeForStaticLoop + : llvm::omp::WorksharingLoopType::ForStaticLoop; + llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder); llvm::Expected regionBlock = convertOmpOpRegions( wsloopOp.getRegion(), "omp.wsloop.region", builder, moduleTranslation); @@ -2008,7 +2012,8 @@ convertOmpWsloop(Operation &opInst, llvm::IRBuilderBase &builder, ompLoc.DL, loopInfo, allocaIP, loopNeedsBarrier, convertToScheduleKind(schedule), chunk, isSimd, scheduleMod == omp::ScheduleModifier::monotonic, - scheduleMod == omp::ScheduleModifier::nonmonotonic, isOrdered); + scheduleMod == omp::ScheduleModifier::nonmonotonic, isOrdered, + workshareLoopType); if (failed(handleError(wsloopIP, opInst))) return failure(); @@ -3792,6 +3797,12 @@ convertOmpDistribute(Operation &opInst, llvm::IRBuilderBase &builder, return regionBlock.takeError(); builder.SetInsertPoint(*regionBlock, (*regionBlock)->begin()); +// Skip applying a workshare loop below when translating 'distribute +// parallel do' (it's been already handled by this point while translating +// the nested omp.wsloop). +if (isa_and_present(distributeOp.getNestedWrapper())) + return llvm::Error::success(); + // TODO: Add support for clauses which are valid for DISTRIBUTE constructs. // Static schedule is the default. 
auto schedule = omp::ClauseScheduleKind::Static; diff --git a/mlir/test/Target/LLVMIR/openmp-llvm.mlir b/mlir/test/Target/LLVMIR/openmp-llvm.mlir index a5a490e527d79..d85b149c66811 100644 --- a/mlir/test/Target/LLVMIR/openmp-llvm.mlir +++ b/mlir/test/Target/LLVMIR/openmp-llvm.mlir @@ -3307,3 +3307,68 @@ llvm.func @distribute() { // CHECK: store i64 1, ptr %[[STRIDE]] // CHECK: %[[TID:.*]] = call i32 @__kmpc_global_thread_num({{.*}}) // CHECK: call void @__kmpc_for_static_init_{{.*}}(ptr @{{.*}}, i32 %[[TID]], i32 92, ptr %[[LASTITER]], ptr %[[LB]], ptr %[[UB]], ptr %[[STRIDE]], i64 1, i64 0) + +// - + +llvm.func @distribute_wsloop(%lb : i32, %ub : i32, %step : i32) { + omp.parallel { +omp.distribute { + omp.wsloop { +omp.loop_nest (%iv) : i32 = (%lb) to (%ub) step (%step) { + omp.yield +} + } {omp.composite} +} {omp.composite} +omp.terminator + } {omp.composite} + llvm.return +} + +// CHECK-LABEL: define void @distribute_wsloop +// CHECK: call void{{.*}}@__kmpc_fork_call({{.*}}, ptr @[[OUTLINED_PARALLEL:.*]], + +// CHECK: define internal void @[[OUTLINED_PARALLEL]]({{.*}}) +// CHECK: %[[ARGS:.*]] = alloca { i32, i32, i32, ptr, ptr, ptr, ptr } +// CHECK: %[[LAS
[llvm-branch-commits] [flang] release/20.x: flang: Fix build with latest libc++ (#127362) (PR #127805)
https://github.com/llvmbot created https://github.com/llvm/llvm-project/pull/127805 Backport 2b340c10a611d929fee25e6222909c8915e3d6b6 Requested by: @tstellar >From 2b70b17d30744ee6720eb2645ef8b61e043ed295 Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Wed, 19 Feb 2025 06:53:30 -0800 Subject: [PATCH] flang: Fix build with latest libc++ (#127362) I think this first stopped working with 954836634abb446f18719b14120c386a929a42d1. This patch fixes the following error: /home/runner/work/llvm-project/llvm-project/flang/runtime/io-api-minimal.cpp:153:11: error: '__libcpp_verbose_abort' is missing exception specification 'noexcept' 153 | void std::__libcpp_verbose_abort(char const *format, ...) { | ^ | noexcept /mnt/build/bin/../include/c++/v1/__verbose_abort:30:28: note: previous declaration is here 30 | __printf__, 1, 2) void __libcpp_verbose_abort(const char* __format, ...) _LIBCPP_VERBOSE_ABORT_NOEXCEPT; |^ 1 error generated. (cherry picked from commit 2b340c10a611d929fee25e6222909c8915e3d6b6) --- flang/runtime/io-api-minimal.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/flang/runtime/io-api-minimal.cpp b/flang/runtime/io-api-minimal.cpp index 68768427be0c2..93ac82248aa4c 100644 --- a/flang/runtime/io-api-minimal.cpp +++ b/flang/runtime/io-api-minimal.cpp @@ -150,7 +150,8 @@ bool IODEF(OutputLogical)(Cookie cookie, bool truth) { // Provide own definition for `std::__libcpp_verbose_abort` to avoid dependency // on the version provided by libc++. -void std::__libcpp_verbose_abort(char const *format, ...) { +void std::__libcpp_verbose_abort(char const *format, ...) noexcept( +noexcept(std::__libcpp_verbose_abort(""))) { va_list list; va_start(list, format); std::vfprintf(stderr, format, list); ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [clang] Implement instantiation context note for checking template parameters (PR #126088)
@@ -1018,9 +1019,9 @@ namespace cwg62 { // cwg62: 2.9 struct A { struct { int n; } b; }; - template struct X {}; - template T get() { return get(); } - template int take(T) { return 0; } + template struct X {}; // cxx98-note 6{{template parameter is declared here}} mizvekov wrote: Why though? These notes are not particularly relevant for a DR test. This makes these tests very cumbersome to update. I don't quite understand all these special rules for these DR tests, as I think they popped up while I was on a break from the project, and I probably missed discussions here. But this goes around the design intent of the diagnostic verifier, which encourages these sorts of matches, while at the same time it does not support matching on a diagnostic sequence at all, and this can give people the illusion that the sequence is actually verified. I think this makes the verifier unsuited for this kind of test. FileCheck on the raw clang output and a generator/updater script would be a superior solution, which would actually support matching on a sequence of diagnostics. https://github.com/llvm/llvm-project/pull/126088 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] Backport: [clang] Track function template instantiation from definition… (PR #127777)
https://github.com/mizvekov edited https://github.com/llvm/llvm-project/pull/127777 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] Backport: [clang] Track function template instantiation from definition… (PR #127777)
https://github.com/erichkeane approved this pull request. https://github.com/llvm/llvm-project/pull/127777 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [lldb] Backport: [clang] fix P3310 overload resolution flag propagation (#125791) (PR #127831)
https://github.com/erichkeane approved this pull request. https://github.com/llvm/llvm-project/pull/127831 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] Backport: [clang] Track function template instantiation from definition (#125266) (PR #127777)
https://github.com/mizvekov edited https://github.com/llvm/llvm-project/pull/127777 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [flang] release/20.x: flang: Fix build with latest libc++ (#127362) (PR #127805)
https://github.com/ldionne approved this pull request. https://github.com/llvm/llvm-project/pull/127805 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [libcxx] release/20.x: [libc++] Guard include of <features.h> with __has_include (#127691) (PR #127842)
https://github.com/llvmbot milestoned https://github.com/llvm/llvm-project/pull/127842 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [libcxx] release/20.x: [libc++] Guard include of <features.h> with __has_include (#127691) (PR #127842)
https://github.com/llvmbot created https://github.com/llvm/llvm-project/pull/127842 Backport 2c8b1248513624e89b510397224f0f405116f3d3 Requested by: @ldionne >From 315226cf7b7751303615984ec3d84664d156 Mon Sep 17 00:00:00 2001 From: Louis Dionne Date: Wed, 19 Feb 2025 08:21:56 -0500 Subject: [PATCH] [libc++] Guard include of <features.h> with __has_include (#127691) Some configurations define __AMDGPU__ or __NVPTX__ on platforms that don't provide <features.h>, such as CUDA on Mac. (cherry picked from commit 2c8b1248513624e89b510397224f0f405116f3d3) --- libcxx/include/__configuration/platform.h | 14 -- 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/libcxx/include/__configuration/platform.h b/libcxx/include/__configuration/platform.h index cff99376ee24b..8d0f8f63f5213 100644 --- a/libcxx/include/__configuration/platform.h +++ b/libcxx/include/__configuration/platform.h @@ -32,12 +32,14 @@ // Need to detect which libc we're using if we're on Linux. #if defined(__linux__) || defined(__AMDGPU__) || defined(__NVPTX__) -# include <features.h> -# if defined(__GLIBC_PREREQ) -#define _LIBCPP_GLIBC_PREREQ(a, b) __GLIBC_PREREQ(a, b) -# else -#define _LIBCPP_GLIBC_PREREQ(a, b) 0 -# endif // defined(__GLIBC_PREREQ) +# if __has_include(<features.h>) +#include <features.h> +#if defined(__GLIBC_PREREQ) +# define _LIBCPP_GLIBC_PREREQ(a, b) __GLIBC_PREREQ(a, b) +#else +# define _LIBCPP_GLIBC_PREREQ(a, b) 0 +#endif // defined(__GLIBC_PREREQ) +# endif #endif #ifndef __BYTE_ORDER__ ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [clang] Implement instantiation context note for checking template parameters (PR #126088)
@@ -1018,9 +1019,9 @@ namespace cwg62 { // cwg62: 2.9 struct A { struct { int n; } b; }; - template struct X {}; - template T get() { return get(); } - template int take(T) { return 0; } + template struct X {}; // cxx98-note 6{{template parameter is declared here}} mizvekov wrote: I support the bookmarks, they are fine, sure let's use them more. My concern is not about the bookmarks though, is about the way the tests are written here, checking the whole diagnostic sequence, but without actually checking it, and without any kind of automated update. I would be strongly against adopting this for the other tests, as if this were a thing a while ago, this would have made many of my PRs unviable due to the amount of manual test rework required. That is not to say that I don't agree with the overall goal, I think we do need to test for duplicated notes and things like that, but we should implement this in tooling, being actually verified and not manually updated. https://github.com/llvm/llvm-project/pull/126088 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [clang] Implement instantiation context note for checking template parameters (PR #126088)
@@ -1018,9 +1019,9 @@ namespace cwg62 { // cwg62: 2.9 struct A { struct { int n; } b; }; - template struct X {}; - template T get() { return get(); } - template int take(T) { return 0; } + template struct X {}; // cxx98-note 6{{template parameter is declared here}} mizvekov wrote: > I'm not certain I understand the concern then. The request is to do something > like: > so either way it is actually checking the whole diagnostic sequence, just > that with bookmarks it's easier to tell which warnings/errors generate what > notes. No it's not actually checking the sequence, because the notes can be attached to random diagnostics which are not actually the preceding ones, and the tests will still pass. The diagnostic verifier does not currently check the order diagnostics appear in, writing the test expectations like this only gives you the illusion that it does :) https://github.com/llvm/llvm-project/pull/126088 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] release/20.x: Revert "[C++20][Modules][Serialization] Delay marking pending incompl… (#127136) (PR #127252)
AaronBallman wrote: > Not super familiar with the release cherry-picking workflow. How should I > move this forward? You've done it correctly (though you should set your email to public per https://github.com/llvm/llvm-project/pull/127252#issuecomment-2660155014). It's just waiting on review before it gets merged into the release branch. https://github.com/llvm/llvm-project/pull/127252 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [clang] Implement instantiation context note for checking template parameters (PR #126088)
@@ -1018,9 +1019,9 @@ namespace cwg62 { // cwg62: 2.9 struct A { struct { int n; } b; }; - template struct X {}; - template T get() { return get(); } - template int take(T) { return 0; } + template struct X {}; // cxx98-note 6{{template parameter is declared here}} AaronBallman wrote: > My concern is not about the bookmarks though, is about the way the tests are > written here, checking the whole diagnostic sequence, but without actually checking it, and without any kind of automated update. I'm not certain I understand the concern then. The request is to do something like: ``` template struct X {}; // #template_struct_X ... ... // expected-warning {{yada yada}} // expected-note@#template_struct_X {{declared here}} ... ... // expected-warning {{yada yada}} \ // expected-note@#template_struct_X {{declared here}} \ // expected-error {{a different yada on the same line}} \ // expected-note@#template_struct_X {{declared here}} ``` instead of doing: ``` template struct X {}; // expected-note 3 {{declared here}} ... ... // expected-warning {{yada yada}} ... ... // expected-warning {{yada yada}} \ // expected-error {{a different yada on the same line}} ``` so either way it is actually checking the whole diagnostic sequence, just that with bookmarks it's easier to tell which warnings/errors generate what notes. And we've never had a way to automatically update `-verify` tests. We do for some kinds of `FileCheck` tests, but that can be contentious because of how easy it is to generate the test updates, bugs and all. (I'm not opposed to having tooling which helps with diagnostic changes; upgrading warnings to errors is a prime example of something that could hopefully be made easier.) https://github.com/llvm/llvm-project/pull/126088 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [clang] Implement instantiation context note for checking template parameters (PR #126088)
@@ -1018,9 +1019,9 @@ namespace cwg62 { // cwg62: 2.9 struct A { struct { int n; } b; }; - template struct X {}; - template T get() { return get(); } - template int take(T) { return 0; } + template struct X {}; // cxx98-note 6{{template parameter is declared here}} mizvekov wrote: No worries, I'll do the update here on this PR. I think it would be good to post on discourse to get some design feedback on that. If this were actually checked, this would significantly reduce the amount of work required when manually updating tests, to the point that it would be fine for me to not also need to support automatic generation and updates of test expectations for now. https://github.com/llvm/llvm-project/pull/126088 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] release/20.x: [CUDA] Add support for sm101 and sm120 target architectures (#127187) (PR #127918)
https://github.com/llvmbot created https://github.com/llvm/llvm-project/pull/127918 Backport 0127f169dc8e0b5b6c2a24f74cd42d9d277916f6 Requested by: @Artem-B >From c3f0998ff02643c3811cfa1af46ba6b0ed2c24c6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebastian=20Jod=C5=82owski?= Date: Wed, 19 Feb 2025 14:41:07 -0800 Subject: [PATCH] [CUDA] Add support for sm101 and sm120 target architectures (#127187) Add support for sm101 and sm120 target architectures. It requires CUDA 12.8. - Co-authored-by: Sebastian Jodlowski (cherry picked from commit 0127f169dc8e0b5b6c2a24f74cd42d9d277916f6) --- clang/include/clang/Basic/BuiltinsNVPTX.td| 8 --- clang/include/clang/Basic/Cuda.h | 4 clang/lib/Basic/Cuda.cpp | 8 +++ clang/lib/Basic/Targets/NVPTX.cpp | 23 +++ clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp | 4 .../test/Misc/target-invalid-cpu-note/nvptx.c | 4 6 files changed, 43 insertions(+), 8 deletions(-) diff --git a/clang/include/clang/Basic/BuiltinsNVPTX.td b/clang/include/clang/Basic/BuiltinsNVPTX.td index 9d24a992563a4..b550fff8567df 100644 --- a/clang/include/clang/Basic/BuiltinsNVPTX.td +++ b/clang/include/clang/Basic/BuiltinsNVPTX.td @@ -21,12 +21,14 @@ class SM newer_list> : SMFeatures { !strconcat(f, "|", newer.Features)); } +let Features = "sm_120a" in def SM_120a : SMFeatures; +let Features = "sm_101a" in def SM_101a : SMFeatures; let Features = "sm_100a" in def SM_100a : SMFeatures; - -def SM_100 : SM<"100", [SM_100a]>; - let Features = "sm_90a" in def SM_90a : SMFeatures; +def SM_120 : SM<"120", [SM_120a]>; +def SM_101 : SM<"101", [SM_101a, SM_120]>; +def SM_100 : SM<"100", [SM_100a, SM_101]>; def SM_90 : SM<"90", [SM_90a, SM_100]>; def SM_89 : SM<"89", [SM_90]>; def SM_87 : SM<"87", [SM_89]>; diff --git a/clang/include/clang/Basic/Cuda.h b/clang/include/clang/Basic/Cuda.h index f33ba46233a7a..5c909a8e9ca11 100644 --- a/clang/include/clang/Basic/Cuda.h +++ b/clang/include/clang/Basic/Cuda.h @@ -82,6 +82,10 @@ enum class OffloadArch { SM_90a, SM_100, SM_100a, + SM_101, + 
SM_101a, + SM_120, + SM_120a, GFX600, GFX601, GFX602, diff --git a/clang/lib/Basic/Cuda.cpp b/clang/lib/Basic/Cuda.cpp index 1bfec0b37c5ee..79cac0ec119dd 100644 --- a/clang/lib/Basic/Cuda.cpp +++ b/clang/lib/Basic/Cuda.cpp @@ -100,6 +100,10 @@ static const OffloadArchToStringMap arch_names[] = { SM(90a), // Hopper SM(100), // Blackwell SM(100a),// Blackwell +SM(101), // Blackwell +SM(101a),// Blackwell +SM(120), // Blackwell +SM(120a),// Blackwell GFX(600), // gfx600 GFX(601), // gfx601 GFX(602), // gfx602 @@ -230,6 +234,10 @@ CudaVersion MinVersionForOffloadArch(OffloadArch A) { return CudaVersion::CUDA_120; case OffloadArch::SM_100: case OffloadArch::SM_100a: + case OffloadArch::SM_101: + case OffloadArch::SM_101a: + case OffloadArch::SM_120: + case OffloadArch::SM_120a: return CudaVersion::CUDA_128; default: llvm_unreachable("invalid enum"); diff --git a/clang/lib/Basic/Targets/NVPTX.cpp b/clang/lib/Basic/Targets/NVPTX.cpp index a03f4983b9d03..9be12cbe7ac19 100644 --- a/clang/lib/Basic/Targets/NVPTX.cpp +++ b/clang/lib/Basic/Targets/NVPTX.cpp @@ -176,7 +176,7 @@ void NVPTXTargetInfo::getTargetDefines(const LangOptions &Opts, if (Opts.CUDAIsDevice || Opts.OpenMPIsTargetDevice || !HostTarget) { // Set __CUDA_ARCH__ for the GPU specified. 
-std::string CUDAArchCode = [this] { +llvm::StringRef CUDAArchCode = [this] { switch (GPU) { case OffloadArch::GFX600: case OffloadArch::GFX601: @@ -283,14 +283,27 @@ void NVPTXTargetInfo::getTargetDefines(const LangOptions &Opts, case OffloadArch::SM_100: case OffloadArch::SM_100a: return "1000"; + case OffloadArch::SM_101: + case OffloadArch::SM_101a: + return "1010"; + case OffloadArch::SM_120: + case OffloadArch::SM_120a: + return "1200"; } llvm_unreachable("unhandled OffloadArch"); }(); Builder.defineMacro("__CUDA_ARCH__", CUDAArchCode); -if (GPU == OffloadArch::SM_90a) - Builder.defineMacro("__CUDA_ARCH_FEAT_SM90_ALL", "1"); -if (GPU == OffloadArch::SM_100a) - Builder.defineMacro("__CUDA_ARCH_FEAT_SM100_ALL", "1"); +switch(GPU) { + case OffloadArch::SM_90a: + case OffloadArch::SM_100a: + case OffloadArch::SM_101a: + case OffloadArch::SM_120a: +Builder.defineMacro("__CUDA_ARCH_FEAT_SM" + CUDAArchCode.drop_back() + "_ALL", "1"); +break; + default: +// Do nothing if this is not an enhanced architecture. +break; +} } } diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp b/clang/
[llvm-branch-commits] [clang] release/20.x: [CUDA] Add support for sm101 and sm120 target architectures (#127187) (PR #127918)
https://github.com/llvmbot milestoned https://github.com/llvm/llvm-project/pull/127918 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] release/20.x: [CUDA] Add support for sm101 and sm120 target architectures (#127187) (PR #127918)
llvmbot wrote: @Artem-B What do you think about merging this PR to the release branch? https://github.com/llvm/llvm-project/pull/127918 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] release/20.x: [CUDA] Add support for sm101 and sm120 target architectures (#127187) (PR #127918)
llvmbot wrote: @llvm/pr-subscribers-clang-codegen Author: None (llvmbot) Changes Backport 0127f169dc8e0b5b6c2a24f74cd42d9d277916f6 Requested by: @Artem-B --- Full diff: https://github.com/llvm/llvm-project/pull/127918.diff 6 Files Affected: - (modified) clang/include/clang/Basic/BuiltinsNVPTX.td (+5-3) - (modified) clang/include/clang/Basic/Cuda.h (+4) - (modified) clang/lib/Basic/Cuda.cpp (+8) - (modified) clang/lib/Basic/Targets/NVPTX.cpp (+18-5) - (modified) clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp (+4) - (modified) clang/test/Misc/target-invalid-cpu-note/nvptx.c (+4) ``diff diff --git a/clang/include/clang/Basic/BuiltinsNVPTX.td b/clang/include/clang/Basic/BuiltinsNVPTX.td index 9d24a992563a4..b550fff8567df 100644 --- a/clang/include/clang/Basic/BuiltinsNVPTX.td +++ b/clang/include/clang/Basic/BuiltinsNVPTX.td @@ -21,12 +21,14 @@ class SM newer_list> : SMFeatures { !strconcat(f, "|", newer.Features)); } +let Features = "sm_120a" in def SM_120a : SMFeatures; +let Features = "sm_101a" in def SM_101a : SMFeatures; let Features = "sm_100a" in def SM_100a : SMFeatures; - -def SM_100 : SM<"100", [SM_100a]>; - let Features = "sm_90a" in def SM_90a : SMFeatures; +def SM_120 : SM<"120", [SM_120a]>; +def SM_101 : SM<"101", [SM_101a, SM_120]>; +def SM_100 : SM<"100", [SM_100a, SM_101]>; def SM_90 : SM<"90", [SM_90a, SM_100]>; def SM_89 : SM<"89", [SM_90]>; def SM_87 : SM<"87", [SM_89]>; diff --git a/clang/include/clang/Basic/Cuda.h b/clang/include/clang/Basic/Cuda.h index f33ba46233a7a..5c909a8e9ca11 100644 --- a/clang/include/clang/Basic/Cuda.h +++ b/clang/include/clang/Basic/Cuda.h @@ -82,6 +82,10 @@ enum class OffloadArch { SM_90a, SM_100, SM_100a, + SM_101, + SM_101a, + SM_120, + SM_120a, GFX600, GFX601, GFX602, diff --git a/clang/lib/Basic/Cuda.cpp b/clang/lib/Basic/Cuda.cpp index 1bfec0b37c5ee..79cac0ec119dd 100644 --- a/clang/lib/Basic/Cuda.cpp +++ b/clang/lib/Basic/Cuda.cpp @@ -100,6 +100,10 @@ static const OffloadArchToStringMap arch_names[] = { SM(90a), // 
Hopper SM(100), // Blackwell SM(100a),// Blackwell +SM(101), // Blackwell +SM(101a),// Blackwell +SM(120), // Blackwell +SM(120a),// Blackwell GFX(600), // gfx600 GFX(601), // gfx601 GFX(602), // gfx602 @@ -230,6 +234,10 @@ CudaVersion MinVersionForOffloadArch(OffloadArch A) { return CudaVersion::CUDA_120; case OffloadArch::SM_100: case OffloadArch::SM_100a: + case OffloadArch::SM_101: + case OffloadArch::SM_101a: + case OffloadArch::SM_120: + case OffloadArch::SM_120a: return CudaVersion::CUDA_128; default: llvm_unreachable("invalid enum"); diff --git a/clang/lib/Basic/Targets/NVPTX.cpp b/clang/lib/Basic/Targets/NVPTX.cpp index a03f4983b9d03..9be12cbe7ac19 100644 --- a/clang/lib/Basic/Targets/NVPTX.cpp +++ b/clang/lib/Basic/Targets/NVPTX.cpp @@ -176,7 +176,7 @@ void NVPTXTargetInfo::getTargetDefines(const LangOptions &Opts, if (Opts.CUDAIsDevice || Opts.OpenMPIsTargetDevice || !HostTarget) { // Set __CUDA_ARCH__ for the GPU specified. -std::string CUDAArchCode = [this] { +llvm::StringRef CUDAArchCode = [this] { switch (GPU) { case OffloadArch::GFX600: case OffloadArch::GFX601: @@ -283,14 +283,27 @@ void NVPTXTargetInfo::getTargetDefines(const LangOptions &Opts, case OffloadArch::SM_100: case OffloadArch::SM_100a: return "1000"; + case OffloadArch::SM_101: + case OffloadArch::SM_101a: + return "1010"; + case OffloadArch::SM_120: + case OffloadArch::SM_120a: + return "1200"; } llvm_unreachable("unhandled OffloadArch"); }(); Builder.defineMacro("__CUDA_ARCH__", CUDAArchCode); -if (GPU == OffloadArch::SM_90a) - Builder.defineMacro("__CUDA_ARCH_FEAT_SM90_ALL", "1"); -if (GPU == OffloadArch::SM_100a) - Builder.defineMacro("__CUDA_ARCH_FEAT_SM100_ALL", "1"); +switch(GPU) { + case OffloadArch::SM_90a: + case OffloadArch::SM_100a: + case OffloadArch::SM_101a: + case OffloadArch::SM_120a: +Builder.defineMacro("__CUDA_ARCH_FEAT_SM" + CUDAArchCode.drop_back() + "_ALL", "1"); +break; + default: +// Do nothing if this is not an enhanced architecture. 
+break; +} } } diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp index c13928f61a748..dc417880a50e9 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp @@ -2278,6 +2278,10 @@ void CGOpenMPRuntimeGPU::processRequiresDirective(const OMPRequiresDecl *D) { case OffloadArch::SM_90a: case OffloadArch::SM_100: case OffloadArch::SM_100a: + case OffloadArch::SM_101: