[llvm-branch-commits] [lld] 1d5277c - Revert "[llvm-objdump] Print ... even if a data mapping symbol is active"
Author: Justin Bogner Date: 2024-09-25T19:14:39-07:00 New Revision: 1d5277c271bc01fbc5af90292f8dd9af9ded623a URL: https://github.com/llvm/llvm-project/commit/1d5277c271bc01fbc5af90292f8dd9af9ded623a DIFF: https://github.com/llvm/llvm-project/commit/1d5277c271bc01fbc5af90292f8dd9af9ded623a.diff LOG: Revert "[llvm-objdump] Print ... even if a data mapping symbol is active" This reverts commit abe0dd195a3b2630afdc5c1c233eb2a068b2d72f. Added: Modified: lld/test/ELF/aarch64-undefined-weak.s llvm/test/MC/ARM/ltorg-range.s llvm/tools/llvm-objdump/llvm-objdump.cpp Removed: llvm/test/tools/llvm-objdump/ELF/AArch64/zeroes.test llvm/test/tools/llvm-objdump/ELF/ARM/zeroes.test diff --git a/lld/test/ELF/aarch64-undefined-weak.s b/lld/test/ELF/aarch64-undefined-weak.s index 015f9c9a043e54..f4628453ec3fea 100644 --- a/lld/test/ELF/aarch64-undefined-weak.s +++ b/lld/test/ELF/aarch64-undefined-weak.s @@ -1,7 +1,7 @@ // REQUIRES: aarch64 // RUN: llvm-mc -filetype=obj -triple=aarch64-none-linux %s -o %t.o // RUN: ld.lld --image-base=0x1000 %t.o -o %t -// RUN: llvm-objdump -d -z --no-show-raw-insn %t | FileCheck %s +// RUN: llvm-objdump -d --no-show-raw-insn %t | FileCheck %s // Check that the ARM 64-bit ABI rules for undefined weak symbols are applied. // Branch instructions are resolved to the next instruction. 
Undefined diff --git a/llvm/test/MC/ARM/ltorg-range.s b/llvm/test/MC/ARM/ltorg-range.s index 88b9bb3cb5be80..5c27d4cd0df26f 100644 --- a/llvm/test/MC/ARM/ltorg-range.s +++ b/llvm/test/MC/ARM/ltorg-range.s @@ -1,5 +1,5 @@ @ RUN: llvm-mc -triple armv7-unknown-linux-gnueabi -filetype obj -o - %s \ -@ RUN: | llvm-objdump -d -z - | FileCheck %s +@ RUN: | llvm-objdump -d - | FileCheck %s ldr r0, =0x01020304 @ CHECK: ldr diff --git a/llvm/test/tools/llvm-objdump/ELF/AArch64/zeroes.test b/llvm/test/tools/llvm-objdump/ELF/AArch64/zeroes.test deleted file mode 100644 index a56d056f8a2256..00 --- a/llvm/test/tools/llvm-objdump/ELF/AArch64/zeroes.test +++ /dev/null @@ -1,66 +0,0 @@ -## Test zero dumping when a data mapping symbol is active. -# RUN: llvm-mc -filetype=obj -triple=aarch64 %s -o %t -# RUN: llvm-objdump -t -d %t | FileCheck %s - -# CHECK: SYMBOL TABLE: -# CHECK-NEXT: l .text $d -# CHECK-NEXT: 000c l .text $x -# CHECK-NEXT: 0010 l .text $d - -# CHECK: <_start>: -# CHECK-NEXT: ... -# CHECK-NEXT:8: 01 00 00 00 .word 0x0001 -# CHECK-NEXT:c: d503201f nop -# CHECK-NEXT: ... -# CHECK-NEXT: 18: d503201f nop -# CHECK-NEXT: ... -# CHECK-NEXT: 2c: d503201f nop -# CHECK-NEXT: ... -# CHECK-NEXT: 48: d503201f nop - -# RUN: llvm-objdump -d -z %t | FileCheck %s --check-prefix=ZERO - -# ZERO: <_start>: -# ZERO-NEXT:0: 00 00 00 00 .word 0x -# ZERO-NEXT:4: 00 00 00 00 .word 0x -# ZERO-NEXT:8: 01 00 00 00 .word 0x0001 -# ZERO-NEXT:c: d503201f nop -# ZERO-NEXT: 10: 00 00 00 00 .word 0x -# ZERO-NEXT: 14: 00 00 00 00 .word 0x -# ZERO-NEXT: 18: d503201f nop - -## Check we do not skip zeroes blocks if have relocations pointed to these places. -# RUN: llvm-objdump -d -r %t | FileCheck %s --check-prefix=RELOC - -# RELOC: <_start>: -# RELOC-NEXT: ... -# RELOC-NEXT:8: 01 00 00 00 .word 0x0001 -# RELOC-NEXT:c: d503201f nop -# RELOC-NEXT: ... -# RELOC-NEXT: 18: d503201f nop -# RELOC-NEXT: 1c: 00 00 00 00 .word 0x -# RELOC-NEXT: 001c: R_AARCH64_ABS64 x1 -# RELOC-NEXT: ... 
-# RELOC-NEXT: 2c: d503201f nop -# RELOC-NEXT: ... -# RELOC-NEXT: 38: 00 00 00 00 .word 0x -# RELOC-NEXT: 0038: R_AARCH64_ABS64 x2 -# RELOC-NEXT: ... -# RELOC-NEXT: 48: d503201f nop - -.globl _start -_start: - .space 8 - .long 1 - nop - .space 8 - nop - - .quad x1 - .space 8 - nop - - .space 8 - .quad x2 - .space 8 - nop diff --git a/llvm/test/tools/llvm-objdump/ELF/ARM/zeroes.test b/llvm/test/tools/llvm-objdump/ELF/ARM/zeroes.test deleted file mode 100644 index 8601343bd146e9..00 --- a/llvm/test/tools/llvm-objdump/ELF/ARM/zeroes.test +++ /dev/null @@ -1,47 +0,0 @@ -## Test zero dumping when a data mapping symbol is active. -# RUN: llvm-mc -filetype=obj -triple=armv7 %s -o %t -# RUN: llvm-objdump -t -d %t | FileCheck %s - -# CHECK: SYMBOL TABLE: -# CHECK-NEXT: l .text $d -# CHECK-NEXT: 000c l
[llvm-branch-commits] [llvm] [AMDGPU] Add tests for SIPreAllocateWWMRegs (PR #109963)
@@ -0,0 +1,26 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5 +# RUN: llc -mtriple=amdgcn -verify-machineinstrs -run-pass=si-pre-allocate-wwm-regs -o - -mcpu=tahiti %s | FileCheck %s + +--- + +name: pre_allocate_wwm_regs_strict +tracksRegLiveness: true +body: | Akshat-Oke wrote: What exactly should be serialized for allocated registers? I am working on serializing virtual register flags (currently there is only one WWM_REG) https://github.com/llvm/llvm-project/pull/109963 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [NVPTX] add address class for variables with a single register location (PR #110030)
llvmbot wrote: @llvm/pr-subscribers-debuginfo Author: William G Hatch (willghatch) Changes This is the final piece to enable register debugging for variables in registers that have single locations that last throughout their enclosing scope. The next step after this for supporting register debugging for NVPTX is to support the .debug_loc section. Stacked on top of: https://github.com/llvm/llvm-project/pull/109495 --- Patch is 357.48 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/110030.diff 4 Files Affected: - (modified) llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp (+8) - (modified) llvm/test/DebugInfo/NVPTX/cu-range-hole.ll (+15-11) - (modified) llvm/test/DebugInfo/NVPTX/debug-addr-class.ll (+20-16) - (modified) llvm/test/DebugInfo/NVPTX/debug-info.ll (+1123-1120) ``diff diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp index 0a1ff189bedbc4..a52699e4fe38e2 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp @@ -779,6 +779,14 @@ DIE *DwarfCompileUnit::constructVariableDIE(DbgVariable &DV, bool Abstract) { void DwarfCompileUnit::applyConcreteDbgVariableAttributes( const Loc::Single &Single, const DbgVariable &DV, DIE &VariableDie) { const DbgValueLoc *DVal = &Single.getValueLoc(); + if (Asm->TM.getTargetTriple().isNVPTX() && DD->tuneForGDB() && + !Single.getExpr()) { + +// Lack of expression means it is a register. Registers for PTX need to +// be marked with DW_AT_address_class = 2. 
See +// https://docs.nvidia.com/cuda/archive/10.0/ptx-writers-guide-to-interoperability/index.html#cuda-specific-dwarf +addUInt(VariableDie, dwarf::DW_AT_address_class, dwarf::DW_FORM_data1, 2); + } if (!DVal->isVariadic()) { const DbgValueLocEntry *Entry = DVal->getLocEntries().begin(); if (Entry->isLocation()) { diff --git a/llvm/test/DebugInfo/NVPTX/cu-range-hole.ll b/llvm/test/DebugInfo/NVPTX/cu-range-hole.ll index 6acc1ba2512711..2d927b18d976d9 100644 --- a/llvm/test/DebugInfo/NVPTX/cu-range-hole.ll +++ b/llvm/test/DebugInfo/NVPTX/cu-range-hole.ll @@ -120,6 +120,8 @@ entry: ; CHECK-NEXT: .b8 3// Abbreviation Code ; CHECK-NEXT: .b8 5// DW_TAG_formal_parameter ; CHECK-NEXT: .b8 0// DW_CHILDREN_no +; CHECK-NEXT: .b8 51 // DW_AT_address_class +; CHECK-NEXT: .b8 11 // DW_FORM_data1 ; CHECK-NEXT: .b8 2// DW_AT_location ; CHECK-NEXT: .b8 10 // DW_FORM_block1 ; CHECK-NEXT: .b8 3// DW_AT_name @@ -147,12 +149,12 @@ entry: ; CHECK-NEXT: } ; CHECK-NEXT: .section .debug_info ; CHECK-NEXT: { -; CHECK-NEXT: .b32 195 // Length of Unit +; CHECK-NEXT: .b32 197 // Length of Unit ; CHECK-NEXT: .b8 2// DWARF version number ; CHECK-NEXT: .b8 0 ; CHECK-NEXT: .b32 .debug_abbrev // Offset Into Abbrev. 
Section ; CHECK-NEXT: .b8 8// Address Size (in bytes) -; CHECK-NEXT: .b8 1// Abbrev [1] 0xb:0xbc DW_TAG_compile_unit +; CHECK-NEXT: .b8 1// Abbrev [1] 0xb:0xbe DW_TAG_compile_unit ; CHECK-NEXT: .b8 99 // DW_AT_producer ; CHECK-NEXT: .b8 108 ; CHECK-NEXT: .b8 97 @@ -225,7 +227,7 @@ entry: ; CHECK-NEXT: .b8 0 ; CHECK-NEXT: .b64 $L__func_begin0 // DW_AT_low_pc ; CHECK-NEXT: .b64 $L__func_end2 // DW_AT_high_pc -; CHECK-NEXT: .b8 2// Abbrev [2] 0x65:0x2d DW_TAG_subprogram +; CHECK-NEXT: .b8 2// Abbrev [2] 0x65:0x2e DW_TAG_subprogram ; CHECK-NEXT: .b64 $L__func_begin0 // DW_AT_low_pc ; CHECK-NEXT: .b64 $L__func_end0 // DW_AT_high_pc ; CHECK-NEXT: .b8 1// DW_AT_frame_base @@ -235,9 +237,10 @@ entry: ; CHECK-NEXT: .b8 1// DW_AT_decl_file ; CHECK-NEXT: .b8 1// DW_AT_decl_line ; CHECK-NEXT: .b8 1// DW_AT_prototyped -; CHECK-NEXT: .b32 191 // DW_AT_type +; CHECK-NEXT: .b32 193 // DW_AT_type ; CHECK-NEXT: .b8 1// DW_AT_external -; CHECK-NEXT: .b8 3// Abbrev [3] 0x82:0xf DW_TAG_formal_parameter +; CHECK-NEXT: .b8 3// Abbrev [3] 0x82:0x10 DW_TAG_formal_parameter +; CHECK-NEXT: .b8 2// DW_AT_address_class ; CHECK-NEXT: .b8 5// DW_AT_loca
[llvm-branch-commits] [llvm] [Attributor] Take the address space from addrspacecast directly (PR #108258)
@@ -12571,17 +12571,59 @@ struct AAAddressSpaceImpl : public AAAddressSpace { void initialize(Attributor &A) override { assert(getAssociatedType()->isPtrOrPtrVectorTy() && "Associated value is not a pointer"); -if (getAssociatedType()->getPointerAddressSpace()) +// If the pointer already has non-generic address space, we assume it is the +// correct one. +if (getAssociatedType()->getPointerAddressSpace()) { + [[maybe_unused]] bool R = + takeAddressSpace(getAssociatedType()->getPointerAddressSpace()); + assert(R && "the take should happen"); indicateOptimisticFixpoint(); + return; +} +// If the pointer is an addrspacecast, we assume the source address space is +// the correct one. +Value *V = &getAssociatedValue(); +if (auto *ASC = dyn_cast(V)) { + [[maybe_unused]] bool R = takeAddressSpace(ASC->getSrcAddressSpace()); + assert(R && "the take should happen"); + indicateOptimisticFixpoint(); + return; +} +if (auto *C = dyn_cast(V)) { + if (C->getOpcode() == Instruction::AddrSpaceCast) { +[[maybe_unused]] bool R = takeAddressSpace( +C->getOperand(0)->getType()->getPointerAddressSpace()); +assert(R && "the take should happen"); +indicateOptimisticFixpoint(); +return; + } +} } ChangeStatus updateImpl(Attributor &A) override { -int32_t OldAddressSpace = AssumedAddressSpace; +uint32_t OldAddressSpace = AssumedAddressSpace; auto *AUO = A.getOrCreateAAFor(getIRPosition(), this, DepClassTy::REQUIRED); auto Pred = [&](Value &Obj) { if (isa(&Obj)) return true; + // If an argument in generic address space has addrspace cast uses, and + // those casts are same, then we take the dst addrspace. + if (auto *Arg = dyn_cast(&Obj)) { shiltian wrote: If that's the case, the `AMDGPUPromoteKernelArgumentsPass` should not exist at the first place. Both Flang and Clang (for OpenMP target offloading) still generate generic pointer. HIP and OpenCL front ends generate AS1 pointer, which is convenient. 
https://github.com/llvm/llvm-project/pull/108258 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [NVPTX] add address class for variables with a single register location (PR #110030)
https://github.com/walter-erquinigo approved this pull request. amazing https://github.com/llvm/llvm-project/pull/110030 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [NVPTX] add support for encoding PTX registers for DWARF (PR #109495)
https://github.com/walter-erquinigo approved this pull request. https://github.com/llvm/llvm-project/pull/109495 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [mlir] 153a49a - Revert "[MLIR] Make `OneShotModuleBufferize` use `OpInterface` (#107295)"
Author: Matthias Springer Date: 2024-09-25T09:17:16+02:00 New Revision: 153a49ab4875fb6c7bb69d9efc1e5ce35f2b61f9 URL: https://github.com/llvm/llvm-project/commit/153a49ab4875fb6c7bb69d9efc1e5ce35f2b61f9 DIFF: https://github.com/llvm/llvm-project/commit/153a49ab4875fb6c7bb69d9efc1e5ce35f2b61f9.diff LOG: Revert "[MLIR] Make `OneShotModuleBufferize` use `OpInterface` (#107295)" This reverts commit f586b1e3f42788025aa6f55be70c5e361cc8b529. Added: Modified: mlir/include/mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h mlir/include/mlir/Dialect/Bufferization/Transforms/FuncBufferizableOpInterfaceImpl.h mlir/lib/Dialect/Bufferization/IR/BufferizableOpInterface.cpp mlir/lib/Dialect/Bufferization/Transforms/FuncBufferizableOpInterfaceImpl.cpp mlir/lib/Dialect/Bufferization/Transforms/OneShotModuleBufferize.cpp mlir/test/Dialect/Bufferization/Transforms/transform-ops.mlir mlir/test/Dialect/LLVM/transform-e2e.mlir mlir/test/Dialect/Linalg/matmul-shared-memory-padding.mlir mlir/test/Dialect/Linalg/pad-to-specific-memory-space.mlir mlir/test/Dialect/Vector/transform-vector.mlir mlir/test/Examples/transform/ChH/full.mlir Removed: diff --git a/mlir/include/mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h b/mlir/include/mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h index d19687ec9afee1..aceb9d059b95f3 100644 --- a/mlir/include/mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h +++ b/mlir/include/mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h @@ -11,7 +11,6 @@ #include "mlir/IR/Operation.h" #include "mlir/IR/PatternMatch.h" -#include "mlir/Interfaces/FunctionInterfaces.h" #include "mlir/Support/LLVM.h" #include "llvm/ADT/DenseMapInfoVariant.h" #include "llvm/ADT/SetVector.h" @@ -261,9 +260,9 @@ struct BufferizationOptions { using AnalysisStateInitFn = std::function; /// Tensor -> MemRef type converter. 
/// Parameters: Value, memory space, func op, bufferization options - using FunctionArgTypeConverterFn = std::function; + using FunctionArgTypeConverterFn = + std::function; /// Tensor -> MemRef type converter. /// Parameters: Value, memory space, bufferization options using UnknownTypeConverterFn = std::function equivalentFuncArgs; + DenseMap equivalentFuncArgs; /// A mapping of FuncOp BBArg indices to aliasing ReturnOp OpOperand indices. - DenseMap aliasingReturnVals; + DenseMap aliasingReturnVals; /// A set of all read BlockArguments of FuncOps. - DenseMap readBbArgs; + DenseMap readBbArgs; /// A set of all written-to BlockArguments of FuncOps. - DenseMap writtenBbArgs; + DenseMap writtenBbArgs; /// Keep track of which FuncOps are fully analyzed or currently being /// analyzed. - DenseMap analyzedFuncOps; + DenseMap analyzedFuncOps; /// This function is called right before analyzing the given FuncOp. It /// initializes the data structures for the FuncOp in this state object. - void startFunctionAnalysis(FunctionOpInterface funcOp); + void startFunctionAnalysis(FuncOp funcOp); }; void registerBufferizableOpInterfaceExternalModels(DialectRegistry &registry); diff --git a/mlir/lib/Dialect/Bufferization/IR/BufferizableOpInterface.cpp b/mlir/lib/Dialect/Bufferization/IR/BufferizableOpInterface.cpp index 92f757111cbaf7..85604eef2f2830 100644 --- a/mlir/lib/Dialect/Bufferization/IR/BufferizableOpInterface.cpp +++ b/mlir/lib/Dialect/Bufferization/IR/BufferizableOpInterface.cpp @@ -18,7 +18,6 @@ #include "mlir/IR/TypeUtilities.h" #include "mlir/IR/Value.h" #include "mlir/Interfaces/ControlFlowInterfaces.h" -#include "mlir/Interfaces/FunctionInterfaces.h" #include "llvm/ADT/ScopeExit.h" #include "llvm/Support/Debug.h" @@ -315,7 +314,7 @@ namespace { /// Default function arg type converter: Use a fully dynamic layout map. 
BaseMemRefType defaultFunctionArgTypeConverter(TensorType type, Attribute memorySpace, -FunctionOpInterface funcOp, +func::FuncOp funcOp, const BufferizationOptions &options) { return getMemRefTypeWithFullyDynamicLayout(type, memorySpace); } @@ -362,7 +361,7 @@ BufferizationOptions::dynCastBufferizableOp(Value value) const { void BufferizationOptions::setFunctionBoundaryTypeConversion( LayoutMapOption layoutMapOption) { functionArgTypeConverterFn = [=](TensorType tensorType, Attribute memorySpace, - FunctionOpInterface funcOp, + func::FuncOp funcOp, const BufferizationOptions &options) { if (layoutMapOption == LayoutMapOption::IdentityLayoutMap) return bufferization::getMemRefTypeWithStaticIdentityLayout(tensorType, diff --git a/mlir/lib/Dialect/Bufferization/Transforms/FuncBufferizableOpInterfaceImpl.cpp b/mlir/
[llvm-branch-commits] [llvm] release/19.x: [SDAG] Honor signed arguments in floating point libcalls (#109134) (PR #109920)
llvmbot wrote: @arsenm What do you think about merging this PR to the release branch? https://github.com/llvm/llvm-project/pull/109920 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] release/19.x: [SDAG] Honor signed arguments in floating point libcalls (#109134) (PR #109920)
https://github.com/llvmbot created https://github.com/llvm/llvm-project/pull/109920 Backport 90c14748638f1e10e31173b145fdbb5c4529c922 Requested by: @nikic >From 55eaa21656d6a9d55732207c25cdb0825cf4be2e Mon Sep 17 00:00:00 2001 From: Timothy Pearson <162513562+tpearson-...@users.noreply.github.com> Date: Wed, 25 Sep 2024 02:09:50 -0500 Subject: [PATCH] [SDAG] Honor signed arguments in floating point libcalls (#109134) In ExpandFPLibCall, an assumption is made that all floating point libcalls that take integer arguments use unsigned integers. In the case of ldexp and frexp, this assumption is incorrect, leading to miscompilation and subsequent target-dependent incorrect operation. Indicate that ldexp and frexp utilize signed arguments in ExpandFPLibCall. Fixes #108904 Signed-off-by: Timothy Pearson (cherry picked from commit 90c14748638f1e10e31173b145fdbb5c4529c922) --- llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 3 +- llvm/test/CodeGen/PowerPC/ldexp-libcall.ll| 4 +- llvm/test/CodeGen/PowerPC/ldexp.ll| 36 ++ .../PowerPC/negative-integer-fp-libcall.ll| 26 +++ .../X86/fold-int-pow2-with-fmul-or-fdiv.ll| 69 --- 5 files changed, 96 insertions(+), 42 deletions(-) create mode 100644 llvm/test/CodeGen/PowerPC/negative-integer-fp-libcall.ll diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index 7f5b46af01c62f..4b25f553ffae91 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -2190,7 +2190,8 @@ void SelectionDAGLegalize::ExpandFPLibCall(SDNode* Node, Results.push_back(Tmp.first); Results.push_back(Tmp.second); } else { -SDValue Tmp = ExpandLibCall(LC, Node, false).first; +bool IsSignedArgument = Node->getOpcode() == ISD::FLDEXP; +SDValue Tmp = ExpandLibCall(LC, Node, IsSignedArgument).first; Results.push_back(Tmp); } } diff --git a/llvm/test/CodeGen/PowerPC/ldexp-libcall.ll b/llvm/test/CodeGen/PowerPC/ldexp-libcall.ll index 6144a9d9203651..e531516c37e87e 
100644 --- a/llvm/test/CodeGen/PowerPC/ldexp-libcall.ll +++ b/llvm/test/CodeGen/PowerPC/ldexp-libcall.ll @@ -10,7 +10,7 @@ define float @call_ldexpf(float %a, i32 %b) { ; CHECK-NEXT:std r0, 48(r1) ; CHECK-NEXT:.cfi_def_cfa_offset 32 ; CHECK-NEXT:.cfi_offset lr, 16 -; CHECK-NEXT:clrldi r4, r4, 32 +; CHECK-NEXT:extsw r4, r4 ; CHECK-NEXT:bl ldexpf ; CHECK-NEXT:nop ; CHECK-NEXT:addi r1, r1, 32 @@ -29,7 +29,7 @@ define double @call_ldexp(double %a, i32 %b) { ; CHECK-NEXT:std r0, 48(r1) ; CHECK-NEXT:.cfi_def_cfa_offset 32 ; CHECK-NEXT:.cfi_offset lr, 16 -; CHECK-NEXT:clrldi r4, r4, 32 +; CHECK-NEXT:extsw r4, r4 ; CHECK-NEXT:bl ldexp ; CHECK-NEXT:nop ; CHECK-NEXT:addi r1, r1, 32 diff --git a/llvm/test/CodeGen/PowerPC/ldexp.ll b/llvm/test/CodeGen/PowerPC/ldexp.ll index 151df6096b30bd..ffc826cc86de59 100644 --- a/llvm/test/CodeGen/PowerPC/ldexp.ll +++ b/llvm/test/CodeGen/PowerPC/ldexp.ll @@ -57,22 +57,24 @@ define <2 x float> @ldexp_v2f32(<2 x float> %val, <2 x i32> %exp) { ; CHECK-NEXT:.cfi_offset v29, -48 ; CHECK-NEXT:.cfi_offset v30, -32 ; CHECK-NEXT:.cfi_offset v31, -16 -; CHECK-NEXT:xxsldwi vs0, v2, v2, 3 ; CHECK-NEXT:li r3, 0 +; CHECK-NEXT:xxsldwi vs0, v2, v2, 3 ; CHECK-NEXT:stxv v29, 32(r1) # 16-byte Folded Spill ; CHECK-NEXT:xscvspdpn f1, vs0 -; CHECK-NEXT:vextuwrx r4, r3, v3 +; CHECK-NEXT:vextuwrx r3, r3, v3 ; CHECK-NEXT:stxv v30, 48(r1) # 16-byte Folded Spill ; CHECK-NEXT:stxv v31, 64(r1) # 16-byte Folded Spill +; CHECK-NEXT:extsw r4, r3 ; CHECK-NEXT:vmr v31, v3 ; CHECK-NEXT:vmr v30, v2 ; CHECK-NEXT:bl ldexpf ; CHECK-NEXT:nop -; CHECK-NEXT:xxswapd vs0, v30 ; CHECK-NEXT:li r3, 4 +; CHECK-NEXT:xxswapd vs0, v30 ; CHECK-NEXT:xscvdpspn v29, f1 ; CHECK-NEXT:xscvspdpn f1, vs0 -; CHECK-NEXT:vextuwrx r4, r3, v31 +; CHECK-NEXT:vextuwrx r3, r3, v31 +; CHECK-NEXT:extsw r4, r3 ; CHECK-NEXT:bl ldexpf ; CHECK-NEXT:nop ; CHECK-NEXT:xscvdpspn vs0, f1 @@ -100,35 +102,39 @@ define <4 x float> @ldexp_v4f32(<4 x float> %val, <4 x i32> %exp) { ; CHECK-NEXT:.cfi_offset v29, -48 ; 
CHECK-NEXT:.cfi_offset v30, -32 ; CHECK-NEXT:.cfi_offset v31, -16 -; CHECK-NEXT:li r3, 12 -; CHECK-NEXT:xscvspdpn f1, v2 +; CHECK-NEXT:li r3, 4 +; CHECK-NEXT:xxswapd vs0, v2 ; CHECK-NEXT:stxv v28, 32(r1) # 16-byte Folded Spill +; CHECK-NEXT:xscvspdpn f1, vs0 +; CHECK-NEXT:vextuwrx r3, r3, v3 ; CHECK-NEXT:stxv v29, 48(r1) # 16-byte Folded Spill ; CHECK-NEXT:stxv v30, 64(r1) # 16-byte Folded Spill ; CHECK-NEXT:stxv v31, 80(r1) # 16-byte Folded Spill ; CHECK-NEXT:vmr v31, v3 +; CHECK-NEXT:extsw r4, r3 ; CHECK-NEXT:vmr v30, v2 -; CHECK-NEXT:vextuwrx r4, r3, v3 ; CHECK-NEXT:bl ldexpf ; CHECK-NEXT:no
[llvm-branch-commits] [llvm] release/19.x: [SDAG] Honor signed arguments in floating point libcalls (#109134) (PR #109920)
https://github.com/llvmbot milestoned https://github.com/llvm/llvm-project/pull/109920 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] release/19.x: [SDAG] Honor signed arguments in floating point libcalls (#109134) (PR #109920)
llvmbot wrote: @llvm/pr-subscribers-llvm-selectiondag Author: None (llvmbot) Changes Backport 90c14748638f1e10e31173b145fdbb5c4529c922 Requested by: @nikic --- Full diff: https://github.com/llvm/llvm-project/pull/109920.diff 5 Files Affected: - (modified) llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp (+2-1) - (modified) llvm/test/CodeGen/PowerPC/ldexp-libcall.ll (+2-2) - (modified) llvm/test/CodeGen/PowerPC/ldexp.ll (+21-15) - (added) llvm/test/CodeGen/PowerPC/negative-integer-fp-libcall.ll (+26) - (modified) llvm/test/CodeGen/X86/fold-int-pow2-with-fmul-or-fdiv.ll (+45-24) ``diff diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index 7f5b46af01c62f..4b25f553ffae91 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -2190,7 +2190,8 @@ void SelectionDAGLegalize::ExpandFPLibCall(SDNode* Node, Results.push_back(Tmp.first); Results.push_back(Tmp.second); } else { -SDValue Tmp = ExpandLibCall(LC, Node, false).first; +bool IsSignedArgument = Node->getOpcode() == ISD::FLDEXP; +SDValue Tmp = ExpandLibCall(LC, Node, IsSignedArgument).first; Results.push_back(Tmp); } } diff --git a/llvm/test/CodeGen/PowerPC/ldexp-libcall.ll b/llvm/test/CodeGen/PowerPC/ldexp-libcall.ll index 6144a9d9203651..e531516c37e87e 100644 --- a/llvm/test/CodeGen/PowerPC/ldexp-libcall.ll +++ b/llvm/test/CodeGen/PowerPC/ldexp-libcall.ll @@ -10,7 +10,7 @@ define float @call_ldexpf(float %a, i32 %b) { ; CHECK-NEXT:std r0, 48(r1) ; CHECK-NEXT:.cfi_def_cfa_offset 32 ; CHECK-NEXT:.cfi_offset lr, 16 -; CHECK-NEXT:clrldi r4, r4, 32 +; CHECK-NEXT:extsw r4, r4 ; CHECK-NEXT:bl ldexpf ; CHECK-NEXT:nop ; CHECK-NEXT:addi r1, r1, 32 @@ -29,7 +29,7 @@ define double @call_ldexp(double %a, i32 %b) { ; CHECK-NEXT:std r0, 48(r1) ; CHECK-NEXT:.cfi_def_cfa_offset 32 ; CHECK-NEXT:.cfi_offset lr, 16 -; CHECK-NEXT:clrldi r4, r4, 32 +; CHECK-NEXT:extsw r4, r4 ; CHECK-NEXT:bl ldexp ; CHECK-NEXT:nop ; 
CHECK-NEXT:addi r1, r1, 32 diff --git a/llvm/test/CodeGen/PowerPC/ldexp.ll b/llvm/test/CodeGen/PowerPC/ldexp.ll index 151df6096b30bd..ffc826cc86de59 100644 --- a/llvm/test/CodeGen/PowerPC/ldexp.ll +++ b/llvm/test/CodeGen/PowerPC/ldexp.ll @@ -57,22 +57,24 @@ define <2 x float> @ldexp_v2f32(<2 x float> %val, <2 x i32> %exp) { ; CHECK-NEXT:.cfi_offset v29, -48 ; CHECK-NEXT:.cfi_offset v30, -32 ; CHECK-NEXT:.cfi_offset v31, -16 -; CHECK-NEXT:xxsldwi vs0, v2, v2, 3 ; CHECK-NEXT:li r3, 0 +; CHECK-NEXT:xxsldwi vs0, v2, v2, 3 ; CHECK-NEXT:stxv v29, 32(r1) # 16-byte Folded Spill ; CHECK-NEXT:xscvspdpn f1, vs0 -; CHECK-NEXT:vextuwrx r4, r3, v3 +; CHECK-NEXT:vextuwrx r3, r3, v3 ; CHECK-NEXT:stxv v30, 48(r1) # 16-byte Folded Spill ; CHECK-NEXT:stxv v31, 64(r1) # 16-byte Folded Spill +; CHECK-NEXT:extsw r4, r3 ; CHECK-NEXT:vmr v31, v3 ; CHECK-NEXT:vmr v30, v2 ; CHECK-NEXT:bl ldexpf ; CHECK-NEXT:nop -; CHECK-NEXT:xxswapd vs0, v30 ; CHECK-NEXT:li r3, 4 +; CHECK-NEXT:xxswapd vs0, v30 ; CHECK-NEXT:xscvdpspn v29, f1 ; CHECK-NEXT:xscvspdpn f1, vs0 -; CHECK-NEXT:vextuwrx r4, r3, v31 +; CHECK-NEXT:vextuwrx r3, r3, v31 +; CHECK-NEXT:extsw r4, r3 ; CHECK-NEXT:bl ldexpf ; CHECK-NEXT:nop ; CHECK-NEXT:xscvdpspn vs0, f1 @@ -100,35 +102,39 @@ define <4 x float> @ldexp_v4f32(<4 x float> %val, <4 x i32> %exp) { ; CHECK-NEXT:.cfi_offset v29, -48 ; CHECK-NEXT:.cfi_offset v30, -32 ; CHECK-NEXT:.cfi_offset v31, -16 -; CHECK-NEXT:li r3, 12 -; CHECK-NEXT:xscvspdpn f1, v2 +; CHECK-NEXT:li r3, 4 +; CHECK-NEXT:xxswapd vs0, v2 ; CHECK-NEXT:stxv v28, 32(r1) # 16-byte Folded Spill +; CHECK-NEXT:xscvspdpn f1, vs0 +; CHECK-NEXT:vextuwrx r3, r3, v3 ; CHECK-NEXT:stxv v29, 48(r1) # 16-byte Folded Spill ; CHECK-NEXT:stxv v30, 64(r1) # 16-byte Folded Spill ; CHECK-NEXT:stxv v31, 80(r1) # 16-byte Folded Spill ; CHECK-NEXT:vmr v31, v3 +; CHECK-NEXT:extsw r4, r3 ; CHECK-NEXT:vmr v30, v2 -; CHECK-NEXT:vextuwrx r4, r3, v3 ; CHECK-NEXT:bl ldexpf ; CHECK-NEXT:nop -; CHECK-NEXT:xxswapd vs0, v30 -; CHECK-NEXT:li r3, 4 +; 
CHECK-NEXT:li r3, 12 ; CHECK-NEXT:xscpsgndp v29, f1, f1 -; CHECK-NEXT:xscvspdpn f1, vs0 -; CHECK-NEXT:vextuwrx r4, r3, v31 +; CHECK-NEXT:xscvspdpn f1, v30 +; CHECK-NEXT:vextuwrx r3, r3, v31 +; CHECK-NEXT:extsw r4, r3 ; CHECK-NEXT:bl ldexpf ; CHECK-NEXT:nop -; CHECK-NEXT:xxmrghd vs0, v29, vs1 +; CHECK-NEXT:xxmrghd vs0, vs1, v29 ; CHECK-NEXT:li r3, 0 -; CHECK-NEXT:vextuwrx r4, r3, v31 +; CHECK-NEXT:vextuwrx r3, r3, v31 ; CHECK-NEXT:xvcvdpsp v28, vs0 ; CHECK-NEXT:xxsldwi vs0, v30, v30, 3 +; CHECK-NEXT:extsw r4, r3 ; CHECK-NEXT:xscvspdpn f1, vs0 ; CHECK-NEXT:
[llvm-branch-commits] [llvm] release/19.x: [SDAG] Honor signed arguments in floating point libcalls (#109134) (PR #109920)
llvmbot wrote: @llvm/pr-subscribers-backend-x86 Author: None (llvmbot) Changes Backport 90c14748638f1e10e31173b145fdbb5c4529c922 Requested by: @nikic --- Full diff: https://github.com/llvm/llvm-project/pull/109920.diff 5 Files Affected: - (modified) llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp (+2-1) - (modified) llvm/test/CodeGen/PowerPC/ldexp-libcall.ll (+2-2) - (modified) llvm/test/CodeGen/PowerPC/ldexp.ll (+21-15) - (added) llvm/test/CodeGen/PowerPC/negative-integer-fp-libcall.ll (+26) - (modified) llvm/test/CodeGen/X86/fold-int-pow2-with-fmul-or-fdiv.ll (+45-24) ``diff diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index 7f5b46af01c62f..4b25f553ffae91 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -2190,7 +2190,8 @@ void SelectionDAGLegalize::ExpandFPLibCall(SDNode* Node, Results.push_back(Tmp.first); Results.push_back(Tmp.second); } else { -SDValue Tmp = ExpandLibCall(LC, Node, false).first; +bool IsSignedArgument = Node->getOpcode() == ISD::FLDEXP; +SDValue Tmp = ExpandLibCall(LC, Node, IsSignedArgument).first; Results.push_back(Tmp); } } diff --git a/llvm/test/CodeGen/PowerPC/ldexp-libcall.ll b/llvm/test/CodeGen/PowerPC/ldexp-libcall.ll index 6144a9d9203651..e531516c37e87e 100644 --- a/llvm/test/CodeGen/PowerPC/ldexp-libcall.ll +++ b/llvm/test/CodeGen/PowerPC/ldexp-libcall.ll @@ -10,7 +10,7 @@ define float @call_ldexpf(float %a, i32 %b) { ; CHECK-NEXT:std r0, 48(r1) ; CHECK-NEXT:.cfi_def_cfa_offset 32 ; CHECK-NEXT:.cfi_offset lr, 16 -; CHECK-NEXT:clrldi r4, r4, 32 +; CHECK-NEXT:extsw r4, r4 ; CHECK-NEXT:bl ldexpf ; CHECK-NEXT:nop ; CHECK-NEXT:addi r1, r1, 32 @@ -29,7 +29,7 @@ define double @call_ldexp(double %a, i32 %b) { ; CHECK-NEXT:std r0, 48(r1) ; CHECK-NEXT:.cfi_def_cfa_offset 32 ; CHECK-NEXT:.cfi_offset lr, 16 -; CHECK-NEXT:clrldi r4, r4, 32 +; CHECK-NEXT:extsw r4, r4 ; CHECK-NEXT:bl ldexp ; CHECK-NEXT:nop ; CHECK-NEXT:addi 
r1, r1, 32 diff --git a/llvm/test/CodeGen/PowerPC/ldexp.ll b/llvm/test/CodeGen/PowerPC/ldexp.ll index 151df6096b30bd..ffc826cc86de59 100644 --- a/llvm/test/CodeGen/PowerPC/ldexp.ll +++ b/llvm/test/CodeGen/PowerPC/ldexp.ll @@ -57,22 +57,24 @@ define <2 x float> @ldexp_v2f32(<2 x float> %val, <2 x i32> %exp) { ; CHECK-NEXT:.cfi_offset v29, -48 ; CHECK-NEXT:.cfi_offset v30, -32 ; CHECK-NEXT:.cfi_offset v31, -16 -; CHECK-NEXT:xxsldwi vs0, v2, v2, 3 ; CHECK-NEXT:li r3, 0 +; CHECK-NEXT:xxsldwi vs0, v2, v2, 3 ; CHECK-NEXT:stxv v29, 32(r1) # 16-byte Folded Spill ; CHECK-NEXT:xscvspdpn f1, vs0 -; CHECK-NEXT:vextuwrx r4, r3, v3 +; CHECK-NEXT:vextuwrx r3, r3, v3 ; CHECK-NEXT:stxv v30, 48(r1) # 16-byte Folded Spill ; CHECK-NEXT:stxv v31, 64(r1) # 16-byte Folded Spill +; CHECK-NEXT:extsw r4, r3 ; CHECK-NEXT:vmr v31, v3 ; CHECK-NEXT:vmr v30, v2 ; CHECK-NEXT:bl ldexpf ; CHECK-NEXT:nop -; CHECK-NEXT:xxswapd vs0, v30 ; CHECK-NEXT:li r3, 4 +; CHECK-NEXT:xxswapd vs0, v30 ; CHECK-NEXT:xscvdpspn v29, f1 ; CHECK-NEXT:xscvspdpn f1, vs0 -; CHECK-NEXT:vextuwrx r4, r3, v31 +; CHECK-NEXT:vextuwrx r3, r3, v31 +; CHECK-NEXT:extsw r4, r3 ; CHECK-NEXT:bl ldexpf ; CHECK-NEXT:nop ; CHECK-NEXT:xscvdpspn vs0, f1 @@ -100,35 +102,39 @@ define <4 x float> @ldexp_v4f32(<4 x float> %val, <4 x i32> %exp) { ; CHECK-NEXT:.cfi_offset v29, -48 ; CHECK-NEXT:.cfi_offset v30, -32 ; CHECK-NEXT:.cfi_offset v31, -16 -; CHECK-NEXT:li r3, 12 -; CHECK-NEXT:xscvspdpn f1, v2 +; CHECK-NEXT:li r3, 4 +; CHECK-NEXT:xxswapd vs0, v2 ; CHECK-NEXT:stxv v28, 32(r1) # 16-byte Folded Spill +; CHECK-NEXT:xscvspdpn f1, vs0 +; CHECK-NEXT:vextuwrx r3, r3, v3 ; CHECK-NEXT:stxv v29, 48(r1) # 16-byte Folded Spill ; CHECK-NEXT:stxv v30, 64(r1) # 16-byte Folded Spill ; CHECK-NEXT:stxv v31, 80(r1) # 16-byte Folded Spill ; CHECK-NEXT:vmr v31, v3 +; CHECK-NEXT:extsw r4, r3 ; CHECK-NEXT:vmr v30, v2 -; CHECK-NEXT:vextuwrx r4, r3, v3 ; CHECK-NEXT:bl ldexpf ; CHECK-NEXT:nop -; CHECK-NEXT:xxswapd vs0, v30 -; CHECK-NEXT:li r3, 4 +; CHECK-NEXT:li 
r3, 12 ; CHECK-NEXT:xscpsgndp v29, f1, f1 -; CHECK-NEXT:xscvspdpn f1, vs0 -; CHECK-NEXT:vextuwrx r4, r3, v31 +; CHECK-NEXT:xscvspdpn f1, v30 +; CHECK-NEXT:vextuwrx r3, r3, v31 +; CHECK-NEXT:extsw r4, r3 ; CHECK-NEXT:bl ldexpf ; CHECK-NEXT:nop -; CHECK-NEXT:xxmrghd vs0, v29, vs1 +; CHECK-NEXT:xxmrghd vs0, vs1, v29 ; CHECK-NEXT:li r3, 0 -; CHECK-NEXT:vextuwrx r4, r3, v31 +; CHECK-NEXT:vextuwrx r3, r3, v31 ; CHECK-NEXT:xvcvdpsp v28, vs0 ; CHECK-NEXT:xxsldwi vs0, v30, v30, 3 +; CHECK-NEXT:extsw r4, r3 ; CHECK-NEXT:xscvspdpn f1, vs0 ; CHECK-NEXT:bl l
[llvm-branch-commits] [llvm] release/19.x: [SDAG] Honor signed arguments in floating point libcalls (#109134) (PR #109920)
github-actions[bot] wrote: ⚠️ We detected that you are using a GitHub private e-mail address to contribute to the repo. Please turn off [Keep my email addresses private](https://github.com/settings/emails) setting in your account. See [LLVM Discourse](https://discourse.llvm.org/t/hidden-emails-on-github-should-we-do-something-about-it) for more information. https://github.com/llvm/llvm-project/pull/109920 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] release/19.x: [SDAG] Honor signed arguments in floating point libcalls (#109134) (PR #109920)
https://github.com/arsenm approved this pull request. https://github.com/llvm/llvm-project/pull/109920 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [ADT] Add more useful methods to SmallSet API (PR #108601)
https://github.com/vhscampos updated https://github.com/llvm/llvm-project/pull/108601 >From 12b657a4761351d52fccb93ce52e64c3c1b1e91f Mon Sep 17 00:00:00 2001 From: Victor Campos Date: Fri, 9 Aug 2024 14:00:32 +0100 Subject: [PATCH 1/3] [ADT] Add more useful methods to SmallSet API This patch adds useful methods to the SmallSet API: - Constructor that takes pair of iterators. - Constructor that takes a range. - Constructor that takes an initializer list. - Copy constructor. - Move constructor. - Copy assignment operator. - Move assignment operator. --- llvm/include/llvm/ADT/SmallSet.h| 17 llvm/unittests/ADT/SmallSetTest.cpp | 60 + 2 files changed, 77 insertions(+) diff --git a/llvm/include/llvm/ADT/SmallSet.h b/llvm/include/llvm/ADT/SmallSet.h index 56259ea7cf9d0f..431fdee56c20e0 100644 --- a/llvm/include/llvm/ADT/SmallSet.h +++ b/llvm/include/llvm/ADT/SmallSet.h @@ -19,6 +19,7 @@ #include "llvm/ADT/iterator.h" #include #include +#include #include #include @@ -147,6 +148,22 @@ class SmallSet { using const_iterator = SmallSetIterator; SmallSet() = default; + SmallSet(const SmallSet &) = default; + SmallSet(SmallSet &&) = default; + + template SmallSet(IterT Begin, IterT End) { +this->insert(Begin, End); + } + + template + explicit SmallSet(const iterator_range &R) { +this->insert(R.begin(), R.end()); + } + + SmallSet(std::initializer_list L) { this->insert(L.begin(), L.end()); } + + SmallSet &operator=(const SmallSet &) = default; + SmallSet &operator=(SmallSet &&) = default; [[nodiscard]] bool empty() const { return Vector.empty() && Set.empty(); } diff --git a/llvm/unittests/ADT/SmallSetTest.cpp b/llvm/unittests/ADT/SmallSetTest.cpp index 0fb20b19df9254..8219bf6f4b4c55 100644 --- a/llvm/unittests/ADT/SmallSetTest.cpp +++ b/llvm/unittests/ADT/SmallSetTest.cpp @@ -17,6 +17,66 @@ using namespace llvm; +TEST(SmallSetTest, ConstructorIteratorPair) { + auto L = {1, 2, 3, 4, 5}; + SmallSet S(std::begin(L), std::end(L)); + for (int Value : L) 
+EXPECT_TRUE(S.contains(Value)); +} + +TEST(SmallSet, ConstructorRange) { + auto L = {1, 2, 3, 4, 5}; + + SmallSet S(llvm::make_range(std::begin(L), std::end(L))); + for (int Value : L) +EXPECT_TRUE(S.contains(Value)); +} + +TEST(SmallSet, ConstructorInitializerList) { + auto L = {1, 2, 3, 4, 5}; + SmallSet S = {1, 2, 3, 4, 5}; + for (int Value : L) +EXPECT_TRUE(S.contains(Value)); +} + +TEST(SmallSet, CopyConstructor) { + SmallSet S = {1, 2, 3}; + SmallSet T = S; + + EXPECT_EQ(S, T); +} + +TEST(SmallSet, MoveConstructor) { + auto L = {1, 2, 3}; + SmallSet S = L; + SmallSet T = std::move(S); + + EXPECT_TRUE(T.size() == L.size()); + for (int Value : L) { +EXPECT_TRUE(T.contains(Value)); + } +} + +TEST(SmallSet, CopyAssignment) { + SmallSet S = {1, 2, 3}; + SmallSet T; + T = S; + + EXPECT_EQ(S, T); +} + +TEST(SmallSet, MoveAssignment) { + auto L = {1, 2, 3}; + SmallSet S = L; + SmallSet T; + T = std::move(S); + + EXPECT_TRUE(T.size() == L.size()); + for (int Value : L) { +EXPECT_TRUE(T.contains(Value)); + } +} + TEST(SmallSetTest, Insert) { SmallSet s1; >From d122983eb4f1f66da2a4a6b5bcdb9c8171d18205 Mon Sep 17 00:00:00 2001 From: Victor Campos Date: Tue, 24 Sep 2024 17:43:42 +0100 Subject: [PATCH 2/3] fixup! 
[ADT] Add more useful methods to SmallSet API --- llvm/include/llvm/ADT/SmallSet.h| 4 ++-- llvm/unittests/ADT/SmallSetTest.cpp | 34 +++-- 2 files changed, 15 insertions(+), 23 deletions(-) diff --git a/llvm/include/llvm/ADT/SmallSet.h b/llvm/include/llvm/ADT/SmallSet.h index 431fdee56c20e0..1b8ad542846630 100644 --- a/llvm/include/llvm/ADT/SmallSet.h +++ b/llvm/include/llvm/ADT/SmallSet.h @@ -152,12 +152,12 @@ class SmallSet { SmallSet(SmallSet &&) = default; template SmallSet(IterT Begin, IterT End) { -this->insert(Begin, End); +insert(Begin, End); } template explicit SmallSet(const iterator_range &R) { -this->insert(R.begin(), R.end()); +insert(R.begin(), R.end()); } SmallSet(std::initializer_list L) { this->insert(L.begin(), L.end()); } diff --git a/llvm/unittests/ADT/SmallSetTest.cpp b/llvm/unittests/ADT/SmallSetTest.cpp index 8219bf6f4b4c55..2feb0b1feb421b 100644 --- a/llvm/unittests/ADT/SmallSetTest.cpp +++ b/llvm/unittests/ADT/SmallSetTest.cpp @@ -12,49 +12,44 @@ #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/STLExtras.h" +#include "gmock/gmock.h" #include "gtest/gtest.h" #include using namespace llvm; TEST(SmallSetTest, ConstructorIteratorPair) { - auto L = {1, 2, 3, 4, 5}; + std::initializer_list L = {1, 2, 3, 4, 5}; SmallSet S(std::begin(L), std::end(L)); - for (int Value : L) -EXPECT_TRUE(S.contains(Value)); + EXPECT_THAT(S, testing::UnorderedElementsAreArray(L)); } TEST(SmallSet, ConstructorRange) { - auto L = {1, 2, 3, 4, 5}; + std::initializer_list L = {1, 2, 3, 4, 5}; SmallSet S
[llvm-branch-commits] [flang] [flang][Semantics] Add LangOptions to SemanticsContext (PR #110013)
github-actions[bot] wrote: :warning: C/C++ code formatter, clang-format found issues in your code. :warning: You can test this locally with the following command: ``bash git-clang-format --diff 027a0602fc3fc547ba89dd71b2c59304119f3bb5 c04b0c4e26240e2c1a47b9af4974981ab4535305 --extensions cpp,h -- flang/include/flang/Semantics/semantics.h flang/lib/Frontend/CompilerInvocation.cpp flang/lib/Semantics/semantics.cpp flang/tools/bbc/bbc.cpp `` View the diff from clang-format here. ``diff diff --git a/flang/include/flang/Semantics/semantics.h b/flang/include/flang/Semantics/semantics.h index 4ea9f9e081..2a326074b3 100644 --- a/flang/include/flang/Semantics/semantics.h +++ b/flang/include/flang/Semantics/semantics.h @@ -76,9 +76,7 @@ public: const common::LanguageFeatureControl &languageFeatures() const { return languageFeatures_; } - const common::LangOptions &langOptions() const { -return langOpts_; - } + const common::LangOptions &langOptions() const { return langOpts_; } int GetDefaultKind(TypeCategory) const; int doublePrecisionKind() const { return defaultKinds_.doublePrecisionKind(); `` https://github.com/llvm/llvm-project/pull/110013 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [flang] [flang][OpenMP] Add version checks for clauses (PR #110015)
github-actions[bot] wrote: :warning: C/C++ code formatter, clang-format found issues in your code. :warning: You can test this locally with the following command: ``bash git-clang-format --diff c04b0c4e26240e2c1a47b9af4974981ab4535305 3c786ad2a50f146d357d882b0c1d966486f7295f --extensions h,cpp -- flang/lib/Semantics/check-omp-structure.cpp flang/lib/Semantics/check-omp-structure.h `` View the diff from clang-format here. ``diff diff --git a/flang/lib/Semantics/check-omp-structure.cpp b/flang/lib/Semantics/check-omp-structure.cpp index 976c159e25..239bc38392 100644 --- a/flang/lib/Semantics/check-omp-structure.cpp +++ b/flang/lib/Semantics/check-omp-structure.cpp @@ -187,14 +187,14 @@ bool OmpStructureChecker::CheckAllowedClause(llvmOmpClause clause) { // Only report it if there is a later version that allows it. // If it's not allowed at all, it will be reported by CheckAllowed. if (allowedInVersion != 0) { - std::string thisVersion{std::to_string(version / 10) + "." + - std::to_string(version % 10)}; + std::string thisVersion{ + std::to_string(version / 10) + "." + std::to_string(version % 10)}; std::string goodVersion{std::to_string(allowedInVersion)}; context_.Say(dirCtx.clauseSource, - "%s clause is not allowed on directive %s in OpenMP v%s, " - "try -fopenmp-version=%d"_err_en_US, - clauseName, dirName, thisVersion, allowedInVersion); + "%s clause is not allowed on directive %s in OpenMP v%s, " + "try -fopenmp-version=%d"_err_en_US, + clauseName, dirName, thisVersion, allowedInVersion); } } return CheckAllowed(clause); `` https://github.com/llvm/llvm-project/pull/110015 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [flang] [flang][OpenMP] Add version checks for clauses (PR #110015)
https://github.com/kparzysz created https://github.com/llvm/llvm-project/pull/110015 If there is a clause that is allowed on a given directive in a later version of the OpenMP spec, report an error and provide the minimal spec version that allows the clause. The case where a clause is not allowed on a directive at all is already handled elsewhere. >From 3c786ad2a50f146d357d882b0c1d966486f7295f Mon Sep 17 00:00:00 2001 From: Krzysztof Parzyszek Date: Tue, 24 Sep 2024 17:41:16 -0500 Subject: [PATCH] [flang][OpenMP] Add version checks for clauses If there is a clause that is allowed on a given directive in a later version of the OpenMP spec, report an error and provide the minimal spec version that allows the clause. The case where a clause is not allowed on a directive at all is already handled elsewhere. --- flang/lib/Semantics/check-omp-structure.cpp | 93 +-- flang/lib/Semantics/check-omp-structure.h | 1 + flang/test/Lower/OpenMP/atomic-capture.f90| 4 +- flang/test/Lower/OpenMP/atomic-read.f90 | 2 +- flang/test/Lower/OpenMP/atomic-update.f90 | 4 +- flang/test/Lower/OpenMP/atomic-write.f90 | 2 +- .../test/Lower/OpenMP/declare-target-data.f90 | 4 +- .../declare-target-deferred-marking.f90 | 4 +- .../OpenMP/declare-target-func-and-subr.f90 | 4 +- ...arget-implicit-func-and-subr-cap-enter.f90 | 8 +- ...lare-target-implicit-func-and-subr-cap.f90 | 8 +- .../declare-target-implicit-tarop-cap.f90 | 8 +- .../Lower/OpenMP/function-filtering-2.f90 | 12 +-- .../test/Lower/OpenMP/function-filtering.f90 | 12 +-- .../OpenMP/declare_target-device_type.f90 | 4 +- .../Parser/OpenMP/in-reduction-clause.f90 | 4 +- flang/test/Parser/OpenMP/order-clause01.f90 | 4 +- flang/test/Parser/OpenMP/tile-size.f90| 4 +- flang/test/Parser/OpenMP/unroll-full.f90 | 4 +- flang/test/Parser/OpenMP/unroll.f90 | 4 +- .../Semantics/OpenMP/atomic-hint-clause.f90 | 2 +- flang/test/Semantics/OpenMP/atomic01.f90 | 2 +- flang/test/Semantics/OpenMP/atomic05.f90 | 2 +- .../Semantics/OpenMP/clause-validity01.f90| 
2 +- .../OpenMP/declarative-directive.f90 | 2 +- .../Semantics/OpenMP/declare-target01.f90 | 2 +- .../Semantics/OpenMP/declare-target02.f90 | 2 +- .../Semantics/OpenMP/declare-target06.f90 | 2 +- .../Semantics/OpenMP/device-constructs.f90| 2 +- flang/test/Semantics/OpenMP/flush02.f90 | 2 +- flang/test/Semantics/OpenMP/if-clause.f90 | 2 +- flang/test/Semantics/OpenMP/nontemporal.f90 | 2 +- .../test/Semantics/OpenMP/order-clause01.f90 | 2 +- .../Semantics/OpenMP/requires-atomic01.f90| 2 +- .../Semantics/OpenMP/requires-atomic02.f90| 2 +- flang/test/Semantics/OpenMP/requires04.f90| 2 +- flang/test/Semantics/OpenMP/requires05.f90| 2 +- .../Semantics/OpenMP/simd-nontemporal.f90 | 2 +- flang/test/Semantics/OpenMP/target01.f90 | 2 +- flang/test/Semantics/OpenMP/taskgroup01.f90 | 2 +- .../test/Semantics/OpenMP/use_device_addr.f90 | 2 +- .../Semantics/OpenMP/use_device_addr1.f90 | 2 +- .../test/Semantics/OpenMP/use_device_ptr1.f90 | 2 +- 43 files changed, 137 insertions(+), 99 deletions(-) diff --git a/flang/lib/Semantics/check-omp-structure.cpp b/flang/lib/Semantics/check-omp-structure.cpp index dfc3f3290a81be..976c159e252f12 100644 --- a/flang/lib/Semantics/check-omp-structure.cpp +++ b/flang/lib/Semantics/check-omp-structure.cpp @@ -16,25 +16,25 @@ namespace Fortran::semantics { // Use when clause falls under 'struct OmpClause' in 'parse-tree.h'. 
#define CHECK_SIMPLE_CLAUSE(X, Y) \ void OmpStructureChecker::Enter(const parser::OmpClause::X &) { \ -CheckAllowed(llvm::omp::Clause::Y); \ +CheckAllowedClause(llvm::omp::Clause::Y); \ } #define CHECK_REQ_CONSTANT_SCALAR_INT_CLAUSE(X, Y) \ void OmpStructureChecker::Enter(const parser::OmpClause::X &c) { \ -CheckAllowed(llvm::omp::Clause::Y); \ +CheckAllowedClause(llvm::omp::Clause::Y); \ RequiresConstantPositiveParameter(llvm::omp::Clause::Y, c.v); \ } #define CHECK_REQ_SCALAR_INT_CLAUSE(X, Y) \ void OmpStructureChecker::Enter(const parser::OmpClause::X &c) { \ -CheckAllowed(llvm::omp::Clause::Y); \ +CheckAllowedClause(llvm::omp::Clause::Y); \ RequiresPositiveParameter(llvm::omp::Clause::Y, c.v); \ } // Use when clause don't falls under 'struct OmpClause' in 'parse-tree.h'. #define CHECK_SIMPLE_PARSER_CLAUSE(X, Y) \ void OmpStructureChecker::Enter(const parser::X &) { \ -CheckAllowed(llvm::omp::Y); \ +CheckAllowedClause(llvm::omp::Y); \ } // 'OmpWorkshareBlockChecker' is used to check the validity of the assignment @@ -163,6 +163,43 @@ class AssociatedLoopChecker { std::map constructNamesAndLevels_; }; +bool OmpStructureChecker::CheckAllowedClause(llvmOmpClause clause) { + unsigned version{context_.langOptions().OpenMPVersion}; + DirectiveContext &dirCt
[llvm-branch-commits] [flang] [flang][Semantics] Add LangOptions to SemanticsContext (PR #110013)
https://github.com/kparzysz created https://github.com/llvm/llvm-project/pull/110013 The motivation for this is to make OpenMP settings visible in the semantic checks (OpenMP version in particular). >From c04b0c4e26240e2c1a47b9af4974981ab4535305 Mon Sep 17 00:00:00 2001 From: Krzysztof Parzyszek Date: Tue, 24 Sep 2024 15:14:39 -0500 Subject: [PATCH] [flang][Semantics] Add LangOptions to SemanticsContext The motivation for this is to make OpenMP settings visible in the semantic checks (OpenMP version in particular). --- flang/include/flang/Semantics/semantics.h | 10 -- flang/lib/Frontend/CompilerInvocation.cpp | 3 ++- flang/lib/Semantics/semantics.cpp | 3 ++- flang/tools/bbc/bbc.cpp | 18 +- 4 files changed, 29 insertions(+), 5 deletions(-) diff --git a/flang/include/flang/Semantics/semantics.h b/flang/include/flang/Semantics/semantics.h index e73f9d2e85d589..4ea9f9e081b701 100644 --- a/flang/include/flang/Semantics/semantics.h +++ b/flang/include/flang/Semantics/semantics.h @@ -12,6 +12,7 @@ #include "scope.h" #include "symbol.h" #include "flang/Common/Fortran-features.h" +#include "flang/Common/LangOptions.h" #include "flang/Evaluate/common.h" #include "flang/Evaluate/intrinsics.h" #include "flang/Evaluate/target.h" @@ -65,7 +66,8 @@ using ConstructStack = std::vector; class SemanticsContext { public: SemanticsContext(const common::IntrinsicTypeDefaultKinds &, - const common::LanguageFeatureControl &, parser::AllCookedSources &); + const common::LanguageFeatureControl &, const common::LangOptions &, + parser::AllCookedSources &); ~SemanticsContext(); const common::IntrinsicTypeDefaultKinds &defaultKinds() const { @@ -73,7 +75,10 @@ class SemanticsContext { } const common::LanguageFeatureControl &languageFeatures() const { return languageFeatures_; - }; + } + const common::LangOptions &langOptions() const { +return langOpts_; + } int GetDefaultKind(TypeCategory) const; int doublePrecisionKind() const { return defaultKinds_.doublePrecisionKind(); @@ -273,6 +278,7 @@ 
class SemanticsContext { const common::IntrinsicTypeDefaultKinds &defaultKinds_; const common::LanguageFeatureControl &languageFeatures_; + const common::LangOptions &langOpts_; parser::AllCookedSources &allCookedSources_; std::optional location_; std::vector searchDirectories_; diff --git a/flang/lib/Frontend/CompilerInvocation.cpp b/flang/lib/Frontend/CompilerInvocation.cpp index 52ca9f61c56f74..05b03ba9ebdf30 100644 --- a/flang/lib/Frontend/CompilerInvocation.cpp +++ b/flang/lib/Frontend/CompilerInvocation.cpp @@ -1531,7 +1531,8 @@ CompilerInvocation::getSemanticsCtx( auto &fortranOptions = getFortranOpts(); auto semanticsContext = std::make_unique( - getDefaultKinds(), fortranOptions.features, allCookedSources); + getDefaultKinds(), fortranOptions.features, getLangOpts(), + allCookedSources); semanticsContext->set_moduleDirectory(getModuleDir()) .set_searchDirectories(fortranOptions.searchDirectories) diff --git a/flang/lib/Semantics/semantics.cpp b/flang/lib/Semantics/semantics.cpp index 8592d1e5d6217e..1f2980b07b3e0e 100644 --- a/flang/lib/Semantics/semantics.cpp +++ b/flang/lib/Semantics/semantics.cpp @@ -348,9 +348,10 @@ class CommonBlockMap { SemanticsContext::SemanticsContext( const common::IntrinsicTypeDefaultKinds &defaultKinds, const common::LanguageFeatureControl &languageFeatures, +const common::LangOptions &langOpts, parser::AllCookedSources &allCookedSources) : defaultKinds_{defaultKinds}, languageFeatures_{languageFeatures}, - allCookedSources_{allCookedSources}, + langOpts_{langOpts}, allCookedSources_{allCookedSources}, intrinsics_{evaluate::IntrinsicProcTable::Configure(defaultKinds_)}, globalScope_{*this}, intrinsicModulesScope_{globalScope_.MakeScope( Scope::Kind::IntrinsicModules, nullptr)}, diff --git a/flang/tools/bbc/bbc.cpp b/flang/tools/bbc/bbc.cpp index dcff4503f16571..2a976d5a52fae6 100644 --- a/flang/tools/bbc/bbc.cpp +++ b/flang/tools/bbc/bbc.cpp @@ -15,6 +15,7 @@ //===--===// #include "flang/Common/Fortran-features.h" +#include 
"flang/Common/LangOptions.h" #include "flang/Common/OpenMP-features.h" #include "flang/Common/Version.h" #include "flang/Common/default-kinds.h" @@ -507,6 +508,21 @@ int main(int argc, char **argv) { options.predefinitions.emplace_back( "__flang_patchlevel__"s, std::string{FLANG_VERSION_PATCHLEVEL_STRING}); + Fortran::common::LangOptions langOpts; + langOpts.NoGPULib = setNoGPULib; + langOpts.OpenMPVersion = setOpenMPVersion; + langOpts.OpenMPIsTargetDevice = enableOpenMPDevice; + langOpts.OpenMPIsGPU = enableOpenMPGPU; + langOpts.OpenMPForceUSM = enableOpenMPForceUSM; + langOpts.OpenMPTargetDebug = setOpenMPTargetDebug; + langOpts.Op
[llvm-branch-commits] [flang] [flang][Semantics] Add LangOptions to SemanticsContext (PR #110013)
llvmbot wrote: @llvm/pr-subscribers-flang-semantics @llvm/pr-subscribers-flang-driver Author: Krzysztof Parzyszek (kparzysz) Changes The motivation for this is to make OpenMP settings visible in the semantic checks (OpenMP version in particular). --- Full diff: https://github.com/llvm/llvm-project/pull/110013.diff 4 Files Affected: - (modified) flang/include/flang/Semantics/semantics.h (+8-2) - (modified) flang/lib/Frontend/CompilerInvocation.cpp (+2-1) - (modified) flang/lib/Semantics/semantics.cpp (+2-1) - (modified) flang/tools/bbc/bbc.cpp (+17-1) ``diff diff --git a/flang/include/flang/Semantics/semantics.h b/flang/include/flang/Semantics/semantics.h index e73f9d2e85d589..4ea9f9e081b701 100644 --- a/flang/include/flang/Semantics/semantics.h +++ b/flang/include/flang/Semantics/semantics.h @@ -12,6 +12,7 @@ #include "scope.h" #include "symbol.h" #include "flang/Common/Fortran-features.h" +#include "flang/Common/LangOptions.h" #include "flang/Evaluate/common.h" #include "flang/Evaluate/intrinsics.h" #include "flang/Evaluate/target.h" @@ -65,7 +66,8 @@ using ConstructStack = std::vector; class SemanticsContext { public: SemanticsContext(const common::IntrinsicTypeDefaultKinds &, - const common::LanguageFeatureControl &, parser::AllCookedSources &); + const common::LanguageFeatureControl &, const common::LangOptions &, + parser::AllCookedSources &); ~SemanticsContext(); const common::IntrinsicTypeDefaultKinds &defaultKinds() const { @@ -73,7 +75,10 @@ class SemanticsContext { } const common::LanguageFeatureControl &languageFeatures() const { return languageFeatures_; - }; + } + const common::LangOptions &langOptions() const { +return langOpts_; + } int GetDefaultKind(TypeCategory) const; int doublePrecisionKind() const { return defaultKinds_.doublePrecisionKind(); @@ -273,6 +278,7 @@ class SemanticsContext { const common::IntrinsicTypeDefaultKinds &defaultKinds_; const common::LanguageFeatureControl &languageFeatures_; + const common::LangOptions &langOpts_; 
parser::AllCookedSources &allCookedSources_; std::optional location_; std::vector searchDirectories_; diff --git a/flang/lib/Frontend/CompilerInvocation.cpp b/flang/lib/Frontend/CompilerInvocation.cpp index 52ca9f61c56f74..05b03ba9ebdf30 100644 --- a/flang/lib/Frontend/CompilerInvocation.cpp +++ b/flang/lib/Frontend/CompilerInvocation.cpp @@ -1531,7 +1531,8 @@ CompilerInvocation::getSemanticsCtx( auto &fortranOptions = getFortranOpts(); auto semanticsContext = std::make_unique( - getDefaultKinds(), fortranOptions.features, allCookedSources); + getDefaultKinds(), fortranOptions.features, getLangOpts(), + allCookedSources); semanticsContext->set_moduleDirectory(getModuleDir()) .set_searchDirectories(fortranOptions.searchDirectories) diff --git a/flang/lib/Semantics/semantics.cpp b/flang/lib/Semantics/semantics.cpp index 8592d1e5d6217e..1f2980b07b3e0e 100644 --- a/flang/lib/Semantics/semantics.cpp +++ b/flang/lib/Semantics/semantics.cpp @@ -348,9 +348,10 @@ class CommonBlockMap { SemanticsContext::SemanticsContext( const common::IntrinsicTypeDefaultKinds &defaultKinds, const common::LanguageFeatureControl &languageFeatures, +const common::LangOptions &langOpts, parser::AllCookedSources &allCookedSources) : defaultKinds_{defaultKinds}, languageFeatures_{languageFeatures}, - allCookedSources_{allCookedSources}, + langOpts_{langOpts}, allCookedSources_{allCookedSources}, intrinsics_{evaluate::IntrinsicProcTable::Configure(defaultKinds_)}, globalScope_{*this}, intrinsicModulesScope_{globalScope_.MakeScope( Scope::Kind::IntrinsicModules, nullptr)}, diff --git a/flang/tools/bbc/bbc.cpp b/flang/tools/bbc/bbc.cpp index dcff4503f16571..2a976d5a52fae6 100644 --- a/flang/tools/bbc/bbc.cpp +++ b/flang/tools/bbc/bbc.cpp @@ -15,6 +15,7 @@ //===--===// #include "flang/Common/Fortran-features.h" +#include "flang/Common/LangOptions.h" #include "flang/Common/OpenMP-features.h" #include "flang/Common/Version.h" #include "flang/Common/default-kinds.h" @@ -507,6 +508,21 @@ int main(int 
argc, char **argv) { options.predefinitions.emplace_back( "__flang_patchlevel__"s, std::string{FLANG_VERSION_PATCHLEVEL_STRING}); + Fortran::common::LangOptions langOpts; + langOpts.NoGPULib = setNoGPULib; + langOpts.OpenMPVersion = setOpenMPVersion; + langOpts.OpenMPIsTargetDevice = enableOpenMPDevice; + langOpts.OpenMPIsGPU = enableOpenMPGPU; + langOpts.OpenMPForceUSM = enableOpenMPForceUSM; + langOpts.OpenMPTargetDebug = setOpenMPTargetDebug; + langOpts.OpenMPThreadSubscription = setOpenMPThreadSubscription; + langOpts.OpenMPTeamSubscription = setOpenMPTeamSubscription; + langOpts.OpenMPNoThreadState = setOpenMPNoThreadState; + langOpts.OpenMPNoNestedParallelism = setOpe
[llvm-branch-commits] [flang] [flang][OpenMP] Add version checks for clauses (PR #110015)
llvmbot wrote: @llvm/pr-subscribers-flang-openmp Author: Krzysztof Parzyszek (kparzysz) Changes If there is a clause that is allowed on a given directive in a later version of the OpenMP spec, report an error and provide the minimal spec version that allows the clause. The case where a clause is not allowed on a directive at all is already handled elsewhere. --- Patch is 41.30 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/110015.diff 43 Files Affected: - (modified) flang/lib/Semantics/check-omp-structure.cpp (+65-28) - (modified) flang/lib/Semantics/check-omp-structure.h (+1) - (modified) flang/test/Lower/OpenMP/atomic-capture.f90 (+2-2) - (modified) flang/test/Lower/OpenMP/atomic-read.f90 (+1-1) - (modified) flang/test/Lower/OpenMP/atomic-update.f90 (+2-2) - (modified) flang/test/Lower/OpenMP/atomic-write.f90 (+1-1) - (modified) flang/test/Lower/OpenMP/declare-target-data.f90 (+2-2) - (modified) flang/test/Lower/OpenMP/declare-target-deferred-marking.f90 (+2-2) - (modified) flang/test/Lower/OpenMP/declare-target-func-and-subr.f90 (+2-2) - (modified) flang/test/Lower/OpenMP/declare-target-implicit-func-and-subr-cap-enter.f90 (+4-4) - (modified) flang/test/Lower/OpenMP/declare-target-implicit-func-and-subr-cap.f90 (+4-4) - (modified) flang/test/Lower/OpenMP/declare-target-implicit-tarop-cap.f90 (+4-4) - (modified) flang/test/Lower/OpenMP/function-filtering-2.f90 (+6-6) - (modified) flang/test/Lower/OpenMP/function-filtering.f90 (+6-6) - (modified) flang/test/Parser/OpenMP/declare_target-device_type.f90 (+2-2) - (modified) flang/test/Parser/OpenMP/in-reduction-clause.f90 (+2-2) - (modified) flang/test/Parser/OpenMP/order-clause01.f90 (+2-2) - (modified) flang/test/Parser/OpenMP/tile-size.f90 (+2-2) - (modified) flang/test/Parser/OpenMP/unroll-full.f90 (+2-2) - (modified) flang/test/Parser/OpenMP/unroll.f90 (+2-2) - (modified) flang/test/Semantics/OpenMP/atomic-hint-clause.f90 (+1-1) - (modified) 
flang/test/Semantics/OpenMP/atomic01.f90 (+1-1) - (modified) flang/test/Semantics/OpenMP/atomic05.f90 (+1-1) - (modified) flang/test/Semantics/OpenMP/clause-validity01.f90 (+1-1) - (modified) flang/test/Semantics/OpenMP/declarative-directive.f90 (+1-1) - (modified) flang/test/Semantics/OpenMP/declare-target01.f90 (+1-1) - (modified) flang/test/Semantics/OpenMP/declare-target02.f90 (+1-1) - (modified) flang/test/Semantics/OpenMP/declare-target06.f90 (+1-1) - (modified) flang/test/Semantics/OpenMP/device-constructs.f90 (+1-1) - (modified) flang/test/Semantics/OpenMP/flush02.f90 (+1-1) - (modified) flang/test/Semantics/OpenMP/if-clause.f90 (+1-1) - (modified) flang/test/Semantics/OpenMP/nontemporal.f90 (+1-1) - (modified) flang/test/Semantics/OpenMP/order-clause01.f90 (+1-1) - (modified) flang/test/Semantics/OpenMP/requires-atomic01.f90 (+1-1) - (modified) flang/test/Semantics/OpenMP/requires-atomic02.f90 (+1-1) - (modified) flang/test/Semantics/OpenMP/requires04.f90 (+1-1) - (modified) flang/test/Semantics/OpenMP/requires05.f90 (+1-1) - (modified) flang/test/Semantics/OpenMP/simd-nontemporal.f90 (+1-1) - (modified) flang/test/Semantics/OpenMP/target01.f90 (+1-1) - (modified) flang/test/Semantics/OpenMP/taskgroup01.f90 (+1-1) - (modified) flang/test/Semantics/OpenMP/use_device_addr.f90 (+1-1) - (modified) flang/test/Semantics/OpenMP/use_device_addr1.f90 (+1-1) - (modified) flang/test/Semantics/OpenMP/use_device_ptr1.f90 (+1-1) ``diff diff --git a/flang/lib/Semantics/check-omp-structure.cpp b/flang/lib/Semantics/check-omp-structure.cpp index dfc3f3290a81be..976c159e252f12 100644 --- a/flang/lib/Semantics/check-omp-structure.cpp +++ b/flang/lib/Semantics/check-omp-structure.cpp @@ -16,25 +16,25 @@ namespace Fortran::semantics { // Use when clause falls under 'struct OmpClause' in 'parse-tree.h'. 
#define CHECK_SIMPLE_CLAUSE(X, Y) \ void OmpStructureChecker::Enter(const parser::OmpClause::X &) { \ -CheckAllowed(llvm::omp::Clause::Y); \ +CheckAllowedClause(llvm::omp::Clause::Y); \ } #define CHECK_REQ_CONSTANT_SCALAR_INT_CLAUSE(X, Y) \ void OmpStructureChecker::Enter(const parser::OmpClause::X &c) { \ -CheckAllowed(llvm::omp::Clause::Y); \ +CheckAllowedClause(llvm::omp::Clause::Y); \ RequiresConstantPositiveParameter(llvm::omp::Clause::Y, c.v); \ } #define CHECK_REQ_SCALAR_INT_CLAUSE(X, Y) \ void OmpStructureChecker::Enter(const parser::OmpClause::X &c) { \ -CheckAllowed(llvm::omp::Clause::Y); \ +CheckAllowedClause(llvm::omp::Clause::Y); \ RequiresPositiveParameter(llvm::omp::Clause::Y, c.v); \ } // Use when clause don't falls under 'struct OmpClause' in 'parse-tree.h'. #define CHECK_SIMPLE_PARSER_CLAUSE(X, Y) \ void OmpStructureChecker::Enter(const parser::X &) { \ -CheckAllowed(llvm::omp::Y); \ +CheckAllowedClause(llvm::omp::Y); \ } // 'OmpWorkshareBlockChecker' is used to check the validity of the assignment @@ -163,6 +163,
[llvm-branch-commits] [flang] [flang][OpenMP] Add version checks for clauses (PR #110015)
llvmbot wrote: @llvm/pr-subscribers-flang-fir-hlfir Author: Krzysztof Parzyszek (kparzysz) Changes If there is a clause that is allowed on a given directive in a later version of the OpenMP spec, report an error and provide the minimal spec version that allows the clause. The case where a clause is not allowed on a directive at all is already handled elsewhere. --- Patch is 41.30 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/110015.diff 43 Files Affected: - (modified) flang/lib/Semantics/check-omp-structure.cpp (+65-28) - (modified) flang/lib/Semantics/check-omp-structure.h (+1) - (modified) flang/test/Lower/OpenMP/atomic-capture.f90 (+2-2) - (modified) flang/test/Lower/OpenMP/atomic-read.f90 (+1-1) - (modified) flang/test/Lower/OpenMP/atomic-update.f90 (+2-2) - (modified) flang/test/Lower/OpenMP/atomic-write.f90 (+1-1) - (modified) flang/test/Lower/OpenMP/declare-target-data.f90 (+2-2) - (modified) flang/test/Lower/OpenMP/declare-target-deferred-marking.f90 (+2-2) - (modified) flang/test/Lower/OpenMP/declare-target-func-and-subr.f90 (+2-2) - (modified) flang/test/Lower/OpenMP/declare-target-implicit-func-and-subr-cap-enter.f90 (+4-4) - (modified) flang/test/Lower/OpenMP/declare-target-implicit-func-and-subr-cap.f90 (+4-4) - (modified) flang/test/Lower/OpenMP/declare-target-implicit-tarop-cap.f90 (+4-4) - (modified) flang/test/Lower/OpenMP/function-filtering-2.f90 (+6-6) - (modified) flang/test/Lower/OpenMP/function-filtering.f90 (+6-6) - (modified) flang/test/Parser/OpenMP/declare_target-device_type.f90 (+2-2) - (modified) flang/test/Parser/OpenMP/in-reduction-clause.f90 (+2-2) - (modified) flang/test/Parser/OpenMP/order-clause01.f90 (+2-2) - (modified) flang/test/Parser/OpenMP/tile-size.f90 (+2-2) - (modified) flang/test/Parser/OpenMP/unroll-full.f90 (+2-2) - (modified) flang/test/Parser/OpenMP/unroll.f90 (+2-2) - (modified) flang/test/Semantics/OpenMP/atomic-hint-clause.f90 (+1-1) - (modified) 
flang/test/Semantics/OpenMP/atomic01.f90 (+1-1) - (modified) flang/test/Semantics/OpenMP/atomic05.f90 (+1-1) - (modified) flang/test/Semantics/OpenMP/clause-validity01.f90 (+1-1) - (modified) flang/test/Semantics/OpenMP/declarative-directive.f90 (+1-1) - (modified) flang/test/Semantics/OpenMP/declare-target01.f90 (+1-1) - (modified) flang/test/Semantics/OpenMP/declare-target02.f90 (+1-1) - (modified) flang/test/Semantics/OpenMP/declare-target06.f90 (+1-1) - (modified) flang/test/Semantics/OpenMP/device-constructs.f90 (+1-1) - (modified) flang/test/Semantics/OpenMP/flush02.f90 (+1-1) - (modified) flang/test/Semantics/OpenMP/if-clause.f90 (+1-1) - (modified) flang/test/Semantics/OpenMP/nontemporal.f90 (+1-1) - (modified) flang/test/Semantics/OpenMP/order-clause01.f90 (+1-1) - (modified) flang/test/Semantics/OpenMP/requires-atomic01.f90 (+1-1) - (modified) flang/test/Semantics/OpenMP/requires-atomic02.f90 (+1-1) - (modified) flang/test/Semantics/OpenMP/requires04.f90 (+1-1) - (modified) flang/test/Semantics/OpenMP/requires05.f90 (+1-1) - (modified) flang/test/Semantics/OpenMP/simd-nontemporal.f90 (+1-1) - (modified) flang/test/Semantics/OpenMP/target01.f90 (+1-1) - (modified) flang/test/Semantics/OpenMP/taskgroup01.f90 (+1-1) - (modified) flang/test/Semantics/OpenMP/use_device_addr.f90 (+1-1) - (modified) flang/test/Semantics/OpenMP/use_device_addr1.f90 (+1-1) - (modified) flang/test/Semantics/OpenMP/use_device_ptr1.f90 (+1-1) ``diff diff --git a/flang/lib/Semantics/check-omp-structure.cpp b/flang/lib/Semantics/check-omp-structure.cpp index dfc3f3290a81be..976c159e252f12 100644 --- a/flang/lib/Semantics/check-omp-structure.cpp +++ b/flang/lib/Semantics/check-omp-structure.cpp @@ -16,25 +16,25 @@ namespace Fortran::semantics { // Use when clause falls under 'struct OmpClause' in 'parse-tree.h'. 
#define CHECK_SIMPLE_CLAUSE(X, Y) \ void OmpStructureChecker::Enter(const parser::OmpClause::X &) { \ -CheckAllowed(llvm::omp::Clause::Y); \ +CheckAllowedClause(llvm::omp::Clause::Y); \ } #define CHECK_REQ_CONSTANT_SCALAR_INT_CLAUSE(X, Y) \ void OmpStructureChecker::Enter(const parser::OmpClause::X &c) { \ -CheckAllowed(llvm::omp::Clause::Y); \ +CheckAllowedClause(llvm::omp::Clause::Y); \ RequiresConstantPositiveParameter(llvm::omp::Clause::Y, c.v); \ } #define CHECK_REQ_SCALAR_INT_CLAUSE(X, Y) \ void OmpStructureChecker::Enter(const parser::OmpClause::X &c) { \ -CheckAllowed(llvm::omp::Clause::Y); \ +CheckAllowedClause(llvm::omp::Clause::Y); \ RequiresPositiveParameter(llvm::omp::Clause::Y, c.v); \ } // Use when clause don't falls under 'struct OmpClause' in 'parse-tree.h'. #define CHECK_SIMPLE_PARSER_CLAUSE(X, Y) \ void OmpStructureChecker::Enter(const parser::X &) { \ -CheckAllowed(llvm::omp::Y); \ +CheckAllowedClause(llvm::omp::Y); \ } // 'OmpWorkshareBlockChecker' is used to check the validity of the assignment @@ -163,6 +1
[llvm-branch-commits] [flang] [flang][OpenMP] Add version checks for clauses (PR #110015)
llvmbot wrote: @llvm/pr-subscribers-flang-semantics @llvm/pr-subscribers-flang-parser Author: Krzysztof Parzyszek (kparzysz) Changes If there is a clause that is allowed on a given directive in a later version of the OpenMP spec, report an error and provide the minimal spec version that allows the clause. The case where a clause is not allowed on a directive at all is already handled elsewhere. --- Patch is 41.30 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/110015.diff 43 Files Affected: - (modified) flang/lib/Semantics/check-omp-structure.cpp (+65-28) - (modified) flang/lib/Semantics/check-omp-structure.h (+1) - (modified) flang/test/Lower/OpenMP/atomic-capture.f90 (+2-2) - (modified) flang/test/Lower/OpenMP/atomic-read.f90 (+1-1) - (modified) flang/test/Lower/OpenMP/atomic-update.f90 (+2-2) - (modified) flang/test/Lower/OpenMP/atomic-write.f90 (+1-1) - (modified) flang/test/Lower/OpenMP/declare-target-data.f90 (+2-2) - (modified) flang/test/Lower/OpenMP/declare-target-deferred-marking.f90 (+2-2) - (modified) flang/test/Lower/OpenMP/declare-target-func-and-subr.f90 (+2-2) - (modified) flang/test/Lower/OpenMP/declare-target-implicit-func-and-subr-cap-enter.f90 (+4-4) - (modified) flang/test/Lower/OpenMP/declare-target-implicit-func-and-subr-cap.f90 (+4-4) - (modified) flang/test/Lower/OpenMP/declare-target-implicit-tarop-cap.f90 (+4-4) - (modified) flang/test/Lower/OpenMP/function-filtering-2.f90 (+6-6) - (modified) flang/test/Lower/OpenMP/function-filtering.f90 (+6-6) - (modified) flang/test/Parser/OpenMP/declare_target-device_type.f90 (+2-2) - (modified) flang/test/Parser/OpenMP/in-reduction-clause.f90 (+2-2) - (modified) flang/test/Parser/OpenMP/order-clause01.f90 (+2-2) - (modified) flang/test/Parser/OpenMP/tile-size.f90 (+2-2) - (modified) flang/test/Parser/OpenMP/unroll-full.f90 (+2-2) - (modified) flang/test/Parser/OpenMP/unroll.f90 (+2-2) - (modified) flang/test/Semantics/OpenMP/atomic-hint-clause.f90 (+1-1) - 
(modified) flang/test/Semantics/OpenMP/atomic01.f90 (+1-1) - (modified) flang/test/Semantics/OpenMP/atomic05.f90 (+1-1) - (modified) flang/test/Semantics/OpenMP/clause-validity01.f90 (+1-1) - (modified) flang/test/Semantics/OpenMP/declarative-directive.f90 (+1-1) - (modified) flang/test/Semantics/OpenMP/declare-target01.f90 (+1-1) - (modified) flang/test/Semantics/OpenMP/declare-target02.f90 (+1-1) - (modified) flang/test/Semantics/OpenMP/declare-target06.f90 (+1-1) - (modified) flang/test/Semantics/OpenMP/device-constructs.f90 (+1-1) - (modified) flang/test/Semantics/OpenMP/flush02.f90 (+1-1) - (modified) flang/test/Semantics/OpenMP/if-clause.f90 (+1-1) - (modified) flang/test/Semantics/OpenMP/nontemporal.f90 (+1-1) - (modified) flang/test/Semantics/OpenMP/order-clause01.f90 (+1-1) - (modified) flang/test/Semantics/OpenMP/requires-atomic01.f90 (+1-1) - (modified) flang/test/Semantics/OpenMP/requires-atomic02.f90 (+1-1) - (modified) flang/test/Semantics/OpenMP/requires04.f90 (+1-1) - (modified) flang/test/Semantics/OpenMP/requires05.f90 (+1-1) - (modified) flang/test/Semantics/OpenMP/simd-nontemporal.f90 (+1-1) - (modified) flang/test/Semantics/OpenMP/target01.f90 (+1-1) - (modified) flang/test/Semantics/OpenMP/taskgroup01.f90 (+1-1) - (modified) flang/test/Semantics/OpenMP/use_device_addr.f90 (+1-1) - (modified) flang/test/Semantics/OpenMP/use_device_addr1.f90 (+1-1) - (modified) flang/test/Semantics/OpenMP/use_device_ptr1.f90 (+1-1) ``diff diff --git a/flang/lib/Semantics/check-omp-structure.cpp b/flang/lib/Semantics/check-omp-structure.cpp index dfc3f3290a81be..976c159e252f12 100644 --- a/flang/lib/Semantics/check-omp-structure.cpp +++ b/flang/lib/Semantics/check-omp-structure.cpp @@ -16,25 +16,25 @@ namespace Fortran::semantics { // Use when clause falls under 'struct OmpClause' in 'parse-tree.h'. 
#define CHECK_SIMPLE_CLAUSE(X, Y) \ void OmpStructureChecker::Enter(const parser::OmpClause::X &) { \ -CheckAllowed(llvm::omp::Clause::Y); \ +CheckAllowedClause(llvm::omp::Clause::Y); \ } #define CHECK_REQ_CONSTANT_SCALAR_INT_CLAUSE(X, Y) \ void OmpStructureChecker::Enter(const parser::OmpClause::X &c) { \ -CheckAllowed(llvm::omp::Clause::Y); \ +CheckAllowedClause(llvm::omp::Clause::Y); \ RequiresConstantPositiveParameter(llvm::omp::Clause::Y, c.v); \ } #define CHECK_REQ_SCALAR_INT_CLAUSE(X, Y) \ void OmpStructureChecker::Enter(const parser::OmpClause::X &c) { \ -CheckAllowed(llvm::omp::Clause::Y); \ +CheckAllowedClause(llvm::omp::Clause::Y); \ RequiresPositiveParameter(llvm::omp::Clause::Y, c.v); \ } // Use when clause don't falls under 'struct OmpClause' in 'parse-tree.h'. #define CHECK_SIMPLE_PARSER_CLAUSE(X, Y) \ void OmpStructureChecker::Enter(const parser::X &) { \ -CheckAllowed(llvm::omp::Y); \ +CheckAllowedClause(llvm::omp::Y); \ } // 'OmpWorkshareBlockChecker' is used to check the valid
[llvm-branch-commits] [clang] release/19.x: [clang-scan-deps] Don't inspect Args[0] as an option (#109050) (PR #109865)
Martin =?utf-8?q?Storsj=C3=B6?= , Martin =?utf-8?q?Storsj=C3=B6?= , Martin =?utf-8?q?Storsj=C3=B6?= Message-ID: In-Reply-To: https://github.com/jansvoboda11 approved this pull request. LGTM https://github.com/llvm/llvm-project/pull/109865 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [flang] [flang][Semantics] Add LangOptions to SemanticsContext (PR #110013)
https://github.com/kparzysz updated https://github.com/llvm/llvm-project/pull/110013 >From c04b0c4e26240e2c1a47b9af4974981ab4535305 Mon Sep 17 00:00:00 2001 From: Krzysztof Parzyszek Date: Tue, 24 Sep 2024 15:14:39 -0500 Subject: [PATCH] [flang][Semantics] Add LangOptions to SemanticsContext The motivation for this is to make OpenMP settings visible in the semantic checks (OpenMP version in particular). --- flang/include/flang/Semantics/semantics.h | 10 -- flang/lib/Frontend/CompilerInvocation.cpp | 3 ++- flang/lib/Semantics/semantics.cpp | 3 ++- flang/tools/bbc/bbc.cpp | 18 +- 4 files changed, 29 insertions(+), 5 deletions(-) diff --git a/flang/include/flang/Semantics/semantics.h b/flang/include/flang/Semantics/semantics.h index e73f9d2e85d589..4ea9f9e081b701 100644 --- a/flang/include/flang/Semantics/semantics.h +++ b/flang/include/flang/Semantics/semantics.h @@ -12,6 +12,7 @@ #include "scope.h" #include "symbol.h" #include "flang/Common/Fortran-features.h" +#include "flang/Common/LangOptions.h" #include "flang/Evaluate/common.h" #include "flang/Evaluate/intrinsics.h" #include "flang/Evaluate/target.h" @@ -65,7 +66,8 @@ using ConstructStack = std::vector; class SemanticsContext { public: SemanticsContext(const common::IntrinsicTypeDefaultKinds &, - const common::LanguageFeatureControl &, parser::AllCookedSources &); + const common::LanguageFeatureControl &, const common::LangOptions &, + parser::AllCookedSources &); ~SemanticsContext(); const common::IntrinsicTypeDefaultKinds &defaultKinds() const { @@ -73,7 +75,10 @@ class SemanticsContext { } const common::LanguageFeatureControl &languageFeatures() const { return languageFeatures_; - }; + } + const common::LangOptions &langOptions() const { +return langOpts_; + } int GetDefaultKind(TypeCategory) const; int doublePrecisionKind() const { return defaultKinds_.doublePrecisionKind(); @@ -273,6 +278,7 @@ class SemanticsContext { const common::IntrinsicTypeDefaultKinds &defaultKinds_; const 
common::LanguageFeatureControl &languageFeatures_; + const common::LangOptions &langOpts_; parser::AllCookedSources &allCookedSources_; std::optional location_; std::vector searchDirectories_; diff --git a/flang/lib/Frontend/CompilerInvocation.cpp b/flang/lib/Frontend/CompilerInvocation.cpp index 52ca9f61c56f74..05b03ba9ebdf30 100644 --- a/flang/lib/Frontend/CompilerInvocation.cpp +++ b/flang/lib/Frontend/CompilerInvocation.cpp @@ -1531,7 +1531,8 @@ CompilerInvocation::getSemanticsCtx( auto &fortranOptions = getFortranOpts(); auto semanticsContext = std::make_unique( - getDefaultKinds(), fortranOptions.features, allCookedSources); + getDefaultKinds(), fortranOptions.features, getLangOpts(), + allCookedSources); semanticsContext->set_moduleDirectory(getModuleDir()) .set_searchDirectories(fortranOptions.searchDirectories) diff --git a/flang/lib/Semantics/semantics.cpp b/flang/lib/Semantics/semantics.cpp index 8592d1e5d6217e..1f2980b07b3e0e 100644 --- a/flang/lib/Semantics/semantics.cpp +++ b/flang/lib/Semantics/semantics.cpp @@ -348,9 +348,10 @@ class CommonBlockMap { SemanticsContext::SemanticsContext( const common::IntrinsicTypeDefaultKinds &defaultKinds, const common::LanguageFeatureControl &languageFeatures, +const common::LangOptions &langOpts, parser::AllCookedSources &allCookedSources) : defaultKinds_{defaultKinds}, languageFeatures_{languageFeatures}, - allCookedSources_{allCookedSources}, + langOpts_{langOpts}, allCookedSources_{allCookedSources}, intrinsics_{evaluate::IntrinsicProcTable::Configure(defaultKinds_)}, globalScope_{*this}, intrinsicModulesScope_{globalScope_.MakeScope( Scope::Kind::IntrinsicModules, nullptr)}, diff --git a/flang/tools/bbc/bbc.cpp b/flang/tools/bbc/bbc.cpp index dcff4503f16571..2a976d5a52fae6 100644 --- a/flang/tools/bbc/bbc.cpp +++ b/flang/tools/bbc/bbc.cpp @@ -15,6 +15,7 @@ //===--===// #include "flang/Common/Fortran-features.h" +#include "flang/Common/LangOptions.h" #include "flang/Common/OpenMP-features.h" #include 
"flang/Common/Version.h" #include "flang/Common/default-kinds.h" @@ -507,6 +508,21 @@ int main(int argc, char **argv) { options.predefinitions.emplace_back( "__flang_patchlevel__"s, std::string{FLANG_VERSION_PATCHLEVEL_STRING}); + Fortran::common::LangOptions langOpts; + langOpts.NoGPULib = setNoGPULib; + langOpts.OpenMPVersion = setOpenMPVersion; + langOpts.OpenMPIsTargetDevice = enableOpenMPDevice; + langOpts.OpenMPIsGPU = enableOpenMPGPU; + langOpts.OpenMPForceUSM = enableOpenMPForceUSM; + langOpts.OpenMPTargetDebug = setOpenMPTargetDebug; + langOpts.OpenMPThreadSubscription = setOpenMPThreadSubscription; + langOpts.OpenMPTeamSubscription = setOpenMPTeamSubscription
[llvm-branch-commits] [flang] [flang][Semantics] Add LangOptions to SemanticsContext (PR #110013)
https://github.com/kparzysz updated https://github.com/llvm/llvm-project/pull/110013 >From c04b0c4e26240e2c1a47b9af4974981ab4535305 Mon Sep 17 00:00:00 2001 From: Krzysztof Parzyszek Date: Tue, 24 Sep 2024 15:14:39 -0500 Subject: [PATCH 1/2] [flang][Semantics] Add LangOptions to SemanticsContext The motivation for this is to make OpenMP settings visible in the semantic checks (OpenMP version in particular). --- flang/include/flang/Semantics/semantics.h | 10 -- flang/lib/Frontend/CompilerInvocation.cpp | 3 ++- flang/lib/Semantics/semantics.cpp | 3 ++- flang/tools/bbc/bbc.cpp | 18 +- 4 files changed, 29 insertions(+), 5 deletions(-) diff --git a/flang/include/flang/Semantics/semantics.h b/flang/include/flang/Semantics/semantics.h index e73f9d2e85d589..4ea9f9e081b701 100644 --- a/flang/include/flang/Semantics/semantics.h +++ b/flang/include/flang/Semantics/semantics.h @@ -12,6 +12,7 @@ #include "scope.h" #include "symbol.h" #include "flang/Common/Fortran-features.h" +#include "flang/Common/LangOptions.h" #include "flang/Evaluate/common.h" #include "flang/Evaluate/intrinsics.h" #include "flang/Evaluate/target.h" @@ -65,7 +66,8 @@ using ConstructStack = std::vector; class SemanticsContext { public: SemanticsContext(const common::IntrinsicTypeDefaultKinds &, - const common::LanguageFeatureControl &, parser::AllCookedSources &); + const common::LanguageFeatureControl &, const common::LangOptions &, + parser::AllCookedSources &); ~SemanticsContext(); const common::IntrinsicTypeDefaultKinds &defaultKinds() const { @@ -73,7 +75,10 @@ class SemanticsContext { } const common::LanguageFeatureControl &languageFeatures() const { return languageFeatures_; - }; + } + const common::LangOptions &langOptions() const { +return langOpts_; + } int GetDefaultKind(TypeCategory) const; int doublePrecisionKind() const { return defaultKinds_.doublePrecisionKind(); @@ -273,6 +278,7 @@ class SemanticsContext { const common::IntrinsicTypeDefaultKinds &defaultKinds_; const 
common::LanguageFeatureControl &languageFeatures_; + const common::LangOptions &langOpts_; parser::AllCookedSources &allCookedSources_; std::optional location_; std::vector searchDirectories_; diff --git a/flang/lib/Frontend/CompilerInvocation.cpp b/flang/lib/Frontend/CompilerInvocation.cpp index 52ca9f61c56f74..05b03ba9ebdf30 100644 --- a/flang/lib/Frontend/CompilerInvocation.cpp +++ b/flang/lib/Frontend/CompilerInvocation.cpp @@ -1531,7 +1531,8 @@ CompilerInvocation::getSemanticsCtx( auto &fortranOptions = getFortranOpts(); auto semanticsContext = std::make_unique( - getDefaultKinds(), fortranOptions.features, allCookedSources); + getDefaultKinds(), fortranOptions.features, getLangOpts(), + allCookedSources); semanticsContext->set_moduleDirectory(getModuleDir()) .set_searchDirectories(fortranOptions.searchDirectories) diff --git a/flang/lib/Semantics/semantics.cpp b/flang/lib/Semantics/semantics.cpp index 8592d1e5d6217e..1f2980b07b3e0e 100644 --- a/flang/lib/Semantics/semantics.cpp +++ b/flang/lib/Semantics/semantics.cpp @@ -348,9 +348,10 @@ class CommonBlockMap { SemanticsContext::SemanticsContext( const common::IntrinsicTypeDefaultKinds &defaultKinds, const common::LanguageFeatureControl &languageFeatures, +const common::LangOptions &langOpts, parser::AllCookedSources &allCookedSources) : defaultKinds_{defaultKinds}, languageFeatures_{languageFeatures}, - allCookedSources_{allCookedSources}, + langOpts_{langOpts}, allCookedSources_{allCookedSources}, intrinsics_{evaluate::IntrinsicProcTable::Configure(defaultKinds_)}, globalScope_{*this}, intrinsicModulesScope_{globalScope_.MakeScope( Scope::Kind::IntrinsicModules, nullptr)}, diff --git a/flang/tools/bbc/bbc.cpp b/flang/tools/bbc/bbc.cpp index dcff4503f16571..2a976d5a52fae6 100644 --- a/flang/tools/bbc/bbc.cpp +++ b/flang/tools/bbc/bbc.cpp @@ -15,6 +15,7 @@ //===--===// #include "flang/Common/Fortran-features.h" +#include "flang/Common/LangOptions.h" #include "flang/Common/OpenMP-features.h" #include 
"flang/Common/Version.h" #include "flang/Common/default-kinds.h" @@ -507,6 +508,21 @@ int main(int argc, char **argv) { options.predefinitions.emplace_back( "__flang_patchlevel__"s, std::string{FLANG_VERSION_PATCHLEVEL_STRING}); + Fortran::common::LangOptions langOpts; + langOpts.NoGPULib = setNoGPULib; + langOpts.OpenMPVersion = setOpenMPVersion; + langOpts.OpenMPIsTargetDevice = enableOpenMPDevice; + langOpts.OpenMPIsGPU = enableOpenMPGPU; + langOpts.OpenMPForceUSM = enableOpenMPForceUSM; + langOpts.OpenMPTargetDebug = setOpenMPTargetDebug; + langOpts.OpenMPThreadSubscription = setOpenMPThreadSubscription; + langOpts.OpenMPTeamSubscription = setOpenMPTeamSubscrip
[llvm-branch-commits] [flang] [flang][OpenMP] Add version checks for clauses (PR #110015)
https://github.com/kparzysz updated https://github.com/llvm/llvm-project/pull/110015 >From 3c786ad2a50f146d357d882b0c1d966486f7295f Mon Sep 17 00:00:00 2001 From: Krzysztof Parzyszek Date: Tue, 24 Sep 2024 17:41:16 -0500 Subject: [PATCH] [flang][OpenMP] Add version checks for clauses If there is a clause that is allowed on a given directive in a later version of the OpenMP spec, report an error and provide the minimal spec version that allows the clause. The case where a clause is not allowed on a directive at all is already handled elsewhere. --- flang/lib/Semantics/check-omp-structure.cpp | 93 +-- flang/lib/Semantics/check-omp-structure.h | 1 + flang/test/Lower/OpenMP/atomic-capture.f90| 4 +- flang/test/Lower/OpenMP/atomic-read.f90 | 2 +- flang/test/Lower/OpenMP/atomic-update.f90 | 4 +- flang/test/Lower/OpenMP/atomic-write.f90 | 2 +- .../test/Lower/OpenMP/declare-target-data.f90 | 4 +- .../declare-target-deferred-marking.f90 | 4 +- .../OpenMP/declare-target-func-and-subr.f90 | 4 +- ...arget-implicit-func-and-subr-cap-enter.f90 | 8 +- ...lare-target-implicit-func-and-subr-cap.f90 | 8 +- .../declare-target-implicit-tarop-cap.f90 | 8 +- .../Lower/OpenMP/function-filtering-2.f90 | 12 +-- .../test/Lower/OpenMP/function-filtering.f90 | 12 +-- .../OpenMP/declare_target-device_type.f90 | 4 +- .../Parser/OpenMP/in-reduction-clause.f90 | 4 +- flang/test/Parser/OpenMP/order-clause01.f90 | 4 +- flang/test/Parser/OpenMP/tile-size.f90| 4 +- flang/test/Parser/OpenMP/unroll-full.f90 | 4 +- flang/test/Parser/OpenMP/unroll.f90 | 4 +- .../Semantics/OpenMP/atomic-hint-clause.f90 | 2 +- flang/test/Semantics/OpenMP/atomic01.f90 | 2 +- flang/test/Semantics/OpenMP/atomic05.f90 | 2 +- .../Semantics/OpenMP/clause-validity01.f90| 2 +- .../OpenMP/declarative-directive.f90 | 2 +- .../Semantics/OpenMP/declare-target01.f90 | 2 +- .../Semantics/OpenMP/declare-target02.f90 | 2 +- .../Semantics/OpenMP/declare-target06.f90 | 2 +- .../Semantics/OpenMP/device-constructs.f90| 2 +- 
flang/test/Semantics/OpenMP/flush02.f90 | 2 +- flang/test/Semantics/OpenMP/if-clause.f90 | 2 +- flang/test/Semantics/OpenMP/nontemporal.f90 | 2 +- .../test/Semantics/OpenMP/order-clause01.f90 | 2 +- .../Semantics/OpenMP/requires-atomic01.f90| 2 +- .../Semantics/OpenMP/requires-atomic02.f90| 2 +- flang/test/Semantics/OpenMP/requires04.f90| 2 +- flang/test/Semantics/OpenMP/requires05.f90| 2 +- .../Semantics/OpenMP/simd-nontemporal.f90 | 2 +- flang/test/Semantics/OpenMP/target01.f90 | 2 +- flang/test/Semantics/OpenMP/taskgroup01.f90 | 2 +- .../test/Semantics/OpenMP/use_device_addr.f90 | 2 +- .../Semantics/OpenMP/use_device_addr1.f90 | 2 +- .../test/Semantics/OpenMP/use_device_ptr1.f90 | 2 +- 43 files changed, 137 insertions(+), 99 deletions(-) diff --git a/flang/lib/Semantics/check-omp-structure.cpp b/flang/lib/Semantics/check-omp-structure.cpp index dfc3f3290a81be..976c159e252f12 100644 --- a/flang/lib/Semantics/check-omp-structure.cpp +++ b/flang/lib/Semantics/check-omp-structure.cpp @@ -16,25 +16,25 @@ namespace Fortran::semantics { // Use when clause falls under 'struct OmpClause' in 'parse-tree.h'. #define CHECK_SIMPLE_CLAUSE(X, Y) \ void OmpStructureChecker::Enter(const parser::OmpClause::X &) { \ -CheckAllowed(llvm::omp::Clause::Y); \ +CheckAllowedClause(llvm::omp::Clause::Y); \ } #define CHECK_REQ_CONSTANT_SCALAR_INT_CLAUSE(X, Y) \ void OmpStructureChecker::Enter(const parser::OmpClause::X &c) { \ -CheckAllowed(llvm::omp::Clause::Y); \ +CheckAllowedClause(llvm::omp::Clause::Y); \ RequiresConstantPositiveParameter(llvm::omp::Clause::Y, c.v); \ } #define CHECK_REQ_SCALAR_INT_CLAUSE(X, Y) \ void OmpStructureChecker::Enter(const parser::OmpClause::X &c) { \ -CheckAllowed(llvm::omp::Clause::Y); \ +CheckAllowedClause(llvm::omp::Clause::Y); \ RequiresPositiveParameter(llvm::omp::Clause::Y, c.v); \ } // Use when clause don't falls under 'struct OmpClause' in 'parse-tree.h'. 
#define CHECK_SIMPLE_PARSER_CLAUSE(X, Y) \ void OmpStructureChecker::Enter(const parser::X &) { \ -CheckAllowed(llvm::omp::Y); \ +CheckAllowedClause(llvm::omp::Y); \ } // 'OmpWorkshareBlockChecker' is used to check the validity of the assignment @@ -163,6 +163,43 @@ class AssociatedLoopChecker { std::map constructNamesAndLevels_; }; +bool OmpStructureChecker::CheckAllowedClause(llvmOmpClause clause) { + unsigned version{context_.langOptions().OpenMPVersion}; + DirectiveContext &dirCtx = GetContext(); + llvm::omp::Directive dir{dirCtx.directive}; + + if (!llvm::omp::isAllowedClauseForDirective(dir, clause, version)) { +unsigned allowedInVersion{[&] { + for (unsigned v : {45, 50, 51, 52, 60}) { +if (v <= version) { + conti
[llvm-branch-commits] [flang] [mlir] [MLIR][OpenMP] Normalize representation of entry block arg-defining clauses (PR #109809)
https://github.com/tblah approved this pull request. Thanks for the cleanup! https://github.com/llvm/llvm-project/pull/109809 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [mlir] [MLIR][OpenMP] Document entry block argument-defining clauses (NFC) (PR #109811)
https://github.com/tblah approved this pull request. LGTM, thanks! https://github.com/llvm/llvm-project/pull/109811 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [flang] [flang][OpenMP] Add version checks for clauses (PR #110015)
https://github.com/kparzysz updated https://github.com/llvm/llvm-project/pull/110015 >From 3c786ad2a50f146d357d882b0c1d966486f7295f Mon Sep 17 00:00:00 2001 From: Krzysztof Parzyszek Date: Tue, 24 Sep 2024 17:41:16 -0500 Subject: [PATCH 1/3] [flang][OpenMP] Add version checks for clauses If there is a clause that is allowed on a given directive in a later version of the OpenMP spec, report an error and provide the minimal spec version that allows the clause. The case where a clause is not allowed on a directive at all is already handled elsewhere. --- flang/lib/Semantics/check-omp-structure.cpp | 93 +-- flang/lib/Semantics/check-omp-structure.h | 1 + flang/test/Lower/OpenMP/atomic-capture.f90| 4 +- flang/test/Lower/OpenMP/atomic-read.f90 | 2 +- flang/test/Lower/OpenMP/atomic-update.f90 | 4 +- flang/test/Lower/OpenMP/atomic-write.f90 | 2 +- .../test/Lower/OpenMP/declare-target-data.f90 | 4 +- .../declare-target-deferred-marking.f90 | 4 +- .../OpenMP/declare-target-func-and-subr.f90 | 4 +- ...arget-implicit-func-and-subr-cap-enter.f90 | 8 +- ...lare-target-implicit-func-and-subr-cap.f90 | 8 +- .../declare-target-implicit-tarop-cap.f90 | 8 +- .../Lower/OpenMP/function-filtering-2.f90 | 12 +-- .../test/Lower/OpenMP/function-filtering.f90 | 12 +-- .../OpenMP/declare_target-device_type.f90 | 4 +- .../Parser/OpenMP/in-reduction-clause.f90 | 4 +- flang/test/Parser/OpenMP/order-clause01.f90 | 4 +- flang/test/Parser/OpenMP/tile-size.f90| 4 +- flang/test/Parser/OpenMP/unroll-full.f90 | 4 +- flang/test/Parser/OpenMP/unroll.f90 | 4 +- .../Semantics/OpenMP/atomic-hint-clause.f90 | 2 +- flang/test/Semantics/OpenMP/atomic01.f90 | 2 +- flang/test/Semantics/OpenMP/atomic05.f90 | 2 +- .../Semantics/OpenMP/clause-validity01.f90| 2 +- .../OpenMP/declarative-directive.f90 | 2 +- .../Semantics/OpenMP/declare-target01.f90 | 2 +- .../Semantics/OpenMP/declare-target02.f90 | 2 +- .../Semantics/OpenMP/declare-target06.f90 | 2 +- .../Semantics/OpenMP/device-constructs.f90| 2 +- 
flang/test/Semantics/OpenMP/flush02.f90 | 2 +- flang/test/Semantics/OpenMP/if-clause.f90 | 2 +- flang/test/Semantics/OpenMP/nontemporal.f90 | 2 +- .../test/Semantics/OpenMP/order-clause01.f90 | 2 +- .../Semantics/OpenMP/requires-atomic01.f90| 2 +- .../Semantics/OpenMP/requires-atomic02.f90| 2 +- flang/test/Semantics/OpenMP/requires04.f90| 2 +- flang/test/Semantics/OpenMP/requires05.f90| 2 +- .../Semantics/OpenMP/simd-nontemporal.f90 | 2 +- flang/test/Semantics/OpenMP/target01.f90 | 2 +- flang/test/Semantics/OpenMP/taskgroup01.f90 | 2 +- .../test/Semantics/OpenMP/use_device_addr.f90 | 2 +- .../Semantics/OpenMP/use_device_addr1.f90 | 2 +- .../test/Semantics/OpenMP/use_device_ptr1.f90 | 2 +- 43 files changed, 137 insertions(+), 99 deletions(-) diff --git a/flang/lib/Semantics/check-omp-structure.cpp b/flang/lib/Semantics/check-omp-structure.cpp index dfc3f3290a81be..976c159e252f12 100644 --- a/flang/lib/Semantics/check-omp-structure.cpp +++ b/flang/lib/Semantics/check-omp-structure.cpp @@ -16,25 +16,25 @@ namespace Fortran::semantics { // Use when clause falls under 'struct OmpClause' in 'parse-tree.h'. #define CHECK_SIMPLE_CLAUSE(X, Y) \ void OmpStructureChecker::Enter(const parser::OmpClause::X &) { \ -CheckAllowed(llvm::omp::Clause::Y); \ +CheckAllowedClause(llvm::omp::Clause::Y); \ } #define CHECK_REQ_CONSTANT_SCALAR_INT_CLAUSE(X, Y) \ void OmpStructureChecker::Enter(const parser::OmpClause::X &c) { \ -CheckAllowed(llvm::omp::Clause::Y); \ +CheckAllowedClause(llvm::omp::Clause::Y); \ RequiresConstantPositiveParameter(llvm::omp::Clause::Y, c.v); \ } #define CHECK_REQ_SCALAR_INT_CLAUSE(X, Y) \ void OmpStructureChecker::Enter(const parser::OmpClause::X &c) { \ -CheckAllowed(llvm::omp::Clause::Y); \ +CheckAllowedClause(llvm::omp::Clause::Y); \ RequiresPositiveParameter(llvm::omp::Clause::Y, c.v); \ } // Use when clause don't falls under 'struct OmpClause' in 'parse-tree.h'. 
#define CHECK_SIMPLE_PARSER_CLAUSE(X, Y) \ void OmpStructureChecker::Enter(const parser::X &) { \ -CheckAllowed(llvm::omp::Y); \ +CheckAllowedClause(llvm::omp::Y); \ } // 'OmpWorkshareBlockChecker' is used to check the validity of the assignment @@ -163,6 +163,43 @@ class AssociatedLoopChecker { std::map constructNamesAndLevels_; }; +bool OmpStructureChecker::CheckAllowedClause(llvmOmpClause clause) { + unsigned version{context_.langOptions().OpenMPVersion}; + DirectiveContext &dirCtx = GetContext(); + llvm::omp::Directive dir{dirCtx.directive}; + + if (!llvm::omp::isAllowedClauseForDirective(dir, clause, version)) { +unsigned allowedInVersion{[&] { + for (unsigned v : {45, 50, 51, 52, 60}) { +if (v <= version) { + c
[llvm-branch-commits] [llvm] [NewPM][AMDGPU] Port SIPreAllocateWWMRegs to NPM (PR #109939)
https://github.com/Akshat-Oke updated https://github.com/llvm/llvm-project/pull/109939 >From 646d2d1a54ca0ac3bc312f4038826fb431890bf6 Mon Sep 17 00:00:00 2001 From: Akshat Oke Date: Tue, 24 Sep 2024 11:41:18 + Subject: [PATCH] [NewPM][AMDGPU] Port SIPreAllocateWWMRegs to NPM --- .../llvm/Passes/MachinePassRegistry.def | 4 +- llvm/lib/Target/AMDGPU/AMDGPU.h | 6 +- .../lib/Target/AMDGPU/AMDGPUTargetMachine.cpp | 6 +- .../Target/AMDGPU/SIPreAllocateWWMRegs.cpp| 60 --- llvm/lib/Target/AMDGPU/SIPreAllocateWWMRegs.h | 30 ++ .../AMDGPU/si-pre-allocate-wwm-regs.mir | 26 .../si-pre-allocate-wwm-sgpr-spills.mir | 21 +++ 7 files changed, 124 insertions(+), 29 deletions(-) create mode 100644 llvm/lib/Target/AMDGPU/SIPreAllocateWWMRegs.h create mode 100644 llvm/test/CodeGen/AMDGPU/si-pre-allocate-wwm-regs.mir create mode 100644 llvm/test/CodeGen/AMDGPU/si-pre-allocate-wwm-sgpr-spills.mir diff --git a/llvm/include/llvm/Passes/MachinePassRegistry.def b/llvm/include/llvm/Passes/MachinePassRegistry.def index bdc56ca03f392a..72e2cf232bfd17 100644 --- a/llvm/include/llvm/Passes/MachinePassRegistry.def +++ b/llvm/include/llvm/Passes/MachinePassRegistry.def @@ -96,6 +96,7 @@ LOOP_PASS("loop-term-fold", LoopTermFoldPass()) // computed. (We still either need to regenerate kill flags after regalloc, or // preferably fix the scavenger to not depend on them). 
MACHINE_FUNCTION_ANALYSIS("live-intervals", LiveIntervalsAnalysis()) +MACHINE_FUNCTION_ANALYSIS("live-reg-matrix", LiveRegMatrixAnalysis()) MACHINE_FUNCTION_ANALYSIS("live-vars", LiveVariablesAnalysis()) MACHINE_FUNCTION_ANALYSIS("machine-block-freq", MachineBlockFrequencyAnalysis()) MACHINE_FUNCTION_ANALYSIS("machine-branch-prob", @@ -122,8 +123,7 @@ MACHINE_FUNCTION_ANALYSIS("virtregmap", VirtRegMapAnalysis()) // MachineRegionInfoPassAnalysis()) // MACHINE_FUNCTION_ANALYSIS("machine-trace-metrics", // MachineTraceMetricsAnalysis()) MACHINE_FUNCTION_ANALYSIS("reaching-def", -// ReachingDefAnalysisAnalysis()) MACHINE_FUNCTION_ANALYSIS("live-reg-matrix", -// LiveRegMatrixAnalysis()) MACHINE_FUNCTION_ANALYSIS("gc-analysis", +// ReachingDefAnalysisAnalysis()) MACHINE_FUNCTION_ANALYSIS("gc-analysis", // GCMachineCodeAnalysisPass()) #undef MACHINE_FUNCTION_ANALYSIS diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h index b2dd354e496a2e..c0fd5e4625895a 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.h +++ b/llvm/lib/Target/AMDGPU/AMDGPU.h @@ -49,7 +49,7 @@ FunctionPass *createSIFixSGPRCopiesLegacyPass(); FunctionPass *createLowerWWMCopiesPass(); FunctionPass *createSIMemoryLegalizerPass(); FunctionPass *createSIInsertWaitcntsPass(); -FunctionPass *createSIPreAllocateWWMRegsPass(); +FunctionPass *createSIPreAllocateWWMRegsLegacyPass(); FunctionPass *createSIFormMemoryClausesPass(); FunctionPass *createSIPostRABundlerPass(); @@ -208,8 +208,8 @@ extern char &SILateBranchLoweringPassID; void initializeSIOptimizeExecMaskingPass(PassRegistry &); extern char &SIOptimizeExecMaskingID; -void initializeSIPreAllocateWWMRegsPass(PassRegistry &); -extern char &SIPreAllocateWWMRegsID; +void initializeSIPreAllocateWWMRegsLegacyPass(PassRegistry &); +extern char &SIPreAllocateWWMRegsLegacyID; void initializeAMDGPUImageIntrinsicOptimizerPass(PassRegistry &); extern char &AMDGPUImageIntrinsicOptimizerID; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp 
b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp index 04fdee0819b502..a39293863d1c54 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -461,7 +461,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() { initializeSILateBranchLoweringPass(*PR); initializeSIMemoryLegalizerPass(*PR); initializeSIOptimizeExecMaskingPass(*PR); - initializeSIPreAllocateWWMRegsPass(*PR); + initializeSIPreAllocateWWMRegsLegacyPass(*PR); initializeSIFormMemoryClausesPass(*PR); initializeSIPostRABundlerPass(*PR); initializeGCNCreateVOPDPass(*PR); @@ -1443,7 +1443,7 @@ bool GCNPassConfig::addRegAssignAndRewriteFast() { // Equivalent of PEI for SGPRs. addPass(&SILowerSGPRSpillsLegacyID); - addPass(&SIPreAllocateWWMRegsID); + addPass(&SIPreAllocateWWMRegsLegacyID); addPass(createVGPRAllocPass(false)); @@ -1467,7 +1467,7 @@ bool GCNPassConfig::addRegAssignAndRewriteOptimized() { // Equivalent of PEI for SGPRs. addPass(&SILowerSGPRSpillsLegacyID); - addPass(&SIPreAllocateWWMRegsID); + addPass(&SIPreAllocateWWMRegsLegacyID); addPass(createVGPRAllocPass(true)); diff --git a/llvm/lib/Target/AMDGPU/SIPreAllocateWWMRegs.cpp b/llvm/lib/Target/AMDGPU/SIPreAllocateWWMRegs.cpp index 0635cab7b872e2..c1d7a464a81537 100644 --- a/llvm/lib/Target/AMDGPU/SIPreAllocateWWMRegs.cpp +++ b/llvm/lib/Target/AMDGPU/SIPreAllocateWWMRegs.cpp @@ -11,6 +11,7 @@ // //===
[llvm-branch-commits] [libcxx] [libc++][format][3/3] Improves formatting performance. (PR #108990)
https://github.com/mordante updated https://github.com/llvm/llvm-project/pull/108990 >From d435a3118ffe1cca91eff7eeea19f4bc243384c4 Mon Sep 17 00:00:00 2001 From: Mark de Wever Date: Sat, 30 Mar 2024 17:35:56 +0100 Subject: [PATCH] [libc++][format][3/3] Improves formatting performance. This changes the __output_buffer to a new structure. This improves the performance of std::format, std::format_to, std::format_to_n, and std::formatted_size. --- libcxx/include/__format/buffer.h | 616 ++ libcxx/include/__format/format_functions.h| 29 +- .../test/libcxx/transitive_includes/cxx03.csv | 18 - .../test/libcxx/transitive_includes/cxx11.csv | 18 - .../test/libcxx/transitive_includes/cxx14.csv | 18 - .../test/libcxx/transitive_includes/cxx17.csv | 8 - .../format/format.functions/format_tests.h| 2 +- 7 files changed, 352 insertions(+), 357 deletions(-) diff --git a/libcxx/include/__format/buffer.h b/libcxx/include/__format/buffer.h index 8598f0a1c03957..a0caaccb47c2d2 100644 --- a/libcxx/include/__format/buffer.h +++ b/libcxx/include/__format/buffer.h @@ -14,6 +14,7 @@ #include <__algorithm/fill_n.h> #include <__algorithm/max.h> #include <__algorithm/min.h> +#include <__algorithm/ranges_copy.h> #include <__algorithm/ranges_copy_n.h> #include <__algorithm/transform.h> #include <__algorithm/unwrap_iter.h> @@ -29,6 +30,7 @@ #include <__iterator/wrap_iter.h> #include <__memory/addressof.h> #include <__memory/allocate_at_least.h> +#include <__memory/allocator.h> #include <__memory/allocator_traits.h> #include <__memory/construct_at.h> #include <__memory/ranges_construct_at.h> @@ -38,6 +40,7 @@ #include <__utility/exception_guard.h> #include <__utility/move.h> #include +#include #include #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) @@ -53,24 +56,150 @@ _LIBCPP_BEGIN_NAMESPACE_STD namespace __format { +// A helper to limit the total size of code units written. 
+class _LIBCPP_HIDE_FROM_ABI __max_output_size { +public: + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI explicit __max_output_size(size_t __max_size) : __max_size_{__max_size} {} + + // This function adjusts the size of a (bulk) write operation. It ensures the + // number of code units written by a __output_buffer never exceeds + // __max_size_ code units. + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI size_t __write_request(size_t __code_units) { +size_t __result = +__code_units_written_ < __max_size_ ? std::min(__code_units, __max_size_ - __code_units_written_) : 0; +__code_units_written_ += __code_units; +return __result; + } + + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI size_t __code_units_written() const noexcept { return __code_units_written_; } + +private: + size_t __max_size_; + // The code units that would have been written if there was no limit. + // format_to_n returns this value. + size_t __code_units_written_{0}; +}; + /// A "buffer" that handles writing to the proper iterator. /// /// This helper is used together with the @ref back_insert_iterator to offer /// type-erasure for the formatting functions. This reduces the number to /// template instantiations. +/// +/// The design is the following: +/// - There is an external object that connects the buffer to the output. +/// - This buffer object: +/// - inherits publicly from this class. +/// - has a static or dynamic buffer. +/// - has a static member function to make space in its buffer write +/// operations. This can be done by increasing the size of the internal +/// buffer or by writing the contents of the buffer to the output iterator. +/// +/// This member function is a constructor argument, so its name is not +/// fixed. The code uses the name __prepare_write. +/// - The number of output code units can be limited by a __max_output_size +/// object. This is used in format_to_n. This object: +/// - Contains the maximum number of code units to be written.
+/// - Contains the number of code units that are requested to be written. +/// This number is returned to the user of format_to_n. +/// - The write functions call the object's __write_request member function. +/// This function: +/// - Updates the number of code units that are requested to be written. +/// - Returns the number of code units that can be written without +/// exceeding the maximum number of code units to be written. +/// +/// Documentation for the buffer usage members: +/// - __ptr_ +/// The start of the buffer. +/// - __capacity_ +/// The number of code units that can be written. This means +/// [__ptr_, __ptr_ + __capacity_) is a valid range to write to. +/// - __size_ +/// The number of code units written in the buffer. The next code unit will +/// be written at __ptr_ + __size_. This __size_ may NOT contain the total +/// number of code units written by the __output_buffer. Whether or not it +/// does depends on the sub-class used. Typically the total number o
[llvm-branch-commits] [libcxx] [libc++][format][3/3] Improves formatting performance. (PR #108990)
https://github.com/ldionne edited https://github.com/llvm/llvm-project/pull/108990 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [libcxx] [libc++][format][3/3] Improves formatting performance. (PR #108990)
@@ -319,188 +353,222 @@ struct _LIBCPP_TEMPLATE_VIS __back_insert_iterator_container -class _LIBCPP_TEMPLATE_VIS __writer_container { +// A dynamically growing buffer. +template <__fmt_char_type _CharT> +class _LIBCPP_TEMPLATE_VIS __allocating_buffer : public __output_buffer<_CharT> { public: - using _CharT = typename _Container::value_type; + __allocating_buffer(const __allocating_buffer&)= delete; + __allocating_buffer& operator=(const __allocating_buffer&) = delete; - _LIBCPP_HIDE_FROM_ABI explicit __writer_container(back_insert_iterator<_Container> __out_it) - : __container_{__out_it.__get_container()} {} + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI __allocating_buffer() : __allocating_buffer{nullptr} {} - _LIBCPP_HIDE_FROM_ABI auto __out_it() { return std::back_inserter(*__container_); } + [[nodiscard]] + _LIBCPP_HIDE_FROM_ABI explicit __allocating_buffer(__max_output_size* __max_output_size) + : __output_buffer<_CharT>{__buffer_, __buffer_size_, __prepare_write, __max_output_size} {} - _LIBCPP_HIDE_FROM_ABI void __flush(_CharT* __ptr, size_t __n) { -__container_->insert(__container_->end(), __ptr, __ptr + __n); + _LIBCPP_HIDE_FROM_ABI ~__allocating_buffer() { +if (__ptr_ != __buffer_) { + ranges::destroy_n(__ptr_, this->__size()); + allocator_traits<_Alloc>::deallocate(__alloc_, __ptr_, this->__capacity()); +} } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI basic_string_view<_CharT> __view() { return {__ptr_, this->__size()}; } + private: - _Container* __container_; -}; + // At the moment the allocator is hard-code. There might be reasons to have + // an allocator trait in the future. This ensures forward compatibility. + using _Alloc = allocator<_CharT>; + _LIBCPP_NO_UNIQUE_ADDRESS _Alloc __alloc_; -/// Selects the type of the writer used for the output iterator. -template -class _LIBCPP_TEMPLATE_VIS __writer_selector { - using _Container = typename __back_insert_iterator_container<_OutIt>::type; + // Since allocating is expensive the class has a small internal buffer. 
When + // its capacity is exceeded a dynamic buffer will be allocated. + static constexpr size_t __buffer_size_ = 256; + _CharT __buffer_[__buffer_size_]; -public: - using type = - conditional_t, -__writer_container<_Container>, -conditional_t<__enable_direct_output<_OutIt, _CharT>, - __writer_direct<_OutIt, _CharT>, - __writer_iterator<_OutIt, _CharT>>>; + _CharT* __ptr_{__buffer_}; + + _LIBCPP_HIDE_FROM_ABI void __grow_buffer(size_t __capacity) { +if (__capacity < __buffer_size_) + return; + +_LIBCPP_ASSERT_INTERNAL(__capacity > this->__capacity(), "the buffer must grow"); +auto __result = std::__allocate_at_least(__alloc_, __capacity); +auto __guard = std::__make_exception_guard([&] { + allocator_traits<_Alloc>::deallocate(__alloc_, __result.ptr, __result.count); +}); +// This shouldn't throw, but just to be safe. Note that at -O1 this +// guard is optimized away so there is no runtime overhead. +new (__result.ptr) _CharT[__result.count]; +std::copy_n(__ptr_, this->__size(), __result.ptr); +__guard.__complete(); +if (__ptr_ != __buffer_) { + ranges::destroy_n(__ptr_, this->__capacity()); + allocator_traits<_Alloc>::deallocate(__alloc_, __ptr_, this->__capacity()); +} + +__ptr_ = __result.ptr; +this->__buffer_moved(__ptr_, __result.count); + } + + _LIBCPP_HIDE_FROM_ABI void __prepare_write(size_t __size_hint) { +__grow_buffer(std::max(this->__capacity() + __size_hint, this->__capacity() * 1.6)); + } + + _LIBCPP_HIDE_FROM_ABI static void __prepare_write(__output_buffer<_CharT>& __buffer, size_t __size_hint) { + static_cast<__allocating_buffer<_CharT>&>(__buffer).__prepare_write(__size_hint); + } }; -/// The generic formatting buffer. +// A buffer that directly writes to the underlying buffer. 
template - requires(output_iterator<_OutIt, const _CharT&>) -class _LIBCPP_TEMPLATE_VIS __format_buffer { - using _Storage = - conditional_t<__enable_direct_output<_OutIt, _CharT>, __direct_storage<_CharT>, __internal_storage<_CharT>>; - +class _LIBCPP_TEMPLATE_VIS __direct_iterator_buffer : public __output_buffer<_CharT> { public: - _LIBCPP_HIDE_FROM_ABI explicit __format_buffer(_OutIt __out_it) -requires(same_as<_Storage, __internal_storage<_CharT>>) - : __output_(__storage_.__begin(), __storage_.__buffer_size, this), __writer_(std::move(__out_it)) {} + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI explicit __direct_iterator_buffer(_OutIt __out_it) + : __direct_iterator_buffer{__out_it, nullptr} {} - _LIBCPP_HIDE_FROM_ABI explicit __format_buffer(_OutIt __out_it) -requires(same_as<_Storage, __direct_storage<_CharT>>) - : __output_(std::__unwrap_iter(__out_it), size_t(-1), this), __writer_(std::move(__out_it))
[llvm-branch-commits] [libcxx] [libc++][format][3/3] Improves formatting performance. (PR #108990)
@@ -53,24 +56,150 @@ _LIBCPP_BEGIN_NAMESPACE_STD namespace __format { +// A helper to limit the total size of code units written. +class _LIBCPP_HIDE_FROM_ABI __max_output_size { +public: + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI explicit __max_output_size(size_t __max_size) : __max_size_{__max_size} {} + + // This function adjusts the size of a (bulk) write operations. It ensures the + // number of code units written by a __output_buffer never exceeds + // __max_size_ code units. + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI size_t __write_request(size_t __code_units) { +size_t __result = +__code_units_written_ < __max_size_ ? std::min(__code_units, __max_size_ - __code_units_written_) : 0; +__code_units_written_ += __code_units; +return __result; + } + + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI size_t __code_units_written() const noexcept { return __code_units_written_; } + +private: + size_t __max_size_; + // The code units that would have been written if there was no limit. + // format_to_n returns this value. + size_t __code_units_written_{0}; +}; + /// A "buffer" that handles writing to the proper iterator. /// /// This helper is used together with the @ref back_insert_iterator to offer /// type-erasure for the formatting functions. This reduces the number to /// template instantiations. +/// +/// The design is the following: +/// - There is an external object that connects the buffer to the output. +/// - This buffer object: +/// - inherits publicly from this class. +/// - has a static or dynamic buffer. +/// - has a static member function to make space in its buffer write +/// operations. This can be done by increasing the size of the internal +/// buffer or by writing the contents of the buffer to the output iterator. +/// +/// This member function is a constructor argument, so its name is not +/// fixed. The code uses the name __prepare_write. +/// - The number of output code units can be limited by a __max_output_size +/// object. 
This is used in format_to_n. This object: +/// - Contains the maximum number of code units to be written. +/// - Contains the number of code units that are requested to be written. +/// This number is returned to the user of format_to_n. +/// - The write functions call the object's __write_request member function. +/// This function: +/// - Updates the number of code units that are requested to be written. +/// - Returns the number of code units that can be written without +/// exceeding the maximum number of code units to be written. +/// +/// Documentation for the buffer usage members: +/// - __ptr_ +/// The start of the buffer. +/// - __capacity_ +/// The number of code units that can be written. This means +/// [__ptr_, __ptr_ + __capacity_) is a valid range to write to. +/// - __size_ +/// The number of code units written in the buffer. The next code unit will +/// be written at __ptr_ + __size_. This __size_ may NOT contain the total +/// number of code units written by the __output_buffer. Whether or not it +/// does depends on the sub-class used. Typically the total number of code +/// units written is not interesting. It is interesting for format_to_n which +/// has its own way to track this number. +/// +/// Documentation for the buffer modifying buffer operations: ldionne wrote: ```suggestion /// Documentation for the modifying buffer operations: ``` https://github.com/llvm/llvm-project/pull/108990 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [libcxx] [libc++][format][3/3] Improves formatting performance. (PR #108990)
https://github.com/ldionne approved this pull request. This LGTM once the comments have been addressed. In particular, we shouldn't need to change the transitive includes. https://github.com/llvm/llvm-project/pull/108990 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] release/19.x: [LoongArch] Fix the assertion for atomic store with 'ptr' type (PR #109915)
https://github.com/xen0n approved this pull request. fixes loongson-community/discussions#68 https://github.com/llvm/llvm-project/pull/109915 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [NewPM][CodeGen] Port LiveRegMatrix to NPM (PR #109938)
https://github.com/Akshat-Oke updated https://github.com/llvm/llvm-project/pull/109938 >From 22bb8f0e07088515380c2948ce7b37a041a67e0e Mon Sep 17 00:00:00 2001 From: Akshat Oke Date: Tue, 24 Sep 2024 09:07:04 + Subject: [PATCH] [NewPM][CodeGen] Port LiveRegMatrix to NPM --- llvm/include/llvm/CodeGen/LiveRegMatrix.h | 50 --- llvm/include/llvm/InitializePasses.h | 2 +- .../llvm/Passes/MachinePassRegistry.def | 4 +- llvm/lib/CodeGen/LiveRegMatrix.cpp| 38 ++ llvm/lib/CodeGen/RegAllocBasic.cpp| 8 +-- llvm/lib/CodeGen/RegAllocGreedy.cpp | 8 +-- llvm/lib/Passes/PassBuilder.cpp | 1 + llvm/lib/Target/AMDGPU/GCNNSAReassign.cpp | 6 +-- .../Target/AMDGPU/SIPreAllocateWWMRegs.cpp| 6 +-- 9 files changed, 88 insertions(+), 35 deletions(-) diff --git a/llvm/include/llvm/CodeGen/LiveRegMatrix.h b/llvm/include/llvm/CodeGen/LiveRegMatrix.h index 2b32308c7c075e..c024ca9c1dc38d 100644 --- a/llvm/include/llvm/CodeGen/LiveRegMatrix.h +++ b/llvm/include/llvm/CodeGen/LiveRegMatrix.h @@ -37,7 +37,9 @@ class MachineFunction; class TargetRegisterInfo; class VirtRegMap; -class LiveRegMatrix : public MachineFunctionPass { +class LiveRegMatrix { + friend class LiveRegMatrixWrapperPass; + friend class LiveRegMatrixAnalysis; const TargetRegisterInfo *TRI = nullptr; LiveIntervals *LIS = nullptr; VirtRegMap *VRM = nullptr; @@ -57,15 +59,21 @@ class LiveRegMatrix : public MachineFunctionPass { unsigned RegMaskVirtReg = 0; BitVector RegMaskUsable; - // MachineFunctionPass boilerplate. 
- void getAnalysisUsage(AnalysisUsage &) const override; - bool runOnMachineFunction(MachineFunction &) override; - void releaseMemory() override; + LiveRegMatrix() = default; + void releaseMemory(); public: - static char ID; - - LiveRegMatrix(); + LiveRegMatrix(LiveRegMatrix &&Other) + : TRI(Other.TRI), LIS(Other.LIS), VRM(Other.VRM), UserTag(Other.UserTag), +Matrix(std::move(Other.Matrix)), Queries(std::move(Other.Queries)), +RegMaskTag(Other.RegMaskTag), RegMaskVirtReg(Other.RegMaskVirtReg), +RegMaskUsable(std::move(Other.RegMaskUsable)) { +Other.TRI = nullptr; +Other.LIS = nullptr; +Other.VRM = nullptr; + } + + void init(MachineFunction &MF, LiveIntervals *LIS, VirtRegMap *VRM); //======// // High-level interface. @@ -159,6 +167,32 @@ class LiveRegMatrix : public MachineFunctionPass { Register getOneVReg(unsigned PhysReg) const; }; +class LiveRegMatrixWrapperPass : public MachineFunctionPass { + LiveRegMatrix LRM; + +public: + static char ID; + + LiveRegMatrixWrapperPass() : MachineFunctionPass(ID) {} + + LiveRegMatrix &getLRM() { return LRM; } + const LiveRegMatrix &getLRM() const { return LRM; } + + void getAnalysisUsage(AnalysisUsage &AU) const override; + bool runOnMachineFunction(MachineFunction &MF) override; + void releaseMemory() override; +}; + +class LiveRegMatrixAnalysis : public AnalysisInfoMixin { + friend AnalysisInfoMixin; + static AnalysisKey Key; + +public: + using Result = LiveRegMatrix; + + LiveRegMatrix run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM); +}; + } // end namespace llvm #endif // LLVM_CODEGEN_LIVEREGMATRIX_H diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h index 9ce92d7da8700b..8c5607b33096f2 100644 --- a/llvm/include/llvm/InitializePasses.h +++ b/llvm/include/llvm/InitializePasses.h @@ -156,7 +156,7 @@ void initializeLiveDebugValuesPass(PassRegistry &); void initializeLiveDebugVariablesPass(PassRegistry &); void initializeLiveIntervalsWrapperPassPass(PassRegistry &); void 
initializeLiveRangeShrinkPass(PassRegistry &); -void initializeLiveRegMatrixPass(PassRegistry &); +void initializeLiveRegMatrixWrapperPassPass(PassRegistry &); void initializeLiveStacksPass(PassRegistry &); void initializeLiveVariablesWrapperPassPass(PassRegistry &); void initializeLoadStoreOptPass(PassRegistry &); diff --git a/llvm/include/llvm/Passes/MachinePassRegistry.def b/llvm/include/llvm/Passes/MachinePassRegistry.def index bdc56ca03f392a..4497c1fce0db69 100644 --- a/llvm/include/llvm/Passes/MachinePassRegistry.def +++ b/llvm/include/llvm/Passes/MachinePassRegistry.def @@ -97,6 +97,7 @@ LOOP_PASS("loop-term-fold", LoopTermFoldPass()) // preferably fix the scavenger to not depend on them). MACHINE_FUNCTION_ANALYSIS("live-intervals", LiveIntervalsAnalysis()) MACHINE_FUNCTION_ANALYSIS("live-vars", LiveVariablesAnalysis()) +MACHINE_FUNCTION_ANALYSIS("live-reg-matrix", LiveRegMatrixAnalysis()) MACHINE_FUNCTION_ANALYSIS("machine-block-freq", MachineBlockFrequencyAnalysis()) MACHINE_FUNCTION_ANALYSIS("machine-branch-prob", MachineBranchProbabilityAnalysis()) @@ -122,8 +123,7 @@ MACHINE_FUNCTION_ANALYSIS("virtregmap", VirtRegMapAnalysis()) // MachineRegionInfoPa
[llvm-branch-commits] [llvm] [NewPM][AMDGPU] Port SIPreAllocateWWMRegs to NPM (PR #109939)
https://github.com/Akshat-Oke updated https://github.com/llvm/llvm-project/pull/109939 >From 4c5184af67fb5eaeec1eb971421b8a3030b22c76 Mon Sep 17 00:00:00 2001 From: Akshat Oke Date: Tue, 24 Sep 2024 11:41:18 + Subject: [PATCH] [NewPM][AMDGPU] Port SIPreAllocateWWMRegs to NPM --- llvm/lib/Target/AMDGPU/AMDGPU.h | 6 +- llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def | 1 + .../lib/Target/AMDGPU/AMDGPUTargetMachine.cpp | 7 ++- .../Target/AMDGPU/SIPreAllocateWWMRegs.cpp| 60 --- llvm/lib/Target/AMDGPU/SIPreAllocateWWMRegs.h | 30 ++ 5 files changed, 77 insertions(+), 27 deletions(-) create mode 100644 llvm/lib/Target/AMDGPU/SIPreAllocateWWMRegs.h diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h index b2dd354e496a2e..c0fd5e4625895a 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.h +++ b/llvm/lib/Target/AMDGPU/AMDGPU.h @@ -49,7 +49,7 @@ FunctionPass *createSIFixSGPRCopiesLegacyPass(); FunctionPass *createLowerWWMCopiesPass(); FunctionPass *createSIMemoryLegalizerPass(); FunctionPass *createSIInsertWaitcntsPass(); -FunctionPass *createSIPreAllocateWWMRegsPass(); +FunctionPass *createSIPreAllocateWWMRegsLegacyPass(); FunctionPass *createSIFormMemoryClausesPass(); FunctionPass *createSIPostRABundlerPass(); @@ -208,8 +208,8 @@ extern char &SILateBranchLoweringPassID; void initializeSIOptimizeExecMaskingPass(PassRegistry &); extern char &SIOptimizeExecMaskingID; -void initializeSIPreAllocateWWMRegsPass(PassRegistry &); -extern char &SIPreAllocateWWMRegsID; +void initializeSIPreAllocateWWMRegsLegacyPass(PassRegistry &); +extern char &SIPreAllocateWWMRegsLegacyID; void initializeAMDGPUImageIntrinsicOptimizerPass(PassRegistry &); extern char &AMDGPUImageIntrinsicOptimizerID; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def b/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def index 0ebf34c901c142..174a90f0aa419d 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def +++ b/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def @@ -102,5 +102,6 @@ 
MACHINE_FUNCTION_PASS("gcn-dpp-combine", GCNDPPCombinePass()) MACHINE_FUNCTION_PASS("si-load-store-opt", SILoadStoreOptimizerPass()) MACHINE_FUNCTION_PASS("si-lower-sgpr-spills", SILowerSGPRSpillsPass()) MACHINE_FUNCTION_PASS("si-peephole-sdwa", SIPeepholeSDWAPass()) +MACHINE_FUNCTION_PASS("si-pre-allocate-wwm-regs", SIPreAllocateWWMRegsPass()) MACHINE_FUNCTION_PASS("si-shrink-instructions", SIShrinkInstructionsPass()) #undef MACHINE_FUNCTION_PASS diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp index 04fdee0819b502..9a28c648e2c4ed 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -41,6 +41,7 @@ #include "SIMachineFunctionInfo.h" #include "SIMachineScheduler.h" #include "SIPeepholeSDWA.h" +#include "SIPreAllocateWWMRegs.h" #include "SIShrinkInstructions.h" #include "TargetInfo/AMDGPUTargetInfo.h" #include "Utils/AMDGPUBaseInfo.h" @@ -461,7 +462,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() { initializeSILateBranchLoweringPass(*PR); initializeSIMemoryLegalizerPass(*PR); initializeSIOptimizeExecMaskingPass(*PR); - initializeSIPreAllocateWWMRegsPass(*PR); + initializeSIPreAllocateWWMRegsLegacyPass(*PR); initializeSIFormMemoryClausesPass(*PR); initializeSIPostRABundlerPass(*PR); initializeGCNCreateVOPDPass(*PR); @@ -1443,7 +1444,7 @@ bool GCNPassConfig::addRegAssignAndRewriteFast() { // Equivalent of PEI for SGPRs. addPass(&SILowerSGPRSpillsLegacyID); - addPass(&SIPreAllocateWWMRegsID); + addPass(&SIPreAllocateWWMRegsLegacyID); addPass(createVGPRAllocPass(false)); @@ -1467,7 +1468,7 @@ bool GCNPassConfig::addRegAssignAndRewriteOptimized() { // Equivalent of PEI for SGPRs. 
addPass(&SILowerSGPRSpillsLegacyID); - addPass(&SIPreAllocateWWMRegsID); + addPass(&SIPreAllocateWWMRegsLegacyID); addPass(createVGPRAllocPass(true)); diff --git a/llvm/lib/Target/AMDGPU/SIPreAllocateWWMRegs.cpp b/llvm/lib/Target/AMDGPU/SIPreAllocateWWMRegs.cpp index 0635cab7b872e2..c1d7a464a81537 100644 --- a/llvm/lib/Target/AMDGPU/SIPreAllocateWWMRegs.cpp +++ b/llvm/lib/Target/AMDGPU/SIPreAllocateWWMRegs.cpp @@ -11,6 +11,7 @@ // //===--===// +#include "SIPreAllocateWWMRegs.h" #include "AMDGPU.h" #include "GCNSubtarget.h" #include "MCTargetDesc/AMDGPUMCTargetDesc.h" @@ -34,7 +35,7 @@ static cl::opt namespace { -class SIPreAllocateWWMRegs : public MachineFunctionPass { +class SIPreAllocateWWMRegs { private: const SIInstrInfo *TII; const SIRegisterInfo *TRI; @@ -48,13 +49,21 @@ class SIPreAllocateWWMRegs : public MachineFunctionPass { #ifndef NDEBUG void printWWMInfo(const MachineInstr &MI); #endif + bool processDef(MachineOperand &MO); + void rewriteRegs(MachineFunction &MF); + +public: + SIPreAllocateWWMR
[llvm-branch-commits] [llvm] [NewPM][CodeGen] Port LiveRegMatrix to NPM (PR #109938)
@@ -57,15 +59,21 @@ class LiveRegMatrix : public MachineFunctionPass { unsigned RegMaskVirtReg = 0; BitVector RegMaskUsable; - // MachineFunctionPass boilerplate. - void getAnalysisUsage(AnalysisUsage &) const override; - bool runOnMachineFunction(MachineFunction &) override; - void releaseMemory() override; + LiveRegMatrix() = default; + void releaseMemory(); public: - static char ID; - - LiveRegMatrix(); + LiveRegMatrix(LiveRegMatrix &&Other) + : TRI(Other.TRI), LIS(Other.LIS), VRM(Other.VRM), UserTag(Other.UserTag), +Matrix(std::move(Other.Matrix)), Queries(std::move(Other.Queries)), +RegMaskTag(Other.RegMaskTag), RegMaskVirtReg(Other.RegMaskVirtReg), +RegMaskUsable(std::move(Other.RegMaskUsable)) { +Other.TRI = nullptr; +Other.LIS = nullptr; +Other.VRM = nullptr; + } + + void init(MachineFunction &MF, LiveIntervals *LIS, VirtRegMap *VRM); arsenm wrote: Make these references, these are required anyway https://github.com/llvm/llvm-project/pull/109938 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [NewPM][CodeGen] Port LiveRegMatrix to NPM (PR #109938)
@@ -57,15 +59,21 @@ class LiveRegMatrix : public MachineFunctionPass { unsigned RegMaskVirtReg = 0; BitVector RegMaskUsable; - // MachineFunctionPass boilerplate. - void getAnalysisUsage(AnalysisUsage &) const override; - bool runOnMachineFunction(MachineFunction &) override; - void releaseMemory() override; + LiveRegMatrix() = default; + void releaseMemory(); public: - static char ID; - - LiveRegMatrix(); + LiveRegMatrix(LiveRegMatrix &&Other) arsenm wrote: Why does this need a move constructor? https://github.com/llvm/llvm-project/pull/109938 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [NewPM][CodeGen] Port LiveRegMatrix to NPM (PR #109938)
@@ -159,6 +167,32 @@ class LiveRegMatrix : public MachineFunctionPass { Register getOneVReg(unsigned PhysReg) const; }; +class LiveRegMatrixWrapperPass : public MachineFunctionPass { arsenm wrote: Rename to LiveRegMatrixWrapperLegacy to avoid PassPass https://github.com/llvm/llvm-project/pull/109938 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [NewPM][CodeGen] Port LiveRegMatrix to NPM (PR #109938)
@@ -57,15 +59,21 @@ class LiveRegMatrix : public MachineFunctionPass { unsigned RegMaskVirtReg = 0; BitVector RegMaskUsable; - // MachineFunctionPass boilerplate. - void getAnalysisUsage(AnalysisUsage &) const override; - bool runOnMachineFunction(MachineFunction &) override; - void releaseMemory() override; + LiveRegMatrix() = default; + void releaseMemory(); public: - static char ID; - - LiveRegMatrix(); + LiveRegMatrix(LiveRegMatrix &&Other) + : TRI(Other.TRI), LIS(Other.LIS), VRM(Other.VRM), UserTag(Other.UserTag), +Matrix(std::move(Other.Matrix)), Queries(std::move(Other.Queries)), +RegMaskTag(Other.RegMaskTag), RegMaskVirtReg(Other.RegMaskVirtReg), +RegMaskUsable(std::move(Other.RegMaskUsable)) { +Other.TRI = nullptr; +Other.LIS = nullptr; +Other.VRM = nullptr; arsenm wrote: Shouldn't need to clear these from Other https://github.com/llvm/llvm-project/pull/109938 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [NewPM][CodeGen] Port LiveRegMatrix to NPM (PR #109938)
@@ -156,7 +156,7 @@ void initializeLiveDebugValuesPass(PassRegistry &); void initializeLiveDebugVariablesPass(PassRegistry &); void initializeLiveIntervalsWrapperPassPass(PassRegistry &); void initializeLiveRangeShrinkPass(PassRegistry &); -void initializeLiveRegMatrixPass(PassRegistry &); +void initializeLiveRegMatrixWrapperPassPass(PassRegistry &); arsenm wrote: Avoid PassPass https://github.com/llvm/llvm-project/pull/109938 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [NewPM][CodeGen] Port LiveRegMatrix to NPM (PR #109938)
@@ -97,6 +97,7 @@ LOOP_PASS("loop-term-fold", LoopTermFoldPass()) // preferably fix the scavenger to not depend on them). MACHINE_FUNCTION_ANALYSIS("live-intervals", LiveIntervalsAnalysis()) MACHINE_FUNCTION_ANALYSIS("live-vars", LiveVariablesAnalysis()) +MACHINE_FUNCTION_ANALYSIS("live-reg-matrix", LiveRegMatrixAnalysis()) arsenm wrote: Alphabetize https://github.com/llvm/llvm-project/pull/109938 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [AMDGPU] Add tests for SIPreAllocateWWMRegs (PR #109963)
@@ -0,0 +1,21 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5 +# RUN: llc -mtriple=amdgcn -verify-machineinstrs -amdgpu-prealloc-sgpr-spill-vgprs -run-pass=si-pre-allocate-wwm-regs -o - -mcpu=tahiti %s | FileCheck %s + +--- + +name: pre_allocate_wwm_spill_to_vgpr +tracksRegLiveness: true +body: | + bb.0: +liveins: $sgpr1 +; CHECK-LABEL: name: pre_allocate_wwm_spill_to_vgpr +; CHECK: liveins: $sgpr1 +; CHECK-NEXT: {{ $}} +; CHECK-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF +; CHECK-NEXT: dead $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr1, 0, [[DEF]] +; CHECK-NEXT: dead [[COPY:%[0-9]+]]:vgpr_32 = COPY [[DEF]] +%0:vgpr_32 = IMPLICIT_DEF +%23:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr1, 0, %0:vgpr_32 arsenm wrote: Compact register numbers https://github.com/llvm/llvm-project/pull/109963 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [AMDGPU] Add tests for SIPreAllocateWWMRegs (PR #109963)
@@ -0,0 +1,26 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5 +# RUN: llc -mtriple=amdgcn -verify-machineinstrs -run-pass=si-pre-allocate-wwm-regs -o - -mcpu=tahiti %s | FileCheck %s + +--- + +name: pre_allocate_wwm_regs_strict +tracksRegLiveness: true +body: | + bb.0: +liveins: $sgpr1 +; CHECK-LABEL: name: pre_allocate_wwm_regs_strict +; CHECK: liveins: $sgpr1 +; CHECK-NEXT: {{ $}} +; CHECK-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF +; CHECK-NEXT: renamable $sgpr4_sgpr5 = ENTER_STRICT_WWM -1, implicit-def $exec, implicit-def $scc, implicit $exec +; CHECK-NEXT: $vgpr0 = V_MOV_B32_e32 0, implicit $exec +; CHECK-NEXT: dead $vgpr0 = V_MOV_B32_dpp $vgpr0, [[DEF]], 323, 12, 15, 0, implicit $exec +; CHECK-NEXT: $exec = EXIT_STRICT_WWM killed renamable $sgpr4_sgpr5 +; CHECK-NEXT: dead [[COPY:%[0-9]+]]:vgpr_32 = COPY [[DEF]] +%0:vgpr_32 = IMPLICIT_DEF +renamable $sgpr4_sgpr5 = ENTER_STRICT_WWM -1, implicit-def $exec, implicit-def $scc, implicit $exec +%24:vgpr_32 = V_MOV_B32_e32 0, implicit $exec arsenm wrote: Use run-pass=none to compact the register numbers https://github.com/llvm/llvm-project/pull/109963 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [AMDGPU] Add tests for SIPreAllocateWWMRegs (PR #109963)
@@ -0,0 +1,21 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5 +# RUN: llc -mtriple=amdgcn -verify-machineinstrs -amdgpu-prealloc-sgpr-spill-vgprs -run-pass=si-pre-allocate-wwm-regs -o - -mcpu=tahiti %s | FileCheck %s + arsenm wrote: Why does this need to be split into a separate test file? https://github.com/llvm/llvm-project/pull/109963 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [AMDGPU] Add tests for SIPreAllocateWWMRegs (PR #109963)
@@ -0,0 +1,26 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5 +# RUN: llc -mtriple=amdgcn -verify-machineinstrs -run-pass=si-pre-allocate-wwm-regs -o - -mcpu=tahiti %s | FileCheck %s + +--- + +name: pre_allocate_wwm_regs_strict +tracksRegLiveness: true +body: | arsenm wrote: This really needs checks for the MFI serialized allocated register. It is not yet serialized, but it really needs to be (and is currently causing issues for me) https://github.com/llvm/llvm-project/pull/109963 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [NewPM][AMDGPU] Port SIPreAllocateWWMRegs to NPM (PR #109939)
https://github.com/Akshat-Oke updated https://github.com/llvm/llvm-project/pull/109939 >From 3d8720930eaf0acd31c39722c98da085066ed315 Mon Sep 17 00:00:00 2001 From: Akshat Oke Date: Wed, 25 Sep 2024 11:21:04 + Subject: [PATCH 1/2] [AMDGPU] Add tests for SIPreAllocateWWMRegs --- .../AMDGPU/si-pre-allocate-wwm-regs.mir | 26 +++ .../si-pre-allocate-wwm-sgpr-spills.mir | 21 +++ 2 files changed, 47 insertions(+) create mode 100644 llvm/test/CodeGen/AMDGPU/si-pre-allocate-wwm-regs.mir create mode 100644 llvm/test/CodeGen/AMDGPU/si-pre-allocate-wwm-sgpr-spills.mir diff --git a/llvm/test/CodeGen/AMDGPU/si-pre-allocate-wwm-regs.mir b/llvm/test/CodeGen/AMDGPU/si-pre-allocate-wwm-regs.mir new file mode 100644 index 00..f2db299f575f5e --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/si-pre-allocate-wwm-regs.mir @@ -0,0 +1,26 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5 +# RUN: llc -mtriple=amdgcn -verify-machineinstrs -run-pass=si-pre-allocate-wwm-regs -o - -mcpu=tahiti %s | FileCheck %s + +--- + +name: pre_allocate_wwm_regs_strict +tracksRegLiveness: true +body: | + bb.0: +liveins: $sgpr1 +; CHECK-LABEL: name: pre_allocate_wwm_regs_strict +; CHECK: liveins: $sgpr1 +; CHECK-NEXT: {{ $}} +; CHECK-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF +; CHECK-NEXT: renamable $sgpr4_sgpr5 = ENTER_STRICT_WWM -1, implicit-def $exec, implicit-def $scc, implicit $exec +; CHECK-NEXT: $vgpr0 = V_MOV_B32_e32 0, implicit $exec +; CHECK-NEXT: dead $vgpr0 = V_MOV_B32_dpp $vgpr0, [[DEF]], 323, 12, 15, 0, implicit $exec +; CHECK-NEXT: $exec = EXIT_STRICT_WWM killed renamable $sgpr4_sgpr5 +; CHECK-NEXT: dead [[COPY:%[0-9]+]]:vgpr_32 = COPY [[DEF]] +%0:vgpr_32 = IMPLICIT_DEF +renamable $sgpr4_sgpr5 = ENTER_STRICT_WWM -1, implicit-def $exec, implicit-def $scc, implicit $exec +%24:vgpr_32 = V_MOV_B32_e32 0, implicit $exec +%25:vgpr_32 = V_MOV_B32_dpp %24:vgpr_32(tied-def 0), %0:vgpr_32, 323, 12, 15, 0, implicit $exec +$exec = EXIT_STRICT_WWM killed 
renamable $sgpr4_sgpr5 +%2:vgpr_32 = COPY %0:vgpr_32 +... diff --git a/llvm/test/CodeGen/AMDGPU/si-pre-allocate-wwm-sgpr-spills.mir b/llvm/test/CodeGen/AMDGPU/si-pre-allocate-wwm-sgpr-spills.mir new file mode 100644 index 00..f0efe74878d831 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/si-pre-allocate-wwm-sgpr-spills.mir @@ -0,0 +1,21 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5 +# RUN: llc -mtriple=amdgcn -verify-machineinstrs -amdgpu-prealloc-sgpr-spill-vgprs -run-pass=si-pre-allocate-wwm-regs -o - -mcpu=tahiti %s | FileCheck %s + +--- + +name: pre_allocate_wwm_spill_to_vgpr +tracksRegLiveness: true +body: | + bb.0: +liveins: $sgpr1 +; CHECK-LABEL: name: pre_allocate_wwm_spill_to_vgpr +; CHECK: liveins: $sgpr1 +; CHECK-NEXT: {{ $}} +; CHECK-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF +; CHECK-NEXT: dead $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr1, 0, [[DEF]] +; CHECK-NEXT: dead [[COPY:%[0-9]+]]:vgpr_32 = COPY [[DEF]] +%0:vgpr_32 = IMPLICIT_DEF +%23:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr1, 0, %0:vgpr_32 +%2:vgpr_32 = COPY %0:vgpr_32 +... 
+ >From 0d0cd3fb0bdc41731c89492dbe34a1ebf939c52e Mon Sep 17 00:00:00 2001 From: Akshat Oke Date: Tue, 24 Sep 2024 11:41:18 + Subject: [PATCH 2/2] [NewPM][AMDGPU] Port SIPreAllocateWWMRegs to NPM --- llvm/lib/Target/AMDGPU/AMDGPU.h | 6 +- llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def | 1 + .../lib/Target/AMDGPU/AMDGPUTargetMachine.cpp | 7 ++- .../Target/AMDGPU/SIPreAllocateWWMRegs.cpp| 60 --- llvm/lib/Target/AMDGPU/SIPreAllocateWWMRegs.h | 30 ++ .../AMDGPU/si-pre-allocate-wwm-regs.mir | 1 + .../si-pre-allocate-wwm-sgpr-spills.mir | 1 + 7 files changed, 79 insertions(+), 27 deletions(-) create mode 100644 llvm/lib/Target/AMDGPU/SIPreAllocateWWMRegs.h diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h index b2dd354e496a2e..c0fd5e4625895a 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.h +++ b/llvm/lib/Target/AMDGPU/AMDGPU.h @@ -49,7 +49,7 @@ FunctionPass *createSIFixSGPRCopiesLegacyPass(); FunctionPass *createLowerWWMCopiesPass(); FunctionPass *createSIMemoryLegalizerPass(); FunctionPass *createSIInsertWaitcntsPass(); -FunctionPass *createSIPreAllocateWWMRegsPass(); +FunctionPass *createSIPreAllocateWWMRegsLegacyPass(); FunctionPass *createSIFormMemoryClausesPass(); FunctionPass *createSIPostRABundlerPass(); @@ -208,8 +208,8 @@ extern char &SILateBranchLoweringPassID; void initializeSIOptimizeExecMaskingPass(PassRegistry &); extern char &SIOptimizeExecMaskingID; -void initializeSIPreAllocateWWMRegsPass(PassRegistry &); -extern char &SIPreAllocateWWMRegsID; +void initializeSIPreAllocateWWMRegsLegacyPass(PassRegistry &); +extern char &SIPreAllocateWWMRegsLegacyID;
[llvm-branch-commits] [llvm] [NewPM][AMDGPU] Port SIPreAllocateWWMRegs to NPM (PR #109939)
https://github.com/Akshat-Oke edited https://github.com/llvm/llvm-project/pull/109939 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [AMDGPU] Add tests for SIPreAllocateWWMRegs (PR #109963)
https://github.com/Akshat-Oke created https://github.com/llvm/llvm-project/pull/109963 None >From 3d8720930eaf0acd31c39722c98da085066ed315 Mon Sep 17 00:00:00 2001 From: Akshat Oke Date: Wed, 25 Sep 2024 11:21:04 + Subject: [PATCH] [AMDGPU] Add tests for SIPreAllocateWWMRegs --- .../AMDGPU/si-pre-allocate-wwm-regs.mir | 26 +++ .../si-pre-allocate-wwm-sgpr-spills.mir | 21 +++ 2 files changed, 47 insertions(+) create mode 100644 llvm/test/CodeGen/AMDGPU/si-pre-allocate-wwm-regs.mir create mode 100644 llvm/test/CodeGen/AMDGPU/si-pre-allocate-wwm-sgpr-spills.mir diff --git a/llvm/test/CodeGen/AMDGPU/si-pre-allocate-wwm-regs.mir b/llvm/test/CodeGen/AMDGPU/si-pre-allocate-wwm-regs.mir new file mode 100644 index 00..f2db299f575f5e --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/si-pre-allocate-wwm-regs.mir @@ -0,0 +1,26 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5 +# RUN: llc -mtriple=amdgcn -verify-machineinstrs -run-pass=si-pre-allocate-wwm-regs -o - -mcpu=tahiti %s | FileCheck %s + +--- + +name: pre_allocate_wwm_regs_strict +tracksRegLiveness: true +body: | + bb.0: +liveins: $sgpr1 +; CHECK-LABEL: name: pre_allocate_wwm_regs_strict +; CHECK: liveins: $sgpr1 +; CHECK-NEXT: {{ $}} +; CHECK-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF +; CHECK-NEXT: renamable $sgpr4_sgpr5 = ENTER_STRICT_WWM -1, implicit-def $exec, implicit-def $scc, implicit $exec +; CHECK-NEXT: $vgpr0 = V_MOV_B32_e32 0, implicit $exec +; CHECK-NEXT: dead $vgpr0 = V_MOV_B32_dpp $vgpr0, [[DEF]], 323, 12, 15, 0, implicit $exec +; CHECK-NEXT: $exec = EXIT_STRICT_WWM killed renamable $sgpr4_sgpr5 +; CHECK-NEXT: dead [[COPY:%[0-9]+]]:vgpr_32 = COPY [[DEF]] +%0:vgpr_32 = IMPLICIT_DEF +renamable $sgpr4_sgpr5 = ENTER_STRICT_WWM -1, implicit-def $exec, implicit-def $scc, implicit $exec +%24:vgpr_32 = V_MOV_B32_e32 0, implicit $exec +%25:vgpr_32 = V_MOV_B32_dpp %24:vgpr_32(tied-def 0), %0:vgpr_32, 323, 12, 15, 0, implicit $exec +$exec = EXIT_STRICT_WWM killed 
renamable $sgpr4_sgpr5 +%2:vgpr_32 = COPY %0:vgpr_32 +... diff --git a/llvm/test/CodeGen/AMDGPU/si-pre-allocate-wwm-sgpr-spills.mir b/llvm/test/CodeGen/AMDGPU/si-pre-allocate-wwm-sgpr-spills.mir new file mode 100644 index 00..f0efe74878d831 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/si-pre-allocate-wwm-sgpr-spills.mir @@ -0,0 +1,21 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5 +# RUN: llc -mtriple=amdgcn -verify-machineinstrs -amdgpu-prealloc-sgpr-spill-vgprs -run-pass=si-pre-allocate-wwm-regs -o - -mcpu=tahiti %s | FileCheck %s + +--- + +name: pre_allocate_wwm_spill_to_vgpr +tracksRegLiveness: true +body: | + bb.0: +liveins: $sgpr1 +; CHECK-LABEL: name: pre_allocate_wwm_spill_to_vgpr +; CHECK: liveins: $sgpr1 +; CHECK-NEXT: {{ $}} +; CHECK-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF +; CHECK-NEXT: dead $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr1, 0, [[DEF]] +; CHECK-NEXT: dead [[COPY:%[0-9]+]]:vgpr_32 = COPY [[DEF]] +%0:vgpr_32 = IMPLICIT_DEF +%23:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr1, 0, %0:vgpr_32 +%2:vgpr_32 = COPY %0:vgpr_32 +... + ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [AMDGPU] Add tests for SIPreAllocateWWMRegs (PR #109963)
Akshat-Oke wrote: > [!WARNING] > This pull request is not mergeable via GitHub because a downstack PR is > open. Once all requirements are satisfied, merge this PR as a stack <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/109963?utm_source=stack-comment-downstack-mergeability-warning">on Graphite</a>. > <a href="https://graphite.dev/docs/merge-pull-requests">Learn more</a> * **#109939** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/109939?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> * **#109963** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/109963?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> 👈 * **#109938** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/109938?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> * **#109937** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/109937?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> * **#109936** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/109936?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> * `main` This stack of pull requests is managed by Graphite. <a href="https://stacking.dev/?utm_source=stack-comment">Learn more about stacking.</a>
Join @Akshat-Oke and the rest of your teammates on <a href="https://graphite.dev?utm-source=stack-comment"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="11px" height="11px"/> Graphite</a> https://github.com/llvm/llvm-project/pull/109963 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [NewPM][AMDGPU] Port SIPreAllocateWWMRegs to NPM (PR #109939)
@@ -254,3 +262,13 @@ bool SIPreAllocateWWMRegs::runOnMachineFunction(MachineFunction &MF) { rewriteRegs(MF); return true; } + +PreservedAnalyses +SIPreAllocateWWMRegsPass::run(MachineFunction &MF, + MachineFunctionAnalysisManager &MFAM) { + auto *LIS = &MFAM.getResult<LiveIntervalsAnalysis>(MF); + auto *Matrix = &MFAM.getResult<LiveRegMatrixAnalysis>(MF); + auto *VRM = &MFAM.getResult<VirtRegMapAnalysis>(MF); arsenm wrote: I thought this should be using getCachedResult, and the pass supported no LIS for the fast RA path. But I see now the legacy path is requiring them (although it probably shouldn't?) https://github.com/llvm/llvm-project/pull/109939 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [NewPM][AMDGPU] Port SIPreAllocateWWMRegs to NPM (PR #109939)
@@ -0,0 +1,25 @@ +//===--- SIPreAllocateWWMRegs.h ---===// arsenm wrote: Missing C++ mode comment https://github.com/llvm/llvm-project/pull/109939 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [NewPM][CodeGen] Port LiveRegMatrix to NPM (PR #109938)
https://github.com/Akshat-Oke ready_for_review https://github.com/llvm/llvm-project/pull/109938 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [AMDGPU] Add tests for SIPreAllocateWWMRegs (PR #109963)
https://github.com/Akshat-Oke ready_for_review https://github.com/llvm/llvm-project/pull/109963 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] Update correct dependency (PR #109937)
https://github.com/Akshat-Oke ready_for_review https://github.com/llvm/llvm-project/pull/109937 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [libcxx] [libc++] Adjust the version of __cpp_lib_ranges in C++20 mode (PR #109324)
ldionne wrote: @tru Our CI is currently super unstable, which is what's causing the failures. We've been without CI for roughly the past week. We're working on it. https://github.com/llvm/llvm-project/pull/109324 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [Attributor] Take the address space from addrspacecast directly (PR #108258)
https://github.com/shiltian updated https://github.com/llvm/llvm-project/pull/108258 >From f79d612a7335e7a150c2347638ea6a9e36bbc1ea Mon Sep 17 00:00:00 2001 From: Shilei Tian Date: Wed, 25 Sep 2024 14:42:09 -0400 Subject: [PATCH] [Attributor] Take the address space from addrspacecast directly If the value to be analyzed is directly from addrspacecast, we take the source address space directly. This is to improve the case where in `AMDGPUPromoteKernelArgumentsPass`, the kernel argument is promoted by inserting an addrspacecast directly from a generic pointer. However, during the analysis, the underlying object will be the generic pointer, instead of the addrspacecast, thus the inferred address space is the generic one, which is not ideal. --- .../Transforms/IPO/AttributorAttributes.cpp | 60 ++- llvm/test/CodeGen/AMDGPU/aa-as-infer.ll | 35 +++ 2 files changed, 81 insertions(+), 14 deletions(-) diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp index 416dd09ca874bf..0cb2e5117741ad 100644 --- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp +++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp @@ -12596,16 +12596,37 @@ struct AAAddressSpaceImpl : public AAAddressSpace { } ChangeStatus updateImpl(Attributor &A) override { +assert(A.getInfoCache().getFlatAddressSpace().has_value()); +unsigned FlatAS = A.getInfoCache().getFlatAddressSpace().value(); uint32_t OldAddressSpace = AssumedAddressSpace; -auto *AUO = A.getOrCreateAAFor(getIRPosition(), this, -DepClassTy::REQUIRED); -auto Pred = [&](Value &Obj) { + +auto CheckAddressSpace = [&](Value &Obj) { if (isa(&Obj)) return true; + // If an argument in flat address space only has addrspace cast uses, and + // those casts are same, then we take the dst addrspace. 
+ if (auto *Arg = dyn_cast(&Obj)) { +if (Arg->getType()->getPointerAddressSpace() == FlatAS) { + unsigned CastAddrSpace = FlatAS; + for (auto *U : Arg->users()) { +auto *ASCI = dyn_cast(U); +if (!ASCI) + return takeAddressSpace(Obj.getType()->getPointerAddressSpace()); +if (CastAddrSpace != FlatAS && +CastAddrSpace != ASCI->getDestAddressSpace()) + return false; +CastAddrSpace = ASCI->getDestAddressSpace(); + } + if (CastAddrSpace != FlatAS) +return takeAddressSpace(CastAddrSpace); +} + } return takeAddressSpace(Obj.getType()->getPointerAddressSpace()); }; -if (!AUO->forallUnderlyingObjects(Pred)) +auto *AUO = A.getOrCreateAAFor(getIRPosition(), this, +DepClassTy::REQUIRED); +if (!AUO->forallUnderlyingObjects(CheckAddressSpace)) return indicatePessimisticFixpoint(); return OldAddressSpace == AssumedAddressSpace ? ChangeStatus::UNCHANGED @@ -12614,17 +12635,21 @@ struct AAAddressSpaceImpl : public AAAddressSpace { /// See AbstractAttribute::manifest(...). ChangeStatus manifest(Attributor &A) override { -if (getAddressSpace() == InvalidAddressSpace || -getAddressSpace() == getAssociatedType()->getPointerAddressSpace()) +unsigned NewAS = getAddressSpace(); + +if (NewAS == InvalidAddressSpace || +NewAS == getAssociatedType()->getPointerAddressSpace()) return ChangeStatus::UNCHANGED; +unsigned FlatAS = A.getInfoCache().getFlatAddressSpace().value(); + Value *AssociatedValue = &getAssociatedValue(); -Value *OriginalValue = peelAddrspacecast(AssociatedValue); +Value *OriginalValue = peelAddrspacecast(AssociatedValue, FlatAS); PointerType *NewPtrTy = -PointerType::get(getAssociatedType()->getContext(), getAddressSpace()); +PointerType::get(getAssociatedType()->getContext(), NewAS); bool UseOriginalValue = -OriginalValue->getType()->getPointerAddressSpace() == getAddressSpace(); +OriginalValue->getType()->getPointerAddressSpace() == NewAS; bool Changed = false; @@ -12684,12 +12709,19 @@ struct AAAddressSpaceImpl : public AAAddressSpace { return AssumedAddressSpace == AS; } 
- static Value *peelAddrspacecast(Value *V) { -if (auto *I = dyn_cast(V)) - return peelAddrspacecast(I->getPointerOperand()); + static Value *peelAddrspacecast(Value *V, unsigned FlatAS) { +if (auto *I = dyn_cast(V)) { + assert(I->getSrcAddressSpace() != FlatAS && + "there should not be flat AS -> non-flat AS"); + return I->getPointerOperand(); +} if (auto *C = dyn_cast(V)) - if (C->getOpcode() == Instruction::AddrSpaceCast) -return peelAddrspacecast(C->getOperand(0)); + if (C->getOpcode() == Instruction::AddrSpaceCast) { +assert(C->getOperand(0)->getType()->getPointerAddressSpac
[llvm-branch-commits] [llvm] [Attributor] Take the address space from addrspacecast directly (PR #108258)
shiltian wrote: I unstacked from https://github.com/llvm/llvm-project/pull/108786 to unblock this since the ticket needs to be fixed promptly. https://github.com/llvm/llvm-project/pull/108258 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [flang] [flang][OpenMP] Add version checks for clauses (PR #110015)
https://github.com/kparzysz updated https://github.com/llvm/llvm-project/pull/110015 >From 3c786ad2a50f146d357d882b0c1d966486f7295f Mon Sep 17 00:00:00 2001 From: Krzysztof Parzyszek Date: Tue, 24 Sep 2024 17:41:16 -0500 Subject: [PATCH 1/4] [flang][OpenMP] Add version checks for clauses If there is a clause that is allowed on a given directive in a later version of the OpenMP spec, report an error and provide the minimal spec version that allows the clause. The case where a clause is not allowed on a directive at all is already handled elsewhere. --- flang/lib/Semantics/check-omp-structure.cpp | 93 +-- flang/lib/Semantics/check-omp-structure.h | 1 + flang/test/Lower/OpenMP/atomic-capture.f90| 4 +- flang/test/Lower/OpenMP/atomic-read.f90 | 2 +- flang/test/Lower/OpenMP/atomic-update.f90 | 4 +- flang/test/Lower/OpenMP/atomic-write.f90 | 2 +- .../test/Lower/OpenMP/declare-target-data.f90 | 4 +- .../declare-target-deferred-marking.f90 | 4 +- .../OpenMP/declare-target-func-and-subr.f90 | 4 +- ...arget-implicit-func-and-subr-cap-enter.f90 | 8 +- ...lare-target-implicit-func-and-subr-cap.f90 | 8 +- .../declare-target-implicit-tarop-cap.f90 | 8 +- .../Lower/OpenMP/function-filtering-2.f90 | 12 +-- .../test/Lower/OpenMP/function-filtering.f90 | 12 +-- .../OpenMP/declare_target-device_type.f90 | 4 +- .../Parser/OpenMP/in-reduction-clause.f90 | 4 +- flang/test/Parser/OpenMP/order-clause01.f90 | 4 +- flang/test/Parser/OpenMP/tile-size.f90| 4 +- flang/test/Parser/OpenMP/unroll-full.f90 | 4 +- flang/test/Parser/OpenMP/unroll.f90 | 4 +- .../Semantics/OpenMP/atomic-hint-clause.f90 | 2 +- flang/test/Semantics/OpenMP/atomic01.f90 | 2 +- flang/test/Semantics/OpenMP/atomic05.f90 | 2 +- .../Semantics/OpenMP/clause-validity01.f90| 2 +- .../OpenMP/declarative-directive.f90 | 2 +- .../Semantics/OpenMP/declare-target01.f90 | 2 +- .../Semantics/OpenMP/declare-target02.f90 | 2 +- .../Semantics/OpenMP/declare-target06.f90 | 2 +- .../Semantics/OpenMP/device-constructs.f90| 2 +- 
flang/test/Semantics/OpenMP/flush02.f90 | 2 +- flang/test/Semantics/OpenMP/if-clause.f90 | 2 +- flang/test/Semantics/OpenMP/nontemporal.f90 | 2 +- .../test/Semantics/OpenMP/order-clause01.f90 | 2 +- .../Semantics/OpenMP/requires-atomic01.f90| 2 +- .../Semantics/OpenMP/requires-atomic02.f90| 2 +- flang/test/Semantics/OpenMP/requires04.f90| 2 +- flang/test/Semantics/OpenMP/requires05.f90| 2 +- .../Semantics/OpenMP/simd-nontemporal.f90 | 2 +- flang/test/Semantics/OpenMP/target01.f90 | 2 +- flang/test/Semantics/OpenMP/taskgroup01.f90 | 2 +- .../test/Semantics/OpenMP/use_device_addr.f90 | 2 +- .../Semantics/OpenMP/use_device_addr1.f90 | 2 +- .../test/Semantics/OpenMP/use_device_ptr1.f90 | 2 +- 43 files changed, 137 insertions(+), 99 deletions(-) diff --git a/flang/lib/Semantics/check-omp-structure.cpp b/flang/lib/Semantics/check-omp-structure.cpp index dfc3f3290a81be..976c159e252f12 100644 --- a/flang/lib/Semantics/check-omp-structure.cpp +++ b/flang/lib/Semantics/check-omp-structure.cpp @@ -16,25 +16,25 @@ namespace Fortran::semantics { // Use when clause falls under 'struct OmpClause' in 'parse-tree.h'. #define CHECK_SIMPLE_CLAUSE(X, Y) \ void OmpStructureChecker::Enter(const parser::OmpClause::X &) { \ -CheckAllowed(llvm::omp::Clause::Y); \ +CheckAllowedClause(llvm::omp::Clause::Y); \ } #define CHECK_REQ_CONSTANT_SCALAR_INT_CLAUSE(X, Y) \ void OmpStructureChecker::Enter(const parser::OmpClause::X &c) { \ -CheckAllowed(llvm::omp::Clause::Y); \ +CheckAllowedClause(llvm::omp::Clause::Y); \ RequiresConstantPositiveParameter(llvm::omp::Clause::Y, c.v); \ } #define CHECK_REQ_SCALAR_INT_CLAUSE(X, Y) \ void OmpStructureChecker::Enter(const parser::OmpClause::X &c) { \ -CheckAllowed(llvm::omp::Clause::Y); \ +CheckAllowedClause(llvm::omp::Clause::Y); \ RequiresPositiveParameter(llvm::omp::Clause::Y, c.v); \ } // Use when clause don't falls under 'struct OmpClause' in 'parse-tree.h'. 
#define CHECK_SIMPLE_PARSER_CLAUSE(X, Y) \ void OmpStructureChecker::Enter(const parser::X &) { \ -CheckAllowed(llvm::omp::Y); \ +CheckAllowedClause(llvm::omp::Y); \ } // 'OmpWorkshareBlockChecker' is used to check the validity of the assignment @@ -163,6 +163,43 @@ class AssociatedLoopChecker { std::map constructNamesAndLevels_; }; +bool OmpStructureChecker::CheckAllowedClause(llvmOmpClause clause) { + unsigned version{context_.langOptions().OpenMPVersion}; + DirectiveContext &dirCtx = GetContext(); + llvm::omp::Directive dir{dirCtx.directive}; + + if (!llvm::omp::isAllowedClauseForDirective(dir, clause, version)) { +unsigned allowedInVersion{[&] { + for (unsigned v : {45, 50, 51, 52, 60}) { +if (v <= version) { + c