[llvm-branch-commits] [clang] PR for llvm/llvm-project#79479 (PR #79596)
https://github.com/nikic closed https://github.com/llvm/llvm-project/pull/79596 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [AMDGPU] New llvm.amdgcn.wave.id intrinsic (#79325) (PR #79689)
https://github.com/jayfoad created https://github.com/llvm/llvm-project/pull/79689 This is only valid on targets with architected SGPRs. >From c5949b09b05e7417d0494b2301781b84d22b95ef Mon Sep 17 00:00:00 2001 From: Jay Foad Date: Thu, 25 Jan 2024 07:48:06 + Subject: [PATCH] [AMDGPU] New llvm.amdgcn.wave.id intrinsic (#79325) This is only valid on targets with architected SGPRs. --- llvm/include/llvm/IR/IntrinsicsAMDGPU.td | 4 ++ .../lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp | 19 ++ llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h | 1 + llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 14 + llvm/lib/Target/AMDGPU/SIISelLowering.h | 1 + .../CodeGen/AMDGPU/llvm.amdgcn.wave.id.ll | 61 +++ 6 files changed, 100 insertions(+) create mode 100644 llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wave.id.ll diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td index 9eb1ac8e27befb..c5f43d17d1c148 100644 --- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td +++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td @@ -2777,6 +2777,10 @@ class AMDGPULoadTr: def int_amdgcn_global_load_tr : AMDGPULoadTr; +// i32 @llvm.amdgcn.wave.id() +def int_amdgcn_wave_id : + DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable]>; + //===--===// // Deep learning intrinsics. //===--===// diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp index 32921bb248caf0..118c8b7c66690f 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -6848,6 +6848,23 @@ bool AMDGPULegalizerInfo::legalizeStackSave(MachineInstr &MI, return true; } +bool AMDGPULegalizerInfo::legalizeWaveID(MachineInstr &MI, + MachineIRBuilder &B) const { + // With architected SGPRs, waveIDinGroup is in TTMP8[29:25]. 
+ if (!ST.hasArchitectedSGPRs()) +return false; + LLT S32 = LLT::scalar(32); + Register DstReg = MI.getOperand(0).getReg(); + Register TTMP8 = + getFunctionLiveInPhysReg(B.getMF(), B.getTII(), AMDGPU::TTMP8, + AMDGPU::SReg_32RegClass, B.getDebugLoc(), S32); + auto LSB = B.buildConstant(S32, 25); + auto Width = B.buildConstant(S32, 5); + B.buildUbfx(DstReg, TTMP8, LSB, Width); + MI.eraseFromParent(); + return true; +} + bool AMDGPULegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper, MachineInstr &MI) const { MachineIRBuilder &B = Helper.MIRBuilder; @@ -6970,6 +6987,8 @@ bool AMDGPULegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper, case Intrinsic::amdgcn_workgroup_id_z: return legalizePreloadedArgIntrin(MI, MRI, B, AMDGPUFunctionArgInfo::WORKGROUP_ID_Z); + case Intrinsic::amdgcn_wave_id: +return legalizeWaveID(MI, B); case Intrinsic::amdgcn_lds_kernel_id: return legalizePreloadedArgIntrin(MI, MRI, B, AMDGPUFunctionArgInfo::LDS_KERNEL_ID); diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h index 56aabd4f6ab71b..ecbe42681c6690 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h +++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h @@ -212,6 +212,7 @@ class AMDGPULegalizerInfo final : public LegalizerInfo { bool legalizeFPTruncRound(MachineInstr &MI, MachineIRBuilder &B) const; bool legalizeStackSave(MachineInstr &MI, MachineIRBuilder &B) const; + bool legalizeWaveID(MachineInstr &MI, MachineIRBuilder &B) const; bool legalizeImageIntrinsic( MachineInstr &MI, MachineIRBuilder &B, diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index d35b76c8ad54eb..9cbcf0012ea878 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -7890,6 +7890,18 @@ SDValue SITargetLowering::lowerSBuffer(EVT VT, SDLoc DL, SDValue Rsrc, return Loads[0]; } +SDValue SITargetLowering::lowerWaveID(SelectionDAG &DAG, SDValue Op) const { + 
// With architected SGPRs, waveIDinGroup is in TTMP8[29:25]. + if (!Subtarget->hasArchitectedSGPRs()) +return {}; + SDLoc SL(Op); + MVT VT = MVT::i32; + SDValue TTMP8 = CreateLiveInRegister(DAG, &AMDGPU::SReg_32RegClass, + AMDGPU::TTMP8, VT, SL); + return DAG.getNode(AMDGPUISD::BFE_U32, SL, VT, TTMP8, + DAG.getConstant(25, SL, VT), DAG.getConstant(5, SL, VT)); +} + SDValue SITargetLowering::lowerWorkitemID(SelectionDAG &DAG, SDValue Op, unsigned Dim, const ArgDescriptor &Arg) const { @@ -8060,6 +8072,8 @@ SDValue SITargetLowering::Lower
[llvm-branch-commits] [llvm] [AMDGPU] New llvm.amdgcn.wave.id intrinsic (#79325) (PR #79689)
llvmbot wrote: @llvm/pr-subscribers-llvm-ir @llvm/pr-subscribers-backend-amdgpu Author: Jay Foad (jayfoad) Changes This is only valid on targets with architected SGPRs. --- Full diff: https://github.com/llvm/llvm-project/pull/79689.diff 6 Files Affected: - (modified) llvm/include/llvm/IR/IntrinsicsAMDGPU.td (+4) - (modified) llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp (+19) - (modified) llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h (+1) - (modified) llvm/lib/Target/AMDGPU/SIISelLowering.cpp (+14) - (modified) llvm/lib/Target/AMDGPU/SIISelLowering.h (+1) - (added) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wave.id.ll (+61) ``diff diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td index 9eb1ac8e27befb1..c5f43d17d1c1481 100644 --- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td +++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td @@ -2777,6 +2777,10 @@ class AMDGPULoadTr: def int_amdgcn_global_load_tr : AMDGPULoadTr; +// i32 @llvm.amdgcn.wave.id() +def int_amdgcn_wave_id : + DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable]>; + //===--===// // Deep learning intrinsics. //===--===// diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp index 32921bb248caf07..118c8b7c66690f1 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -6848,6 +6848,23 @@ bool AMDGPULegalizerInfo::legalizeStackSave(MachineInstr &MI, return true; } +bool AMDGPULegalizerInfo::legalizeWaveID(MachineInstr &MI, + MachineIRBuilder &B) const { + // With architected SGPRs, waveIDinGroup is in TTMP8[29:25]. 
+ if (!ST.hasArchitectedSGPRs()) +return false; + LLT S32 = LLT::scalar(32); + Register DstReg = MI.getOperand(0).getReg(); + Register TTMP8 = + getFunctionLiveInPhysReg(B.getMF(), B.getTII(), AMDGPU::TTMP8, + AMDGPU::SReg_32RegClass, B.getDebugLoc(), S32); + auto LSB = B.buildConstant(S32, 25); + auto Width = B.buildConstant(S32, 5); + B.buildUbfx(DstReg, TTMP8, LSB, Width); + MI.eraseFromParent(); + return true; +} + bool AMDGPULegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper, MachineInstr &MI) const { MachineIRBuilder &B = Helper.MIRBuilder; @@ -6970,6 +6987,8 @@ bool AMDGPULegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper, case Intrinsic::amdgcn_workgroup_id_z: return legalizePreloadedArgIntrin(MI, MRI, B, AMDGPUFunctionArgInfo::WORKGROUP_ID_Z); + case Intrinsic::amdgcn_wave_id: +return legalizeWaveID(MI, B); case Intrinsic::amdgcn_lds_kernel_id: return legalizePreloadedArgIntrin(MI, MRI, B, AMDGPUFunctionArgInfo::LDS_KERNEL_ID); diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h index 56aabd4f6ab71b6..ecbe42681c6690c 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h +++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h @@ -212,6 +212,7 @@ class AMDGPULegalizerInfo final : public LegalizerInfo { bool legalizeFPTruncRound(MachineInstr &MI, MachineIRBuilder &B) const; bool legalizeStackSave(MachineInstr &MI, MachineIRBuilder &B) const; + bool legalizeWaveID(MachineInstr &MI, MachineIRBuilder &B) const; bool legalizeImageIntrinsic( MachineInstr &MI, MachineIRBuilder &B, diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index d35b76c8ad54ebc..9cbcf0012ea8788 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -7890,6 +7890,18 @@ SDValue SITargetLowering::lowerSBuffer(EVT VT, SDLoc DL, SDValue Rsrc, return Loads[0]; } +SDValue SITargetLowering::lowerWaveID(SelectionDAG &DAG, SDValue Op) const 
{ + // With architected SGPRs, waveIDinGroup is in TTMP8[29:25]. + if (!Subtarget->hasArchitectedSGPRs()) +return {}; + SDLoc SL(Op); + MVT VT = MVT::i32; + SDValue TTMP8 = CreateLiveInRegister(DAG, &AMDGPU::SReg_32RegClass, + AMDGPU::TTMP8, VT, SL); + return DAG.getNode(AMDGPUISD::BFE_U32, SL, VT, TTMP8, + DAG.getConstant(25, SL, VT), DAG.getConstant(5, SL, VT)); +} + SDValue SITargetLowering::lowerWorkitemID(SelectionDAG &DAG, SDValue Op, unsigned Dim, const ArgDescriptor &Arg) const { @@ -8060,6 +8072,8 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, case Intrinsic::amdgcn_workgroup_id_z: return getPreloadedValue(DAG, *MFI, VT, AMDGPUFunctionArgInfo::WORKGROUP_
[llvm-branch-commits] [llvm] [AMDGPU] New llvm.amdgcn.wave.id intrinsic (#79325) (PR #79689)
https://github.com/jayfoad milestoned https://github.com/llvm/llvm-project/pull/79689 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [AMDGPU] New llvm.amdgcn.wave.id intrinsic (#79325) (PR #79689)
https://github.com/jayfoad edited https://github.com/llvm/llvm-project/pull/79689 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [clang] PR for llvm/llvm-project#79277 (PR #79340)
tstellar wrote: Merged: 2d759eff89d3e8c95caaa3b82a6c567cc42a560b https://github.com/llvm/llvm-project/pull/79340 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [llvm] PR for llvm/llvm-project#79277 (PR #79340)
https://github.com/tstellar closed https://github.com/llvm/llvm-project/pull/79340 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] 15aeb35 - [LTO] Fix fat-lto output for -c -emit-llvm. (#79404)
Author: Sean Fertile Date: 2024-01-27T06:51:08-08:00 New Revision: 15aeb35c53f23dd9b7a6781e210795bd4ff7ccae URL: https://github.com/llvm/llvm-project/commit/15aeb35c53f23dd9b7a6781e210795bd4ff7ccae DIFF: https://github.com/llvm/llvm-project/commit/15aeb35c53f23dd9b7a6781e210795bd4ff7ccae.diff LOG: [LTO] Fix fat-lto output for -c -emit-llvm. (#79404) Fix and add a test case for combining '-ffat-lto-objects -c -emit-llvm' options and fix a spelling mistake in same test. (cherry picked from commit f1b1611148fa533fe198fec3fa4ef8139224dc80) Added: Modified: clang/lib/Driver/Driver.cpp clang/test/Driver/fat-lto-objects.c Removed: diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp index 7109faa1072de5..93cddf742d521d 100644 --- a/clang/lib/Driver/Driver.cpp +++ b/clang/lib/Driver/Driver.cpp @@ -4764,9 +4764,9 @@ Action *Driver::ConstructPhaseAction( case phases::Backend: { if (isUsingLTO() && TargetDeviceOffloadKind == Action::OFK_None) { types::ID Output; - if (Args.hasArg(options::OPT_ffat_lto_objects)) -Output = Args.hasArg(options::OPT_emit_llvm) ? types::TY_LTO_IR - : types::TY_PP_Asm; + if (Args.hasArg(options::OPT_ffat_lto_objects) && + !Args.hasArg(options::OPT_emit_llvm)) +Output = types::TY_PP_Asm; else if (Args.hasArg(options::OPT_S)) Output = types::TY_LTO_IR; else diff --git a/clang/test/Driver/fat-lto-objects.c b/clang/test/Driver/fat-lto-objects.c index 97002db6edc51e..d9a5ba88ea6d6f 100644 --- a/clang/test/Driver/fat-lto-objects.c +++ b/clang/test/Driver/fat-lto-objects.c @@ -23,11 +23,17 @@ // CHECK-CC-S-EL-LTO-SAME: -emit-llvm // CHECK-CC-S-EL-LTO-SAME: -ffat-lto-objects -/// When fat LTO is enabled wihtout -S we expect native object output and -ffat-lto-object to be passed to cc1. +/// When fat LTO is enabled without -S we expect native object output and -ffat-lto-object to be passed to cc1. 
// RUN: %clang --target=x86_64-unknown-linux-gnu -flto -ffat-lto-objects -### %s -c 2>&1 | FileCheck %s -check-prefix=CHECK-CC-C-LTO // CHECK-CC-C-LTO: -cc1 -// CHECK-CC-C-LTO: -emit-obj -// CHECK-CC-C-LTO: -ffat-lto-objects +// CHECK-CC-C-LTO-SAME: -emit-obj +// CHECK-CC-C-LTO-SAME: -ffat-lto-objects + +/// When fat LTO is enabled with -c and -emit-llvm we expect bitcode output and -ffat-lto-object to be passed to cc1. +// RUN: %clang --target=x86_64-unknown-linux-gnu -flto -ffat-lto-objects -### %s -c -emit-llvm 2>&1 | FileCheck %s -check-prefix=CHECK-CC-C-EL-LTO +// CHECK-CC-C-EL-LTO: -cc1 +// CHECK-CC-C-EL-LTO-SAME: -emit-llvm-bc +// CHECK-CC-C-EL-LTO-SAME: -ffat-lto-objects /// Make sure we don't have a warning for -ffat-lto-objects being unused // RUN: %clang --target=x86_64-unknown-linux-gnu -ffat-lto-objects -fdriver-only -Werror -v %s -c 2>&1 | FileCheck %s -check-prefix=CHECK-CC-NOLTO ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] PR for llvm/llvm-project#79511 (PR #79513)
https://github.com/tstellar closed https://github.com/llvm/llvm-project/pull/79513 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] PR for llvm/llvm-project#79511 (PR #79513)
tstellar wrote: Merged: 15aeb35c53f23dd9b7a6781e210795bd4ff7ccae https://github.com/llvm/llvm-project/pull/79513 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] 2cf04c0 - [X86] Do not end 'note.gnu.property' section with -fcf-protection (#79360)
Author: Adhemerval Zanella Date: 2024-01-27T10:19:11-08:00 New Revision: 2cf04c020f4a772f1b3d683dae2763a2544720ef URL: https://github.com/llvm/llvm-project/commit/2cf04c020f4a772f1b3d683dae2763a2544720ef DIFF: https://github.com/llvm/llvm-project/commit/2cf04c020f4a772f1b3d683dae2763a2544720ef.diff LOG: [X86] Do not end 'note.gnu.property' section with -fcf-protection (#79360) The glibc now adds the required minimum ISA level for libc-nonshared.a (linked on all programs) and this is done with an inline asm along with .note.gnu.property and .pushsection/.popsection. However, the x86 backend always ends the 'note.gnu.property' section when building with -fcf-protection, leading to assert failure: llvm/llvm-project-git/llvm/lib/MC/MCStreamer.cpp:1251: virtual void llvm::MCStreamer::switchSection(llvm::MCSection*, const llvm::MCExpr*): Assertion `!Section->hasEnded() && "Section already ended"' failed. [1] https://sourceware.org/git/?p=glibc.git;a=blob;f=sysdeps/x86/isa-level.c;h=3f1b269848a52f994275bab6f60dded3ded6b144;hb=HEAD (cherry picked from commit a58c62fa824fd24d20fa2366e0ec8f241cb321fe) Added: llvm/test/CodeGen/X86/note-cet-property-inlineasm.ll Modified: llvm/lib/Target/X86/X86AsmPrinter.cpp Removed: diff --git a/llvm/lib/Target/X86/X86AsmPrinter.cpp b/llvm/lib/Target/X86/X86AsmPrinter.cpp index 9f0fd4d0938e97f..87ec8aa23080e00 100644 --- a/llvm/lib/Target/X86/X86AsmPrinter.cpp +++ b/llvm/lib/Target/X86/X86AsmPrinter.cpp @@ -877,7 +877,6 @@ void X86AsmPrinter::emitStartOfAsmFile(Module &M) { OutStreamer->emitInt32(FeatureFlagsAnd);// data emitAlignment(WordSize == 4 ? 
Align(4) : Align(8)); // padding - OutStreamer->endSection(Nt); OutStreamer->switchSection(Cur); } } diff --git a/llvm/test/CodeGen/X86/note-cet-property-inlineasm.ll b/llvm/test/CodeGen/X86/note-cet-property-inlineasm.ll new file mode 100644 index 000..a0e5b4add1b386e --- /dev/null +++ b/llvm/test/CodeGen/X86/note-cet-property-inlineasm.ll @@ -0,0 +1,30 @@ +; RUN: llc -mtriple x86_64-unknown-linux-gnu %s -o %t.o -filetype=obj +; RUN: llvm-readobj -n %t.o | FileCheck %s + +module asm ".pushsection \22.note.gnu.property\22,\22a\22,@note" +module asm " .p2align 3" +module asm " .long 1f - 0f" +module asm " .long 4f - 1f" +module asm " .long 5" +module asm "0: .asciz \22GNU\22" +module asm "1: .p2align 3" +module asm " .long 0xc0008002" +module asm " .long 3f - 2f" +module asm "2: .long ((1U << 0) | 0 | 0 | 0)" +module asm "3: .p2align 3" +module asm "4:" +module asm " .popsection" + +!llvm.module.flags = !{!0, !1} + +!0 = !{i32 4, !"cf-protection-return", i32 1} +!1 = !{i32 4, !"cf-protection-branch", i32 1} + +; CHECK: Type: NT_GNU_PROPERTY_TYPE_0 +; CHECK-NEXT: Property [ +; CHECK-NEXT: x86 feature: IBT, SHSTK +; CHECK-NEXT: ] +; CHECK: Type: NT_GNU_PROPERTY_TYPE_0 +; CHECK-NEXT: Property [ +; CHECK-NEXT: x86 ISA needed: x86-64-baseline +; CHECK-NEXT: ] ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] PR for llvm/llvm-project#79629 (PR #79673)
tstellar wrote: Merged: 2cf04c020f4a772f1b3d683dae2763a2544720ef https://github.com/llvm/llvm-project/pull/79673 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] PR for llvm/llvm-project#79629 (PR #79673)
https://github.com/tstellar closed https://github.com/llvm/llvm-project/pull/79673 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] PR for llvm/llvm-project#79564 (PR #79566)
https://github.com/llvmbot updated https://github.com/llvm/llvm-project/pull/79566 >From fc56d1a3ff759720649c0573c04b5ffadf8c9504 Mon Sep 17 00:00:00 2001 From: Paschalis Mpeis Date: Thu, 25 Jan 2024 09:29:46 + Subject: [PATCH] [LTO] Fix Veclib flags correctly pass to LTO flags (#78749) Flags `-fveclib=name` were not passed to LTO flags. This patch fixes that by converting the `-fveclib` flags to their relevant names for opt's `-vector-library=name` flags. For example: `-fveclib=SLEEF` would become `-vector-library=sleefgnuabi` and be passed through the `-plugin-opt` flag. (cherry picked from commit 03cf0e9354e7e56ff794e9efb682ed2971bc91ec) --- clang/lib/Driver/ToolChains/CommonArgs.cpp | 22 ++ clang/test/Driver/fveclib.c| 18 ++ 2 files changed, 40 insertions(+) diff --git a/clang/lib/Driver/ToolChains/CommonArgs.cpp b/clang/lib/Driver/ToolChains/CommonArgs.cpp index ff4047298d70d52..2b916f0003368de 100644 --- a/clang/lib/Driver/ToolChains/CommonArgs.cpp +++ b/clang/lib/Driver/ToolChains/CommonArgs.cpp @@ -810,6 +810,28 @@ void tools::addLTOOptions(const ToolChain &ToolChain, const ArgList &Args, "-generate-arange-section")); } + // Pass vector library arguments to LTO. + Arg *ArgVecLib = Args.getLastArg(options::OPT_fveclib); + if (ArgVecLib && ArgVecLib->getNumValues() == 1) { +// Map the vector library names from clang front-end to opt front-end. The +// values are taken from the TargetLibraryInfo class command line options. +std::optional OptVal = +llvm::StringSwitch>(ArgVecLib->getValue()) +.Case("Accelerate", "Accelerate") +.Case("LIBMVEC", "LIBMVEC-X86") +.Case("MASSV", "MASSV") +.Case("SVML", "SVML") +.Case("SLEEF", "sleefgnuabi") +.Case("Darwin_libsystem_m", "Darwin_libsystem_m") +.Case("ArmPL", "ArmPL") +.Case("none", "none") +.Default(std::nullopt); + +if (OptVal) + CmdArgs.push_back(Args.MakeArgString( + Twine(PluginOptPrefix) + "-vector-library=" + OptVal.value())); + } + // Try to pass driver level flags relevant to LTO code generation down to // the plugin. 
diff --git a/clang/test/Driver/fveclib.c b/clang/test/Driver/fveclib.c index e2a7619e9b89f7f..8a230284bcdfe4f 100644 --- a/clang/test/Driver/fveclib.c +++ b/clang/test/Driver/fveclib.c @@ -31,3 +31,21 @@ // RUN: %clang -fveclib=Accelerate %s -nodefaultlibs -target arm64-apple-ios8.0.0 -### 2>&1 | FileCheck --check-prefix=CHECK-LINK-NODEFAULTLIBS %s // CHECK-LINK-NODEFAULTLIBS-NOT: "-framework" "Accelerate" + + +/* Verify that the correct vector library is passed to LTO flags. */ + +// RUN: %clang -### --target=x86_64-unknown-linux-gnu -fveclib=LIBMVEC -flto %s 2>&1 | FileCheck -check-prefix CHECK-LTO-LIBMVEC %s +// CHECK-LTO-LIBMVEC: "-plugin-opt=-vector-library=LIBMVEC-X86" + +// RUN: %clang -### --target=powerpc64-unknown-linux-gnu -fveclib=MASSV -flto %s 2>&1 | FileCheck -check-prefix CHECK-LTO-MASSV %s +// CHECK-LTO-MASSV: "-plugin-opt=-vector-library=MASSV" + +// RUN: %clang -### --target=x86_64-unknown-linux-gnu -fveclib=SVML -flto %s 2>&1 | FileCheck -check-prefix CHECK-LTO-SVML %s +// CHECK-LTO-SVML: "-plugin-opt=-vector-library=SVML" + +// RUN: %clang -### --target=aarch64-linux-gnu -fveclib=SLEEF -flto %s 2>&1 | FileCheck -check-prefix CHECK-LTO-SLEEF %s +// CHECK-LTO-SLEEF: "-plugin-opt=-vector-library=sleefgnuabi" + +// RUN: %clang -### --target=aarch64-linux-gnu -fveclib=ArmPL -flto %s 2>&1 | FileCheck -check-prefix CHECK-LTO-ARMPL %s +// CHECK-LTO-ARMPL: "-plugin-opt=-vector-library=ArmPL" ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] PR for llvm/llvm-project#79564 (PR #79566)
llvmbot wrote: @llvm/pr-subscribers-clang @llvm/pr-subscribers-clang-driver Author: None (github-actions[bot]) Changes resolves llvm/llvm-project#79564 --- Full diff: https://github.com/llvm/llvm-project/pull/79566.diff 2 Files Affected: - (modified) clang/lib/Driver/ToolChains/CommonArgs.cpp (+22) - (modified) clang/test/Driver/fveclib.c (+18) ``diff diff --git a/clang/lib/Driver/ToolChains/CommonArgs.cpp b/clang/lib/Driver/ToolChains/CommonArgs.cpp index ff4047298d70d52..2b916f0003368de 100644 --- a/clang/lib/Driver/ToolChains/CommonArgs.cpp +++ b/clang/lib/Driver/ToolChains/CommonArgs.cpp @@ -810,6 +810,28 @@ void tools::addLTOOptions(const ToolChain &ToolChain, const ArgList &Args, "-generate-arange-section")); } + // Pass vector library arguments to LTO. + Arg *ArgVecLib = Args.getLastArg(options::OPT_fveclib); + if (ArgVecLib && ArgVecLib->getNumValues() == 1) { +// Map the vector library names from clang front-end to opt front-end. The +// values are taken from the TargetLibraryInfo class command line options. +std::optional OptVal = +llvm::StringSwitch>(ArgVecLib->getValue()) +.Case("Accelerate", "Accelerate") +.Case("LIBMVEC", "LIBMVEC-X86") +.Case("MASSV", "MASSV") +.Case("SVML", "SVML") +.Case("SLEEF", "sleefgnuabi") +.Case("Darwin_libsystem_m", "Darwin_libsystem_m") +.Case("ArmPL", "ArmPL") +.Case("none", "none") +.Default(std::nullopt); + +if (OptVal) + CmdArgs.push_back(Args.MakeArgString( + Twine(PluginOptPrefix) + "-vector-library=" + OptVal.value())); + } + // Try to pass driver level flags relevant to LTO code generation down to // the plugin. 
diff --git a/clang/test/Driver/fveclib.c b/clang/test/Driver/fveclib.c index e2a7619e9b89f7f..8a230284bcdfe4f 100644 --- a/clang/test/Driver/fveclib.c +++ b/clang/test/Driver/fveclib.c @@ -31,3 +31,21 @@ // RUN: %clang -fveclib=Accelerate %s -nodefaultlibs -target arm64-apple-ios8.0.0 -### 2>&1 | FileCheck --check-prefix=CHECK-LINK-NODEFAULTLIBS %s // CHECK-LINK-NODEFAULTLIBS-NOT: "-framework" "Accelerate" + + +/* Verify that the correct vector library is passed to LTO flags. */ + +// RUN: %clang -### --target=x86_64-unknown-linux-gnu -fveclib=LIBMVEC -flto %s 2>&1 | FileCheck -check-prefix CHECK-LTO-LIBMVEC %s +// CHECK-LTO-LIBMVEC: "-plugin-opt=-vector-library=LIBMVEC-X86" + +// RUN: %clang -### --target=powerpc64-unknown-linux-gnu -fveclib=MASSV -flto %s 2>&1 | FileCheck -check-prefix CHECK-LTO-MASSV %s +// CHECK-LTO-MASSV: "-plugin-opt=-vector-library=MASSV" + +// RUN: %clang -### --target=x86_64-unknown-linux-gnu -fveclib=SVML -flto %s 2>&1 | FileCheck -check-prefix CHECK-LTO-SVML %s +// CHECK-LTO-SVML: "-plugin-opt=-vector-library=SVML" + +// RUN: %clang -### --target=aarch64-linux-gnu -fveclib=SLEEF -flto %s 2>&1 | FileCheck -check-prefix CHECK-LTO-SLEEF %s +// CHECK-LTO-SLEEF: "-plugin-opt=-vector-library=sleefgnuabi" + +// RUN: %clang -### --target=aarch64-linux-gnu -fveclib=ArmPL -flto %s 2>&1 | FileCheck -check-prefix CHECK-LTO-ARMPL %s +// CHECK-LTO-ARMPL: "-plugin-opt=-vector-library=ArmPL" `` https://github.com/llvm/llvm-project/pull/79566 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] 16bfe1e - Fix comparison of Structural Values
Author: erichkeane Date: 2024-01-27T10:31:53-08:00 New Revision: 16bfe1e89f6bc6c9cf5c584247856840f83fad62 URL: https://github.com/llvm/llvm-project/commit/16bfe1e89f6bc6c9cf5c584247856840f83fad62 DIFF: https://github.com/llvm/llvm-project/commit/16bfe1e89f6bc6c9cf5c584247856840f83fad62.diff LOG: Fix comparison of Structural Values Fixes a regression from #78041 as reported in the review. The original patch failed to compare the canonical type, which this adds. A slightly modified test of the original report is added. (cherry picked from commit e3ee3762304aa81e4a240500844bfdd003401b36) Added: Modified: clang/lib/AST/TemplateBase.cpp clang/test/SemaTemplate/temp_arg_nontype_cxx20.cpp Removed: diff --git a/clang/lib/AST/TemplateBase.cpp b/clang/lib/AST/TemplateBase.cpp index 2bdbeb08ef20465..3310d7dc24c59d2 100644 --- a/clang/lib/AST/TemplateBase.cpp +++ b/clang/lib/AST/TemplateBase.cpp @@ -450,7 +450,8 @@ bool TemplateArgument::structurallyEquals(const TemplateArgument &Other) const { getAsIntegral() == Other.getAsIntegral(); case StructuralValue: { -if (getStructuralValueType() != Other.getStructuralValueType()) +if (getStructuralValueType().getCanonicalType() != +Other.getStructuralValueType().getCanonicalType()) return false; llvm::FoldingSetNodeID A, B; diff --git a/clang/test/SemaTemplate/temp_arg_nontype_cxx20.cpp b/clang/test/SemaTemplate/temp_arg_nontype_cxx20.cpp index b5b8cadc909ce00..834174cdf6a32dc 100644 --- a/clang/test/SemaTemplate/temp_arg_nontype_cxx20.cpp +++ b/clang/test/SemaTemplate/temp_arg_nontype_cxx20.cpp @@ -336,3 +336,21 @@ template void bar(B b) { (b.operator Tbar(), ...); } } + +namespace ReportedRegression1 { + const char kt[] = "dummy"; + + template +class SomeTempl { }; + + template +class SomeTempl { + public: +int exit_code() const { return 0; } +}; + + int use() { +SomeTempl dummy; +return dummy.exit_code(); + } +} ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org 
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] PR for llvm/llvm-project#79355 (PR #79361)
tstellar wrote: Merged: 16bfe1e89f6bc6c9cf5c584247856840f83fad62 https://github.com/llvm/llvm-project/pull/79361 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] PR for llvm/llvm-project#79355 (PR #79361)
https://github.com/tstellar closed https://github.com/llvm/llvm-project/pull/79361 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] workflows: Merge LLVM tests together into a single job (#78877) (PR #79710)
https://github.com/tstellar created https://github.com/llvm/llvm-project/pull/79710 This is possible now that the free GitHub runners for Windows and Linux have more disk space: https://github.blog/2024-01-17-github-hosted-runners-double-the-power-for-open-source/ I also had to switch from macOS-11 to macOS-13 in order to prevent the job from timing out. macOS-13 runners have 4 vCPUs and the macOS-11 runners only have 3. >From 63332b878c3e4bb5e446755855c4e3558242b585 Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Sat, 27 Jan 2024 14:04:00 -0800 Subject: [PATCH] workflows: Merge LLVM tests together into a single job (#78877) This is possible now that the free GitHub runners for Windows and Linux have more disk space: https://github.blog/2024-01-17-github-hosted-runners-double-the-power-for-open-source/ I also had to switch from macOS-11 to macOS-13 in order to prevent the job from timing out. macOS-13 runners have 4 vCPUs and the macOS-11 runners only have 3. --- .github/workflows/llvm-project-tests.yml | 9 - .github/workflows/llvm-tests.yml | 24 +++- 2 files changed, 7 insertions(+), 26 deletions(-) diff --git a/.github/workflows/llvm-project-tests.yml b/.github/workflows/llvm-project-tests.yml index a1404e1f1efa95..91d0b258394ef7 100644 --- a/.github/workflows/llvm-project-tests.yml +++ b/.github/workflows/llvm-project-tests.yml @@ -14,7 +14,7 @@ on: required: false os_list: required: false -default: '["ubuntu-latest", "windows-2019", "macOS-11"]' +default: '["ubuntu-latest", "windows-2019", "macOS-13"]' python_version: required: false type: string @@ -38,9 +38,7 @@ on: type: string # Use windows-2019 due to: # https://developercommunity.visualstudio.com/t/Prev-Issue---with-__assume-isnan-/1597317 -# We're using a specific version of macOS due to: -# https://github.com/actions/virtual-environments/issues/5900 -default: '["ubuntu-latest", "windows-2019", "macOS-11"]' +default: '["ubuntu-latest", "windows-2019", "macOS-13"]' python_version: required: false @@ 
-95,7 +93,8 @@ jobs: # run creates a new cache entry so we want to ensure that we have # enough cache space for all the tests to run at once and still # fit under the 10 GB limit. - max-size: 500M + # Default to 2G to workaround: https://github.com/hendrikmuhs/ccache-action/issues/174 + max-size: 2G key: ${{ matrix.os }} variant: sccache - name: Build and Test diff --git a/.github/workflows/llvm-tests.yml b/.github/workflows/llvm-tests.yml index cc9855ce182b2b..63f0f3abfd70a5 100644 --- a/.github/workflows/llvm-tests.yml +++ b/.github/workflows/llvm-tests.yml @@ -27,31 +27,13 @@ concurrency: cancel-in-progress: ${{ startsWith(github.ref, 'refs/pull/') }} jobs: - check_all: + check-all: if: github.repository_owner == 'llvm' -name: Test llvm,clang,libclc +name: Build and Test uses: ./.github/workflows/llvm-project-tests.yml with: build_target: check-all - projects: clang;libclc - - # These need to be separate from the check_all job, becuase there is not enough disk - # space to build all these projects on Windows. - build_lldb: -if: github.repository_owner == 'llvm' -name: Build lldb -uses: ./.github/workflows/llvm-project-tests.yml -with: - build_target: '' - projects: clang;lldb - - check_lld: -if: github.repository_owner == 'llvm' -name: Test lld -uses: ./.github/workflows/llvm-project-tests.yml -with: - build_target: check-lld - projects: lld + projects: clang;lld;libclc;lldb abi-dump-setup: if: github.repository_owner == 'llvm' ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] workflows: Merge LLVM tests together into a single job (#78877) (PR #79710)
llvmbot wrote: @llvm/pr-subscribers-github-workflow Author: Tom Stellard (tstellar) Changes This is possible now that the free GitHub runners for Windows and Linux have more disk space: https://github.blog/2024-01-17-github-hosted-runners-double-the-power-for-open-source/ I also had to switch from macOS-11 to macOS-13 in order to prevent the job from timing out. macOS-13 runners have 4 vCPUs and the macOS-11 runners only have 3. --- Full diff: https://github.com/llvm/llvm-project/pull/79710.diff 2 Files Affected: - (modified) .github/workflows/llvm-project-tests.yml (+4-5) - (modified) .github/workflows/llvm-tests.yml (+3-21) ``diff diff --git a/.github/workflows/llvm-project-tests.yml b/.github/workflows/llvm-project-tests.yml index a1404e1f1efa95..91d0b258394ef7 100644 --- a/.github/workflows/llvm-project-tests.yml +++ b/.github/workflows/llvm-project-tests.yml @@ -14,7 +14,7 @@ on: required: false os_list: required: false -default: '["ubuntu-latest", "windows-2019", "macOS-11"]' +default: '["ubuntu-latest", "windows-2019", "macOS-13"]' python_version: required: false type: string @@ -38,9 +38,7 @@ on: type: string # Use windows-2019 due to: # https://developercommunity.visualstudio.com/t/Prev-Issue---with-__assume-isnan-/1597317 -# We're using a specific version of macOS due to: -# https://github.com/actions/virtual-environments/issues/5900 -default: '["ubuntu-latest", "windows-2019", "macOS-11"]' +default: '["ubuntu-latest", "windows-2019", "macOS-13"]' python_version: required: false @@ -95,7 +93,8 @@ jobs: # run creates a new cache entry so we want to ensure that we have # enough cache space for all the tests to run at once and still # fit under the 10 GB limit. 
- max-size: 500M + # Default to 2G to workaround: https://github.com/hendrikmuhs/ccache-action/issues/174 + max-size: 2G key: ${{ matrix.os }} variant: sccache - name: Build and Test diff --git a/.github/workflows/llvm-tests.yml b/.github/workflows/llvm-tests.yml index cc9855ce182b2b..63f0f3abfd70a5 100644 --- a/.github/workflows/llvm-tests.yml +++ b/.github/workflows/llvm-tests.yml @@ -27,31 +27,13 @@ concurrency: cancel-in-progress: ${{ startsWith(github.ref, 'refs/pull/') }} jobs: - check_all: + check-all: if: github.repository_owner == 'llvm' -name: Test llvm,clang,libclc +name: Build and Test uses: ./.github/workflows/llvm-project-tests.yml with: build_target: check-all - projects: clang;libclc - - # These need to be separate from the check_all job, becuase there is not enough disk - # space to build all these projects on Windows. - build_lldb: -if: github.repository_owner == 'llvm' -name: Build lldb -uses: ./.github/workflows/llvm-project-tests.yml -with: - build_target: '' - projects: clang;lldb - - check_lld: -if: github.repository_owner == 'llvm' -name: Test lld -uses: ./.github/workflows/llvm-project-tests.yml -with: - build_target: check-lld - projects: lld + projects: clang;lld;libclc;lldb abi-dump-setup: if: github.repository_owner == 'llvm' `` https://github.com/llvm/llvm-project/pull/79710 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] fa0a72b - [LTO] Fix Veclib flags correctly pass to LTO flags (#78749)
Author: Paschalis Mpeis Date: 2024-01-27T15:52:23-08:00 New Revision: fa0a72b584a06fcf210a9667a344c1f87a2a081b URL: https://github.com/llvm/llvm-project/commit/fa0a72b584a06fcf210a9667a344c1f87a2a081b DIFF: https://github.com/llvm/llvm-project/commit/fa0a72b584a06fcf210a9667a344c1f87a2a081b.diff LOG: [LTO] Fix Veclib flags correctly pass to LTO flags (#78749) Flags `-fveclib=name` were not passed to LTO flags. This patch fixes that by converting the `-fveclib` flags to their relevant names for opt's `-vector-library=name` flags. For example: `-fveclib=SLEEF` would become `-vector-library=sleefgnuabi` and be passed through the `-plugin-opt` flag. (cherry picked from commit 03cf0e9354e7e56ff794e9efb682ed2971bc91ec) Added: Modified: clang/lib/Driver/ToolChains/CommonArgs.cpp clang/test/Driver/fveclib.c Removed: diff --git a/clang/lib/Driver/ToolChains/CommonArgs.cpp b/clang/lib/Driver/ToolChains/CommonArgs.cpp index ff4047298d70d52..2b916f0003368de 100644 --- a/clang/lib/Driver/ToolChains/CommonArgs.cpp +++ b/clang/lib/Driver/ToolChains/CommonArgs.cpp @@ -810,6 +810,28 @@ void tools::addLTOOptions(const ToolChain &ToolChain, const ArgList &Args, "-generate-arange-section")); } + // Pass vector library arguments to LTO. + Arg *ArgVecLib = Args.getLastArg(options::OPT_fveclib); + if (ArgVecLib && ArgVecLib->getNumValues() == 1) { +// Map the vector library names from clang front-end to opt front-end. The +// values are taken from the TargetLibraryInfo class command line options. 
+std::optional OptVal = +llvm::StringSwitch>(ArgVecLib->getValue()) +.Case("Accelerate", "Accelerate") +.Case("LIBMVEC", "LIBMVEC-X86") +.Case("MASSV", "MASSV") +.Case("SVML", "SVML") +.Case("SLEEF", "sleefgnuabi") +.Case("Darwin_libsystem_m", "Darwin_libsystem_m") +.Case("ArmPL", "ArmPL") +.Case("none", "none") +.Default(std::nullopt); + +if (OptVal) + CmdArgs.push_back(Args.MakeArgString( + Twine(PluginOptPrefix) + "-vector-library=" + OptVal.value())); + } + // Try to pass driver level flags relevant to LTO code generation down to // the plugin. diff --git a/clang/test/Driver/fveclib.c b/clang/test/Driver/fveclib.c index e2a7619e9b89f7f..8a230284bcdfe4f 100644 --- a/clang/test/Driver/fveclib.c +++ b/clang/test/Driver/fveclib.c @@ -31,3 +31,21 @@ // RUN: %clang -fveclib=Accelerate %s -nodefaultlibs -target arm64-apple-ios8.0.0 -### 2>&1 | FileCheck --check-prefix=CHECK-LINK-NODEFAULTLIBS %s // CHECK-LINK-NODEFAULTLIBS-NOT: "-framework" "Accelerate" + + +/* Verify that the correct vector library is passed to LTO flags. 
*/ + +// RUN: %clang -### --target=x86_64-unknown-linux-gnu -fveclib=LIBMVEC -flto %s 2>&1 | FileCheck -check-prefix CHECK-LTO-LIBMVEC %s +// CHECK-LTO-LIBMVEC: "-plugin-opt=-vector-library=LIBMVEC-X86" + +// RUN: %clang -### --target=powerpc64-unknown-linux-gnu -fveclib=MASSV -flto %s 2>&1 | FileCheck -check-prefix CHECK-LTO-MASSV %s +// CHECK-LTO-MASSV: "-plugin-opt=-vector-library=MASSV" + +// RUN: %clang -### --target=x86_64-unknown-linux-gnu -fveclib=SVML -flto %s 2>&1 | FileCheck -check-prefix CHECK-LTO-SVML %s +// CHECK-LTO-SVML: "-plugin-opt=-vector-library=SVML" + +// RUN: %clang -### --target=aarch64-linux-gnu -fveclib=SLEEF -flto %s 2>&1 | FileCheck -check-prefix CHECK-LTO-SLEEF %s +// CHECK-LTO-SLEEF: "-plugin-opt=-vector-library=sleefgnuabi" + +// RUN: %clang -### --target=aarch64-linux-gnu -fveclib=ArmPL -flto %s 2>&1 | FileCheck -check-prefix CHECK-LTO-ARMPL %s +// CHECK-LTO-ARMPL: "-plugin-opt=-vector-library=ArmPL" ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] PR for llvm/llvm-project#79564 (PR #79566)
https://github.com/tstellar closed https://github.com/llvm/llvm-project/pull/79566 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] PR for llvm/llvm-project#79564 (PR #79566)
tstellar wrote: Merged: fa0a72b584a06fcf210a9667a344c1f87a2a081b https://github.com/llvm/llvm-project/pull/79566 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] PR for llvm/llvm-project#79675 (PR #79721)
https://github.com/llvmbot created https://github.com/llvm/llvm-project/pull/79721 resolves llvm/llvm-project#79675 >From 0c579c019c0acf36e4901e4ca89557fbc115c0a7 Mon Sep 17 00:00:00 2001 From: Shengchen Kan Date: Sun, 28 Jan 2024 09:06:27 +0800 Subject: [PATCH] [X86][tablgen] Fix the broadcast tables (#79675) (cherry picked from commit 7c3ee7cbe6419ea5e37ce2723cc1a1688380581f) --- .../include/llvm/Support/X86FoldTablesUtils.h | 11 +- llvm/lib/Target/X86/X86InstrAVX512.td | 2 +- llvm/lib/Target/X86/X86InstrFoldTables.cpp| 11 +- llvm/test/TableGen/x86-fold-tables.inc| 378 +- llvm/utils/TableGen/X86FoldTablesEmitter.cpp | 58 +-- 5 files changed, 223 insertions(+), 237 deletions(-) diff --git a/llvm/include/llvm/Support/X86FoldTablesUtils.h b/llvm/include/llvm/Support/X86FoldTablesUtils.h index ed244febc38d3a5..77d32cc7fb37ed3 100644 --- a/llvm/include/llvm/Support/X86FoldTablesUtils.h +++ b/llvm/include/llvm/Support/X86FoldTablesUtils.h @@ -46,11 +46,12 @@ enum { // Broadcast type. // (stored in bits 12 - 14) TB_BCAST_TYPE_SHIFT = TB_ALIGN_SHIFT + 3, - TB_BCAST_D = 0 << TB_BCAST_TYPE_SHIFT, - TB_BCAST_Q = 1 << TB_BCAST_TYPE_SHIFT, - TB_BCAST_SS = 2 << TB_BCAST_TYPE_SHIFT, - TB_BCAST_SD = 3 << TB_BCAST_TYPE_SHIFT, - TB_BCAST_SH = 4 << TB_BCAST_TYPE_SHIFT, + TB_BCAST_W = 0 << TB_BCAST_TYPE_SHIFT, + TB_BCAST_D = 1 << TB_BCAST_TYPE_SHIFT, + TB_BCAST_Q = 2 << TB_BCAST_TYPE_SHIFT, + TB_BCAST_SS = 3 << TB_BCAST_TYPE_SHIFT, + TB_BCAST_SD = 4 << TB_BCAST_TYPE_SHIFT, + TB_BCAST_SH = 5 << TB_BCAST_TYPE_SHIFT, TB_BCAST_MASK = 0x7 << TB_BCAST_TYPE_SHIFT, // Unused bits 15-16 diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td index fe7d90fbcdf7070..bb5e22c71427939 100644 --- a/llvm/lib/Target/X86/X86InstrAVX512.td +++ b/llvm/lib/Target/X86/X86InstrAVX512.td @@ -12422,7 +12422,7 @@ multiclass GF2P8AFFINE_avx512_rmb_imm Op, string OpStr, SDNode OpNode, : avx512_3Op_rm_imm8 { let ExeDomain = VTI.ExeDomain in defm rmbi : AVX512_maskableTheDef; - 
StringRef RegInstName = RegRec->getName(); - StringRef MemInstName = MemInst->TheDef->getName(); - Record *Domain = RegRec->getValueAsDef("ExeDomain"); - bool IsSSEPackedInt = Domain->getName() == "SSEPackedInt"; - if ((RegInstName.contains("DZ") || RegInstName.contains("DWZ") || - RegInstName.contains("Dr") || RegInstName.contains("I32")) && - IsSSEPackedInt) { -assert((MemInstName.contains("DZ") || RegInstName.contains("DWZ") || -MemInstName.contains("Dr") || MemInstName.contains("I32")) && - "Unmatched names for broadcast"); -Result.BroadcastKind = X86FoldTableEntry::BCAST_D; - } else if ((RegInstName.contains("QZ") || RegInstName.contains("QBZ") || - RegInstName.contains("Qr") || RegInstName.contains("I64")) && - IsSSEPackedInt) { -assert((MemInstName.contains("QZ") || MemInstName.contains("QBZ") || -MemInstName.contains("Qr") || MemInstName.contains("I64")) && - "Unmatched names for broadcast"); -Result.BroadcastKind = X86FoldTableEntry::BCAST_Q; - } else if ((RegInstName.contains("PS") || RegInstName.contains("F32") || - RegInstName.contains("CPH")) && - !RegInstName.contains("PH2PS")) { -assert((MemInstName.contains("PS") || MemInstName.contains("F32") || -MemInstName.contains("CPH")) && - "Unmatched names for broadcast"); -Result.BroadcastKind = X86FoldTableEntry::BCAST_SS; - } else if ((RegInstName.contains("PD") || RegInstName.contains("F64")) && - !RegInstName.contains("PH2PD")) { -assert((MemInstName.contains("PD") || MemInstName.contains("F64")) && - "Unmatched names for broadcast"); -Result.BroadcastKind = X86FoldTableEntry::BCAST_SD; - } else if (RegInstName.contains("PH")) { -assert(MemInstName.contains("PH") && "Unmatched names for broadcast"); -Result.BroadcastKind = X86FoldTableEntry::BCAST_SH; - } else { -errs() << RegInstName << ", " << MemInstName << "\n"; -llvm_unreachable("Name is not canoicalized for broadcast or " - "ExeDomain is incorrect"); + DagInit *In = MemInst->TheDef->getValueAsDag("InOperandList"); + for (unsigned I = 0, E = 
In->getNumArgs(); I != E; ++I) { +Result.BroadcastKind = + StringSwitch(In->getArg(I)->getAsString()) +.Case("i16mem", X86FoldTableEntry::BCAST_W) +.Case("i32mem", X86FoldTableEntry::BCAST_D) +.Case("i64mem", X86FoldTableEntry::BCAST_Q) +.Case("f16mem", X86FoldTableEntry::BCAST_SH) +.Case("f32mem", X86FoldTableEntry::BCAST_SS) +.Case("f64mem", X86FoldTableEntry::BCAST_SD) +.Default(X86FoldTableEntry::BCAST_NONE); +if (Result.BroadcastKind != X86FoldTableEntry::BCAST_NONE) + break; } + assert(Result.BroadcastKind != X86FoldTableEntry::BCAST_NONE && +
[llvm-branch-commits] [llvm] PR for llvm/llvm-project#79675 (PR #79721)
https://github.com/llvmbot milestoned https://github.com/llvm/llvm-project/pull/79721 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] PR for llvm/llvm-project#79675 (PR #79721)
llvmbot wrote: @phoebewang What do you think about merging this PR to the release branch? https://github.com/llvm/llvm-project/pull/79721 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] PR for llvm/llvm-project#79675 (PR #79721)
llvmbot wrote: @llvm/pr-subscribers-backend-x86 Author: None (llvmbot) Changes resolves llvm/llvm-project#79675 --- Patch is 41.03 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/79721.diff 5 Files Affected: - (modified) llvm/include/llvm/Support/X86FoldTablesUtils.h (+6-5) - (modified) llvm/lib/Target/X86/X86InstrAVX512.td (+1-1) - (modified) llvm/lib/Target/X86/X86InstrFoldTables.cpp (+7-4) - (modified) llvm/test/TableGen/x86-fold-tables.inc (+189-189) - (modified) llvm/utils/TableGen/X86FoldTablesEmitter.cpp (+20-38) ``diff diff --git a/llvm/include/llvm/Support/X86FoldTablesUtils.h b/llvm/include/llvm/Support/X86FoldTablesUtils.h index ed244febc38d3a..77d32cc7fb37ed 100644 --- a/llvm/include/llvm/Support/X86FoldTablesUtils.h +++ b/llvm/include/llvm/Support/X86FoldTablesUtils.h @@ -46,11 +46,12 @@ enum { // Broadcast type. // (stored in bits 12 - 14) TB_BCAST_TYPE_SHIFT = TB_ALIGN_SHIFT + 3, - TB_BCAST_D = 0 << TB_BCAST_TYPE_SHIFT, - TB_BCAST_Q = 1 << TB_BCAST_TYPE_SHIFT, - TB_BCAST_SS = 2 << TB_BCAST_TYPE_SHIFT, - TB_BCAST_SD = 3 << TB_BCAST_TYPE_SHIFT, - TB_BCAST_SH = 4 << TB_BCAST_TYPE_SHIFT, + TB_BCAST_W = 0 << TB_BCAST_TYPE_SHIFT, + TB_BCAST_D = 1 << TB_BCAST_TYPE_SHIFT, + TB_BCAST_Q = 2 << TB_BCAST_TYPE_SHIFT, + TB_BCAST_SS = 3 << TB_BCAST_TYPE_SHIFT, + TB_BCAST_SD = 4 << TB_BCAST_TYPE_SHIFT, + TB_BCAST_SH = 5 << TB_BCAST_TYPE_SHIFT, TB_BCAST_MASK = 0x7 << TB_BCAST_TYPE_SHIFT, // Unused bits 15-16 diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td index fe7d90fbcdf707..bb5e22c7142793 100644 --- a/llvm/lib/Target/X86/X86InstrAVX512.td +++ b/llvm/lib/Target/X86/X86InstrAVX512.td @@ -12422,7 +12422,7 @@ multiclass GF2P8AFFINE_avx512_rmb_imm Op, string OpStr, SDNode OpNode, : avx512_3Op_rm_imm8 { let ExeDomain = VTI.ExeDomain in defm rmbi : AVX512_maskable https://github.com/llvm/llvm-project/pull/79721 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org 
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] PR for llvm/llvm-project#79675 (PR #79721)
llvmbot wrote: @llvm/pr-subscribers-llvm-support Author: None (llvmbot) Changes resolves llvm/llvm-project#79675 --- Patch is 41.03 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/79721.diff 5 Files Affected: - (modified) llvm/include/llvm/Support/X86FoldTablesUtils.h (+6-5) - (modified) llvm/lib/Target/X86/X86InstrAVX512.td (+1-1) - (modified) llvm/lib/Target/X86/X86InstrFoldTables.cpp (+7-4) - (modified) llvm/test/TableGen/x86-fold-tables.inc (+189-189) - (modified) llvm/utils/TableGen/X86FoldTablesEmitter.cpp (+20-38) ``diff diff --git a/llvm/include/llvm/Support/X86FoldTablesUtils.h b/llvm/include/llvm/Support/X86FoldTablesUtils.h index ed244febc38d3a..77d32cc7fb37ed 100644 --- a/llvm/include/llvm/Support/X86FoldTablesUtils.h +++ b/llvm/include/llvm/Support/X86FoldTablesUtils.h @@ -46,11 +46,12 @@ enum { // Broadcast type. // (stored in bits 12 - 14) TB_BCAST_TYPE_SHIFT = TB_ALIGN_SHIFT + 3, - TB_BCAST_D = 0 << TB_BCAST_TYPE_SHIFT, - TB_BCAST_Q = 1 << TB_BCAST_TYPE_SHIFT, - TB_BCAST_SS = 2 << TB_BCAST_TYPE_SHIFT, - TB_BCAST_SD = 3 << TB_BCAST_TYPE_SHIFT, - TB_BCAST_SH = 4 << TB_BCAST_TYPE_SHIFT, + TB_BCAST_W = 0 << TB_BCAST_TYPE_SHIFT, + TB_BCAST_D = 1 << TB_BCAST_TYPE_SHIFT, + TB_BCAST_Q = 2 << TB_BCAST_TYPE_SHIFT, + TB_BCAST_SS = 3 << TB_BCAST_TYPE_SHIFT, + TB_BCAST_SD = 4 << TB_BCAST_TYPE_SHIFT, + TB_BCAST_SH = 5 << TB_BCAST_TYPE_SHIFT, TB_BCAST_MASK = 0x7 << TB_BCAST_TYPE_SHIFT, // Unused bits 15-16 diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td index fe7d90fbcdf707..bb5e22c7142793 100644 --- a/llvm/lib/Target/X86/X86InstrAVX512.td +++ b/llvm/lib/Target/X86/X86InstrAVX512.td @@ -12422,7 +12422,7 @@ multiclass GF2P8AFFINE_avx512_rmb_imm Op, string OpStr, SDNode OpNode, : avx512_3Op_rm_imm8 { let ExeDomain = VTI.ExeDomain in defm rmbi : AVX512_maskable https://github.com/llvm/llvm-project/pull/79721 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org 
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] PR for llvm/llvm-project#79675 (PR #79721)
phoebewang wrote: > @phoebewang What do you think about merging this PR to the release branch? This patch fixes prior mistakes, so it should be merged to the release branch. @KanRobert Is my understanding correct? https://github.com/llvm/llvm-project/pull/79721 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [mlir] 4cb7679 - Revert "[mlir][complex] Prevent underflow in complex.abs"
Author: Mehdi Amini Date: 2024-01-27T19:24:30-08:00 New Revision: 4cb76790260f76cdff9923c00f02417249ffe2e9 URL: https://github.com/llvm/llvm-project/commit/4cb76790260f76cdff9923c00f02417249ffe2e9 DIFF: https://github.com/llvm/llvm-project/commit/4cb76790260f76cdff9923c00f02417249ffe2e9.diff LOG: Revert "[mlir][complex] Prevent underflow in complex.abs" Added: Modified: mlir/lib/Conversion/ComplexToStandard/ComplexToStandard.cpp mlir/test/Conversion/ComplexToStandard/convert-to-standard.mlir mlir/test/Conversion/ComplexToStandard/full-conversion.mlir mlir/test/Integration/Dialect/Complex/CPU/correctness.mlir Removed: diff --git a/mlir/lib/Conversion/ComplexToStandard/ComplexToStandard.cpp b/mlir/lib/Conversion/ComplexToStandard/ComplexToStandard.cpp index 81601ce51f431c..4c9dad9e2c1731 100644 --- a/mlir/lib/Conversion/ComplexToStandard/ComplexToStandard.cpp +++ b/mlir/lib/Conversion/ComplexToStandard/ComplexToStandard.cpp @@ -26,57 +26,29 @@ namespace mlir { using namespace mlir; namespace { -// The algorithm is listed in https://dl.acm.org/doi/pdf/10.1145/363717.363780. 
struct AbsOpConversion : public OpConversionPattern { using OpConversionPattern::OpConversionPattern; LogicalResult matchAndRewrite(complex::AbsOp op, OpAdaptor adaptor, ConversionPatternRewriter &rewriter) const override { -mlir::ImplicitLocOpBuilder b(op.getLoc(), rewriter); +auto loc = op.getLoc(); +auto type = op.getType(); arith::FastMathFlagsAttr fmf = op.getFastMathFlagsAttr(); -Type elementType = op.getType(); -Value arg = adaptor.getComplex(); - -Value zero = -b.create(elementType, b.getZeroAttr(elementType)); -Value one = b.create(elementType, -b.getFloatAttr(elementType, 1.0)); - -Value real = b.create(elementType, arg); -Value imag = b.create(elementType, arg); - -Value realIsZero = -b.create(arith::CmpFPredicate::OEQ, real, zero); -Value imagIsZero = -b.create(arith::CmpFPredicate::OEQ, imag, zero); - -// Real > Imag -Value imagDivReal = b.create(imag, real, fmf.getValue()); -Value imagSq = -b.create(imagDivReal, imagDivReal, fmf.getValue()); -Value imagSqPlusOne = b.create(imagSq, one, fmf.getValue()); -Value imagSqrt = b.create(imagSqPlusOne, fmf.getValue()); -Value absImag = b.create(imagSqrt, real, fmf.getValue()); - -// Real <= Imag -Value realDivImag = b.create(real, imag, fmf.getValue()); -Value realSq = -b.create(realDivImag, realDivImag, fmf.getValue()); -Value realSqPlusOne = b.create(realSq, one, fmf.getValue()); -Value realSqrt = b.create(realSqPlusOne, fmf.getValue()); -Value absReal = b.create(realSqrt, imag, fmf.getValue()); - -rewriter.replaceOpWithNewOp( -op, realIsZero, imag, -b.create( -imagIsZero, real, -b.create( -b.create(arith::CmpFPredicate::OGT, real, imag), -absImag, absReal))); - +Value real = +rewriter.create(loc, type, adaptor.getComplex()); +Value imag = +rewriter.create(loc, type, adaptor.getComplex()); +Value realSqr = +rewriter.create(loc, real, real, fmf.getValue()); +Value imagSqr = +rewriter.create(loc, imag, imag, fmf.getValue()); +Value sqNorm = +rewriter.create(loc, realSqr, imagSqr, fmf.getValue()); + 
+rewriter.replaceOpWithNewOp(op, sqNorm); return success(); } }; diff --git a/mlir/test/Conversion/ComplexToStandard/convert-to-standard.mlir b/mlir/test/Conversion/ComplexToStandard/convert-to-standard.mlir index d5f83e0af4184e..8fa29ea43854a4 100644 --- a/mlir/test/Conversion/ComplexToStandard/convert-to-standard.mlir +++ b/mlir/test/Conversion/ComplexToStandard/convert-to-standard.mlir @@ -7,28 +7,13 @@ func.func @complex_abs(%arg: complex) -> f32 { %abs = complex.abs %arg: complex return %abs : f32 } - -// CHECK: %[[ZERO:.*]] = arith.constant 0.00e+00 : f32 -// CHECK: %[[ONE:.*]] = arith.constant 1.00e+00 : f32 // CHECK: %[[REAL:.*]] = complex.re %[[ARG]] : complex // CHECK: %[[IMAG:.*]] = complex.im %[[ARG]] : complex -// CHECK: %[[IS_REAL_ZERO:.*]] = arith.cmpf oeq, %[[REAL]], %[[ZERO]] : f32 -// CHECK: %[[IS_IMAG_ZERO:.*]] = arith.cmpf oeq, %[[IMAG]], %[[ZERO]] : f32 -// CHECK: %[[IMAG_DIV_REAL:.*]] = arith.divf %[[IMAG]], %[[REAL]] : f32 -// CHECK: %[[IMAG_SQ:.*]] = arith.mulf %[[IMAG_DIV_REAL]], %[[IMAG_DIV_REAL]] : f32 -// CHECK: %[[IMAG_SQ_PLUS_ONE:.*]] = arith.addf %[[IMAG_SQ]], %[[ONE]] : f32 -// CHECK: %[[IMAG_SQRT:.*]] = math.sqrt %[[IMAG_SQ_PLUS_ONE]] : f32 -// CHECK: %[[ABS_IMAG:.*]] = arith.mulf %[[IMAG_SQRT]], %[[REAL]] : f32 -// CHECK: %[[REAL_DIV_IMAG:.*]] = arith.divf %[[REAL]], %[[IMAG]] : f32 -// CHECK: %[[REAL_SQ:.*]] = arith.mulf %[[REAL_DIV_IMAG]], %[[REAL_DIV_I
[llvm-branch-commits] [llvm] PR for llvm/llvm-project#79675 (PR #79721)
KanRobert wrote: > > @phoebewang What do you think about merging this PR to the release branch? > > This patch fixes prior mistakes, so should be merged to release branch. > @KanRobert Do I understand it right? Agreed, though I haven't noticed any runtime failures due to this mistake. https://github.com/llvm/llvm-project/pull/79721 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] workflows: Merge LLVM tests together into a single job (#78877) (PR #79710)
https://github.com/tstellar closed https://github.com/llvm/llvm-project/pull/79710 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] 2765447 - [AMDGPU] Move architected SGPR implementation into isel (#79120)
Author: Jay Foad Date: 2024-01-27T23:18:25-08:00 New Revision: 27654471cc7acfca733c104e2ec24f882cfc6132 URL: https://github.com/llvm/llvm-project/commit/27654471cc7acfca733c104e2ec24f882cfc6132 DIFF: https://github.com/llvm/llvm-project/commit/27654471cc7acfca733c104e2ec24f882cfc6132.diff LOG: [AMDGPU] Move architected SGPR implementation into isel (#79120) (cherry picked from commit 70fc9703788e8965813c5b677a85cb84b66671b6) Added: llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics-hsa.ll llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics-pal.ll Modified: llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp llvm/lib/Target/AMDGPU/SIISelLowering.cpp llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h llvm/test/CodeGen/AMDGPU/indirect-call-known-callees.ll llvm/test/CodeGen/AMDGPU/workgroup-id-in-arch-sgprs.ll Removed: llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics.ll diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp index 32921bb248caf0..615685822f91ee 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -4178,10 +4178,45 @@ bool AMDGPULegalizerInfo::loadInputValue( Register DstReg, MachineIRBuilder &B, AMDGPUFunctionArgInfo::PreloadedValue ArgType) const { const SIMachineFunctionInfo *MFI = B.getMF().getInfo(); - const ArgDescriptor *Arg; + const ArgDescriptor *Arg = nullptr; const TargetRegisterClass *ArgRC; LLT ArgTy; - std::tie(Arg, ArgRC, ArgTy) = MFI->getPreloadedValue(ArgType); + + CallingConv::ID CC = B.getMF().getFunction().getCallingConv(); + const ArgDescriptor WorkGroupIDX = + ArgDescriptor::createRegister(AMDGPU::TTMP9); + // If GridZ is not programmed in an entry function then the hardware will set + // it to all zeros, so there is no need to mask the GridY value in the low + // order bits. 
+ const ArgDescriptor WorkGroupIDY = ArgDescriptor::createRegister( + AMDGPU::TTMP7, + AMDGPU::isEntryFunctionCC(CC) && !MFI->hasWorkGroupIDZ() ? ~0u : 0xu); + const ArgDescriptor WorkGroupIDZ = + ArgDescriptor::createRegister(AMDGPU::TTMP7, 0xu); + if (ST.hasArchitectedSGPRs() && AMDGPU::isCompute(CC)) { +switch (ArgType) { +case AMDGPUFunctionArgInfo::WORKGROUP_ID_X: + Arg = &WorkGroupIDX; + ArgRC = &AMDGPU::SReg_32RegClass; + ArgTy = LLT::scalar(32); + break; +case AMDGPUFunctionArgInfo::WORKGROUP_ID_Y: + Arg = &WorkGroupIDY; + ArgRC = &AMDGPU::SReg_32RegClass; + ArgTy = LLT::scalar(32); + break; +case AMDGPUFunctionArgInfo::WORKGROUP_ID_Z: + Arg = &WorkGroupIDZ; + ArgRC = &AMDGPU::SReg_32RegClass; + ArgTy = LLT::scalar(32); + break; +default: + break; +} + } + + if (!Arg) +std::tie(Arg, ArgRC, ArgTy) = MFI->getPreloadedValue(ArgType); if (!Arg) { if (ArgType == AMDGPUFunctionArgInfo::KERNARG_SEGMENT_PTR) { diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index d35b76c8ad54eb..d60f511302613e 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -2072,11 +2072,45 @@ SDValue SITargetLowering::getPreloadedValue(SelectionDAG &DAG, const SIMachineFunctionInfo &MFI, EVT VT, AMDGPUFunctionArgInfo::PreloadedValue PVID) const { - const ArgDescriptor *Reg; + const ArgDescriptor *Reg = nullptr; const TargetRegisterClass *RC; LLT Ty; - std::tie(Reg, RC, Ty) = MFI.getPreloadedValue(PVID); + CallingConv::ID CC = DAG.getMachineFunction().getFunction().getCallingConv(); + const ArgDescriptor WorkGroupIDX = + ArgDescriptor::createRegister(AMDGPU::TTMP9); + // If GridZ is not programmed in an entry function then the hardware will set + // it to all zeros, so there is no need to mask the GridY value in the low + // order bits. + const ArgDescriptor WorkGroupIDY = ArgDescriptor::createRegister( + AMDGPU::TTMP7, + AMDGPU::isEntryFunctionCC(CC) && !MFI.hasWorkGroupIDZ() ? 
~0u : 0xu); + const ArgDescriptor WorkGroupIDZ = + ArgDescriptor::createRegister(AMDGPU::TTMP7, 0xu); + if (Subtarget->hasArchitectedSGPRs() && AMDGPU::isCompute(CC)) { +switch (PVID) { +case AMDGPUFunctionArgInfo::WORKGROUP_ID_X: + Reg = &WorkGroupIDX; + RC = &AMDGPU::SReg_32RegClass; + Ty = LLT::scalar(32); + break; +case AMDGPUFunctionArgInfo::WORKGROUP_ID_Y: + Reg = &WorkGroupIDY; + RC = &AMDGPU::SReg_32RegClass; + Ty = LLT::scalar(32); + break; +case AMDGPUFunctionArgInfo::WORKGROUP_ID_Z: + Reg = &WorkGroupIDZ; + RC = &AMDGPU::SReg_32RegClass; + Ty = LLT::scalar(32); + break; +default: + break; +} + } + + if (!Reg) +std::tie(Reg, RC, Ty) = MFI.getPreloadedValue(PVID); if (!R
[llvm-branch-commits] [llvm] PR for llvm/llvm-project#79451 (PR #79457)
https://github.com/tstellar closed https://github.com/llvm/llvm-project/pull/79457 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] PR for llvm/llvm-project#79451 (PR #79457)
tstellar wrote: Merged: 27654471cc7acfca733c104e2ec24f882cfc6132 https://github.com/llvm/llvm-project/pull/79457 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] PR for llvm/llvm-project#79547 (PR #79548)
https://github.com/llvmbot updated https://github.com/llvm/llvm-project/pull/79548 >From eaf68babab013f48a9724283bb8087ac60220af8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20G=C3=B3rny?= Date: Fri, 26 Jan 2024 05:09:28 +0100 Subject: [PATCH] [llvm] [cmake] Include httplib in LLVMConfig.cmake (#79305) Include LLVM_ENABLE_HTTPLIB along with httplib package finding in LLVMConfig.cmake, as this dependency is needed by LLVMDebuginfod that is now used by LLDB. Without it, building LLDB standalone fails with: ``` CMake Error at /usr/lib/llvm/19/lib64/cmake/llvm/LLVMExports.cmake:90 (set_target_properties): The link interface of target "LLVMDebuginfod" contains: httplib::httplib but the target was not found. Possible reasons include: * There is a typo in the target name. * A find_package call is missing for an IMPORTED target. * An ALIAS target is missing. Call Stack (most recent call first): /usr/lib/llvm/19/lib64/cmake/llvm/LLVMConfig.cmake:357 (include) cmake/modules/LLDBStandalone.cmake:9 (find_package) CMakeLists.txt:34 (include) ``` (cherry picked from commit 3c9f34c12450345c6eb524e47cf79664271e4260) --- llvm/cmake/modules/LLVMConfig.cmake.in | 5 + 1 file changed, 5 insertions(+) diff --git a/llvm/cmake/modules/LLVMConfig.cmake.in b/llvm/cmake/modules/LLVMConfig.cmake.in index 74e1c6bf52e2305..770a9caea322e6a 100644 --- a/llvm/cmake/modules/LLVMConfig.cmake.in +++ b/llvm/cmake/modules/LLVMConfig.cmake.in @@ -90,6 +90,11 @@ if(LLVM_ENABLE_CURL) find_package(CURL) endif() +set(LLVM_ENABLE_HTTPLIB @LLVM_ENABLE_HTTPLIB@) +if(LLVM_ENABLE_HTTPLIB) + find_package(httplib) +endif() + set(LLVM_WITH_Z3 @LLVM_WITH_Z3@) set(LLVM_ENABLE_DIA_SDK @LLVM_ENABLE_DIA_SDK@) ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] PR for llvm/llvm-project#79137 (PR #79561)
https://github.com/llvmbot updated https://github.com/llvm/llvm-project/pull/79561 >From b4abadcb6972a934d681ac59f978434c626e1444 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Wed, 24 Jan 2024 10:45:20 +0100 Subject: [PATCH 1/2] [PhaseOrdering] Add additional test for #79161 (NFC) (cherry picked from commit 543cf08636f3a3bb55dddba2e8cad787601647ba) --- .../X86/loop-vectorizer-noalias.ll| 147 ++ 1 file changed, 147 insertions(+) create mode 100644 llvm/test/Transforms/PhaseOrdering/X86/loop-vectorizer-noalias.ll diff --git a/llvm/test/Transforms/PhaseOrdering/X86/loop-vectorizer-noalias.ll b/llvm/test/Transforms/PhaseOrdering/X86/loop-vectorizer-noalias.ll new file mode 100644 index 000..846787f721ba7ed --- /dev/null +++ b/llvm/test/Transforms/PhaseOrdering/X86/loop-vectorizer-noalias.ll @@ -0,0 +1,147 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 +; RUN: opt -S -O3 -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s + +define internal void @acc(ptr noalias noundef %val, ptr noalias noundef %prev) { +entry: + %0 = load i8, ptr %prev, align 1 + %conv = zext i8 %0 to i32 + %1 = load i8, ptr %val, align 1 + %conv1 = zext i8 %1 to i32 + %add = add nsw i32 %conv1, %conv + %conv2 = trunc i32 %add to i8 + store i8 %conv2, ptr %val, align 1 + ret void +} + +; This loop should not get vectorized. +; FIXME: This is a miscompile. 
+define void @accsum(ptr noundef %vals, i64 noundef %num) #0 { +; CHECK-LABEL: define void @accsum( +; CHECK-SAME: ptr nocapture noundef [[VALS:%.*]], i64 noundef [[NUM:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: entry: +; CHECK-NEXT:[[CMP1:%.*]] = icmp ugt i64 [[NUM]], 1 +; CHECK-NEXT:br i1 [[CMP1]], label [[ITER_CHECK:%.*]], label [[FOR_END:%.*]] +; CHECK: iter.check: +; CHECK-NEXT:[[TMP0:%.*]] = add i64 [[NUM]], -1 +; CHECK-NEXT:[[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[NUM]], 9 +; CHECK-NEXT:br i1 [[MIN_ITERS_CHECK]], label [[FOR_BODY_PREHEADER:%.*]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]] +; CHECK: vector.main.loop.iter.check: +; CHECK-NEXT:[[MIN_ITERS_CHECK3:%.*]] = icmp ult i64 [[NUM]], 33 +; CHECK-NEXT:br i1 [[MIN_ITERS_CHECK3]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT:[[N_VEC:%.*]] = and i64 [[TMP0]], -32 +; CHECK-NEXT:br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT:[[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT:[[OFFSET_IDX:%.*]] = or disjoint i64 [[INDEX]], 1 +; CHECK-NEXT:[[TMP1:%.*]] = getelementptr inbounds i8, ptr [[VALS]], i64 [[OFFSET_IDX]] +; CHECK-NEXT:[[TMP2:%.*]] = getelementptr i8, ptr [[TMP1]], i64 -1 +; CHECK-NEXT:tail call void @llvm.experimental.noalias.scope.decl(metadata [[META0:![0-9]+]]) +; CHECK-NEXT:tail call void @llvm.experimental.noalias.scope.decl(metadata [[META3:![0-9]+]]) +; CHECK-NEXT:[[TMP3:%.*]] = getelementptr i8, ptr [[TMP1]], i64 15 +; CHECK-NEXT:[[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP2]], align 1, !alias.scope [[META3]], !noalias [[META0]] +; CHECK-NEXT:[[WIDE_LOAD4:%.*]] = load <16 x i8>, ptr [[TMP3]], align 1, !alias.scope [[META3]], !noalias [[META0]] +; CHECK-NEXT:[[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 16 +; CHECK-NEXT:[[WIDE_LOAD5:%.*]] = load <16 x i8>, ptr [[TMP1]], align 1, !alias.scope [[META0]], !noalias [[META3]] +; CHECK-NEXT:[[WIDE_LOAD6:%.*]] = 
load <16 x i8>, ptr [[TMP4]], align 1, !alias.scope [[META0]], !noalias [[META3]] +; CHECK-NEXT:[[TMP5:%.*]] = add <16 x i8> [[WIDE_LOAD5]], [[WIDE_LOAD]] +; CHECK-NEXT:[[TMP6:%.*]] = add <16 x i8> [[WIDE_LOAD6]], [[WIDE_LOAD4]] +; CHECK-NEXT:store <16 x i8> [[TMP5]], ptr [[TMP1]], align 1, !alias.scope [[META0]], !noalias [[META3]] +; CHECK-NEXT:store <16 x i8> [[TMP6]], ptr [[TMP4]], align 1, !alias.scope [[META0]], !noalias [[META3]] +; CHECK-NEXT:[[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32 +; CHECK-NEXT:[[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT:br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] +; CHECK: middle.block: +; CHECK-NEXT:[[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]] +; CHECK-NEXT:br i1 [[CMP_N]], label [[FOR_END]], label [[VEC_EPILOG_ITER_CHECK:%.*]] +; CHECK: vec.epilog.iter.check: +; CHECK-NEXT:[[IND_END9:%.*]] = or disjoint i64 [[N_VEC]], 1 +; CHECK-NEXT:[[N_VEC_REMAINING:%.*]] = and i64 [[TMP0]], 24 +; CHECK-NEXT:[[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp eq i64 [[N_VEC_REMAINING]], 0 +; CHECK-NEXT:br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[FOR_BODY_PREHEADER]], label [[VEC_EPILOG_PH]] +; CHECK: vec.epilog.ph: +; CHECK-NEXT:[[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER
[llvm-branch-commits] [llvm] PR for llvm/llvm-project#79137 (PR #79561)
llvmbot wrote: @llvm/pr-subscribers-llvm-analysis Author: None (github-actions[bot]) Changes resolves llvm/llvm-project#79137 --- Full diff: https://github.com/llvm/llvm-project/pull/79561.diff 3 Files Affected: - (modified) llvm/lib/Analysis/LoopAccessAnalysis.cpp (+46-7) - (modified) llvm/test/Analysis/LoopAccessAnalysis/noalias-scope-decl.ll (+10-1) - (added) llvm/test/Transforms/PhaseOrdering/X86/loop-vectorizer-noalias.ll (+78) ``diff diff --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp index 7e67c90152829dc..dd6b88fee415a7a 100644 --- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp +++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp @@ -657,16 +657,18 @@ class AccessAnalysis { AccessAnalysis(Loop *TheLoop, AAResults *AA, LoopInfo *LI, MemoryDepChecker::DepCandidates &DA, - PredicatedScalarEvolution &PSE) - : TheLoop(TheLoop), BAA(*AA), AST(BAA), LI(LI), DepCands(DA), PSE(PSE) { + PredicatedScalarEvolution &PSE, + SmallPtrSetImpl &LoopAliasScopes) + : TheLoop(TheLoop), BAA(*AA), AST(BAA), LI(LI), DepCands(DA), PSE(PSE), +LoopAliasScopes(LoopAliasScopes) { // We're analyzing dependences across loop iterations. BAA.enableCrossIterationMode(); } /// Register a load and whether it is only read from. void addLoad(MemoryLocation &Loc, Type *AccessTy, bool IsReadOnly) { -Value *Ptr = const_cast(Loc.Ptr); -AST.add(Loc.getWithNewSize(LocationSize::beforeOrAfterPointer())); +Value *Ptr = const_cast(Loc.Ptr); +AST.add(adjustLoc(Loc)); Accesses[MemAccessInfo(Ptr, false)].insert(AccessTy); if (IsReadOnly) ReadOnlyPtr.insert(Ptr); @@ -674,8 +676,8 @@ class AccessAnalysis { /// Register a store. 
void addStore(MemoryLocation &Loc, Type *AccessTy) { -Value *Ptr = const_cast(Loc.Ptr); -AST.add(Loc.getWithNewSize(LocationSize::beforeOrAfterPointer())); +Value *Ptr = const_cast(Loc.Ptr); +AST.add(adjustLoc(Loc)); Accesses[MemAccessInfo(Ptr, true)].insert(AccessTy); } @@ -731,6 +733,32 @@ class AccessAnalysis { private: typedef MapVector> PtrAccessMap; + /// Adjust the MemoryLocation so that it represents accesses to this + /// location across all iterations, rather than a single one. + MemoryLocation adjustLoc(MemoryLocation Loc) const { +// The accessed location varies within the loop, but remains within the +// underlying object. +Loc.Size = LocationSize::beforeOrAfterPointer(); +Loc.AATags.Scope = adjustAliasScopeList(Loc.AATags.Scope); +Loc.AATags.NoAlias = adjustAliasScopeList(Loc.AATags.NoAlias); +return Loc; + } + + /// Drop alias scopes that are only valid within a single loop iteration. + MDNode *adjustAliasScopeList(MDNode *ScopeList) const { +if (!ScopeList) + return nullptr; + +// For the sake of simplicity, drop the whole scope list if any scope is +// iteration-local. +if (any_of(ScopeList->operands(), [&](Metadata *Scope) { + return LoopAliasScopes.contains(cast(Scope)); +})) + return nullptr; + +return ScopeList; + } + /// Go over all memory access and check whether runtime pointer checks /// are needed and build sets of dependency check candidates. void processMemAccesses(); @@ -775,6 +803,10 @@ class AccessAnalysis { PredicatedScalarEvolution &PSE; DenseMap> UnderlyingObjects; + + /// Alias scopes that are declared inside the loop, and as such not valid + /// across iterations. + SmallPtrSetImpl &LoopAliasScopes; }; } // end anonymous namespace @@ -2283,6 +2315,7 @@ void LoopAccessInfo::analyzeLoop(AAResults *AA, LoopInfo *LI, // Holds the Load and Store instructions. SmallVector Loads; SmallVector Stores; + SmallPtrSet LoopAliasScopes; // Holds all the different accesses in the loop. 
unsigned NumReads = 0; @@ -2326,6 +2359,11 @@ void LoopAccessInfo::analyzeLoop(AAResults *AA, LoopInfo *LI, if (HasComplexMemInst) continue; + // Record alias scopes defined inside the loop. + if (auto *Decl = dyn_cast(&I)) +for (Metadata *Op : Decl->getScopeList()->operands()) + LoopAliasScopes.insert(cast(Op)); + // Many math library functions read the rounding mode. We will only // vectorize a loop if it contains known function calls that don't set // the flag. Therefore, it is safe to ignore this read from memory. @@ -2407,7 +2445,8 @@ void LoopAccessInfo::analyzeLoop(AAResults *AA, LoopInfo *LI, } MemoryDepChecker::DepCandidates DependentAccesses; - AccessAnalysis Accesses(TheLoop, AA, LI, DependentAccesses, *PSE); + AccessAnalysis Accesses(TheLoop, AA, LI, DependentAccesses, *PSE, + LoopAliasScopes); // Holds the analyzed pointers. We don't want to call getUnderlyingObjects // multiple times on the same object. If the ptr is accessed twice, once d
[llvm-branch-commits] [llvm] PR for llvm/llvm-project#79137 (PR #79561)
llvmbot wrote: @llvm/pr-subscribers-llvm-transforms Author: None (github-actions[bot]) Changes resolves llvm/llvm-project#79137 --- Full diff: https://github.com/llvm/llvm-project/pull/79561.diff 3 Files Affected: - (modified) llvm/lib/Analysis/LoopAccessAnalysis.cpp (+46-7) - (modified) llvm/test/Analysis/LoopAccessAnalysis/noalias-scope-decl.ll (+10-1) - (added) llvm/test/Transforms/PhaseOrdering/X86/loop-vectorizer-noalias.ll (+78) ``diff diff --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp index 7e67c90152829dc..dd6b88fee415a7a 100644 --- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp +++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp @@ -657,16 +657,18 @@ class AccessAnalysis { AccessAnalysis(Loop *TheLoop, AAResults *AA, LoopInfo *LI, MemoryDepChecker::DepCandidates &DA, - PredicatedScalarEvolution &PSE) - : TheLoop(TheLoop), BAA(*AA), AST(BAA), LI(LI), DepCands(DA), PSE(PSE) { + PredicatedScalarEvolution &PSE, + SmallPtrSetImpl &LoopAliasScopes) + : TheLoop(TheLoop), BAA(*AA), AST(BAA), LI(LI), DepCands(DA), PSE(PSE), +LoopAliasScopes(LoopAliasScopes) { // We're analyzing dependences across loop iterations. BAA.enableCrossIterationMode(); } /// Register a load and whether it is only read from. void addLoad(MemoryLocation &Loc, Type *AccessTy, bool IsReadOnly) { -Value *Ptr = const_cast(Loc.Ptr); -AST.add(Loc.getWithNewSize(LocationSize::beforeOrAfterPointer())); +Value *Ptr = const_cast(Loc.Ptr); +AST.add(adjustLoc(Loc)); Accesses[MemAccessInfo(Ptr, false)].insert(AccessTy); if (IsReadOnly) ReadOnlyPtr.insert(Ptr); @@ -674,8 +676,8 @@ class AccessAnalysis { /// Register a store. 
void addStore(MemoryLocation &Loc, Type *AccessTy) { -Value *Ptr = const_cast(Loc.Ptr); -AST.add(Loc.getWithNewSize(LocationSize::beforeOrAfterPointer())); +Value *Ptr = const_cast(Loc.Ptr); +AST.add(adjustLoc(Loc)); Accesses[MemAccessInfo(Ptr, true)].insert(AccessTy); } @@ -731,6 +733,32 @@ class AccessAnalysis { private: typedef MapVector> PtrAccessMap; + /// Adjust the MemoryLocation so that it represents accesses to this + /// location across all iterations, rather than a single one. + MemoryLocation adjustLoc(MemoryLocation Loc) const { +// The accessed location varies within the loop, but remains within the +// underlying object. +Loc.Size = LocationSize::beforeOrAfterPointer(); +Loc.AATags.Scope = adjustAliasScopeList(Loc.AATags.Scope); +Loc.AATags.NoAlias = adjustAliasScopeList(Loc.AATags.NoAlias); +return Loc; + } + + /// Drop alias scopes that are only valid within a single loop iteration. + MDNode *adjustAliasScopeList(MDNode *ScopeList) const { +if (!ScopeList) + return nullptr; + +// For the sake of simplicity, drop the whole scope list if any scope is +// iteration-local. +if (any_of(ScopeList->operands(), [&](Metadata *Scope) { + return LoopAliasScopes.contains(cast(Scope)); +})) + return nullptr; + +return ScopeList; + } + /// Go over all memory access and check whether runtime pointer checks /// are needed and build sets of dependency check candidates. void processMemAccesses(); @@ -775,6 +803,10 @@ class AccessAnalysis { PredicatedScalarEvolution &PSE; DenseMap> UnderlyingObjects; + + /// Alias scopes that are declared inside the loop, and as such not valid + /// across iterations. + SmallPtrSetImpl &LoopAliasScopes; }; } // end anonymous namespace @@ -2283,6 +2315,7 @@ void LoopAccessInfo::analyzeLoop(AAResults *AA, LoopInfo *LI, // Holds the Load and Store instructions. SmallVector Loads; SmallVector Stores; + SmallPtrSet LoopAliasScopes; // Holds all the different accesses in the loop. 
unsigned NumReads = 0; @@ -2326,6 +2359,11 @@ void LoopAccessInfo::analyzeLoop(AAResults *AA, LoopInfo *LI, if (HasComplexMemInst) continue; + // Record alias scopes defined inside the loop. + if (auto *Decl = dyn_cast(&I)) +for (Metadata *Op : Decl->getScopeList()->operands()) + LoopAliasScopes.insert(cast(Op)); + // Many math library functions read the rounding mode. We will only // vectorize a loop if it contains known function calls that don't set // the flag. Therefore, it is safe to ignore this read from memory. @@ -2407,7 +2445,8 @@ void LoopAccessInfo::analyzeLoop(AAResults *AA, LoopInfo *LI, } MemoryDepChecker::DepCandidates DependentAccesses; - AccessAnalysis Accesses(TheLoop, AA, LI, DependentAccesses, *PSE); + AccessAnalysis Accesses(TheLoop, AA, LI, DependentAccesses, *PSE, + LoopAliasScopes); // Holds the analyzed pointers. We don't want to call getUnderlyingObjects // multiple times on the same object. If the ptr is accessed twice, once
[llvm-branch-commits] [llvm] PR for llvm/llvm-project#79571 (PR #79572)
https://github.com/llvmbot updated https://github.com/llvm/llvm-project/pull/79572 >From 79e6c8c32c2547c3f91952c4d43b3ebb1443eb35 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Wed, 24 Jan 2024 10:15:42 +0100 Subject: [PATCH] [MSSAUpdater] Handle simplified accesses when updating phis (#78272) This is a followup to #76819. After those changes, we can still run into an assertion failure for a slight variation of the test case: When fixing up MemoryPhis, we map the incoming access to the access of the cloned instruction -- which may now no longer exist. Fix this by reusing the getNewDefiningAccessForClone() helper, which will look upwards for a new defining access in that case. (cherry picked from commit a7a1b8b17e264fb0f2d2b4165cf9a7f5094b08b3) --- llvm/lib/Analysis/MemorySSAUpdater.cpp| 22 +--- .../memssa-readnone-access.ll | 104 ++ 2 files changed, 107 insertions(+), 19 deletions(-) diff --git a/llvm/lib/Analysis/MemorySSAUpdater.cpp b/llvm/lib/Analysis/MemorySSAUpdater.cpp index e87ae7d71fffe20..aa550f0b6a7bfd6 100644 --- a/llvm/lib/Analysis/MemorySSAUpdater.cpp +++ b/llvm/lib/Analysis/MemorySSAUpdater.cpp @@ -692,25 +692,9 @@ void MemorySSAUpdater::updateForClonedLoop(const LoopBlocksRPO &LoopBlocks, continue; // Determine incoming value and add it as incoming from IncBB. 
- if (MemoryUseOrDef *IncMUD = dyn_cast(IncomingAccess)) { -if (!MSSA->isLiveOnEntryDef(IncMUD)) { - Instruction *IncI = IncMUD->getMemoryInst(); - assert(IncI && "Found MemoryUseOrDef with no Instruction."); - if (Instruction *NewIncI = - cast_or_null(VMap.lookup(IncI))) { -IncMUD = MSSA->getMemoryAccess(NewIncI); -assert(IncMUD && - "MemoryUseOrDef cannot be null, all preds processed."); - } -} -NewPhi->addIncoming(IncMUD, IncBB); - } else { -MemoryPhi *IncPhi = cast(IncomingAccess); -if (MemoryAccess *NewDefPhi = MPhiMap.lookup(IncPhi)) - NewPhi->addIncoming(NewDefPhi, IncBB); -else - NewPhi->addIncoming(IncPhi, IncBB); - } + NewPhi->addIncoming( + getNewDefiningAccessForClone(IncomingAccess, VMap, MPhiMap, MSSA), + IncBB); } if (auto *SingleAccess = onlySingleValue(NewPhi)) { MPhiMap[Phi] = SingleAccess; diff --git a/llvm/test/Transforms/SimpleLoopUnswitch/memssa-readnone-access.ll b/llvm/test/Transforms/SimpleLoopUnswitch/memssa-readnone-access.ll index 2aaf777683e116f..c6e6608d4be383a 100644 --- a/llvm/test/Transforms/SimpleLoopUnswitch/memssa-readnone-access.ll +++ b/llvm/test/Transforms/SimpleLoopUnswitch/memssa-readnone-access.ll @@ -115,3 +115,107 @@ split: exit: ret void } + +; Variants of the above test with swapped branch destinations. 
+ +define void @test1_swapped(i1 %c) { +; CHECK-LABEL: define void @test1_swapped( +; CHECK-SAME: i1 [[C:%.*]]) { +; CHECK-NEXT: start: +; CHECK-NEXT:[[C_FR:%.*]] = freeze i1 [[C]] +; CHECK-NEXT:br i1 [[C_FR]], label [[START_SPLIT_US:%.*]], label [[START_SPLIT:%.*]] +; CHECK: start.split.us: +; CHECK-NEXT:br label [[LOOP_US:%.*]] +; CHECK: loop.us: +; CHECK-NEXT:call void @foo() +; CHECK-NEXT:br label [[LOOP_US]] +; CHECK: start.split: +; CHECK-NEXT:br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT:call void @foo() +; CHECK-NEXT:br label [[EXIT:%.*]] +; CHECK: exit: +; CHECK-NEXT:ret void +; +start: + br label %loop + +loop: + %fn = load ptr, ptr @vtable, align 8 + call void %fn() + br i1 %c, label %loop, label %exit + +exit: + ret void +} + +define void @test2_swapped(i1 %c, ptr %p) { +; CHECK-LABEL: define void @test2_swapped( +; CHECK-SAME: i1 [[C:%.*]], ptr [[P:%.*]]) { +; CHECK-NEXT:[[C_FR:%.*]] = freeze i1 [[C]] +; CHECK-NEXT:br i1 [[C_FR]], label [[DOTSPLIT_US:%.*]], label [[DOTSPLIT:%.*]] +; CHECK: .split.us: +; CHECK-NEXT:br label [[LOOP_US:%.*]] +; CHECK: loop.us: +; CHECK-NEXT:call void @foo() +; CHECK-NEXT:call void @bar() +; CHECK-NEXT:br label [[LOOP_US]] +; CHECK: .split: +; CHECK-NEXT:br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT:call void @foo() +; CHECK-NEXT:call void @bar() +; CHECK-NEXT:br label [[EXIT:%.*]] +; CHECK: exit: +; CHECK-NEXT:ret void +; + br label %loop + +loop: + %fn = load ptr, ptr @vtable, align 8 + call void %fn() + call void @bar() + br i1 %c, label %loop, label %exit + +exit: + ret void +} + +define void @test3_swapped(i1 %c, ptr %p) { +; CHECK-LABEL: define void @test3_swapped( +; CHECK-SAME: i1 [[C:%.*]], ptr [[P:%.*]]) { +; CHECK-NEXT:[[C_FR:%.*]] = freeze i1 [[C]] +; CHECK-NEXT:br i1 [[C_FR]], label [[DOTSPLIT_US:%.*]], label [[DOTSPLIT:%.*]] +; CHECK: .split.us: +; CHECK-NEXT:br label [[LOOP_US:%.*]] +; CHECK: loop.us: +; CHECK-NEXT:br label [[SPLIT_US:%.*]] +; CHECK:
[llvm-branch-commits] [llvm] PR for llvm/llvm-project#79571 (PR #79572)
llvmbot wrote: @llvm/pr-subscribers-llvm-transforms @llvm/pr-subscribers-llvm-analysis Author: None (github-actions[bot]) Changes resolves llvm/llvm-project#79571 --- Full diff: https://github.com/llvm/llvm-project/pull/79572.diff 2 Files Affected: - (modified) llvm/lib/Analysis/MemorySSAUpdater.cpp (+3-19) - (modified) llvm/test/Transforms/SimpleLoopUnswitch/memssa-readnone-access.ll (+104) ``diff diff --git a/llvm/lib/Analysis/MemorySSAUpdater.cpp b/llvm/lib/Analysis/MemorySSAUpdater.cpp index e87ae7d71fffe20..aa550f0b6a7bfd6 100644 --- a/llvm/lib/Analysis/MemorySSAUpdater.cpp +++ b/llvm/lib/Analysis/MemorySSAUpdater.cpp @@ -692,25 +692,9 @@ void MemorySSAUpdater::updateForClonedLoop(const LoopBlocksRPO &LoopBlocks, continue; // Determine incoming value and add it as incoming from IncBB. - if (MemoryUseOrDef *IncMUD = dyn_cast(IncomingAccess)) { -if (!MSSA->isLiveOnEntryDef(IncMUD)) { - Instruction *IncI = IncMUD->getMemoryInst(); - assert(IncI && "Found MemoryUseOrDef with no Instruction."); - if (Instruction *NewIncI = - cast_or_null(VMap.lookup(IncI))) { -IncMUD = MSSA->getMemoryAccess(NewIncI); -assert(IncMUD && - "MemoryUseOrDef cannot be null, all preds processed."); - } -} -NewPhi->addIncoming(IncMUD, IncBB); - } else { -MemoryPhi *IncPhi = cast(IncomingAccess); -if (MemoryAccess *NewDefPhi = MPhiMap.lookup(IncPhi)) - NewPhi->addIncoming(NewDefPhi, IncBB); -else - NewPhi->addIncoming(IncPhi, IncBB); - } + NewPhi->addIncoming( + getNewDefiningAccessForClone(IncomingAccess, VMap, MPhiMap, MSSA), + IncBB); } if (auto *SingleAccess = onlySingleValue(NewPhi)) { MPhiMap[Phi] = SingleAccess; diff --git a/llvm/test/Transforms/SimpleLoopUnswitch/memssa-readnone-access.ll b/llvm/test/Transforms/SimpleLoopUnswitch/memssa-readnone-access.ll index 2aaf777683e116f..c6e6608d4be383a 100644 --- a/llvm/test/Transforms/SimpleLoopUnswitch/memssa-readnone-access.ll +++ b/llvm/test/Transforms/SimpleLoopUnswitch/memssa-readnone-access.ll @@ -115,3 +115,107 @@ split: exit: ret 
void } + +; Variants of the above test with swapped branch destinations. + +define void @test1_swapped(i1 %c) { +; CHECK-LABEL: define void @test1_swapped( +; CHECK-SAME: i1 [[C:%.*]]) { +; CHECK-NEXT: start: +; CHECK-NEXT:[[C_FR:%.*]] = freeze i1 [[C]] +; CHECK-NEXT:br i1 [[C_FR]], label [[START_SPLIT_US:%.*]], label [[START_SPLIT:%.*]] +; CHECK: start.split.us: +; CHECK-NEXT:br label [[LOOP_US:%.*]] +; CHECK: loop.us: +; CHECK-NEXT:call void @foo() +; CHECK-NEXT:br label [[LOOP_US]] +; CHECK: start.split: +; CHECK-NEXT:br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT:call void @foo() +; CHECK-NEXT:br label [[EXIT:%.*]] +; CHECK: exit: +; CHECK-NEXT:ret void +; +start: + br label %loop + +loop: + %fn = load ptr, ptr @vtable, align 8 + call void %fn() + br i1 %c, label %loop, label %exit + +exit: + ret void +} + +define void @test2_swapped(i1 %c, ptr %p) { +; CHECK-LABEL: define void @test2_swapped( +; CHECK-SAME: i1 [[C:%.*]], ptr [[P:%.*]]) { +; CHECK-NEXT:[[C_FR:%.*]] = freeze i1 [[C]] +; CHECK-NEXT:br i1 [[C_FR]], label [[DOTSPLIT_US:%.*]], label [[DOTSPLIT:%.*]] +; CHECK: .split.us: +; CHECK-NEXT:br label [[LOOP_US:%.*]] +; CHECK: loop.us: +; CHECK-NEXT:call void @foo() +; CHECK-NEXT:call void @bar() +; CHECK-NEXT:br label [[LOOP_US]] +; CHECK: .split: +; CHECK-NEXT:br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT:call void @foo() +; CHECK-NEXT:call void @bar() +; CHECK-NEXT:br label [[EXIT:%.*]] +; CHECK: exit: +; CHECK-NEXT:ret void +; + br label %loop + +loop: + %fn = load ptr, ptr @vtable, align 8 + call void %fn() + call void @bar() + br i1 %c, label %loop, label %exit + +exit: + ret void +} + +define void @test3_swapped(i1 %c, ptr %p) { +; CHECK-LABEL: define void @test3_swapped( +; CHECK-SAME: i1 [[C:%.*]], ptr [[P:%.*]]) { +; CHECK-NEXT:[[C_FR:%.*]] = freeze i1 [[C]] +; CHECK-NEXT:br i1 [[C_FR]], label [[DOTSPLIT_US:%.*]], label [[DOTSPLIT:%.*]] +; CHECK: .split.us: +; CHECK-NEXT:br label [[LOOP_US:%.*]] +; CHECK: loop.us: +; CHECK-NEXT:br label 
[[SPLIT_US:%.*]] +; CHECK: split.us: +; CHECK-NEXT:call void @foo() +; CHECK-NEXT:call void @bar() +; CHECK-NEXT:br label [[LOOP_US]] +; CHECK: .split: +; CHECK-NEXT:br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT:br label [[SPLIT:%.*]] +; CHECK: split: +; CHECK-NEXT:call void @foo() +; CHECK-NEXT:call void @bar() +; CHECK-NEXT:br label [[EXIT:%.*]] +; CHECK: exit: +; CHECK-NEXT:ret void +; + br label %loop + +loop: + %fn = load ptr, ptr @vtable, align 8 + br label %split +
[llvm-branch-commits] [mlir] PR for llvm/llvm-project#79600 (PR #79603)
https://github.com/llvmbot updated https://github.com/llvm/llvm-project/pull/79603 >From 7714cd9232837ff150dc977b54a44f7de1a213e1 Mon Sep 17 00:00:00 2001 From: Andrei Golubev Date: Fri, 26 Jan 2024 15:27:51 +0200 Subject: [PATCH] [mlir][LLVM] Use int32_t to indirectly construct GEPArg (#79562) GEPArg can only be constructed from int32_t and mlir::Value. Explicitly cast other types (e.g. unsigned, size_t) to int32_t to avoid narrowing conversion warnings on MSVC. Some recent examples of such are: ``` mlir\lib\Dialect\LLVMIR\Transforms\TypeConsistency.cpp: error C2398: Element '1': conversion from 'size_t' to 'T' requires a narrowing conversion with [ T=mlir::LLVM::GEPArg ] mlir\lib\Dialect\LLVMIR\Transforms\TypeConsistency.cpp: error C2398: Element '1': conversion from 'unsigned int' to 'T' requires a narrowing conversion with [ T=mlir::LLVM::GEPArg ] ``` Co-authored-by: Nikita Kudriavtsev (cherry picked from commit 89cd345667a5f8f4c37c621fd8abe8d84e85c050) --- mlir/lib/Conversion/GPUCommon/GPUOpsLowering.cpp | 3 ++- mlir/lib/Conversion/GPUCommon/GPUToLLVMConversion.cpp | 9 +++++---- mlir/lib/Dialect/LLVMIR/Transforms/TypeConsistency.cpp | 9 +++++---- 3 files changed, 12 insertions(+), 9 deletions(-) diff --git a/mlir/lib/Conversion/GPUCommon/GPUOpsLowering.cpp b/mlir/lib/Conversion/GPUCommon/GPUOpsLowering.cpp index ae2bd8e5b5405d9..73d418cb8413276 100644 --- a/mlir/lib/Conversion/GPUCommon/GPUOpsLowering.cpp +++ b/mlir/lib/Conversion/GPUCommon/GPUOpsLowering.cpp @@ -529,7 +529,8 @@ LogicalResult GPUPrintfOpToVPrintfLowering::matchAndRewrite( /*alignment=*/0); for (auto [index, arg] : llvm::enumerate(args)) { Value ptr = rewriter.create<LLVM::GEPOp>( -loc, ptrType, structType, tempAlloc, ArrayRef<LLVM::GEPArg>{0, index}); +loc, ptrType, structType, tempAlloc, +ArrayRef<LLVM::GEPArg>{0, static_cast<int32_t>(index)}); rewriter.create<LLVM::StoreOp>(loc, arg, ptr); } std::array<Value, 2> printfArgs = {stringStart, tempAlloc}; diff --git a/mlir/lib/Conversion/GPUCommon/GPUToLLVMConversion.cpp b/mlir/lib/Conversion/GPUCommon/GPUToLLVMConversion.cpp index 
f853d5c47b623cf..78d4e8062468720 100644 --- a/mlir/lib/Conversion/GPUCommon/GPUToLLVMConversion.cpp +++ b/mlir/lib/Conversion/GPUCommon/GPUToLLVMConversion.cpp @@ -1041,13 +1041,14 @@ Value ConvertLaunchFuncOpToGpuRuntimeCallPattern::generateParamsArray( auto arrayPtr = builder.create<LLVM::AllocaOp>( loc, llvmPointerType, llvmPointerType, arraySize, /*alignment=*/0); for (const auto &en : llvm::enumerate(arguments)) { +const auto index = static_cast<int32_t>(en.index()); Value fieldPtr = builder.create<LLVM::GEPOp>(loc, llvmPointerType, structType, structPtr, -ArrayRef<LLVM::GEPArg>{0, en.index()}); +ArrayRef<LLVM::GEPArg>{0, index}); builder.create<LLVM::StoreOp>(loc, en.value(), fieldPtr); -auto elementPtr = builder.create<LLVM::GEPOp>( -loc, llvmPointerType, llvmPointerType, arrayPtr, -ArrayRef<LLVM::GEPArg>{en.index()}); +auto elementPtr = +builder.create<LLVM::GEPOp>(loc, llvmPointerType, llvmPointerType, +arrayPtr, ArrayRef<LLVM::GEPArg>{index}); builder.create<LLVM::StoreOp>(loc, fieldPtr, elementPtr); } return arrayPtr; diff --git a/mlir/lib/Dialect/LLVMIR/Transforms/TypeConsistency.cpp b/mlir/lib/Dialect/LLVMIR/Transforms/TypeConsistency.cpp index 72f9295749a66ba..b25c831bc7172a3 100644 --- a/mlir/lib/Dialect/LLVMIR/Transforms/TypeConsistency.cpp +++ b/mlir/lib/Dialect/LLVMIR/Transforms/TypeConsistency.cpp @@ -488,7 +488,8 @@ static void splitVectorStore(const DataLayout &dataLayout, Location loc, // Other patterns will turn this into a type-consistent GEP. auto gepOp = rewriter.create<GEPOp>( loc, address.getType(), rewriter.getI8Type(), address, -ArrayRef<GEPArg>{storeOffset + index * elementSize}); +ArrayRef<GEPArg>{ +static_cast<int32_t>(storeOffset + index * elementSize)}); rewriter.create<StoreOp>(loc, extractOp, gepOp); } @@ -524,9 +525,9 @@ static void splitIntegerStore(const DataLayout &dataLayout, Location loc, // We create an `i8` indexed GEP here as that is the easiest (offset is // already known). Other patterns turn this into a type-consistent GEP. 
-auto gepOp = -rewriter.create<GEPOp>(loc, address.getType(), rewriter.getI8Type(), - address, ArrayRef<GEPArg>{currentOffset}); +auto gepOp = rewriter.create<GEPOp>( +loc, address.getType(), rewriter.getI8Type(), address, +ArrayRef<GEPArg>{static_cast<int32_t>(currentOffset)}); rewriter.create<StoreOp>(loc, valueToStore, gepOp); // No need to care about padding here since we already checked previously ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [mlir] PR for llvm/llvm-project#79600 (PR #79603)
llvmbot wrote: @llvm/pr-subscribers-mlir-llvm Author: None (github-actions[bot]) Changes resolves llvm/llvm-project#79600 --- Full diff: https://github.com/llvm/llvm-project/pull/79603.diff 3 Files Affected: - (modified) mlir/lib/Conversion/GPUCommon/GPUOpsLowering.cpp (+2-1) - (modified) mlir/lib/Conversion/GPUCommon/GPUToLLVMConversion.cpp (+5-4) - (modified) mlir/lib/Dialect/LLVMIR/Transforms/TypeConsistency.cpp (+5-4) ```diff diff --git a/mlir/lib/Conversion/GPUCommon/GPUOpsLowering.cpp b/mlir/lib/Conversion/GPUCommon/GPUOpsLowering.cpp index ae2bd8e5b5405d9..73d418cb8413276 100644 --- a/mlir/lib/Conversion/GPUCommon/GPUOpsLowering.cpp +++ b/mlir/lib/Conversion/GPUCommon/GPUOpsLowering.cpp @@ -529,7 +529,8 @@ LogicalResult GPUPrintfOpToVPrintfLowering::matchAndRewrite( /*alignment=*/0); for (auto [index, arg] : llvm::enumerate(args)) { Value ptr = rewriter.create<LLVM::GEPOp>( -loc, ptrType, structType, tempAlloc, ArrayRef<LLVM::GEPArg>{0, index}); +loc, ptrType, structType, tempAlloc, +ArrayRef<LLVM::GEPArg>{0, static_cast<int32_t>(index)}); rewriter.create<LLVM::StoreOp>(loc, arg, ptr); } std::array<Value, 2> printfArgs = {stringStart, tempAlloc}; diff --git a/mlir/lib/Conversion/GPUCommon/GPUToLLVMConversion.cpp b/mlir/lib/Conversion/GPUCommon/GPUToLLVMConversion.cpp index f853d5c47b623cf..78d4e8062468720 100644 --- a/mlir/lib/Conversion/GPUCommon/GPUToLLVMConversion.cpp +++ b/mlir/lib/Conversion/GPUCommon/GPUToLLVMConversion.cpp @@ -1041,13 +1041,14 @@ Value ConvertLaunchFuncOpToGpuRuntimeCallPattern::generateParamsArray( auto arrayPtr = builder.create<LLVM::AllocaOp>( loc, llvmPointerType, llvmPointerType, arraySize, /*alignment=*/0); for (const auto &en : llvm::enumerate(arguments)) { +const auto index = static_cast<int32_t>(en.index()); Value fieldPtr = builder.create<LLVM::GEPOp>(loc, llvmPointerType, structType, structPtr, -ArrayRef<LLVM::GEPArg>{0, en.index()}); +ArrayRef<LLVM::GEPArg>{0, index}); builder.create<LLVM::StoreOp>(loc, en.value(), fieldPtr); -auto elementPtr = builder.create<LLVM::GEPOp>( -loc, llvmPointerType, llvmPointerType, arrayPtr, -ArrayRef<LLVM::GEPArg>{en.index()}); +auto elementPtr = +builder.create<LLVM::GEPOp>(loc, 
llvmPointerType, llvmPointerType, +arrayPtr, ArrayRef<LLVM::GEPArg>{index}); builder.create<LLVM::StoreOp>(loc, fieldPtr, elementPtr); } return arrayPtr; diff --git a/mlir/lib/Dialect/LLVMIR/Transforms/TypeConsistency.cpp b/mlir/lib/Dialect/LLVMIR/Transforms/TypeConsistency.cpp index 72f9295749a66ba..b25c831bc7172a3 100644 --- a/mlir/lib/Dialect/LLVMIR/Transforms/TypeConsistency.cpp +++ b/mlir/lib/Dialect/LLVMIR/Transforms/TypeConsistency.cpp @@ -488,7 +488,8 @@ static void splitVectorStore(const DataLayout &dataLayout, Location loc, // Other patterns will turn this into a type-consistent GEP. auto gepOp = rewriter.create<GEPOp>( loc, address.getType(), rewriter.getI8Type(), address, -ArrayRef<GEPArg>{storeOffset + index * elementSize}); +ArrayRef<GEPArg>{ +static_cast<int32_t>(storeOffset + index * elementSize)}); rewriter.create<StoreOp>(loc, extractOp, gepOp); } @@ -524,9 +525,9 @@ static void splitIntegerStore(const DataLayout &dataLayout, Location loc, // We create an `i8` indexed GEP here as that is the easiest (offset is // already known). Other patterns turn this into a type-consistent GEP. -auto gepOp = -rewriter.create<GEPOp>(loc, address.getType(), rewriter.getI8Type(), - address, ArrayRef<GEPArg>{currentOffset}); +auto gepOp = rewriter.create<GEPOp>( +loc, address.getType(), rewriter.getI8Type(), address, +ArrayRef<GEPArg>{static_cast<int32_t>(currentOffset)}); rewriter.create<StoreOp>(loc, valueToStore, gepOp); // No need to care about padding here since we already checked previously ``` https://github.com/llvm/llvm-project/pull/79603 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [mlir] PR for llvm/llvm-project#79600 (PR #79603)
llvmbot wrote: @llvm/pr-subscribers-mlir-gpu Author: None (github-actions[bot]) Changes resolves llvm/llvm-project#79600 --- Full diff: https://github.com/llvm/llvm-project/pull/79603.diff 3 Files Affected: - (modified) mlir/lib/Conversion/GPUCommon/GPUOpsLowering.cpp (+2-1) - (modified) mlir/lib/Conversion/GPUCommon/GPUToLLVMConversion.cpp (+5-4) - (modified) mlir/lib/Dialect/LLVMIR/Transforms/TypeConsistency.cpp (+5-4) ```diff diff --git a/mlir/lib/Conversion/GPUCommon/GPUOpsLowering.cpp b/mlir/lib/Conversion/GPUCommon/GPUOpsLowering.cpp index ae2bd8e5b5405d..73d418cb841327 100644 --- a/mlir/lib/Conversion/GPUCommon/GPUOpsLowering.cpp +++ b/mlir/lib/Conversion/GPUCommon/GPUOpsLowering.cpp @@ -529,7 +529,8 @@ LogicalResult GPUPrintfOpToVPrintfLowering::matchAndRewrite( /*alignment=*/0); for (auto [index, arg] : llvm::enumerate(args)) { Value ptr = rewriter.create<LLVM::GEPOp>( -loc, ptrType, structType, tempAlloc, ArrayRef<LLVM::GEPArg>{0, index}); +loc, ptrType, structType, tempAlloc, +ArrayRef<LLVM::GEPArg>{0, static_cast<int32_t>(index)}); rewriter.create<LLVM::StoreOp>(loc, arg, ptr); } std::array<Value, 2> printfArgs = {stringStart, tempAlloc}; diff --git a/mlir/lib/Conversion/GPUCommon/GPUToLLVMConversion.cpp b/mlir/lib/Conversion/GPUCommon/GPUToLLVMConversion.cpp index f853d5c47b623c..78d4e806246872 100644 --- a/mlir/lib/Conversion/GPUCommon/GPUToLLVMConversion.cpp +++ b/mlir/lib/Conversion/GPUCommon/GPUToLLVMConversion.cpp @@ -1041,13 +1041,14 @@ Value ConvertLaunchFuncOpToGpuRuntimeCallPattern::generateParamsArray( auto arrayPtr = builder.create<LLVM::AllocaOp>( loc, llvmPointerType, llvmPointerType, arraySize, /*alignment=*/0); for (const auto &en : llvm::enumerate(arguments)) { +const auto index = static_cast<int32_t>(en.index()); Value fieldPtr = builder.create<LLVM::GEPOp>(loc, llvmPointerType, structType, structPtr, -ArrayRef<LLVM::GEPArg>{0, en.index()}); +ArrayRef<LLVM::GEPArg>{0, index}); builder.create<LLVM::StoreOp>(loc, en.value(), fieldPtr); -auto elementPtr = builder.create<LLVM::GEPOp>( -loc, llvmPointerType, llvmPointerType, arrayPtr, -ArrayRef<LLVM::GEPArg>{en.index()}); +auto elementPtr = +builder.create<LLVM::GEPOp>(loc, 
llvmPointerType, llvmPointerType, +arrayPtr, ArrayRef<LLVM::GEPArg>{index}); builder.create<LLVM::StoreOp>(loc, fieldPtr, elementPtr); } return arrayPtr; diff --git a/mlir/lib/Dialect/LLVMIR/Transforms/TypeConsistency.cpp b/mlir/lib/Dialect/LLVMIR/Transforms/TypeConsistency.cpp index 72f9295749a66b..b25c831bc7172a 100644 --- a/mlir/lib/Dialect/LLVMIR/Transforms/TypeConsistency.cpp +++ b/mlir/lib/Dialect/LLVMIR/Transforms/TypeConsistency.cpp @@ -488,7 +488,8 @@ static void splitVectorStore(const DataLayout &dataLayout, Location loc, // Other patterns will turn this into a type-consistent GEP. auto gepOp = rewriter.create<GEPOp>( loc, address.getType(), rewriter.getI8Type(), address, -ArrayRef<GEPArg>{storeOffset + index * elementSize}); +ArrayRef<GEPArg>{ +static_cast<int32_t>(storeOffset + index * elementSize)}); rewriter.create<StoreOp>(loc, extractOp, gepOp); } @@ -524,9 +525,9 @@ static void splitIntegerStore(const DataLayout &dataLayout, Location loc, // We create an `i8` indexed GEP here as that is the easiest (offset is // already known). Other patterns turn this into a type-consistent GEP. -auto gepOp = -rewriter.create<GEPOp>(loc, address.getType(), rewriter.getI8Type(), - address, ArrayRef<GEPArg>{currentOffset}); +auto gepOp = rewriter.create<GEPOp>( +loc, address.getType(), rewriter.getI8Type(), address, +ArrayRef<GEPArg>{static_cast<int32_t>(currentOffset)}); rewriter.create<StoreOp>(loc, valueToStore, gepOp); // No need to care about padding here since we already checked previously ``` https://github.com/llvm/llvm-project/pull/79603 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits