[llvm-branch-commits] mimplid->mimpid (PR #116745)
https://github.com/wangpc-pp created https://github.com/llvm/llvm-project/pull/116745 None ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [CodeGen][NewPM] Port SpillPlacement analysis to NPM (PR #116618)
github-actions[bot] wrote: :warning: C/C++ code formatter, clang-format found issues in your code. :warning: You can test this locally with the following command: ``bash git-clang-format --diff 8a5db30a3841b88ccac2c781d933eeb45560fdfa 2dc76a68ef3d64d656b537206ad892dae1759415 --extensions cpp,h -- llvm/include/llvm/InitializePasses.h llvm/lib/CodeGen/RegAllocGreedy.cpp llvm/lib/CodeGen/RegAllocGreedy.h llvm/lib/CodeGen/SpillPlacement.cpp llvm/lib/Passes/PassBuilder.cpp llvm/include/llvm/CodeGen/SpillPlacement.h `` View the diff from clang-format here. ``diff diff --git a/llvm/include/llvm/CodeGen/SpillPlacement.h b/llvm/include/llvm/CodeGen/SpillPlacement.h index c114acb1d0..90167d3362 100644 --- a/llvm/include/llvm/CodeGen/SpillPlacement.h +++ b/llvm/include/llvm/CodeGen/SpillPlacement.h @@ -163,7 +163,7 @@ public: private: // Only for use by legacy pass manager. - SpillPlacement() : nodes(nullptr, &arrayDeleter){}; + SpillPlacement() : nodes(nullptr, &arrayDeleter) {}; void releaseMemory() { nodes.reset(); `` https://github.com/llvm/llvm-project/pull/116618 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [CodeGen][NewPM] Port SpillPlacement analysis to NPM (PR #116618)
https://github.com/optimisan updated https://github.com/llvm/llvm-project/pull/116618 >From c791eaa8768073b3ef770a59859346a859bd7a7f Mon Sep 17 00:00:00 2001 From: Akshat Oke Date: Mon, 18 Nov 2024 12:42:00 + Subject: [PATCH 1/2] [CodeGen][NewPM] Port SpillPlacement analysis to NPM --- llvm/include/llvm/InitializePasses.h | 2 +- llvm/lib/CodeGen/RegAllocGreedy.cpp | 6 +- llvm/lib/CodeGen/SpillPlacement.cpp | 91 ++-- llvm/lib/CodeGen/SpillPlacement.h| 52 +--- 4 files changed, 104 insertions(+), 47 deletions(-) diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h index fb8356b9c98cb9..728b178e0cdad7 100644 --- a/llvm/include/llvm/InitializePasses.h +++ b/llvm/include/llvm/InitializePasses.h @@ -289,7 +289,7 @@ void initializeSinkingLegacyPassPass(PassRegistry &); void initializeSjLjEHPreparePass(PassRegistry &); void initializeSlotIndexesWrapperPassPass(PassRegistry &); void initializeSpeculativeExecutionLegacyPassPass(PassRegistry &); -void initializeSpillPlacementPass(PassRegistry &); +void initializeSpillPlacementWrapperLegacyPass(PassRegistry &); void initializeStackColoringLegacyPass(PassRegistry &); void initializeStackFrameLayoutAnalysisPassPass(PassRegistry &); void initializeStackMapLivenessPass(PassRegistry &); diff --git a/llvm/lib/CodeGen/RegAllocGreedy.cpp b/llvm/lib/CodeGen/RegAllocGreedy.cpp index 3542bfe18af46f..3fdf2d6e07a75f 100644 --- a/llvm/lib/CodeGen/RegAllocGreedy.cpp +++ b/llvm/lib/CodeGen/RegAllocGreedy.cpp @@ -162,7 +162,7 @@ INITIALIZE_PASS_DEPENDENCY(MachineLoopInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(VirtRegMapWrapperLegacy) INITIALIZE_PASS_DEPENDENCY(LiveRegMatrixWrapperLegacy) INITIALIZE_PASS_DEPENDENCY(EdgeBundlesWrapperLegacy) -INITIALIZE_PASS_DEPENDENCY(SpillPlacement) +INITIALIZE_PASS_DEPENDENCY(SpillPlacementWrapperLegacy) INITIALIZE_PASS_DEPENDENCY(MachineOptimizationRemarkEmitterPass) INITIALIZE_PASS_DEPENDENCY(RegAllocEvictionAdvisorAnalysis) 
INITIALIZE_PASS_DEPENDENCY(RegAllocPriorityAdvisorAnalysis) @@ -217,7 +217,7 @@ void RAGreedy::getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired(); AU.addPreserved(); AU.addRequired(); - AU.addRequired(); + AU.addRequired(); AU.addRequired(); AU.addRequired(); AU.addRequired(); @@ -2731,7 +2731,7 @@ bool RAGreedy::runOnMachineFunction(MachineFunction &mf) { ORE = &getAnalysis().getORE(); Loops = &getAnalysis().getLI(); Bundles = &getAnalysis().getEdgeBundles(); - SpillPlacer = &getAnalysis(); + SpillPlacer = &getAnalysis().getResult(); DebugVars = &getAnalysis(); initializeCSRCost(); diff --git a/llvm/lib/CodeGen/SpillPlacement.cpp b/llvm/lib/CodeGen/SpillPlacement.cpp index 318e2b19322bb4..c9baabf6161d3a 100644 --- a/llvm/lib/CodeGen/SpillPlacement.cpp +++ b/llvm/lib/CodeGen/SpillPlacement.cpp @@ -44,17 +44,17 @@ using namespace llvm; #define DEBUG_TYPE "spill-code-placement" -char SpillPlacement::ID = 0; +char SpillPlacementWrapperLegacy::ID = 0; -char &llvm::SpillPlacementID = SpillPlacement::ID; +char &llvm::SpillPlacementID = SpillPlacementWrapperLegacy::ID; -INITIALIZE_PASS_BEGIN(SpillPlacement, DEBUG_TYPE, +INITIALIZE_PASS_BEGIN(SpillPlacementWrapperLegacy, DEBUG_TYPE, "Spill Code Placement Analysis", true, true) INITIALIZE_PASS_DEPENDENCY(EdgeBundlesWrapperLegacy) -INITIALIZE_PASS_END(SpillPlacement, DEBUG_TYPE, +INITIALIZE_PASS_END(SpillPlacementWrapperLegacy, DEBUG_TYPE, "Spill Code Placement Analysis", true, true) -void SpillPlacement::getAnalysisUsage(AnalysisUsage &AU) const { +void SpillPlacementWrapperLegacy::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesAll(); AU.addRequired(); AU.addRequiredTransitive(); @@ -189,32 +189,57 @@ struct SpillPlacement::Node { } }; -bool SpillPlacement::runOnMachineFunction(MachineFunction &mf) { +bool SpillPlacementWrapperLegacy::runOnMachineFunction(MachineFunction &MF) { + auto *Bundles = &getAnalysis().getEdgeBundles(); + auto *MBFI = &getAnalysis().getMBFI(); + + Impl.reset(new 
SpillPlacement(Bundles, MBFI)); + Impl->run(MF); + return false; +} + +AnalysisKey SpillPlacementAnalysis::Key; + +SpillPlacement +SpillPlacementAnalysis::run(MachineFunction &MF, +MachineFunctionAnalysisManager &MFAM) { + auto *Bundles = &MFAM.getResult(MF); + auto *MBFI = &MFAM.getResult(MF); + SpillPlacement Impl(Bundles, MBFI); + Impl.run(MF); + return Impl; +} + +bool SpillPlacementAnalysis::Result::invalidate( +MachineFunction &MF, const PreservedAnalyses &PA, +MachineFunctionAnalysisManager::Invalidator &Inv) { + auto PAC = PA.getChecker(); + return !(PAC.preserved() || + PAC.preservedSet>()) || + Inv.invalidate(MF, PA) || + Inv.invalidate(MF, PA); +} + +void SpillPlacement::arrayDeleter(Node *N) { + if (N) +delete[] N; +} + +void SpillPlacement::run(MachineFunction &mf) { MF = &m
[llvm-branch-commits] [openmp] release/19.x: [OpenMP] Create versioned libgomp softlinks (#112973) (PR #115944)
tru wrote: I think it makes more sense to do this change in 20.x instead of 19.x. https://github.com/llvm/llvm-project/pull/115944 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [llvm] [RISCV] Support __builtin_cpu_is (PR #116231)
https://github.com/wangpc-pp updated https://github.com/llvm/llvm-project/pull/116231 >From 9686a2c5c5276289e72d9098f497a9f246a1c457 Mon Sep 17 00:00:00 2001 From: Wang Pengcheng Date: Thu, 14 Nov 2024 22:06:45 +0800 Subject: [PATCH 1/4] Remove stale CHECKs Created using spr 1.3.6-beta.1 --- clang/test/CodeGen/builtin-cpu-is.c | 20 1 file changed, 20 deletions(-) diff --git a/clang/test/CodeGen/builtin-cpu-is.c b/clang/test/CodeGen/builtin-cpu-is.c index e4a2071cf46795..b8dd97eeacebcf 100644 --- a/clang/test/CodeGen/builtin-cpu-is.c +++ b/clang/test/CodeGen/builtin-cpu-is.c @@ -7,8 +7,6 @@ // global, the bit grab, and the icmp correct. extern void a(const char *); -// CHECK: @__cpu_model = external dso_local global { i32, i32, i32, [1 x i32] } - // CHECK-X86-LABEL: define dso_local void @intel( // CHECK-X86-SAME: ) #[[ATTR0:[0-9]+]] { // CHECK-X86-NEXT: [[ENTRY:.*:]] @@ -24,9 +22,6 @@ extern void a(const char *); void intel(void) { if (__builtin_cpu_is("intel")) a("intel"); - - // CHECK: [[LOAD:%[^ ]+]] = load i32, ptr @__cpu_model - // CHECK: = icmp eq i32 [[LOAD]], 1 } // CHECK-X86-LABEL: define dso_local void @amd( @@ -44,9 +39,6 @@ void intel(void) { void amd(void) { if (__builtin_cpu_is("amd")) a("amd"); - - // CHECK: [[LOAD:%[^ ]+]] = load i32, ptr @__cpu_model - // CHECK: = icmp eq i32 [[LOAD]], 2 } // CHECK-X86-LABEL: define dso_local void @atom( @@ -64,9 +56,6 @@ void amd(void) { void atom(void) { if (__builtin_cpu_is("atom")) a("atom"); - - // CHECK: [[LOAD:%[^ ]+]] = load i32, ptr getelementptr inbounds ({ i32, i32, i32, [1 x i32] }, ptr @__cpu_model, i32 0, i32 1) - // CHECK: = icmp eq i32 [[LOAD]], 1 } // CHECK-X86-LABEL: define dso_local void @amdfam10h( @@ -84,9 +73,6 @@ void atom(void) { void amdfam10h(void) { if (__builtin_cpu_is("amdfam10h")) a("amdfam10h"); - - // CHECK: [[LOAD:%[^ ]+]] = load i32, ptr getelementptr inbounds ({ i32, i32, i32, [1 x i32] }, ptr @__cpu_model, i32 0, i32 1) - // CHECK: = icmp eq i32 [[LOAD]], 4 } // CHECK-X86-LABEL: 
define dso_local void @barcelona( @@ -104,9 +90,6 @@ void amdfam10h(void) { void barcelona(void) { if (__builtin_cpu_is("barcelona")) a("barcelona"); - - // CHECK: [[LOAD:%[^ ]+]] = load i32, ptr getelementptr inbounds ({ i32, i32, i32, [1 x i32] }, ptr @__cpu_model, i32 0, i32 2) - // CHECK: = icmp eq i32 [[LOAD]], 4 } // CHECK-X86-LABEL: define dso_local void @nehalem( @@ -124,9 +107,6 @@ void barcelona(void) { void nehalem(void) { if (__builtin_cpu_is("nehalem")) a("nehalem"); - - // CHECK: [[LOAD:%[^ ]+]] = load i32, ptr getelementptr inbounds ({ i32, i32, i32, [1 x i32] }, ptr @__cpu_model, i32 0, i32 2) - // CHECK: = icmp eq i32 [[LOAD]], 1 } #endif >From 2bb2d5079b5bf98ba9f87e082ca3e67ab70068aa Mon Sep 17 00:00:00 2001 From: Wang Pengcheng Date: Thu, 14 Nov 2024 22:12:36 +0800 Subject: [PATCH 2/4] Simplify test Created using spr 1.3.6-beta.1 --- clang/test/CodeGen/builtin-cpu-is.c | 25 ++--- 1 file changed, 6 insertions(+), 19 deletions(-) diff --git a/clang/test/CodeGen/builtin-cpu-is.c b/clang/test/CodeGen/builtin-cpu-is.c index b8dd97eeacebcf..8e78213a7cfcfb 100644 --- a/clang/test/CodeGen/builtin-cpu-is.c +++ b/clang/test/CodeGen/builtin-cpu-is.c @@ -111,12 +111,9 @@ void nehalem(void) { #endif #ifdef __riscv -// CHECK-RV64-LABEL: define dso_local signext i32 @test_riscv( -// CHECK-RV64-SAME: i32 noundef signext [[A:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-LABEL: define dso_local signext i32 @test_cpu_is_veyron_v1( +// CHECK-RV64-SAME: ) #[[ATTR0:[0-9]+]] { // CHECK-RV64-NEXT: [[ENTRY:.*:]] -// CHECK-RV64-NEXT:[[RETVAL:%.*]] = alloca i32, align 4 -// CHECK-RV64-NEXT:[[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK-RV64-NEXT:store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK-RV64-NEXT:[[TMP0:%.*]] = load i32, ptr @__riscv_cpu_model, align 4 // CHECK-RV64-NEXT:[[TMP1:%.*]] = icmp eq i32 [[TMP0]], 1567 // CHECK-RV64-NEXT:[[TMP2:%.*]] = load i64, ptr getelementptr inbounds ({ i32, i64, i64 }, ptr @__riscv_cpu_model, i32 0, i32 1), align 8 @@ -125,20 +122,10 @@ 
void nehalem(void) { // CHECK-RV64-NEXT:[[TMP5:%.*]] = load i64, ptr getelementptr inbounds ({ i32, i64, i64 }, ptr @__riscv_cpu_model, i32 0, i32 2), align 8 // CHECK-RV64-NEXT:[[TMP6:%.*]] = icmp eq i64 [[TMP5]], 273 // CHECK-RV64-NEXT:[[TMP7:%.*]] = and i1 [[TMP4]], [[TMP6]] -// CHECK-RV64-NEXT:br i1 [[TMP7]], label %[[IF_THEN:.*]], label %[[IF_END:.*]] -// CHECK-RV64: [[IF_THEN]]: -// CHECK-RV64-NEXT:store i32 3, ptr [[RETVAL]], align 4 -// CHECK-RV64-NEXT:br label %[[RETURN:.*]] -// CHECK-RV64: [[IF_END]]: -// CHECK-RV64-NEXT:store i32 0, ptr [[RETVAL]], align 4 -// CHECK-RV64-NEXT:br label %[[RETURN]] -// CHECK-RV64: [[RETURN]]: -// CHECK-RV64-NEXT:[[TMP8:%.*]] = load i32, ptr [[RETVAL]], align 4 -// CHECK-RV64-NEXT:ret i32 [[TM
[llvm-branch-commits] [clang] [llvm] [RISCV] Support __builtin_cpu_is (PR #116231)
https://github.com/wangpc-pp updated https://github.com/llvm/llvm-project/pull/116231 >From 9686a2c5c5276289e72d9098f497a9f246a1c457 Mon Sep 17 00:00:00 2001 From: Wang Pengcheng Date: Thu, 14 Nov 2024 22:06:45 +0800 Subject: [PATCH 1/4] Remove stale CHECKs Created using spr 1.3.6-beta.1 --- clang/test/CodeGen/builtin-cpu-is.c | 20 1 file changed, 20 deletions(-) diff --git a/clang/test/CodeGen/builtin-cpu-is.c b/clang/test/CodeGen/builtin-cpu-is.c index e4a2071cf46795..b8dd97eeacebcf 100644 --- a/clang/test/CodeGen/builtin-cpu-is.c +++ b/clang/test/CodeGen/builtin-cpu-is.c @@ -7,8 +7,6 @@ // global, the bit grab, and the icmp correct. extern void a(const char *); -// CHECK: @__cpu_model = external dso_local global { i32, i32, i32, [1 x i32] } - // CHECK-X86-LABEL: define dso_local void @intel( // CHECK-X86-SAME: ) #[[ATTR0:[0-9]+]] { // CHECK-X86-NEXT: [[ENTRY:.*:]] @@ -24,9 +22,6 @@ extern void a(const char *); void intel(void) { if (__builtin_cpu_is("intel")) a("intel"); - - // CHECK: [[LOAD:%[^ ]+]] = load i32, ptr @__cpu_model - // CHECK: = icmp eq i32 [[LOAD]], 1 } // CHECK-X86-LABEL: define dso_local void @amd( @@ -44,9 +39,6 @@ void intel(void) { void amd(void) { if (__builtin_cpu_is("amd")) a("amd"); - - // CHECK: [[LOAD:%[^ ]+]] = load i32, ptr @__cpu_model - // CHECK: = icmp eq i32 [[LOAD]], 2 } // CHECK-X86-LABEL: define dso_local void @atom( @@ -64,9 +56,6 @@ void amd(void) { void atom(void) { if (__builtin_cpu_is("atom")) a("atom"); - - // CHECK: [[LOAD:%[^ ]+]] = load i32, ptr getelementptr inbounds ({ i32, i32, i32, [1 x i32] }, ptr @__cpu_model, i32 0, i32 1) - // CHECK: = icmp eq i32 [[LOAD]], 1 } // CHECK-X86-LABEL: define dso_local void @amdfam10h( @@ -84,9 +73,6 @@ void atom(void) { void amdfam10h(void) { if (__builtin_cpu_is("amdfam10h")) a("amdfam10h"); - - // CHECK: [[LOAD:%[^ ]+]] = load i32, ptr getelementptr inbounds ({ i32, i32, i32, [1 x i32] }, ptr @__cpu_model, i32 0, i32 1) - // CHECK: = icmp eq i32 [[LOAD]], 4 } // CHECK-X86-LABEL: 
define dso_local void @barcelona( @@ -104,9 +90,6 @@ void amdfam10h(void) { void barcelona(void) { if (__builtin_cpu_is("barcelona")) a("barcelona"); - - // CHECK: [[LOAD:%[^ ]+]] = load i32, ptr getelementptr inbounds ({ i32, i32, i32, [1 x i32] }, ptr @__cpu_model, i32 0, i32 2) - // CHECK: = icmp eq i32 [[LOAD]], 4 } // CHECK-X86-LABEL: define dso_local void @nehalem( @@ -124,9 +107,6 @@ void barcelona(void) { void nehalem(void) { if (__builtin_cpu_is("nehalem")) a("nehalem"); - - // CHECK: [[LOAD:%[^ ]+]] = load i32, ptr getelementptr inbounds ({ i32, i32, i32, [1 x i32] }, ptr @__cpu_model, i32 0, i32 2) - // CHECK: = icmp eq i32 [[LOAD]], 1 } #endif >From 2bb2d5079b5bf98ba9f87e082ca3e67ab70068aa Mon Sep 17 00:00:00 2001 From: Wang Pengcheng Date: Thu, 14 Nov 2024 22:12:36 +0800 Subject: [PATCH 2/4] Simplify test Created using spr 1.3.6-beta.1 --- clang/test/CodeGen/builtin-cpu-is.c | 25 ++--- 1 file changed, 6 insertions(+), 19 deletions(-) diff --git a/clang/test/CodeGen/builtin-cpu-is.c b/clang/test/CodeGen/builtin-cpu-is.c index b8dd97eeacebcf..8e78213a7cfcfb 100644 --- a/clang/test/CodeGen/builtin-cpu-is.c +++ b/clang/test/CodeGen/builtin-cpu-is.c @@ -111,12 +111,9 @@ void nehalem(void) { #endif #ifdef __riscv -// CHECK-RV64-LABEL: define dso_local signext i32 @test_riscv( -// CHECK-RV64-SAME: i32 noundef signext [[A:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-LABEL: define dso_local signext i32 @test_cpu_is_veyron_v1( +// CHECK-RV64-SAME: ) #[[ATTR0:[0-9]+]] { // CHECK-RV64-NEXT: [[ENTRY:.*:]] -// CHECK-RV64-NEXT:[[RETVAL:%.*]] = alloca i32, align 4 -// CHECK-RV64-NEXT:[[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK-RV64-NEXT:store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK-RV64-NEXT:[[TMP0:%.*]] = load i32, ptr @__riscv_cpu_model, align 4 // CHECK-RV64-NEXT:[[TMP1:%.*]] = icmp eq i32 [[TMP0]], 1567 // CHECK-RV64-NEXT:[[TMP2:%.*]] = load i64, ptr getelementptr inbounds ({ i32, i64, i64 }, ptr @__riscv_cpu_model, i32 0, i32 1), align 8 @@ -125,20 +122,10 @@ 
void nehalem(void) { // CHECK-RV64-NEXT:[[TMP5:%.*]] = load i64, ptr getelementptr inbounds ({ i32, i64, i64 }, ptr @__riscv_cpu_model, i32 0, i32 2), align 8 // CHECK-RV64-NEXT:[[TMP6:%.*]] = icmp eq i64 [[TMP5]], 273 // CHECK-RV64-NEXT:[[TMP7:%.*]] = and i1 [[TMP4]], [[TMP6]] -// CHECK-RV64-NEXT:br i1 [[TMP7]], label %[[IF_THEN:.*]], label %[[IF_END:.*]] -// CHECK-RV64: [[IF_THEN]]: -// CHECK-RV64-NEXT:store i32 3, ptr [[RETVAL]], align 4 -// CHECK-RV64-NEXT:br label %[[RETURN:.*]] -// CHECK-RV64: [[IF_END]]: -// CHECK-RV64-NEXT:store i32 0, ptr [[RETVAL]], align 4 -// CHECK-RV64-NEXT:br label %[[RETURN]] -// CHECK-RV64: [[RETURN]]: -// CHECK-RV64-NEXT:[[TMP8:%.*]] = load i32, ptr [[RETVAL]], align 4 -// CHECK-RV64-NEXT:ret i32 [[TM
[llvm-branch-commits] mimplid->mimpid (PR #116745)
https://github.com/wangpc-pp closed https://github.com/llvm/llvm-project/pull/116745 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] mimplid->mimpid (PR #116745)
llvmbot wrote: @llvm/pr-subscribers-clang-codegen Author: Pengcheng Wang (wangpc-pp) Changes --- Full diff: https://github.com/llvm/llvm-project/pull/116745.diff 1 Files Affected: - (modified) clang/lib/CodeGen/CGBuiltin.cpp (+3-3) ``diff diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 24f6209af7afe4..84626f023ec3c1 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -22539,10 +22539,10 @@ Value *CodeGenFunction::EmitRISCVCpuIs(StringRef CPUStr) { Result = Builder.CreateAnd( Result, Builder.CreateICmpEQ(ArchID, Builder.getInt64(CPUModel.MArchID))); - // Compare mimplid. - Value *ImplID = loadRISCVCPUID(2); + // Compare mimpid. + Value *ImpID = loadRISCVCPUID(2); Result = Builder.CreateAnd( - Result, Builder.CreateICmpEQ(ImplID, Builder.getInt64(CPUModel.MImpID))); + Result, Builder.CreateICmpEQ(ImpID, Builder.getInt64(CPUModel.MImpID))); return Result; } `` https://github.com/llvm/llvm-project/pull/116745 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] mimplid->mimpid (PR #116745)
llvmbot wrote: @llvm/pr-subscribers-clang Author: Pengcheng Wang (wangpc-pp) Changes --- Full diff: https://github.com/llvm/llvm-project/pull/116745.diff 1 Files Affected: - (modified) clang/lib/CodeGen/CGBuiltin.cpp (+3-3) ``diff diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 24f6209af7afe4..84626f023ec3c1 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -22539,10 +22539,10 @@ Value *CodeGenFunction::EmitRISCVCpuIs(StringRef CPUStr) { Result = Builder.CreateAnd( Result, Builder.CreateICmpEQ(ArchID, Builder.getInt64(CPUModel.MArchID))); - // Compare mimplid. - Value *ImplID = loadRISCVCPUID(2); + // Compare mimpid. + Value *ImpID = loadRISCVCPUID(2); Result = Builder.CreateAnd( - Result, Builder.CreateICmpEQ(ImplID, Builder.getInt64(CPUModel.MImpID))); + Result, Builder.CreateICmpEQ(ImpID, Builder.getInt64(CPUModel.MImpID))); return Result; } `` https://github.com/llvm/llvm-project/pull/116745 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [llvm] [RISCV] Support __builtin_cpu_is (PR #116231)
@@ -22505,6 +22506,47 @@ Value *CodeGenFunction::EmitHexagonBuiltinExpr(unsigned BuiltinID, return nullptr; } +Value *CodeGenFunction::EmitRISCVCpuIs(const CallExpr *E) { + const Expr *CPUExpr = E->getArg(0)->IgnoreParenCasts(); + StringRef CPUStr = cast(CPUExpr)->getString(); + return EmitRISCVCpuIs(CPUStr); +} + +Value *CodeGenFunction::EmitRISCVCpuIs(StringRef CPUStr) { + llvm::Type *Int32Ty = Builder.getInt32Ty(); + llvm::Type *Int64Ty = Builder.getInt64Ty(); + llvm::StructType *StructTy = llvm::StructType::get(Int32Ty, Int64Ty, Int64Ty); + llvm::Constant *RISCVCPUModel = + CGM.CreateRuntimeVariable(StructTy, "__riscv_cpu_model"); + cast(RISCVCPUModel)->setDSOLocal(true); + + auto loadRISCVCPUID = [&](unsigned Index) { +Value *Ptr = Builder.CreateStructGEP(StructTy, RISCVCPUModel, Index); +Value *CPUID = Builder.CreateAlignedLoad(StructTy->getTypeAtIndex(Index), wangpc-pp wrote: Tried it, but `CreateLoad` in `CGBuilder` needs `Address` which also needs alignment. https://github.com/llvm/llvm-project/pull/116231 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Handle gfx950 global_load_lds_* instructions (PR #116680)
arsenm wrote: ### Merge activity * **Nov 19, 12:46 AM EST**: A user started a stack merge that includes this pull request via [Graphite](https://app.graphite.dev/github/pr/llvm/llvm-project/116680). https://github.com/llvm/llvm-project/pull/116680 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [llvm] AMDGPU: Define v_mfma_f32_32x32x16_bf16 for gfx950 (PR #116679)
arsenm wrote: ### Merge activity * **Nov 19, 12:46 AM EST**: A user started a stack merge that includes this pull request via [Graphite](https://app.graphite.dev/github/pr/llvm/llvm-project/116679). https://github.com/llvm/llvm-project/pull/116679 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [CodeGen][NewPM] Port SpillPlacement analysis to NPM (PR #116618)
@@ -38,13 +39,21 @@ class BitVector; class EdgeBundles; class MachineBlockFrequencyInfo; class MachineFunction; +class SpillPlacementWrapperLegacy; +class SpillPlacementAnalysis; + +class SpillPlacement { + friend class SpillPlacementWrapperLegacy; + friend class SpillPlacementAnalysis; -class SpillPlacement : public MachineFunctionPass { struct Node; + const MachineFunction *MF = nullptr; const EdgeBundles *bundles = nullptr; const MachineBlockFrequencyInfo *MBFI = nullptr; - Node *nodes = nullptr; + + static void arrayDeleter(Node *N); + std::unique_ptr nodes; paperchalice wrote: An outlined default destructor would work. 🤔 https://github.com/llvm/llvm-project/pull/116618 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Handle gfx950 global_load_lds_* instructions (PR #116680)
@@ -1980,6 +1985,10 @@ defm GLOBAL_LOAD_LDS_USHORT : FLAT_Real_AllAddr_LDS <0x028, 0x12>; defm GLOBAL_LOAD_LDS_SSHORT : FLAT_Real_AllAddr_LDS <0x029, 0x13>; defm GLOBAL_LOAD_LDS_DWORD : FLAT_Real_AllAddr_LDS <0x02a, 0x14>; +defm GLOBAL_LOAD_LDS_DWORDX3 : FLAT_Real_AllAddr_LDS <0x07e, 0x07e>; +defm GLOBAL_LOAD_LDS_DWORDX4 : FLAT_Real_AllAddr_LDS <0x07d, 0x07d>; + arsenm wrote: whatever was in the merge https://github.com/llvm/llvm-project/pull/116680 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Handle gfx950 96/128-bit buffer_load_lds (PR #116681)
arsenm wrote: ### Merge activity * **Nov 19, 12:46 AM EST**: A user started a stack merge that includes this pull request via [Graphite](https://app.graphite.dev/github/pr/llvm/llvm-project/116681). https://github.com/llvm/llvm-project/pull/116681 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [CodeGen][NewPM] Port SpillPlacement analysis to NPM (PR #116618)
@@ -38,13 +39,21 @@ class BitVector; class EdgeBundles; class MachineBlockFrequencyInfo; class MachineFunction; +class SpillPlacementWrapperLegacy; +class SpillPlacementAnalysis; + +class SpillPlacement { + friend class SpillPlacementWrapperLegacy; + friend class SpillPlacementAnalysis; -class SpillPlacement : public MachineFunctionPass { struct Node; + const MachineFunction *MF = nullptr; const EdgeBundles *bundles = nullptr; const MachineBlockFrequencyInfo *MBFI = nullptr; - Node *nodes = nullptr; + + static void arrayDeleter(Node *N); + std::unique_ptr nodes; optimisan wrote: The definition of `Node` is not available here, so the default deleter fails to compile sizeof(Node) for this incomplete type. To hack around it I put the definition of `arrayDeleter` in the implementation where struct Node is defined. But changing to `unique_ptr` facilitates removal of `.get()` calls https://github.com/llvm/llvm-project/pull/116618 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [flang] [flang][OpenMP] Apply modifier representation to semantic checks (PR #116658)
https://github.com/kparzysz updated https://github.com/llvm/llvm-project/pull/116658 >From fac6a8594643811418f37ee42fc1ac35bcc2a244 Mon Sep 17 00:00:00 2001 From: Krzysztof Parzyszek Date: Thu, 14 Nov 2024 07:29:59 -0600 Subject: [PATCH 1/2] [flang][OpenMP] Apply modifier representation to semantic checks Also, define helper macros in parse-tree.h. Apply the new modifier representation to the DEFAULTMAP and REDUCTION clauses, with testcases utilizing the new modifier validation. OpenMP modifier overhaul: #3/3 --- flang/include/flang/Parser/dump-parse-tree.h | 8 +- flang/include/flang/Parser/parse-tree.h | 49 +-- .../flang/Semantics/openmp-modifiers.h| 4 + flang/lib/Lower/OpenMP/Clauses.cpp| 33 flang/lib/Parser/openmp-parsers.cpp | 40 + flang/lib/Parser/unparse.cpp | 15 ++-- flang/lib/Semantics/check-omp-structure.cpp | 83 +++ flang/lib/Semantics/check-omp-structure.h | 3 +- flang/lib/Semantics/openmp-modifiers.cpp | 33 flang/lib/Semantics/resolve-directives.cpp| 52 +++- .../test/Parser/OpenMP/defaultmap-clause.f90 | 8 +- .../test/Parser/OpenMP/defaultmap-unparse.f90 | 16 ++-- .../test/Parser/OpenMP/reduction-modifier.f90 | 6 +- .../Semantics/OpenMP/combined-constructs.f90 | 12 +-- .../OpenMP/defaultmap-clause-v45.f90 | 2 +- 15 files changed, 236 insertions(+), 128 deletions(-) diff --git a/flang/include/flang/Parser/dump-parse-tree.h b/flang/include/flang/Parser/dump-parse-tree.h index df5bf1d8d3200e..9c59ce520a31aa 100644 --- a/flang/include/flang/Parser/dump-parse-tree.h +++ b/flang/include/flang/Parser/dump-parse-tree.h @@ -509,9 +509,11 @@ class ParseTreeDumper { NODE(parser, OmpDeclareMapperSpecifier) NODE(parser, OmpDefaultClause) NODE_ENUM(OmpDefaultClause, Type) + NODE(parser, OmpVariableCategory) + NODE_ENUM(OmpVariableCategory, Value) NODE(parser, OmpDefaultmapClause) NODE_ENUM(OmpDefaultmapClause, ImplicitBehavior) - NODE_ENUM(OmpDefaultmapClause, VariableCategory) + NODE(OmpDefaultmapClause, Modifier) NODE(parser, OmpDependenceType) 
NODE_ENUM(OmpDependenceType, Value) NODE(parser, OmpTaskDependenceType) @@ -567,8 +569,10 @@ class ParseTreeDumper { NODE_ENUM(OmpBindClause, Type) NODE(parser, OmpProcBindClause) NODE_ENUM(OmpProcBindClause, Type) - NODE_ENUM(OmpReductionClause, ReductionModifier) + NODE(parser, OmpReductionModifier) + NODE_ENUM(OmpReductionModifier, Value) NODE(parser, OmpReductionClause) + NODE(OmpReductionClause, Modifier) NODE(parser, OmpInReductionClause) NODE(parser, OmpReductionCombiner) NODE(OmpReductionCombiner, FunctionCombiner) diff --git a/flang/include/flang/Parser/parse-tree.h b/flang/include/flang/Parser/parse-tree.h index ef49a36578270e..5b28bcd4e21b80 100644 --- a/flang/include/flang/Parser/parse-tree.h +++ b/flang/include/flang/Parser/parse-tree.h @@ -3440,6 +3440,16 @@ struct OmpObject { WRAPPER_CLASS(OmpObjectList, std::list); +#define MODIFIER_BOILERPLATE(...) \ + struct Modifier { \ +using Variant = std::variant<__VA_ARGS__>; \ +UNION_CLASS_BOILERPLATE(Modifier); \ +CharBlock source; \ +Variant u; \ + } + +#define MODIFIERS() std::optional> + inline namespace modifier { // For uniformity, in all keyword modifiers the name of the type defined // by ENUM_CLASS is "Value", e.g. @@ -3505,12 +3515,20 @@ struct OmpLinearModifier { // - |// since 4.5, until 5.2 // + | * | .AND. | .OR. | .EQV. | .NEQV. 
|// since 4.5 // MIN | MAX | IAND | IOR | IEOR // since 4.5 -// struct OmpReductionIdentifier { UNION_CLASS_BOILERPLATE(OmpReductionIdentifier); std::variant u; }; +// Ref: [5.0:300-302], [5.1:332-334], [5.2:134-137] +// +// reduction-modifier -> +// DEFAULT | INSCAN | TASK// since 5.0 +struct OmpReductionModifier { + ENUM_CLASS(Value, Default, Inscan, Task); + WRAPPER_CLASS_BOILERPLATE(OmpReductionModifier, Value); +}; + // Ref: [4.5:169-170], [5.0:254-256], [5.1:287-289], [5.2:321] // // task-dependence-type -> // "dependence-type" in 5.1 and before @@ -3521,6 +3539,17 @@ struct OmpTaskDependenceType { ENUM_CLASS(Value, In, Out, Inout, Inoutset, Mutexinoutset, Depobj) WRAPPER_CLASS_BOILERPLATE(OmpTaskDependenceType, Value); }; + +// Ref: [4.5:229-230], [5.0:324-325], [5.1:357-358], [5.2:161-162] +// +// variable-category -> +// SCALAR | // since 4.5 +// AGGREGATE | ALLOCATABLE | POINTER |// since 5.0 +// ALL// since 5.2 +struct OmpVariableCategory { + ENUM_CLASS(Value, Aggregate, All, Allocatable, Pointer, Scalar) + WRAPPER_CLASS_BOILERPLATE(OmpVariableCategory, Value); +}; } // namespace modifier // --- Clauses @@ -3578,8 +3607,8 @@ struct OmpDefaultmapClause { TUPLE_CLASS_BOILERPLATE(OmpDef
[llvm-branch-commits] [clang] [llvm] AMDGPU: Add first gfx950 mfma instructions (PR #116312)
https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/116312 >From 6c8fd97756f9b08e3562a8702b2aae186ef72075 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Tue, 21 Nov 2023 10:03:19 +0900 Subject: [PATCH] AMDGPU: Add first gfx950 mfma instructions Scheduling info and hazards are wrong and TBD. --- clang/include/clang/Basic/BuiltinsAMDGPU.def | 6 + .../CodeGenOpenCL/builtins-amdgcn-mfma.cl | 25 +- .../builtins-amdgcn-error-gfx950-param.cl | 21 ++ .../builtins-amdgcn-error-gfx950.cl | 12 + llvm/include/llvm/IR/IntrinsicsAMDGPU.td | 9 + llvm/lib/Target/AMDGPU/AMDGPU.td | 4 + .../Target/AMDGPU/AMDGPURegisterBankInfo.cpp | 4 +- .../Target/AMDGPU/AMDGPUSearchableTables.td | 2 + llvm/lib/Target/AMDGPU/GCNSubtarget.h | 4 + llvm/lib/Target/AMDGPU/SIInstrInfo.td | 4 + llvm/lib/Target/AMDGPU/VOP3PInstructions.td | 22 ++ .../UniformityAnalysis/AMDGPU/intrinsics.ll | 17 ++ .../CodeGen/AMDGPU/llvm.amdgcn.mfma.gfx950.ll | 274 ++ llvm/test/MC/AMDGPU/mai-gfx950.s | 112 +++ .../MC/Disassembler/AMDGPU/gfx950_mai.txt | 61 llvm/test/tools/llvm-mca/AMDGPU/gfx950.s | 18 ++ 16 files changed, 592 insertions(+), 3 deletions(-) create mode 100644 clang/test/SemaOpenCL/builtins-amdgcn-error-gfx950-param.cl create mode 100644 clang/test/SemaOpenCL/builtins-amdgcn-error-gfx950.cl create mode 100644 llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.gfx950.ll create mode 100644 llvm/test/MC/AMDGPU/mai-gfx950.s create mode 100644 llvm/test/MC/Disassembler/AMDGPU/gfx950_mai.txt create mode 100644 llvm/test/tools/llvm-mca/AMDGPU/gfx950.s diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def b/clang/include/clang/Basic/BuiltinsAMDGPU.def index 61516eb2a4a723..6917d8d1aca69d 100644 --- a/clang/include/clang/Basic/BuiltinsAMDGPU.def +++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def @@ -431,6 +431,12 @@ TARGET_BUILTIN(__builtin_amdgcn_cvt_pk_fp8_f32, "iffiIb", "nc", "fp8-conversion- TARGET_BUILTIN(__builtin_amdgcn_cvt_sr_bf8_f32, "ifiiIi", "nc", "fp8-conversion-insts") 
TARGET_BUILTIN(__builtin_amdgcn_cvt_sr_fp8_f32, "ifiiIi", "nc", "fp8-conversion-insts") +//===--===// +// GFX950 only builtins. +//===--===// +TARGET_BUILTIN(__builtin_amdgcn_mfma_f32_16x16x32_f16, "V4fV8hV8hV4fIiIiIi", "nc", "gfx950-insts") +TARGET_BUILTIN(__builtin_amdgcn_mfma_f32_32x32x16_f16, "V16fV8hV8hV16fIiIiIi", "nc", "gfx950-insts") + //===--===// // GFX12+ only builtins. //===--===// diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-mfma.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-mfma.cl index dcdeee6b6acc40..a644a60f9ec381 100644 --- a/clang/test/CodeGenOpenCL/builtins-amdgcn-mfma.cl +++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-mfma.cl @@ -2,6 +2,7 @@ // RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx908 -DMFMA_GFX908_TESTS -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-GFX908 // RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx90a -DMFMA_GFX90A_TESTS -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-GFX90A // RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx940 -DMFMA_GFX940_TESTS -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-GFX940 +// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx950 -DMFMA_GFX950_TESTS -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-GFX950 #pragma OPENCL EXTENSION cl_khr_fp64:enable @@ -222,7 +223,7 @@ void test_mfma_f64_4x4x4f64(global double* out, double a, double b, double c) #endif // MFMA_GFX90A_TESTS -#ifdef MFMA_GFX940_TESTS +#if defined(MFMA_GFX940_TESTS) || defined(MFMA_GFX950_TESTS) // CHECK-GFX940-LABEL: @test_mfma_i32_16x16x32_i8 // CHECK-GFX940: call <4 x i32> @llvm.amdgcn.mfma.i32.16x16x32.i8(i64 %a, i64 %b, <4 x i32> %c, i32 0, i32 0, i32 0) void test_mfma_i32_16x16x32_i8(global v4i* out, long a, long b, v4i c) @@ -404,4 +405,24 @@ void test_smfmac_f32_32x32x32_fp8_fp8(global v16f* out, v2i a, v4i b, v16f c, in { *out = __builtin_amdgcn_smfmac_f32_32x32x32_fp8_fp8(a, b, c, idx, 0, 0); } -#endif // MFMA_GFX940_TESTS +#endif // 
defined(MFMA_GFX940_TESTS) || defined(MFMA_GFX950_TESTS) + +#ifdef MFMA_GFX950_TESTS + +// CHECK-GFX950-LABEL: @test_mfma_f32_16x16x32_f16( +// CHECK-GFX950: tail call <4 x float> @llvm.amdgcn.mfma.f32.16x16x32.f16(<8 x half> %a, <8 x half> %b, <4 x float> %c, i32 1, i32 2, i32 3) + +v4f test_mfma_f32_16x16x32_f16(v8h a, v8h b, v4f c) +{ + return __builtin_amdgcn_mfma_f32_16x16x32_f16(a, b, c, 1, 2, 3); +} + +// CHECK-GFX950-LABEL: @test_mfma_f32_32x32x16_f16 +// CHECK-GFX950: tail call <16 x float> @llvm.amdgcn.mfma.f32.32x32
[llvm-branch-commits] [llvm] AMDGPU: Add V_CVT_PK_BF16_F32 for gfx950 (PR #116678)
https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/116678 >From 1adfc6bf758377390753d35df51fb7a294202238 Mon Sep 17 00:00:00 2001 From: Pravin Jagtap Date: Mon, 5 Feb 2024 04:29:01 -0500 Subject: [PATCH] AMDGPU: Add V_CVT_PK_BF16_F32 for gfx950 --- llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 6 + llvm/lib/Target/AMDGPU/SIInstrInfo.td | 1 + llvm/lib/Target/AMDGPU/VOP3Instructions.td| 25 ++ llvm/test/CodeGen/AMDGPU/bf16-conversions.ll | 395 -- llvm/test/MC/AMDGPU/gfx950_asm_vop3.s | 26 ++ .../Disassembler/AMDGPU/gfx950_dasm_vop3.txt | 19 + 6 files changed, 255 insertions(+), 217 deletions(-) create mode 100644 llvm/test/MC/AMDGPU/gfx950_asm_vop3.s create mode 100644 llvm/test/MC/Disassembler/AMDGPU/gfx950_dasm_vop3.txt diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index 1e261f4256c93b..ad89812558d25c 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -889,6 +889,12 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM, setOperationAction(ISD::MUL, MVT::i1, Promote); + if (Subtarget->hasBF16ConversionInsts()) { +setOperationAction(ISD::FP_ROUND, MVT::v2bf16, Legal); +setOperationAction(ISD::FP_ROUND, MVT::bf16, Legal); +setOperationAction(ISD::BUILD_VECTOR, MVT::v2bf16, Legal); + } + setTargetDAGCombine({ISD::ADD, ISD::UADDO_CARRY, ISD::SUB, diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td index 882e147dc231fa..7df9be5c6f7a0b 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td @@ -2787,6 +2787,7 @@ def VOP_I32_I32_I32 : VOPProfile <[i32, i32, i32, untyped]>; def VOP_I32_I32_I32_ARITH : VOPProfile <[i32, i32, i32, untyped], /*EnableClamp=*/1>; def VOP_V2F16_F32_F32 : VOPProfile <[v2f16, f32, f32, untyped]>; def VOP_F32_F16_F16_F16 : VOPProfile <[f32, f16, f16, f16]>; +def VOP_V2BF16_F32_F32 : VOPProfile <[v2bf16, f32, f32, untyped]>; def 
VOP_I64_I64_I32 : VOPProfile <[i64, i64, i32, untyped]>; def VOP_I64_I32_I64 : VOPProfile <[i64, i32, i64, untyped]>; diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td index 551e8b3a679202..917e1b3974b46a 100644 --- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td @@ -944,6 +944,30 @@ let SubtargetPredicate = isGFX11Plus in { defm V_CVT_PK_U16_F32 : VOP3Inst<"v_cvt_pk_u16_f32", VOP3_Profile>; } // End SubtargetPredicate = isGFX11Plus +// FIXME: GlobalISel cannot distinguish f16 and bf16 and may start using bf16 patterns +//instead of less complex f16. Disable GlobalISel for these for now. +def bf16_fpround : PatFrag <(ops node:$src0), (fpround $src0), [{ return true; }]> { + let GISelPredicateCode = [{return false;}]; +} + +let SubtargetPredicate = HasBF16ConversionInsts in { + let ReadsModeReg = 0 in { +defm V_CVT_PK_BF16_F32: VOP3Inst<"v_cvt_pk_bf16_f32", VOP3_Profile>; + } + def : GCNPat<(v2bf16 (bf16_fpround v2f32:$src)), + (V_CVT_PK_BF16_F32_e64 0, (EXTRACT_SUBREG VReg_64:$src, sub0), 0, (EXTRACT_SUBREG VReg_64:$src, sub1))>; + def : GCNPat<(v2bf16 (bf16_fpround v2f64:$src)), + (V_CVT_PK_BF16_F32_e64 0, (V_CVT_F32_F64_e64 0, (EXTRACT_SUBREG VReg_128:$src, sub0_sub1)), + 0, (V_CVT_F32_F64_e64 0, (EXTRACT_SUBREG VReg_128:$src, sub2_sub3)))>; + def : GCNPat<(v2bf16 (build_vector (bf16 (bf16_fpround (f32 (VOP3Mods f32:$src0, i32:$src0_modifiers, + (bf16 (bf16_fpround (f32 (VOP3Mods f32:$src1, i32:$src1_modifiers)), + (V_CVT_PK_BF16_F32_e64 $src0_modifiers, $src0, $src1_modifiers, $src1)>; + def : GCNPat<(bf16 (bf16_fpround (f32 (VOP3Mods f32:$src0, i32:$src0_modifiers, + (V_CVT_PK_BF16_F32_e64 $src0_modifiers, $src0, 0, (f32 (IMPLICIT_DEF)))>; + def : GCNPat<(bf16 (bf16_fpround (f64 (VOP3Mods f64:$src0, i32:$src0_modifiers, + (V_CVT_PK_BF16_F32_e64 0, (f32 (V_CVT_F32_F64_e64 $src0_modifiers, $src0)), 0, (f32 (IMPLICIT_DEF)))>; +} + let SubtargetPredicate = isGFX12Plus, 
ReadsModeReg = 0 in { defm V_MAXIMUMMINIMUM_F32 : VOP3Inst<"v_maximumminimum_f32", VOP3_Profile>; defm V_MINIMUMMAXIMUM_F32 : VOP3Inst<"v_minimummaximum_f32", VOP3_Profile>; @@ -1721,5 +1745,6 @@ defm V_LSHL_ADD_U64 : VOP3_Real_vi <0x208>; defm V_CVT_PK_FP8_F32 : VOP3OpSel_Real_gfx9 <0x2a2>; defm V_CVT_PK_BF8_F32 : VOP3OpSel_Real_gfx9 <0x2a3>; +defm V_CVT_PK_BF16_F32: VOP3OpSel_Real_gfx9 <0x268>; defm V_CVT_SR_FP8_F32 : VOP3OpSel_Real_gfx9_forced_opsel2 <0x2a4>; defm V_CVT_SR_BF8_F32 : VOP3OpSel_Real_gfx9_forced_opsel2 <0x2a5>; diff --git a/llvm/test/CodeGen/AMDGPU/bf16-conversions.ll b/llvm/test/CodeGen/AMDGPU/bf16-conversions.ll inde
[llvm-branch-commits] [flang] [flang][OpenMP] Apply modifier representation to semantic checks (PR #116658)
https://github.com/kparzysz updated https://github.com/llvm/llvm-project/pull/116658 >From fac6a8594643811418f37ee42fc1ac35bcc2a244 Mon Sep 17 00:00:00 2001 From: Krzysztof Parzyszek Date: Thu, 14 Nov 2024 07:29:59 -0600 Subject: [PATCH 1/3] [flang][OpenMP] Apply modifier representation to semantic checks Also, define helper macros in parse-tree.h. Apply the new modifier representation to the DEFAULTMAP and REDUCTION clauses, with testcases utilizing the new modifier validation. OpenMP modifier overhaul: #3/3 --- flang/include/flang/Parser/dump-parse-tree.h | 8 +- flang/include/flang/Parser/parse-tree.h | 49 +-- .../flang/Semantics/openmp-modifiers.h| 4 + flang/lib/Lower/OpenMP/Clauses.cpp| 33 flang/lib/Parser/openmp-parsers.cpp | 40 + flang/lib/Parser/unparse.cpp | 15 ++-- flang/lib/Semantics/check-omp-structure.cpp | 83 +++ flang/lib/Semantics/check-omp-structure.h | 3 +- flang/lib/Semantics/openmp-modifiers.cpp | 33 flang/lib/Semantics/resolve-directives.cpp| 52 +++- .../test/Parser/OpenMP/defaultmap-clause.f90 | 8 +- .../test/Parser/OpenMP/defaultmap-unparse.f90 | 16 ++-- .../test/Parser/OpenMP/reduction-modifier.f90 | 6 +- .../Semantics/OpenMP/combined-constructs.f90 | 12 +-- .../OpenMP/defaultmap-clause-v45.f90 | 2 +- 15 files changed, 236 insertions(+), 128 deletions(-) diff --git a/flang/include/flang/Parser/dump-parse-tree.h b/flang/include/flang/Parser/dump-parse-tree.h index df5bf1d8d3200e..9c59ce520a31aa 100644 --- a/flang/include/flang/Parser/dump-parse-tree.h +++ b/flang/include/flang/Parser/dump-parse-tree.h @@ -509,9 +509,11 @@ class ParseTreeDumper { NODE(parser, OmpDeclareMapperSpecifier) NODE(parser, OmpDefaultClause) NODE_ENUM(OmpDefaultClause, Type) + NODE(parser, OmpVariableCategory) + NODE_ENUM(OmpVariableCategory, Value) NODE(parser, OmpDefaultmapClause) NODE_ENUM(OmpDefaultmapClause, ImplicitBehavior) - NODE_ENUM(OmpDefaultmapClause, VariableCategory) + NODE(OmpDefaultmapClause, Modifier) NODE(parser, OmpDependenceType) 
NODE_ENUM(OmpDependenceType, Value) NODE(parser, OmpTaskDependenceType) @@ -567,8 +569,10 @@ class ParseTreeDumper { NODE_ENUM(OmpBindClause, Type) NODE(parser, OmpProcBindClause) NODE_ENUM(OmpProcBindClause, Type) - NODE_ENUM(OmpReductionClause, ReductionModifier) + NODE(parser, OmpReductionModifier) + NODE_ENUM(OmpReductionModifier, Value) NODE(parser, OmpReductionClause) + NODE(OmpReductionClause, Modifier) NODE(parser, OmpInReductionClause) NODE(parser, OmpReductionCombiner) NODE(OmpReductionCombiner, FunctionCombiner) diff --git a/flang/include/flang/Parser/parse-tree.h b/flang/include/flang/Parser/parse-tree.h index ef49a36578270e..5b28bcd4e21b80 100644 --- a/flang/include/flang/Parser/parse-tree.h +++ b/flang/include/flang/Parser/parse-tree.h @@ -3440,6 +3440,16 @@ struct OmpObject { WRAPPER_CLASS(OmpObjectList, std::list); +#define MODIFIER_BOILERPLATE(...) \ + struct Modifier { \ +using Variant = std::variant<__VA_ARGS__>; \ +UNION_CLASS_BOILERPLATE(Modifier); \ +CharBlock source; \ +Variant u; \ + } + +#define MODIFIERS() std::optional> + inline namespace modifier { // For uniformity, in all keyword modifiers the name of the type defined // by ENUM_CLASS is "Value", e.g. @@ -3505,12 +3515,20 @@ struct OmpLinearModifier { // - |// since 4.5, until 5.2 // + | * | .AND. | .OR. | .EQV. | .NEQV. 
|// since 4.5 // MIN | MAX | IAND | IOR | IEOR // since 4.5 -// struct OmpReductionIdentifier { UNION_CLASS_BOILERPLATE(OmpReductionIdentifier); std::variant u; }; +// Ref: [5.0:300-302], [5.1:332-334], [5.2:134-137] +// +// reduction-modifier -> +// DEFAULT | INSCAN | TASK// since 5.0 +struct OmpReductionModifier { + ENUM_CLASS(Value, Default, Inscan, Task); + WRAPPER_CLASS_BOILERPLATE(OmpReductionModifier, Value); +}; + // Ref: [4.5:169-170], [5.0:254-256], [5.1:287-289], [5.2:321] // // task-dependence-type -> // "dependence-type" in 5.1 and before @@ -3521,6 +3539,17 @@ struct OmpTaskDependenceType { ENUM_CLASS(Value, In, Out, Inout, Inoutset, Mutexinoutset, Depobj) WRAPPER_CLASS_BOILERPLATE(OmpTaskDependenceType, Value); }; + +// Ref: [4.5:229-230], [5.0:324-325], [5.1:357-358], [5.2:161-162] +// +// variable-category -> +// SCALAR | // since 4.5 +// AGGREGATE | ALLOCATABLE | POINTER |// since 5.0 +// ALL// since 5.2 +struct OmpVariableCategory { + ENUM_CLASS(Value, Aggregate, All, Allocatable, Pointer, Scalar) + WRAPPER_CLASS_BOILERPLATE(OmpVariableCategory, Value); +}; } // namespace modifier // --- Clauses @@ -3578,8 +3607,8 @@ struct OmpDefaultmapClause { TUPLE_CLASS_BOILERPLATE(OmpDef
[llvm-branch-commits] [llvm] [NFC] Use unique_ptr in SparseSet (PR #116617)
https://github.com/optimisan created https://github.com/llvm/llvm-project/pull/116617 This allows implementing the move constructor. >From 8a5db30a3841b88ccac2c781d933eeb45560fdfa Mon Sep 17 00:00:00 2001 From: Akshat Oke Date: Mon, 18 Nov 2024 10:15:19 + Subject: [PATCH] [NFC] Use unique_ptr in SparseSet This allows implementing the move constructor. --- llvm/include/llvm/ADT/SparseSet.h | 18 +++--- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/llvm/include/llvm/ADT/SparseSet.h b/llvm/include/llvm/ADT/SparseSet.h index c7793117ff5408..1adae0d4595ac4 100644 --- a/llvm/include/llvm/ADT/SparseSet.h +++ b/llvm/include/llvm/ADT/SparseSet.h @@ -129,7 +129,12 @@ class SparseSet { using DenseT = SmallVector; using size_type = unsigned; DenseT Dense; - SparseT *Sparse = nullptr; + + struct Deleter { +void operator()(SparseT *S) { free(S); } + }; + std::unique_ptr Sparse; + unsigned Universe = 0; KeyFunctorT KeyIndexOf; SparseSetValFunctor ValIndexOf; @@ -144,7 +149,7 @@ class SparseSet { SparseSet() = default; SparseSet(const SparseSet &) = delete; SparseSet &operator=(const SparseSet &) = delete; - ~SparseSet() { free(Sparse); } + SparseSet(SparseSet &&) = default; /// setUniverse - Set the universe size which determines the largest key the /// set can hold. The universe must be sized before any elements can be @@ -159,11 +164,10 @@ class SparseSet { // Hysteresis prevents needless reallocations. if (U >= Universe/4 && U <= Universe) return; -free(Sparse); // The Sparse array doesn't actually need to be initialized, so malloc // would be enough here, but that will cause tools like valgrind to // complain about branching on uninitialized data. 
-Sparse = static_cast(safe_calloc(U, sizeof(SparseT))); +Sparse.reset(static_cast(safe_calloc(U, sizeof(SparseT; Universe = U; } @@ -205,7 +209,7 @@ class SparseSet { assert(Idx < Universe && "Key out of range"); assert(Sparse != nullptr && "Invalid sparse type"); const unsigned Stride = std::numeric_limits::max() + 1u; -for (unsigned i = Sparse[Idx], e = size(); i < e; i += Stride) { +for (unsigned i = Sparse.get()[Idx], e = size(); i < e; i += Stride) { const unsigned FoundIdx = ValIndexOf(Dense[i]); assert(FoundIdx < Universe && "Invalid key in set. Did object mutate?"); if (Idx == FoundIdx) @@ -255,7 +259,7 @@ class SparseSet { iterator I = findIndex(Idx); if (I != end()) return std::make_pair(I, false); -Sparse[Idx] = size(); +Sparse.get()[Idx] = size(); Dense.push_back(Val); return std::make_pair(end() - 1, true); } @@ -292,7 +296,7 @@ class SparseSet { *I = Dense.back(); unsigned BackIdx = ValIndexOf(Dense.back()); assert(BackIdx < Universe && "Invalid key in set. Did object mutate?"); - Sparse[BackIdx] = I - begin(); + Sparse.get()[BackIdx] = I - begin(); } // This depends on SmallVector::pop_back() not invalidating iterators. // std::vector::pop_back() doesn't give that guarantee. ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [mlir] [mlir][LLVM] `LLVMTypeConverter`: Tighten materialization checks (PR #116532)
https://github.com/zero9178 approved this pull request. LGTM, thank you :)) https://github.com/llvm/llvm-project/pull/116532 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [MSVC] work-around for compile time issue 102513 (PR #111314)
nikic wrote: > @tru should this have been merged? Do I need to do something to facilitate? > Sorry for not following up earlier I have been sick recently. Thanks. Backport PRs need to be part of the release milestone, otherwise they're likely to get forgotten about :) I added it just now. https://github.com/llvm/llvm-project/pull/111314 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Handle gfx950 global_load_lds_* instructions (PR #116680)
https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/116680 >From 6711ea8a2ae2f0e50488cab587937fa6a3e00ea7 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Thu, 18 Jan 2024 14:44:03 +0700 Subject: [PATCH] AMDGPU: Handle gfx950 global_load_lds_* instructions Define global_load_lds_dwordx3 and global_load_lds_dwordx4. Oddly it seems dwordx2 was skipped. --- llvm/include/llvm/IR/IntrinsicsAMDGPU.td | 2 +- .../AMDGPU/AMDGPUInstructionSelector.cpp | 10 ++ llvm/lib/Target/AMDGPU/FLATInstructions.td| 9 ++ llvm/lib/Target/AMDGPU/GCNSubtarget.h | 7 + llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 10 ++ .../llvm.amdgcn.global.load.lds.gfx950.ll | 137 ++ llvm/test/MC/AMDGPU/gfx950_asm_features.s | 37 + llvm/test/MC/Disassembler/AMDGPU/gfx950.txt | 25 8 files changed, 236 insertions(+), 1 deletion(-) create mode 100644 llvm/test/CodeGen/AMDGPU/llvm.amdgcn.global.load.lds.gfx950.ll create mode 100644 llvm/test/MC/AMDGPU/gfx950_asm_features.s create mode 100644 llvm/test/MC/Disassembler/AMDGPU/gfx950.txt diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td index 15f33cdbf92e6e..f43ab50d2ea441 100644 --- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td +++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td @@ -2452,7 +2452,7 @@ class AMDGPUGlobalLoadLDS : [], [LLVMQualPointerType<1>,// Base global pointer to load from LLVMQualPointerType<3>,// LDS base pointer to store to - llvm_i32_ty, // Data byte size: 1/2/4 + llvm_i32_ty, // Data byte size: 1/2/4 (/12/16 for gfx950) llvm_i32_ty, // imm offset (applied to both global and LDS address) llvm_i32_ty], // auxiliary data (imm, cachepolicy (bit 0 = sc0, // bit 1 = sc1, diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp index 13de93e829fab2..a6ef0069f134bd 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp @@ -3329,6 +3329,16 @@ bool 
AMDGPUInstructionSelector::selectGlobalLoadLds(MachineInstr &MI) const{ case 4: Opc = AMDGPU::GLOBAL_LOAD_LDS_DWORD; break; + case 12: +if (!Subtarget->hasLDSLoadB96_B128()) + return false; +Opc = AMDGPU::GLOBAL_LOAD_LDS_DWORDX3; +break; + case 16: +if (!Subtarget->hasLDSLoadB96_B128()) + return false; +Opc = AMDGPU::GLOBAL_LOAD_LDS_DWORDX4; +break; } MachineBasicBlock *MBB = MI.getParent(); diff --git a/llvm/lib/Target/AMDGPU/FLATInstructions.td b/llvm/lib/Target/AMDGPU/FLATInstructions.td index db74372e9db452..861fcf017d9e4d 100644 --- a/llvm/lib/Target/AMDGPU/FLATInstructions.td +++ b/llvm/lib/Target/AMDGPU/FLATInstructions.td @@ -934,6 +934,11 @@ defm GLOBAL_LOAD_LDS_USHORT : FLAT_Global_Load_LDS_Pseudo <"global_load_lds_usho defm GLOBAL_LOAD_LDS_SSHORT : FLAT_Global_Load_LDS_Pseudo <"global_load_lds_sshort">; defm GLOBAL_LOAD_LDS_DWORD : FLAT_Global_Load_LDS_Pseudo <"global_load_lds_dword">; +let SubtargetPredicate = HasGFX950Insts in { +defm GLOBAL_LOAD_LDS_DWORDX3 : FLAT_Global_Load_LDS_Pseudo <"global_load_lds_dwordx3">; +defm GLOBAL_LOAD_LDS_DWORDX4 : FLAT_Global_Load_LDS_Pseudo <"global_load_lds_dwordx4">; +} + let SubtargetPredicate = isGFX12Plus in { defm GLOBAL_ATOMIC_COND_SUB_U32: FLAT_Global_Atomic_Pseudo <"global_atomic_cond_sub_u32", VGPR_32, i32>; defm GLOBAL_ATOMIC_ORDERED_ADD_B64 : FLAT_Global_Atomic_Pseudo <"global_atomic_ordered_add_b64", VReg_64, i64>; @@ -1980,6 +1985,10 @@ defm GLOBAL_LOAD_LDS_USHORT : FLAT_Real_AllAddr_LDS <0x028, 0x12>; defm GLOBAL_LOAD_LDS_SSHORT : FLAT_Real_AllAddr_LDS <0x029, 0x13>; defm GLOBAL_LOAD_LDS_DWORD : FLAT_Real_AllAddr_LDS <0x02a, 0x14>; +defm GLOBAL_LOAD_LDS_DWORDX3 : FLAT_Real_AllAddr_LDS <0x07e, 0x07e>; +defm GLOBAL_LOAD_LDS_DWORDX4 : FLAT_Real_AllAddr_LDS <0x07d, 0x07d>; + + defm GLOBAL_ATOMIC_SWAP : FLAT_Global_Real_Atomics_vi <0x40>; defm GLOBAL_ATOMIC_CMPSWAP: FLAT_Global_Real_Atomics_vi <0x41>; defm GLOBAL_ATOMIC_ADD: FLAT_Global_Real_Atomics_vi <0x42>; diff --git 
a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h index 4a6efe533230b1..f3f96940c1f44b 100644 --- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h +++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h @@ -1289,6 +1289,13 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo, // hasGFX940Insts and hasGFX90AInsts are also true. bool hasGFX950Insts() const { return GFX950Insts; } + /// Returns true if the target supports + /// global_load_lds_dwordx3/global_load_lds_dwordx4 or + /// buffer_load_dwordx3/buffer_load_dwordx4 with the lds bit. + bool hasLDSLoadB96_B128() const { +return h
[llvm-branch-commits] [llvm] AMDGPU: Handle gfx950 96/128-bit buffer_load_lds (PR #116681)
https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/116681 >From e6110347d262f74c2f2c76dfde113723ac21115c Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Thu, 18 Jan 2024 16:18:05 +0700 Subject: [PATCH] AMDGPU: Handle gfx950 96/128-bit buffer_load_lds Enforcing this limit in the clang builtin will come later. --- llvm/include/llvm/IR/IntrinsicsAMDGPU.td | 8 +- .../AMDGPU/AMDGPUInstructionSelector.cpp | 18 ++ llvm/lib/Target/AMDGPU/BUFInstructions.td | 24 ++- llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 16 ++ .../llvm.amdgcn.global.load.lds.gfx950.ll | 8 + ...m.amdgcn.raw.ptr.buffer.load.lds.gfx950.ll | 176 ...mdgcn.struct.ptr.buffer.load.lds.gfx950.ll | 196 ++ llvm/test/MC/AMDGPU/mubuf-gfx950.s| 32 +++ llvm/test/MC/Disassembler/AMDGPU/gfx950.txt | 19 ++ 9 files changed, 485 insertions(+), 12 deletions(-) create mode 100644 llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.ptr.buffer.load.lds.gfx950.ll create mode 100644 llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.buffer.load.lds.gfx950.ll create mode 100644 llvm/test/MC/AMDGPU/mubuf-gfx950.s diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td index f43ab50d2ea441..360af786c5160d 100644 --- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td +++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td @@ -1674,7 +1674,7 @@ class AMDGPURawBufferLoadLDS : Intrinsic < [], [llvm_v4i32_ty, // rsrc(SGPR) LLVMQualPointerType<3>,// LDS base offset - llvm_i32_ty, // Data byte size: 1/2/4 + llvm_i32_ty, // Data byte size: 1/2/4 (/12/16 for gfx950) llvm_i32_ty, // voffset(VGPR, included in bounds checking and swizzling) llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling) llvm_i32_ty, // imm offset(imm, included in bounds checking and swizzling) @@ -1693,7 +1693,7 @@ class AMDGPURawPtrBufferLoadLDS : Intrinsic < [], [AMDGPUBufferRsrcTy,// rsrc(SGPR) LLVMQualPointerType<3>,// LDS base offset - llvm_i32_ty, // Data byte size: 1/2/4 + llvm_i32_ty, // Data byte 
size: 1/2/4 (/12/16 for gfx950) llvm_i32_ty, // voffset(VGPR, included in bounds checking and swizzling) llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling) llvm_i32_ty, // imm offset(imm, included in bounds checking and swizzling) @@ -1715,7 +1715,7 @@ class AMDGPUStructBufferLoadLDS : Intrinsic < [], [llvm_v4i32_ty, // rsrc(SGPR) LLVMQualPointerType<3>,// LDS base offset - llvm_i32_ty, // Data byte size: 1/2/4 + llvm_i32_ty, // Data byte size: 1/2/4 (/12/16 for gfx950) llvm_i32_ty, // vindex(VGPR) llvm_i32_ty, // voffset(VGPR, included in bounds checking and swizzling) llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling) @@ -1735,7 +1735,7 @@ class AMDGPUStructPtrBufferLoadLDS : Intrinsic < [], [AMDGPUBufferRsrcTy,// rsrc(SGPR) LLVMQualPointerType<3>,// LDS base offset - llvm_i32_ty, // Data byte size: 1/2/4 + llvm_i32_ty, // Data byte size: 1/2/4 (/12/16 for gfx950) llvm_i32_ty, // vindex(VGPR) llvm_i32_ty, // voffset(VGPR, included in bounds checking and swizzling) llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp index a6ef0069f134bd..3522ece24f1c45 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp @@ -3240,6 +3240,24 @@ bool AMDGPUInstructionSelector::selectBufferLoadLds(MachineInstr &MI) const { : HasVOffset ? AMDGPU::BUFFER_LOAD_DWORD_LDS_OFFEN : AMDGPU::BUFFER_LOAD_DWORD_LDS_OFFSET; break; + case 12: +if (!Subtarget->hasLDSLoadB96_B128()) + return false; + +Opc = HasVIndex ? HasVOffset ? AMDGPU::BUFFER_LOAD_DWORDX3_LDS_BOTHEN + : AMDGPU::BUFFER_LOAD_DWORDX3_LDS_IDXEN +: HasVOffset ? AMDGPU::BUFFER_LOAD_DWORDX3_LDS_OFFEN + : AMDGPU::BUFFER_LOAD_DWORDX3_LDS_OFFSET; +break; + case 16: +if (!Subtarget->hasLDSLoadB96_B128()) + return false; + +Opc = HasVIndex ? HasVOffset ? 
AMDGPU::BUFFER_LOAD_DWORDX4_LDS_BOTHEN + : AMDGPU::BUFFER_LOAD_DWORDX4_LDS_IDXEN +: HasVOffset ? AMDGPU::BUFFER_LOAD_DWORDX4_LDS_OFFEN + : AMDGPU::BUFFER_LOAD_DWORDX4_LDS_OFFSET; +break; } Ma
[llvm-branch-commits] [clang] [libcxx] [libcxxabi] [Fuchsia][cmake] Allow using FatLTO when building runtimes (PR #112277)
https://github.com/ilovepi updated https://github.com/llvm/llvm-project/pull/112277 >From 1dafa521d5a1e10e3f79f63a661b2e14acff5a4a Mon Sep 17 00:00:00 2001 From: Paul Kirth Date: Mon, 14 Oct 2024 15:06:38 -0700 Subject: [PATCH 1/4] =?UTF-8?q?[=F0=9D=98=80=F0=9D=97=BD=F0=9D=97=BF]=20in?= =?UTF-8?q?itial=20version?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Created using spr 1.3.4 --- libcxx/CMakeLists.txt| 4 libcxx/src/CMakeLists.txt| 10 ++ libcxxabi/src/CMakeLists.txt | 10 ++ 3 files changed, 24 insertions(+) diff --git a/libcxx/CMakeLists.txt b/libcxx/CMakeLists.txt index f1942e963ccc31..5a68237f7336c5 100644 --- a/libcxx/CMakeLists.txt +++ b/libcxx/CMakeLists.txt @@ -102,6 +102,10 @@ option(LIBCXX_ENABLE_WIDE_CHARACTERS support the C functionality for wide characters. When wide characters are not supported, several parts of the library will be disabled, notably the wide character specializations of std::basic_string." ON) + option(LIBCXX_ENABLE_FATLTO + "Whether to compile libc++ with FatLTO enabled." ON) + option(LIBCXX_ENABLE_LTO + "Whether to compile libc++ with LTO enabled." ON) # To use time zone support in libc++ the platform needs to have the IANA # database installed. 
Libc++ will fail to build if this is enabled on a diff --git a/libcxx/src/CMakeLists.txt b/libcxx/src/CMakeLists.txt index b187677ff2db52..670db758f53173 100644 --- a/libcxx/src/CMakeLists.txt +++ b/libcxx/src/CMakeLists.txt @@ -173,6 +173,16 @@ if (APPLE AND LLVM_USE_SANITIZER) endif() endif() + +if(LIBCXX_ENABLE_LTO) + list(APPEND LIBCXX_COMPILE_FLAGS "-flto") + list(APPEND LIBCXX_LINK_FLAGS "-flto") +endif() +if(LIBCXX_ENABLE_FATLTO) + list(APPEND LIBCXX_COMPILE_FLAGS "-ffat-lto-objects") + list(APPEND LIBCXX_LINK_FLAGS "-ffat-lto-objects") +endif() + split_list(LIBCXX_COMPILE_FLAGS) split_list(LIBCXX_LINK_FLAGS) diff --git a/libcxxabi/src/CMakeLists.txt b/libcxxabi/src/CMakeLists.txt index 480e528b819bb9..822ede39c6a525 100644 --- a/libcxxabi/src/CMakeLists.txt +++ b/libcxxabi/src/CMakeLists.txt @@ -143,6 +143,15 @@ if ( APPLE ) endif() endif() +if(LIBCXX_ENABLE_LTO) + list(APPEND LIBCXXABI_COMPILE_FLAGS "-flto") + list(APPEND LIBCXXABI_LINK_FLAGS "-flto") +endif() +if(LIBCXX_ENABLE_FATLTO) + list(APPEND LIBCXXABI_COMPILE_FLAGS "-ffat-lto-objects") + list(APPEND LIBCXXABI_LINK_FLAGS "-ffat-lto-objects") +endif() + split_list(LIBCXXABI_COMPILE_FLAGS) split_list(LIBCXXABI_LINK_FLAGS) @@ -154,6 +163,7 @@ endif() include(WarningFlags) + # Build the shared library. 
add_library(cxxabi_shared_objects OBJECT EXCLUDE_FROM_ALL ${LIBCXXABI_SOURCES} ${LIBCXXABI_HEADERS}) cxx_add_warning_flags(cxxabi_shared_objects ${LIBCXXABI_ENABLE_WERROR} ${LIBCXXABI_ENABLE_PEDANTIC}) >From 38851d29d9eaf5e3c597be3f9f57179f308ba335 Mon Sep 17 00:00:00 2001 From: Paul Kirth Date: Mon, 14 Oct 2024 15:27:36 -0700 Subject: [PATCH 2/4] Remove newline from diff Created using spr 1.3.4 --- libcxxabi/src/CMakeLists.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/libcxxabi/src/CMakeLists.txt b/libcxxabi/src/CMakeLists.txt index 1a1e57aa0077b4..783f17583c62e0 100644 --- a/libcxxabi/src/CMakeLists.txt +++ b/libcxxabi/src/CMakeLists.txt @@ -163,7 +163,6 @@ endif() include(WarningFlags) - # Build the shared library. add_library(cxxabi_shared_objects OBJECT EXCLUDE_FROM_ALL ${LIBCXXABI_SOURCES} ${LIBCXXABI_HEADERS}) cxx_add_warning_flags(cxxabi_shared_objects ${LIBCXXABI_ENABLE_WERROR} ${LIBCXXABI_ENABLE_PEDANTIC}) >From 535f2f2c17a3c80aa12c0106a468a8f2127241fc Mon Sep 17 00:00:00 2001 From: Paul Kirth Date: Wed, 16 Oct 2024 11:20:51 -0700 Subject: [PATCH 3/4] Avoid unnecessary changes to libc++ cmake Created using spr 1.3.4 --- clang/cmake/caches/Fuchsia-stage2.cmake | 8 libcxx/CMakeLists.txt | 4 libcxx/src/CMakeLists.txt | 10 -- libcxxabi/src/CMakeLists.txt| 9 - 4 files changed, 8 insertions(+), 23 deletions(-) diff --git a/clang/cmake/caches/Fuchsia-stage2.cmake b/clang/cmake/caches/Fuchsia-stage2.cmake index 5af98c7b3b3fba..e62f29ecbe6f45 100644 --- a/clang/cmake/caches/Fuchsia-stage2.cmake +++ b/clang/cmake/caches/Fuchsia-stage2.cmake @@ -192,6 +192,10 @@ foreach(target aarch64-unknown-linux-gnu;armv7-unknown-linux-gnueabihf;i386-unkn set(RUNTIMES_${target}_LLVM_TOOLS_DIR "${CMAKE_BINARY_DIR}/bin" CACHE BOOL "") set(RUNTIMES_${target}_LLVM_ENABLE_RUNTIMES "compiler-rt;libcxx;libcxxabi;libunwind" CACHE STRING "") +# Enable FatLTO for Linux and baremetal runtimes +set(RUNTIMES_${target}_LLVM_ENABLE_LTO ON CACHE BOOL "") 
+set(RUNTIMES_${target}_LLVM_ENABLE_FATLTO ON CACHE BOOL "") + # Use .build-id link. list(APPEND RUNTIME_BUILD_ID_LINK "${target}") endif() @@ -274,6 +278,10 @@ if(FUCHSIA_SDK) set(RUNTIMES_${target}+asan+noexcept_LIBCXXABI_ENABLE_EXCEPTIONS OFF CACHE B
[llvm-branch-commits] [clang] [llvm] AMDGPU: Define v_mfma_f32_32x32x16_bf16 for gfx950 (PR #116679)
https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/116679 >From c2e9801ef48929f73f6141c386b6169fa24c6c43 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Tue, 19 Dec 2023 12:46:00 +0700 Subject: [PATCH] AMDGPU: Define v_mfma_f32_32x32x16_bf16 for gfx950 Unlike the existing gfx940 intrinsics using short/i16 in place of bfloat, this uses the natural bfloat type. --- clang/include/clang/Basic/BuiltinsAMDGPU.def | 2 + .../CodeGenOpenCL/builtins-amdgcn-mfma.cl | 6 + .../builtins-amdgcn-error-gfx950-param.cl | 7 + .../builtins-amdgcn-error-gfx950.cl | 5 +- llvm/include/llvm/IR/IntrinsicsAMDGPU.td | 2 + llvm/lib/Target/AMDGPU/SIInstrInfo.td | 1 + llvm/lib/Target/AMDGPU/VOP3PInstructions.td | 6 + .../UniformityAnalysis/AMDGPU/intrinsics.ll | 8 + .../AMDGPU/llvm.amdgcn.mfma.gfx950.bf16.ll| 474 ++ llvm/test/MC/AMDGPU/mai-gfx950.s | 56 ++- .../MC/Disassembler/AMDGPU/gfx950_mai.txt | 27 + llvm/test/tools/llvm-mca/AMDGPU/gfx950.s | 10 +- 12 files changed, 596 insertions(+), 8 deletions(-) create mode 100644 llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.gfx950.bf16.ll diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def b/clang/include/clang/Basic/BuiltinsAMDGPU.def index 6917d8d1aca69d..7ce8f2c1669d67 100644 --- a/clang/include/clang/Basic/BuiltinsAMDGPU.def +++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def @@ -437,6 +437,8 @@ TARGET_BUILTIN(__builtin_amdgcn_cvt_sr_fp8_f32, "ifiiIi", "nc", "fp8-conversion- TARGET_BUILTIN(__builtin_amdgcn_mfma_f32_16x16x32_f16, "V4fV8hV8hV4fIiIiIi", "nc", "gfx950-insts") TARGET_BUILTIN(__builtin_amdgcn_mfma_f32_32x32x16_f16, "V16fV8hV8hV16fIiIiIi", "nc", "gfx950-insts") +TARGET_BUILTIN(__builtin_amdgcn_mfma_f32_32x32x16_bf16, "V16fV8yV8yV16fIiIiIi", "nc", "gfx950-insts") + //===--===// // GFX12+ only builtins. 
//===--===// diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-mfma.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-mfma.cl index a644a60f9ec381..841d8fcad0fee0 100644 --- a/clang/test/CodeGenOpenCL/builtins-amdgcn-mfma.cl +++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-mfma.cl @@ -24,6 +24,7 @@ typedef short v8s __attribute__((ext_vector_type(8))); typedef short v16s __attribute__((ext_vector_type(16))); typedef short v32s __attribute__((ext_vector_type(32))); typedef double v4d __attribute__((ext_vector_type(4))); +typedef __bf16 v8bf16 __attribute__((ext_vector_type(8))); #ifdef MFMA_GFX908_TESTS @@ -424,5 +425,10 @@ v16f test_mfma_f32_32x32x16_f16(v8h a, v8h b, v16f c) return __builtin_amdgcn_mfma_f32_32x32x16_f16(a, b, c, 1, 2, 3); } +// CHECK-GFX950-LABEL: @test_mfma_f32_32x32x16_bf16( +// CHECK-GFX950: tail call <16 x float> @llvm.amdgcn.mfma.f32.32x32x16.bf16(<8 x bfloat> %a, <8 x bfloat> %b, <16 x float> %c, i32 1, i32 2, i32 3) +v16f test_mfma_f32_32x32x16_bf16(v8bf16 a, v8bf16 b, v16f c) { + return __builtin_amdgcn_mfma_f32_32x32x16_bf16(a, b, c, 1, 2, 3); +} #endif diff --git a/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx950-param.cl b/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx950-param.cl index 4c267e2cac5cad..4af67763c40dd2 100644 --- a/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx950-param.cl +++ b/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx950-param.cl @@ -4,6 +4,7 @@ typedef float float4 __attribute__((ext_vector_type(4))); typedef float float16 __attribute__((ext_vector_type(16))); typedef half half8 __attribute__((ext_vector_type(8))); +typedef __bf16 bfloat8 __attribute__((ext_vector_type(8))); void test_mfma_f32_16x16x32_f16(__global float4* out, half8 a, half8 b, float4 c, int X) { @@ -19,3 +20,9 @@ void test_mfma_f32_32x32x16_f16(__global float16* out, half8 a, half8 b, float16 *out = __builtin_amdgcn_mfma_f32_32x32x16_f16(a, b, c, 0, X, 0); // expected-error{{argument to '__builtin_amdgcn_mfma_f32_32x32x16_f16' must be a 
constant integer}} *out = __builtin_amdgcn_mfma_f32_32x32x16_f16(a, b, c, 0, 0, X); // expected-error{{argument to '__builtin_amdgcn_mfma_f32_32x32x16_f16' must be a constant integer}} } + +void test_mfma_f32_32x32x16_bf16(__global float16* out, bfloat8 a, bfloat8 b, float16 c, int X) { + *out = __builtin_amdgcn_mfma_f32_32x32x16_bf16(a, b, c, X, 0, 0); // expected-error{{argument to '__builtin_amdgcn_mfma_f32_32x32x16_bf16' must be a constant integer}} + *out = __builtin_amdgcn_mfma_f32_32x32x16_bf16(a, b, c, 0, X, 0); // expected-error{{argument to '__builtin_amdgcn_mfma_f32_32x32x16_bf16' must be a constant integer}} + *out = __builtin_amdgcn_mfma_f32_32x32x16_bf16(a, b, c, 0, 0, X); // expected-error{{argument to '__builtin_amdgcn_mfma_f32_32x32x16_bf16' must be a constant integer}} +} diff --git a/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx950.cl b/clang/test/S
[llvm-branch-commits] [clang] [llvm] AMDGPU: Add first gfx950 mfma instructions (PR #116312)
arsenm wrote: ### Merge activity * **Nov 18, 4:29 PM EST**: A user started a stack merge that includes this pull request via [Graphite](https://app.graphite.dev/github/pr/llvm/llvm-project/116312). https://github.com/llvm/llvm-project/pull/116312 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [libcxx] [libcxxabi] [Fuchsia][cmake] Allow using FatLTO when building runtimes (PR #112277)
https://github.com/ilovepi edited https://github.com/llvm/llvm-project/pull/112277 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [mlir] [mlir][Transforms] Support 1:N mappings in `ConversionValueMapping` (PR #116524)
https://github.com/matthias-springer updated https://github.com/llvm/llvm-project/pull/116524 >From 7025a8caae81e97022155b8fac8075fc29e24650 Mon Sep 17 00:00:00 2001 From: Matthias Springer Date: Sun, 17 Nov 2024 09:00:45 +0100 Subject: [PATCH 1/2] [mlir][LLVM] `LLVMTypeConverter`: Tighten materialization checks --- .../Conversion/LLVMCommon/TypeConverter.cpp | 32 .../MemRefToLLVM/type-conversion.mlir | 57 ++ mlir/test/lib/Dialect/LLVM/CMakeLists.txt | 1 + mlir/test/lib/Dialect/LLVM/TestPatterns.cpp | 77 +++ mlir/tools/mlir-opt/mlir-opt.cpp | 2 + 5 files changed, 154 insertions(+), 15 deletions(-) create mode 100644 mlir/test/Conversion/MemRefToLLVM/type-conversion.mlir create mode 100644 mlir/test/lib/Dialect/LLVM/TestPatterns.cpp diff --git a/mlir/lib/Conversion/LLVMCommon/TypeConverter.cpp b/mlir/lib/Conversion/LLVMCommon/TypeConverter.cpp index ce91424e7a577e..59b0f5c9b09bcd 100644 --- a/mlir/lib/Conversion/LLVMCommon/TypeConverter.cpp +++ b/mlir/lib/Conversion/LLVMCommon/TypeConverter.cpp @@ -153,6 +153,12 @@ LLVMTypeConverter::LLVMTypeConverter(MLIRContext *ctx, type.isVarArg()); }); + // Helper function that checks if the given value range is a bare pointer. + auto isBarePointer = [](ValueRange values) { +return values.size() == 1 && + isa(values.front().getType()); + }; + // Argument materializations convert from the new block argument types // (multiple SSA values that make up a memref descriptor) back to the // original block argument type. The dialect conversion framework will then @@ -161,11 +167,10 @@ LLVMTypeConverter::LLVMTypeConverter(MLIRContext *ctx, addArgumentMaterialization([&](OpBuilder &builder, UnrankedMemRefType resultType, ValueRange inputs, Location loc) { -if (inputs.size() == 1) { - // Bare pointers are not supported for unranked memrefs because a - // memref descriptor cannot be built just from a bare pointer. 
+// Note: Bare pointers are not supported for unranked memrefs because a +// memref descriptor cannot be built just from a bare pointer. +if (TypeRange(inputs) != getUnrankedMemRefDescriptorFields()) return Value(); -} Value desc = UnrankedMemRefDescriptor::pack(builder, loc, *this, resultType, inputs); // An argument materialization must return a value of type @@ -177,20 +182,17 @@ LLVMTypeConverter::LLVMTypeConverter(MLIRContext *ctx, addArgumentMaterialization([&](OpBuilder &builder, MemRefType resultType, ValueRange inputs, Location loc) { Value desc; -if (inputs.size() == 1) { - // This is a bare pointer. We allow bare pointers only for function entry - // blocks. - BlockArgument barePtr = dyn_cast(inputs.front()); - if (!barePtr) -return Value(); - Block *block = barePtr.getOwner(); - if (!block->isEntryBlock() || - !isa(block->getParentOp())) -return Value(); +if (isBarePointer(inputs)) { desc = MemRefDescriptor::fromStaticShape(builder, loc, *this, resultType, inputs[0]); -} else { +} else if (TypeRange(inputs) == + getMemRefDescriptorFields(resultType, + /*unpackAggregates=*/true)) { desc = MemRefDescriptor::pack(builder, loc, *this, resultType, inputs); +} else { + // The inputs are neither a bare pointer nor an unpacked memref + // descriptor. This materialization function cannot be used. + return Value(); } // An argument materialization must return a value of type `resultType`, // so insert a cast from the memref descriptor type (!llvm.struct) to the diff --git a/mlir/test/Conversion/MemRefToLLVM/type-conversion.mlir b/mlir/test/Conversion/MemRefToLLVM/type-conversion.mlir new file mode 100644 index 00..0288aa11313c72 --- /dev/null +++ b/mlir/test/Conversion/MemRefToLLVM/type-conversion.mlir @@ -0,0 +1,57 @@ +// RUN: mlir-opt %s -test-llvm-legalize-patterns -split-input-file + +// Test the argument materializer for ranked MemRef types. 
+ +// CHECK-LABEL: func @construct_ranked_memref_descriptor( +// CHECK: llvm.mlir.undef : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> +// CHECK-COUNT-7: llvm.insertvalue +// CHECK: builtin.unrealized_conversion_cast %{{.*}} : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> to memref<5x4xf32> +func.func @construct_ranked_memref_descriptor(%arg0: !llvm.ptr, %arg1: !llvm.ptr, %arg2: i64, %arg3: i64, %arg4: i64, %arg5: i64, %arg6: i64) { + %0 = "test.direct_replacement"(%arg0, %arg1, %arg2, %arg3, %arg4, %arg5, %arg6) : (!llvm.ptr, !llvm.ptr, i64, i64, i64, i64, i64) -> (memref<5x4xf32>) + "test.legal_op"(%0) : (memref<5x
[llvm-branch-commits] [llvm] [NFC] Use unique_ptr in SparseSet (PR #116617)
https://github.com/optimisan ready_for_review https://github.com/llvm/llvm-project/pull/116617 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [CodeGen][NewPM] Port SpillPlacement analysis to NPM (PR #116618)
optimisan wrote: > [!WARNING] > This pull request is not mergeable via GitHub because a downstack PR is open. Once all requirements are satisfied, merge this PR as a stack [on Graphite](https://app.graphite.dev/github/pr/llvm/llvm-project/116618?utm_source=stack-comment-downstack-mergeability-warning). > [Learn more](https://graphite.dev/docs/merge-pull-requests) * **#116618** [![Graphite](https://static.graphite.dev/graphite-32x32-black.png)](https://app.graphite.dev/github/pr/llvm/llvm-project/116618?utm_source=stack-comment-icon) 👈 [(View in Graphite)](https://app.graphite.dev/github/pr/llvm/llvm-project/116618?utm_source=stack-comment-view-in-graphite) * **#116617** [![Graphite](https://static.graphite.dev/graphite-32x32-black.png)](https://app.graphite.dev/github/pr/llvm/llvm-project/116617?utm_source=stack-comment-icon) * **#116616** [![Graphite](https://static.graphite.dev/graphite-32x32-black.png)](https://app.graphite.dev/github/pr/llvm/llvm-project/116616?utm_source=stack-comment-icon) * `main` This stack of pull requests is managed by [Graphite](https://graphite.dev?utm-source=stack-comment). Learn more about [stacking](https://stacking.dev/?utm_source=stack-comment). https://github.com/llvm/llvm-project/pull/116618 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [mlir] [mlir][Transforms] Support 1:N mappings in `ConversionValueMapping` (PR #116524)
https://github.com/matthias-springer edited https://github.com/llvm/llvm-project/pull/116524 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [NFC] Use unique_ptr in SparseSet (PR #116617)
optimisan wrote: > [!WARNING] > This pull request is not mergeable via GitHub because a downstack PR is open. Once all requirements are satisfied, merge this PR as a stack [on Graphite](https://app.graphite.dev/github/pr/llvm/llvm-project/116617?utm_source=stack-comment-downstack-mergeability-warning). > [Learn more](https://graphite.dev/docs/merge-pull-requests) * **#116618** [![Graphite](https://static.graphite.dev/graphite-32x32-black.png)](https://app.graphite.dev/github/pr/llvm/llvm-project/116618?utm_source=stack-comment-icon) * **#116617** [![Graphite](https://static.graphite.dev/graphite-32x32-black.png)](https://app.graphite.dev/github/pr/llvm/llvm-project/116617?utm_source=stack-comment-icon) 👈 [(View in Graphite)](https://app.graphite.dev/github/pr/llvm/llvm-project/116617?utm_source=stack-comment-view-in-graphite) * **#116616** [![Graphite](https://static.graphite.dev/graphite-32x32-black.png)](https://app.graphite.dev/github/pr/llvm/llvm-project/116616?utm_source=stack-comment-icon) * `main` This stack of pull requests is managed by [Graphite](https://graphite.dev?utm-source=stack-comment). Learn more about [stacking](https://stacking.dev/?utm_source=stack-comment). https://github.com/llvm/llvm-project/pull/116617 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [CodeGen][NewPM] Port SpillPlacement analysis to NPM (PR #116618)
llvmbot wrote: @llvm/pr-subscribers-llvm-regalloc Author: Akshat Oke (optimisan) Changes I am not sure how to test this. --- Full diff: https://github.com/llvm/llvm-project/pull/116618.diff 4 Files Affected: - (modified) llvm/include/llvm/InitializePasses.h (+1-1) - (modified) llvm/lib/CodeGen/RegAllocGreedy.cpp (+3-3) - (modified) llvm/lib/CodeGen/SpillPlacement.cpp (+58-33) - (modified) llvm/lib/CodeGen/SpillPlacement.h (+42-10) ``diff diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h index fb8356b9c98cb9..728b178e0cdad7 100644 --- a/llvm/include/llvm/InitializePasses.h +++ b/llvm/include/llvm/InitializePasses.h @@ -289,7 +289,7 @@ void initializeSinkingLegacyPassPass(PassRegistry &); void initializeSjLjEHPreparePass(PassRegistry &); void initializeSlotIndexesWrapperPassPass(PassRegistry &); void initializeSpeculativeExecutionLegacyPassPass(PassRegistry &); -void initializeSpillPlacementPass(PassRegistry &); +void initializeSpillPlacementWrapperLegacyPass(PassRegistry &); void initializeStackColoringLegacyPass(PassRegistry &); void initializeStackFrameLayoutAnalysisPassPass(PassRegistry &); void initializeStackMapLivenessPass(PassRegistry &); diff --git a/llvm/lib/CodeGen/RegAllocGreedy.cpp b/llvm/lib/CodeGen/RegAllocGreedy.cpp index 3542bfe18af46f..3fdf2d6e07a75f 100644 --- a/llvm/lib/CodeGen/RegAllocGreedy.cpp +++ b/llvm/lib/CodeGen/RegAllocGreedy.cpp @@ -162,7 +162,7 @@ INITIALIZE_PASS_DEPENDENCY(MachineLoopInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(VirtRegMapWrapperLegacy) INITIALIZE_PASS_DEPENDENCY(LiveRegMatrixWrapperLegacy) INITIALIZE_PASS_DEPENDENCY(EdgeBundlesWrapperLegacy) -INITIALIZE_PASS_DEPENDENCY(SpillPlacement) +INITIALIZE_PASS_DEPENDENCY(SpillPlacementWrapperLegacy) INITIALIZE_PASS_DEPENDENCY(MachineOptimizationRemarkEmitterPass) INITIALIZE_PASS_DEPENDENCY(RegAllocEvictionAdvisorAnalysis) INITIALIZE_PASS_DEPENDENCY(RegAllocPriorityAdvisorAnalysis) @@ -217,7 +217,7 @@ void 
RAGreedy::getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired(); AU.addPreserved(); AU.addRequired(); - AU.addRequired(); + AU.addRequired(); AU.addRequired(); AU.addRequired(); AU.addRequired(); @@ -2731,7 +2731,7 @@ bool RAGreedy::runOnMachineFunction(MachineFunction &mf) { ORE = &getAnalysis().getORE(); Loops = &getAnalysis().getLI(); Bundles = &getAnalysis().getEdgeBundles(); - SpillPlacer = &getAnalysis(); + SpillPlacer = &getAnalysis().getResult(); DebugVars = &getAnalysis(); initializeCSRCost(); diff --git a/llvm/lib/CodeGen/SpillPlacement.cpp b/llvm/lib/CodeGen/SpillPlacement.cpp index 318e2b19322bb4..c9baabf6161d3a 100644 --- a/llvm/lib/CodeGen/SpillPlacement.cpp +++ b/llvm/lib/CodeGen/SpillPlacement.cpp @@ -44,17 +44,17 @@ using namespace llvm; #define DEBUG_TYPE "spill-code-placement" -char SpillPlacement::ID = 0; +char SpillPlacementWrapperLegacy::ID = 0; -char &llvm::SpillPlacementID = SpillPlacement::ID; +char &llvm::SpillPlacementID = SpillPlacementWrapperLegacy::ID; -INITIALIZE_PASS_BEGIN(SpillPlacement, DEBUG_TYPE, +INITIALIZE_PASS_BEGIN(SpillPlacementWrapperLegacy, DEBUG_TYPE, "Spill Code Placement Analysis", true, true) INITIALIZE_PASS_DEPENDENCY(EdgeBundlesWrapperLegacy) -INITIALIZE_PASS_END(SpillPlacement, DEBUG_TYPE, +INITIALIZE_PASS_END(SpillPlacementWrapperLegacy, DEBUG_TYPE, "Spill Code Placement Analysis", true, true) -void SpillPlacement::getAnalysisUsage(AnalysisUsage &AU) const { +void SpillPlacementWrapperLegacy::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesAll(); AU.addRequired(); AU.addRequiredTransitive(); @@ -189,32 +189,57 @@ struct SpillPlacement::Node { } }; -bool SpillPlacement::runOnMachineFunction(MachineFunction &mf) { +bool SpillPlacementWrapperLegacy::runOnMachineFunction(MachineFunction &MF) { + auto *Bundles = &getAnalysis().getEdgeBundles(); + auto *MBFI = &getAnalysis().getMBFI(); + + Impl.reset(new SpillPlacement(Bundles, MBFI)); + Impl->run(MF); + return false; +} + +AnalysisKey 
SpillPlacementAnalysis::Key; + +SpillPlacement +SpillPlacementAnalysis::run(MachineFunction &MF, +MachineFunctionAnalysisManager &MFAM) { + auto *Bundles = &MFAM.getResult(MF); + auto *MBFI = &MFAM.getResult(MF); + SpillPlacement Impl(Bundles, MBFI); + Impl.run(MF); + return Impl; +} + +bool SpillPlacementAnalysis::Result::invalidate( +MachineFunction &MF, const PreservedAnalyses &PA, +MachineFunctionAnalysisManager::Invalidator &Inv) { + auto PAC = PA.getChecker(); + return !(PAC.preserved() || + PAC.preservedSet>()) || + Inv.invalidate(MF, PA) || + Inv.invalidate(MF, PA); +} + +void SpillPlacement::arrayDeleter(Node *N) { + if (N) +delete[] N; +} + +void SpillPlacement::run(MachineFunction &mf) { MF = &mf; - bundles = &getAnalysis().getEdgeBundles(); assert(!nodes && "Leaking node array"); - nodes =
[llvm-branch-commits] [llvm] [CodeGen][NewPM] Port SpillPlacement analysis to NPM (PR #116618)
https://github.com/optimisan edited https://github.com/llvm/llvm-project/pull/116618 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [CodeGen][NewPM] Port SpillPlacement analysis to NPM (PR #116618)
https://github.com/optimisan ready_for_review https://github.com/llvm/llvm-project/pull/116618 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [mlir] [mlir][Transforms] Support 1:N mappings in `ConversionValueMapping` (PR #116524)
https://github.com/matthias-springer edited https://github.com/llvm/llvm-project/pull/116524 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [CodeGen][NewPM] Port SpillPlacement analysis to NPM (PR #116618)
https://github.com/optimisan created https://github.com/llvm/llvm-project/pull/116618 None >From c791eaa8768073b3ef770a59859346a859bd7a7f Mon Sep 17 00:00:00 2001 From: Akshat Oke Date: Mon, 18 Nov 2024 12:42:00 + Subject: [PATCH] [CodeGen][NewPM] Port SpillPlacement analysis to NPM --- llvm/include/llvm/InitializePasses.h | 2 +- llvm/lib/CodeGen/RegAllocGreedy.cpp | 6 +- llvm/lib/CodeGen/SpillPlacement.cpp | 91 ++-- llvm/lib/CodeGen/SpillPlacement.h| 52 +--- 4 files changed, 104 insertions(+), 47 deletions(-) diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h index fb8356b9c98cb9..728b178e0cdad7 100644 --- a/llvm/include/llvm/InitializePasses.h +++ b/llvm/include/llvm/InitializePasses.h @@ -289,7 +289,7 @@ void initializeSinkingLegacyPassPass(PassRegistry &); void initializeSjLjEHPreparePass(PassRegistry &); void initializeSlotIndexesWrapperPassPass(PassRegistry &); void initializeSpeculativeExecutionLegacyPassPass(PassRegistry &); -void initializeSpillPlacementPass(PassRegistry &); +void initializeSpillPlacementWrapperLegacyPass(PassRegistry &); void initializeStackColoringLegacyPass(PassRegistry &); void initializeStackFrameLayoutAnalysisPassPass(PassRegistry &); void initializeStackMapLivenessPass(PassRegistry &); diff --git a/llvm/lib/CodeGen/RegAllocGreedy.cpp b/llvm/lib/CodeGen/RegAllocGreedy.cpp index 3542bfe18af46f..3fdf2d6e07a75f 100644 --- a/llvm/lib/CodeGen/RegAllocGreedy.cpp +++ b/llvm/lib/CodeGen/RegAllocGreedy.cpp @@ -162,7 +162,7 @@ INITIALIZE_PASS_DEPENDENCY(MachineLoopInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(VirtRegMapWrapperLegacy) INITIALIZE_PASS_DEPENDENCY(LiveRegMatrixWrapperLegacy) INITIALIZE_PASS_DEPENDENCY(EdgeBundlesWrapperLegacy) -INITIALIZE_PASS_DEPENDENCY(SpillPlacement) +INITIALIZE_PASS_DEPENDENCY(SpillPlacementWrapperLegacy) INITIALIZE_PASS_DEPENDENCY(MachineOptimizationRemarkEmitterPass) INITIALIZE_PASS_DEPENDENCY(RegAllocEvictionAdvisorAnalysis) 
INITIALIZE_PASS_DEPENDENCY(RegAllocPriorityAdvisorAnalysis) @@ -217,7 +217,7 @@ void RAGreedy::getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired(); AU.addPreserved(); AU.addRequired(); - AU.addRequired(); + AU.addRequired(); AU.addRequired(); AU.addRequired(); AU.addRequired(); @@ -2731,7 +2731,7 @@ bool RAGreedy::runOnMachineFunction(MachineFunction &mf) { ORE = &getAnalysis().getORE(); Loops = &getAnalysis().getLI(); Bundles = &getAnalysis().getEdgeBundles(); - SpillPlacer = &getAnalysis(); + SpillPlacer = &getAnalysis().getResult(); DebugVars = &getAnalysis(); initializeCSRCost(); diff --git a/llvm/lib/CodeGen/SpillPlacement.cpp b/llvm/lib/CodeGen/SpillPlacement.cpp index 318e2b19322bb4..c9baabf6161d3a 100644 --- a/llvm/lib/CodeGen/SpillPlacement.cpp +++ b/llvm/lib/CodeGen/SpillPlacement.cpp @@ -44,17 +44,17 @@ using namespace llvm; #define DEBUG_TYPE "spill-code-placement" -char SpillPlacement::ID = 0; +char SpillPlacementWrapperLegacy::ID = 0; -char &llvm::SpillPlacementID = SpillPlacement::ID; +char &llvm::SpillPlacementID = SpillPlacementWrapperLegacy::ID; -INITIALIZE_PASS_BEGIN(SpillPlacement, DEBUG_TYPE, +INITIALIZE_PASS_BEGIN(SpillPlacementWrapperLegacy, DEBUG_TYPE, "Spill Code Placement Analysis", true, true) INITIALIZE_PASS_DEPENDENCY(EdgeBundlesWrapperLegacy) -INITIALIZE_PASS_END(SpillPlacement, DEBUG_TYPE, +INITIALIZE_PASS_END(SpillPlacementWrapperLegacy, DEBUG_TYPE, "Spill Code Placement Analysis", true, true) -void SpillPlacement::getAnalysisUsage(AnalysisUsage &AU) const { +void SpillPlacementWrapperLegacy::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesAll(); AU.addRequired(); AU.addRequiredTransitive(); @@ -189,32 +189,57 @@ struct SpillPlacement::Node { } }; -bool SpillPlacement::runOnMachineFunction(MachineFunction &mf) { +bool SpillPlacementWrapperLegacy::runOnMachineFunction(MachineFunction &MF) { + auto *Bundles = &getAnalysis().getEdgeBundles(); + auto *MBFI = &getAnalysis().getMBFI(); + + Impl.reset(new 
SpillPlacement(Bundles, MBFI)); + Impl->run(MF); + return false; +} + +AnalysisKey SpillPlacementAnalysis::Key; + +SpillPlacement +SpillPlacementAnalysis::run(MachineFunction &MF, +MachineFunctionAnalysisManager &MFAM) { + auto *Bundles = &MFAM.getResult(MF); + auto *MBFI = &MFAM.getResult(MF); + SpillPlacement Impl(Bundles, MBFI); + Impl.run(MF); + return Impl; +} + +bool SpillPlacementAnalysis::Result::invalidate( +MachineFunction &MF, const PreservedAnalyses &PA, +MachineFunctionAnalysisManager::Invalidator &Inv) { + auto PAC = PA.getChecker(); + return !(PAC.preserved() || + PAC.preservedSet>()) || + Inv.invalidate(MF, PA) || + Inv.invalidate(MF, PA); +} + +void SpillPlacement::arrayDeleter(Node *N) { + if (N) +delete[] N; +} + +void SpillPlacement::run(MachineFunction &mf) { MF =
[llvm-branch-commits] [llvm] [CodeGen][NewPM] Port SpillPlacement analysis to NPM (PR #116618)
https://github.com/optimisan edited https://github.com/llvm/llvm-project/pull/116618 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [NewPM] Introduce MFAnalysisGetter for a common analysis getter (PR #116166)
https://github.com/optimisan closed https://github.com/llvm/llvm-project/pull/116166 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [llvm] [RISCV] Support __builtin_cpu_is (PR #116231)
https://github.com/wangpc-pp updated https://github.com/llvm/llvm-project/pull/116231 >From 9686a2c5c5276289e72d9098f497a9f246a1c457 Mon Sep 17 00:00:00 2001 From: Wang Pengcheng Date: Thu, 14 Nov 2024 22:06:45 +0800 Subject: [PATCH 1/4] Remove stale CHECKs Created using spr 1.3.6-beta.1 --- clang/test/CodeGen/builtin-cpu-is.c | 20 1 file changed, 20 deletions(-) diff --git a/clang/test/CodeGen/builtin-cpu-is.c b/clang/test/CodeGen/builtin-cpu-is.c index e4a2071cf46795..b8dd97eeacebcf 100644 --- a/clang/test/CodeGen/builtin-cpu-is.c +++ b/clang/test/CodeGen/builtin-cpu-is.c @@ -7,8 +7,6 @@ // global, the bit grab, and the icmp correct. extern void a(const char *); -// CHECK: @__cpu_model = external dso_local global { i32, i32, i32, [1 x i32] } - // CHECK-X86-LABEL: define dso_local void @intel( // CHECK-X86-SAME: ) #[[ATTR0:[0-9]+]] { // CHECK-X86-NEXT: [[ENTRY:.*:]] @@ -24,9 +22,6 @@ extern void a(const char *); void intel(void) { if (__builtin_cpu_is("intel")) a("intel"); - - // CHECK: [[LOAD:%[^ ]+]] = load i32, ptr @__cpu_model - // CHECK: = icmp eq i32 [[LOAD]], 1 } // CHECK-X86-LABEL: define dso_local void @amd( @@ -44,9 +39,6 @@ void intel(void) { void amd(void) { if (__builtin_cpu_is("amd")) a("amd"); - - // CHECK: [[LOAD:%[^ ]+]] = load i32, ptr @__cpu_model - // CHECK: = icmp eq i32 [[LOAD]], 2 } // CHECK-X86-LABEL: define dso_local void @atom( @@ -64,9 +56,6 @@ void amd(void) { void atom(void) { if (__builtin_cpu_is("atom")) a("atom"); - - // CHECK: [[LOAD:%[^ ]+]] = load i32, ptr getelementptr inbounds ({ i32, i32, i32, [1 x i32] }, ptr @__cpu_model, i32 0, i32 1) - // CHECK: = icmp eq i32 [[LOAD]], 1 } // CHECK-X86-LABEL: define dso_local void @amdfam10h( @@ -84,9 +73,6 @@ void atom(void) { void amdfam10h(void) { if (__builtin_cpu_is("amdfam10h")) a("amdfam10h"); - - // CHECK: [[LOAD:%[^ ]+]] = load i32, ptr getelementptr inbounds ({ i32, i32, i32, [1 x i32] }, ptr @__cpu_model, i32 0, i32 1) - // CHECK: = icmp eq i32 [[LOAD]], 4 } // CHECK-X86-LABEL: 
define dso_local void @barcelona( @@ -104,9 +90,6 @@ void amdfam10h(void) { void barcelona(void) { if (__builtin_cpu_is("barcelona")) a("barcelona"); - - // CHECK: [[LOAD:%[^ ]+]] = load i32, ptr getelementptr inbounds ({ i32, i32, i32, [1 x i32] }, ptr @__cpu_model, i32 0, i32 2) - // CHECK: = icmp eq i32 [[LOAD]], 4 } // CHECK-X86-LABEL: define dso_local void @nehalem( @@ -124,9 +107,6 @@ void barcelona(void) { void nehalem(void) { if (__builtin_cpu_is("nehalem")) a("nehalem"); - - // CHECK: [[LOAD:%[^ ]+]] = load i32, ptr getelementptr inbounds ({ i32, i32, i32, [1 x i32] }, ptr @__cpu_model, i32 0, i32 2) - // CHECK: = icmp eq i32 [[LOAD]], 1 } #endif >From 2bb2d5079b5bf98ba9f87e082ca3e67ab70068aa Mon Sep 17 00:00:00 2001 From: Wang Pengcheng Date: Thu, 14 Nov 2024 22:12:36 +0800 Subject: [PATCH 2/4] Simplify test Created using spr 1.3.6-beta.1 --- clang/test/CodeGen/builtin-cpu-is.c | 25 ++--- 1 file changed, 6 insertions(+), 19 deletions(-) diff --git a/clang/test/CodeGen/builtin-cpu-is.c b/clang/test/CodeGen/builtin-cpu-is.c index b8dd97eeacebcf..8e78213a7cfcfb 100644 --- a/clang/test/CodeGen/builtin-cpu-is.c +++ b/clang/test/CodeGen/builtin-cpu-is.c @@ -111,12 +111,9 @@ void nehalem(void) { #endif #ifdef __riscv -// CHECK-RV64-LABEL: define dso_local signext i32 @test_riscv( -// CHECK-RV64-SAME: i32 noundef signext [[A:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-LABEL: define dso_local signext i32 @test_cpu_is_veyron_v1( +// CHECK-RV64-SAME: ) #[[ATTR0:[0-9]+]] { // CHECK-RV64-NEXT: [[ENTRY:.*:]] -// CHECK-RV64-NEXT:[[RETVAL:%.*]] = alloca i32, align 4 -// CHECK-RV64-NEXT:[[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK-RV64-NEXT:store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK-RV64-NEXT:[[TMP0:%.*]] = load i32, ptr @__riscv_cpu_model, align 4 // CHECK-RV64-NEXT:[[TMP1:%.*]] = icmp eq i32 [[TMP0]], 1567 // CHECK-RV64-NEXT:[[TMP2:%.*]] = load i64, ptr getelementptr inbounds ({ i32, i64, i64 }, ptr @__riscv_cpu_model, i32 0, i32 1), align 8 @@ -125,20 +122,10 @@ 
void nehalem(void) { // CHECK-RV64-NEXT:[[TMP5:%.*]] = load i64, ptr getelementptr inbounds ({ i32, i64, i64 }, ptr @__riscv_cpu_model, i32 0, i32 2), align 8 // CHECK-RV64-NEXT:[[TMP6:%.*]] = icmp eq i64 [[TMP5]], 273 // CHECK-RV64-NEXT:[[TMP7:%.*]] = and i1 [[TMP4]], [[TMP6]] -// CHECK-RV64-NEXT:br i1 [[TMP7]], label %[[IF_THEN:.*]], label %[[IF_END:.*]] -// CHECK-RV64: [[IF_THEN]]: -// CHECK-RV64-NEXT:store i32 3, ptr [[RETVAL]], align 4 -// CHECK-RV64-NEXT:br label %[[RETURN:.*]] -// CHECK-RV64: [[IF_END]]: -// CHECK-RV64-NEXT:store i32 0, ptr [[RETVAL]], align 4 -// CHECK-RV64-NEXT:br label %[[RETURN]] -// CHECK-RV64: [[RETURN]]: -// CHECK-RV64-NEXT:[[TMP8:%.*]] = load i32, ptr [[RETVAL]], align 4 -// CHECK-RV64-NEXT:ret i32 [[TM
[llvm-branch-commits] [llvm] [NFC] Use unique_ptr in SparseSet (PR #116617)
llvmbot wrote: @llvm/pr-subscribers-llvm-adt Author: Akshat Oke (optimisan) Changes This allows implementing the move constructor. --- Full diff: https://github.com/llvm/llvm-project/pull/116617.diff 1 Files Affected: - (modified) llvm/include/llvm/ADT/SparseSet.h (+11-7) ``diff diff --git a/llvm/include/llvm/ADT/SparseSet.h b/llvm/include/llvm/ADT/SparseSet.h index c7793117ff5408..1adae0d4595ac4 100644 --- a/llvm/include/llvm/ADT/SparseSet.h +++ b/llvm/include/llvm/ADT/SparseSet.h @@ -129,7 +129,12 @@ class SparseSet { using DenseT = SmallVector; using size_type = unsigned; DenseT Dense; - SparseT *Sparse = nullptr; + + struct Deleter { +void operator()(SparseT *S) { free(S); } + }; + std::unique_ptr Sparse; + unsigned Universe = 0; KeyFunctorT KeyIndexOf; SparseSetValFunctor ValIndexOf; @@ -144,7 +149,7 @@ class SparseSet { SparseSet() = default; SparseSet(const SparseSet &) = delete; SparseSet &operator=(const SparseSet &) = delete; - ~SparseSet() { free(Sparse); } + SparseSet(SparseSet &&) = default; /// setUniverse - Set the universe size which determines the largest key the /// set can hold. The universe must be sized before any elements can be @@ -159,11 +164,10 @@ class SparseSet { // Hysteresis prevents needless reallocations. if (U >= Universe/4 && U <= Universe) return; -free(Sparse); // The Sparse array doesn't actually need to be initialized, so malloc // would be enough here, but that will cause tools like valgrind to // complain about branching on uninitialized data. 
-Sparse = static_cast<SparseT *>(safe_calloc(U, sizeof(SparseT))); +Sparse.reset(static_cast<SparseT *>(safe_calloc(U, sizeof(SparseT)))); Universe = U; } @@ -205,7 +209,7 @@ class SparseSet { assert(Idx < Universe && "Key out of range"); assert(Sparse != nullptr && "Invalid sparse type"); const unsigned Stride = std::numeric_limits<SparseT>::max() + 1u; -for (unsigned i = Sparse[Idx], e = size(); i < e; i += Stride) { +for (unsigned i = Sparse.get()[Idx], e = size(); i < e; i += Stride) { const unsigned FoundIdx = ValIndexOf(Dense[i]); assert(FoundIdx < Universe && "Invalid key in set. Did object mutate?"); if (Idx == FoundIdx) @@ -255,7 +259,7 @@ class SparseSet { iterator I = findIndex(Idx); if (I != end()) return std::make_pair(I, false); -Sparse[Idx] = size(); +Sparse.get()[Idx] = size(); Dense.push_back(Val); return std::make_pair(end() - 1, true); } @@ -292,7 +296,7 @@ class SparseSet { *I = Dense.back(); unsigned BackIdx = ValIndexOf(Dense.back()); assert(BackIdx < Universe && "Invalid key in set. Did object mutate?"); - Sparse[BackIdx] = I - begin(); + Sparse.get()[BackIdx] = I - begin(); } // This depends on SmallVector::pop_back() not invalidating iterators. // std::vector::pop_back() doesn't give that guarantee. `` https://github.com/llvm/llvm-project/pull/116617 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [mlir] [mlir][Transforms] Support 1:N mappings in `ConversionValueMapping` (PR #116524)
https://github.com/matthias-springer updated https://github.com/llvm/llvm-project/pull/116524 >From e3946a5496cdf64ff6a8a5c7e1b117f4904ac9e5 Mon Sep 17 00:00:00 2001 From: Matthias Springer Date: Sun, 17 Nov 2024 04:38:09 +0100 Subject: [PATCH] [mlir][Transforms] Support 1:N mappings in `ConversionValueMapping` --- .../Conversion/LLVMCommon/TypeConverter.cpp | 68 ++- .../Bufferization/Transforms/Bufferize.cpp| 1 - .../EmitC/Transforms/TypeConversions.cpp | 1 - .../Dialect/Linalg/Transforms/Detensorize.cpp | 1 - .../Quant/Transforms/StripFuncQuantTypes.cpp | 1 - .../Utils/SparseTensorDescriptor.cpp | 3 - .../Vector/Transforms/VectorLinearize.cpp | 1 - .../Transforms/Utils/DialectConversion.cpp| 527 ++ mlir/test/Transforms/test-legalizer.mlir | 3 - .../Func/TestDecomposeCallGraphTypes.cpp | 2 +- mlir/test/lib/Dialect/Test/TestPatterns.cpp | 11 +- .../lib/Transforms/TestDialectConversion.cpp | 1 - 12 files changed, 335 insertions(+), 285 deletions(-) diff --git a/mlir/lib/Conversion/LLVMCommon/TypeConverter.cpp b/mlir/lib/Conversion/LLVMCommon/TypeConverter.cpp index 59b0f5c9b09bcd..fbf1c20d0baa32 100644 --- a/mlir/lib/Conversion/LLVMCommon/TypeConverter.cpp +++ b/mlir/lib/Conversion/LLVMCommon/TypeConverter.cpp @@ -153,20 +153,31 @@ LLVMTypeConverter::LLVMTypeConverter(MLIRContext *ctx, type.isVarArg()); }); + // Add generic source and target materializations to handle cases where + // non-LLVM types persist after an LLVM conversion. + addSourceMaterialization([&](OpBuilder &builder, Type resultType, + ValueRange inputs, Location loc) { +return builder.create(loc, resultType, inputs) +.getResult(0); + }); + addTargetMaterialization([&](OpBuilder &builder, Type resultType, + ValueRange inputs, Location loc) { +return builder.create(loc, resultType, inputs) +.getResult(0); + }); + // Helper function that checks if the given value range is a bare pointer. 
auto isBarePointer = [](ValueRange values) { return values.size() == 1 && isa(values.front().getType()); }; - // Argument materializations convert from the new block argument types - // (multiple SSA values that make up a memref descriptor) back to the - // original block argument type. The dialect conversion framework will then - // insert a target materialization from the original block argument type to - // a legal type. - addArgumentMaterialization([&](OpBuilder &builder, - UnrankedMemRefType resultType, - ValueRange inputs, Location loc) { + // Source materializations convert the MemRef descriptor elements + // (multiple SSA values that make up a MemRef descriptor) back to the + // original MemRef type. + addSourceMaterialization([&](OpBuilder &builder, + UnrankedMemRefType resultType, ValueRange inputs, + Location loc) { // Note: Bare pointers are not supported for unranked memrefs because a // memref descriptor cannot be built just from a bare pointer. if (TypeRange(inputs) != getUnrankedMemRefDescriptorFields()) @@ -179,8 +190,8 @@ LLVMTypeConverter::LLVMTypeConverter(MLIRContext *ctx, return builder.create(loc, resultType, desc) .getResult(0); }); - addArgumentMaterialization([&](OpBuilder &builder, MemRefType resultType, - ValueRange inputs, Location loc) { + addSourceMaterialization([&](OpBuilder &builder, MemRefType resultType, + ValueRange inputs, Location loc) { Value desc; if (isBarePointer(inputs)) { desc = MemRefDescriptor::fromStaticShape(builder, loc, *this, resultType, @@ -200,23 +211,30 @@ LLVMTypeConverter::LLVMTypeConverter(MLIRContext *ctx, return builder.create(loc, resultType, desc) .getResult(0); }); - // Add generic source and target materializations to handle cases where - // non-LLVM types persist after an LLVM conversion. 
- addSourceMaterialization([&](OpBuilder &builder, Type resultType, - ValueRange inputs, Location loc) { -if (inputs.size() != 1) - return Value(); + addTargetMaterialization([&](OpBuilder &builder, + LLVM::LLVMStructType resultType, + ValueRange inputs, Location loc, + Type originalType) -> Value { +if (auto memrefType = dyn_cast_or_null(originalType)) { + if (isBarePointer(inputs)) { +return MemRefDescriptor::fromStaticShape(builder, loc, *this, + memrefType, inputs[0]); + } else if (TypeRange(inputs) == + getMemRefDescriptorFields(memrefType, +
[llvm-branch-commits] [mlir] [mlir][LLVM] `LLVMTypeConverter`: Tighten materialization checks (PR #116532)
https://github.com/matthias-springer updated https://github.com/llvm/llvm-project/pull/116532 >From 7025a8caae81e97022155b8fac8075fc29e24650 Mon Sep 17 00:00:00 2001 From: Matthias Springer Date: Sun, 17 Nov 2024 09:00:45 +0100 Subject: [PATCH] [mlir][LLVM] `LLVMTypeConverter`: Tighten materialization checks --- .../Conversion/LLVMCommon/TypeConverter.cpp | 32 .../MemRefToLLVM/type-conversion.mlir | 57 ++ mlir/test/lib/Dialect/LLVM/CMakeLists.txt | 1 + mlir/test/lib/Dialect/LLVM/TestPatterns.cpp | 77 +++ mlir/tools/mlir-opt/mlir-opt.cpp | 2 + 5 files changed, 154 insertions(+), 15 deletions(-) create mode 100644 mlir/test/Conversion/MemRefToLLVM/type-conversion.mlir create mode 100644 mlir/test/lib/Dialect/LLVM/TestPatterns.cpp diff --git a/mlir/lib/Conversion/LLVMCommon/TypeConverter.cpp b/mlir/lib/Conversion/LLVMCommon/TypeConverter.cpp index ce91424e7a577e..59b0f5c9b09bcd 100644 --- a/mlir/lib/Conversion/LLVMCommon/TypeConverter.cpp +++ b/mlir/lib/Conversion/LLVMCommon/TypeConverter.cpp @@ -153,6 +153,12 @@ LLVMTypeConverter::LLVMTypeConverter(MLIRContext *ctx, type.isVarArg()); }); + // Helper function that checks if the given value range is a bare pointer. + auto isBarePointer = [](ValueRange values) { +return values.size() == 1 && + isa(values.front().getType()); + }; + // Argument materializations convert from the new block argument types // (multiple SSA values that make up a memref descriptor) back to the // original block argument type. The dialect conversion framework will then @@ -161,11 +167,10 @@ LLVMTypeConverter::LLVMTypeConverter(MLIRContext *ctx, addArgumentMaterialization([&](OpBuilder &builder, UnrankedMemRefType resultType, ValueRange inputs, Location loc) { -if (inputs.size() == 1) { - // Bare pointers are not supported for unranked memrefs because a - // memref descriptor cannot be built just from a bare pointer. 
+// Note: Bare pointers are not supported for unranked memrefs because a +// memref descriptor cannot be built just from a bare pointer. +if (TypeRange(inputs) != getUnrankedMemRefDescriptorFields()) return Value(); -} Value desc = UnrankedMemRefDescriptor::pack(builder, loc, *this, resultType, inputs); // An argument materialization must return a value of type @@ -177,20 +182,17 @@ LLVMTypeConverter::LLVMTypeConverter(MLIRContext *ctx, addArgumentMaterialization([&](OpBuilder &builder, MemRefType resultType, ValueRange inputs, Location loc) { Value desc; -if (inputs.size() == 1) { - // This is a bare pointer. We allow bare pointers only for function entry - // blocks. - BlockArgument barePtr = dyn_cast(inputs.front()); - if (!barePtr) -return Value(); - Block *block = barePtr.getOwner(); - if (!block->isEntryBlock() || - !isa(block->getParentOp())) -return Value(); +if (isBarePointer(inputs)) { desc = MemRefDescriptor::fromStaticShape(builder, loc, *this, resultType, inputs[0]); -} else { +} else if (TypeRange(inputs) == + getMemRefDescriptorFields(resultType, + /*unpackAggregates=*/true)) { desc = MemRefDescriptor::pack(builder, loc, *this, resultType, inputs); +} else { + // The inputs are neither a bare pointer nor an unpacked memref + // descriptor. This materialization function cannot be used. + return Value(); } // An argument materialization must return a value of type `resultType`, // so insert a cast from the memref descriptor type (!llvm.struct) to the diff --git a/mlir/test/Conversion/MemRefToLLVM/type-conversion.mlir b/mlir/test/Conversion/MemRefToLLVM/type-conversion.mlir new file mode 100644 index 00..0288aa11313c72 --- /dev/null +++ b/mlir/test/Conversion/MemRefToLLVM/type-conversion.mlir @@ -0,0 +1,57 @@ +// RUN: mlir-opt %s -test-llvm-legalize-patterns -split-input-file + +// Test the argument materializer for ranked MemRef types. 
+ +// CHECK-LABEL: func @construct_ranked_memref_descriptor( +// CHECK: llvm.mlir.undef : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> +// CHECK-COUNT-7: llvm.insertvalue +// CHECK: builtin.unrealized_conversion_cast %{{.*}} : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> to memref<5x4xf32> +func.func @construct_ranked_memref_descriptor(%arg0: !llvm.ptr, %arg1: !llvm.ptr, %arg2: i64, %arg3: i64, %arg4: i64, %arg5: i64, %arg6: i64) { + %0 = "test.direct_replacement"(%arg0, %arg1, %arg2, %arg3, %arg4, %arg5, %arg6) : (!llvm.ptr, !llvm.ptr, i64, i64, i64, i64, i64) -> (memref<5x4xf32>) + "test.legal_op"(%0) : (memref<5x4xf3
[llvm-branch-commits] [lld] [PAC][lld][AArch64][ELF] Support signed GOT with tiny code model (PR #113816)
https://github.com/kovdan01 updated https://github.com/llvm/llvm-project/pull/113816 >From ff01757ad3d20b9538a23b12e7c3e2cd7f6dc20d Mon Sep 17 00:00:00 2001 From: Daniil Kovalev Date: Fri, 25 Oct 2024 21:28:18 +0300 Subject: [PATCH 1/3] [PAC][lld][AArch64][ELF] Support signed GOT with tiny code model Support `R_AARCH64_AUTH_GOT_ADR_PREL_LO21` and `R_AARCH64_AUTH_GOT_LD_PREL19` GOT-generating relocations. --- lld/ELF/Arch/AArch64.cpp | 5 ++ lld/ELF/InputSection.cpp | 1 + lld/ELF/Relocations.cpp | 17 ++--- lld/ELF/Relocations.h| 1 + lld/test/ELF/aarch64-got-relocations-pauth.s | 73 5 files changed, 89 insertions(+), 8 deletions(-) diff --git a/lld/ELF/Arch/AArch64.cpp b/lld/ELF/Arch/AArch64.cpp index 076351dd00d3b3..94e79fdf1025ce 100644 --- a/lld/ELF/Arch/AArch64.cpp +++ b/lld/ELF/Arch/AArch64.cpp @@ -205,6 +205,9 @@ RelExpr AArch64::getRelExpr(RelType type, const Symbol &s, case R_AARCH64_AUTH_LD64_GOT_LO12_NC: case R_AARCH64_AUTH_GOT_ADD_LO12_NC: return R_AARCH64_AUTH_GOT; + case R_AARCH64_AUTH_GOT_LD_PREL19: + case R_AARCH64_AUTH_GOT_ADR_PREL_LO21: +return R_AARCH64_AUTH_GOT_PC; case R_AARCH64_LD64_GOTPAGE_LO15: return R_AARCH64_GOT_PAGE; case R_AARCH64_ADR_GOT_PAGE: @@ -549,6 +552,7 @@ void AArch64::relocate(uint8_t *loc, const Relocation &rel, write32AArch64Addr(loc, val >> 12); break; case R_AARCH64_ADR_PREL_LO21: + case R_AARCH64_AUTH_GOT_ADR_PREL_LO21: checkInt(ctx, loc, val, 21, rel); write32AArch64Addr(loc, val); break; @@ -569,6 +573,7 @@ void AArch64::relocate(uint8_t *loc, const Relocation &rel, case R_AARCH64_CONDBR19: case R_AARCH64_LD_PREL_LO19: case R_AARCH64_GOT_LD_PREL19: + case R_AARCH64_AUTH_GOT_LD_PREL19: checkAlignment(ctx, loc, val, 4, rel); checkInt(ctx, loc, val, 21, rel); writeMaskedBits32le(loc, (val & 0x1C) << 3, 0x1C << 3); diff --git a/lld/ELF/InputSection.cpp b/lld/ELF/InputSection.cpp index 84f23bf78a4e9b..d49a654c6a29b7 100644 --- a/lld/ELF/InputSection.cpp +++ b/lld/ELF/InputSection.cpp @@ -796,6 +796,7 @@ uint64_t 
InputSectionBase::getRelocTargetVA(Ctx &ctx, const Relocation &r, case R_AARCH64_GOT_PAGE: return r.sym->getGotVA(ctx) + a - getAArch64Page(ctx.in.got->getVA()); case R_GOT_PC: + case R_AARCH64_AUTH_GOT_PC: case R_RELAX_TLS_GD_TO_IE: return r.sym->getGotVA(ctx) + a - p; case R_GOTPLT_GOTREL: diff --git a/lld/ELF/Relocations.cpp b/lld/ELF/Relocations.cpp index bbb3d3210e0253..d783b5f0a674cf 100644 --- a/lld/ELF/Relocations.cpp +++ b/lld/ELF/Relocations.cpp @@ -210,11 +210,11 @@ static bool needsPlt(RelExpr expr) { } bool lld::elf::needsGot(RelExpr expr) { - return oneof( - expr); + return oneof(expr); } // True if this expression is of the form Sym - X, where X is a position in the @@ -1010,8 +1010,8 @@ bool RelocationScanner::isStaticLinkTimeConstant(RelExpr e, RelType type, R_GOTONLY_PC, R_GOTPLTONLY_PC, R_PLT_PC, R_PLT_GOTREL, R_PLT_GOTPLT, R_GOTPLT_GOTREL, R_GOTPLT_PC, R_PPC32_PLTREL, R_PPC64_CALL_PLT, R_PPC64_RELAX_TOC, R_RISCV_ADD, R_AARCH64_GOT_PAGE, -R_AARCH64_AUTH_GOT, R_LOONGARCH_PLT_PAGE_PC, R_LOONGARCH_GOT, -R_LOONGARCH_GOT_PAGE_PC>(e)) +R_AARCH64_AUTH_GOT, R_AARCH64_AUTH_GOT_PC, R_LOONGARCH_PLT_PAGE_PC, +R_LOONGARCH_GOT, R_LOONGARCH_GOT_PAGE_PC>(e)) return true; // These never do, except if the entire file is position dependent or if @@ -1126,7 +1126,8 @@ void RelocationScanner::processAux(RelExpr expr, RelType type, uint64_t offset, // Many LoongArch TLS relocs reuse the R_LOONGARCH_GOT type, in which // case the NEEDS_GOT flag shouldn't get set. bool needsGotAuth = - (expr == R_AARCH64_AUTH_GOT || expr == R_AARCH64_AUTH_GOT_PAGE_PC); + (expr == R_AARCH64_AUTH_GOT || expr == R_AARCH64_AUTH_GOT_PC || + expr == R_AARCH64_AUTH_GOT_PAGE_PC); uint16_t flags = sym.flags.load(std::memory_order_relaxed); if (!(flags & NEEDS_GOT)) { sym.setFlags(needsGotAuth ? 
(NEEDS_GOT | NEEDS_GOT_AUTH) : NEEDS_GOT); diff --git a/lld/ELF/Relocations.h b/lld/ELF/Relocations.h index 20d88de402ac18..38d55d46116569 100644 --- a/lld/ELF/Relocations.h +++ b/lld/ELF/Relocations.h @@ -89,6 +89,7 @@ enum RelExpr { R_AARCH64_AUTH_GOT_PAGE_PC, R_AARCH64_GOT_PAGE, R_AARCH64_AUTH_GOT, + R_AARCH64_AUTH_GOT_PC, R_AARCH64_PAGE_PC, R_AARCH64_RELAX_TLS_GD_TO_IE_PAGE_PC, R_AARCH64_TLSDESC_PAGE, diff --git a/lld/test/ELF/aarch64-got-relocations-pauth.s b/lld/test/ELF/aarch64-got-relocations-pauth.s index 3fe73a086c729b..14f03958482dff 100644 --- a/lld/test/ELF/aarch64-got-relocations-pauth.s +++ b/lld/test/ELF/aarch64-got-relocations-pauth.s @@ -78,6 +78,79 @@ _start: adrp x1, :got_auth:zed add x1, x1, :got_auth_lo12:zed +#--- ok-tiny.s + +# RUN: ll
[llvm-branch-commits] [llvm] AMDGPU: Handle gfx950 global_load_lds_* instructions (PR #116680)
https://github.com/arsenm created https://github.com/llvm/llvm-project/pull/116680 Define global_load_lds_dwordx3 and global_load_lds_dwordx4. Oddly it seems dwordx2 was skipped. >From 42f311ceb555ea2b3f171ad2ef8254e971e0be12 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Thu, 18 Jan 2024 14:44:03 +0700 Subject: [PATCH] AMDGPU: Handle gfx950 global_load_lds_* instructions Define global_load_lds_dwordx3 and global_load_lds_dwordx4. Oddly it seems dwordx2 was skipped. --- llvm/include/llvm/IR/IntrinsicsAMDGPU.td | 2 +- .../AMDGPU/AMDGPUInstructionSelector.cpp | 10 ++ llvm/lib/Target/AMDGPU/FLATInstructions.td| 9 ++ llvm/lib/Target/AMDGPU/GCNSubtarget.h | 7 + llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 10 ++ .../llvm.amdgcn.global.load.lds.gfx950.ll | 137 ++ llvm/test/MC/AMDGPU/gfx950_asm_features.s | 37 + llvm/test/MC/Disassembler/AMDGPU/gfx950.txt | 25 8 files changed, 236 insertions(+), 1 deletion(-) create mode 100644 llvm/test/CodeGen/AMDGPU/llvm.amdgcn.global.load.lds.gfx950.ll create mode 100644 llvm/test/MC/AMDGPU/gfx950_asm_features.s create mode 100644 llvm/test/MC/Disassembler/AMDGPU/gfx950.txt diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td index 15f33cdbf92e6e..f43ab50d2ea441 100644 --- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td +++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td @@ -2452,7 +2452,7 @@ class AMDGPUGlobalLoadLDS : [], [LLVMQualPointerType<1>,// Base global pointer to load from LLVMQualPointerType<3>,// LDS base pointer to store to - llvm_i32_ty, // Data byte size: 1/2/4 + llvm_i32_ty, // Data byte size: 1/2/4 (/12/16 for gfx950) llvm_i32_ty, // imm offset (applied to both global and LDS address) llvm_i32_ty], // auxiliary data (imm, cachepolicy (bit 0 = sc0, // bit 1 = sc1, diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp index 13de93e829fab2..a6ef0069f134bd 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp +++ 
b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp @@ -3329,6 +3329,16 @@ bool AMDGPUInstructionSelector::selectGlobalLoadLds(MachineInstr &MI) const{ case 4: Opc = AMDGPU::GLOBAL_LOAD_LDS_DWORD; break; + case 12: +if (!Subtarget->hasLDSLoadB96_B128()) + return false; +Opc = AMDGPU::GLOBAL_LOAD_LDS_DWORDX3; +break; + case 16: +if (!Subtarget->hasLDSLoadB96_B128()) + return false; +Opc = AMDGPU::GLOBAL_LOAD_LDS_DWORDX4; +break; } MachineBasicBlock *MBB = MI.getParent(); diff --git a/llvm/lib/Target/AMDGPU/FLATInstructions.td b/llvm/lib/Target/AMDGPU/FLATInstructions.td index db74372e9db452..861fcf017d9e4d 100644 --- a/llvm/lib/Target/AMDGPU/FLATInstructions.td +++ b/llvm/lib/Target/AMDGPU/FLATInstructions.td @@ -934,6 +934,11 @@ defm GLOBAL_LOAD_LDS_USHORT : FLAT_Global_Load_LDS_Pseudo <"global_load_lds_usho defm GLOBAL_LOAD_LDS_SSHORT : FLAT_Global_Load_LDS_Pseudo <"global_load_lds_sshort">; defm GLOBAL_LOAD_LDS_DWORD : FLAT_Global_Load_LDS_Pseudo <"global_load_lds_dword">; +let SubtargetPredicate = HasGFX950Insts in { +defm GLOBAL_LOAD_LDS_DWORDX3 : FLAT_Global_Load_LDS_Pseudo <"global_load_lds_dwordx3">; +defm GLOBAL_LOAD_LDS_DWORDX4 : FLAT_Global_Load_LDS_Pseudo <"global_load_lds_dwordx4">; +} + let SubtargetPredicate = isGFX12Plus in { defm GLOBAL_ATOMIC_COND_SUB_U32: FLAT_Global_Atomic_Pseudo <"global_atomic_cond_sub_u32", VGPR_32, i32>; defm GLOBAL_ATOMIC_ORDERED_ADD_B64 : FLAT_Global_Atomic_Pseudo <"global_atomic_ordered_add_b64", VReg_64, i64>; @@ -1980,6 +1985,10 @@ defm GLOBAL_LOAD_LDS_USHORT : FLAT_Real_AllAddr_LDS <0x028, 0x12>; defm GLOBAL_LOAD_LDS_SSHORT : FLAT_Real_AllAddr_LDS <0x029, 0x13>; defm GLOBAL_LOAD_LDS_DWORD : FLAT_Real_AllAddr_LDS <0x02a, 0x14>; +defm GLOBAL_LOAD_LDS_DWORDX3 : FLAT_Real_AllAddr_LDS <0x07e, 0x07e>; +defm GLOBAL_LOAD_LDS_DWORDX4 : FLAT_Real_AllAddr_LDS <0x07d, 0x07d>; + + defm GLOBAL_ATOMIC_SWAP : FLAT_Global_Real_Atomics_vi <0x40>; defm GLOBAL_ATOMIC_CMPSWAP: FLAT_Global_Real_Atomics_vi <0x41>; defm GLOBAL_ATOMIC_ADD: 
FLAT_Global_Real_Atomics_vi <0x42>; diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h index 4a6efe533230b1..f3f96940c1f44b 100644 --- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h +++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h @@ -1289,6 +1289,13 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo, // hasGFX940Insts and hasGFX90AInsts are also true. bool hasGFX950Insts() const { return GFX950Insts; } + /// Returns true if the target supports + /// global_load_lds_dwordx3/global_load_lds_dwordx4 or + /// buffer_load_dwo
[llvm-branch-commits] [llvm] AMDGPU: Handle gfx950 96/128-bit buffer_load_lds (PR #116681)
arsenm wrote:
> [!WARNING]
> This pull request is not mergeable via GitHub because a downstack PR is open. Once all requirements are satisfied, merge this PR as a stack [on Graphite](https://app.graphite.dev/github/pr/llvm/llvm-project/116681?utm_source=stack-comment-downstack-mergeability-warning).
> [Learn more](https://graphite.dev/docs/merge-pull-requests)

* **#116681** [Graphite](https://app.graphite.dev/github/pr/llvm/llvm-project/116681?utm_source=stack-comment-icon) 👈 [(View in Graphite)](https://app.graphite.dev/github/pr/llvm/llvm-project/116681?utm_source=stack-comment-view-in-graphite)
* **#116680** [Graphite](https://app.graphite.dev/github/pr/llvm/llvm-project/116680?utm_source=stack-comment-icon)
* **#116679** [Graphite](https://app.graphite.dev/github/pr/llvm/llvm-project/116679?utm_source=stack-comment-icon)
* **#116678** [Graphite](https://app.graphite.dev/github/pr/llvm/llvm-project/116678?utm_source=stack-comment-icon)
* **#116312** [Graphite](https://app.graphite.dev/github/pr/llvm/llvm-project/116312?utm_source=stack-comment-icon)
* **#116311** [Graphite](https://app.graphite.dev/github/pr/llvm/llvm-project/116311?utm_source=stack-comment-icon)
* **#116310** [Graphite](https://app.graphite.dev/github/pr/llvm/llvm-project/116310?utm_source=stack-comment-icon)
* **#116309** [Graphite](https://app.graphite.dev/github/pr/llvm/llvm-project/116309?utm_source=stack-comment-icon)
* **#116308** [Graphite](https://app.graphite.dev/github/pr/llvm/llvm-project/116308?utm_source=stack-comment-icon)
* **#116307** [Graphite](https://app.graphite.dev/github/pr/llvm/llvm-project/116307?utm_source=stack-comment-icon)
* `main`

This stack of pull requests is managed by [Graphite](https://graphite.dev?utm-source=stack-comment). Learn more about [stacking](https://stacking.dev/?utm_source=stack-comment).

https://github.com/llvm/llvm-project/pull/116681 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [llvm] AMDGPU: Define v_mfma_f32_32x32x16_bf16 for gfx950 (PR #116679)
arsenm wrote:
> [!WARNING]
> This pull request is not mergeable via GitHub because a downstack PR is open. Once all requirements are satisfied, merge this PR as a stack [on Graphite](https://app.graphite.dev/github/pr/llvm/llvm-project/116679?utm_source=stack-comment-downstack-mergeability-warning).
> [Learn more](https://graphite.dev/docs/merge-pull-requests)

* **#116680** [Graphite](https://app.graphite.dev/github/pr/llvm/llvm-project/116680?utm_source=stack-comment-icon)
* **#116679** [Graphite](https://app.graphite.dev/github/pr/llvm/llvm-project/116679?utm_source=stack-comment-icon) 👈 [(View in Graphite)](https://app.graphite.dev/github/pr/llvm/llvm-project/116679?utm_source=stack-comment-view-in-graphite)
* **#116678** [Graphite](https://app.graphite.dev/github/pr/llvm/llvm-project/116678?utm_source=stack-comment-icon)
* **#116312** [Graphite](https://app.graphite.dev/github/pr/llvm/llvm-project/116312?utm_source=stack-comment-icon)
* **#116311** [Graphite](https://app.graphite.dev/github/pr/llvm/llvm-project/116311?utm_source=stack-comment-icon)
* **#116310** [Graphite](https://app.graphite.dev/github/pr/llvm/llvm-project/116310?utm_source=stack-comment-icon)
* **#116309** [Graphite](https://app.graphite.dev/github/pr/llvm/llvm-project/116309?utm_source=stack-comment-icon)
* **#116308** [Graphite](https://app.graphite.dev/github/pr/llvm/llvm-project/116308?utm_source=stack-comment-icon)
* **#116307** [Graphite](https://app.graphite.dev/github/pr/llvm/llvm-project/116307?utm_source=stack-comment-icon)
* `main`

This stack of pull requests is managed by [Graphite](https://graphite.dev?utm-source=stack-comment). Learn more about [stacking](https://stacking.dev/?utm_source=stack-comment).

https://github.com/llvm/llvm-project/pull/116679 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Handle gfx950 global_load_lds_* instructions (PR #116680)
arsenm wrote:
> [!WARNING]
> This pull request is not mergeable via GitHub because a downstack PR is open. Once all requirements are satisfied, merge this PR as a stack [on Graphite](https://app.graphite.dev/github/pr/llvm/llvm-project/116680?utm_source=stack-comment-downstack-mergeability-warning).
> [Learn more](https://graphite.dev/docs/merge-pull-requests)

* **#116681** [Graphite](https://app.graphite.dev/github/pr/llvm/llvm-project/116681?utm_source=stack-comment-icon)
* **#116680** [Graphite](https://app.graphite.dev/github/pr/llvm/llvm-project/116680?utm_source=stack-comment-icon) 👈 [(View in Graphite)](https://app.graphite.dev/github/pr/llvm/llvm-project/116680?utm_source=stack-comment-view-in-graphite)
* **#116679** [Graphite](https://app.graphite.dev/github/pr/llvm/llvm-project/116679?utm_source=stack-comment-icon)
* **#116678** [Graphite](https://app.graphite.dev/github/pr/llvm/llvm-project/116678?utm_source=stack-comment-icon)
* **#116312** [Graphite](https://app.graphite.dev/github/pr/llvm/llvm-project/116312?utm_source=stack-comment-icon)
* **#116311** [Graphite](https://app.graphite.dev/github/pr/llvm/llvm-project/116311?utm_source=stack-comment-icon)
* **#116310** [Graphite](https://app.graphite.dev/github/pr/llvm/llvm-project/116310?utm_source=stack-comment-icon)
* **#116309** [Graphite](https://app.graphite.dev/github/pr/llvm/llvm-project/116309?utm_source=stack-comment-icon)
* **#116308** [Graphite](https://app.graphite.dev/github/pr/llvm/llvm-project/116308?utm_source=stack-comment-icon)
* **#116307** [Graphite](https://app.graphite.dev/github/pr/llvm/llvm-project/116307?utm_source=stack-comment-icon)
* `main`

This stack of pull requests is managed by [Graphite](https://graphite.dev?utm-source=stack-comment). Learn more about [stacking](https://stacking.dev/?utm_source=stack-comment).

https://github.com/llvm/llvm-project/pull/116680 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [llvm] AMDGPU: Define v_mfma_f32_32x32x16_bf16 for gfx950 (PR #116679)
https://github.com/arsenm created https://github.com/llvm/llvm-project/pull/116679 Unlike the existing gfx940 intrinsics using short/i16 in place of bfloat, this uses the natural bfloat type. >From 82bb6e07b68b1df378e89c1eba1f9deb3c2d67f5 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Tue, 19 Dec 2023 12:46:00 +0700 Subject: [PATCH] AMDGPU: Define v_mfma_f32_32x32x16_bf16 for gfx950 Unlike the existing gfx940 intrinsics using short/i16 in place of bfloat, this uses the natural bfloat type. --- clang/include/clang/Basic/BuiltinsAMDGPU.def | 2 + .../CodeGenOpenCL/builtins-amdgcn-mfma.cl | 6 + .../builtins-amdgcn-error-gfx950-param.cl | 7 + .../builtins-amdgcn-error-gfx950.cl | 5 +- llvm/include/llvm/IR/IntrinsicsAMDGPU.td | 2 + llvm/lib/Target/AMDGPU/SIInstrInfo.td | 1 + llvm/lib/Target/AMDGPU/VOP3PInstructions.td | 6 + .../UniformityAnalysis/AMDGPU/intrinsics.ll | 8 + .../AMDGPU/llvm.amdgcn.mfma.gfx950.bf16.ll| 474 ++ llvm/test/MC/AMDGPU/mai-gfx950.s | 56 ++- .../MC/Disassembler/AMDGPU/gfx950_mai.txt | 27 + llvm/test/tools/llvm-mca/AMDGPU/gfx950.s | 10 +- 12 files changed, 596 insertions(+), 8 deletions(-) create mode 100644 llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.gfx950.bf16.ll diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def b/clang/include/clang/Basic/BuiltinsAMDGPU.def index 6917d8d1aca69d..7ce8f2c1669d67 100644 --- a/clang/include/clang/Basic/BuiltinsAMDGPU.def +++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def @@ -437,6 +437,8 @@ TARGET_BUILTIN(__builtin_amdgcn_cvt_sr_fp8_f32, "ifiiIi", "nc", "fp8-conversion- TARGET_BUILTIN(__builtin_amdgcn_mfma_f32_16x16x32_f16, "V4fV8hV8hV4fIiIiIi", "nc", "gfx950-insts") TARGET_BUILTIN(__builtin_amdgcn_mfma_f32_32x32x16_f16, "V16fV8hV8hV16fIiIiIi", "nc", "gfx950-insts") +TARGET_BUILTIN(__builtin_amdgcn_mfma_f32_32x32x16_bf16, "V16fV8yV8yV16fIiIiIi", "nc", "gfx950-insts") + //===--===// // GFX12+ only builtins. 
//===--===// diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-mfma.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-mfma.cl index a644a60f9ec381..841d8fcad0fee0 100644 --- a/clang/test/CodeGenOpenCL/builtins-amdgcn-mfma.cl +++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-mfma.cl @@ -24,6 +24,7 @@ typedef short v8s __attribute__((ext_vector_type(8))); typedef short v16s __attribute__((ext_vector_type(16))); typedef short v32s __attribute__((ext_vector_type(32))); typedef double v4d __attribute__((ext_vector_type(4))); +typedef __bf16 v8bf16 __attribute__((ext_vector_type(8))); #ifdef MFMA_GFX908_TESTS @@ -424,5 +425,10 @@ v16f test_mfma_f32_32x32x16_f16(v8h a, v8h b, v16f c) return __builtin_amdgcn_mfma_f32_32x32x16_f16(a, b, c, 1, 2, 3); } +// CHECK-GFX950-LABEL: @test_mfma_f32_32x32x16_bf16( +// CHECK-GFX950: tail call <16 x float> @llvm.amdgcn.mfma.f32.32x32x16.bf16(<8 x bfloat> %a, <8 x bfloat> %b, <16 x float> %c, i32 1, i32 2, i32 3) +v16f test_mfma_f32_32x32x16_bf16(v8bf16 a, v8bf16 b, v16f c) { + return __builtin_amdgcn_mfma_f32_32x32x16_bf16(a, b, c, 1, 2, 3); +} #endif diff --git a/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx950-param.cl b/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx950-param.cl index 4c267e2cac5cad..4af67763c40dd2 100644 --- a/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx950-param.cl +++ b/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx950-param.cl @@ -4,6 +4,7 @@ typedef float float4 __attribute__((ext_vector_type(4))); typedef float float16 __attribute__((ext_vector_type(16))); typedef half half8 __attribute__((ext_vector_type(8))); +typedef __bf16 bfloat8 __attribute__((ext_vector_type(8))); void test_mfma_f32_16x16x32_f16(__global float4* out, half8 a, half8 b, float4 c, int X) { @@ -19,3 +20,9 @@ void test_mfma_f32_32x32x16_f16(__global float16* out, half8 a, half8 b, float16 *out = __builtin_amdgcn_mfma_f32_32x32x16_f16(a, b, c, 0, X, 0); // expected-error{{argument to '__builtin_amdgcn_mfma_f32_32x32x16_f16' must be a 
constant integer}} *out = __builtin_amdgcn_mfma_f32_32x32x16_f16(a, b, c, 0, 0, X); // expected-error{{argument to '__builtin_amdgcn_mfma_f32_32x32x16_f16' must be a constant integer}} } + +void test_mfma_f32_32x32x16_bf16(__global float16* out, bfloat8 a, bfloat8 b, float16 c, int X) { + *out = __builtin_amdgcn_mfma_f32_32x32x16_bf16(a, b, c, X, 0, 0); // expected-error{{argument to '__builtin_amdgcn_mfma_f32_32x32x16_bf16' must be a constant integer}} + *out = __builtin_amdgcn_mfma_f32_32x32x16_bf16(a, b, c, 0, X, 0); // expected-error{{argument to '__builtin_amdgcn_mfma_f32_32x32x16_bf16' must be a constant integer}} + *out = __builtin_amdgcn_mfma_f32_32x32x16_bf16(a, b, c, 0, 0, X); // expected-error{{argument to '__builtin_amdgcn_mfma_f32_32x32x16_bf16' must
[llvm-branch-commits] [llvm] AMDGPU: Add V_CVT_PK_BF16_F32 for gfx950 (PR #116678)
arsenm wrote: > [!WARNING] > This pull request is not mergeable via GitHub because a downstack PR is open. Once all requirements are satisfied, merge this PR as a stack <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/116678?utm_source=stack-comment-downstack-mergeability-warning">on Graphite</a>. > <a href="https://graphite.dev/docs/merge-pull-requests">Learn more</a> * **#116679** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/116679?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> * **#116678** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/116678?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> 👈 <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/116678?utm_source=stack-comment-view-in-graphite" target="_blank">(View in Graphite)</a> * **#116312** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/116312?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> * **#116311** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/116311?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> * **#116310** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/116310?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> * **#116309** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/116309?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a>
* **#116308** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/116308?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> * **#116307** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/116307?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> * `main` This stack of pull requests is managed by <a href="https://graphite.dev?utm-source=stack-comment">Graphite</a>. Learn more about <a href="https://stacking.dev/?utm_source=stack-comment">stacking</a>. https://github.com/llvm/llvm-project/pull/116678 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [llvm] AMDGPU: Define v_mfma_f32_32x32x16_bf16 for gfx950 (PR #116679)
arsenm wrote: > [!WARNING] > This pull request is not mergeable via GitHub because a downstack PR is open. Once all requirements are satisfied, merge this PR as a stack <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/116679?utm_source=stack-comment-downstack-mergeability-warning">on Graphite</a>. > <a href="https://graphite.dev/docs/merge-pull-requests">Learn more</a> * **#116680** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/116680?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> * **#116679** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/116679?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> 👈 <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/116679?utm_source=stack-comment-view-in-graphite" target="_blank">(View in Graphite)</a> * **#116678** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/116678?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> * **#116312** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/116312?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> * **#116311** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/116311?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> * **#116310** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/116310?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a>
* **#116309** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/116309?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> * **#116308** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/116308?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> * **#116307** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/116307?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> * `main` This stack of pull requests is managed by <a href="https://graphite.dev?utm-source=stack-comment">Graphite</a>. Learn more about <a href="https://stacking.dev/?utm_source=stack-comment">stacking</a>. https://github.com/llvm/llvm-project/pull/116679 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Handle gfx950 96/128-bit buffer_load_lds (PR #116681)
https://github.com/arsenm created https://github.com/llvm/llvm-project/pull/116681 Enforcing this limit in the clang builtin will come later. >From f5657c9cc25cfed321ced807510a21dc374bcfe3 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Thu, 18 Jan 2024 16:18:05 +0700 Subject: [PATCH] AMDGPU: Handle gfx950 96/128-bit buffer_load_lds Enforcing this limit in the clang builtin will come later. --- llvm/include/llvm/IR/IntrinsicsAMDGPU.td | 8 +- .../AMDGPU/AMDGPUInstructionSelector.cpp | 18 ++ llvm/lib/Target/AMDGPU/BUFInstructions.td | 24 ++- llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 16 ++ .../llvm.amdgcn.global.load.lds.gfx950.ll | 8 + ...m.amdgcn.raw.ptr.buffer.load.lds.gfx950.ll | 176 ...mdgcn.struct.ptr.buffer.load.lds.gfx950.ll | 196 ++ llvm/test/MC/AMDGPU/mubuf-gfx950.s| 32 +++ llvm/test/MC/Disassembler/AMDGPU/gfx950.txt | 19 ++ 9 files changed, 485 insertions(+), 12 deletions(-) create mode 100644 llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.ptr.buffer.load.lds.gfx950.ll create mode 100644 llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.buffer.load.lds.gfx950.ll create mode 100644 llvm/test/MC/AMDGPU/mubuf-gfx950.s diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td index f43ab50d2ea441..360af786c5160d 100644 --- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td +++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td @@ -1674,7 +1674,7 @@ class AMDGPURawBufferLoadLDS : Intrinsic < [], [llvm_v4i32_ty, // rsrc(SGPR) LLVMQualPointerType<3>,// LDS base offset - llvm_i32_ty, // Data byte size: 1/2/4 + llvm_i32_ty, // Data byte size: 1/2/4 (/12/16 for gfx950) llvm_i32_ty, // voffset(VGPR, included in bounds checking and swizzling) llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling) llvm_i32_ty, // imm offset(imm, included in bounds checking and swizzling) @@ -1693,7 +1693,7 @@ class AMDGPURawPtrBufferLoadLDS : Intrinsic < [], [AMDGPUBufferRsrcTy,// rsrc(SGPR) LLVMQualPointerType<3>,// LDS base offset - 
llvm_i32_ty, // Data byte size: 1/2/4 + llvm_i32_ty, // Data byte size: 1/2/4 (/12/16 for gfx950) llvm_i32_ty, // voffset(VGPR, included in bounds checking and swizzling) llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling) llvm_i32_ty, // imm offset(imm, included in bounds checking and swizzling) @@ -1715,7 +1715,7 @@ class AMDGPUStructBufferLoadLDS : Intrinsic < [], [llvm_v4i32_ty, // rsrc(SGPR) LLVMQualPointerType<3>,// LDS base offset - llvm_i32_ty, // Data byte size: 1/2/4 + llvm_i32_ty, // Data byte size: 1/2/4 (/12/16 for gfx950) llvm_i32_ty, // vindex(VGPR) llvm_i32_ty, // voffset(VGPR, included in bounds checking and swizzling) llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling) @@ -1735,7 +1735,7 @@ class AMDGPUStructPtrBufferLoadLDS : Intrinsic < [], [AMDGPUBufferRsrcTy,// rsrc(SGPR) LLVMQualPointerType<3>,// LDS base offset - llvm_i32_ty, // Data byte size: 1/2/4 + llvm_i32_ty, // Data byte size: 1/2/4 (/12/16 for gfx950) llvm_i32_ty, // vindex(VGPR) llvm_i32_ty, // voffset(VGPR, included in bounds checking and swizzling) llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp index a6ef0069f134bd..3522ece24f1c45 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp @@ -3240,6 +3240,24 @@ bool AMDGPUInstructionSelector::selectBufferLoadLds(MachineInstr &MI) const { : HasVOffset ? AMDGPU::BUFFER_LOAD_DWORD_LDS_OFFEN : AMDGPU::BUFFER_LOAD_DWORD_LDS_OFFSET; break; + case 12: +if (!Subtarget->hasLDSLoadB96_B128()) + return false; + +Opc = HasVIndex ? HasVOffset ? AMDGPU::BUFFER_LOAD_DWORDX3_LDS_BOTHEN + : AMDGPU::BUFFER_LOAD_DWORDX3_LDS_IDXEN +: HasVOffset ? 
AMDGPU::BUFFER_LOAD_DWORDX3_LDS_OFFEN + : AMDGPU::BUFFER_LOAD_DWORDX3_LDS_OFFSET; +break; + case 16: +if (!Subtarget->hasLDSLoadB96_B128()) + return false; + +Opc = HasVIndex ? HasVOffset ? AMDGPU::BUFFER_LOAD_DWORDX4_LDS_BOTHEN + : AMDGPU::BUFFER_LOAD_DWORDX4_LDS_IDXEN +: HasVOffset ? AMDGPU::BUFFER_LOAD_DWORDX4_LDS_OFFEN + : AMDG
[llvm-branch-commits] [clang] [llvm] AMDGPU: Add first gfx950 mfma instructions (PR #116312)
https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/116312 >From 566cdf85a2a03fc41148715593081643570d6ded Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Tue, 21 Nov 2023 10:03:19 +0900 Subject: [PATCH] AMDGPU: Add first gfx950 mfma instructions Scheduling info and hazards are wrong and TBD. --- clang/include/clang/Basic/BuiltinsAMDGPU.def | 6 + .../CodeGenOpenCL/builtins-amdgcn-mfma.cl | 25 +- .../builtins-amdgcn-error-gfx950-param.cl | 21 ++ .../builtins-amdgcn-error-gfx950.cl | 12 + llvm/include/llvm/IR/IntrinsicsAMDGPU.td | 9 + llvm/lib/Target/AMDGPU/AMDGPU.td | 4 + .../Target/AMDGPU/AMDGPURegisterBankInfo.cpp | 4 +- .../Target/AMDGPU/AMDGPUSearchableTables.td | 2 + llvm/lib/Target/AMDGPU/GCNSubtarget.h | 4 + llvm/lib/Target/AMDGPU/SIInstrInfo.td | 4 + llvm/lib/Target/AMDGPU/VOP3PInstructions.td | 22 ++ .../UniformityAnalysis/AMDGPU/intrinsics.ll | 17 ++ .../CodeGen/AMDGPU/llvm.amdgcn.mfma.gfx950.ll | 274 ++ llvm/test/MC/AMDGPU/mai-gfx950.s | 112 +++ .../MC/Disassembler/AMDGPU/gfx950_mai.txt | 61 llvm/test/tools/llvm-mca/AMDGPU/gfx950.s | 18 ++ 16 files changed, 592 insertions(+), 3 deletions(-) create mode 100644 clang/test/SemaOpenCL/builtins-amdgcn-error-gfx950-param.cl create mode 100644 clang/test/SemaOpenCL/builtins-amdgcn-error-gfx950.cl create mode 100644 llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.gfx950.ll create mode 100644 llvm/test/MC/AMDGPU/mai-gfx950.s create mode 100644 llvm/test/MC/Disassembler/AMDGPU/gfx950_mai.txt create mode 100644 llvm/test/tools/llvm-mca/AMDGPU/gfx950.s diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def b/clang/include/clang/Basic/BuiltinsAMDGPU.def index 61516eb2a4a723..6917d8d1aca69d 100644 --- a/clang/include/clang/Basic/BuiltinsAMDGPU.def +++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def @@ -431,6 +431,12 @@ TARGET_BUILTIN(__builtin_amdgcn_cvt_pk_fp8_f32, "iffiIb", "nc", "fp8-conversion- TARGET_BUILTIN(__builtin_amdgcn_cvt_sr_bf8_f32, "ifiiIi", "nc", "fp8-conversion-insts") 
TARGET_BUILTIN(__builtin_amdgcn_cvt_sr_fp8_f32, "ifiiIi", "nc", "fp8-conversion-insts") +//===--===// +// GFX950 only builtins. +//===--===// +TARGET_BUILTIN(__builtin_amdgcn_mfma_f32_16x16x32_f16, "V4fV8hV8hV4fIiIiIi", "nc", "gfx950-insts") +TARGET_BUILTIN(__builtin_amdgcn_mfma_f32_32x32x16_f16, "V16fV8hV8hV16fIiIiIi", "nc", "gfx950-insts") + //===--===// // GFX12+ only builtins. //===--===// diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-mfma.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-mfma.cl index dcdeee6b6acc40..a644a60f9ec381 100644 --- a/clang/test/CodeGenOpenCL/builtins-amdgcn-mfma.cl +++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-mfma.cl @@ -2,6 +2,7 @@ // RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx908 -DMFMA_GFX908_TESTS -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-GFX908 // RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx90a -DMFMA_GFX90A_TESTS -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-GFX90A // RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx940 -DMFMA_GFX940_TESTS -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-GFX940 +// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx950 -DMFMA_GFX950_TESTS -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-GFX950 #pragma OPENCL EXTENSION cl_khr_fp64:enable @@ -222,7 +223,7 @@ void test_mfma_f64_4x4x4f64(global double* out, double a, double b, double c) #endif // MFMA_GFX90A_TESTS -#ifdef MFMA_GFX940_TESTS +#if defined(MFMA_GFX940_TESTS) || defined(MFMA_GFX950_TESTS) // CHECK-GFX940-LABEL: @test_mfma_i32_16x16x32_i8 // CHECK-GFX940: call <4 x i32> @llvm.amdgcn.mfma.i32.16x16x32.i8(i64 %a, i64 %b, <4 x i32> %c, i32 0, i32 0, i32 0) void test_mfma_i32_16x16x32_i8(global v4i* out, long a, long b, v4i c) @@ -404,4 +405,24 @@ void test_smfmac_f32_32x32x32_fp8_fp8(global v16f* out, v2i a, v4i b, v16f c, in { *out = __builtin_amdgcn_smfmac_f32_32x32x32_fp8_fp8(a, b, c, idx, 0, 0); } -#endif // MFMA_GFX940_TESTS +#endif // 
defined(MFMA_GFX940_TESTS) || defined(MFMA_GFX950_TESTS) + +#ifdef MFMA_GFX950_TESTS + +// CHECK-GFX950-LABEL: @test_mfma_f32_16x16x32_f16( +// CHECK-GFX950: tail call <4 x float> @llvm.amdgcn.mfma.f32.16x16x32.f16(<8 x half> %a, <8 x half> %b, <4 x float> %c, i32 1, i32 2, i32 3) + +v4f test_mfma_f32_16x16x32_f16(v8h a, v8h b, v4f c) +{ + return __builtin_amdgcn_mfma_f32_16x16x32_f16(a, b, c, 1, 2, 3); +} + +// CHECK-GFX950-LABEL: @test_mfma_f32_32x32x16_f16 +// CHECK-GFX950: tail call <16 x float> @llvm.amdgcn.mfma.f32.32x32
[llvm-branch-commits] [llvm] AMDGPU: Add V_CVT_PK_BF16_F32 for gfx950 (PR #116678)
https://github.com/arsenm created https://github.com/llvm/llvm-project/pull/116678 None >From f3682aa080aebde46106fa11176442973ff62c26 Mon Sep 17 00:00:00 2001 From: Pravin Jagtap Date: Mon, 5 Feb 2024 04:29:01 -0500 Subject: [PATCH] AMDGPU: Add V_CVT_PK_BF16_F32 for gfx950 --- llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 6 + llvm/lib/Target/AMDGPU/SIInstrInfo.td | 1 + llvm/lib/Target/AMDGPU/VOP3Instructions.td| 25 ++ llvm/test/CodeGen/AMDGPU/bf16-conversions.ll | 395 -- llvm/test/MC/AMDGPU/gfx950_asm_vop3.s | 26 ++ .../Disassembler/AMDGPU/gfx950_dasm_vop3.txt | 19 + 6 files changed, 255 insertions(+), 217 deletions(-) create mode 100644 llvm/test/MC/AMDGPU/gfx950_asm_vop3.s create mode 100644 llvm/test/MC/Disassembler/AMDGPU/gfx950_dasm_vop3.txt diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index 1e261f4256c93b..ad89812558d25c 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -889,6 +889,12 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM, setOperationAction(ISD::MUL, MVT::i1, Promote); + if (Subtarget->hasBF16ConversionInsts()) { +setOperationAction(ISD::FP_ROUND, MVT::v2bf16, Legal); +setOperationAction(ISD::FP_ROUND, MVT::bf16, Legal); +setOperationAction(ISD::BUILD_VECTOR, MVT::v2bf16, Legal); + } + setTargetDAGCombine({ISD::ADD, ISD::UADDO_CARRY, ISD::SUB, diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td index 882e147dc231fa..7df9be5c6f7a0b 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td @@ -2787,6 +2787,7 @@ def VOP_I32_I32_I32 : VOPProfile <[i32, i32, i32, untyped]>; def VOP_I32_I32_I32_ARITH : VOPProfile <[i32, i32, i32, untyped], /*EnableClamp=*/1>; def VOP_V2F16_F32_F32 : VOPProfile <[v2f16, f32, f32, untyped]>; def VOP_F32_F16_F16_F16 : VOPProfile <[f32, f16, f16, f16]>; +def VOP_V2BF16_F32_F32 : VOPProfile <[v2bf16, f32, f32, untyped]>; def 
VOP_I64_I64_I32 : VOPProfile <[i64, i64, i32, untyped]>; def VOP_I64_I32_I64 : VOPProfile <[i64, i32, i64, untyped]>; diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td index 551e8b3a679202..917e1b3974b46a 100644 --- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td @@ -944,6 +944,30 @@ let SubtargetPredicate = isGFX11Plus in { defm V_CVT_PK_U16_F32 : VOP3Inst<"v_cvt_pk_u16_f32", VOP3_Profile>; } // End SubtargetPredicate = isGFX11Plus +// FIXME: GlobalISel cannot distinguish f16 and bf16 and may start using bf16 patterns +//instead of less complex f16. Disable GlobalISel for these for now. +def bf16_fpround : PatFrag <(ops node:$src0), (fpround $src0), [{ return true; }]> { + let GISelPredicateCode = [{return false;}]; +} + +let SubtargetPredicate = HasBF16ConversionInsts in { + let ReadsModeReg = 0 in { +defm V_CVT_PK_BF16_F32: VOP3Inst<"v_cvt_pk_bf16_f32", VOP3_Profile>; + } + def : GCNPat<(v2bf16 (bf16_fpround v2f32:$src)), + (V_CVT_PK_BF16_F32_e64 0, (EXTRACT_SUBREG VReg_64:$src, sub0), 0, (EXTRACT_SUBREG VReg_64:$src, sub1))>; + def : GCNPat<(v2bf16 (bf16_fpround v2f64:$src)), + (V_CVT_PK_BF16_F32_e64 0, (V_CVT_F32_F64_e64 0, (EXTRACT_SUBREG VReg_128:$src, sub0_sub1)), + 0, (V_CVT_F32_F64_e64 0, (EXTRACT_SUBREG VReg_128:$src, sub2_sub3)))>; + def : GCNPat<(v2bf16 (build_vector (bf16 (bf16_fpround (f32 (VOP3Mods f32:$src0, i32:$src0_modifiers, + (bf16 (bf16_fpround (f32 (VOP3Mods f32:$src1, i32:$src1_modifiers)), + (V_CVT_PK_BF16_F32_e64 $src0_modifiers, $src0, $src1_modifiers, $src1)>; + def : GCNPat<(bf16 (bf16_fpround (f32 (VOP3Mods f32:$src0, i32:$src0_modifiers, + (V_CVT_PK_BF16_F32_e64 $src0_modifiers, $src0, 0, (f32 (IMPLICIT_DEF)))>; + def : GCNPat<(bf16 (bf16_fpround (f64 (VOP3Mods f64:$src0, i32:$src0_modifiers, + (V_CVT_PK_BF16_F32_e64 0, (f32 (V_CVT_F32_F64_e64 $src0_modifiers, $src0)), 0, (f32 (IMPLICIT_DEF)))>; +} + let SubtargetPredicate = isGFX12Plus, 
ReadsModeReg = 0 in { defm V_MAXIMUMMINIMUM_F32 : VOP3Inst<"v_maximumminimum_f32", VOP3_Profile>; defm V_MINIMUMMAXIMUM_F32 : VOP3Inst<"v_minimummaximum_f32", VOP3_Profile>; @@ -1721,5 +1745,6 @@ defm V_LSHL_ADD_U64 : VOP3_Real_vi <0x208>; defm V_CVT_PK_FP8_F32 : VOP3OpSel_Real_gfx9 <0x2a2>; defm V_CVT_PK_BF8_F32 : VOP3OpSel_Real_gfx9 <0x2a3>; +defm V_CVT_PK_BF16_F32: VOP3OpSel_Real_gfx9 <0x268>; defm V_CVT_SR_FP8_F32 : VOP3OpSel_Real_gfx9_forced_opsel2 <0x2a4>; defm V_CVT_SR_BF8_F32 : VOP3OpSel_Real_gfx9_forced_opsel2 <0x2a5>; diff --git a/llvm/test/CodeGen/AMDGPU/bf16-conversions.ll b/llvm/test/CodeGen/AMDGPU/bf16-conversions.l
[llvm-branch-commits] [clang] [llvm] AMDGPU: Define v_mfma_f32_32x32x16_bf16 for gfx950 (PR #116679)
llvmbot wrote: @llvm/pr-subscribers-backend-amdgpu Author: Matt Arsenault (arsenm) Changes Unlike the existing gfx940 intrinsics using short/i16 in place of bfloat, this uses the natural bfloat type. --- Patch is 40.83 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/116679.diff 12 Files Affected: - (modified) clang/include/clang/Basic/BuiltinsAMDGPU.def (+2) - (modified) clang/test/CodeGenOpenCL/builtins-amdgcn-mfma.cl (+6) - (modified) clang/test/SemaOpenCL/builtins-amdgcn-error-gfx950-param.cl (+7) - (modified) clang/test/SemaOpenCL/builtins-amdgcn-error-gfx950.cl (+4-1) - (modified) llvm/include/llvm/IR/IntrinsicsAMDGPU.td (+2) - (modified) llvm/lib/Target/AMDGPU/SIInstrInfo.td (+1) - (modified) llvm/lib/Target/AMDGPU/VOP3PInstructions.td (+6) - (modified) llvm/test/Analysis/UniformityAnalysis/AMDGPU/intrinsics.ll (+8) - (added) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.gfx950.bf16.ll (+474) - (modified) llvm/test/MC/AMDGPU/mai-gfx950.s (+52-4) - (modified) llvm/test/MC/Disassembler/AMDGPU/gfx950_mai.txt (+27) - (modified) llvm/test/tools/llvm-mca/AMDGPU/gfx950.s (+7-3) ``diff diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def b/clang/include/clang/Basic/BuiltinsAMDGPU.def index 6917d8d1aca69d..7ce8f2c1669d67 100644 --- a/clang/include/clang/Basic/BuiltinsAMDGPU.def +++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def @@ -437,6 +437,8 @@ TARGET_BUILTIN(__builtin_amdgcn_cvt_sr_fp8_f32, "ifiiIi", "nc", "fp8-conversion- TARGET_BUILTIN(__builtin_amdgcn_mfma_f32_16x16x32_f16, "V4fV8hV8hV4fIiIiIi", "nc", "gfx950-insts") TARGET_BUILTIN(__builtin_amdgcn_mfma_f32_32x32x16_f16, "V16fV8hV8hV16fIiIiIi", "nc", "gfx950-insts") +TARGET_BUILTIN(__builtin_amdgcn_mfma_f32_32x32x16_bf16, "V16fV8yV8yV16fIiIiIi", "nc", "gfx950-insts") + //===--===// // GFX12+ only builtins. 
//===--===// diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-mfma.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-mfma.cl index a644a60f9ec381..841d8fcad0fee0 100644 --- a/clang/test/CodeGenOpenCL/builtins-amdgcn-mfma.cl +++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-mfma.cl @@ -24,6 +24,7 @@ typedef short v8s __attribute__((ext_vector_type(8))); typedef short v16s __attribute__((ext_vector_type(16))); typedef short v32s __attribute__((ext_vector_type(32))); typedef double v4d __attribute__((ext_vector_type(4))); +typedef __bf16 v8bf16 __attribute__((ext_vector_type(8))); #ifdef MFMA_GFX908_TESTS @@ -424,5 +425,10 @@ v16f test_mfma_f32_32x32x16_f16(v8h a, v8h b, v16f c) return __builtin_amdgcn_mfma_f32_32x32x16_f16(a, b, c, 1, 2, 3); } +// CHECK-GFX950-LABEL: @test_mfma_f32_32x32x16_bf16( +// CHECK-GFX950: tail call <16 x float> @llvm.amdgcn.mfma.f32.32x32x16.bf16(<8 x bfloat> %a, <8 x bfloat> %b, <16 x float> %c, i32 1, i32 2, i32 3) +v16f test_mfma_f32_32x32x16_bf16(v8bf16 a, v8bf16 b, v16f c) { + return __builtin_amdgcn_mfma_f32_32x32x16_bf16(a, b, c, 1, 2, 3); +} #endif diff --git a/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx950-param.cl b/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx950-param.cl index 4c267e2cac5cad..4af67763c40dd2 100644 --- a/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx950-param.cl +++ b/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx950-param.cl @@ -4,6 +4,7 @@ typedef float float4 __attribute__((ext_vector_type(4))); typedef float float16 __attribute__((ext_vector_type(16))); typedef half half8 __attribute__((ext_vector_type(8))); +typedef __bf16 bfloat8 __attribute__((ext_vector_type(8))); void test_mfma_f32_16x16x32_f16(__global float4* out, half8 a, half8 b, float4 c, int X) { @@ -19,3 +20,9 @@ void test_mfma_f32_32x32x16_f16(__global float16* out, half8 a, half8 b, float16 *out = __builtin_amdgcn_mfma_f32_32x32x16_f16(a, b, c, 0, X, 0); // expected-error{{argument to '__builtin_amdgcn_mfma_f32_32x32x16_f16' must be a 
constant integer}} *out = __builtin_amdgcn_mfma_f32_32x32x16_f16(a, b, c, 0, 0, X); // expected-error{{argument to '__builtin_amdgcn_mfma_f32_32x32x16_f16' must be a constant integer}} } + +void test_mfma_f32_32x32x16_bf16(__global float16* out, bfloat8 a, bfloat8 b, float16 c, int X) { + *out = __builtin_amdgcn_mfma_f32_32x32x16_bf16(a, b, c, X, 0, 0); // expected-error{{argument to '__builtin_amdgcn_mfma_f32_32x32x16_bf16' must be a constant integer}} + *out = __builtin_amdgcn_mfma_f32_32x32x16_bf16(a, b, c, 0, X, 0); // expected-error{{argument to '__builtin_amdgcn_mfma_f32_32x32x16_bf16' must be a constant integer}} + *out = __builtin_amdgcn_mfma_f32_32x32x16_bf16(a, b, c, 0, 0, X); // expected-error{{argument to '__builtin_amdgcn_mfma_f32_32x32x16_bf16' must be a constant integer}} +} diff --git a/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx950.cl b/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx
[llvm-branch-commits] [clang] [llvm] AMDGPU: Define v_mfma_f32_32x32x16_bf16 for gfx950 (PR #116679)
llvmbot wrote: @llvm/pr-subscribers-mc @llvm/pr-subscribers-llvm-ir @llvm/pr-subscribers-clang Author: Matt Arsenault (arsenm) Changes Unlike the existing gfx940 intrinsics using short/i16 in place of bfloat, this uses the natural bfloat type. --- Patch is 40.83 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/116679.diff 12 Files Affected: - (modified) clang/include/clang/Basic/BuiltinsAMDGPU.def (+2) - (modified) clang/test/CodeGenOpenCL/builtins-amdgcn-mfma.cl (+6) - (modified) clang/test/SemaOpenCL/builtins-amdgcn-error-gfx950-param.cl (+7) - (modified) clang/test/SemaOpenCL/builtins-amdgcn-error-gfx950.cl (+4-1) - (modified) llvm/include/llvm/IR/IntrinsicsAMDGPU.td (+2) - (modified) llvm/lib/Target/AMDGPU/SIInstrInfo.td (+1) - (modified) llvm/lib/Target/AMDGPU/VOP3PInstructions.td (+6) - (modified) llvm/test/Analysis/UniformityAnalysis/AMDGPU/intrinsics.ll (+8) - (added) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.gfx950.bf16.ll (+474) - (modified) llvm/test/MC/AMDGPU/mai-gfx950.s (+52-4) - (modified) llvm/test/MC/Disassembler/AMDGPU/gfx950_mai.txt (+27) - (modified) llvm/test/tools/llvm-mca/AMDGPU/gfx950.s (+7-3) ``diff diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def b/clang/include/clang/Basic/BuiltinsAMDGPU.def index 6917d8d1aca69d..7ce8f2c1669d67 100644 --- a/clang/include/clang/Basic/BuiltinsAMDGPU.def +++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def @@ -437,6 +437,8 @@ TARGET_BUILTIN(__builtin_amdgcn_cvt_sr_fp8_f32, "ifiiIi", "nc", "fp8-conversion- TARGET_BUILTIN(__builtin_amdgcn_mfma_f32_16x16x32_f16, "V4fV8hV8hV4fIiIiIi", "nc", "gfx950-insts") TARGET_BUILTIN(__builtin_amdgcn_mfma_f32_32x32x16_f16, "V16fV8hV8hV16fIiIiIi", "nc", "gfx950-insts") +TARGET_BUILTIN(__builtin_amdgcn_mfma_f32_32x32x16_bf16, "V16fV8yV8yV16fIiIiIi", "nc", "gfx950-insts") + //===--===// // GFX12+ only builtins. 
//===--===// diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-mfma.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-mfma.cl index a644a60f9ec381..841d8fcad0fee0 100644 --- a/clang/test/CodeGenOpenCL/builtins-amdgcn-mfma.cl +++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-mfma.cl @@ -24,6 +24,7 @@ typedef short v8s __attribute__((ext_vector_type(8))); typedef short v16s __attribute__((ext_vector_type(16))); typedef short v32s __attribute__((ext_vector_type(32))); typedef double v4d __attribute__((ext_vector_type(4))); +typedef __bf16 v8bf16 __attribute__((ext_vector_type(8))); #ifdef MFMA_GFX908_TESTS @@ -424,5 +425,10 @@ v16f test_mfma_f32_32x32x16_f16(v8h a, v8h b, v16f c) return __builtin_amdgcn_mfma_f32_32x32x16_f16(a, b, c, 1, 2, 3); } +// CHECK-GFX950-LABEL: @test_mfma_f32_32x32x16_bf16( +// CHECK-GFX950: tail call <16 x float> @llvm.amdgcn.mfma.f32.32x32x16.bf16(<8 x bfloat> %a, <8 x bfloat> %b, <16 x float> %c, i32 1, i32 2, i32 3) +v16f test_mfma_f32_32x32x16_bf16(v8bf16 a, v8bf16 b, v16f c) { + return __builtin_amdgcn_mfma_f32_32x32x16_bf16(a, b, c, 1, 2, 3); +} #endif diff --git a/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx950-param.cl b/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx950-param.cl index 4c267e2cac5cad..4af67763c40dd2 100644 --- a/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx950-param.cl +++ b/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx950-param.cl @@ -4,6 +4,7 @@ typedef float float4 __attribute__((ext_vector_type(4))); typedef float float16 __attribute__((ext_vector_type(16))); typedef half half8 __attribute__((ext_vector_type(8))); +typedef __bf16 bfloat8 __attribute__((ext_vector_type(8))); void test_mfma_f32_16x16x32_f16(__global float4* out, half8 a, half8 b, float4 c, int X) { @@ -19,3 +20,9 @@ void test_mfma_f32_32x32x16_f16(__global float16* out, half8 a, half8 b, float16 *out = __builtin_amdgcn_mfma_f32_32x32x16_f16(a, b, c, 0, X, 0); // expected-error{{argument to '__builtin_amdgcn_mfma_f32_32x32x16_f16' must be a 
constant integer}} *out = __builtin_amdgcn_mfma_f32_32x32x16_f16(a, b, c, 0, 0, X); // expected-error{{argument to '__builtin_amdgcn_mfma_f32_32x32x16_f16' must be a constant integer}} } + +void test_mfma_f32_32x32x16_bf16(__global float16* out, bfloat8 a, bfloat8 b, float16 c, int X) { + *out = __builtin_amdgcn_mfma_f32_32x32x16_bf16(a, b, c, X, 0, 0); // expected-error{{argument to '__builtin_amdgcn_mfma_f32_32x32x16_bf16' must be a constant integer}} + *out = __builtin_amdgcn_mfma_f32_32x32x16_bf16(a, b, c, 0, X, 0); // expected-error{{argument to '__builtin_amdgcn_mfma_f32_32x32x16_bf16' must be a constant integer}} + *out = __builtin_amdgcn_mfma_f32_32x32x16_bf16(a, b, c, 0, 0, X); // expected-error{{argument to '__builtin_amdgcn_mfma_f32_32x32x16_bf16' must be a constant integer}} +} diff --git a/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx950.cl b/cla
[llvm-branch-commits] [llvm] AMDGPU: Handle gfx950 global_load_lds_* instructions (PR #116680)
https://github.com/arsenm ready_for_review https://github.com/llvm/llvm-project/pull/116680 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Add V_CVT_PK_BF16_F32 for gfx950 (PR #116678)
llvmbot wrote: @llvm/pr-subscribers-backend-amdgpu Author: Matt Arsenault (arsenm) Changes --- Patch is 27.17 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/116678.diff 6 Files Affected: - (modified) llvm/lib/Target/AMDGPU/SIISelLowering.cpp (+6) - (modified) llvm/lib/Target/AMDGPU/SIInstrInfo.td (+1) - (modified) llvm/lib/Target/AMDGPU/VOP3Instructions.td (+25) - (modified) llvm/test/CodeGen/AMDGPU/bf16-conversions.ll (+178-217) - (added) llvm/test/MC/AMDGPU/gfx950_asm_vop3.s (+26) - (added) llvm/test/MC/Disassembler/AMDGPU/gfx950_dasm_vop3.txt (+19) ``diff diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index 1e261f4256c93b..ad89812558d25c 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -889,6 +889,12 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM, setOperationAction(ISD::MUL, MVT::i1, Promote); + if (Subtarget->hasBF16ConversionInsts()) { +setOperationAction(ISD::FP_ROUND, MVT::v2bf16, Legal); +setOperationAction(ISD::FP_ROUND, MVT::bf16, Legal); +setOperationAction(ISD::BUILD_VECTOR, MVT::v2bf16, Legal); + } + setTargetDAGCombine({ISD::ADD, ISD::UADDO_CARRY, ISD::SUB, diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td index 882e147dc231fa..7df9be5c6f7a0b 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td @@ -2787,6 +2787,7 @@ def VOP_I32_I32_I32 : VOPProfile <[i32, i32, i32, untyped]>; def VOP_I32_I32_I32_ARITH : VOPProfile <[i32, i32, i32, untyped], /*EnableClamp=*/1>; def VOP_V2F16_F32_F32 : VOPProfile <[v2f16, f32, f32, untyped]>; def VOP_F32_F16_F16_F16 : VOPProfile <[f32, f16, f16, f16]>; +def VOP_V2BF16_F32_F32 : VOPProfile <[v2bf16, f32, f32, untyped]>; def VOP_I64_I64_I32 : VOPProfile <[i64, i64, i32, untyped]>; def VOP_I64_I32_I64 : VOPProfile <[i64, i32, i64, untyped]>; diff --git 
a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td index 551e8b3a679202..917e1b3974b46a 100644 --- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td @@ -944,6 +944,30 @@ let SubtargetPredicate = isGFX11Plus in { defm V_CVT_PK_U16_F32 : VOP3Inst<"v_cvt_pk_u16_f32", VOP3_Profile>; } // End SubtargetPredicate = isGFX11Plus +// FIXME: GlobalISel cannot distinguish f16 and bf16 and may start using bf16 patterns +//instead of less complex f16. Disable GlobalISel for these for now. +def bf16_fpround : PatFrag <(ops node:$src0), (fpround $src0), [{ return true; }]> { + let GISelPredicateCode = [{return false;}]; +} + +let SubtargetPredicate = HasBF16ConversionInsts in { + let ReadsModeReg = 0 in { +defm V_CVT_PK_BF16_F32: VOP3Inst<"v_cvt_pk_bf16_f32", VOP3_Profile>; + } + def : GCNPat<(v2bf16 (bf16_fpround v2f32:$src)), + (V_CVT_PK_BF16_F32_e64 0, (EXTRACT_SUBREG VReg_64:$src, sub0), 0, (EXTRACT_SUBREG VReg_64:$src, sub1))>; + def : GCNPat<(v2bf16 (bf16_fpround v2f64:$src)), + (V_CVT_PK_BF16_F32_e64 0, (V_CVT_F32_F64_e64 0, (EXTRACT_SUBREG VReg_128:$src, sub0_sub1)), + 0, (V_CVT_F32_F64_e64 0, (EXTRACT_SUBREG VReg_128:$src, sub2_sub3)))>; + def : GCNPat<(v2bf16 (build_vector (bf16 (bf16_fpround (f32 (VOP3Mods f32:$src0, i32:$src0_modifiers, + (bf16 (bf16_fpround (f32 (VOP3Mods f32:$src1, i32:$src1_modifiers)), + (V_CVT_PK_BF16_F32_e64 $src0_modifiers, $src0, $src1_modifiers, $src1)>; + def : GCNPat<(bf16 (bf16_fpround (f32 (VOP3Mods f32:$src0, i32:$src0_modifiers, + (V_CVT_PK_BF16_F32_e64 $src0_modifiers, $src0, 0, (f32 (IMPLICIT_DEF)))>; + def : GCNPat<(bf16 (bf16_fpround (f64 (VOP3Mods f64:$src0, i32:$src0_modifiers, + (V_CVT_PK_BF16_F32_e64 0, (f32 (V_CVT_F32_F64_e64 $src0_modifiers, $src0)), 0, (f32 (IMPLICIT_DEF)))>; +} + let SubtargetPredicate = isGFX12Plus, ReadsModeReg = 0 in { defm V_MAXIMUMMINIMUM_F32 : VOP3Inst<"v_maximumminimum_f32", VOP3_Profile>; defm V_MINIMUMMAXIMUM_F32 : 
VOP3Inst<"v_minimummaximum_f32", VOP3_Profile>; @@ -1721,5 +1745,6 @@ defm V_LSHL_ADD_U64 : VOP3_Real_vi <0x208>; defm V_CVT_PK_FP8_F32 : VOP3OpSel_Real_gfx9 <0x2a2>; defm V_CVT_PK_BF8_F32 : VOP3OpSel_Real_gfx9 <0x2a3>; +defm V_CVT_PK_BF16_F32: VOP3OpSel_Real_gfx9 <0x268>; defm V_CVT_SR_FP8_F32 : VOP3OpSel_Real_gfx9_forced_opsel2 <0x2a4>; defm V_CVT_SR_BF8_F32 : VOP3OpSel_Real_gfx9_forced_opsel2 <0x2a5>; diff --git a/llvm/test/CodeGen/AMDGPU/bf16-conversions.ll b/llvm/test/CodeGen/AMDGPU/bf16-conversions.ll index 425fc5884cec7f..135efceb31fdda 100644 --- a/llvm/test/CodeGen/AMDGPU/bf16-conversions.ll +++ b/llvm/test/CodeGen/AMDGPU/bf16-conversions.ll @@ -24,139 +24,168 @@ define amd
[llvm-branch-commits] [clang] [llvm] AMDGPU: Define v_mfma_f32_32x32x16_bf16 for gfx950 (PR #116679)
https://github.com/arsenm ready_for_review https://github.com/llvm/llvm-project/pull/116679 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Add V_CVT_PK_BF16_F32 for gfx950 (PR #116678)
https://github.com/arsenm ready_for_review https://github.com/llvm/llvm-project/pull/116678 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Handle gfx950 96/128-bit buffer_load_lds (PR #116681)
llvmbot wrote: @llvm/pr-subscribers-llvm-ir Author: Matt Arsenault (arsenm) Changes Enforcing this limit in the clang builtin will come later. --- Patch is 31.94 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/116681.diff 9 Files Affected: - (modified) llvm/include/llvm/IR/IntrinsicsAMDGPU.td (+4-4) - (modified) llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp (+18) - (modified) llvm/lib/Target/AMDGPU/BUFInstructions.td (+16-8) - (modified) llvm/lib/Target/AMDGPU/SIISelLowering.cpp (+16) - (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.global.load.lds.gfx950.ll (+8) - (added) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.ptr.buffer.load.lds.gfx950.ll (+176) - (added) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.buffer.load.lds.gfx950.ll (+196) - (added) llvm/test/MC/AMDGPU/mubuf-gfx950.s (+32) - (modified) llvm/test/MC/Disassembler/AMDGPU/gfx950.txt (+19) ``diff diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td index f43ab50d2ea441..360af786c5160d 100644 --- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td +++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td @@ -1674,7 +1674,7 @@ class AMDGPURawBufferLoadLDS : Intrinsic < [], [llvm_v4i32_ty, // rsrc(SGPR) LLVMQualPointerType<3>,// LDS base offset - llvm_i32_ty, // Data byte size: 1/2/4 + llvm_i32_ty, // Data byte size: 1/2/4 (/12/16 for gfx950) llvm_i32_ty, // voffset(VGPR, included in bounds checking and swizzling) llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling) llvm_i32_ty, // imm offset(imm, included in bounds checking and swizzling) @@ -1693,7 +1693,7 @@ class AMDGPURawPtrBufferLoadLDS : Intrinsic < [], [AMDGPUBufferRsrcTy,// rsrc(SGPR) LLVMQualPointerType<3>,// LDS base offset - llvm_i32_ty, // Data byte size: 1/2/4 + llvm_i32_ty, // Data byte size: 1/2/4 (/12/16 for gfx950) llvm_i32_ty, // voffset(VGPR, included in bounds checking and swizzling) llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds 
checking and swizzling) llvm_i32_ty, // imm offset(imm, included in bounds checking and swizzling) @@ -1715,7 +1715,7 @@ class AMDGPUStructBufferLoadLDS : Intrinsic < [], [llvm_v4i32_ty, // rsrc(SGPR) LLVMQualPointerType<3>,// LDS base offset - llvm_i32_ty, // Data byte size: 1/2/4 + llvm_i32_ty, // Data byte size: 1/2/4 (/12/16 for gfx950) llvm_i32_ty, // vindex(VGPR) llvm_i32_ty, // voffset(VGPR, included in bounds checking and swizzling) llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling) @@ -1735,7 +1735,7 @@ class AMDGPUStructPtrBufferLoadLDS : Intrinsic < [], [AMDGPUBufferRsrcTy,// rsrc(SGPR) LLVMQualPointerType<3>,// LDS base offset - llvm_i32_ty, // Data byte size: 1/2/4 + llvm_i32_ty, // Data byte size: 1/2/4 (/12/16 for gfx950) llvm_i32_ty, // vindex(VGPR) llvm_i32_ty, // voffset(VGPR, included in bounds checking and swizzling) llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp index a6ef0069f134bd..3522ece24f1c45 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp @@ -3240,6 +3240,24 @@ bool AMDGPUInstructionSelector::selectBufferLoadLds(MachineInstr &MI) const { : HasVOffset ? AMDGPU::BUFFER_LOAD_DWORD_LDS_OFFEN : AMDGPU::BUFFER_LOAD_DWORD_LDS_OFFSET; break; + case 12: +if (!Subtarget->hasLDSLoadB96_B128()) + return false; + +Opc = HasVIndex ? HasVOffset ? AMDGPU::BUFFER_LOAD_DWORDX3_LDS_BOTHEN + : AMDGPU::BUFFER_LOAD_DWORDX3_LDS_IDXEN +: HasVOffset ? AMDGPU::BUFFER_LOAD_DWORDX3_LDS_OFFEN + : AMDGPU::BUFFER_LOAD_DWORDX3_LDS_OFFSET; +break; + case 16: +if (!Subtarget->hasLDSLoadB96_B128()) + return false; + +Opc = HasVIndex ? HasVOffset ? AMDGPU::BUFFER_LOAD_DWORDX4_LDS_BOTHEN + : AMDGPU::BUFFER_LOAD_DWORDX4_LDS_IDXEN +: HasVOffset ? 
AMDGPU::BUFFER_LOAD_DWORDX4_LDS_OFFEN + : AMDGPU::BUFFER_LOAD_DWORDX4_LDS_OFFSET; +break; } MachineBasicBlock *MBB = MI.getParent(); diff --git a/llvm/lib/Target/AMDGPU/BUFInstructions.td b/llvm/lib/Target/AMDGPU/BUFInstructions.td index 79d6a825f60b03..7283733dea22db 100644 --- a/llvm/lib/Target/AMDGPU/BUFInstr
[llvm-branch-commits] [llvm] AMDGPU: Handle gfx950 96/128-bit buffer_load_lds (PR #116681)
llvmbot wrote: @llvm/pr-subscribers-clang Author: Matt Arsenault (arsenm) Changes Enforcing this limit in the clang builtin will come later. --- Patch is 31.94 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/116681.diff 9 Files Affected: - (modified) llvm/include/llvm/IR/IntrinsicsAMDGPU.td (+4-4) - (modified) llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp (+18) - (modified) llvm/lib/Target/AMDGPU/BUFInstructions.td (+16-8) - (modified) llvm/lib/Target/AMDGPU/SIISelLowering.cpp (+16) - (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.global.load.lds.gfx950.ll (+8) - (added) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.ptr.buffer.load.lds.gfx950.ll (+176) - (added) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.buffer.load.lds.gfx950.ll (+196) - (added) llvm/test/MC/AMDGPU/mubuf-gfx950.s (+32) - (modified) llvm/test/MC/Disassembler/AMDGPU/gfx950.txt (+19) ``diff diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td index f43ab50d2ea441..360af786c5160d 100644 --- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td +++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td @@ -1674,7 +1674,7 @@ class AMDGPURawBufferLoadLDS : Intrinsic < [], [llvm_v4i32_ty, // rsrc(SGPR) LLVMQualPointerType<3>,// LDS base offset - llvm_i32_ty, // Data byte size: 1/2/4 + llvm_i32_ty, // Data byte size: 1/2/4 (/12/16 for gfx950) llvm_i32_ty, // voffset(VGPR, included in bounds checking and swizzling) llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling) llvm_i32_ty, // imm offset(imm, included in bounds checking and swizzling) @@ -1693,7 +1693,7 @@ class AMDGPURawPtrBufferLoadLDS : Intrinsic < [], [AMDGPUBufferRsrcTy,// rsrc(SGPR) LLVMQualPointerType<3>,// LDS base offset - llvm_i32_ty, // Data byte size: 1/2/4 + llvm_i32_ty, // Data byte size: 1/2/4 (/12/16 for gfx950) llvm_i32_ty, // voffset(VGPR, included in bounds checking and swizzling) llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds 
checking and swizzling) llvm_i32_ty, // imm offset(imm, included in bounds checking and swizzling) @@ -1715,7 +1715,7 @@ class AMDGPUStructBufferLoadLDS : Intrinsic < [], [llvm_v4i32_ty, // rsrc(SGPR) LLVMQualPointerType<3>,// LDS base offset - llvm_i32_ty, // Data byte size: 1/2/4 + llvm_i32_ty, // Data byte size: 1/2/4 (/12/16 for gfx950) llvm_i32_ty, // vindex(VGPR) llvm_i32_ty, // voffset(VGPR, included in bounds checking and swizzling) llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling) @@ -1735,7 +1735,7 @@ class AMDGPUStructPtrBufferLoadLDS : Intrinsic < [], [AMDGPUBufferRsrcTy,// rsrc(SGPR) LLVMQualPointerType<3>,// LDS base offset - llvm_i32_ty, // Data byte size: 1/2/4 + llvm_i32_ty, // Data byte size: 1/2/4 (/12/16 for gfx950) llvm_i32_ty, // vindex(VGPR) llvm_i32_ty, // voffset(VGPR, included in bounds checking and swizzling) llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp index a6ef0069f134bd..3522ece24f1c45 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp @@ -3240,6 +3240,24 @@ bool AMDGPUInstructionSelector::selectBufferLoadLds(MachineInstr &MI) const { : HasVOffset ? AMDGPU::BUFFER_LOAD_DWORD_LDS_OFFEN : AMDGPU::BUFFER_LOAD_DWORD_LDS_OFFSET; break; + case 12: +if (!Subtarget->hasLDSLoadB96_B128()) + return false; + +Opc = HasVIndex ? HasVOffset ? AMDGPU::BUFFER_LOAD_DWORDX3_LDS_BOTHEN + : AMDGPU::BUFFER_LOAD_DWORDX3_LDS_IDXEN +: HasVOffset ? AMDGPU::BUFFER_LOAD_DWORDX3_LDS_OFFEN + : AMDGPU::BUFFER_LOAD_DWORDX3_LDS_OFFSET; +break; + case 16: +if (!Subtarget->hasLDSLoadB96_B128()) + return false; + +Opc = HasVIndex ? HasVOffset ? AMDGPU::BUFFER_LOAD_DWORDX4_LDS_BOTHEN + : AMDGPU::BUFFER_LOAD_DWORDX4_LDS_IDXEN +: HasVOffset ? 
AMDGPU::BUFFER_LOAD_DWORDX4_LDS_OFFEN + : AMDGPU::BUFFER_LOAD_DWORDX4_LDS_OFFSET; +break; } MachineBasicBlock *MBB = MI.getParent(); diff --git a/llvm/lib/Target/AMDGPU/BUFInstructions.td b/llvm/lib/Target/AMDGPU/BUFInstructions.td index 79d6a825f60b03..7283733dea22db 100644 --- a/llvm/lib/Target/AMDGPU/BUFInstruc
[llvm-branch-commits] [llvm] AMDGPU: Handle gfx950 96/128-bit buffer_load_lds (PR #116681)
llvmbot wrote: @llvm/pr-subscribers-mc @llvm/pr-subscribers-backend-amdgpu Author: Matt Arsenault (arsenm) Changes Enforcing this limit in the clang builtin will come later. --- Patch is 31.94 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/116681.diff 9 Files Affected: - (modified) llvm/include/llvm/IR/IntrinsicsAMDGPU.td (+4-4) - (modified) llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp (+18) - (modified) llvm/lib/Target/AMDGPU/BUFInstructions.td (+16-8) - (modified) llvm/lib/Target/AMDGPU/SIISelLowering.cpp (+16) - (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.global.load.lds.gfx950.ll (+8) - (added) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.ptr.buffer.load.lds.gfx950.ll (+176) - (added) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.buffer.load.lds.gfx950.ll (+196) - (added) llvm/test/MC/AMDGPU/mubuf-gfx950.s (+32) - (modified) llvm/test/MC/Disassembler/AMDGPU/gfx950.txt (+19) ``diff diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td index f43ab50d2ea441..360af786c5160d 100644 --- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td +++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td @@ -1674,7 +1674,7 @@ class AMDGPURawBufferLoadLDS : Intrinsic < [], [llvm_v4i32_ty, // rsrc(SGPR) LLVMQualPointerType<3>,// LDS base offset - llvm_i32_ty, // Data byte size: 1/2/4 + llvm_i32_ty, // Data byte size: 1/2/4 (/12/16 for gfx950) llvm_i32_ty, // voffset(VGPR, included in bounds checking and swizzling) llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling) llvm_i32_ty, // imm offset(imm, included in bounds checking and swizzling) @@ -1693,7 +1693,7 @@ class AMDGPURawPtrBufferLoadLDS : Intrinsic < [], [AMDGPUBufferRsrcTy,// rsrc(SGPR) LLVMQualPointerType<3>,// LDS base offset - llvm_i32_ty, // Data byte size: 1/2/4 + llvm_i32_ty, // Data byte size: 1/2/4 (/12/16 for gfx950) llvm_i32_ty, // voffset(VGPR, included in bounds checking and swizzling) llvm_i32_ty, // 
soffset(SGPR/imm, excluded from bounds checking and swizzling) llvm_i32_ty, // imm offset(imm, included in bounds checking and swizzling) @@ -1715,7 +1715,7 @@ class AMDGPUStructBufferLoadLDS : Intrinsic < [], [llvm_v4i32_ty, // rsrc(SGPR) LLVMQualPointerType<3>,// LDS base offset - llvm_i32_ty, // Data byte size: 1/2/4 + llvm_i32_ty, // Data byte size: 1/2/4 (/12/16 for gfx950) llvm_i32_ty, // vindex(VGPR) llvm_i32_ty, // voffset(VGPR, included in bounds checking and swizzling) llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling) @@ -1735,7 +1735,7 @@ class AMDGPUStructPtrBufferLoadLDS : Intrinsic < [], [AMDGPUBufferRsrcTy,// rsrc(SGPR) LLVMQualPointerType<3>,// LDS base offset - llvm_i32_ty, // Data byte size: 1/2/4 + llvm_i32_ty, // Data byte size: 1/2/4 (/12/16 for gfx950) llvm_i32_ty, // vindex(VGPR) llvm_i32_ty, // voffset(VGPR, included in bounds checking and swizzling) llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp index a6ef0069f134bd..3522ece24f1c45 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp @@ -3240,6 +3240,24 @@ bool AMDGPUInstructionSelector::selectBufferLoadLds(MachineInstr &MI) const { : HasVOffset ? AMDGPU::BUFFER_LOAD_DWORD_LDS_OFFEN : AMDGPU::BUFFER_LOAD_DWORD_LDS_OFFSET; break; + case 12: +if (!Subtarget->hasLDSLoadB96_B128()) + return false; + +Opc = HasVIndex ? HasVOffset ? AMDGPU::BUFFER_LOAD_DWORDX3_LDS_BOTHEN + : AMDGPU::BUFFER_LOAD_DWORDX3_LDS_IDXEN +: HasVOffset ? AMDGPU::BUFFER_LOAD_DWORDX3_LDS_OFFEN + : AMDGPU::BUFFER_LOAD_DWORDX3_LDS_OFFSET; +break; + case 16: +if (!Subtarget->hasLDSLoadB96_B128()) + return false; + +Opc = HasVIndex ? HasVOffset ? AMDGPU::BUFFER_LOAD_DWORDX4_LDS_BOTHEN + : AMDGPU::BUFFER_LOAD_DWORDX4_LDS_IDXEN +: HasVOffset ? 
AMDGPU::BUFFER_LOAD_DWORDX4_LDS_OFFEN + : AMDGPU::BUFFER_LOAD_DWORDX4_LDS_OFFSET; +break; } MachineBasicBlock *MBB = MI.getParent(); diff --git a/llvm/lib/Target/AMDGPU/BUFInstructions.td b/llvm/lib/Target/AMDGPU/BUFInstructions.td index 79d6a825f60b03..7283733dea22db 100644 --- a/
[llvm-branch-commits] [llvm] AMDGPU: Handle gfx950 global_load_lds_* instructions (PR #116680)
llvmbot wrote: @llvm/pr-subscribers-llvm-analysis @llvm/pr-subscribers-clang Author: Matt Arsenault (arsenm) Changes Define global_load_lds_dwordx3 and global_load_lds_dwordx4. Oddly it seems dwordx2 was skipped. --- Full diff: https://github.com/llvm/llvm-project/pull/116680.diff 8 Files Affected: - (modified) llvm/include/llvm/IR/IntrinsicsAMDGPU.td (+1-1) - (modified) llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp (+10) - (modified) llvm/lib/Target/AMDGPU/FLATInstructions.td (+9) - (modified) llvm/lib/Target/AMDGPU/GCNSubtarget.h (+7) - (modified) llvm/lib/Target/AMDGPU/SIISelLowering.cpp (+10) - (added) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.global.load.lds.gfx950.ll (+137) - (added) llvm/test/MC/AMDGPU/gfx950_asm_features.s (+37) - (added) llvm/test/MC/Disassembler/AMDGPU/gfx950.txt (+25) ``diff diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td index 15f33cdbf92e6e..f43ab50d2ea441 100644 --- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td +++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td @@ -2452,7 +2452,7 @@ class AMDGPUGlobalLoadLDS : [], [LLVMQualPointerType<1>,// Base global pointer to load from LLVMQualPointerType<3>,// LDS base pointer to store to - llvm_i32_ty, // Data byte size: 1/2/4 + llvm_i32_ty, // Data byte size: 1/2/4 (/12/16 for gfx950) llvm_i32_ty, // imm offset (applied to both global and LDS address) llvm_i32_ty], // auxiliary data (imm, cachepolicy (bit 0 = sc0, // bit 1 = sc1, diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp index 13de93e829fab2..a6ef0069f134bd 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp @@ -3329,6 +3329,16 @@ bool AMDGPUInstructionSelector::selectGlobalLoadLds(MachineInstr &MI) const{ case 4: Opc = AMDGPU::GLOBAL_LOAD_LDS_DWORD; break; + case 12: +if (!Subtarget->hasLDSLoadB96_B128()) + return false; +Opc = 
AMDGPU::GLOBAL_LOAD_LDS_DWORDX3; +break; + case 16: +if (!Subtarget->hasLDSLoadB96_B128()) + return false; +Opc = AMDGPU::GLOBAL_LOAD_LDS_DWORDX4; +break; } MachineBasicBlock *MBB = MI.getParent(); diff --git a/llvm/lib/Target/AMDGPU/FLATInstructions.td b/llvm/lib/Target/AMDGPU/FLATInstructions.td index db74372e9db452..861fcf017d9e4d 100644 --- a/llvm/lib/Target/AMDGPU/FLATInstructions.td +++ b/llvm/lib/Target/AMDGPU/FLATInstructions.td @@ -934,6 +934,11 @@ defm GLOBAL_LOAD_LDS_USHORT : FLAT_Global_Load_LDS_Pseudo <"global_load_lds_usho defm GLOBAL_LOAD_LDS_SSHORT : FLAT_Global_Load_LDS_Pseudo <"global_load_lds_sshort">; defm GLOBAL_LOAD_LDS_DWORD : FLAT_Global_Load_LDS_Pseudo <"global_load_lds_dword">; +let SubtargetPredicate = HasGFX950Insts in { +defm GLOBAL_LOAD_LDS_DWORDX3 : FLAT_Global_Load_LDS_Pseudo <"global_load_lds_dwordx3">; +defm GLOBAL_LOAD_LDS_DWORDX4 : FLAT_Global_Load_LDS_Pseudo <"global_load_lds_dwordx4">; +} + let SubtargetPredicate = isGFX12Plus in { defm GLOBAL_ATOMIC_COND_SUB_U32: FLAT_Global_Atomic_Pseudo <"global_atomic_cond_sub_u32", VGPR_32, i32>; defm GLOBAL_ATOMIC_ORDERED_ADD_B64 : FLAT_Global_Atomic_Pseudo <"global_atomic_ordered_add_b64", VReg_64, i64>; @@ -1980,6 +1985,10 @@ defm GLOBAL_LOAD_LDS_USHORT : FLAT_Real_AllAddr_LDS <0x028, 0x12>; defm GLOBAL_LOAD_LDS_SSHORT : FLAT_Real_AllAddr_LDS <0x029, 0x13>; defm GLOBAL_LOAD_LDS_DWORD : FLAT_Real_AllAddr_LDS <0x02a, 0x14>; +defm GLOBAL_LOAD_LDS_DWORDX3 : FLAT_Real_AllAddr_LDS <0x07e, 0x07e>; +defm GLOBAL_LOAD_LDS_DWORDX4 : FLAT_Real_AllAddr_LDS <0x07d, 0x07d>; + + defm GLOBAL_ATOMIC_SWAP : FLAT_Global_Real_Atomics_vi <0x40>; defm GLOBAL_ATOMIC_CMPSWAP: FLAT_Global_Real_Atomics_vi <0x41>; defm GLOBAL_ATOMIC_ADD: FLAT_Global_Real_Atomics_vi <0x42>; diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h index 4a6efe533230b1..f3f96940c1f44b 100644 --- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h +++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h @@ -1289,6 +1289,13 @@ 
class GCNSubtarget final : public AMDGPUGenSubtargetInfo, // hasGFX940Insts and hasGFX90AInsts are also true. bool hasGFX950Insts() const { return GFX950Insts; } + /// Returns true if the target supports + /// global_load_lds_dwordx3/global_load_lds_dwordx4 or + /// buffer_load_dwordx3/buffer_load_dwordx4 with the lds bit. + bool hasLDSLoadB96_B128() const { +return hasGFX950Insts(); + } + bool hasSALUFloatInsts() const { return HasSALUFloatInsts; } bool hasPseudoScalarTrans() const { return HasPseudoScalarTrans; } diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index ad8981
[llvm-branch-commits] [llvm] AMDGPU: Handle gfx950 96/128-bit buffer_load_lds (PR #116681)
https://github.com/arsenm ready_for_review https://github.com/llvm/llvm-project/pull/116681 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Handle gfx950 96/128-bit buffer_load_lds (PR #116681)
github-actions[bot] wrote: :warning: C/C++ code formatter, clang-format found issues in your code. :warning: You can test this locally with the following command: ``bash git-clang-format --diff 42f311ceb555ea2b3f171ad2ef8254e971e0be12 f5657c9cc25cfed321ced807510a21dc374bcfe3 --extensions cpp -- llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp llvm/lib/Target/AMDGPU/SIISelLowering.cpp `` View the diff from clang-format here. ``diff diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp index 3522ece24f..707136409e 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp @@ -3244,19 +3244,19 @@ bool AMDGPUInstructionSelector::selectBufferLoadLds(MachineInstr &MI) const { if (!Subtarget->hasLDSLoadB96_B128()) return false; -Opc = HasVIndex ? HasVOffset ? AMDGPU::BUFFER_LOAD_DWORDX3_LDS_BOTHEN - : AMDGPU::BUFFER_LOAD_DWORDX3_LDS_IDXEN -: HasVOffset ? AMDGPU::BUFFER_LOAD_DWORDX3_LDS_OFFEN - : AMDGPU::BUFFER_LOAD_DWORDX3_LDS_OFFSET; +Opc = HasVIndex? HasVOffset ? AMDGPU::BUFFER_LOAD_DWORDX3_LDS_BOTHEN +: AMDGPU::BUFFER_LOAD_DWORDX3_LDS_IDXEN + : HasVOffset ? AMDGPU::BUFFER_LOAD_DWORDX3_LDS_OFFEN + : AMDGPU::BUFFER_LOAD_DWORDX3_LDS_OFFSET; break; case 16: if (!Subtarget->hasLDSLoadB96_B128()) return false; -Opc = HasVIndex ? HasVOffset ? AMDGPU::BUFFER_LOAD_DWORDX4_LDS_BOTHEN - : AMDGPU::BUFFER_LOAD_DWORDX4_LDS_IDXEN -: HasVOffset ? AMDGPU::BUFFER_LOAD_DWORDX4_LDS_OFFEN - : AMDGPU::BUFFER_LOAD_DWORDX4_LDS_OFFSET; +Opc = HasVIndex? HasVOffset ? AMDGPU::BUFFER_LOAD_DWORDX4_LDS_BOTHEN +: AMDGPU::BUFFER_LOAD_DWORDX4_LDS_IDXEN + : HasVOffset ? 
AMDGPU::BUFFER_LOAD_DWORDX4_LDS_OFFEN + : AMDGPU::BUFFER_LOAD_DWORDX4_LDS_OFFSET; break; } diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index 5b02f9bf80..1763c1f0aa 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -9828,18 +9828,18 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op, case 12: if (!Subtarget->hasLDSLoadB96_B128()) return SDValue(); - Opc = HasVIndex ? HasVOffset ? AMDGPU::BUFFER_LOAD_DWORDX3_LDS_BOTHEN - : AMDGPU::BUFFER_LOAD_DWORDX3_LDS_IDXEN - : HasVOffset ? AMDGPU::BUFFER_LOAD_DWORDX3_LDS_OFFEN - : AMDGPU::BUFFER_LOAD_DWORDX3_LDS_OFFSET; + Opc = HasVIndex? HasVOffset ? AMDGPU::BUFFER_LOAD_DWORDX3_LDS_BOTHEN + : AMDGPU::BUFFER_LOAD_DWORDX3_LDS_IDXEN +: HasVOffset ? AMDGPU::BUFFER_LOAD_DWORDX3_LDS_OFFEN + : AMDGPU::BUFFER_LOAD_DWORDX3_LDS_OFFSET; break; case 16: if (!Subtarget->hasLDSLoadB96_B128()) return SDValue(); - Opc = HasVIndex ? HasVOffset ? AMDGPU::BUFFER_LOAD_DWORDX4_LDS_BOTHEN - : AMDGPU::BUFFER_LOAD_DWORDX4_LDS_IDXEN - : HasVOffset ? AMDGPU::BUFFER_LOAD_DWORDX4_LDS_OFFEN - : AMDGPU::BUFFER_LOAD_DWORDX4_LDS_OFFSET; + Opc = HasVIndex? HasVOffset ? AMDGPU::BUFFER_LOAD_DWORDX4_LDS_BOTHEN + : AMDGPU::BUFFER_LOAD_DWORDX4_LDS_IDXEN +: HasVOffset ? AMDGPU::BUFFER_LOAD_DWORDX4_LDS_OFFEN + : AMDGPU::BUFFER_LOAD_DWORDX4_LDS_OFFSET; break; } `` https://github.com/llvm/llvm-project/pull/116681 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Handle gfx950 global_load_lds_* instructions (PR #116680)
github-actions[bot] wrote: :warning: C/C++ code formatter, clang-format found issues in your code. :warning: You can test this locally with the following command: ``bash git-clang-format --diff 82bb6e07b68b1df378e89c1eba1f9deb3c2d67f5 42f311ceb555ea2b3f171ad2ef8254e971e0be12 --extensions cpp,h -- llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp llvm/lib/Target/AMDGPU/GCNSubtarget.h llvm/lib/Target/AMDGPU/SIISelLowering.cpp `` View the diff from clang-format here. ``diff diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h index f3f96940c1..b27f9a0612 100644 --- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h +++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h @@ -1292,9 +1292,7 @@ public: /// Returns true if the target supports /// global_load_lds_dwordx3/global_load_lds_dwordx4 or /// buffer_load_dwordx3/buffer_load_dwordx4 with the lds bit. - bool hasLDSLoadB96_B128() const { -return hasGFX950Insts(); - } + bool hasLDSLoadB96_B128() const { return hasGFX950Insts(); } bool hasSALUFloatInsts() const { return HasSALUFloatInsts; } `` https://github.com/llvm/llvm-project/pull/116680 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [CodeGen][NewPM] Port SpillPlacement analysis to NPM (PR #116618)
@@ -223,7 +248,7 @@ void SpillPlacement::activate(unsigned n) { if (ActiveNodes->test(n)) return; ActiveNodes->set(n); - nodes[n].clear(Threshold); + nodes.get()[n].clear(Threshold); paperchalice wrote: You can use `operator []` if `nodes` is an array form `unique_ptr`. https://github.com/llvm/llvm-project/pull/116618 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [mlir] [mlir][Transforms] Add 1:N `matchAndRewrite` overload (PR #116470)
https://github.com/zero9178 approved this pull request. LGTM, thank you! https://github.com/llvm/llvm-project/pull/116470 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [compiler-rt] release/19.x: Reapply "[sanitizer_common] AND signals in BlockSignals instead of deleting (#113443)" for non-Android Linux only (#115790) (PR #116670)
llvmbot wrote: @llvm/pr-subscribers-compiler-rt-sanitizer Author: None (llvmbot) Changes Backport 531acf9e2f24977d2556b39229b22f4518a1faa5 Requested by: @thurstond --- Full diff: https://github.com/llvm/llvm-project/pull/116670.diff 3 Files Affected: - (modified) compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp (+39-16) - (modified) compiler-rt/lib/sanitizer_common/tests/CMakeLists.txt (+1) - (added) compiler-rt/lib/sanitizer_common/tests/sanitizer_block_signals.cpp (+76) ``diff diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp index b9b1f496df7c98..be3b3bd94e2a58 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp +++ b/compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp @@ -160,33 +160,56 @@ void SetSigProcMask(__sanitizer_sigset_t *set, __sanitizer_sigset_t *oldset) { CHECK_EQ(0, internal_sigprocmask(SIG_SETMASK, set, oldset)); } +# if SANITIZER_LINUX +// Deletes the specified signal from newset, if it is not present in oldset +// Equivalently: newset[signum] = newset[signum] & oldset[signum] +static void KeepUnblocked(__sanitizer_sigset_t &newset, + __sanitizer_sigset_t &oldset, int signum) { + // FIXME: https://github.com/google/sanitizers/issues/1816 + if (SANITIZER_ANDROID || !internal_sigismember(&oldset, signum)) +internal_sigdelset(&newset, signum); +} +# endif + // Block asynchronous signals void BlockSignals(__sanitizer_sigset_t *oldset) { - __sanitizer_sigset_t set; - internal_sigfillset(&set); -# if SANITIZER_LINUX && !SANITIZER_ANDROID + __sanitizer_sigset_t newset; + internal_sigfillset(&newset); + +# if SANITIZER_LINUX + __sanitizer_sigset_t currentset; + +#if !SANITIZER_ANDROID + // FIXME: https://github.com/google/sanitizers/issues/1816 + SetSigProcMask(NULL, &currentset); + // Glibc uses SIGSETXID signal during setuid call. If this signal is blocked // on any thread, setuid call hangs. // See test/sanitizer_common/TestCases/Linux/setuid.c. 
- internal_sigdelset(&set, 33); -# endif -# if SANITIZER_LINUX + KeepUnblocked(newset, currentset, 33); +#endif // !SANITIZER_ANDROID + // Seccomp-BPF-sandboxed processes rely on SIGSYS to handle trapped syscalls. // If this signal is blocked, such calls cannot be handled and the process may // hang. - internal_sigdelset(&set, 31); + KeepUnblocked(newset, currentset, 31); +#if !SANITIZER_ANDROID // Don't block synchronous signals - internal_sigdelset(&set, SIGSEGV); - internal_sigdelset(&set, SIGBUS); - internal_sigdelset(&set, SIGILL); - internal_sigdelset(&set, SIGTRAP); - internal_sigdelset(&set, SIGABRT); - internal_sigdelset(&set, SIGFPE); - internal_sigdelset(&set, SIGPIPE); -# endif + // but also don't unblock signals that the user had deliberately blocked. + // FIXME: https://github.com/google/sanitizers/issues/1816 + KeepUnblocked(newset, currentset, SIGSEGV); + KeepUnblocked(newset, currentset, SIGBUS); + KeepUnblocked(newset, currentset, SIGILL); + KeepUnblocked(newset, currentset, SIGTRAP); + KeepUnblocked(newset, currentset, SIGABRT); + KeepUnblocked(newset, currentset, SIGFPE); + KeepUnblocked(newset, currentset, SIGPIPE); +#endif //! 
SANITIZER_ANDROID + +# endif // SANITIZER_LINUX - SetSigProcMask(&set, oldset); + SetSigProcMask(&newset, oldset); } ScopedBlockSignals::ScopedBlockSignals(__sanitizer_sigset_t *copy) { diff --git a/compiler-rt/lib/sanitizer_common/tests/CMakeLists.txt b/compiler-rt/lib/sanitizer_common/tests/CMakeLists.txt index 2b4c15125263a9..fef8bb772e0e0d 100644 --- a/compiler-rt/lib/sanitizer_common/tests/CMakeLists.txt +++ b/compiler-rt/lib/sanitizer_common/tests/CMakeLists.txt @@ -15,6 +15,7 @@ set(SANITIZER_UNITTESTS sanitizer_array_ref_test.cpp sanitizer_atomic_test.cpp sanitizer_bitvector_test.cpp + sanitizer_block_signals.cpp sanitizer_bvgraph_test.cpp sanitizer_chained_origin_depot_test.cpp sanitizer_common_test.cpp diff --git a/compiler-rt/lib/sanitizer_common/tests/sanitizer_block_signals.cpp b/compiler-rt/lib/sanitizer_common/tests/sanitizer_block_signals.cpp new file mode 100644 index 00..b43648a8aef230 --- /dev/null +++ b/compiler-rt/lib/sanitizer_common/tests/sanitizer_block_signals.cpp @@ -0,0 +1,76 @@ +//===-- sanitizer_block_signals.cpp ---===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===--===// +// +// This file is a part of sanitizer_common unit tests. +// +//===--===// +#include +#include + +#include "gtest/gtest.h" +#include "sanitizer_common/sanitizer_linux.h" + +namespace __sanitizer {
[llvm-branch-commits] [compiler-rt] release/19.x: Reapply "[sanitizer_common] AND signals in BlockSignals instead of deleting (#113443)" for non-Android Linux only (#115790) (PR #116670)
llvmbot wrote: @thurstond What do you think about merging this PR to the release branch? https://github.com/llvm/llvm-project/pull/116670 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [llvm] [RISCV] Support __builtin_cpu_is (PR #116231)
https://github.com/topperc edited https://github.com/llvm/llvm-project/pull/116231 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [llvm] [RISCV] Support __builtin_cpu_is (PR #116231)
@@ -22505,6 +22506,47 @@ Value *CodeGenFunction::EmitHexagonBuiltinExpr(unsigned BuiltinID, return nullptr; } +Value *CodeGenFunction::EmitRISCVCpuIs(const CallExpr *E) { + const Expr *CPUExpr = E->getArg(0)->IgnoreParenCasts(); + StringRef CPUStr = cast(CPUExpr)->getString(); + return EmitRISCVCpuIs(CPUStr); +} + +Value *CodeGenFunction::EmitRISCVCpuIs(StringRef CPUStr) { + llvm::Type *Int32Ty = Builder.getInt32Ty(); + llvm::Type *Int64Ty = Builder.getInt64Ty(); + llvm::StructType *StructTy = llvm::StructType::get(Int32Ty, Int64Ty, Int64Ty); + llvm::Constant *RISCVCPUModel = + CGM.CreateRuntimeVariable(StructTy, "__riscv_cpu_model"); + cast(RISCVCPUModel)->setDSOLocal(true); + + auto loadRISCVCPUID = [&](unsigned Index) { +Value *Ptr = Builder.CreateStructGEP(StructTy, RISCVCPUModel, Index); +Value *CPUID = Builder.CreateAlignedLoad(StructTy->getTypeAtIndex(Index), topperc wrote: You can use `CreateLoad` to avoid llvm::MaybeAlign() https://github.com/llvm/llvm-project/pull/116231 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [compiler-rt] release/19.x: Reapply "[sanitizer_common] AND signals in BlockSignals instead of deleting (#113443)" for non-Android Linux only (#115790) (PR #116670)
https://github.com/llvmbot created https://github.com/llvm/llvm-project/pull/116670 Backport 531acf9e2f24977d2556b39229b22f4518a1faa5 Requested by: @thurstond >From 6925f3c7c7d8b83e2195cb8e473eccdecae42607 Mon Sep 17 00:00:00 2001 From: Thurston Dang Date: Thu, 14 Nov 2024 10:35:35 -0800 Subject: [PATCH] Reapply "[sanitizer_common] AND signals in BlockSignals instead of deleting (#113443)" for non-Android Linux only (#115790) The original patch (25fd366d6a7d40266ff27c134ed8beb0a90cc33b) was reverted in 083a5cdbeab09517d8345868970d4f41170d7ed2 because it broke some buildbots. This revised patch makes two changes: - Reverts to *pre-#98200* behavior for Android. This avoids a build breakage on Android. - Only define KeepUnblocked if SANITIZER_LINUX: this avoids a build breakage on solaris, which does not support internal_sigdelset. N.B. Other buildbot failures were non-sanitizer tests and are therefore unrelated. Original commit message: My earlier patch https://github.com/llvm/llvm-project/pull/98200 caused a regression because it unconditionally unblocked synchronous signals, even if the user program had deliberately blocked them. This patch fixes the issue by checking the current signal mask, as suggested by Vitaly. It also adds tests. 
Fixes #113385 (cherry picked from commit 531acf9e2f24977d2556b39229b22f4518a1faa5) --- .../lib/sanitizer_common/sanitizer_linux.cpp | 55 ++ .../lib/sanitizer_common/tests/CMakeLists.txt | 1 + .../tests/sanitizer_block_signals.cpp | 76 +++ 3 files changed, 116 insertions(+), 16 deletions(-) create mode 100644 compiler-rt/lib/sanitizer_common/tests/sanitizer_block_signals.cpp diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp index b9b1f496df7c98..be3b3bd94e2a58 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp +++ b/compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp @@ -160,33 +160,56 @@ void SetSigProcMask(__sanitizer_sigset_t *set, __sanitizer_sigset_t *oldset) { CHECK_EQ(0, internal_sigprocmask(SIG_SETMASK, set, oldset)); } +# if SANITIZER_LINUX +// Deletes the specified signal from newset, if it is not present in oldset +// Equivalently: newset[signum] = newset[signum] & oldset[signum] +static void KeepUnblocked(__sanitizer_sigset_t &newset, + __sanitizer_sigset_t &oldset, int signum) { + // FIXME: https://github.com/google/sanitizers/issues/1816 + if (SANITIZER_ANDROID || !internal_sigismember(&oldset, signum)) +internal_sigdelset(&newset, signum); +} +# endif + // Block asynchronous signals void BlockSignals(__sanitizer_sigset_t *oldset) { - __sanitizer_sigset_t set; - internal_sigfillset(&set); -# if SANITIZER_LINUX && !SANITIZER_ANDROID + __sanitizer_sigset_t newset; + internal_sigfillset(&newset); + +# if SANITIZER_LINUX + __sanitizer_sigset_t currentset; + +#if !SANITIZER_ANDROID + // FIXME: https://github.com/google/sanitizers/issues/1816 + SetSigProcMask(NULL, &currentset); + // Glibc uses SIGSETXID signal during setuid call. If this signal is blocked // on any thread, setuid call hangs. // See test/sanitizer_common/TestCases/Linux/setuid.c. 
- internal_sigdelset(&set, 33); -# endif -# if SANITIZER_LINUX + KeepUnblocked(newset, currentset, 33); +#endif // !SANITIZER_ANDROID + // Seccomp-BPF-sandboxed processes rely on SIGSYS to handle trapped syscalls. // If this signal is blocked, such calls cannot be handled and the process may // hang. - internal_sigdelset(&set, 31); + KeepUnblocked(newset, currentset, 31); +#if !SANITIZER_ANDROID // Don't block synchronous signals - internal_sigdelset(&set, SIGSEGV); - internal_sigdelset(&set, SIGBUS); - internal_sigdelset(&set, SIGILL); - internal_sigdelset(&set, SIGTRAP); - internal_sigdelset(&set, SIGABRT); - internal_sigdelset(&set, SIGFPE); - internal_sigdelset(&set, SIGPIPE); -# endif + // but also don't unblock signals that the user had deliberately blocked. + // FIXME: https://github.com/google/sanitizers/issues/1816 + KeepUnblocked(newset, currentset, SIGSEGV); + KeepUnblocked(newset, currentset, SIGBUS); + KeepUnblocked(newset, currentset, SIGILL); + KeepUnblocked(newset, currentset, SIGTRAP); + KeepUnblocked(newset, currentset, SIGABRT); + KeepUnblocked(newset, currentset, SIGFPE); + KeepUnblocked(newset, currentset, SIGPIPE); +#endif //! SANITIZER_ANDROID + +# endif // SANITIZER_LINUX - SetSigProcMask(&set, oldset); + SetSigProcMask(&newset, oldset); } ScopedBlockSignals::ScopedBlockSignals(__sanitizer_sigset_t *copy) { diff --git a/compiler-rt/lib/sanitizer_common/tests/CMakeLists.txt b/compiler-rt/lib/sanitizer_common/tests/CMakeLists.txt index 2b4c15125263a9..fef8bb772e0e0d 100644 --- a/compiler-rt/lib/sanitizer_common/tests/CMakeLists.txt +++ b/compiler-rt/lib/sanitizer_common/tests/CMakeLists.txt @@ -15,6 +15,7 @@ set(SANITIZE
[llvm-branch-commits] [compiler-rt] release/19.x: Reapply "[sanitizer_common] AND signals in BlockSignals instead of deleting (#113443)" for non-Android Linux only (#115790) (PR #116670)
https://github.com/thurstond approved this pull request. https://github.com/llvm/llvm-project/pull/116670 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [llvm] AMDGPU: Add first gfx950 mfma instructions (PR #116312)
https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/116312 >From 56e2ba8ee3266bdef464e456e06e67b45f946ef0 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Tue, 21 Nov 2023 10:03:19 +0900 Subject: [PATCH] AMDGPU: Add first gfx950 mfma instructions Scheduling info and hazards are wrong and TBD. --- clang/include/clang/Basic/BuiltinsAMDGPU.def | 6 + .../CodeGenOpenCL/builtins-amdgcn-mfma.cl | 25 +- .../builtins-amdgcn-error-gfx950-param.cl | 21 ++ .../builtins-amdgcn-error-gfx950.cl | 12 + llvm/include/llvm/IR/IntrinsicsAMDGPU.td | 9 + llvm/lib/Target/AMDGPU/AMDGPU.td | 4 + .../Target/AMDGPU/AMDGPURegisterBankInfo.cpp | 4 +- .../Target/AMDGPU/AMDGPUSearchableTables.td | 2 + llvm/lib/Target/AMDGPU/GCNSubtarget.h | 4 + llvm/lib/Target/AMDGPU/SIInstrInfo.td | 4 + llvm/lib/Target/AMDGPU/VOP3PInstructions.td | 22 ++ .../UniformityAnalysis/AMDGPU/intrinsics.ll | 17 ++ .../CodeGen/AMDGPU/llvm.amdgcn.mfma.gfx950.ll | 274 ++ llvm/test/MC/AMDGPU/mai-gfx950.s | 112 +++ .../MC/Disassembler/AMDGPU/gfx950_mai.txt | 61 llvm/test/tools/llvm-mca/AMDGPU/gfx950.s | 18 ++ 16 files changed, 592 insertions(+), 3 deletions(-) create mode 100644 clang/test/SemaOpenCL/builtins-amdgcn-error-gfx950-param.cl create mode 100644 clang/test/SemaOpenCL/builtins-amdgcn-error-gfx950.cl create mode 100644 llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.gfx950.ll create mode 100644 llvm/test/MC/AMDGPU/mai-gfx950.s create mode 100644 llvm/test/MC/Disassembler/AMDGPU/gfx950_mai.txt create mode 100644 llvm/test/tools/llvm-mca/AMDGPU/gfx950.s diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def b/clang/include/clang/Basic/BuiltinsAMDGPU.def index 61516eb2a4a723..6917d8d1aca69d 100644 --- a/clang/include/clang/Basic/BuiltinsAMDGPU.def +++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def @@ -431,6 +431,12 @@ TARGET_BUILTIN(__builtin_amdgcn_cvt_pk_fp8_f32, "iffiIb", "nc", "fp8-conversion- TARGET_BUILTIN(__builtin_amdgcn_cvt_sr_bf8_f32, "ifiiIi", "nc", "fp8-conversion-insts") 
TARGET_BUILTIN(__builtin_amdgcn_cvt_sr_fp8_f32, "ifiiIi", "nc", "fp8-conversion-insts") +//===--===// +// GFX950 only builtins. +//===--===// +TARGET_BUILTIN(__builtin_amdgcn_mfma_f32_16x16x32_f16, "V4fV8hV8hV4fIiIiIi", "nc", "gfx950-insts") +TARGET_BUILTIN(__builtin_amdgcn_mfma_f32_32x32x16_f16, "V16fV8hV8hV16fIiIiIi", "nc", "gfx950-insts") + //===--===// // GFX12+ only builtins. //===--===// diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-mfma.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-mfma.cl index dcdeee6b6acc40..a644a60f9ec381 100644 --- a/clang/test/CodeGenOpenCL/builtins-amdgcn-mfma.cl +++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-mfma.cl @@ -2,6 +2,7 @@ // RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx908 -DMFMA_GFX908_TESTS -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-GFX908 // RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx90a -DMFMA_GFX90A_TESTS -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-GFX90A // RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx940 -DMFMA_GFX940_TESTS -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-GFX940 +// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx950 -DMFMA_GFX950_TESTS -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-GFX950 #pragma OPENCL EXTENSION cl_khr_fp64:enable @@ -222,7 +223,7 @@ void test_mfma_f64_4x4x4f64(global double* out, double a, double b, double c) #endif // MFMA_GFX90A_TESTS -#ifdef MFMA_GFX940_TESTS +#if defined(MFMA_GFX940_TESTS) || defined(MFMA_GFX950_TESTS) // CHECK-GFX940-LABEL: @test_mfma_i32_16x16x32_i8 // CHECK-GFX940: call <4 x i32> @llvm.amdgcn.mfma.i32.16x16x32.i8(i64 %a, i64 %b, <4 x i32> %c, i32 0, i32 0, i32 0) void test_mfma_i32_16x16x32_i8(global v4i* out, long a, long b, v4i c) @@ -404,4 +405,24 @@ void test_smfmac_f32_32x32x32_fp8_fp8(global v16f* out, v2i a, v4i b, v16f c, in { *out = __builtin_amdgcn_smfmac_f32_32x32x32_fp8_fp8(a, b, c, idx, 0, 0); } -#endif // MFMA_GFX940_TESTS +#endif // 
defined(MFMA_GFX940_TESTS) || defined(MFMA_GFX950_TESTS) + +#ifdef MFMA_GFX950_TESTS + +// CHECK-GFX950-LABEL: @test_mfma_f32_16x16x32_f16( +// CHECK-GFX950: tail call <4 x float> @llvm.amdgcn.mfma.f32.16x16x32.f16(<8 x half> %a, <8 x half> %b, <4 x float> %c, i32 1, i32 2, i32 3) + +v4f test_mfma_f32_16x16x32_f16(v8h a, v8h b, v4f c) +{ + return __builtin_amdgcn_mfma_f32_16x16x32_f16(a, b, c, 1, 2, 3); +} + +// CHECK-GFX950-LABEL: @test_mfma_f32_32x32x16_f16 +// CHECK-GFX950: tail call <16 x float> @llvm.amdgcn.mfma.f32.32x32
[llvm-branch-commits] [llvm] AMDGPU: Increase the LDS size to support to 160 KB for gfx950 (PR #116309)
https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/116309 >From 74ed0a510ff829e5e98d9edf0284ee4decfa4bc0 Mon Sep 17 00:00:00 2001 From: Pravin Jagtap Date: Wed, 13 Dec 2023 00:27:03 -0500 Subject: [PATCH 1/2] AMDGPU: Increase the LDS size to support to 160 KB for gfx950 --- llvm/docs/AMDGPUUsage.rst | 2 + llvm/lib/Target/AMDGPU/AMDGPU.td | 3 +- llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp | 12 +++-- llvm/lib/Target/AMDGPU/AMDGPUFeatures.td | 1 + .../Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp| 2 + llvm/test/CodeGen/AMDGPU/extra-lds-size.ll| 7 +++ .../AMDGPU/lds-limit-diagnostics-gfx950.ll| 13 + .../CodeGen/AMDGPU/lds-size-hsa-gfx950.ll | 31 +++ .../CodeGen/AMDGPU/lds-size-pal-gfx950.ll | 26 ++ .../tools/llvm-objdump/ELF/AMDGPU/kd-gfx950.s | 52 +++ 10 files changed, 144 insertions(+), 5 deletions(-) create mode 100644 llvm/test/CodeGen/AMDGPU/lds-limit-diagnostics-gfx950.ll create mode 100644 llvm/test/CodeGen/AMDGPU/lds-size-hsa-gfx950.ll create mode 100644 llvm/test/CodeGen/AMDGPU/lds-size-pal-gfx950.ll create mode 100644 llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-gfx950.s diff --git a/llvm/docs/AMDGPUUsage.rst b/llvm/docs/AMDGPUUsage.rst index b85b680b9c82d3..a25b6feddbeddc 100644 --- a/llvm/docs/AMDGPUUsage.rst +++ b/llvm/docs/AMDGPUUsage.rst @@ -5475,6 +5475,8 @@ The fields used by CP for code objects before V3 also match those specified in roundup(lds-size / (64 * 4)) GFX7-GFX11 roundup(lds-size / (128 * 4)) + GFX950 + roundup(lds-size / (320 * 4)) 24 1 bit ENABLE_EXCEPTION_IEEE_754_FPWavefront starts execution _INVALID_OPERATION with specified exceptions diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td index 35dbf86b7c6f36..a05d4a644d08d1 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.td +++ b/llvm/lib/Target/AMDGPU/AMDGPU.td @@ -1494,7 +1494,8 @@ def FeatureISAVersion9_5_Common : FeatureSet< [FeatureFP8Insts, FeatureFP8ConversionInsts, FeatureCvtFP8VOP1Bug, - FeatureGFX950Insts + FeatureGFX950Insts, + 
FeatureAddressableLocalMemorySize163840 ])>; def FeatureISAVersion9_4_0 : FeatureSet< diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp index d801f2b1591275..90ece275412c7c 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp @@ -1172,12 +1172,16 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo, ProgInfo.DX10Clamp = Mode.DX10Clamp; unsigned LDSAlignShift; - if (STM.getGeneration() < AMDGPUSubtarget::SEA_ISLANDS) { -// LDS is allocated in 64 dword blocks. -LDSAlignShift = 8; - } else { + if (STM.getFeatureBits().test(FeatureAddressableLocalMemorySize163840)) { +// LDS is allocated in 320 dword blocks. +LDSAlignShift = 11; + } else if (STM.getFeatureBits().test( + FeatureAddressableLocalMemorySize65536)) { // LDS is allocated in 128 dword blocks. LDSAlignShift = 9; + } else { +// LDS is allocated in 64 dword blocks. +LDSAlignShift = 8; } ProgInfo.SGPRSpill = MFI->getNumSpilledSGPRs(); diff --git a/llvm/lib/Target/AMDGPU/AMDGPUFeatures.td b/llvm/lib/Target/AMDGPU/AMDGPUFeatures.td index f832a2a55d6229..74d1faeb6f545b 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUFeatures.td +++ b/llvm/lib/Target/AMDGPU/AMDGPUFeatures.td @@ -29,6 +29,7 @@ class SubtargetFeatureAddressableLocalMemorySize : SubtargetFeature< def FeatureAddressableLocalMemorySize32768 : SubtargetFeatureAddressableLocalMemorySize<32768>; def FeatureAddressableLocalMemorySize65536 : SubtargetFeatureAddressableLocalMemorySize<65536>; +def FeatureAddressableLocalMemorySize163840 : SubtargetFeatureAddressableLocalMemorySize<163840>; class SubtargetFeatureWavefrontSize : SubtargetFeature< "wavefrontsize"#!shl(1, ValueLog2), diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp index 01866fbd9da6e7..501d00b1f308d9 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp @@ -916,6 
+916,8 @@ unsigned getAddressableLocalMemorySize(const MCSubtargetInfo *STI) { return 32768; if (STI->getFeatureBits().test(FeatureAddressableLocalMemorySize65536)) return 65536; + if (STI->getFeatureBits().test(FeatureAddressableLocalMemorySize163840)) +return 163840; return 0; } diff --git a/llvm/test/CodeGen/AMDGPU/extra-lds-size.ll b/llvm/test/CodeGen/AMDGPU/extra-lds-size.ll index 13640b74a7937b..318ecd16a2cc
[llvm-branch-commits] [flang] [llvm] [flang][OpenMP] Change clause modifier representation in parser (PR #116656)
https://github.com/kparzysz created https://github.com/llvm/llvm-project/pull/116656 The main issue to solve is that OpenMP modifiers can be specified in any order, so the parser cannot expect any specific modifier at a given position. To solve that, define modifier to be a union of all allowable specific modifiers for a given clause. Additionally, implement modifier descriptors: for each modifier the corresponding descriptor contains a set of properties of the modifier that allow a common set of semantic checks. Start with the syntactic properties defined in the spec: Required, Unique, Exclusive, Ultimate, and implement common checks to verify each of them. OpenMP modifier overhaul: #2/3 >From e8bbc26e136993758c3a3197eed6b1924c6531d0 Mon Sep 17 00:00:00 2001 From: Krzysztof Parzyszek Date: Mon, 18 Nov 2024 08:47:24 -0600 Subject: [PATCH] [flang][OpenMP] Change clause modifier representation in parser The main issue to solve is that OpenMP modifiers can be specified in any order, so the parser cannot expect any specific modifier at a given position. To solve that, define modifier to be a union of all allowable specific modifiers for a given clause. Additionally, implement modifier descriptors: for each modifier the corresponding descriptor contains a set of properties of the modifier that allow a common set of semantic checks. Start with the syntactic properties defined in the spec: Required, Unique, Exclusive, Ultimate, and implement common checks to verify each of them. 
OpenMP modifier overhaul: #2/3 --- .../flang/Semantics/openmp-modifiers.h| 391 ++ flang/lib/Semantics/CMakeLists.txt| 1 + flang/lib/Semantics/openmp-modifiers.cpp | 146 +++ llvm/include/llvm/Frontend/OpenMP/OMP.h | 2 + llvm/lib/Frontend/OpenMP/OMP.cpp | 5 + 5 files changed, 545 insertions(+) create mode 100644 flang/include/flang/Semantics/openmp-modifiers.h create mode 100644 flang/lib/Semantics/openmp-modifiers.cpp diff --git a/flang/include/flang/Semantics/openmp-modifiers.h b/flang/include/flang/Semantics/openmp-modifiers.h new file mode 100644 index 00..6be582761ed687 --- /dev/null +++ b/flang/include/flang/Semantics/openmp-modifiers.h @@ -0,0 +1,391 @@ +//===-- flang/lib/Semantics/openmp-modifiers.h --*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===--===// + +#ifndef FORTRAN_SEMANTICS_OPENMP_MODIFIERS_H_ +#define FORTRAN_SEMANTICS_OPENMP_MODIFIERS_H_ + +#include "flang/Common/enum-set.h" +#include "flang/Parser/parse-tree.h" +#include "flang/Semantics/semantics.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Frontend/OpenMP/OMP.h" + +#include +#include +#include +#include + +namespace Fortran::semantics { + +// Ref: [5.2:58] +// +// Syntactic properties for Clauses, Arguments and Modifiers +// +// Inverse properties: +// not Required -> Optional +// not Unique-> Repeatable +// not Exclusive -> Compatible +// not Ultimate -> Free +// +// Clause defaults: Optional, Repeatable, Compatible, Free +// Argument defaults: Required, Unique, Compatible, Free +// Modifier defaults: Optional, Unique, Compatible, Free +// +// --- +// Each modifier is used as either pre-modifier (i.e. modifier: item), +// or post-modifier (i.e. item: modifier). The default is pre-. +// Add an additional property that reflects the type of modifier. 
+ +ENUM_CLASS(OmpProperty, Required, Unique, Exclusive, Ultimate, Post); +using OmpProperties = common::EnumSet; +using OmpClauses = +common::EnumSet; + +struct OmpModifierDescriptor { + // Modifier name for use in diagnostic messages. + const OmpProperties &props(unsigned version) const; + const OmpClauses &clauses(unsigned version) const; + + const llvm::StringRef name; + // Version-dependent properties of the modifier. + const std::map props_; + // Version-dependent set of clauses to which the modifier can apply. + const std::map clauses_; +}; + +template const OmpModifierDescriptor &OmpGetDescriptor(); + +template <> +const OmpModifierDescriptor &OmpGetDescriptor(); +template <> +const OmpModifierDescriptor &OmpGetDescriptor(); +template <> +const OmpModifierDescriptor &OmpGetDescriptor(); +template <> +const OmpModifierDescriptor &OmpGetDescriptor(); +template <> +const OmpModifierDescriptor &OmpGetDescriptor(); + +// Explanation of terminology: +// +// A typical clause with modifier[s] looks like this (with parts that are +// not relevant here removed): +// struct OmpSomeClause { +// struct Modifier { +// using Variant = std::variant; +// Variant u; +// }; +// std::tuple>, ...> t; +// }; +// +// The Specific1, etc. refer to parser cla
[llvm-branch-commits] [flang] [flang][OpenMP] Apply modifier representation to semantic checks (PR #116658)
https://github.com/kparzysz created https://github.com/llvm/llvm-project/pull/116658 Also, define helper macros in parse-tree.h. Apply the new modifier representation to the DEFAULTMAP and REDUCTION clauses, with testcases utilizing the new modifier validation. OpenMP modifier overhaul: #3/3 >From fac6a8594643811418f37ee42fc1ac35bcc2a244 Mon Sep 17 00:00:00 2001 From: Krzysztof Parzyszek Date: Thu, 14 Nov 2024 07:29:59 -0600 Subject: [PATCH] [flang][OpenMP] Apply modifier representation to semantic checks Also, define helper macros in parse-tree.h. Apply the new modifier representation to the DEFAULTMAP and REDUCTION clauses, with testcases utilizing the new modifier validation. OpenMP modifier overhaul: #3/3 --- flang/include/flang/Parser/dump-parse-tree.h | 8 +- flang/include/flang/Parser/parse-tree.h | 49 +-- .../flang/Semantics/openmp-modifiers.h| 4 + flang/lib/Lower/OpenMP/Clauses.cpp| 33 flang/lib/Parser/openmp-parsers.cpp | 40 + flang/lib/Parser/unparse.cpp | 15 ++-- flang/lib/Semantics/check-omp-structure.cpp | 83 +++ flang/lib/Semantics/check-omp-structure.h | 3 +- flang/lib/Semantics/openmp-modifiers.cpp | 33 flang/lib/Semantics/resolve-directives.cpp| 52 +++- .../test/Parser/OpenMP/defaultmap-clause.f90 | 8 +- .../test/Parser/OpenMP/defaultmap-unparse.f90 | 16 ++-- .../test/Parser/OpenMP/reduction-modifier.f90 | 6 +- .../Semantics/OpenMP/combined-constructs.f90 | 12 +-- .../OpenMP/defaultmap-clause-v45.f90 | 2 +- 15 files changed, 236 insertions(+), 128 deletions(-) diff --git a/flang/include/flang/Parser/dump-parse-tree.h b/flang/include/flang/Parser/dump-parse-tree.h index df5bf1d8d3200e..9c59ce520a31aa 100644 --- a/flang/include/flang/Parser/dump-parse-tree.h +++ b/flang/include/flang/Parser/dump-parse-tree.h @@ -509,9 +509,11 @@ class ParseTreeDumper { NODE(parser, OmpDeclareMapperSpecifier) NODE(parser, OmpDefaultClause) NODE_ENUM(OmpDefaultClause, Type) + NODE(parser, OmpVariableCategory) + NODE_ENUM(OmpVariableCategory, Value) NODE(parser, 
OmpDefaultmapClause) NODE_ENUM(OmpDefaultmapClause, ImplicitBehavior) - NODE_ENUM(OmpDefaultmapClause, VariableCategory) + NODE(OmpDefaultmapClause, Modifier) NODE(parser, OmpDependenceType) NODE_ENUM(OmpDependenceType, Value) NODE(parser, OmpTaskDependenceType) @@ -567,8 +569,10 @@ class ParseTreeDumper { NODE_ENUM(OmpBindClause, Type) NODE(parser, OmpProcBindClause) NODE_ENUM(OmpProcBindClause, Type) - NODE_ENUM(OmpReductionClause, ReductionModifier) + NODE(parser, OmpReductionModifier) + NODE_ENUM(OmpReductionModifier, Value) NODE(parser, OmpReductionClause) + NODE(OmpReductionClause, Modifier) NODE(parser, OmpInReductionClause) NODE(parser, OmpReductionCombiner) NODE(OmpReductionCombiner, FunctionCombiner) diff --git a/flang/include/flang/Parser/parse-tree.h b/flang/include/flang/Parser/parse-tree.h index ef49a36578270e..5b28bcd4e21b80 100644 --- a/flang/include/flang/Parser/parse-tree.h +++ b/flang/include/flang/Parser/parse-tree.h @@ -3440,6 +3440,16 @@ struct OmpObject { WRAPPER_CLASS(OmpObjectList, std::list); +#define MODIFIER_BOILERPLATE(...) \ + struct Modifier { \ +using Variant = std::variant<__VA_ARGS__>; \ +UNION_CLASS_BOILERPLATE(Modifier); \ +CharBlock source; \ +Variant u; \ + } + +#define MODIFIERS() std::optional> + inline namespace modifier { // For uniformity, in all keyword modifiers the name of the type defined // by ENUM_CLASS is "Value", e.g. @@ -3505,12 +3515,20 @@ struct OmpLinearModifier { // - |// since 4.5, until 5.2 // + | * | .AND. | .OR. | .EQV. | .NEQV. 
|// since 4.5 // MIN | MAX | IAND | IOR | IEOR // since 4.5 -// struct OmpReductionIdentifier { UNION_CLASS_BOILERPLATE(OmpReductionIdentifier); std::variant u; }; +// Ref: [5.0:300-302], [5.1:332-334], [5.2:134-137] +// +// reduction-modifier -> +// DEFAULT | INSCAN | TASK// since 5.0 +struct OmpReductionModifier { + ENUM_CLASS(Value, Default, Inscan, Task); + WRAPPER_CLASS_BOILERPLATE(OmpReductionModifier, Value); +}; + // Ref: [4.5:169-170], [5.0:254-256], [5.1:287-289], [5.2:321] // // task-dependence-type -> // "dependence-type" in 5.1 and before @@ -3521,6 +3539,17 @@ struct OmpTaskDependenceType { ENUM_CLASS(Value, In, Out, Inout, Inoutset, Mutexinoutset, Depobj) WRAPPER_CLASS_BOILERPLATE(OmpTaskDependenceType, Value); }; + +// Ref: [4.5:229-230], [5.0:324-325], [5.1:357-358], [5.2:161-162] +// +// variable-category -> +// SCALAR | // since 4.5 +// AGGREGATE | ALLOCATABLE | POINTER |// since 5.0 +// ALL// since 5.2 +struct OmpVariableCategory { + ENUM_CLASS(Value, Aggregate, All, Allocatable
[llvm-branch-commits] [flang] [llvm] [flang][OpenMP] Change clause modifier representation in parser (PR #116656)
llvmbot wrote: @llvm/pr-subscribers-flang-semantics Author: Krzysztof Parzyszek (kparzysz) Changes The main issue to solve is that OpenMP modifiers can be specified in any order, so the parser cannot expect any specific modifier at a given position. To solve that, define modifier to be a union of all allowable specific modifiers for a given clause. Additionally, implement modifier descriptors: for each modifier the corresponding descriptor contains a set of properties of the modifier that allow a common set of semantic checks. Start with the syntactic properties defined in the spec: Required, Unique, Exclusive, Ultimate, and implement common checks to verify each of them. OpenMP modifier overhaul: #2/3 --- Patch is 21.10 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/116656.diff 5 Files Affected: - (added) flang/include/flang/Semantics/openmp-modifiers.h (+391) - (modified) flang/lib/Semantics/CMakeLists.txt (+1) - (added) flang/lib/Semantics/openmp-modifiers.cpp (+146) - (modified) llvm/include/llvm/Frontend/OpenMP/OMP.h (+2) - (modified) llvm/lib/Frontend/OpenMP/OMP.cpp (+5) ``diff diff --git a/flang/include/flang/Semantics/openmp-modifiers.h b/flang/include/flang/Semantics/openmp-modifiers.h new file mode 100644 index 00..6be582761ed687 --- /dev/null +++ b/flang/include/flang/Semantics/openmp-modifiers.h @@ -0,0 +1,391 @@ +//===-- flang/lib/Semantics/openmp-modifiers.h --*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===--===// + +#ifndef FORTRAN_SEMANTICS_OPENMP_MODIFIERS_H_ +#define FORTRAN_SEMANTICS_OPENMP_MODIFIERS_H_ + +#include "flang/Common/enum-set.h" +#include "flang/Parser/parse-tree.h" +#include "flang/Semantics/semantics.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Frontend/OpenMP/OMP.h" + +#include +#include +#include +#include + +namespace Fortran::semantics { + +// Ref: [5.2:58] +// +// Syntactic properties for Clauses, Arguments and Modifiers +// +// Inverse properties: +// not Required -> Optional +// not Unique-> Repeatable +// not Exclusive -> Compatible +// not Ultimate -> Free +// +// Clause defaults: Optional, Repeatable, Compatible, Free +// Argument defaults: Required, Unique, Compatible, Free +// Modifier defaults: Optional, Unique, Compatible, Free +// +// --- +// Each modifier is used as either pre-modifier (i.e. modifier: item), +// or post-modifier (i.e. item: modifier). The default is pre-. +// Add an additional property that reflects the type of modifier. + +ENUM_CLASS(OmpProperty, Required, Unique, Exclusive, Ultimate, Post); +using OmpProperties = common::EnumSet; +using OmpClauses = +common::EnumSet; + +struct OmpModifierDescriptor { + // Modifier name for use in diagnostic messages. + const OmpProperties &props(unsigned version) const; + const OmpClauses &clauses(unsigned version) const; + + const llvm::StringRef name; + // Version-dependent properties of the modifier. + const std::map props_; + // Version-dependent set of clauses to which the modifier can apply. 
+ const std::map clauses_; +}; + +template const OmpModifierDescriptor &OmpGetDescriptor(); + +template <> +const OmpModifierDescriptor &OmpGetDescriptor(); +template <> +const OmpModifierDescriptor &OmpGetDescriptor(); +template <> +const OmpModifierDescriptor &OmpGetDescriptor(); +template <> +const OmpModifierDescriptor &OmpGetDescriptor(); +template <> +const OmpModifierDescriptor &OmpGetDescriptor(); + +// Explanation of terminology: +// +// A typical clause with modifier[s] looks like this (with parts that are +// not relevant here removed): +// struct OmpSomeClause { +// struct Modifier { +// using Variant = std::variant<Specific1, Specific2, ...>; +// Variant u; +// }; +// std::tuple<std::optional<std::list<Modifier>>, ...> t; +// }; +// +// The Specific1, etc. refer to parser classes that represent modifiers, +// e.g. OmpIterator or OmpTaskDependenceType. The Variant type contains +// all modifiers that are allowed for a given clause. The Modifier class +// is there to wrap the variant into the form that the parse tree visitor +// expects, i.e. with traits, member "u", etc. +// +// To avoid ambiguities with the word "modifier" (e.g. is it "any modifier", +// or "this specific modifier"?), the following code uses different terms: +// +// - UnionTy: refers to the nested "Modifier" class, i.e. +// "OmpSomeClause::Modifier" in the example above. +// - SpecificTy: refers to any of the alternatives, i.e. "Specific1" or +// "Specific2". + +template <typename UnionTy> +const OmpModifierDescriptor &OmpGetDescriptor(const UnionTy &modifier) { + return common::visit( + [](auto &&m) -> d
[llvm-branch-commits] [flang] [flang][OpenMP] Apply modifier representation to semantic checks (PR #116658)
llvmbot wrote: @llvm/pr-subscribers-flang-openmp Author: Krzysztof Parzyszek (kparzysz) Changes Also, define helper macros in parse-tree.h. Apply the new modifier representation to the DEFAULTMAP and REDUCTION clauses, with testcases utilizing the new modifier validation. OpenMP modifier overhaul: #3/3 --- Patch is 37.74 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/116658.diff 15 Files Affected: - (modified) flang/include/flang/Parser/dump-parse-tree.h (+6-2) - (modified) flang/include/flang/Parser/parse-tree.h (+40-9) - (modified) flang/include/flang/Semantics/openmp-modifiers.h (+4) - (modified) flang/lib/Lower/OpenMP/Clauses.cpp (+19-14) - (modified) flang/lib/Parser/openmp-parsers.cpp (+24-16) - (modified) flang/lib/Parser/unparse.cpp (+6-9) - (modified) flang/lib/Semantics/check-omp-structure.cpp (+49-34) - (modified) flang/lib/Semantics/check-omp-structure.h (+1-2) - (modified) flang/lib/Semantics/openmp-modifiers.cpp (+33) - (modified) flang/lib/Semantics/resolve-directives.cpp (+32-20) - (modified) flang/test/Parser/OpenMP/defaultmap-clause.f90 (+4-4) - (modified) flang/test/Parser/OpenMP/defaultmap-unparse.f90 (+8-8) - (modified) flang/test/Parser/OpenMP/reduction-modifier.f90 (+3-3) - (modified) flang/test/Semantics/OpenMP/combined-constructs.f90 (+6-6) - (modified) flang/test/Semantics/OpenMP/defaultmap-clause-v45.f90 (+1-1) ``diff diff --git a/flang/include/flang/Parser/dump-parse-tree.h b/flang/include/flang/Parser/dump-parse-tree.h index df5bf1d8d3200e..9c59ce520a31aa 100644 --- a/flang/include/flang/Parser/dump-parse-tree.h +++ b/flang/include/flang/Parser/dump-parse-tree.h @@ -509,9 +509,11 @@ class ParseTreeDumper { NODE(parser, OmpDeclareMapperSpecifier) NODE(parser, OmpDefaultClause) NODE_ENUM(OmpDefaultClause, Type) + NODE(parser, OmpVariableCategory) + NODE_ENUM(OmpVariableCategory, Value) NODE(parser, OmpDefaultmapClause) NODE_ENUM(OmpDefaultmapClause, ImplicitBehavior) - 
NODE_ENUM(OmpDefaultmapClause, VariableCategory) + NODE(OmpDefaultmapClause, Modifier) NODE(parser, OmpDependenceType) NODE_ENUM(OmpDependenceType, Value) NODE(parser, OmpTaskDependenceType) @@ -567,8 +569,10 @@ class ParseTreeDumper { NODE_ENUM(OmpBindClause, Type) NODE(parser, OmpProcBindClause) NODE_ENUM(OmpProcBindClause, Type) - NODE_ENUM(OmpReductionClause, ReductionModifier) + NODE(parser, OmpReductionModifier) + NODE_ENUM(OmpReductionModifier, Value) NODE(parser, OmpReductionClause) + NODE(OmpReductionClause, Modifier) NODE(parser, OmpInReductionClause) NODE(parser, OmpReductionCombiner) NODE(OmpReductionCombiner, FunctionCombiner) diff --git a/flang/include/flang/Parser/parse-tree.h b/flang/include/flang/Parser/parse-tree.h index ef49a36578270e..5b28bcd4e21b80 100644 --- a/flang/include/flang/Parser/parse-tree.h +++ b/flang/include/flang/Parser/parse-tree.h @@ -3440,6 +3440,16 @@ struct OmpObject { WRAPPER_CLASS(OmpObjectList, std::list); +#define MODIFIER_BOILERPLATE(...) \ + struct Modifier { \ +using Variant = std::variant<__VA_ARGS__>; \ +UNION_CLASS_BOILERPLATE(Modifier); \ +CharBlock source; \ +Variant u; \ + } + +#define MODIFIERS() std::optional> + inline namespace modifier { // For uniformity, in all keyword modifiers the name of the type defined // by ENUM_CLASS is "Value", e.g. @@ -3505,12 +3515,20 @@ struct OmpLinearModifier { // - |// since 4.5, until 5.2 // + | * | .AND. | .OR. | .EQV. | .NEQV. 
|// since 4.5 // MIN | MAX | IAND | IOR | IEOR // since 4.5 -// struct OmpReductionIdentifier { UNION_CLASS_BOILERPLATE(OmpReductionIdentifier); std::variant u; }; +// Ref: [5.0:300-302], [5.1:332-334], [5.2:134-137] +// +// reduction-modifier -> +// DEFAULT | INSCAN | TASK// since 5.0 +struct OmpReductionModifier { + ENUM_CLASS(Value, Default, Inscan, Task); + WRAPPER_CLASS_BOILERPLATE(OmpReductionModifier, Value); +}; + // Ref: [4.5:169-170], [5.0:254-256], [5.1:287-289], [5.2:321] // // task-dependence-type -> // "dependence-type" in 5.1 and before @@ -3521,6 +3539,17 @@ struct OmpTaskDependenceType { ENUM_CLASS(Value, In, Out, Inout, Inoutset, Mutexinoutset, Depobj) WRAPPER_CLASS_BOILERPLATE(OmpTaskDependenceType, Value); }; + +// Ref: [4.5:229-230], [5.0:324-325], [5.1:357-358], [5.2:161-162] +// +// variable-category -> +// SCALAR | // since 4.5 +// AGGREGATE | ALLOCATABLE | POINTER |// since 5.0 +// ALL// since 5.2 +struct OmpVariableCategory { + ENUM_CLASS(Value, Aggregate, All, Allocatable, Pointer, Scalar) + WRAPPER_CLASS_BOILERPLATE(OmpVariableCategory, Value); +}; } // namespace modifier // --- Clauses @@ -3578,8 +3607,8 @@ struct OmpDefaultmapClause { TUPLE_CLASS_BOILERPLATE(OmpDefaultmapClause
[llvm-branch-commits] [flang] [flang][OpenMP] Apply modifier representation to semantic checks (PR #116658)
llvmbot wrote: @llvm/pr-subscribers-flang-parser Author: Krzysztof Parzyszek (kparzysz) Changes Also, define helper macros in parse-tree.h. Apply the new modifier representation to the DEFAULTMAP and REDUCTION clauses, with testcases utilizing the new modifier validation. OpenMP modifier overhaul: #3/3 --- Patch is 37.74 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/116658.diff 15 Files Affected: - (modified) flang/include/flang/Parser/dump-parse-tree.h (+6-2) - (modified) flang/include/flang/Parser/parse-tree.h (+40-9) - (modified) flang/include/flang/Semantics/openmp-modifiers.h (+4) - (modified) flang/lib/Lower/OpenMP/Clauses.cpp (+19-14) - (modified) flang/lib/Parser/openmp-parsers.cpp (+24-16) - (modified) flang/lib/Parser/unparse.cpp (+6-9) - (modified) flang/lib/Semantics/check-omp-structure.cpp (+49-34) - (modified) flang/lib/Semantics/check-omp-structure.h (+1-2) - (modified) flang/lib/Semantics/openmp-modifiers.cpp (+33) - (modified) flang/lib/Semantics/resolve-directives.cpp (+32-20) - (modified) flang/test/Parser/OpenMP/defaultmap-clause.f90 (+4-4) - (modified) flang/test/Parser/OpenMP/defaultmap-unparse.f90 (+8-8) - (modified) flang/test/Parser/OpenMP/reduction-modifier.f90 (+3-3) - (modified) flang/test/Semantics/OpenMP/combined-constructs.f90 (+6-6) - (modified) flang/test/Semantics/OpenMP/defaultmap-clause-v45.f90 (+1-1) ``diff diff --git a/flang/include/flang/Parser/dump-parse-tree.h b/flang/include/flang/Parser/dump-parse-tree.h index df5bf1d8d3200e..9c59ce520a31aa 100644 --- a/flang/include/flang/Parser/dump-parse-tree.h +++ b/flang/include/flang/Parser/dump-parse-tree.h @@ -509,9 +509,11 @@ class ParseTreeDumper { NODE(parser, OmpDeclareMapperSpecifier) NODE(parser, OmpDefaultClause) NODE_ENUM(OmpDefaultClause, Type) + NODE(parser, OmpVariableCategory) + NODE_ENUM(OmpVariableCategory, Value) NODE(parser, OmpDefaultmapClause) NODE_ENUM(OmpDefaultmapClause, ImplicitBehavior) - 
NODE_ENUM(OmpDefaultmapClause, VariableCategory) + NODE(OmpDefaultmapClause, Modifier) NODE(parser, OmpDependenceType) NODE_ENUM(OmpDependenceType, Value) NODE(parser, OmpTaskDependenceType) @@ -567,8 +569,10 @@ class ParseTreeDumper { NODE_ENUM(OmpBindClause, Type) NODE(parser, OmpProcBindClause) NODE_ENUM(OmpProcBindClause, Type) - NODE_ENUM(OmpReductionClause, ReductionModifier) + NODE(parser, OmpReductionModifier) + NODE_ENUM(OmpReductionModifier, Value) NODE(parser, OmpReductionClause) + NODE(OmpReductionClause, Modifier) NODE(parser, OmpInReductionClause) NODE(parser, OmpReductionCombiner) NODE(OmpReductionCombiner, FunctionCombiner) diff --git a/flang/include/flang/Parser/parse-tree.h b/flang/include/flang/Parser/parse-tree.h index ef49a36578270e..5b28bcd4e21b80 100644 --- a/flang/include/flang/Parser/parse-tree.h +++ b/flang/include/flang/Parser/parse-tree.h @@ -3440,6 +3440,16 @@ struct OmpObject { WRAPPER_CLASS(OmpObjectList, std::list); +#define MODIFIER_BOILERPLATE(...) \ + struct Modifier { \ +using Variant = std::variant<__VA_ARGS__>; \ +UNION_CLASS_BOILERPLATE(Modifier); \ +CharBlock source; \ +Variant u; \ + } + +#define MODIFIERS() std::optional> + inline namespace modifier { // For uniformity, in all keyword modifiers the name of the type defined // by ENUM_CLASS is "Value", e.g. @@ -3505,12 +3515,20 @@ struct OmpLinearModifier { // - |// since 4.5, until 5.2 // + | * | .AND. | .OR. | .EQV. | .NEQV. 
|// since 4.5 // MIN | MAX | IAND | IOR | IEOR // since 4.5 -// struct OmpReductionIdentifier { UNION_CLASS_BOILERPLATE(OmpReductionIdentifier); std::variant u; }; +// Ref: [5.0:300-302], [5.1:332-334], [5.2:134-137] +// +// reduction-modifier -> +// DEFAULT | INSCAN | TASK// since 5.0 +struct OmpReductionModifier { + ENUM_CLASS(Value, Default, Inscan, Task); + WRAPPER_CLASS_BOILERPLATE(OmpReductionModifier, Value); +}; + // Ref: [4.5:169-170], [5.0:254-256], [5.1:287-289], [5.2:321] // // task-dependence-type -> // "dependence-type" in 5.1 and before @@ -3521,6 +3539,17 @@ struct OmpTaskDependenceType { ENUM_CLASS(Value, In, Out, Inout, Inoutset, Mutexinoutset, Depobj) WRAPPER_CLASS_BOILERPLATE(OmpTaskDependenceType, Value); }; + +// Ref: [4.5:229-230], [5.0:324-325], [5.1:357-358], [5.2:161-162] +// +// variable-category -> +// SCALAR | // since 4.5 +// AGGREGATE | ALLOCATABLE | POINTER |// since 5.0 +// ALL// since 5.2 +struct OmpVariableCategory { + ENUM_CLASS(Value, Aggregate, All, Allocatable, Pointer, Scalar) + WRAPPER_CLASS_BOILERPLATE(OmpVariableCategory, Value); +}; } // namespace modifier // --- Clauses @@ -3578,8 +3607,8 @@ struct OmpDefaultmapClause { TUPLE_CLASS_BOILERPLATE(OmpDefaultmapClause
[llvm-branch-commits] [llvm] [Linker] Remove a use of StructType::setBody. NFC. (PR #116653)
llvmbot wrote: @llvm/pr-subscribers-lto Author: Jay Foad (jayfoad) Changes This falls out naturally after inlining finishType into its only remaining use. --- Full diff: https://github.com/llvm/llvm-project/pull/116653.diff 1 Files Affected: - (modified) llvm/lib/Linker/IRMover.cpp (+11-18) ``diff diff --git a/llvm/lib/Linker/IRMover.cpp b/llvm/lib/Linker/IRMover.cpp index c653900c632cc9..4bb0ddf891744b 100644 --- a/llvm/lib/Linker/IRMover.cpp +++ b/llvm/lib/Linker/IRMover.cpp @@ -82,8 +82,6 @@ class TypeMapTy : public ValueMapTypeRemapper { Type *get(Type *SrcTy); Type *get(Type *SrcTy, SmallPtrSet &Visited); - void finishType(StructType *DTy, StructType *STy, ArrayRef ETypes); - FunctionType *get(FunctionType *T) { return cast(get((Type *)T)); } @@ -233,20 +231,6 @@ Error TypeMapTy::linkDefinedTypeBodies() { return Error::success(); } -void TypeMapTy::finishType(StructType *DTy, StructType *STy, - ArrayRef ETypes) { - DTy->setBody(ETypes, STy->isPacked()); - - // Steal STy's name. - if (STy->hasName()) { -SmallString<16> TmpName = STy->getName(); -STy->setName(""); -DTy->setName(TmpName); - } - - DstStructTypesSet.addNonOpaque(DTy); -} - Type *TypeMapTy::get(Type *Ty) { SmallPtrSet Visited; return get(Ty, Visited); @@ -342,8 +326,17 @@ Type *TypeMapTy::get(Type *Ty, SmallPtrSet &Visited) { return *Entry = Ty; } -StructType *DTy = StructType::create(Ty->getContext()); -finishType(DTy, STy, ElementTypes); +StructType *DTy = +StructType::create(Ty->getContext(), ElementTypes, "", STy->isPacked()); + +// Steal STy's name. +if (STy->hasName()) { + SmallString<16> TmpName = STy->getName(); + STy->setName(""); + DTy->setName(TmpName); +} + +DstStructTypesSet.addNonOpaque(DTy); return *Entry = DTy; } } `` https://github.com/llvm/llvm-project/pull/116653 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [flang] [flang][OpenMP] Apply modifier representation to semantic checks (PR #116658)
github-actions[bot] wrote: :warning: C/C++ code formatter, clang-format found issues in your code. :warning: You can test this locally with the following command: ``bash git-clang-format --diff e8bbc26e136993758c3a3197eed6b1924c6531d0 fac6a8594643811418f37ee42fc1ac35bcc2a244 --extensions h,cpp -- flang/include/flang/Parser/dump-parse-tree.h flang/include/flang/Parser/parse-tree.h flang/include/flang/Semantics/openmp-modifiers.h flang/lib/Lower/OpenMP/Clauses.cpp flang/lib/Parser/openmp-parsers.cpp flang/lib/Parser/unparse.cpp flang/lib/Semantics/check-omp-structure.cpp flang/lib/Semantics/check-omp-structure.h flang/lib/Semantics/openmp-modifiers.cpp flang/lib/Semantics/resolve-directives.cpp `` View the diff from clang-format here. ``diff diff --git a/flang/lib/Parser/openmp-parsers.cpp b/flang/lib/Parser/openmp-parsers.cpp index 063201fc86..3ee8159682 100644 --- a/flang/lib/Parser/openmp-parsers.cpp +++ b/flang/lib/Parser/openmp-parsers.cpp @@ -255,8 +255,8 @@ TYPE_PARSER(construct( "POINTER" >> pure(OmpVariableCategory::Value::Pointer) || "SCALAR" >> pure(OmpVariableCategory::Value::Scalar))) -TYPE_PARSER(sourced(construct( -Parser{}))) +TYPE_PARSER(sourced( +construct(Parser{}))) // --- Parsers for clauses `` https://github.com/llvm/llvm-project/pull/116658 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [NFC] Use unique_ptr in SparseSet (PR #116617)
@@ -205,7 +209,7 @@ class SparseSet { assert(Idx < Universe && "Key out of range"); assert(Sparse != nullptr && "Invalid sparse type"); const unsigned Stride = std::numeric_limits<SparseT>::max() + 1u; -for (unsigned i = Sparse[Idx], e = size(); i < e; i += Stride) { +for (unsigned i = Sparse.get()[Idx], e = size(); i < e; i += Stride) { dwblaikie wrote: If you make the `std::unique_ptr<SparseT>` into a `std::unique_ptr<SparseT[]>` then you can use [] directly without the `.get()` I think? https://github.com/llvm/llvm-project/pull/116617 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [NFC] Use unique_ptr in SparseSet (PR #116617)
dwblaikie wrote: Oh, and please add unit test coverage for the new move functionality. https://github.com/llvm/llvm-project/pull/116617 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [flang] [flang][OpenMP] Apply modifier representation to semantic checks (PR #116658)
https://github.com/kparzysz updated https://github.com/llvm/llvm-project/pull/116658 >From fac6a8594643811418f37ee42fc1ac35bcc2a244 Mon Sep 17 00:00:00 2001 From: Krzysztof Parzyszek Date: Thu, 14 Nov 2024 07:29:59 -0600 Subject: [PATCH 1/2] [flang][OpenMP] Apply modifier representation to semantic checks Also, define helper macros in parse-tree.h. Apply the new modifier representation to the DEFAULTMAP and REDUCTION clauses, with testcases utilizing the new modifier validation. OpenMP modifier overhaul: #3/3 --- flang/include/flang/Parser/dump-parse-tree.h | 8 +- flang/include/flang/Parser/parse-tree.h | 49 +-- .../flang/Semantics/openmp-modifiers.h| 4 + flang/lib/Lower/OpenMP/Clauses.cpp| 33 flang/lib/Parser/openmp-parsers.cpp | 40 + flang/lib/Parser/unparse.cpp | 15 ++-- flang/lib/Semantics/check-omp-structure.cpp | 83 +++ flang/lib/Semantics/check-omp-structure.h | 3 +- flang/lib/Semantics/openmp-modifiers.cpp | 33 flang/lib/Semantics/resolve-directives.cpp| 52 +++- .../test/Parser/OpenMP/defaultmap-clause.f90 | 8 +- .../test/Parser/OpenMP/defaultmap-unparse.f90 | 16 ++-- .../test/Parser/OpenMP/reduction-modifier.f90 | 6 +- .../Semantics/OpenMP/combined-constructs.f90 | 12 +-- .../OpenMP/defaultmap-clause-v45.f90 | 2 +- 15 files changed, 236 insertions(+), 128 deletions(-) diff --git a/flang/include/flang/Parser/dump-parse-tree.h b/flang/include/flang/Parser/dump-parse-tree.h index df5bf1d8d3200e..9c59ce520a31aa 100644 --- a/flang/include/flang/Parser/dump-parse-tree.h +++ b/flang/include/flang/Parser/dump-parse-tree.h @@ -509,9 +509,11 @@ class ParseTreeDumper { NODE(parser, OmpDeclareMapperSpecifier) NODE(parser, OmpDefaultClause) NODE_ENUM(OmpDefaultClause, Type) + NODE(parser, OmpVariableCategory) + NODE_ENUM(OmpVariableCategory, Value) NODE(parser, OmpDefaultmapClause) NODE_ENUM(OmpDefaultmapClause, ImplicitBehavior) - NODE_ENUM(OmpDefaultmapClause, VariableCategory) + NODE(OmpDefaultmapClause, Modifier) NODE(parser, OmpDependenceType) 
NODE_ENUM(OmpDependenceType, Value) NODE(parser, OmpTaskDependenceType) @@ -567,8 +569,10 @@ class ParseTreeDumper { NODE_ENUM(OmpBindClause, Type) NODE(parser, OmpProcBindClause) NODE_ENUM(OmpProcBindClause, Type) - NODE_ENUM(OmpReductionClause, ReductionModifier) + NODE(parser, OmpReductionModifier) + NODE_ENUM(OmpReductionModifier, Value) NODE(parser, OmpReductionClause) + NODE(OmpReductionClause, Modifier) NODE(parser, OmpInReductionClause) NODE(parser, OmpReductionCombiner) NODE(OmpReductionCombiner, FunctionCombiner) diff --git a/flang/include/flang/Parser/parse-tree.h b/flang/include/flang/Parser/parse-tree.h index ef49a36578270e..5b28bcd4e21b80 100644 --- a/flang/include/flang/Parser/parse-tree.h +++ b/flang/include/flang/Parser/parse-tree.h @@ -3440,6 +3440,16 @@ struct OmpObject { WRAPPER_CLASS(OmpObjectList, std::list); +#define MODIFIER_BOILERPLATE(...) \ + struct Modifier { \ +using Variant = std::variant<__VA_ARGS__>; \ +UNION_CLASS_BOILERPLATE(Modifier); \ +CharBlock source; \ +Variant u; \ + } + +#define MODIFIERS() std::optional> + inline namespace modifier { // For uniformity, in all keyword modifiers the name of the type defined // by ENUM_CLASS is "Value", e.g. @@ -3505,12 +3515,20 @@ struct OmpLinearModifier { // - |// since 4.5, until 5.2 // + | * | .AND. | .OR. | .EQV. | .NEQV. 
|// since 4.5 // MIN | MAX | IAND | IOR | IEOR // since 4.5 -// struct OmpReductionIdentifier { UNION_CLASS_BOILERPLATE(OmpReductionIdentifier); std::variant u; }; +// Ref: [5.0:300-302], [5.1:332-334], [5.2:134-137] +// +// reduction-modifier -> +// DEFAULT | INSCAN | TASK// since 5.0 +struct OmpReductionModifier { + ENUM_CLASS(Value, Default, Inscan, Task); + WRAPPER_CLASS_BOILERPLATE(OmpReductionModifier, Value); +}; + // Ref: [4.5:169-170], [5.0:254-256], [5.1:287-289], [5.2:321] // // task-dependence-type -> // "dependence-type" in 5.1 and before @@ -3521,6 +3539,17 @@ struct OmpTaskDependenceType { ENUM_CLASS(Value, In, Out, Inout, Inoutset, Mutexinoutset, Depobj) WRAPPER_CLASS_BOILERPLATE(OmpTaskDependenceType, Value); }; + +// Ref: [4.5:229-230], [5.0:324-325], [5.1:357-358], [5.2:161-162] +// +// variable-category -> +// SCALAR | // since 4.5 +// AGGREGATE | ALLOCATABLE | POINTER |// since 5.0 +// ALL// since 5.2 +struct OmpVariableCategory { + ENUM_CLASS(Value, Aggregate, All, Allocatable, Pointer, Scalar) + WRAPPER_CLASS_BOILERPLATE(OmpVariableCategory, Value); +}; } // namespace modifier // --- Clauses @@ -3578,8 +3607,8 @@ struct OmpDefaultmapClause { TUPLE_CLASS_BOILERPLATE(OmpDef
[llvm-branch-commits] [llvm] [Linker] Remove a use of StructType::setBody. NFC. (PR #116653)
https://github.com/jayfoad created https://github.com/llvm/llvm-project/pull/116653 This falls out naturally after inlining finishType into its only remaining use. >From 4140bc772f5930807cb2ea5b4b2aa945c57b699c Mon Sep 17 00:00:00 2001 From: Jay Foad Date: Mon, 18 Nov 2024 16:36:33 + Subject: [PATCH] [Linker] Remove a use of StructType::setBody. NFC. This falls out naturally after inlining finishType into its only remaining use. --- llvm/lib/Linker/IRMover.cpp | 29 +++-- 1 file changed, 11 insertions(+), 18 deletions(-) diff --git a/llvm/lib/Linker/IRMover.cpp b/llvm/lib/Linker/IRMover.cpp index c653900c632cc9..4bb0ddf891744b 100644 --- a/llvm/lib/Linker/IRMover.cpp +++ b/llvm/lib/Linker/IRMover.cpp @@ -82,8 +82,6 @@ class TypeMapTy : public ValueMapTypeRemapper { Type *get(Type *SrcTy); Type *get(Type *SrcTy, SmallPtrSet &Visited); - void finishType(StructType *DTy, StructType *STy, ArrayRef ETypes); - FunctionType *get(FunctionType *T) { return cast(get((Type *)T)); } @@ -233,20 +231,6 @@ Error TypeMapTy::linkDefinedTypeBodies() { return Error::success(); } -void TypeMapTy::finishType(StructType *DTy, StructType *STy, - ArrayRef ETypes) { - DTy->setBody(ETypes, STy->isPacked()); - - // Steal STy's name. - if (STy->hasName()) { -SmallString<16> TmpName = STy->getName(); -STy->setName(""); -DTy->setName(TmpName); - } - - DstStructTypesSet.addNonOpaque(DTy); -} - Type *TypeMapTy::get(Type *Ty) { SmallPtrSet Visited; return get(Ty, Visited); @@ -342,8 +326,17 @@ Type *TypeMapTy::get(Type *Ty, SmallPtrSet &Visited) { return *Entry = Ty; } -StructType *DTy = StructType::create(Ty->getContext()); -finishType(DTy, STy, ElementTypes); +StructType *DTy = +StructType::create(Ty->getContext(), ElementTypes, "", STy->isPacked()); + +// Steal STy's name. 
+if (STy->hasName()) { + SmallString<16> TmpName = STy->getName(); + STy->setName(""); + DTy->setName(TmpName); +} + +DstStructTypesSet.addNonOpaque(DTy); return *Entry = DTy; } } ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [compiler-rt] release/19.x: Reapply "[sanitizer_common] AND signals in BlockSignals instead of deleting (#113443)" for non-Android Linux only (#115790) (PR #116670)
https://github.com/llvmbot milestoned https://github.com/llvm/llvm-project/pull/116670 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [compiler-rt] release/19.x: Reapply "[sanitizer_common] AND signals in BlockSignals instead of deleting (#113443)" for non-Android Linux only (#115790) (PR #116670)
github-actions[bot] wrote: Thank you for submitting a Pull Request (PR) to the LLVM Project! This PR will be automatically labeled and the relevant teams will be notified. If you wish to, you can add reviewers by using the "Reviewers" section on this page. If this is not working for you, it is probably because you do not have write permissions for the repository. In which case you can instead tag reviewers by name in a comment by using `@` followed by their GitHub username. If you have received no comments on your PR for a week, you can request a review by "ping"ing the PR by adding a comment “Ping”. The common courtesy "ping" rate is once a week. Please remember that you are asking for valuable time from other developers. If you have further questions, they may be answered by the [LLVM GitHub User Guide](https://llvm.org/docs/GitHub.html). You can also ask questions in a comment on this PR, on the [LLVM Discord](https://discord.com/invite/xS7Z362) or on the [forums](https://discourse.llvm.org/). https://github.com/llvm/llvm-project/pull/116670 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [compiler-rt] release/19.x: Reapply "[sanitizer_common] AND signals in BlockSignals instead of deleting (#113443)" for non-Android Linux only (#115790) (PR #116670)
https://github.com/vitalybuka approved this pull request. https://github.com/llvm/llvm-project/pull/116670 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [llvm] [RISCV] Support __builtin_cpu_is (PR #116231)
@@ -22505,6 +22506,47 @@ Value *CodeGenFunction::EmitHexagonBuiltinExpr(unsigned BuiltinID, return nullptr; } +Value *CodeGenFunction::EmitRISCVCpuIs(const CallExpr *E) { + const Expr *CPUExpr = E->getArg(0)->IgnoreParenCasts(); + StringRef CPUStr = cast(CPUExpr)->getString(); + return EmitRISCVCpuIs(CPUStr); +} + +Value *CodeGenFunction::EmitRISCVCpuIs(StringRef CPUStr) { + llvm::Type *Int32Ty = Builder.getInt32Ty(); + llvm::Type *Int64Ty = Builder.getInt64Ty(); + llvm::StructType *StructTy = llvm::StructType::get(Int32Ty, Int64Ty, Int64Ty); + llvm::Constant *RISCVCPUModel = + CGM.CreateRuntimeVariable(StructTy, "__riscv_cpu_model"); + cast(RISCVCPUModel)->setDSOLocal(true); + + auto loadRISCVCPUID = [&](unsigned Index) { +Value *Ptr = Builder.CreateStructGEP(StructTy, RISCVCPUModel, Index); +Value *CPUID = Builder.CreateAlignedLoad(StructTy->getTypeAtIndex(Index), + Ptr, llvm::MaybeAlign()); +return CPUID; + }; + + const llvm::RISCV::CPUModel CPUModel = llvm::RISCV::getCPUModel(CPUStr); + + // Compare mvendorid. + Value *VendorID = loadRISCVCPUID(0); + Value *Result = + Builder.CreateICmpEQ(VendorID, Builder.getInt32(CPUModel.MVendorID)); + + // Compare marchid. + Value *ArchID = loadRISCVCPUID(1); + Result = Builder.CreateAnd( + Result, Builder.CreateICmpEQ(ArchID, Builder.getInt64(CPUModel.MArchID))); + + // Compare mimplid. topperc wrote: mimpid https://github.com/llvm/llvm-project/pull/116231 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Add subtarget features for minimum3/maximum3 instructions (PR #116308)
arsenm wrote: ### Merge activity * **Nov 18, 1:34 PM EST**: A user started a stack merge that includes this pull request via [Graphite](https://app.graphite.dev/github/pr/llvm/llvm-project/116308). https://github.com/llvm/llvm-project/pull/116308 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [llvm] AMDGPU: Add v_prng_b32 instruction for gfx950 (PR #116310)
arsenm wrote: ### Merge activity * **Nov 18, 1:34 PM EST**: A user started a stack merge that includes this pull request via [Graphite](https://app.graphite.dev/github/pr/llvm/llvm-project/116310). https://github.com/llvm/llvm-project/pull/116310 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Handle gfx950 96/128-bit buffer_load_lds (PR #116681)
https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/116681 >From 884cb697a58e021372842cc674806a5228a84ef0 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Thu, 18 Jan 2024 16:18:05 +0700 Subject: [PATCH] AMDGPU: Handle gfx950 96/128-bit buffer_load_lds Enforcing this limit in the clang builtin will come later. --- llvm/include/llvm/IR/IntrinsicsAMDGPU.td | 8 +- .../AMDGPU/AMDGPUInstructionSelector.cpp | 18 ++ llvm/lib/Target/AMDGPU/BUFInstructions.td | 24 ++- llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 16 ++ .../llvm.amdgcn.global.load.lds.gfx950.ll | 8 + ...m.amdgcn.raw.ptr.buffer.load.lds.gfx950.ll | 176 ...mdgcn.struct.ptr.buffer.load.lds.gfx950.ll | 196 ++ llvm/test/MC/AMDGPU/mubuf-gfx950.s| 32 +++ llvm/test/MC/Disassembler/AMDGPU/gfx950.txt | 19 ++ 9 files changed, 485 insertions(+), 12 deletions(-) create mode 100644 llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.ptr.buffer.load.lds.gfx950.ll create mode 100644 llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.buffer.load.lds.gfx950.ll create mode 100644 llvm/test/MC/AMDGPU/mubuf-gfx950.s diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td index f43ab50d2ea441..360af786c5160d 100644 --- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td +++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td @@ -1674,7 +1674,7 @@ class AMDGPURawBufferLoadLDS : Intrinsic < [], [llvm_v4i32_ty, // rsrc(SGPR) LLVMQualPointerType<3>,// LDS base offset - llvm_i32_ty, // Data byte size: 1/2/4 + llvm_i32_ty, // Data byte size: 1/2/4 (/12/16 for gfx950) llvm_i32_ty, // voffset(VGPR, included in bounds checking and swizzling) llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling) llvm_i32_ty, // imm offset(imm, included in bounds checking and swizzling) @@ -1693,7 +1693,7 @@ class AMDGPURawPtrBufferLoadLDS : Intrinsic < [], [AMDGPUBufferRsrcTy,// rsrc(SGPR) LLVMQualPointerType<3>,// LDS base offset - llvm_i32_ty, // Data byte size: 1/2/4 + llvm_i32_ty, // Data byte 
size: 1/2/4 (/12/16 for gfx950) llvm_i32_ty, // voffset(VGPR, included in bounds checking and swizzling) llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling) llvm_i32_ty, // imm offset(imm, included in bounds checking and swizzling) @@ -1715,7 +1715,7 @@ class AMDGPUStructBufferLoadLDS : Intrinsic < [], [llvm_v4i32_ty, // rsrc(SGPR) LLVMQualPointerType<3>,// LDS base offset - llvm_i32_ty, // Data byte size: 1/2/4 + llvm_i32_ty, // Data byte size: 1/2/4 (/12/16 for gfx950) llvm_i32_ty, // vindex(VGPR) llvm_i32_ty, // voffset(VGPR, included in bounds checking and swizzling) llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling) @@ -1735,7 +1735,7 @@ class AMDGPUStructPtrBufferLoadLDS : Intrinsic < [], [AMDGPUBufferRsrcTy,// rsrc(SGPR) LLVMQualPointerType<3>,// LDS base offset - llvm_i32_ty, // Data byte size: 1/2/4 + llvm_i32_ty, // Data byte size: 1/2/4 (/12/16 for gfx950) llvm_i32_ty, // vindex(VGPR) llvm_i32_ty, // voffset(VGPR, included in bounds checking and swizzling) llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp index a6ef0069f134bd..3522ece24f1c45 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp @@ -3240,6 +3240,24 @@ bool AMDGPUInstructionSelector::selectBufferLoadLds(MachineInstr &MI) const { : HasVOffset ? AMDGPU::BUFFER_LOAD_DWORD_LDS_OFFEN : AMDGPU::BUFFER_LOAD_DWORD_LDS_OFFSET; break; + case 12: +if (!Subtarget->hasLDSLoadB96_B128()) + return false; + +Opc = HasVIndex ? HasVOffset ? AMDGPU::BUFFER_LOAD_DWORDX3_LDS_BOTHEN + : AMDGPU::BUFFER_LOAD_DWORDX3_LDS_IDXEN +: HasVOffset ? AMDGPU::BUFFER_LOAD_DWORDX3_LDS_OFFEN + : AMDGPU::BUFFER_LOAD_DWORDX3_LDS_OFFSET; +break; + case 16: +if (!Subtarget->hasLDSLoadB96_B128()) + return false; + +Opc = HasVIndex ? HasVOffset ? 
AMDGPU::BUFFER_LOAD_DWORDX4_LDS_BOTHEN + : AMDGPU::BUFFER_LOAD_DWORDX4_LDS_IDXEN +: HasVOffset ? AMDGPU::BUFFER_LOAD_DWORDX4_LDS_OFFEN + : AMDGPU::BUFFER_LOAD_DWORDX4_LDS_OFFSET; +break; } Ma
[llvm-branch-commits] [flang] [llvm] [flang][OpenMP] Change clause modifier representation in parser (PR #116656)
https://github.com/kparzysz updated https://github.com/llvm/llvm-project/pull/116656 >From e8bbc26e136993758c3a3197eed6b1924c6531d0 Mon Sep 17 00:00:00 2001 From: Krzysztof Parzyszek Date: Mon, 18 Nov 2024 08:47:24 -0600 Subject: [PATCH] [flang][OpenMP] Change clause modifier representation in parser The main issue to solve is that OpenMP modifiers can be specified in any order, so the parser cannot expect any specific modifier at a given position. To solve that, define modifier to be a union of all allowable specific modifiers for a given clause. Additionally, implement modifier descriptors: for each modifier the corresponding descriptor contains a set of properties of the modifier that allow a common set of semantic checks. Start with the syntactic properties defined in the spec: Required, Unique, Exclusive, Ultimate, and implement common checks to verify each of them. OpenMP modifier overhaul: #2/3 --- .../flang/Semantics/openmp-modifiers.h| 391 ++ flang/lib/Semantics/CMakeLists.txt| 1 + flang/lib/Semantics/openmp-modifiers.cpp | 146 +++ llvm/include/llvm/Frontend/OpenMP/OMP.h | 2 + llvm/lib/Frontend/OpenMP/OMP.cpp | 5 + 5 files changed, 545 insertions(+) create mode 100644 flang/include/flang/Semantics/openmp-modifiers.h create mode 100644 flang/lib/Semantics/openmp-modifiers.cpp diff --git a/flang/include/flang/Semantics/openmp-modifiers.h b/flang/include/flang/Semantics/openmp-modifiers.h new file mode 100644 index 00..6be582761ed687 --- /dev/null +++ b/flang/include/flang/Semantics/openmp-modifiers.h @@ -0,0 +1,391 @@ +//===-- flang/lib/Semantics/openmp-modifiers.h --*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===--===// + +#ifndef FORTRAN_SEMANTICS_OPENMP_MODIFIERS_H_ +#define FORTRAN_SEMANTICS_OPENMP_MODIFIERS_H_ + +#include "flang/Common/enum-set.h" +#include "flang/Parser/parse-tree.h" +#include "flang/Semantics/semantics.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Frontend/OpenMP/OMP.h" + +#include +#include +#include +#include + +namespace Fortran::semantics { + +// Ref: [5.2:58] +// +// Syntactic properties for Clauses, Arguments and Modifiers +// +// Inverse properties: +// not Required -> Optional +// not Unique-> Repeatable +// not Exclusive -> Compatible +// not Ultimate -> Free +// +// Clause defaults: Optional, Repeatable, Compatible, Free +// Argument defaults: Required, Unique, Compatible, Free +// Modifier defaults: Optional, Unique, Compatible, Free +// +// --- +// Each modifier is used as either pre-modifier (i.e. modifier: item), +// or post-modifier (i.e. item: modifier). The default is pre-. +// Add an additional property that reflects the type of modifier. + +ENUM_CLASS(OmpProperty, Required, Unique, Exclusive, Ultimate, Post); +using OmpProperties = common::EnumSet; +using OmpClauses = +common::EnumSet; + +struct OmpModifierDescriptor { + // Modifier name for use in diagnostic messages. + const OmpProperties &props(unsigned version) const; + const OmpClauses &clauses(unsigned version) const; + + const llvm::StringRef name; + // Version-dependent properties of the modifier. + const std::map props_; + // Version-dependent set of clauses to which the modifier can apply. 
+ const std::map clauses_; +}; + +template const OmpModifierDescriptor &OmpGetDescriptor(); + +template <> +const OmpModifierDescriptor &OmpGetDescriptor(); +template <> +const OmpModifierDescriptor &OmpGetDescriptor(); +template <> +const OmpModifierDescriptor &OmpGetDescriptor(); +template <> +const OmpModifierDescriptor &OmpGetDescriptor(); +template <> +const OmpModifierDescriptor &OmpGetDescriptor(); + +// Explanation of terminology: +// +// A typical clause with modifier[s] looks like this (with parts that are +// not relevant here removed): +// struct OmpSomeClause { +// struct Modifier { +// using Variant = std::variant<Specific1, Specific2...>; +// Variant u; +// }; +// std::tuple<std::optional<std::list<Modifier>>, ...> t; +// }; +// +// The Specific1, etc. refer to parser classes that represent modifiers, +// e.g. OmpIterator or OmpTaskDependenceType. The Variant type contains +// all modifiers that are allowed for a given clause. The Modifier class +// is there to wrap the variant into the form that the parse tree visitor +// expects, i.e. with traits, member "u", etc. +// +// To avoid ambiguities with the word "modifier" (e.g. is it "any modifier", +// or "this specific modifier"?), the following code uses different terms: +// +// - UnionTy: refers to the nested "Modifier" class, i.e. +// "OmpSomeClause::Modifier" in the example above. +// - SpecificTy: refers to any
[llvm-branch-commits] [llvm] AMDGPU: Increase the LDS size to support to 160 KB for gfx950 (PR #116309)
arsenm wrote: ### Merge activity * **Nov 18, 1:34 PM EST**: A user started a stack merge that includes this pull request via [Graphite](https://app.graphite.dev/github/pr/llvm/llvm-project/116309). https://github.com/llvm/llvm-project/pull/116309 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Handle gfx950 global_load_lds_* instructions (PR #116680)
https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/116680 >From 0443398b73f18791598db1bf6ab2274a46ac649f Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Thu, 18 Jan 2024 14:44:03 +0700 Subject: [PATCH] AMDGPU: Handle gfx950 global_load_lds_* instructions Define global_load_lds_dwordx3 and global_load_dwordx4. Oddly it seems dwordx2 was skipped. --- llvm/include/llvm/IR/IntrinsicsAMDGPU.td | 2 +- .../AMDGPU/AMDGPUInstructionSelector.cpp | 10 ++ llvm/lib/Target/AMDGPU/FLATInstructions.td| 9 ++ llvm/lib/Target/AMDGPU/GCNSubtarget.h | 7 + llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 10 ++ .../llvm.amdgcn.global.load.lds.gfx950.ll | 137 ++ llvm/test/MC/AMDGPU/gfx950_asm_features.s | 37 + llvm/test/MC/Disassembler/AMDGPU/gfx950.txt | 25 8 files changed, 236 insertions(+), 1 deletion(-) create mode 100644 llvm/test/CodeGen/AMDGPU/llvm.amdgcn.global.load.lds.gfx950.ll create mode 100644 llvm/test/MC/AMDGPU/gfx950_asm_features.s create mode 100644 llvm/test/MC/Disassembler/AMDGPU/gfx950.txt diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td index 15f33cdbf92e6e..f43ab50d2ea441 100644 --- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td +++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td @@ -2452,7 +2452,7 @@ class AMDGPUGlobalLoadLDS : [], [LLVMQualPointerType<1>,// Base global pointer to load from LLVMQualPointerType<3>,// LDS base pointer to store to - llvm_i32_ty, // Data byte size: 1/2/4 + llvm_i32_ty, // Data byte size: 1/2/4 (/12/16 for gfx950) llvm_i32_ty, // imm offset (applied to both global and LDS address) llvm_i32_ty], // auxiliary data (imm, cachepolicy (bit 0 = sc0, // bit 1 = sc1, diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp index 13de93e829fab2..a6ef0069f134bd 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp @@ -3329,6 +3329,16 @@ bool 
AMDGPUInstructionSelector::selectGlobalLoadLds(MachineInstr &MI) const{ case 4: Opc = AMDGPU::GLOBAL_LOAD_LDS_DWORD; break; + case 12: +if (!Subtarget->hasLDSLoadB96_B128()) + return false; +Opc = AMDGPU::GLOBAL_LOAD_LDS_DWORDX3; +break; + case 16: +if (!Subtarget->hasLDSLoadB96_B128()) + return false; +Opc = AMDGPU::GLOBAL_LOAD_LDS_DWORDX4; +break; } MachineBasicBlock *MBB = MI.getParent(); diff --git a/llvm/lib/Target/AMDGPU/FLATInstructions.td b/llvm/lib/Target/AMDGPU/FLATInstructions.td index db74372e9db452..861fcf017d9e4d 100644 --- a/llvm/lib/Target/AMDGPU/FLATInstructions.td +++ b/llvm/lib/Target/AMDGPU/FLATInstructions.td @@ -934,6 +934,11 @@ defm GLOBAL_LOAD_LDS_USHORT : FLAT_Global_Load_LDS_Pseudo <"global_load_lds_usho defm GLOBAL_LOAD_LDS_SSHORT : FLAT_Global_Load_LDS_Pseudo <"global_load_lds_sshort">; defm GLOBAL_LOAD_LDS_DWORD : FLAT_Global_Load_LDS_Pseudo <"global_load_lds_dword">; +let SubtargetPredicate = HasGFX950Insts in { +defm GLOBAL_LOAD_LDS_DWORDX3 : FLAT_Global_Load_LDS_Pseudo <"global_load_lds_dwordx3">; +defm GLOBAL_LOAD_LDS_DWORDX4 : FLAT_Global_Load_LDS_Pseudo <"global_load_lds_dwordx4">; +} + let SubtargetPredicate = isGFX12Plus in { defm GLOBAL_ATOMIC_COND_SUB_U32: FLAT_Global_Atomic_Pseudo <"global_atomic_cond_sub_u32", VGPR_32, i32>; defm GLOBAL_ATOMIC_ORDERED_ADD_B64 : FLAT_Global_Atomic_Pseudo <"global_atomic_ordered_add_b64", VReg_64, i64>; @@ -1980,6 +1985,10 @@ defm GLOBAL_LOAD_LDS_USHORT : FLAT_Real_AllAddr_LDS <0x028, 0x12>; defm GLOBAL_LOAD_LDS_SSHORT : FLAT_Real_AllAddr_LDS <0x029, 0x13>; defm GLOBAL_LOAD_LDS_DWORD : FLAT_Real_AllAddr_LDS <0x02a, 0x14>; +defm GLOBAL_LOAD_LDS_DWORDX3 : FLAT_Real_AllAddr_LDS <0x07e, 0x07e>; +defm GLOBAL_LOAD_LDS_DWORDX4 : FLAT_Real_AllAddr_LDS <0x07d, 0x07d>; + + defm GLOBAL_ATOMIC_SWAP : FLAT_Global_Real_Atomics_vi <0x40>; defm GLOBAL_ATOMIC_CMPSWAP: FLAT_Global_Real_Atomics_vi <0x41>; defm GLOBAL_ATOMIC_ADD: FLAT_Global_Real_Atomics_vi <0x42>; diff --git 
a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h index 4a6efe533230b1..f3f96940c1f44b 100644 --- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h +++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h @@ -1289,6 +1289,13 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo, // hasGFX940Insts and hasGFX90AInsts are also true. bool hasGFX950Insts() const { return GFX950Insts; } + /// Returns true if the target supports + /// global_load_lds_dwordx3/global_load_lds_dwordx4 or + /// buffer_load_dwordx3/buffer_load_dwordx4 with the lds bit. + bool hasLDSLoadB96_B128() const { +return h