[llvm-branch-commits] [llvm] AMDGPU: Mark grid size loads with range metadata (PR #113019)
https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/113019 >From cc4a77290bc498c22cf5b848c39e4effc8103ba5 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Sat, 19 Oct 2024 02:18:45 +0400 Subject: [PATCH] AMDGPU: Mark grid size loads with range metadata Only handles the v5 case. --- .../AMDGPU/AMDGPULowerKernelAttributes.cpp| 33 - llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp| 1 + ...amdgpu-max-num-workgroups-load-annotate.ll | 124 ++ 3 files changed, 154 insertions(+), 4 deletions(-) create mode 100644 llvm/test/CodeGen/AMDGPU/amdgpu-max-num-workgroups-load-annotate.ll diff --git a/llvm/lib/Target/AMDGPU/AMDGPULowerKernelAttributes.cpp b/llvm/lib/Target/AMDGPU/AMDGPULowerKernelAttributes.cpp index 1bb5e794da7dd6..5fc0c36359b6f5 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULowerKernelAttributes.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULowerKernelAttributes.cpp @@ -23,6 +23,7 @@ #include "llvm/IR/InstIterator.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicsAMDGPU.h" +#include "llvm/IR/MDBuilder.h" #include "llvm/IR/PatternMatch.h" #include "llvm/Pass.h" @@ -83,6 +84,20 @@ Function *getBasePtrIntrinsic(Module &M, bool IsV5OrAbove) { } // end anonymous namespace +static void annotateGridSizeLoadWithRangeMD(LoadInst *Load, +uint32_t MaxNumGroups) { + if (MaxNumGroups == 0 || MaxNumGroups == std::numeric_limits::max()) +return; + + if (!Load->getType()->isIntegerTy(32)) +return; + + // TODO: If there is existing range metadata, preserve it if it is stricter. + MDBuilder MDB(Load->getContext()); + MDNode *Range = MDB.createRange(APInt(32, 1), APInt(32, MaxNumGroups + 1)); + Load->setMetadata(LLVMContext::MD_range, Range); +} + static bool processUse(CallInst *CI, bool IsV5OrAbove) { Function *F = CI->getParent()->getParent(); @@ -92,7 +107,11 @@ static bool processUse(CallInst *CI, bool IsV5OrAbove) { const bool HasUniformWorkGroupSize = F->getFnAttribute("uniform-work-group-size").getValueAsBool(); - if (!HasReqdWorkGroupSize && !HasUniformWorkGroupSize) + SmallVector MaxNumWorkgroups = + AMDGPU::getIntegerVecAttribute(*F, "amdgpu-max-num-workgroups", 3); + + if (!HasReqdWorkGroupSize && !HasUniformWorkGroupSize && + none_of(MaxNumWorkgroups, [](unsigned X) { return X != 0; })) return false; Value *BlockCounts[3] = {nullptr, nullptr, nullptr}; @@ -133,16 +152,22 @@ static bool processUse(CallInst *CI, bool IsV5OrAbove) { if (IsV5OrAbove) { // Base is ImplicitArgPtr. 
switch (Offset) { case HIDDEN_BLOCK_COUNT_X: -if (LoadSize == 4) +if (LoadSize == 4) { BlockCounts[0] = Load; + annotateGridSizeLoadWithRangeMD(Load, MaxNumWorkgroups[0]); +} break; case HIDDEN_BLOCK_COUNT_Y: -if (LoadSize == 4) +if (LoadSize == 4) { BlockCounts[1] = Load; + annotateGridSizeLoadWithRangeMD(Load, MaxNumWorkgroups[1]); +} break; case HIDDEN_BLOCK_COUNT_Z: -if (LoadSize == 4) +if (LoadSize == 4) { BlockCounts[2] = Load; + annotateGridSizeLoadWithRangeMD(Load, MaxNumWorkgroups[2]); +} break; case HIDDEN_GROUP_SIZE_X: if (LoadSize == 2) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp index 54b17ca2cffb15..b18ce90cf45dba 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp @@ -369,6 +369,7 @@ const AMDGPUSubtarget &AMDGPUSubtarget::get(const TargetMachine &TM, const Funct TM.getSubtarget(F)); } +// FIXME: This has no reason to be in subtarget SmallVector AMDGPUSubtarget::getMaxNumWorkGroups(const Function &F) const { return AMDGPU::getIntegerVecAttribute(F, "amdgpu-max-num-workgroups", 3, diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-max-num-workgroups-load-annotate.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-max-num-workgroups-load-annotate.ll new file mode 100644 index 00..9064292129928f --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/amdgpu-max-num-workgroups-load-annotate.ll @@ -0,0 +1,124 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 5 +; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-lower-kernel-attributes %s | FileCheck %s + +define i32 @use_grid_size_x_max_num_workgroups() #0 { +; CHECK-LABEL: define i32 @use_grid_size_x_max_num_workgroups( +; CHECK-SAME: ) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT:[[IMPLICITARG_PTR:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() +; CHECK-NEXT:[[GRID_SIZE_X:%.*]] = load i32, ptr addrspace(4) [[IMPLICITARG_PTR]], align 4, !range [[RNG0:![0-9]+]] +; CHECK-NEXT:ret i32 [[GRID_SIZE_X]] +; + %implicitarg.ptr = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() + %grid.size.x = load i32,
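A minimal sketch of the IR this pass is meant to produce (hypothetical kernel and attribute value, not taken from the patch's test): with "amdgpu-max-num-workgroups"="1024,1,1", the v5 block-count-x load is annotated with a half-open [1, 1025) range.

```llvm
; Sketch only: assumes the x block count lives at offset 0 of the v5 implicit
; argument buffer, matching the HIDDEN_BLOCK_COUNT_X case handled above.
declare ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()

define i32 @grid_size_x_sketch() #0 {
  %implicitarg.ptr = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
  %block.count.x = load i32, ptr addrspace(4) %implicitarg.ptr, align 4, !range !0
  ret i32 %block.count.x
}

attributes #0 = { "amdgpu-max-num-workgroups"="1024,1,1" }
; MDBuilder::createRange(1, MaxNumGroups + 1) yields this metadata node.
!0 = !{i32 1, i32 1025}
```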
[llvm-branch-commits] [clang] [AMDGPU] Simplify dpp builtin handling (PR #115090)
https://github.com/rampitec updated https://github.com/llvm/llvm-project/pull/115090 >From f3d99e4ae92e407ebc2ef3f6b8e4017b397d34eb Mon Sep 17 00:00:00 2001 From: Stanislav Mekhanoshin Date: Mon, 4 Nov 2024 12:28:07 -0800 Subject: [PATCH] [AMDGPU] Simplify dpp builtin handling DPP intrinsics can handle any type now, so no need to cast to integer. The caveat is that intrinsics only handle backend legal types, but it does not work with i8 for example. --- clang/lib/CodeGen/CGBuiltin.cpp | 23 ++- .../CodeGenOpenCL/builtins-amdgcn-gfx10.cl| 30 -- .../test/CodeGenOpenCL/builtins-amdgcn-vi.cl | 60 +++ 3 files changed, 38 insertions(+), 75 deletions(-) diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 5c3df5124517d6..8c0e76c9e8c3d7 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -19211,37 +19211,24 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments); assert(Error == ASTContext::GE_None && "Should not codegen an error"); llvm::Type *DataTy = ConvertType(E->getArg(0)->getType()); -unsigned Size = DataTy->getPrimitiveSizeInBits(); -llvm::Type *IntTy = -llvm::IntegerType::get(Builder.getContext(), std::max(Size, 32u)); Function *F = CGM.getIntrinsic(BuiltinID == AMDGPU::BI__builtin_amdgcn_mov_dpp8 ? Intrinsic::amdgcn_mov_dpp8 : Intrinsic::amdgcn_update_dpp, - IntTy); + DataTy); assert(E->getNumArgs() == 5 || E->getNumArgs() == 6 || E->getNumArgs() == 2); bool InsertOld = BuiltinID == AMDGPU::BI__builtin_amdgcn_mov_dpp; if (InsertOld) - Args.push_back(llvm::PoisonValue::get(IntTy)); -for (unsigned I = 0; I != E->getNumArgs(); ++I) { + Args.push_back(llvm::PoisonValue::get(DataTy)); +Args.push_back(EmitScalarOrConstFoldImmArg(ICEArguments, 0, E)); +for (unsigned I = 1; I != E->getNumArgs(); ++I) { llvm::Value *V = EmitScalarOrConstFoldImmArg(ICEArguments, I, E); - if (I < (BuiltinID == AMDGPU::BI__builtin_amdgcn_update_dpp ? 
2u : 1u) && - Size < 32) { -if (!DataTy->isIntegerTy()) - V = Builder.CreateBitCast( - V, llvm::IntegerType::get(Builder.getContext(), Size)); -V = Builder.CreateZExtOrBitCast(V, IntTy); - } llvm::Type *ExpTy = F->getFunctionType()->getFunctionParamType(I + InsertOld); Args.push_back(Builder.CreateTruncOrBitCast(V, ExpTy)); } -Value *V = Builder.CreateCall(F, Args); -if (Size < 32 && !DataTy->isIntegerTy()) - V = Builder.CreateTrunc( - V, llvm::IntegerType::get(Builder.getContext(), Size)); -return Builder.CreateTruncOrBitCast(V, DataTy); +return Builder.CreateCall(F, Args); } case AMDGPU::BI__builtin_amdgcn_permlane16: case AMDGPU::BI__builtin_amdgcn_permlanex16: diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx10.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx10.cl index a4054cba236dd2..7e4ee6f4a942db 100644 --- a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx10.cl +++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx10.cl @@ -36,45 +36,37 @@ void test_mov_dpp8_long(global long* out, long a) { } // CHECK-LABEL: @test_mov_dpp8_float( -// CHECK: %0 = bitcast float %a to i32 -// CHECK-NEXT: %1 = tail call{{.*}} i32 @llvm.amdgcn.mov.dpp8.i32(i32 %0, i32 1) -// CHECK-NEXT: store i32 %1, +// CHECK: %0 = tail call{{.*}} float @llvm.amdgcn.mov.dpp8.f32(float %a, i32 1) +// CHECK-NEXT: store float %0, void test_mov_dpp8_float(global float* out, float a) { *out = __builtin_amdgcn_mov_dpp8(a, 1); } // CHECK-LABEL: @test_mov_dpp8_double -// CHECK: %0 = bitcast double %x to i64 -// CHECK-NEXT: %1 = tail call{{.*}} i64 @llvm.amdgcn.mov.dpp8.i64(i64 %0, i32 1) -// CHECK-NEXT: store i64 %1, +// CHECK: %0 = tail call{{.*}} double @llvm.amdgcn.mov.dpp8.f64(double %x, i32 1) +// CHECK-NEXT: store double %0, void test_mov_dpp8_double(double x, global double *p) { *p = __builtin_amdgcn_mov_dpp8(x, 1); } // CHECK-LABEL: @test_mov_dpp8_short -// CHECK: %0 = zext i16 %x to i32 -// CHECK-NEXT: %1 = tail call{{.*}} i32 @llvm.amdgcn.mov.dpp8.i32(i32 %0, i32 1) -// CHECK-NEXT: %2 = trunc i32 %1 to i16 -// CHECK-NEXT: store i16 %2, +// CHECK: %0 = tail call{{.*}} i16 @llvm.amdgcn.mov.dpp8.i16(i16 %x, i32 1) +// CHECK-NEXT: store i16 %0, void test_mov_dpp8_short(short x, global short *p) { *p = __builtin_amdgcn_mov_dpp8(x, 1); } // CHECK-LABEL: @test_mov_dpp8_char -// CHECK: %0 = zext i8 %x to i32 -// CHECK-NEXT: %1 = tail call{{.*}} i32 @llvm.amdgcn.mov.dpp8.i32(i32 %0, i32 1) -// CHECK-NEXT: %2 = trunc i32 %1 to i8 -// CHECK-NEXT: store i8 %2, +// CHECK: %0 = tail call{{.*}} i8 @llvm.amdgcn.mov.dpp8.i8(i8 %x,
[llvm-branch-commits] [llvm] [AArch64][PAC] Eliminate excessive MOVs when computing blend (PR #115185)
https://github.com/atrosinenko created https://github.com/llvm/llvm-project/pull/115185 As function calls do not generally preserve X16 and X17, it is beneficial to allow AddrDisc operand of B(L)RA instruction to reside in these registers and make use of this condition when computing the discriminator. This can save up to two MOVs in cases such as loading a (signed) virtual function pointer via a (signed) pointer to vtable, for example ldr x9, [x16] mov x8, x16 mov x17, x8 movk x17, #34646, lsl #48 blraa x9, x17 can be simplified to ldr x8, [x16] movk x16, #34646, lsl #48 blraa x8, x16 >From 54cb6c877d079c968084e2d723666c174fcb873d Mon Sep 17 00:00:00 2001 From: Anatoly Trosinenko Date: Mon, 21 Oct 2024 17:56:40 +0300 Subject: [PATCH] [AArch64][PAC] Eliminate excessive MOVs when computing blend As function calls do not generally preserve X16 and X17, it is beneficial to allow AddrDisc operand of B(L)RA instruction to reside in these registers and make use of this condition when computing the discriminator. This can save up to two MOVs in cases such as loading a (signed) virtual function pointer via a (signed) pointer to vtable, for example ldr x9, [x16] mov x8, x16 mov x17, x8 movk x17, #34646, lsl #48 blraa x9, x17 can be simplified to ldr x8, [x16] movk x16, #34646, lsl #48 blraa x8, x16 --- llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp | 94 +++ llvm/lib/Target/AArch64/AArch64InstrInfo.td | 18 ++-- llvm/test/CodeGen/AArch64/ptrauth-call.ll | 27 ++ 3 files changed, 89 insertions(+), 50 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp index 436bb332053f75..3263bb38ef1fcc 100644 --- a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp +++ b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp @@ -163,8 +163,15 @@ class AArch64AsmPrinter : public AsmPrinter { // Emit the sequence for AUT or AUTPAC. void emitPtrauthAuthResign(const MachineInstr *MI); - // Emit the sequence to compute a discriminator into x17, or reuse AddrDisc. - unsigned emitPtrauthDiscriminator(uint16_t Disc, unsigned AddrDisc); + // Emit the sequence to compute the discriminator. + // ScratchReg should be x16/x17. + // The returned register is either unmodified AddrDisc or x16/x17. + // If the expanded pseudo is allowed to clobber AddrDisc register, setting + // MayUseAddrAsScratch may save one MOV instruction, provided the address + // is already in x16/x17. + Register emitPtrauthDiscriminator(uint16_t Disc, Register AddrDisc, +Register ScratchReg, +bool MayUseAddrAsScratch = false); // Emit the sequence for LOADauthptrstatic void LowerLOADauthptrstatic(const MachineInstr &MI); @@ -1727,8 +1734,10 @@ void AArch64AsmPrinter::emitFMov0(const MachineInstr &MI) { } } -unsigned AArch64AsmPrinter::emitPtrauthDiscriminator(uint16_t Disc, - unsigned AddrDisc) { +Register AArch64AsmPrinter::emitPtrauthDiscriminator(uint16_t Disc, + Register AddrDisc, + Register ScratchReg, + bool MayUseAddrAsScratch) { // So far we've used NoRegister in pseudos. Now we need real encodings. if (AddrDisc == AArch64::NoRegister) AddrDisc = AArch64::XZR; @@ -1738,16 +1747,24 @@ unsigned AArch64AsmPrinter::emitPtrauthDiscriminator(uint16_t Disc, if (!Disc) return AddrDisc; - // If there's only a constant discriminator, MOV it into x17. + // If there's only a constant discriminator, MOV it into the scratch register. 
if (AddrDisc == AArch64::XZR) { -emitMOVZ(AArch64::X17, Disc, 0); -return AArch64::X17; +emitMOVZ(ScratchReg, Disc, 0); +return ScratchReg; } - // If there are both, emit a blend into x17. - emitMovXReg(AArch64::X17, AddrDisc); - emitMOVK(AArch64::X17, Disc, 48); - return AArch64::X17; + // If there are both, emit a blend into the scratch register. + + // Check if we can save one MOV instruction. + assert(MayUseAddrAsScratch || ScratchReg != AddrDisc); + bool AddrDiscIsSafe = AddrDisc == AArch64::X16 || AddrDisc == AArch64::X17; + if (MayUseAddrAsScratch && AddrDiscIsSafe) +ScratchReg = AddrDisc; + else +emitMovXReg(ScratchReg, AddrDisc); + + emitMOVK(ScratchReg, Disc, 48); + return ScratchReg; } /// Emits a code sequence to check an authenticated pointer value. @@ -1964,7 +1981,8 @@ void AArch64AsmPrinter::emitPtrauthAuthResign(const MachineInstr *MI) { // Compute aut discriminator into x17 assert(isUInt<16>(AUTDisc)); - unsigned AUTDiscReg = emitPtrauthDiscriminator(AUTDisc, AUTAddrDisc); + Register AUTDiscReg = + emitPtrauthDiscriminator(AUTDisc, AUTAddrDisc, AArch64::X17);
[llvm-branch-commits] [llvm] [AArch64][PAC] Eliminate excessive MOVs when computing blend (PR #115185)
atrosinenko wrote:

> [!WARNING]
> This pull request is not mergeable via GitHub because a downstack PR is open. Once all requirements are satisfied, merge this PR as a stack on Graphite (https://app.graphite.dev/github/pr/llvm/llvm-project/115185). Learn more: https://graphite.dev/docs/merge-pull-requests

* **#115185** 👈 (this PR)
* **#110705**
* **#110702**
* `main`

This stack of pull requests is managed by Graphite. Learn more about stacking: https://stacking.dev/

https://github.com/llvm/llvm-project/pull/115185
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [AArch64][PAC] Eliminate excessive MOVs when computing blend (PR #115185)
https://github.com/atrosinenko ready_for_review https://github.com/llvm/llvm-project/pull/115185 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [AArch64][PAC] Eliminate excessive MOVs when computing blend (PR #115185)
llvmbot wrote: @llvm/pr-subscribers-backend-aarch64 Author: Anatoly Trosinenko (atrosinenko) Changes As function calls do not generally preserve X16 and X17, it is beneficial to allow AddrDisc operand of B(L)RA instruction to reside in these registers and make use of this condition when computing the discriminator. This can save up to two MOVs in cases such as loading a (signed) virtual function pointer via a (signed) pointer to vtable, for example ldr x9, [x16] mov x8, x16 mov x17, x8 movk x17, #34646, lsl #48 blraa x9, x17 can be simplified to ldr x8, [x16] movk x16, #34646, lsl #48 blraa x8, x16 --- Full diff: https://github.com/llvm/llvm-project/pull/115185.diff 3 Files Affected: - (modified) llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp (+53-41) - (modified) llvm/lib/Target/AArch64/AArch64InstrInfo.td (+9-9) - (modified) llvm/test/CodeGen/AArch64/ptrauth-call.ll (+27) ``diff diff --git a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp index 436bb332053f75..3263bb38ef1fcc 100644 --- a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp +++ b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp @@ -163,8 +163,15 @@ class AArch64AsmPrinter : public AsmPrinter { // Emit the sequence for AUT or AUTPAC. void emitPtrauthAuthResign(const MachineInstr *MI); - // Emit the sequence to compute a discriminator into x17, or reuse AddrDisc. - unsigned emitPtrauthDiscriminator(uint16_t Disc, unsigned AddrDisc); + // Emit the sequence to compute the discriminator. + // ScratchReg should be x16/x17. + // The returned register is either unmodified AddrDisc or x16/x17. + // If the expanded pseudo is allowed to clobber AddrDisc register, setting + // MayUseAddrAsScratch may save one MOV instruction, provided the address + // is already in x16/x17. + Register emitPtrauthDiscriminator(uint16_t Disc, Register AddrDisc, +Register ScratchReg, +bool MayUseAddrAsScratch = false); // Emit the sequence for LOADauthptrstatic void LowerLOADauthptrstatic(const MachineInstr &MI); @@ -1727,8 +1734,10 @@ void AArch64AsmPrinter::emitFMov0(const MachineInstr &MI) { } } -unsigned AArch64AsmPrinter::emitPtrauthDiscriminator(uint16_t Disc, - unsigned AddrDisc) { +Register AArch64AsmPrinter::emitPtrauthDiscriminator(uint16_t Disc, + Register AddrDisc, + Register ScratchReg, + bool MayUseAddrAsScratch) { // So far we've used NoRegister in pseudos. Now we need real encodings. if (AddrDisc == AArch64::NoRegister) AddrDisc = AArch64::XZR; @@ -1738,16 +1747,24 @@ unsigned AArch64AsmPrinter::emitPtrauthDiscriminator(uint16_t Disc, if (!Disc) return AddrDisc; - // If there's only a constant discriminator, MOV it into x17. + // If there's only a constant discriminator, MOV it into the scratch register. if (AddrDisc == AArch64::XZR) { -emitMOVZ(AArch64::X17, Disc, 0); -return AArch64::X17; +emitMOVZ(ScratchReg, Disc, 0); +return ScratchReg; } - // If there are both, emit a blend into x17. - emitMovXReg(AArch64::X17, AddrDisc); - emitMOVK(AArch64::X17, Disc, 48); - return AArch64::X17; + // If there are both, emit a blend into the scratch register. + + // Check if we can save one MOV instruction. + assert(MayUseAddrAsScratch || ScratchReg != AddrDisc); + bool AddrDiscIsSafe = AddrDisc == AArch64::X16 || AddrDisc == AArch64::X17; + if (MayUseAddrAsScratch && AddrDiscIsSafe) +ScratchReg = AddrDisc; + else +emitMovXReg(ScratchReg, AddrDisc); + + emitMOVK(ScratchReg, Disc, 48); + return ScratchReg; } /// Emits a code sequence to check an authenticated pointer value. 
@@ -1964,7 +1981,8 @@ void AArch64AsmPrinter::emitPtrauthAuthResign(const MachineInstr *MI) { // Compute aut discriminator into x17 assert(isUInt<16>(AUTDisc)); - unsigned AUTDiscReg = emitPtrauthDiscriminator(AUTDisc, AUTAddrDisc); + Register AUTDiscReg = + emitPtrauthDiscriminator(AUTDisc, AUTAddrDisc, AArch64::X17); bool AUTZero = AUTDiscReg == AArch64::XZR; unsigned AUTOpc = getAUTOpcodeForKey(AUTKey, AUTZero); @@ -2005,7 +2023,8 @@ void AArch64AsmPrinter::emitPtrauthAuthResign(const MachineInstr *MI) { // Compute pac discriminator into x17 assert(isUInt<16>(PACDisc)); - unsigned PACDiscReg = emitPtrauthDiscriminator(PACDisc, PACAddrDisc); + Register PACDiscReg = + emitPtrauthDiscriminator(PACDisc, PACAddrDisc, AArch64::X17); bool PACZero = PACDiscReg == AArch64::XZR; unsigned PACOpc = getPACOpcodeForKey(PACKey, PACZero); @@ -2037,8 +2056,17 @@ void AArch64AsmPrinter::emitPtrauthBranch(const MachineInstr *MI) { unsigned AddrDisc = MI->getOperand(3).getRe
[llvm-branch-commits] [clang] [flang] [lld] [llvm] [Flang] LLVM_ENABLE_RUNTIMES=flang-rt (PR #110217)
@@ -171,145 +76,88 @@ set(sources unit-map.cpp unit.cpp utf.cpp - ${FORTRAN_MODULE_OBJECTS} ) -include(AddFlangOffloadRuntime) - -# List of files that are buildable for all devices. -set(supported_files - ISO_Fortran_binding.cpp - allocatable.cpp - allocator-registry.cpp - array-constructor.cpp - assign.cpp - buffer.cpp - character.cpp - connection.cpp - copy.cpp - derived-api.cpp - derived.cpp - descriptor.cpp - descriptor-io.cpp - dot-product.cpp - edit-input.cpp - edit-output.cpp - environment.cpp - extrema.cpp - external-unit.cpp - file.cpp - findloc.cpp - format.cpp - inquiry.cpp - internal-unit.cpp - io-api.cpp - io-api-minimal.cpp - io-error.cpp - io-stmt.cpp - iostat.cpp - matmul-transpose.cpp - matmul.cpp - memory.cpp - misc-intrinsic.cpp - namelist.cpp - non-tbp-dio.cpp - numeric.cpp - pointer.cpp - product.cpp - pseudo-unit.cpp - ragged.cpp - stat.cpp - sum.cpp - support.cpp - terminator.cpp - tools.cpp - transformational.cpp - type-code.cpp - type-info.cpp - unit.cpp - utf.cpp +set(public_headers "") +file(GLOB_RECURSE public_headers + "${FLANGRUNTIME_SOURCE_DIR}/include/flang/Runtime/*.h" + "${FLANGRUNTIME_SOURCE_DIR}/include/flang/Common/*.h" ) -enable_cuda_compilation(FortranRuntime "${supported_files}") -enable_omp_offload_compilation("${supported_files}") jhuber6 wrote: Is that going to be supported upstream? `clang` is a CUDA compiler so `flang` can be as well, but since offloading in `flang` is already using my driver code (AFAIK) it might be easier to just make it common. https://github.com/llvm/llvm-project/pull/110217 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [compiler-rt] Reduce build output for tests (PR #115085)
https://github.com/arichardson updated https://github.com/llvm/llvm-project/pull/115085 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] 71a162c - Revert "[NVPTX] Emit prmt selection value in hex (#115049)"
Author: Justin Fargnoli Date: 2024-11-06T12:10:29-08:00 New Revision: 71a162cbf76febd1bc87a0ab6292ed3c89f947bc URL: https://github.com/llvm/llvm-project/commit/71a162cbf76febd1bc87a0ab6292ed3c89f947bc DIFF: https://github.com/llvm/llvm-project/commit/71a162cbf76febd1bc87a0ab6292ed3c89f947bc.diff LOG: Revert "[NVPTX] Emit prmt selection value in hex (#115049)" This reverts commit 3ed4b0b0efca7a9467ce83fc62de9413da38006d. Added: Modified: llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.cpp llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.h llvm/lib/Target/NVPTX/NVPTXInstrInfo.td llvm/test/CodeGen/NVPTX/i8x4-instructions.ll llvm/test/CodeGen/NVPTX/sext-setcc.ll llvm/test/CodeGen/NVPTX/shuffle-vec-undef-init.ll Removed: diff --git a/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.cpp b/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.cpp index efb2adca3a565f..4211ae5a2eebcd 100644 --- a/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.cpp +++ b/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.cpp @@ -373,12 +373,6 @@ void NVPTXInstPrinter::printOffseti32imm(const MCInst *MI, int OpNum, } } -void NVPTXInstPrinter::printHexu32imm(const MCInst *MI, int OpNum, - raw_ostream &O, const char *Modifier) { - int64_t Imm = MI->getOperand(OpNum).getImm(); - O << formatHex(Imm) << "U"; -} - void NVPTXInstPrinter::printProtoIdent(const MCInst *MI, int OpNum, raw_ostream &O, const char *Modifier) { const MCOperand &Op = MI->getOperand(OpNum); diff --git a/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.h b/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.h index 2ce40bd6e8b973..a17c472d3f0d90 100644 --- a/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.h +++ b/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.h @@ -48,8 +48,6 @@ class NVPTXInstPrinter : public MCInstPrinter { raw_ostream &O, const char *Modifier = nullptr); void printOffseti32imm(const MCInst *MI, int OpNum, raw_ostream &O, const char *Modifier = nullptr); - void printHexu32imm(const MCInst *MI, int OpNum, raw_ostream &O, - const char *Modifier = nullptr); void printProtoIdent(const MCInst *MI, int OpNum, raw_ostream &O, const char *Modifier = nullptr); void printPrmtMode(const MCInst *MI, int OpNum, raw_ostream &O, diff --git a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td index a16935dcbb93be..2658ca32716378 100644 --- a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td +++ b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td @@ -1740,10 +1740,6 @@ multiclass BFI { [(set (T RC:$f), (bfi (T imm:$a), (T RC:$b), (i32 imm:$c), (i32 imm:$d)))]>; } -def Hexu32imm : Operand { - let PrintMethod = "printHexu32imm"; -} - multiclass PRMT { def rrr : NVPTXInst<(outs RC:$d), @@ -1752,12 +1748,12 @@ multiclass PRMT { [(set (T RC:$d), (prmt (T RC:$a), (T RC:$b), (i32 Int32Regs:$c), imm:$mode))]>; def rri : NVPTXInst<(outs RC:$d), -(ins RC:$a, Int32Regs:$b, Hexu32imm:$c, PrmtMode:$mode), +(ins RC:$a, Int32Regs:$b, i32imm:$c, PrmtMode:$mode), !strconcat("prmt.b32${mode}", " \t$d, $a, $b, $c;"), [(set (T RC:$d), (prmt (T RC:$a), (T RC:$b), (i32 imm:$c), imm:$mode))]>; def rii : NVPTXInst<(outs RC:$d), -(ins RC:$a, i32imm:$b, Hexu32imm:$c, PrmtMode:$mode), +(ins RC:$a, i32imm:$b, i32imm:$c, PrmtMode:$mode), !strconcat("prmt.b32${mode}", " \t$d, $a, $b, $c;"), [(set (T RC:$d), (prmt (T RC:$a), (T imm:$b), (i32 imm:$c), imm:$mode))]>; } diff --git a/llvm/test/CodeGen/NVPTX/i8x4-instructions.ll b/llvm/test/CodeGen/NVPTX/i8x4-instructions.ll index c143d7674a7923..a16a5b435962df 100644 --- 
a/llvm/test/CodeGen/NVPTX/i8x4-instructions.ll +++ b/llvm/test/CodeGen/NVPTX/i8x4-instructions.ll @@ -118,7 +118,7 @@ define <4 x i8> @test_add(<4 x i8> %a, <4 x i8> %b) #0 { ; CHECK-NEXT:cvt.u16.u32 %rs5, %r7; ; CHECK-NEXT:add.s16 %rs6, %rs5, %rs4; ; CHECK-NEXT:cvt.u32.u16 %r8, %rs6; -; CHECK-NEXT:prmt.b32 %r9, %r8, %r5, 0x3340U; +; CHECK-NEXT:prmt.b32 %r9, %r8, %r5, 13120; ; CHECK-NEXT:bfe.u32 %r10, %r2, 8, 8; ; CHECK-NEXT:cvt.u16.u32 %rs7, %r10; ; CHECK-NEXT:bfe.u32 %r11, %r1, 8, 8; @@ -131,8 +131,8 @@ define <4 x i8> @test_add(<4 x i8> %a, <4 x i8> %b) #0 { ; CHECK-NEXT:cvt.u16.u32 %rs11, %r14; ; CHECK-NEXT:add.s16 %rs12, %rs11, %rs10; ; CHECK-NEXT:cvt.u32.u16 %r15, %rs12; -; CHECK-NEXT:prmt.b32 %r16, %r15, %r12, 0x3340U; -; CHECK-NEXT:prmt.b32 %r17, %r16, %r9, 0x5410U; +; CHECK-NEXT:prmt.b32 %r16, %r15, %r12, 13120; +; CHECK-NEXT:prmt.b32 %r17, %r16
[llvm-branch-commits] [lldb] [lldb][LoongArch] Function calls support in lldb expressions (PR #114742)
https://github.com/wangleiat updated https://github.com/llvm/llvm-project/pull/114742 >From f390561ee9c49dd10f0b13b79b713624664d7da2 Mon Sep 17 00:00:00 2001 From: wanglei Date: Mon, 4 Nov 2024 17:12:03 +0800 Subject: [PATCH 1/3] comply with code style Created using spr 1.3.5-bogner --- lldb/source/Plugins/ABI/LoongArch/ABISysV_loongarch.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lldb/source/Plugins/ABI/LoongArch/ABISysV_loongarch.h b/lldb/source/Plugins/ABI/LoongArch/ABISysV_loongarch.h index 6e57b0806e54f5..5069bc48bbfba2 100644 --- a/lldb/source/Plugins/ABI/LoongArch/ABISysV_loongarch.h +++ b/lldb/source/Plugins/ABI/LoongArch/ABISysV_loongarch.h @@ -6,8 +6,8 @@ // //===--===// -#ifndef liblldb_ABISysV_loongarch_h_ -#define liblldb_ABISysV_loongarch_h_ +#ifndef LLDB_SOURCE_PLUGINS_ABI_LOONGARCH_ABISYSV_LOONGARCH_H +#define LLDB_SOURCE_PLUGINS_ABI_LOONGARCH_ABISYSV_LOONGARCH_H // Other libraries and framework includes #include "llvm/TargetParser/Triple.h" @@ -101,4 +101,4 @@ class ABISysV_loongarch : public lldb_private::RegInfoBasedABI { // loongarch32 }; -#endif // liblldb_ABISysV_loongarch_h_ +#endif // LLDB_SOURCE_PLUGINS_ABI_LOONGARCH_ABISYSV_LOONGARCH_H >From 8363707da351b6f2c10f1e945514402c5ceea65d Mon Sep 17 00:00:00 2001 From: wanglei Date: Tue, 5 Nov 2024 18:28:25 +0800 Subject: [PATCH 2/3] Address @DavidSpickett's comments Created using spr 1.3.5-bogner --- .../ABI/LoongArch/ABISysV_loongarch.cpp | 200 +- .../Plugins/ABI/LoongArch/ABISysV_loongarch.h | 6 +- 2 files changed, 107 insertions(+), 99 deletions(-) diff --git a/lldb/source/Plugins/ABI/LoongArch/ABISysV_loongarch.cpp b/lldb/source/Plugins/ABI/LoongArch/ABISysV_loongarch.cpp index cd8270c01113f7..1624af4fd6f6e8 100644 --- a/lldb/source/Plugins/ABI/LoongArch/ABISysV_loongarch.cpp +++ b/lldb/source/Plugins/ABI/LoongArch/ABISysV_loongarch.cpp @@ -30,10 +30,10 @@ // The ABI is not a source of such information as size, offset, encoding, etc. // of a register. Just provides correct dwarf and eh_frame numbers. 
-#define DEFINE_GENERIC_REGISTER_STUB(dwarf_num, str_name, generic_num) \ +#define DEFINE_GENERIC_REGISTER_STUB(dwarf_num, generic_num) \ { \ DEFINE_REG_NAME(dwarf_num), \ - DEFINE_REG_NAME_STR(str_name), \ + DEFINE_REG_NAME_STR(nullptr), \ 0, \ 0, \ eEncodingInvalid, \ @@ -44,8 +44,8 @@ nullptr, \ } -#define DEFINE_REGISTER_STUB(dwarf_num, str_name) \ - DEFINE_GENERIC_REGISTER_STUB(dwarf_num, str_name, LLDB_INVALID_REGNUM) +#define DEFINE_REGISTER_STUB(dwarf_num) \ + DEFINE_GENERIC_REGISTER_STUB(dwarf_num, LLDB_INVALID_REGNUM) using namespace lldb; using namespace lldb_private; @@ -94,39 +94,39 @@ enum regnums { }; static const std::array g_register_infos = { -{DEFINE_REGISTER_STUB(r0, nullptr), - DEFINE_GENERIC_REGISTER_STUB(r1, nullptr, LLDB_REGNUM_GENERIC_RA), - DEFINE_REGISTER_STUB(r2, nullptr), - DEFINE_GENERIC_REGISTER_STUB(r3, nullptr, LLDB_REGNUM_GENERIC_SP), - DEFINE_GENERIC_REGISTER_STUB(r4, nullptr, LLDB_REGNUM_GENERIC_ARG1), - DEFINE_GENERIC_REGISTER_STUB(r5, nullptr, LLDB_REGNUM_GENERIC_ARG2), - DEFINE_GENERIC_REGISTER_STUB(r6, nullptr, LLDB_REGNUM_GENERIC_ARG3), - DEFINE_GENERIC_REGISTER_STUB(r7, nullptr, LLDB_REGNUM_GENERIC_ARG4), - DEFINE_GENERIC_REGISTER_STUB(r8, nullptr, LLDB_REGNUM_GENERIC_ARG5), - DEFINE_GENERIC_REGISTER_STUB(r9, nullptr, LLDB_REGNUM_GENERIC_ARG6), - DEFINE_GENERIC_REGISTER_STUB(r10, nullptr, LLDB_REGNUM_GENERIC_ARG7), - DEFINE_GENERIC_REGISTER_STUB(r11, nullptr, LLDB_REGNUM_GENERIC_ARG8), - DEFINE_REGISTER_STUB(r12, nullptr), - DEFINE_REGISTER_STUB(r13, nullptr), - DEFINE_REGISTER_STUB(r14, nullptr), - DEFINE_REGISTER_STUB(r15, nullptr), - DEFINE_REGISTER_STUB(r16, nullptr), - DEFINE_REGISTER_STUB(r17, nullptr), - DEFINE_REGISTER_STUB(r18, nullptr), - DEFINE_REGISTER_STUB(r19, nullptr), - DEFINE_REGISTER_STUB(r20, nullptr), - DEFINE_REGISTER_STUB(r21, nullptr), - DEFINE_GENERIC_REGISTER_STUB(r22, nullptr, LLDB_REGNUM_GENERIC_FP), - DEFINE_REGISTER_STUB(r23, nullptr), - DEFINE_REGISTER_STUB(r24, nullptr), - DEFINE_REGISTER_STUB(r25, nullptr), - DEFINE_REGISTER_STUB(r26, nullptr), - DEFINE_REGISTER
[llvm-branch-commits] [clang] [AMDGPU] Simplify dpp builtin handling (PR #115090)
https://github.com/rampitec updated https://github.com/llvm/llvm-project/pull/115090 >From 084e347f5fb6e9068313ad4dbc53b44c2d4cee69 Mon Sep 17 00:00:00 2001 From: Stanislav Mekhanoshin Date: Mon, 4 Nov 2024 12:28:07 -0800 Subject: [PATCH] [AMDGPU] Simplify dpp builtin handling DPP intrinsics can handle any type now, so no need to cast to integer. The caveat is that intrinsics only handle backend legal types, but it does not work with i8 for example. --- clang/lib/CodeGen/CGBuiltin.cpp | 23 ++- .../CodeGenOpenCL/builtins-amdgcn-gfx10.cl| 30 -- .../test/CodeGenOpenCL/builtins-amdgcn-vi.cl | 60 +++ 3 files changed, 38 insertions(+), 75 deletions(-) diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 82770a75af23e4..7e3e6463799fb6 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -19193,37 +19193,24 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments); assert(Error == ASTContext::GE_None && "Should not codegen an error"); llvm::Type *DataTy = ConvertType(E->getArg(0)->getType()); -unsigned Size = DataTy->getPrimitiveSizeInBits(); -llvm::Type *IntTy = -llvm::IntegerType::get(Builder.getContext(), std::max(Size, 32u)); Function *F = CGM.getIntrinsic(BuiltinID == AMDGPU::BI__builtin_amdgcn_mov_dpp8 ? Intrinsic::amdgcn_mov_dpp8 : Intrinsic::amdgcn_update_dpp, - IntTy); + DataTy); assert(E->getNumArgs() == 5 || E->getNumArgs() == 6 || E->getNumArgs() == 2); bool InsertOld = BuiltinID == AMDGPU::BI__builtin_amdgcn_mov_dpp; if (InsertOld) - Args.push_back(llvm::PoisonValue::get(IntTy)); -for (unsigned I = 0; I != E->getNumArgs(); ++I) { + Args.push_back(llvm::PoisonValue::get(DataTy)); +Args.push_back(EmitScalarOrConstFoldImmArg(ICEArguments, 0, E)); +for (unsigned I = 1; I != E->getNumArgs(); ++I) { llvm::Value *V = EmitScalarOrConstFoldImmArg(ICEArguments, I, E); - if (I < (BuiltinID == AMDGPU::BI__builtin_amdgcn_update_dpp ? 
2u : 1u) && - Size < 32) { -if (!DataTy->isIntegerTy()) - V = Builder.CreateBitCast( - V, llvm::IntegerType::get(Builder.getContext(), Size)); -V = Builder.CreateZExtOrBitCast(V, IntTy); - } llvm::Type *ExpTy = F->getFunctionType()->getFunctionParamType(I + InsertOld); Args.push_back(Builder.CreateTruncOrBitCast(V, ExpTy)); } -Value *V = Builder.CreateCall(F, Args); -if (Size < 32 && !DataTy->isIntegerTy()) - V = Builder.CreateTrunc( - V, llvm::IntegerType::get(Builder.getContext(), Size)); -return Builder.CreateTruncOrBitCast(V, DataTy); +return Builder.CreateCall(F, Args); } case AMDGPU::BI__builtin_amdgcn_permlane16: case AMDGPU::BI__builtin_amdgcn_permlanex16: diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx10.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx10.cl index a4054cba236dd2..7e4ee6f4a942db 100644 --- a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx10.cl +++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx10.cl @@ -36,45 +36,37 @@ void test_mov_dpp8_long(global long* out, long a) { } // CHECK-LABEL: @test_mov_dpp8_float( -// CHECK: %0 = bitcast float %a to i32 -// CHECK-NEXT: %1 = tail call{{.*}} i32 @llvm.amdgcn.mov.dpp8.i32(i32 %0, i32 1) -// CHECK-NEXT: store i32 %1, +// CHECK: %0 = tail call{{.*}} float @llvm.amdgcn.mov.dpp8.f32(float %a, i32 1) +// CHECK-NEXT: store float %0, void test_mov_dpp8_float(global float* out, float a) { *out = __builtin_amdgcn_mov_dpp8(a, 1); } // CHECK-LABEL: @test_mov_dpp8_double -// CHECK: %0 = bitcast double %x to i64 -// CHECK-NEXT: %1 = tail call{{.*}} i64 @llvm.amdgcn.mov.dpp8.i64(i64 %0, i32 1) -// CHECK-NEXT: store i64 %1, +// CHECK: %0 = tail call{{.*}} double @llvm.amdgcn.mov.dpp8.f64(double %x, i32 1) +// CHECK-NEXT: store double %0, void test_mov_dpp8_double(double x, global double *p) { *p = __builtin_amdgcn_mov_dpp8(x, 1); } // CHECK-LABEL: @test_mov_dpp8_short -// CHECK: %0 = zext i16 %x to i32 -// CHECK-NEXT: %1 = tail call{{.*}} i32 @llvm.amdgcn.mov.dpp8.i32(i32 %0, i32 1) -// CHECK-NEXT: %2 = trunc i32 %1 to i16 -// CHECK-NEXT: store i16 %2, +// CHECK: %0 = tail call{{.*}} i16 @llvm.amdgcn.mov.dpp8.i16(i16 %x, i32 1) +// CHECK-NEXT: store i16 %0, void test_mov_dpp8_short(short x, global short *p) { *p = __builtin_amdgcn_mov_dpp8(x, 1); } // CHECK-LABEL: @test_mov_dpp8_char -// CHECK: %0 = zext i8 %x to i32 -// CHECK-NEXT: %1 = tail call{{.*}} i32 @llvm.amdgcn.mov.dpp8.i32(i32 %0, i32 1) -// CHECK-NEXT: %2 = trunc i32 %1 to i8 -// CHECK-NEXT: store i8 %2, +// CHECK: %0 = tail call{{.*}} i8 @llvm.amdgcn.mov.dpp8.i8(i8 %x,
[llvm-branch-commits] [clang] [Serialization] Code cleanups and polish 83233 (PR #83237)
ilya-biryukov wrote: > > > Sorry, could you provide the hash id for the commit that avoid the > > > warning? > > > > > > I tried running this on head and unfortunately it reproduces on head as > > well :( So this looks like a sleeper issue which now also gets triggered by > > this PR in a non-reduced version in our codebase. Looks like it is not rare > > with module-related issues where most issues remain silent due to lazy > > deserialization in large codebases. Would it be possible for you to take a > > look at this issue or provide pointers ? > > Got it. I am not sure if I had the time. But given this is not related to > this PR, and it sounds like a regression issue triggered by other commits. > Maybe it might be helpful to find that commit to understand the issue better. +1, I think that fixing the underlying issue to unblock this might be the easiest path. Reducing and checking at every step that compiler from HEAD succeeds and compiler with this commit fails will likely take more time than actually fixing it. @usx95 would you mind looking at the underlying issue? I'm also happy to help dig through it. https://github.com/llvm/llvm-project/pull/83237 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [RISCV] Support memcmp expansion for vectors (PR #114517)
@@ -2525,5 +2527,21 @@ RISCVTTIImpl::enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const { Options.LoadSizes = {8, 4, 2, 1}; else Options.LoadSizes = {4, 2, 1}; + if (IsZeroCmp && ST->hasVInstructions()) { wangpc-pp wrote: Good catch! I will add a guard here! https://github.com/llvm/llvm-project/pull/114517 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
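A minimal sketch of such a guard (assumptions: the subtarget exposes enableUnalignedVectorMem(), and the exact size list is illustrative rather than the patch's final choice):

```cpp
// Only advertise vector-sized zero-compares when unaligned *vector* accesses
// are fast, so that +unaligned-scalar-mem alone does not trigger vector
// expansion of bcmp/memcmp.
if (IsZeroCmp && ST->hasVInstructions() && ST->enableUnalignedVectorMem()) {
  unsigned MaxBytes =
      (ST->getRealMinVLen() / 8) * ST->getMaxLMULForFixedLengthVectors();
  // LoadSizes is expected in decreasing order, so prepend the vector sizes
  // (starting at twice the largest scalar load size in bytes).
  for (unsigned Size = ST->getXLen() / 4; Size <= MaxBytes; Size *= 2)
    Options.LoadSizes.insert(Options.LoadSizes.begin(), Size);
}
```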
[llvm-branch-commits] [flang] 74419f8 - Revert "[flang][OpenMP] Add alias analysis for omp private (#113566)"
Author: Dominik Adamski Date: 2024-11-06T09:22:17+01:00 New Revision: 74419f801de610cf20a78af5d9562e1eb2387c23 URL: https://github.com/llvm/llvm-project/commit/74419f801de610cf20a78af5d9562e1eb2387c23 DIFF: https://github.com/llvm/llvm-project/commit/74419f801de610cf20a78af5d9562e1eb2387c23.diff LOG: Revert "[flang][OpenMP] Add alias analysis for omp private (#113566)" This reverts commit f3025c8b4fd797d99a8a8117254f93605ec46aa8. Added: Modified: flang/lib/Optimizer/Analysis/AliasAnalysis.cpp Removed: flang/test/Analysis/AliasAnalysis/alias-analysis-omp-teams-distribute-private-ptr.mlir flang/test/Analysis/AliasAnalysis/alias-analysis-omp-teams-distribute-private.mlir diff --git a/flang/lib/Optimizer/Analysis/AliasAnalysis.cpp b/flang/lib/Optimizer/Analysis/AliasAnalysis.cpp index 993d41633a0793..8b7918744017cc 100644 --- a/flang/lib/Optimizer/Analysis/AliasAnalysis.cpp +++ b/flang/lib/Optimizer/Analysis/AliasAnalysis.cpp @@ -372,29 +372,6 @@ getAttrsFromVariable(fir::FortranVariableOpInterface var) { return attrs; } -template -static Value getPrivateArg(omp::BlockArgOpenMPOpInterface &argIface, - OMPTypeOp &op, DeclTypeOp &declOp) { - Value privateArg; - if (!op.getPrivateSyms().has_value()) -return privateArg; - for (auto [opSym, blockArg] : - llvm::zip_equal(*op.getPrivateSyms(), argIface.getPrivateBlockArgs())) { -if (blockArg == declOp.getMemref()) { - omp::PrivateClauseOp privateOp = - SymbolTable::lookupNearestSymbolFrom( - op, cast(opSym)); - privateOp.walk([&](omp::YieldOp yieldOp) { -llvm::TypeSwitch(yieldOp.getResults()[0].getDefiningOp()) -.template Case( -[&](auto declOp) { privateArg = declOp.getMemref(); }); - }); - return privateArg; -} - } - return privateArg; -} - AliasAnalysis::Source AliasAnalysis::getSource(mlir::Value v, bool getInstantiationPoint) { auto *defOp = v.getDefiningOp(); @@ -493,37 +470,20 @@ AliasAnalysis::Source AliasAnalysis::getSource(mlir::Value v, breakFromLoop = true; }) .Case([&](auto op) { - if (omp::BlockArgOpenMPOpInterface argIface = - dyn_cast(op->getParentOp())) { -Value ompValArg; -llvm::TypeSwitch(op->getParentOp()) -.template Case([&](auto targetOp) { - // If declare operation is inside omp target region, - // continue alias analysis outside the target region - for (auto [opArg, blockArg] : llvm::zip_equal( - targetOp.getMapVars(), argIface.getMapBlockArgs())) { -if (blockArg == op.getMemref()) { - omp::MapInfoOp mapInfo = - llvm::cast(opArg.getDefiningOp()); - ompValArg = mapInfo.getVarPtr(); - break; -} - } - // If given operation does not reflect mapping item, - // check private clause - if (!ompValArg) -ompValArg = getPrivateArg(argIface, targetOp, op); -}) -.template Case( -[&](auto privateOp) { - ompValArg = getPrivateArg(argIface, privateOp, op); -}); -if (ompValArg) { - v = ompValArg; - defOp = ompValArg.getDefiningOp(); - return; + // If declare operation is inside omp target region, + // continue alias analysis outside the target region + if (auto targetOp = + llvm::dyn_cast(op->getParentOp())) { +auto argIface = cast(*targetOp); +for (auto [opArg, blockArg] : llvm::zip_equal( + targetOp.getMapVars(), argIface.getMapBlockArgs())) { + if (blockArg == op.getMemref()) { +omp::MapInfoOp mapInfo = +llvm::cast(opArg.getDefiningOp()); +v = mapInfo.getVarPtr(); +defOp = v.getDefiningOp(); +return; + } } } auto varIf = llvm::cast(defOp); diff --git a/flang/test/Analysis/AliasAnalysis/alias-analysis-omp-teams-distribute-private-ptr.mlir b/flang/test/Analysis/AliasAnalysis/alias-analysis-omp-teams-distribute-private-ptr.mlir deleted file 
mode 100644 index 78207d21c45bf3..00 --- a/flang/test/Analysis/AliasAnalysis/alias-analysis-omp-teams-distribute-private-ptr.mlir +++ /dev/null @@ -1,102 +0,0 @@ -// Use --mlir-disable-threading so that the AA queries are serialized -// as well as its diagnostic output. -// RUN: fir-opt %s -pass-pipeline='builtin.module(func.func(test-fir-alias-analysis))'
[llvm-branch-commits] [llvm] [RISCV] Support memcmp expansion for vectors (PR #114517)
@@ -14520,17 +14520,78 @@ static bool narrowIndex(SDValue &N, ISD::MemIndexType IndexType, SelectionDAG &D return true; } +/// Try to map an integer comparison with size > XLEN to vector instructions +/// before type legalization splits it up into chunks. +static SDValue +combineVectorSizedSetCCEquality(EVT VT, SDValue X, SDValue Y, ISD::CondCode CC, +const SDLoc &DL, SelectionDAG &DAG, +const RISCVSubtarget &Subtarget) { + assert(ISD::isIntEqualitySetCC(CC) && "Bad comparison predicate"); + + if (!Subtarget.hasVInstructions()) +return SDValue(); + + MVT XLenVT = Subtarget.getXLenVT(); + EVT OpVT = X.getValueType(); + // We're looking for an oversized integer equality comparison. + if (OpVT.isScalableVT() || !OpVT.isScalarInteger()) +return SDValue(); + + unsigned OpSize = OpVT.getSizeInBits(); + // TODO: Support non-power-of-2 types. + if (!isPowerOf2_32(OpSize)) +return SDValue(); + + // The size should be larger than XLen and smaller than the maximum vector + // size. + if (OpSize <= Subtarget.getXLen() || + OpSize > Subtarget.getRealMinVLen() * + Subtarget.getMaxLMULForFixedLengthVectors()) +return SDValue(); + + // Don't perform this combine if constructing the vector will be expensive. + auto IsVectorBitCastCheap = [](SDValue X) { +X = peekThroughBitcasts(X); +return isa(X) || X.getValueType().isVector() || + X.getOpcode() == ISD::LOAD; + }; + if (!IsVectorBitCastCheap(X) || !IsVectorBitCastCheap(Y)) +return SDValue(); + + if (DAG.getMachineFunction().getFunction().hasFnAttribute( + Attribute::NoImplicitFloat)) +return SDValue(); lukel97 wrote: Do we need to check for this on RISC-V? We're not introducing any FP code here https://github.com/llvm/llvm-project/pull/114517 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
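For context, a hedged illustration (not taken from the patch's tests) of the kind of input the combine targets: an oversized integer equality compare that type legalization would otherwise split into XLEN-sized chunks.

```llvm
; With +v and a large enough VLEN, the two i256 loads can be bitcast to
; fixed-length vectors and compared as a whole (vmsne.vv + vcpop.m) instead
; of as four or eight scalar xor/or chunks.
define i1 @eq_i256(ptr %a, ptr %b) {
  %x = load i256, ptr %a
  %y = load i256, ptr %b
  %cmp = icmp eq i256 %x, %y
  ret i1 %cmp
}
```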
[llvm-branch-commits] [llvm] [RISCV] Support memcmp expansion for vectors (PR #114517)
@@ -2525,5 +2527,21 @@ RISCVTTIImpl::enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const { Options.LoadSizes = {8, 4, 2, 1}; else Options.LoadSizes = {4, 2, 1}; + if (IsZeroCmp && ST->hasVInstructions()) { lukel97 wrote: Doesn't this mean that processors with only +unaligned-scalar-mem will now expand vector-sized compares? https://github.com/llvm/llvm-project/pull/114517 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [RISCV] Support memcmp expansion for vectors (PR #114517)
@@ -14520,17 +14520,78 @@ static bool narrowIndex(SDValue &N, ISD::MemIndexType IndexType, SelectionDAG &D return true; } +/// Try to map an integer comparison with size > XLEN to vector instructions +/// before type legalization splits it up into chunks. +static SDValue +combineVectorSizedSetCCEquality(EVT VT, SDValue X, SDValue Y, ISD::CondCode CC, +const SDLoc &DL, SelectionDAG &DAG, +const RISCVSubtarget &Subtarget) { + assert(ISD::isIntEqualitySetCC(CC) && "Bad comparison predicate"); + + if (!Subtarget.hasVInstructions()) +return SDValue(); + + MVT XLenVT = Subtarget.getXLenVT(); + EVT OpVT = X.getValueType(); + // We're looking for an oversized integer equality comparison. + if (OpVT.isScalableVT() || !OpVT.isScalarInteger()) +return SDValue(); + + unsigned OpSize = OpVT.getSizeInBits(); + // TODO: Support non-power-of-2 types. + if (!isPowerOf2_32(OpSize)) +return SDValue(); + + // The size should be larger than XLen and smaller than the maximum vector + // size. + if (OpSize <= Subtarget.getXLen() || + OpSize > Subtarget.getRealMinVLen() * + Subtarget.getMaxLMULForFixedLengthVectors()) +return SDValue(); + + // Don't perform this combine if constructing the vector will be expensive. + auto IsVectorBitCastCheap = [](SDValue X) { +X = peekThroughBitcasts(X); +return isa(X) || X.getValueType().isVector() || + X.getOpcode() == ISD::LOAD; + }; + if (!IsVectorBitCastCheap(X) || !IsVectorBitCastCheap(Y)) +return SDValue(); + + if (DAG.getMachineFunction().getFunction().hasFnAttribute( + Attribute::NoImplicitFloat)) +return SDValue(); lukel97 wrote: Oh that's right noimplicitfloat also disables SIMD, I forgot about that. https://github.com/llvm/llvm-project/pull/114517 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [RISCV] Support memcmp expansion for vectors (PR #114517)
https://github.com/lukel97 edited https://github.com/llvm/llvm-project/pull/114517 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [RISCV] Add vcpop.m/vfirst.m to RISCVMaskedPseudosTable (PR #115162)
https://github.com/lukel97 edited https://github.com/llvm/llvm-project/pull/115162 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [RISCV] Support non-power-of-2 types when expanding memcmp (PR #114971)
@@ -1069,21 +1069,14 @@ define i32 @bcmp_size_15(ptr %s1, ptr %s2) nounwind optsize { ; ; CHECK-UNALIGNED-RV32-V-LABEL: bcmp_size_15: ; CHECK-UNALIGNED-RV32-V: # %bb.0: # %entry -; CHECK-UNALIGNED-RV32-V-NEXT:lw a2, 0(a0) -; CHECK-UNALIGNED-RV32-V-NEXT:lw a3, 4(a0) -; CHECK-UNALIGNED-RV32-V-NEXT:lw a4, 7(a0) -; CHECK-UNALIGNED-RV32-V-NEXT:lw a0, 11(a0) -; CHECK-UNALIGNED-RV32-V-NEXT:lw a5, 0(a1) -; CHECK-UNALIGNED-RV32-V-NEXT:lw a6, 4(a1) -; CHECK-UNALIGNED-RV32-V-NEXT:lw a7, 7(a1) -; CHECK-UNALIGNED-RV32-V-NEXT:lw a1, 11(a1) -; CHECK-UNALIGNED-RV32-V-NEXT:xor a2, a2, a5 -; CHECK-UNALIGNED-RV32-V-NEXT:xor a3, a3, a6 -; CHECK-UNALIGNED-RV32-V-NEXT:xor a4, a4, a7 -; CHECK-UNALIGNED-RV32-V-NEXT:xor a0, a0, a1 -; CHECK-UNALIGNED-RV32-V-NEXT:or a0, a3, a0 -; CHECK-UNALIGNED-RV32-V-NEXT:or a2, a2, a4 -; CHECK-UNALIGNED-RV32-V-NEXT:or a0, a2, a0 +; CHECK-UNALIGNED-RV32-V-NEXT:vsetivli zero, 15, e8, m1, ta, ma +; CHECK-UNALIGNED-RV32-V-NEXT:vle8.v v8, (a0) +; CHECK-UNALIGNED-RV32-V-NEXT:vle8.v v9, (a1) +; CHECK-UNALIGNED-RV32-V-NEXT:vsetivli zero, 16, e8, m1, ta, ma +; CHECK-UNALIGNED-RV32-V-NEXT:vmset.m v0 +; CHECK-UNALIGNED-RV32-V-NEXT:vmsne.vv v8, v8, v9 +; CHECK-UNALIGNED-RV32-V-NEXT:vsetivli zero, 15, e8, m1, ta, ma +; CHECK-UNALIGNED-RV32-V-NEXT:vcpop.m a0, v8, v0.t wangpc-pp wrote: Nope, it doesn't work. We should fix it in another place. https://github.com/llvm/llvm-project/pull/114971 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [flang] [lld] [llvm] [Flang] LLVM_ENABLE_RUNTIMES=FortranRuntime (PR #110217)
@@ -0,0 +1,165 @@ +#===-- CMakeLists.txt --===# +# +# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +# See https://llvm.org/LICENSE.txt for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# +#======# +# +# Build instructions for the flang-rt library. This is file is intended to be +# included using the LLVM_ENABLE_RUNTIMES mechanism. +# +#======# + +set(LLVM_SUBPROJECT_TITLE "Fortran Runtime") +set(FLANGRT_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}") +set(FLANGRT_BINARY_DIR "${CMAKE_CURRENT_BINARY_DIR}") +set(FLANG_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../flang") + +enable_language(Fortran) + +list(APPEND CMAKE_MODULE_PATH +"${FLANGRT_SOURCE_DIR}/cmake/modules" +"${FLANG_SOURCE_DIR}/cmake/modules" + ) +include(AddFlangRT) +include(FlangCommon) + + + +# Build Mode Introspection # + + +# Setting these variables from an LLVM build is sufficient that flang-rt can +# construct the output paths, so it can behave as if it were in-tree here. +set(LLVM_TREE_AVAILABLE OFF) +if (LLVM_LIBRARY_OUTPUT_INTDIR AND LLVM_RUNTIME_OUTPUT_INTDIR AND PACKAGE_VERSION) + # This is a bootstap build + set(LLVM_TREE_AVAILABLE ON) +endif() + +if (LLVM_TREE_AVAILABLE) + # Despite Clang in the name, get_clang_resource_dir does not depend on Clang being added to the build + # flang-new uses the same resource dir as clang. + include(GetClangResourceDir) + get_clang_resource_dir(FLANGRT_BUILD_LIB_DIR PREFIX "${LLVM_LIBRARY_OUTPUT_INTDIR}/.." SUBDIR "lib${LLVM_LIBDIR_SUFFIX}") + get_clang_resource_dir(FLANGRT_INSTALL_LIB_DIR SUBDIR "lib${LLVM_LIBDIR_SUFFIX}") # No prefix, CMake's install command find the install prefix itself +else () + set(FLANGRT_BUILD_LIB_DIR "${LLVM_LIBRARY_OUTPUT_INTDIR}") + set(FLANGRT_INSTALL_LIB_DIR "lib${LLVM_LIBDIR_SUFFIX}") +endif () + +if (DEFINED WIN32) + set(FLANGRT_BUILD_LIB_DIR "${FLANGRT_BUILD_LIB_DIR}/windows") + set(FLANGRT_INSTALL_LIB_DIR "${FLANGRT_INSTALL_LIB_DIR}/windows") +elseif (LLVM_ENABLE_PER_TARGET_RUNTIME_DIR) + set(FLANGRT_BUILD_LIB_DIR "${FLANGRT_BUILD_LIB_DIR}/${LLVM_TARGET_TRIPLE}") + set(FLANGRT_INSTALL_LIB_DIR "${FLANGRT_INSTALL_LIB_DIR}/${LLVM_TARGET_TRIPLE}") +endif () + + +# +# Build Options # +# + +# Important: flang-rt user options must be prefixed with "FLANG_RT_". Variables +# with this prefix will be forwarded in bootstrap builds. + +option(FLANG_RT_INCLUDE_TESTS "Generate build targets for the flang-rt unit and regression-tests." "${LLVM_INCLUDE_TESTS}") + +set(FLANG_RT_EXPERIMENTAL_OFFLOAD_SUPPORT "" CACHE STRING "Compile flang-rt with GPU support (CUDA or OpenMP)") +set_property(CACHE FLANG_RT_EXPERIMENTAL_OFFLOAD_SUPPORT PROPERTY STRINGS +"" +CUDA +OpenMP + ) +if (NOT FLANG_RT_EXPERIMENTAL_OFFLOAD_SUPPORT) +elseif (FLANG_RT_EXPERIMENTAL_OFFLOAD_SUPPORT STREQUAL "CUDA") + set(FLANG_RT_LIBCUDACXX_PATH "" CACHE PATH "Path to libcu++ package installation") + option(FLANG_RT_CUDA_RUNTIME_PTX_WITHOUT_GLOBAL_VARS "Do not compile global variables' definitions when producing PTX library" OFF) +elseif (FLANG_RT_EXPERIMENTAL_OFFLOAD_SUPPORT STREQUAL "OpenMP") +set(FLANG_RT_DEVICE_ARCHITECTURES "all" CACHE STRING + "List of OpenMP device architectures to be used to compile the Fortran runtime (e.g. 
'gfx1103;sm_90')") +else () + message(FATAL_ERROR "Invalid value '${FLANG_RT_EXPERIMENTAL_OFFLOAD_SUPPORT}' for FLANG_RT_EXPERIMENTAL_OFFLOAD_SUPPORT; must be empty, 'CUDA', or 'OpenMP'") +endif () + +option(FLANG_RT_ENABLE_CUF "Compile CUDA Fortran runtime sources" OFF) +if (FLANG_RT_ENABLE_CUF) + find_package(CUDAToolkit REQUIRED) +endif() + + + +# System Introspection # + + +include(CheckCXXSymbolExists) +include(CheckCXXSourceCompiles) +check_cxx_symbol_exists(strerror_r string.h HAVE_STRERROR_R) +# Can't use symbol exists here as the function is overloaded in C++ +check_cxx_source_compiles( + "#include + int main() { + char buf[4096]; + return strerror_s(buf, 4096, 0); + } + " + HAVE_DECL_STRERROR_S) + + +# Search for clang_rt.builtins library. +if (WIN32) + execute_process( + COMMAND "${CMAKE_CXX_COMPILER}" "-print-libgcc-file-name" "-rtlib=compiler-rt" + RESULT_VARIABLE CXX_COMPILER_PRINT_LIBGCC_PATH_FAILURE + OUTPUT_VARIABLE CXX_COMPILER_PRINT_LIBGCC_PATH_RESULT + ERROR_QUIET +) + if (NOT CXX_COMPILER_PRINT_LIBGCC_PATH_FAILURE AND CXX_COMPILER_PRINT_LIBGCC_PATH_RESULT) +string(STRIP "${CXX_COMPILER_PRINT_LIBGCC_PATH_RESULT}" FLANGRT_LIBCALL) + else () +set(FLANGRT_LIBCALL "") + endif () +endif () + + +# +# Build Preparation # +# + +if
[llvm-branch-commits] [clang] [flang] [lld] [llvm] [Flang] LLVM_ENABLE_RUNTIMES=FortranRuntime (PR #110217)
@@ -221,6 +230,9 @@ function(llvm_ExternalProject_Add name source_dir) -DCMAKE_ASM_COMPILER=${LLVM_RUNTIME_OUTPUT_INTDIR}/clang${CMAKE_EXECUTABLE_SUFFIX}) endif() endif() +if(FLANG_IN_TOOLCHAIN) + list(APPEND compiler_args -DCMAKE_Fortran_COMPILER=${LLVM_RUNTIME_OUTPUT_INTDIR}/flang-new${CMAKE_EXECUTABLE_SUFFIX}) jhuber6 wrote: This will just be `flang` soon, right? https://github.com/llvm/llvm-project/pull/110217 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [flang] [lld] [llvm] [Flang] LLVM_ENABLE_RUNTIMES=FortranRuntime (PR #110217)
@@ -270,13 +271,15 @@ function(runtime_default_target) -DLLVM_BUILD_TOOLS=${LLVM_BUILD_TOOLS} -DCMAKE_C_COMPILER_WORKS=ON -DCMAKE_CXX_COMPILER_WORKS=ON + -DCMAKE_Fortran_COMPILER_WORKS=ON -DCMAKE_ASM_COMPILER_WORKS=ON ${COMMON_CMAKE_ARGS} ${RUNTIMES_CMAKE_ARGS} ${ARG_CMAKE_ARGS} PASSTHROUGH_PREFIXES LLVM_ENABLE_RUNTIMES LLVM_USE_LINKER -CUDA # For runtimes that may look for the CUDA SDK (libc, offload) +CUDA # For runtimes that may look for the CUDA SDK (libc, offload, flang-rt) +FLANG_RUNTIME # Shared between Flang and Flang-RT jhuber6 wrote: We intentionally restrict the global prefixes, I think we check for the projects and then add the prefix instead of just doing it for everyone. https://github.com/llvm/llvm-project/pull/110217 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [RISCV] Support non-power-of-2 types when expanding memcmp (PR #114971)
@@ -1069,21 +1069,14 @@ define i32 @bcmp_size_15(ptr %s1, ptr %s2) nounwind optsize { ; ; CHECK-UNALIGNED-RV32-V-LABEL: bcmp_size_15: ; CHECK-UNALIGNED-RV32-V: # %bb.0: # %entry -; CHECK-UNALIGNED-RV32-V-NEXT:lw a2, 0(a0) -; CHECK-UNALIGNED-RV32-V-NEXT:lw a3, 4(a0) -; CHECK-UNALIGNED-RV32-V-NEXT:lw a4, 7(a0) -; CHECK-UNALIGNED-RV32-V-NEXT:lw a0, 11(a0) -; CHECK-UNALIGNED-RV32-V-NEXT:lw a5, 0(a1) -; CHECK-UNALIGNED-RV32-V-NEXT:lw a6, 4(a1) -; CHECK-UNALIGNED-RV32-V-NEXT:lw a7, 7(a1) -; CHECK-UNALIGNED-RV32-V-NEXT:lw a1, 11(a1) -; CHECK-UNALIGNED-RV32-V-NEXT:xor a2, a2, a5 -; CHECK-UNALIGNED-RV32-V-NEXT:xor a3, a3, a6 -; CHECK-UNALIGNED-RV32-V-NEXT:xor a4, a4, a7 -; CHECK-UNALIGNED-RV32-V-NEXT:xor a0, a0, a1 -; CHECK-UNALIGNED-RV32-V-NEXT:or a0, a3, a0 -; CHECK-UNALIGNED-RV32-V-NEXT:or a2, a2, a4 -; CHECK-UNALIGNED-RV32-V-NEXT:or a0, a2, a0 +; CHECK-UNALIGNED-RV32-V-NEXT:vsetivli zero, 15, e8, m1, ta, ma +; CHECK-UNALIGNED-RV32-V-NEXT:vle8.v v8, (a0) +; CHECK-UNALIGNED-RV32-V-NEXT:vle8.v v9, (a1) +; CHECK-UNALIGNED-RV32-V-NEXT:vsetivli zero, 16, e8, m1, ta, ma +; CHECK-UNALIGNED-RV32-V-NEXT:vmset.m v0 +; CHECK-UNALIGNED-RV32-V-NEXT:vmsne.vv v8, v8, v9 +; CHECK-UNALIGNED-RV32-V-NEXT:vsetivli zero, 15, e8, m1, ta, ma +; CHECK-UNALIGNED-RV32-V-NEXT:vcpop.m a0, v8, v0.t wangpc-pp wrote: I think this is the lowering of `ISD::VECREDUCE_OR` nodes. The `VL` parameter comes from `getDefaultVLOps` and its value is 16 because the vector type `v15i8` has been widened to `v16i8`. We may use `VP_REDUCE_OR` here. https://github.com/llvm/llvm-project/pull/114971 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
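A minimal sketch of the VP_REDUCE_OR idea at the IR level (hypothetical, assuming the v15i8 compare has already been widened to v16i8):

```llvm
declare i1 @llvm.vp.reduce.or.v16i1(i1, <16 x i1>, <16 x i1>, i32)

; %ne is the vmsne.vv result on the widened type and %allones an all-true mask.
; The explicit vector length of 15 keeps the padded 16th lane out of the OR
; reduction, so no separate vmset.m/v0.t masking of vcpop.m should be needed.
define i1 @bcmp_tail_sketch(<16 x i1> %ne, <16 x i1> %allones) {
  %any = call i1 @llvm.vp.reduce.or.v16i1(i1 false, <16 x i1> %ne, <16 x i1> %allones, i32 15)
  ret i1 %any
}
```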
[llvm-branch-commits] [flang] [Flang][NFC] Split runtime headers in preparation for cross-compilation. (PR #112188)
https://github.com/Meinersbur edited https://github.com/llvm/llvm-project/pull/112188 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [flang] [llvm] [Flang][NFC] Move runtime library files to flang-rt. (PR #110298)
https://github.com/Meinersbur edited https://github.com/llvm/llvm-project/pull/110298 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [RISCV] Add vcpop.m/vfirst.m to RISCVMaskedPseudosTable (PR #115162)
@@ -1150,6 +1150,7 @@ class VPseudoUnaryNoMaskGPROut : class VPseudoUnaryMaskGPROut : Pseudo<(outs GPR:$rd), (ins VR:$rs1, VMaskOp:$vm, AVL:$vl, sew:$sew), []>, + RISCVMaskedPseudo, lukel97 wrote: Nit: instead of adding it in the class, could you move it to the two `def`s so it's consistent with other uses of RISCVMaskedPseudo? https://github.com/llvm/llvm-project/pull/115162 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [RISCV] Add vcpop.m/vfirst.m to RISCVMaskedPseudosTable (PR #115162)
https://github.com/lukel97 approved this pull request. Good catch. I double checked and we're setting ElementsDependOnVL and ElementsDependOnMask for VCPOP_M and VFIRST_M so adding RISCVMaskedPseudo should be safe. https://github.com/llvm/llvm-project/pull/115162 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [flang] [lld] [llvm] [Flang] LLVM_ENABLE_RUNTIMES=flang-rt (PR #110217)
https://github.com/jhuber6 edited https://github.com/llvm/llvm-project/pull/110217 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [libc++abi] Stop copying headers to the build directory (PR #115086)
arichardson wrote: > Can't we do the same for libc++ as well? Looks like CI is happy on this one so I'll try the same with libc++ next. https://github.com/llvm/llvm-project/pull/115086 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [flang] [lld] [llvm] [Flang] LLVM_ENABLE_RUNTIMES=FortranRuntime (PR #110217)
@@ -0,0 +1,165 @@ +#===-- CMakeLists.txt --===# +# +# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +# See https://llvm.org/LICENSE.txt for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# +#======# +# +# Build instructions for the flang-rt library. This is file is intended to be +# included using the LLVM_ENABLE_RUNTIMES mechanism. +# +#======# + +set(LLVM_SUBPROJECT_TITLE "Fortran Runtime") +set(FLANGRT_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}") +set(FLANGRT_BINARY_DIR "${CMAKE_CURRENT_BINARY_DIR}") +set(FLANG_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../flang") + +enable_language(Fortran) + +list(APPEND CMAKE_MODULE_PATH +"${FLANGRT_SOURCE_DIR}/cmake/modules" +"${FLANG_SOURCE_DIR}/cmake/modules" + ) +include(AddFlangRT) +include(FlangCommon) + + + +# Build Mode Introspection # + + +# Setting these variables from an LLVM build is sufficient that flang-rt can +# construct the output paths, so it can behave as if it were in-tree here. +set(LLVM_TREE_AVAILABLE OFF) +if (LLVM_LIBRARY_OUTPUT_INTDIR AND LLVM_RUNTIME_OUTPUT_INTDIR AND PACKAGE_VERSION) + # This is a bootstap build + set(LLVM_TREE_AVAILABLE ON) +endif() + +if (LLVM_TREE_AVAILABLE) + # Despite Clang in the name, get_clang_resource_dir does not depend on Clang being added to the build + # flang-new uses the same resource dir as clang. + include(GetClangResourceDir) + get_clang_resource_dir(FLANGRT_BUILD_LIB_DIR PREFIX "${LLVM_LIBRARY_OUTPUT_INTDIR}/.." SUBDIR "lib${LLVM_LIBDIR_SUFFIX}") + get_clang_resource_dir(FLANGRT_INSTALL_LIB_DIR SUBDIR "lib${LLVM_LIBDIR_SUFFIX}") # No prefix, CMake's install command find the install prefix itself +else () + set(FLANGRT_BUILD_LIB_DIR "${LLVM_LIBRARY_OUTPUT_INTDIR}") + set(FLANGRT_INSTALL_LIB_DIR "lib${LLVM_LIBDIR_SUFFIX}") +endif () + +if (DEFINED WIN32) + set(FLANGRT_BUILD_LIB_DIR "${FLANGRT_BUILD_LIB_DIR}/windows") jhuber6 wrote: Nit, I feel like this and everything else should be `FLANG_RT` just for consistency with `CLANG_RT`. https://github.com/llvm/llvm-project/pull/110217 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [flang] [Flang][NFC] Split runtime headers in preparation for cross-compilation. (PR #112188)
https://github.com/DavidTruby approved this pull request. LGTM, but someone else should probably approve as this is quite a large change. I don't think this one is an NFC because of the changes mentioned above by @jeanPerier https://github.com/llvm/llvm-project/pull/112188 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [flang] [Flang][NFC] Split runtime headers in preparation for cross-compilation. (PR #112188)
https://github.com/DavidTruby edited https://github.com/llvm/llvm-project/pull/112188 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [RISCV] Support non-power-of-2 types when expanding memcmp (PR #114971)
https://github.com/wangpc-pp updated https://github.com/llvm/llvm-project/pull/114971 >From 3fd27bd1405a8b2c068786a200d610b9cacb65ef Mon Sep 17 00:00:00 2001 From: Wang Pengcheng Date: Tue, 5 Nov 2024 20:38:44 +0800 Subject: [PATCH] Set max bytes Created using spr 1.3.6-beta.1 --- llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp | 5 - 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp index c65feb9755633c..a1c5f76bae0099 100644 --- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp @@ -2508,7 +2508,10 @@ RISCVTTIImpl::enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const { Options.LoadSizes = {4, 2, 1}; if (IsZeroCmp && ST->hasVInstructions()) { unsigned VLenB = ST->getRealMinVLen() / 8; -for (unsigned Size = ST->getXLen() / 8 + 1; +// The minimum size should be the maximum bytes between `VLen * LMUL_MF8` +// and `XLen + 8`. +unsigned MinSize = std::max(VLenB / 8, ST->getXLen() / 8 + 1); +for (unsigned Size = MinSize; Size <= VLenB * ST->getMaxLMULForFixedLengthVectors(); Size++) Options.LoadSizes.insert(Options.LoadSizes.begin(), Size); } ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
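To make the new lower bound concrete, here is a small standalone sketch of the same computation; the VLEN and XLEN values are assumptions picked for illustration, not taken from the patch.

```cpp
// Mirrors `MinSize = std::max(VLenB / 8, ST->getXLen() / 8 + 1)` from the
// patch above, with assumed values VLEN=128 and XLEN=64 for illustration.
#include <algorithm>
#include <cstdio>

int main() {
  unsigned RealMinVLen = 128;                // assumed VLEN
  unsigned XLen = 64;                        // assumed XLEN
  unsigned VLenB = RealMinVLen / 8;          // 16 bytes per vector register
  unsigned MinSize = std::max(VLenB / 8,     // bytes in an LMUL=1/8 group: 2
                              XLen / 8 + 1); // one byte past XLEN in bytes: 9
  std::printf("vector expansion starts at %u bytes\n", MinSize); // prints 9
}
```

With these numbers the scalar XLEN-based expansion still covers 1 to 8 byte compares, and the vector load sizes only start at 9 bytes, up to VLenB times the maximum LMUL for fixed-length vectors.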
[llvm-branch-commits] [RISCV] Add vcpop.m/vfirst.m to RISCVMaskedPseudosTable (PR #115162)
llvmbot wrote: @llvm/pr-subscribers-backend-risc-v Author: Pengcheng Wang (wangpc-pp) Changes We seem to forget these two instructions. --- Full diff: https://github.com/llvm/llvm-project/pull/115162.diff 3 Files Affected: - (modified) llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp (+12-2) - (modified) llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td (+1) - (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp.ll (+2-8) ``diff diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp index 6291842e071a3e..17b617c502ca90 100644 --- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp @@ -3708,6 +3708,15 @@ static bool isImplicitDef(SDValue V) { return V.getMachineOpcode() == TargetOpcode::IMPLICIT_DEF; } +static bool hasGPROut(unsigned Opc) { + switch (RISCV::getRVVMCOpcode(Opc)) { + case RISCV::VCPOP_M: + case RISCV::VFIRST_M: +return true; + } + return false; +} + // Optimize masked RVV pseudo instructions with a known all-ones mask to their // corresponding "unmasked" pseudo versions. The mask we're interested in will // take the form of a V0 physical register operand, with a glued @@ -3737,8 +3746,9 @@ bool RISCVDAGToDAGISel::doPeepholeMaskedRVV(MachineSDNode *N) { #endif SmallVector Ops; - // Skip the passthru operand at index 0 if !UseTUPseudo. - for (unsigned I = !UseTUPseudo, E = N->getNumOperands(); I != E; I++) { + // Skip the passthru operand at index 0 if !UseTUPseudo and no GPR out. + bool ShouldSkip = !UseTUPseudo && !hasGPROut(Opc); + for (unsigned I = ShouldSkip, E = N->getNumOperands(); I != E; I++) { // Skip the mask, and the Glue. SDValue Op = N->getOperand(I); if (I == MaskOpIdx || Op.getValueType() == MVT::Glue) diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td index 89e71b7c22c12d..d5c7932b6f8edf 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td @@ -1150,6 +1150,7 @@ class VPseudoUnaryNoMaskGPROut : class VPseudoUnaryMaskGPROut : Pseudo<(outs GPR:$rd), (ins VR:$rs1, VMaskOp:$vm, AVL:$vl, sew:$sew), []>, + RISCVMaskedPseudo, RISCVVPseudo { let mayLoad = 0; let mayStore = 0; diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp.ll index ca17ea49a6f920..487234674befe0 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp.ll @@ -1797,11 +1797,8 @@ define float @vreduce_fminimum_v7f32(ptr %x) { ; CHECK: # %bb.0: ; CHECK-NEXT:vsetivli zero, 7, e32, m2, ta, ma ; CHECK-NEXT:vle32.v v8, (a0) -; CHECK-NEXT:vsetivli zero, 8, e8, mf2, ta, ma -; CHECK-NEXT:vmset.m v0 -; CHECK-NEXT:vsetivli zero, 7, e32, m2, ta, ma ; CHECK-NEXT:vmfne.vv v10, v8, v8 -; CHECK-NEXT:vcpop.m a0, v10, v0.t +; CHECK-NEXT:vcpop.m a0, v10 ; CHECK-NEXT:beqz a0, .LBB111_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT:lui a0, 523264 @@ -2558,11 +2555,8 @@ define float @vreduce_fmaximum_v7f32(ptr %x) { ; CHECK: # %bb.0: ; CHECK-NEXT:vsetivli zero, 7, e32, m2, ta, ma ; CHECK-NEXT:vle32.v v8, (a0) -; CHECK-NEXT:vsetivli zero, 8, e8, mf2, ta, ma -; CHECK-NEXT:vmset.m v0 -; CHECK-NEXT:vsetivli zero, 7, e32, m2, ta, ma ; CHECK-NEXT:vmfne.vv v10, v8, v8 -; CHECK-NEXT:vcpop.m a0, v10, v0.t +; CHECK-NEXT:vcpop.m a0, v10 ; CHECK-NEXT:beqz a0, .LBB139_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT:lui a0, 523264 `` https://github.com/llvm/llvm-project/pull/115162 ___ 
llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [RISCV] Add vcpop.m/vfirst.m to RISCVMaskedPseudosTable (PR #115162)
https://github.com/wangpc-pp created https://github.com/llvm/llvm-project/pull/115162 We seem to forget these two instructions. ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [RISCV] Support non-power-of-2 types when expanding memcmp (PR #114971)
@@ -1069,21 +1069,14 @@ define i32 @bcmp_size_15(ptr %s1, ptr %s2) nounwind optsize { ; ; CHECK-UNALIGNED-RV32-V-LABEL: bcmp_size_15: ; CHECK-UNALIGNED-RV32-V: # %bb.0: # %entry -; CHECK-UNALIGNED-RV32-V-NEXT:lw a2, 0(a0) -; CHECK-UNALIGNED-RV32-V-NEXT:lw a3, 4(a0) -; CHECK-UNALIGNED-RV32-V-NEXT:lw a4, 7(a0) -; CHECK-UNALIGNED-RV32-V-NEXT:lw a0, 11(a0) -; CHECK-UNALIGNED-RV32-V-NEXT:lw a5, 0(a1) -; CHECK-UNALIGNED-RV32-V-NEXT:lw a6, 4(a1) -; CHECK-UNALIGNED-RV32-V-NEXT:lw a7, 7(a1) -; CHECK-UNALIGNED-RV32-V-NEXT:lw a1, 11(a1) -; CHECK-UNALIGNED-RV32-V-NEXT:xor a2, a2, a5 -; CHECK-UNALIGNED-RV32-V-NEXT:xor a3, a3, a6 -; CHECK-UNALIGNED-RV32-V-NEXT:xor a4, a4, a7 -; CHECK-UNALIGNED-RV32-V-NEXT:xor a0, a0, a1 -; CHECK-UNALIGNED-RV32-V-NEXT:or a0, a3, a0 -; CHECK-UNALIGNED-RV32-V-NEXT:or a2, a2, a4 -; CHECK-UNALIGNED-RV32-V-NEXT:or a0, a2, a0 +; CHECK-UNALIGNED-RV32-V-NEXT:vsetivli zero, 15, e8, m1, ta, ma +; CHECK-UNALIGNED-RV32-V-NEXT:vle8.v v8, (a0) +; CHECK-UNALIGNED-RV32-V-NEXT:vle8.v v9, (a1) +; CHECK-UNALIGNED-RV32-V-NEXT:vsetivli zero, 16, e8, m1, ta, ma +; CHECK-UNALIGNED-RV32-V-NEXT:vmset.m v0 +; CHECK-UNALIGNED-RV32-V-NEXT:vmsne.vv v8, v8, v9 +; CHECK-UNALIGNED-RV32-V-NEXT:vsetivli zero, 15, e8, m1, ta, ma +; CHECK-UNALIGNED-RV32-V-NEXT:vcpop.m a0, v8, v0.t wangpc-pp wrote: This should be fixed by using VP nodes and #115162. https://github.com/llvm/llvm-project/pull/114971 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [libc++abi] Stop copying headers to the build directory (PR #115086)
https://github.com/ldionne commented: Can't we do the same for libc++ as well? https://github.com/llvm/llvm-project/pull/115086 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [lldb] [lldb][LoongArch] Function calls support in lldb expressions (PR #114742)
https://github.com/SixWeining deleted https://github.com/llvm/llvm-project/pull/114742 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [lldb] [lldb][LoongArch] Function calls support in lldb expressions (PR #114742)
https://github.com/SixWeining approved this pull request. LGTM. But should land after #114741. https://github.com/llvm/llvm-project/pull/114742 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [flang] [lld] [llvm] [Flang] LLVM_ENABLE_RUNTIMES=flang-rt (PR #110217)
@@ -171,145 +76,88 @@ set(sources unit-map.cpp unit.cpp utf.cpp - ${FORTRAN_MODULE_OBJECTS} ) -include(AddFlangOffloadRuntime) - -# List of files that are buildable for all devices. -set(supported_files - ISO_Fortran_binding.cpp - allocatable.cpp - allocator-registry.cpp - array-constructor.cpp - assign.cpp - buffer.cpp - character.cpp - connection.cpp - copy.cpp - derived-api.cpp - derived.cpp - descriptor.cpp - descriptor-io.cpp - dot-product.cpp - edit-input.cpp - edit-output.cpp - environment.cpp - extrema.cpp - external-unit.cpp - file.cpp - findloc.cpp - format.cpp - inquiry.cpp - internal-unit.cpp - io-api.cpp - io-api-minimal.cpp - io-error.cpp - io-stmt.cpp - iostat.cpp - matmul-transpose.cpp - matmul.cpp - memory.cpp - misc-intrinsic.cpp - namelist.cpp - non-tbp-dio.cpp - numeric.cpp - pointer.cpp - product.cpp - pseudo-unit.cpp - ragged.cpp - stat.cpp - sum.cpp - support.cpp - terminator.cpp - tools.cpp - transformational.cpp - type-code.cpp - type-info.cpp - unit.cpp - utf.cpp +set(public_headers "") +file(GLOB_RECURSE public_headers + "${FLANGRUNTIME_SOURCE_DIR}/include/flang/Runtime/*.h" + "${FLANGRUNTIME_SOURCE_DIR}/include/flang/Common/*.h" ) -enable_cuda_compilation(FortranRuntime "${supported_files}") -enable_omp_offload_compilation("${supported_files}") jhuber6 wrote: Where is this CUDA build actually used? I'm interested in removing it if possible and just building this on top of my other GPU libraries. https://github.com/llvm/llvm-project/pull/110217 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [libcxx] [libc++] Implement std::move_only_function (P0288R9) (PR #94670)
https://github.com/ldionne commented: I think the most effective way of settling on the bit-stealing issue would be to benchmark something like this: 1. Set up ~100 functions that do random stuff (e.g. they all return numbers from 1 to 100) 2. Set up a `std::vector>` with some large number of functions in it. 3. Assign from the 100 functions randomly into the vector, so that the vector contains roughly uniformly distributed function pointers. 4. Measure how long it takes to call all the functions in the vector using this bit-stealing implementation, and an implementation where we store a null entry in the destructor instead. Similarly, for this benchmark we could measure the time it takes for destroying the vector. https://github.com/llvm/llvm-project/pull/94670 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
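For completeness, here is one way the call-time half of that benchmark could be sketched. It assumes a C++23 standard library that already ships std::move_only_function; the function count, vector length, and timing loop are arbitrary choices for illustration rather than anything prescribed above.

```cpp
// Rough benchmark sketch for the bit-stealing question above (C++23).
#include <array>
#include <chrono>
#include <cstdio>
#include <functional>
#include <random>
#include <utility>
#include <vector>

// Step 1: ~100 distinct functions returning 1..100.
template <std::size_t I> int ret() { return static_cast<int>(I) + 1; }

template <std::size_t... Is>
constexpr std::array<int (*)(), sizeof...(Is)>
makeFuncs(std::index_sequence<Is...>) {
  return {&ret<Is>...};
}

int main() {
  constexpr auto Funcs = makeFuncs(std::make_index_sequence<100>{});

  // Step 2: a large vector of move_only_function.
  constexpr std::size_t N = 1'000'000;
  std::vector<std::move_only_function<int()>> v(N);

  // Step 3: assign the 100 functions into the vector roughly uniformly.
  std::mt19937 rng(42);
  std::uniform_int_distribution<std::size_t> dist(0, Funcs.size() - 1);
  for (auto &slot : v)
    slot = Funcs[dist(rng)];

  // Step 4: time calling every element.
  auto t0 = std::chrono::steady_clock::now();
  long long sum = 0;
  for (auto &f : v)
    sum += f();
  auto t1 = std::chrono::steady_clock::now();
  std::printf("sum=%lld, call loop: %lld us\n", sum,
              static_cast<long long>(
                  std::chrono::duration_cast<std::chrono::microseconds>(t1 - t0)
                      .count()));
}
```

Destruction time could be measured the same way by scoping the vector and timing its destructor, and the comparison would just mean building this against each candidate implementation.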
[llvm-branch-commits] [libc++abi] Stop copying headers to the build directory (PR #115086)
https://github.com/ldionne approved this pull request. LGTM, thanks for the cleanup! https://github.com/llvm/llvm-project/pull/115086 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [flang] [lld] [llvm] [Flang] LLVM_ENABLE_RUNTIMES=flang-rt (PR #110217)
@@ -171,145 +76,88 @@ set(sources unit-map.cpp unit.cpp utf.cpp - ${FORTRAN_MODULE_OBJECTS} ) -include(AddFlangOffloadRuntime) - -# List of files that are buildable for all devices. -set(supported_files - ISO_Fortran_binding.cpp - allocatable.cpp - allocator-registry.cpp - array-constructor.cpp - assign.cpp - buffer.cpp - character.cpp - connection.cpp - copy.cpp - derived-api.cpp - derived.cpp - descriptor.cpp - descriptor-io.cpp - dot-product.cpp - edit-input.cpp - edit-output.cpp - environment.cpp - extrema.cpp - external-unit.cpp - file.cpp - findloc.cpp - format.cpp - inquiry.cpp - internal-unit.cpp - io-api.cpp - io-api-minimal.cpp - io-error.cpp - io-stmt.cpp - iostat.cpp - matmul-transpose.cpp - matmul.cpp - memory.cpp - misc-intrinsic.cpp - namelist.cpp - non-tbp-dio.cpp - numeric.cpp - pointer.cpp - product.cpp - pseudo-unit.cpp - ragged.cpp - stat.cpp - sum.cpp - support.cpp - terminator.cpp - tools.cpp - transformational.cpp - type-code.cpp - type-info.cpp - unit.cpp - utf.cpp +set(public_headers "") +file(GLOB_RECURSE public_headers + "${FLANGRUNTIME_SOURCE_DIR}/include/flang/Runtime/*.h" + "${FLANGRUNTIME_SOURCE_DIR}/include/flang/Common/*.h" ) -enable_cuda_compilation(FortranRuntime "${supported_files}") -enable_omp_offload_compilation("${supported_files}") jhuber6 wrote: Alright, any changes I make probably won't impact this since it'll be a separate build. But I would greatly prefer if we stop having many different ways of doing things in the LLVM offloading space. https://github.com/llvm/llvm-project/pull/110217 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [AMDGPU] Simplify dpp builtin handling (PR #115090)
https://github.com/arsenm approved this pull request. Should also teach instcombine to fold bitcast + dpp https://github.com/llvm/llvm-project/pull/115090 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [AMDGPU] Simplify dpp builtin handling (PR #115090)
https://github.com/rampitec updated https://github.com/llvm/llvm-project/pull/115090 >From 7ccac58706b2d7e54c8498818b560af490a70eac Mon Sep 17 00:00:00 2001 From: Stanislav Mekhanoshin Date: Mon, 4 Nov 2024 12:28:07 -0800 Subject: [PATCH] [AMDGPU] Simplify dpp builtin handling DPP intrinsics can handle any type now, so no need to cast to integer. The caveat is that intrinsics only handle backend legal types, but it does not work with i8 for example. --- clang/lib/CodeGen/CGBuiltin.cpp | 23 ++- .../CodeGenOpenCL/builtins-amdgcn-gfx10.cl| 30 -- .../test/CodeGenOpenCL/builtins-amdgcn-vi.cl | 60 +++ 3 files changed, 38 insertions(+), 75 deletions(-) diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 5c3df5124517d6..8c0e76c9e8c3d7 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -19211,37 +19211,24 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments); assert(Error == ASTContext::GE_None && "Should not codegen an error"); llvm::Type *DataTy = ConvertType(E->getArg(0)->getType()); -unsigned Size = DataTy->getPrimitiveSizeInBits(); -llvm::Type *IntTy = -llvm::IntegerType::get(Builder.getContext(), std::max(Size, 32u)); Function *F = CGM.getIntrinsic(BuiltinID == AMDGPU::BI__builtin_amdgcn_mov_dpp8 ? Intrinsic::amdgcn_mov_dpp8 : Intrinsic::amdgcn_update_dpp, - IntTy); + DataTy); assert(E->getNumArgs() == 5 || E->getNumArgs() == 6 || E->getNumArgs() == 2); bool InsertOld = BuiltinID == AMDGPU::BI__builtin_amdgcn_mov_dpp; if (InsertOld) - Args.push_back(llvm::PoisonValue::get(IntTy)); -for (unsigned I = 0; I != E->getNumArgs(); ++I) { + Args.push_back(llvm::PoisonValue::get(DataTy)); +Args.push_back(EmitScalarOrConstFoldImmArg(ICEArguments, 0, E)); +for (unsigned I = 1; I != E->getNumArgs(); ++I) { llvm::Value *V = EmitScalarOrConstFoldImmArg(ICEArguments, I, E); - if (I < (BuiltinID == AMDGPU::BI__builtin_amdgcn_update_dpp ? 
2u : 1u) && - Size < 32) { -if (!DataTy->isIntegerTy()) - V = Builder.CreateBitCast( - V, llvm::IntegerType::get(Builder.getContext(), Size)); -V = Builder.CreateZExtOrBitCast(V, IntTy); - } llvm::Type *ExpTy = F->getFunctionType()->getFunctionParamType(I + InsertOld); Args.push_back(Builder.CreateTruncOrBitCast(V, ExpTy)); } -Value *V = Builder.CreateCall(F, Args); -if (Size < 32 && !DataTy->isIntegerTy()) - V = Builder.CreateTrunc( - V, llvm::IntegerType::get(Builder.getContext(), Size)); -return Builder.CreateTruncOrBitCast(V, DataTy); +return Builder.CreateCall(F, Args); } case AMDGPU::BI__builtin_amdgcn_permlane16: case AMDGPU::BI__builtin_amdgcn_permlanex16: diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx10.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx10.cl index a4054cba236dd2..7e4ee6f4a942db 100644 --- a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx10.cl +++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx10.cl @@ -36,45 +36,37 @@ void test_mov_dpp8_long(global long* out, long a) { } // CHECK-LABEL: @test_mov_dpp8_float( -// CHECK: %0 = bitcast float %a to i32 -// CHECK-NEXT: %1 = tail call{{.*}} i32 @llvm.amdgcn.mov.dpp8.i32(i32 %0, i32 1) -// CHECK-NEXT: store i32 %1, +// CHECK: %0 = tail call{{.*}} float @llvm.amdgcn.mov.dpp8.f32(float %a, i32 1) +// CHECK-NEXT: store float %0, void test_mov_dpp8_float(global float* out, float a) { *out = __builtin_amdgcn_mov_dpp8(a, 1); } // CHECK-LABEL: @test_mov_dpp8_double -// CHECK: %0 = bitcast double %x to i64 -// CHECK-NEXT: %1 = tail call{{.*}} i64 @llvm.amdgcn.mov.dpp8.i64(i64 %0, i32 1) -// CHECK-NEXT: store i64 %1, +// CHECK: %0 = tail call{{.*}} double @llvm.amdgcn.mov.dpp8.f64(double %x, i32 1) +// CHECK-NEXT: store double %0, void test_mov_dpp8_double(double x, global double *p) { *p = __builtin_amdgcn_mov_dpp8(x, 1); } // CHECK-LABEL: @test_mov_dpp8_short -// CHECK: %0 = zext i16 %x to i32 -// CHECK-NEXT: %1 = tail call{{.*}} i32 @llvm.amdgcn.mov.dpp8.i32(i32 %0, i32 1) -// CHECK-NEXT: %2 = trunc i32 %1 to i16 -// CHECK-NEXT: store i16 %2, +// CHECK: %0 = tail call{{.*}} i16 @llvm.amdgcn.mov.dpp8.i16(i16 %x, i32 1) +// CHECK-NEXT: store i16 %0, void test_mov_dpp8_short(short x, global short *p) { *p = __builtin_amdgcn_mov_dpp8(x, 1); } // CHECK-LABEL: @test_mov_dpp8_char -// CHECK: %0 = zext i8 %x to i32 -// CHECK-NEXT: %1 = tail call{{.*}} i32 @llvm.amdgcn.mov.dpp8.i32(i32 %0, i32 1) -// CHECK-NEXT: %2 = trunc i32 %1 to i8 -// CHECK-NEXT: store i8 %2, +// CHECK: %0 = tail call{{.*}} i8 @llvm.amdgcn.mov.dpp8.i8(i8 %x,
[llvm-branch-commits] [libcxxabi] 5d95a55 - Revert "[libc++abi] Stop copying headers to the build directory"
Author: Alexander Richardson Date: 2024-11-06T15:06:15-08:00 New Revision: 5d95a55a43daea00d42d771892037bd3aa44e291 URL: https://github.com/llvm/llvm-project/commit/5d95a55a43daea00d42d771892037bd3aa44e291 DIFF: https://github.com/llvm/llvm-project/commit/5d95a55a43daea00d42d771892037bd3aa44e291.diff LOG: Revert "[libc++abi] Stop copying headers to the build directory" This reverts commit 5be02d7a03c6d40d4d71264936d4aab98e4186aa. Added: Modified: libcxxabi/CMakeLists.txt libcxxabi/include/CMakeLists.txt Removed: diff --git a/libcxxabi/CMakeLists.txt b/libcxxabi/CMakeLists.txt index 50e9a296a4a13b..da0e8b286cddc1 100644 --- a/libcxxabi/CMakeLists.txt +++ b/libcxxabi/CMakeLists.txt @@ -86,6 +86,12 @@ set(LIBCXXABI_STATIC_OUTPUT_NAME "c++abi" CACHE STRING "Output name for the stat set(LIBCXXABI_INSTALL_INCLUDE_DIR "${CMAKE_INSTALL_INCLUDEDIR}/c++/v1" CACHE STRING "Path to install the libc++abi headers at.") +if(LLVM_LIBRARY_OUTPUT_INTDIR) + set(LIBCXXABI_GENERATED_INCLUDE_DIR "${LLVM_BINARY_DIR}/include/c++/v1") +else() + set(LIBCXXABI_GENERATED_INCLUDE_DIR "${CMAKE_BINARY_DIR}/include/c++/v1") +endif() + set(LIBCXXABI_LIBCXX_LIBRARY_PATH "" CACHE PATH "The path to libc++ library.") set(LIBCXXABI_LIBRARY_VERSION "1.0" CACHE STRING "Version of libc++abi. This will be reflected in the name of the shared \ diff --git a/libcxxabi/include/CMakeLists.txt b/libcxxabi/include/CMakeLists.txt index 0deb7b1eb9e715..5b1cc2545016ec 100644 --- a/libcxxabi/include/CMakeLists.txt +++ b/libcxxabi/include/CMakeLists.txt @@ -3,7 +3,20 @@ set(files cxxabi.h ) +foreach(f ${files}) + set(src "${CMAKE_CURRENT_SOURCE_DIR}/${f}") + set(dst "${LIBCXXABI_GENERATED_INCLUDE_DIR}/${f}") + add_custom_command(OUTPUT ${dst} +DEPENDS ${src} +COMMAND ${CMAKE_COMMAND} -E copy_if_ diff erent ${src} ${dst} +COMMENT "Copying CXXABI header ${f}") + list(APPEND _all_includes "${dst}") +endforeach() + +add_custom_target(generate-cxxabi-headers ALL DEPENDS ${_all_includes}) + add_library(cxxabi-headers INTERFACE) +add_dependencies(cxxabi-headers generate-cxxabi-headers) target_include_directories(cxxabi-headers INTERFACE "${CMAKE_CURRENT_SOURCE_DIR}") if (LIBCXXABI_INSTALL_HEADERS) ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [RISCV] Add vcpop.m/vfirst.m to RISCVMaskedPseudosTable (PR #115162)
https://github.com/wangpc-pp updated https://github.com/llvm/llvm-project/pull/115162 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT] Match blocks with pseudo probes (PR #99891)
wlei-llvm wrote: > > Ping @wlei-llvm > > Sorry for the delay. The new version addressed my last comment (with just > minor nits). However, I didn't fully follow the new features related to > `ProbeMatchSpecs` stuff. Could you add more descriptions to the diff > summary? Or if it’s not a lot of work, could we split it into two patches? We > could commit the first part, and I will review the second part separately. NVM, I think now I get what `ProbeMatchSpecs` does: it's a vector because a function can have multiple sections (function splitting). https://github.com/llvm/llvm-project/pull/99891 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [CodeGen][NewPM] Port RegUsageInfoCollector pass to NPM (PR #113874)
@@ -1,5 +1,10 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=amdgcn-amd-amdhsa -enable-ipra -print-regusage -o /dev/null 2>&1 < %s | FileCheck %s + +; RUN: llc -mtriple=amdgcn-amd-amdhsa -stop-after=irtranslator -o - %s \ optimisan wrote: I'll do that https://github.com/llvm/llvm-project/pull/113874 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT] Match blocks with pseudo probes (PR #99891)
@@ -447,12 +561,12 @@ createFlowFunction(const BinaryFunction::BasicBlockOrderType &BlockOrder) { /// of the basic blocks in the binary, the count is "matched" to the block. /// Similarly, if both the source and the target of a count in the profile are /// matched to a jump in the binary, the count is recorded in CFG. -size_t -matchWeightsByHashes(BinaryContext &BC, - const BinaryFunction::BasicBlockOrderType &BlockOrder, - const yaml::bolt::BinaryFunctionProfile &YamlBF, - FlowFunction &Func, HashFunction HashFunction, - YAMLProfileReader::ProfileLookupMap &IdToYamlBF) { +size_t matchWeightsByHashes( wlei-llvm wrote: Should we rename this function? Now it's not only "ByHashes", if so, also update the comments above. https://github.com/llvm/llvm-project/pull/99891 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT] Match blocks with pseudo probes (PR #99891)
@@ -482,11 +596,68 @@ matchWeightsByHashes(BinaryContext &BC, << Twine::utohexstr(BB->getHash()) << "\n"); } StaleMatcher Matcher; + // Collects function pseudo probes for use in the StaleMatcher. + if (opts::StaleMatchingWithPseudoProbes) { +const MCPseudoProbeDecoder *Decoder = BC.getPseudoProbeDecoder(); +assert(Decoder && + "If pseudo probes are in use, pseudo probe decoder should exist"); +const AddressProbesMap &ProbeMap = Decoder->getAddress2ProbesMap(); +const uint64_t FuncAddr = BF.getAddress(); +for (const MCDecodedPseudoProbe &Probe : + ProbeMap.find(FuncAddr, FuncAddr + BF.getSize())) + if (const BinaryBasicBlock *BB = + BF.getBasicBlockContainingOffset(Probe.getAddress() - FuncAddr)) +Matcher.mapProbeToBB(&Probe, Blocks[BB->getIndex()]); + } Matcher.init(Blocks, BlendedHashes, CallHashes); - // Index in yaml profile => corresponding (matched) block - DenseMap MatchedBlocks; - // Match blocks from the profile to the blocks in CFG + using FlowBlockTy = + std::pair; + using ProfileBlockMatchMap = DenseMap; + // Binary profile => block index => matched block + its block profile + DenseMap + MatchedBlocks; + + // Map of FlowBlock and matching method. + DenseMap MatchedFlowBlocks; + + // Match blocks from the profile to the blocks in CFG by strict hash. + for (const yaml::bolt::BinaryBasicBlockProfile &YamlBB : YamlBF.Blocks) { +// Update matching stats. +++BC.Stats.NumStaleBlocks; +BC.Stats.StaleSampleCount += YamlBB.ExecCount; + +assert(YamlBB.Hash != 0 && "empty hash of BinaryBasicBlockProfile"); +BlendedBlockHash YamlHash(YamlBB.Hash); +const FlowBlock *MatchedBlock = nullptr; +StaleMatcher::MatchMethod Method; +std::tie(MatchedBlock, Method) = Matcher.matchBlockStrict(YamlHash); +if (!MatchedBlock) + continue; +MatchedFlowBlocks.try_emplace(MatchedBlock, Method); +MatchedBlocks[&YamlBF][YamlBB.Index] = {MatchedBlock, &YamlBB}; + } + // Match blocks from the profile to the blocks in CFG by pseudo probes. + for (const YAMLProfileReader::ProbeMatchSpec &PS : ProbeMatchSpecs) { +const YAMLProfileReader::InlineTreeNodeMapTy &InlineTreeNodeMap = PS.first; +const yaml::bolt::BinaryFunctionProfile &YamlBP = PS.second; +for (const yaml::bolt::BinaryBasicBlockProfile &YamlBB : YamlBP.Blocks) { + if (YamlBB.PseudoProbes.empty()) +continue; + const FlowBlock *MatchedBlock = nullptr; + StaleMatcher::MatchMethod Method; + std::tie(MatchedBlock, Method) = + Matcher.matchBlockProbe(YamlBB.PseudoProbes, InlineTreeNodeMap); + if (!MatchedBlock) +continue; + // Don't override earlier matches + if (MatchedFlowBlocks.contains(MatchedBlock)) +continue; + MatchedFlowBlocks.try_emplace(MatchedBlock, Method); + MatchedBlocks[&YamlBP][YamlBB.Index] = {MatchedBlock, &YamlBB}; wlei-llvm wrote: Those lines are the same for the three matchings, probably can be factored out into a helper function. https://github.com/llvm/llvm-project/pull/99891 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
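For reference, a minimal sketch (untested) of the kind of helper this is suggesting, using only names that already appear in the hunk above; the "don't override earlier matches" check would stay at the call sites that need it:

```cpp
// Hypothetical lambda inside matchWeightsByHashes() factoring out the two
// bookkeeping lines repeated by each of the matching loops.
auto recordMatch = [&](const yaml::bolt::BinaryFunctionProfile &YamlBP,
                       const yaml::bolt::BinaryBasicBlockProfile &YamlBB,
                       const FlowBlock *MatchedBlock,
                       StaleMatcher::MatchMethod Method) {
  MatchedFlowBlocks.try_emplace(MatchedBlock, Method);
  MatchedBlocks[&YamlBP][YamlBB.Index] = {MatchedBlock, &YamlBB};
};
```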
[llvm-branch-commits] [clang] [flang] [lld] [llvm] [Flang] LLVM_ENABLE_RUNTIMES=flang-rt (PR #110217)
@@ -171,145 +76,88 @@ set(sources unit-map.cpp unit.cpp utf.cpp - ${FORTRAN_MODULE_OBJECTS} ) -include(AddFlangOffloadRuntime) - -# List of files that are buildable for all devices. -set(supported_files - ISO_Fortran_binding.cpp - allocatable.cpp - allocator-registry.cpp - array-constructor.cpp - assign.cpp - buffer.cpp - character.cpp - connection.cpp - copy.cpp - derived-api.cpp - derived.cpp - descriptor.cpp - descriptor-io.cpp - dot-product.cpp - edit-input.cpp - edit-output.cpp - environment.cpp - extrema.cpp - external-unit.cpp - file.cpp - findloc.cpp - format.cpp - inquiry.cpp - internal-unit.cpp - io-api.cpp - io-api-minimal.cpp - io-error.cpp - io-stmt.cpp - iostat.cpp - matmul-transpose.cpp - matmul.cpp - memory.cpp - misc-intrinsic.cpp - namelist.cpp - non-tbp-dio.cpp - numeric.cpp - pointer.cpp - product.cpp - pseudo-unit.cpp - ragged.cpp - stat.cpp - sum.cpp - support.cpp - terminator.cpp - tools.cpp - transformational.cpp - type-code.cpp - type-info.cpp - unit.cpp - utf.cpp +set(public_headers "") +file(GLOB_RECURSE public_headers + "${FLANGRUNTIME_SOURCE_DIR}/include/flang/Runtime/*.h" + "${FLANGRUNTIME_SOURCE_DIR}/include/flang/Common/*.h" ) -enable_cuda_compilation(FortranRuntime "${supported_files}") -enable_omp_offload_compilation("${supported_files}") clementval wrote: We have plan to use it in CUDA Fortran implementation. https://github.com/llvm/llvm-project/pull/110217 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [AMDGPU] Simplify dpp builtin handling (PR #115090)
rampitec wrote: > Should also teach instcombine to fold bitcast + dpp It still needs the downstack change to handle i8: https://github.com/llvm/llvm-project/pull/114887 https://github.com/llvm/llvm-project/pull/115090 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [AMDGPU] Introduce a "new" target feature `xf32-insts` (PR #115214)
https://github.com/shiltian created https://github.com/llvm/llvm-project/pull/115214 The feature itself is not new. Just to use it to guard corresponding instructions. >From 4077a199a263252ef45895e2c9b4e6375988fa88 Mon Sep 17 00:00:00 2001 From: Shilei Tian Date: Wed, 6 Nov 2024 16:15:50 -0500 Subject: [PATCH] [AMDGPU] Introduce a "new" target feature `xf32-insts` The feature itself is not new. Just to use it to guard corresponding instructions. --- llvm/lib/Target/AMDGPU/AMDGPU.td| 11 +++ llvm/lib/Target/AMDGPU/GCNSubtarget.h | 4 llvm/lib/Target/AMDGPU/VOP3PInstructions.td | 8 ++-- 3 files changed, 21 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td index 910f5e06a6f3c4..d068402e95716e 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.td +++ b/llvm/lib/Target/AMDGPU/AMDGPU.td @@ -1110,6 +1110,13 @@ def FeatureRequiresCOV6 : SubtargetFeature<"requires-cov6", "Target Requires Code Object V6" >; +def FeatureXF32Insts : SubtargetFeature<"xf32-insts", + "HasXF32Insts", + "true", + "Has instructions that support xf32 format, such as " + "v_mfma_f32_16x16x8xf32 and v_mfma_f32_32x32x4xf32" + >; + // Dummy feature used to disable assembler instructions. def FeatureDisable : SubtargetFeature<"", "FeatureDisable","true", @@ -1448,6 +1455,7 @@ def FeatureISAVersion9_4_Common : FeatureSet< FeatureFP8ConversionInsts, FeatureCvtFP8VOP1Bug, FeaturePkFmacF16Inst, + FeatureXF32Insts, FeatureAtomicFaddRtnInsts, FeatureAtomicFaddNoRtnInsts, FeatureAtomicBufferGlobalPkAddF16Insts, @@ -2289,6 +2297,9 @@ def HasAtomicCSubNoRtnInsts : Predicate<"Subtarget->hasAtomicCSubNoRtnInsts()">; def HasScalarDwordx3Loads : Predicate<"Subtarget->hasScalarDwordx3Loads()">; +def HasXF32Insts : Predicate<"Subtarget->hasXF32Insts()">, + AssemblerPredicate<(all_of FeatureXF32Insts)>; + // Include AMDGPU TD files include "SISchedule.td" include "GCNProcessors.td" diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h index 1ea3beb2855d69..6ff964077d8fd0 100644 --- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h +++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h @@ -179,6 +179,7 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo, bool HasDefaultComponentZero = false; bool HasAgentScopeFineGrainedRemoteMemoryAtomics = false; bool HasDefaultComponentBroadcast = false; + bool HasXF32Insts = false; /// The maximum number of instructions that may be placed within an S_CLAUSE, /// which is one greater than the maximum argument to S_CLAUSE. A value of 0 /// indicates a lack of S_CLAUSE support. @@ -1302,6 +1303,9 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo, return getGeneration() == GFX12; } + /// \returns true if the target has instructions with xf32 format support. + bool hasXF32Insts() const { return HasXF32Insts; } + /// \returns The maximum number of instructions that can be enclosed in an /// S_CLAUSE on the given subtarget, or 0 for targets that do not support that /// instruction. 
diff --git a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td index cdaf489792a24d..e246d433401f94 100644 --- a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td +++ b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td @@ -757,10 +757,12 @@ let Predicates = [isGFX90APlus] in { let SubtargetPredicate = isGFX940Plus, is_gfx940_xdl = 1 in { defm V_MFMA_I32_32X32X16I8 : MAIInst<"v_mfma_i32_32x32x16i8", "I32_I64_X32",int_amdgcn_mfma_i32_32x32x16_i8>; defm V_MFMA_I32_16X16X32I8 : MAIInst<"v_mfma_i32_16x16x32i8", "I32_I64_X16",int_amdgcn_mfma_i32_16x16x32_i8>; +} // End SubtargetPredicate = isGFX940Plus, is_gfx940_xdl = 1 + +let SubtargetPredicate = HasXF32Insts, is_gfx940_xdl = 1 in { defm V_MFMA_F32_16X16X8XF32 : MAIInst<"v_mfma_f32_16x16x8xf32", "F32_V2F32_X16", int_amdgcn_mfma_f32_16x16x8_xf32>; defm V_MFMA_F32_32X32X4XF32 : MAIInst<"v_mfma_f32_32x32x4xf32", "F32_V2F32_X32", int_amdgcn_mfma_f32_32x32x4_xf32>; - -} // End SubtargetPredicate = isGFX940Plus, is_gfx940_xdl = 1 +} // End SubtargetPredicate = HasXF32Insts, is_gfx940_xdl = 1 let SubtargetPredicate = HasFP8Insts, is_gfx940_xdl = 1 in { defm V_MFMA_F32_16X16X32_BF8_BF8 : MAIInst<"v_mfma_f32_16x16x32_bf8_bf8", "F32_I64_X32",int_amdgcn_mfma_f32_16x16x32_bf8_bf8>; @@ -1764,8 +1766,10 @@ defm V_MFMA_F64_4X4X4F64: VOP3P_Real_MFMA_gfx90a <0x6f>; defm V_MFMA_I32_32X32X16I8 : VOP3P_Real_MFMA_gfx940 <0x56, "v_mfma_i32_32x32x16_i8">; defm V_MFMA_I32_16X16X32I8 : VOP3P_Real_MFMA_gfx940 <0x57, "v_mfma_i32_16x16x32_i8">; +let SubtargetPredicate = HasXF32Insts in { defm V_MFMA_F32_16X16X8XF32 : VOP3P_Real_MFMA_gfx940 <0x3e, "v_mfma_f32_16x16x8_xf32">; defm V_MFMA_F32_32X32X4XF32 : VOP3P_Real_MFMA_gfx940 <0x3f, "v_mfma_f32_32x32x4_xf32">
[llvm-branch-commits] [llvm] [AMDGPU] Introduce a "new" target feature `xf32-insts` (PR #115214)
shiltian wrote: > [!WARNING] > This pull request is not mergeable via GitHub because a downstack PR is > open. Once all requirements are satisfied, merge this PR as a stack on Graphite (https://app.graphite.dev/github/pr/llvm/llvm-project/115214). * **#115214** 👈 * **#115211** * `main` This stack of pull requests is managed by Graphite (https://graphite.dev). https://github.com/llvm/llvm-project/pull/115214 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [AMDGPU] Introduce a "new" target feature `xf32-insts` (PR #115214)
https://github.com/shiltian edited https://github.com/llvm/llvm-project/pull/115214 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [AMDGPU] Introduce a "new" target feature `xf32-insts` (PR #115214)
llvmbot wrote: @llvm/pr-subscribers-backend-amdgpu Author: Shilei Tian (shiltian) Changes The feature itself is not new. Just to use it to guard corresponding instructions. --- Full diff: https://github.com/llvm/llvm-project/pull/115214.diff 3 Files Affected: - (modified) llvm/lib/Target/AMDGPU/AMDGPU.td (+11) - (modified) llvm/lib/Target/AMDGPU/GCNSubtarget.h (+4) - (modified) llvm/lib/Target/AMDGPU/VOP3PInstructions.td (+6-2) ``diff diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td index 910f5e06a6f3c4..d068402e95716e 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.td +++ b/llvm/lib/Target/AMDGPU/AMDGPU.td @@ -1110,6 +1110,13 @@ def FeatureRequiresCOV6 : SubtargetFeature<"requires-cov6", "Target Requires Code Object V6" >; +def FeatureXF32Insts : SubtargetFeature<"xf32-insts", + "HasXF32Insts", + "true", + "Has instructions that support xf32 format, such as " + "v_mfma_f32_16x16x8xf32 and v_mfma_f32_32x32x4xf32" + >; + // Dummy feature used to disable assembler instructions. def FeatureDisable : SubtargetFeature<"", "FeatureDisable","true", @@ -1448,6 +1455,7 @@ def FeatureISAVersion9_4_Common : FeatureSet< FeatureFP8ConversionInsts, FeatureCvtFP8VOP1Bug, FeaturePkFmacF16Inst, + FeatureXF32Insts, FeatureAtomicFaddRtnInsts, FeatureAtomicFaddNoRtnInsts, FeatureAtomicBufferGlobalPkAddF16Insts, @@ -2289,6 +2297,9 @@ def HasAtomicCSubNoRtnInsts : Predicate<"Subtarget->hasAtomicCSubNoRtnInsts()">; def HasScalarDwordx3Loads : Predicate<"Subtarget->hasScalarDwordx3Loads()">; +def HasXF32Insts : Predicate<"Subtarget->hasXF32Insts()">, + AssemblerPredicate<(all_of FeatureXF32Insts)>; + // Include AMDGPU TD files include "SISchedule.td" include "GCNProcessors.td" diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h index 1ea3beb2855d69..6ff964077d8fd0 100644 --- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h +++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h @@ -179,6 +179,7 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo, bool HasDefaultComponentZero = false; bool HasAgentScopeFineGrainedRemoteMemoryAtomics = false; bool HasDefaultComponentBroadcast = false; + bool HasXF32Insts = false; /// The maximum number of instructions that may be placed within an S_CLAUSE, /// which is one greater than the maximum argument to S_CLAUSE. A value of 0 /// indicates a lack of S_CLAUSE support. @@ -1302,6 +1303,9 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo, return getGeneration() == GFX12; } + /// \returns true if the target has instructions with xf32 format support. + bool hasXF32Insts() const { return HasXF32Insts; } + /// \returns The maximum number of instructions that can be enclosed in an /// S_CLAUSE on the given subtarget, or 0 for targets that do not support that /// instruction. 
diff --git a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td index cdaf489792a24d..e246d433401f94 100644 --- a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td +++ b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td @@ -757,10 +757,12 @@ let Predicates = [isGFX90APlus] in { let SubtargetPredicate = isGFX940Plus, is_gfx940_xdl = 1 in { defm V_MFMA_I32_32X32X16I8 : MAIInst<"v_mfma_i32_32x32x16i8", "I32_I64_X32",int_amdgcn_mfma_i32_32x32x16_i8>; defm V_MFMA_I32_16X16X32I8 : MAIInst<"v_mfma_i32_16x16x32i8", "I32_I64_X16",int_amdgcn_mfma_i32_16x16x32_i8>; +} // End SubtargetPredicate = isGFX940Plus, is_gfx940_xdl = 1 + +let SubtargetPredicate = HasXF32Insts, is_gfx940_xdl = 1 in { defm V_MFMA_F32_16X16X8XF32 : MAIInst<"v_mfma_f32_16x16x8xf32", "F32_V2F32_X16", int_amdgcn_mfma_f32_16x16x8_xf32>; defm V_MFMA_F32_32X32X4XF32 : MAIInst<"v_mfma_f32_32x32x4xf32", "F32_V2F32_X32", int_amdgcn_mfma_f32_32x32x4_xf32>; - -} // End SubtargetPredicate = isGFX940Plus, is_gfx940_xdl = 1 +} // End SubtargetPredicate = HasXF32Insts, is_gfx940_xdl = 1 let SubtargetPredicate = HasFP8Insts, is_gfx940_xdl = 1 in { defm V_MFMA_F32_16X16X32_BF8_BF8 : MAIInst<"v_mfma_f32_16x16x32_bf8_bf8", "F32_I64_X32",int_amdgcn_mfma_f32_16x16x32_bf8_bf8>; @@ -1764,8 +1766,10 @@ defm V_MFMA_F64_4X4X4F64: VOP3P_Real_MFMA_gfx90a <0x6f>; defm V_MFMA_I32_32X32X16I8 : VOP3P_Real_MFMA_gfx940 <0x56, "v_mfma_i32_32x32x16_i8">; defm V_MFMA_I32_16X16X32I8 : VOP3P_Real_MFMA_gfx940 <0x57, "v_mfma_i32_16x16x32_i8">; +let SubtargetPredicate = HasXF32Insts in { defm V_MFMA_F32_16X16X8XF32 : VOP3P_Real_MFMA_gfx940 <0x3e, "v_mfma_f32_16x16x8_xf32">; defm V_MFMA_F32_32X32X4XF32 : VOP3P_Real_MFMA_gfx940 <0x3f, "v_mfma_f32_32x32x4_xf32">; +} // End SubtargetPredicate = HasXF32Insts let SubtargetPredicate = HasFP8Insts in { defm V_MFMA_F32_16X16X32_BF8_BF8 : VOP3P_Real_MFMA_gfx940 <0x70>; defm V_MFMA_F32_16X16X32_BF8_FP8 : VOP3P_Real_MFMA_gfx940 <0x71>;
[llvm-branch-commits] [clang] [llvm] [AMDGPU] Introduce a new generic target `gfx9-4-generic` (PR #115190)
https://github.com/shiltian edited https://github.com/llvm/llvm-project/pull/115190 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [llvm] [AMDGPU] Introduce a new generic target `gfx9-4-generic` (PR #115190)
https://github.com/shiltian deleted https://github.com/llvm/llvm-project/pull/115190 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [llvm] [AMDGPU] Introduce a new generic target `gfx9-4-generic` (PR #115190)
@@ -775,6 +775,11 @@ let SubtargetPredicate = HasFP8Insts, is_gfx940_xdl = 1 in { defm V_MFMA_F32_32X32X16_FP8_FP8 : MAIInst<"v_mfma_f32_32x32x16_fp8_fp8", "F32_I64_X16",int_amdgcn_mfma_f32_32x32x16_fp8_fp8>; } // End SubtargetPredicate = HasFP8Insts, is_gfx940_xdl = 1 +let SubtargetPredicate = HasXF32Insts, is_gfx940_xdl = 1 in { shiltian wrote: merge error https://github.com/llvm/llvm-project/pull/115190 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [llvm] [AMDGPU] Introduce a new generic target `gfx9-4-generic` (PR #115190)
@@ -466,6 +470,7 @@ void AMDGPU::fillAMDGPUFeatureMap(StringRef GPU, const Triple &T, case GK_GFX942: case GK_GFX941: case GK_GFX940: +case GK_GFX9_4_GENERIC: shiltian wrote: This needs to be updated as well https://github.com/llvm/llvm-project/pull/115190 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [flang] [lld] [llvm] [Flang] LLVM_ENABLE_RUNTIMES=flang-rt (PR #110217)
https://github.com/clementval edited https://github.com/llvm/llvm-project/pull/110217 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [flang] [lld] [llvm] [Flang] LLVM_ENABLE_RUNTIMES=flang-rt (PR #110217)
@@ -171,145 +76,88 @@ set(sources unit-map.cpp unit.cpp utf.cpp - ${FORTRAN_MODULE_OBJECTS} ) -include(AddFlangOffloadRuntime) - -# List of files that are buildable for all devices. -set(supported_files - ISO_Fortran_binding.cpp - allocatable.cpp - allocator-registry.cpp - array-constructor.cpp - assign.cpp - buffer.cpp - character.cpp - connection.cpp - copy.cpp - derived-api.cpp - derived.cpp - descriptor.cpp - descriptor-io.cpp - dot-product.cpp - edit-input.cpp - edit-output.cpp - environment.cpp - extrema.cpp - external-unit.cpp - file.cpp - findloc.cpp - format.cpp - inquiry.cpp - internal-unit.cpp - io-api.cpp - io-api-minimal.cpp - io-error.cpp - io-stmt.cpp - iostat.cpp - matmul-transpose.cpp - matmul.cpp - memory.cpp - misc-intrinsic.cpp - namelist.cpp - non-tbp-dio.cpp - numeric.cpp - pointer.cpp - product.cpp - pseudo-unit.cpp - ragged.cpp - stat.cpp - sum.cpp - support.cpp - terminator.cpp - tools.cpp - transformational.cpp - type-code.cpp - type-info.cpp - unit.cpp - utf.cpp +set(public_headers "") +file(GLOB_RECURSE public_headers + "${FLANGRUNTIME_SOURCE_DIR}/include/flang/Runtime/*.h" + "${FLANGRUNTIME_SOURCE_DIR}/include/flang/Common/*.h" ) -enable_cuda_compilation(FortranRuntime "${supported_files}") -enable_omp_offload_compilation("${supported_files}") clementval wrote: Yes it 's gonna be supported upstream. https://github.com/llvm/llvm-project/pull/110217 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [AMDGPU] Introduce a "new" target feature `xf32-insts` (PR #115214)
https://github.com/arsenm approved this pull request. https://github.com/llvm/llvm-project/pull/115214 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] release/19.x: [LoongArch][Clang] Make the parameters and return value of {x, }vxor.v builti ns `unsigned char` vectors (#114513) (PR #114958)
https://github.com/xen0n approved this pull request. Test failures shouldn't be relevant. https://github.com/llvm/llvm-project/pull/114958 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits