@@ -2659,8 +2659,9 @@ An error will be given if:
- Specified values violate subtarget specifications;
- Specified values are not compatible with values provided through other
attributes;
- - The AMDGPU target backend is unable to create machine code that can meet
the
@@ -0,0 +1,31 @@
+//=== AMDGPUAddrSpace.h -*- C++
-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apa
@@ -959,6 +967,32 @@ def : GCNPat <
}
} // let OtherPredicates = [HasShaderCyclesRegister]
+def SIMM24bitPtr : ImmLeaf (Imm);}]
+>;
+
+multiclass SMPrefetchPat {
+ def : GCNPat <
+(smrd_prefetch (SMRDImm i64:$sbase, i32:$offset), timm, timm, (i32
cache_type)),
+(!cas
https://github.com/arsenm edited https://github.com/llvm/llvm-project/pull/72280
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
https://github.com/arsenm commented:
Is there any actual difference now between these and the HIP/OpenCL flavors
other than dropping the language from the name?
https://github.com/llvm/llvm-project/pull/72280
___
cfe-commits mailing list
cfe-commits@
@@ -798,6 +798,13 @@ static void InitializePredefinedMacros(const TargetInfo
&TI,
Builder.defineMacro("__ATOMIC_ACQ_REL", "4");
Builder.defineMacro("__ATOMIC_SEQ_CST", "5");
+ // Define macros for the clang atomic scopes.
+ Builder.defineMacro("__MEMORY_SCOPE_SYSTEM", "
https://github.com/arsenm edited https://github.com/llvm/llvm-project/pull/72280
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
@@ -205,6 +220,56 @@ class AtomicScopeHIPModel : public AtomicScopeModel {
}
};
+/// Defines the generic atomic scope model.
+class AtomicScopeGenericModel : public AtomicScopeModel {
+public:
+ /// The enum values match predefined built-in macros __ATOMIC_SCOPE_*.
+ enum
@@ -54,6 +59,16 @@ enum class SyncScope {
inline llvm::StringRef getAsString(SyncScope S) {
arsenm wrote:
I guess this is a pre-existing problem, but why don't these just match the
backend string names?
https://github.com/llvm/llvm-project/pull/72280
___
@@ -904,6 +904,32 @@ BUILTIN(__atomic_signal_fence, "vi", "n")
BUILTIN(__atomic_always_lock_free, "bzvCD*", "nE")
BUILTIN(__atomic_is_lock_free, "bzvCD*", "nE")
+// GNU atomic builtins with atomic scopes.
+ATOMIC_BUILTIN(__scoped_atomic_load, "v.", "t")
arsenm
arsenm wrote:
> So, while it's possible to create a combined option, using a separate option
> also makes sense. Do we generally try to avoid creating new command-line
> options?
Looking again, I see they are different and unrelated. I don't really
understand why we have amdgpu-waitcnt-forcez
https://github.com/arsenm approved this pull request.
https://github.com/llvm/llvm-project/pull/71874
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
@@ -156,19 +157,51 @@ class OffloadBinary : public Binary {
/// owns its memory.
class OffloadFile : public OwningBinary {
public:
+ /// An ordered pair of the target triple and the architecture.
using TargetID = std::pair;
OffloadFile(std::unique_ptr Binary,
@@ -406,5 +410,9 @@ TARGET_BUILTIN(__builtin_amdgcn_cvt_pk_fp8_f32, "iffiIb",
"nc", "fp8-insts")
TARGET_BUILTIN(__builtin_amdgcn_cvt_sr_bf8_f32, "ifiiIi", "nc", "fp8-insts")
TARGET_BUILTIN(__builtin_amdgcn_cvt_sr_fp8_f32, "ifiiIi", "nc", "fp8-insts")
+// OpenCL
+LANGBUILTIN(p
@@ -170,20 +173,46 @@ static Value *appendString(IRBuilder<> &Builder, Value
*Desc, Value *Arg,
return callAppendStringN(Builder, Desc, Arg, Length, IsLast);
}
+static Value *appendVectorArg(IRBuilder<> &Builder, Value *Desc, Value *Arg,
arsenm wrote:
All
@@ -278,7 +310,13 @@ static Value *callBufferedPrintfStart(
StringData(StringRef(), LenWithNull, LenWithNullAligned, false));
}
} else {
- int AllocSize = M->getDataLayout().getTypeAllocSize(Args[i]->getType());
+ int AllocSize = 0;
+ if (OC
@@ -278,7 +310,13 @@ static Value *callBufferedPrintfStart(
StringData(StringRef(), LenWithNull, LenWithNullAligned, false));
}
} else {
- int AllocSize = M->getDataLayout().getTypeAllocSize(Args[i]->getType());
+ int AllocSize = 0;
+ if (OC
@@ -170,20 +173,46 @@ static Value *appendString(IRBuilder<> &Builder, Value
*Desc, Value *Arg,
return callAppendStringN(Builder, Desc, Arg, Length, IsLast);
}
+static Value *appendVectorArg(IRBuilder<> &Builder, Value *Desc, Value *Arg,
+ bool
@@ -170,20 +173,46 @@ static Value *appendString(IRBuilder<> &Builder, Value
*Desc, Value *Arg,
return callAppendStringN(Builder, Desc, Arg, Length, IsLast);
}
+static Value *appendVectorArg(IRBuilder<> &Builder, Value *Desc, Value *Arg,
+ bool
@@ -450,9 +450,9 @@ bool FlattenCFGOpt::MergeIfRegion(BasicBlock *BB,
IRBuilder<> &Builder) {
if (!CompareIfRegionBlock(IfTrue1, IfTrue2, SecondEntryBlock))
return false;
} else if (IfTrue1 == FirstEntryBlock) {
-// The then-path is empty, so we must use "and"
@@ -25,13 +25,13 @@ define void @test_not_crash(i32 %in_a) #0 {
entry:
%cmp0 = icmp eq i32 %in_a, -1
%cmp1 = icmp ne i32 %in_a, 0
- %cond0 = and i1 %cmp0, %cmp1
+ %cond0 = or i1 %cmp0, %cmp1
br i1 %cond0, label %b0, label %b1
b0:; pre
@@ -450,9 +450,9 @@ bool FlattenCFGOpt::MergeIfRegion(BasicBlock *BB,
IRBuilder<> &Builder) {
if (!CompareIfRegionBlock(IfTrue1, IfTrue2, SecondEntryBlock))
return false;
} else if (IfTrue1 == FirstEntryBlock) {
-// The then-path is empty, so we must use "and"
@@ -50,13 +49,10 @@ define i64 @test_quadmask_constant_i64() {
; GFX11-LABEL: test_quadmask_constant_i64:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:s_mov_b32 s0, 0x85fe3a92
-; GFX11-NEXT:s_mov_b32 s1, 0x67de4
arsenm wrote:
> This is not being handled for AMDGPU Targets.
I'm assuming this is an artifact of passing all arguments both the host target
and the offload target? @jhuber6 what's the correct way of filtering out
irrelevant codegen options?
https://github.com/llvm/llvm-project/pull/70262
__
https://github.com/arsenm commented:
Needs test
https://github.com/llvm/llvm-project/pull/70760
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
https://github.com/arsenm approved this pull request.
https://github.com/llvm/llvm-project/pull/67829
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
https://github.com/arsenm closed https://github.com/llvm/llvm-project/pull/67829
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
@@ -0,0 +1,32 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
UTC_ARGS: --version 3
+; RUN: opt -assume-default-is-flat-addrspace -S -passes=infer-address-spaces <
%s 2>&1 | FileCheck %s
+
+@g = addrspace(1) global i32 0, align 4
+
+define ptr @f2()
@@ -334,6 +335,15 @@ template<> struct simplify_type {
}
};
+template <> struct GraphTraits {
arsenm wrote:
What's the issue with putting this here? Seems nicer than inlining yet another
DFS in another place
https://github.com/llvm/llvm-project/pull/7061
arsenm wrote:
For point 1, I would prefer to decouple the printf implementation choice from
the language. I would expect consistent defaults regardless of the language.
This also contradicts point 2? I thought the clang emitted path used hostcall,
and the backend path did not.
As for expandin
arsenm wrote:
I still think all the defaulted address space parameters should be purged and
it only invites bugs. It's a regression to introduce a new defaulted argument.
https://github.com/llvm/llvm-project/pull/71029
___
cfe-commits mailing list
cfe
@@ -8201,6 +8201,66 @@ SDValue TargetLowering::expandFMINNUM_FMAXNUM(SDNode
*Node,
return SDValue();
}
+SDValue TargetLowering::expandFMINIMUM_FMAXIMUM(SDNode *N,
+SelectionDAG &DAG) const {
+ SDLoc DL(N);
+ SDValue LHS = N-
@@ -8201,6 +8201,66 @@ SDValue TargetLowering::expandFMINNUM_FMAXNUM(SDNode
*Node,
return SDValue();
}
+SDValue TargetLowering::expandFMINIMUM_FMAXIMUM(SDNode *N,
+SelectionDAG &DAG) const {
+ SDLoc DL(N);
+ SDValue LHS = N-
@@ -8201,6 +8201,66 @@ SDValue TargetLowering::expandFMINNUM_FMAXNUM(SDNode
*Node,
return SDValue();
}
+SDValue TargetLowering::expandFMINIMUM_FMAXIMUM(SDNode *N,
+SelectionDAG &DAG) const {
+ SDLoc DL(N);
+ SDValue LHS = N-
@@ -13,6 +13,11 @@
// RUN: not %clang -### -c --target=aarch64 -mcmodel=medium %s 2>&1 |
FileCheck --check-prefix=ERR-MEDIUM %s
// RUN: not %clang -### -c --target=aarch64 -mcmodel=kernel %s 2>&1 |
FileCheck --check-prefix=ERR-KERNEL %s
// RUN: not %clang --target=aarch64_32-
@@ -13,6 +13,11 @@
// RUN: not %clang -### -c --target=aarch64 -mcmodel=medium %s 2>&1 |
FileCheck --check-prefix=ERR-MEDIUM %s
// RUN: not %clang -### -c --target=aarch64 -mcmodel=kernel %s 2>&1 |
FileCheck --check-prefix=ERR-KERNEL %s
// RUN: not %clang --target=aarch64_32-
@@ -17632,8 +17632,20 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned
BuiltinID,
case AMDGPU::BI__builtin_amdgcn_mov_dpp:
case AMDGPU::BI__builtin_amdgcn_update_dpp: {
llvm::SmallVector Args;
-for (unsigned I = 0; I != E->getNumArgs(); ++I)
- Args.pu
@@ -17632,8 +17632,20 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned
BuiltinID,
case AMDGPU::BI__builtin_amdgcn_mov_dpp:
case AMDGPU::BI__builtin_amdgcn_update_dpp: {
llvm::SmallVector Args;
-for (unsigned I = 0; I != E->getNumArgs(); ++I)
- Args.pu
arsenm wrote:
Added the string to the message. It's covered by the existing test
https://github.com/llvm/llvm-project/pull/68267
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/m
arsenm wrote:
> Can you please fix the description of the patch.
Fix what about it?
https://github.com/llvm/llvm-project/pull/68267
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
https://github.com/arsenm updated
https://github.com/llvm/llvm-project/pull/68267
>From db9b84992dbd6d75dc5c23b11d63c195400d5bc1 Mon Sep 17 00:00:00 2001
From: Matt Arsenault
Date: Thu, 31 Aug 2023 17:33:35 -0400
Subject: [PATCH] clang: Add pragma clang fp reciprocal
Just follow allow with the
arsenm wrote:
ping
https://github.com/llvm/llvm-project/pull/74056
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
https://github.com/arsenm commented:
What kind of optimizations does this enable? Would this be better expressed as
a maximum dispatch size, per dimension?
https://github.com/llvm/llvm-project/pull/75647
___
cfe-commits mailing list
cfe-commits@lists.
https://github.com/arsenm approved this pull request.
https://github.com/llvm/llvm-project/pull/75247
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
https://github.com/arsenm edited https://github.com/llvm/llvm-project/pull/74588
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
@@ -765,6 +766,138 @@ llvm::createMemLibcall(MachineIRBuilder &MIRBuilder,
MachineRegisterInfo &MRI,
return LegalizerHelper::Legalized;
}
+static RTLIB::Libcall getOutlineAtomicLibcall(MachineInstr &MI) {
+ unsigned Opc = MI.getOpcode();
+ auto &AtomicMI = cast(MI);
+ au
https://github.com/arsenm commented:
Are pointer type xchg / cmpxchg already tested?
https://github.com/llvm/llvm-project/pull/74588
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
https://github.com/arsenm commented:
Description should be adjusted, this isn't really changing the selection
anymore
https://github.com/llvm/llvm-project/pull/71556
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-b
@@ -961,6 +961,19 @@ GCNTTIImpl::instCombineIntrinsic(InstCombiner &IC,
IntrinsicInst &II) const {
return IC.replaceInstUsesWith(II,
Constant::getNullValue(II.getType()));
}
}
+if (ST->isWave32() && II.getType()->getIntegerBitWidth() == 64) {
+ // %
arsenm wrote:
> Looks like the new test might be failing on macOS/arm64:
> http://45.33.8.238/macm1/75171/step_11.txt
> Do you want to switch the test to use `-mtriple`, or do you want to revert
> and investigate what's up with the assert?
Really it needs #75469 for the triple to not default
arsenm wrote:
> // At -O0, fast-regalloc cannot cope with the live vregs necessary to
This sounds extremely unsound
https://github.com/llvm/llvm-project/pull/74588
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bi
https://github.com/arsenm approved this pull request.
https://github.com/llvm/llvm-project/pull/75625
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
https://github.com/arsenm commented:
Is #75799 related?
https://github.com/llvm/llvm-project/pull/73549
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
https://github.com/arsenm commented:
Seems to be missing atomicrmw fadd support?
https://github.com/llvm/llvm-project/pull/75917
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
https://github.com/arsenm edited https://github.com/llvm/llvm-project/pull/75917
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
@@ -0,0 +1,92 @@
+// RUN: %clang_cc1 -O0 -cl-std=CL2.0 -triple amdgcn-amd-amdhsa -target-cpu
gfx1200 \
+// RUN: %s -S -emit-llvm -o - | FileCheck %s
+
+// RUN: %clang_cc1 -O0 -cl-std=CL2.0 -triple amdgcn-amd-amdhsa -target-cpu
gfx1200 \
+// RUN: -S -o - %s | FileCheck -check
@@ -1368,6 +1391,28 @@ def int_amdgcn_struct_ptr_buffer_atomic_cmpswap :
Intrinsic<
// gfx908 intrinsic
def int_amdgcn_struct_buffer_atomic_fadd :
AMDGPUStructBufferAtomic;
def int_amdgcn_struct_ptr_buffer_atomic_fadd :
AMDGPUStructPtrBufferAtomic;
+// gfx12 intrinsic
+def i
@@ -0,0 +1,92 @@
+// RUN: %clang_cc1 -O0 -cl-std=CL2.0 -triple amdgcn-amd-amdhsa -target-cpu
gfx1200 \
+// RUN: %s -S -emit-llvm -o - | FileCheck %s
+
+// RUN: %clang_cc1 -O0 -cl-std=CL2.0 -triple amdgcn-amd-amdhsa -target-cpu
gfx1200 \
arsenm wrote:
Don't se
https://github.com/arsenm approved this pull request.
https://github.com/llvm/llvm-project/pull/72709
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
arsenm wrote:
> ping
The split up parts are still part of this one PR. Currently you're supposed to
create a separate PR for each separate change. The set behavior is to squash
all of these together on submit
https://github.com/llvm/llvm-project/pull/72556
___
@@ -104,3 +106,17 @@ void fun() {
(void) b;
(void) var_host_only;
}
+
+// NEG-NOT: external_func
+extern __global__ void external_func();
+// NEG-NOT: @external_dep
+extern void* const external_dep[] = {
+ (void*)(external_func)
+};
+// NEG-NOT: @external_arr
-
arsenm wrote:
ping
https://github.com/llvm/llvm-project/pull/68267
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
https://github.com/arsenm closed https://github.com/llvm/llvm-project/pull/68267
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
@@ -5767,12 +5768,23 @@ void Clang::ConstructJob(Compilation &C, const
JobAction &JA,
} else if (Triple.getArch() == llvm::Triple::x86_64) {
Ok = llvm::is_contained({"small", "kernel", "medium", "large", "tiny"},
CM);
-} else if (Tri
@@ -150,8 +150,8 @@ BUILTIN(__builtin_amdgcn_mqsad_u32_u8, "V4UiWUiUiV4Ui",
"nc")
// Ballot builtins.
//===--===//
-TARGET_BUILTIN(__builtin_amdgcn_ballot_w32, "Uib", "nc", "wavefrontsize32")
-TARGET_BUILTIN
@@ -0,0 +1,15 @@
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -target-cpu gfx900 -x hip
-emit-llvm -fcuda-is-device -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -target-cpu gfx900 -x hip -S
-fcuda-is-device -o - %s
https://github.com/arsenm approved this pull request.
https://github.com/llvm/llvm-project/pull/70565
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
@@ -104,3 +106,17 @@ void fun() {
(void) b;
(void) var_host_only;
}
+
+// NEG-NOT: external_func
+extern __global__ void external_func();
+// NEG-NOT: @external_dep
+extern void* const external_dep[] = {
+ (void*)(external_func)
+};
+// NEG-NOT: @external_arr
-
@@ -406,5 +410,9 @@ TARGET_BUILTIN(__builtin_amdgcn_cvt_pk_fp8_f32, "iffiIb",
"nc", "fp8-insts")
TARGET_BUILTIN(__builtin_amdgcn_cvt_sr_bf8_f32, "ifiiIi", "nc", "fp8-insts")
TARGET_BUILTIN(__builtin_amdgcn_cvt_sr_fp8_f32, "ifiiIi", "nc", "fp8-insts")
+// OpenCL
+LANGBUILTIN(p
https://github.com/arsenm commented:
Is there a separate PR open for "Add vector processing support to AMDGPU
printf"? I think it's easiest to move this part forward first
https://github.com/llvm/llvm-project/pull/72556
___
cfe-commits mailing list
c
@@ -25,13 +25,13 @@ define void @test_not_crash(i32 %in_a) #0 {
entry:
%cmp0 = icmp eq i32 %in_a, -1
%cmp1 = icmp ne i32 %in_a, 0
- %cond0 = and i1 %cmp0, %cmp1
+ %cond0 = or i1 %cmp0, %cmp1
arsenm wrote:
Original tests still changing? Needs new copies
@@ -406,5 +410,9 @@ TARGET_BUILTIN(__builtin_amdgcn_cvt_pk_fp8_f32, "iffiIb",
"nc", "fp8-insts")
TARGET_BUILTIN(__builtin_amdgcn_cvt_sr_bf8_f32, "ifiiIi", "nc", "fp8-insts")
TARGET_BUILTIN(__builtin_amdgcn_cvt_sr_fp8_f32, "ifiiIi", "nc", "fp8-insts")
+// OpenCL
+LANGBUILTIN(p
https://github.com/arsenm closed https://github.com/llvm/llvm-project/pull/66651
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
arsenm wrote:
> lgtm, but a strict reading of the spec would filter out arbitrary other
> ext_vector_types
Still think this would be a good follow up
https://github.com/llvm/llvm-project/pull/66651
___
cfe-commits mailing list
cfe-commits@lists.llvm
https://github.com/arsenm approved this pull request.
https://github.com/llvm/llvm-project/pull/73906
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
https://github.com/arsenm created
https://github.com/llvm/llvm-project/pull/74056
This reverts commit ef388334ee5a3584255b9ef5b3fefdb244fa3fd7.
The referenced issue violates the spec for finite-only math only by
using a return value for a constant infinity. If the interpretation
is results and
https://github.com/arsenm approved this pull request.
https://github.com/llvm/llvm-project/pull/74279
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
@@ -765,6 +766,134 @@ llvm::createMemLibcall(MachineIRBuilder &MIRBuilder,
MachineRegisterInfo &MRI,
return LegalizerHelper::Legalized;
}
+static RTLIB::Libcall getOutlineAtomicLibcall(MachineInstr &MI) {
+ unsigned Opc = MI.getOpcode();
+ auto &AtomicMI = cast(MI);
+ au
@@ -765,6 +766,134 @@ llvm::createMemLibcall(MachineIRBuilder &MIRBuilder,
MachineRegisterInfo &MRI,
return LegalizerHelper::Legalized;
}
+static RTLIB::Libcall getOutlineAtomicLibcall(MachineInstr &MI) {
+ unsigned Opc = MI.getOpcode();
+ auto &AtomicMI = cast(MI);
+ au
arsenm wrote:
> > > // At -O0, fast-regalloc cannot cope with the live vregs necessary to
> >
> >
> > This sounds extremely unsound
>
> Just to clarify, is the expectation for me to solve this in this patch or can
> this be done in a separate patch?
This should be separate, it's an unrelated
arsenm wrote:
I still think fabs+fcmp is a better canonical form. Between some targets
offering free fabs, and other combines between fabs and sources/sinks, I think
it's better to split out this way. Any codegen regression should be addressed
in the backend.
https://github.com/llvm/llvm-proj
https://github.com/arsenm approved this pull request.
https://github.com/llvm/llvm-project/pull/76149
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
@@ -703,8 +713,37 @@ void WaitcntBrackets::updateByEvent(const SIInstrInfo *TII,
setRegScore(RegNo, T, CurrScore);
}
}
-if (Inst.mayStore() && (TII->isDS(Inst) || mayWriteLDSThroughDMA(Inst))) {
- setRegScore(SQ_MAX_PGM_VGPRS + EXTRA_VGPR_LDS, T, Curr
@@ -703,8 +713,37 @@ void WaitcntBrackets::updateByEvent(const SIInstrInfo *TII,
setRegScore(RegNo, T, CurrScore);
}
}
-if (Inst.mayStore() && (TII->isDS(Inst) || mayWriteLDSThroughDMA(Inst))) {
- setRegScore(SQ_MAX_PGM_VGPRS + EXTRA_VGPR_LDS, T, Curr
@@ -703,8 +713,37 @@ void WaitcntBrackets::updateByEvent(const SIInstrInfo *TII,
setRegScore(RegNo, T, CurrScore);
}
}
-if (Inst.mayStore() && (TII->isDS(Inst) || mayWriteLDSThroughDMA(Inst))) {
- setRegScore(SQ_MAX_PGM_VGPRS + EXTRA_VGPR_LDS, T, Curr
https://github.com/arsenm approved this pull request.
https://github.com/llvm/llvm-project/pull/74588
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
@@ -30113,32 +30120,40 @@ TargetLoweringBase::AtomicExpansionKind
X86TargetLowering::shouldExpandAtomicStoreInIR(StoreInst *SI) const {
Type *MemType = SI->getValueOperand()->getType();
- bool NoImplicitFloatOps =
- SI->getFunction()->hasFnAttribute(Attribute::NoImplic
Thorsten =?utf-8?q?Schütt?= ,
Thorsten =?utf-8?q?Schütt?= ,
Thorsten =?utf-8?q?Schütt?=
Message-ID:
In-Reply-To:
@@ -6548,6 +6534,54 @@ bool CombinerHelper::tryFoldBoolSelectToLogic(GSelect
*Select,
return false;
}
+bool CombinerHelper::tryFoldSelectOfBinOps(GSelect *Sel
Thorsten =?utf-8?q?Sch=C3=BCtt?= ,
Thorsten =?utf-8?q?Sch=C3=BCtt?= ,
Thorsten =?utf-8?q?Sch=C3=BCtt?=
Message-ID:
In-Reply-To:
https://github.com/arsenm approved this pull request.
https://github.com/llvm/llvm-project/pull/76763
___
cfe-commits mai
https://github.com/arsenm updated
https://github.com/llvm/llvm-project/pull/74332
>From 405b8705782e50fd72bfd868f51df3111fe9f095 Mon Sep 17 00:00:00 2001
From: Matt Arsenault
Date: Sat, 2 Dec 2023 18:07:50 +0900
Subject: [PATCH 1/2] DAG: Implement promotion for strict_fpextend
Test is a placeh
https://github.com/arsenm closed https://github.com/llvm/llvm-project/pull/76217
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
@@ -2621,6 +2642,29 @@ SDValue
DAGTypeLegalizer::PromoteFloatRes_FP_ROUND(SDNode *N) {
return DAG.getNode(GetPromotionOpcode(VT, NVT), DL, NVT, Round);
}
+// Explicit operation to reduce precision. Reduce the value to half precision
+// and promote it back to the legal typ
https://github.com/arsenm closed https://github.com/llvm/llvm-project/pull/74332
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
@@ -106,6 +107,25 @@ uint32_t AMDGPU::calcEFlagsV4() const {
return retMach | retXnack | retSramEcc;
}
+uint32_t AMDGPU::calcEFlagsV6() const {
+ uint32_t flags = calcEFlagsV4();
+
+ uint32_t genericVersion =
+ getEFlags(ctx.objectFiles[0]) & EF_AMDGPU_GENERIC_VERSION
@@ -1557,140 +1559,98 @@ const EnumEntry ElfHeaderMipsFlags[] = {
ENUM_ENT(EF_MIPS_ARCH_64R6, "mips64r6")
};
+#define AMDGPU_MACH_ENUM_ENTS
\
+ ENUM_ENT(EF_AMDGPU_MACH_NONE, "none"),
\
@@ -1097,7 +1097,7 @@ def : Pat <
multiclass f16_fp_Pats {
// f16_to_fp patterns
def : GCNPat <
-(f32 (f16_to_fp i32:$src0)),
+(f32 (any_f16_to_fp i32:$src0)),
arsenm wrote:
Yes, with appropriate tests added alongside it
https://github.com/llvm/l
https://github.com/arsenm approved this pull request.
I don't know anything about x86 but this seems plausible
https://github.com/llvm/llvm-project/pull/74275
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailm
@@ -448,8 +456,12 @@ GlobalVariable *createPGOFuncNameVar(Module &M,
new GlobalVariable(M, Value->getType(), true, Linkage, Value,
getPGOFuncNameVarName(PGOFuncName, Linkage));
+ // If the target is a GPU, make the symbol protected so it can
+
@@ -959,8 +959,12 @@ void CodeGenPGO::emitCounterIncrement(CGBuilderTy
&Builder, const Stmt *S,
unsigned Counter = (*RegionCounterMap)[S];
- llvm::Value *Args[] = {FuncNameVar,
- Builder.getInt64(FunctionHash),
+ // Make sure that pointer to globa
https://github.com/arsenm edited https://github.com/llvm/llvm-project/pull/76587
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
101 - 200 of 2453 matches
Mail list logo