@@ -167,6 +167,10 @@ def FeatureCuMode : SubtargetFeature<"cumode",
"Enable CU wavefront execution mode"
>;
+def FeaturePreciseMemory
arsenm wrote:
The subtarget feature prefix should be removed. The subtarget feature name is
not the user facing component
https://github.com/arsenm approved this pull request.
https://github.com/llvm/llvm-project/pull/81083
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
https://github.com/arsenm edited https://github.com/llvm/llvm-project/pull/81083
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
@@ -5908,7 +5908,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl
GD, unsigned BuiltinID,
}
}
-assert(PTy->canLosslesslyBitCastTo(FTy->getParamType(i)) &&
+assert(ArgValue->getType()->canLosslesslyBitCastTo(PTy) &&
--
https://github.com/arsenm approved this pull request.
https://github.com/llvm/llvm-project/pull/76955
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
https://github.com/arsenm edited https://github.com/llvm/llvm-project/pull/81331
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
https://github.com/arsenm commented:
Missing the clang builtin test
https://github.com/llvm/llvm-project/pull/81331
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
@@ -3759,7 +3759,6 @@ def CALL_PROTOTYPE :
include "NVPTXIntrinsics.td"
-
arsenm wrote:
Random whitespace change
https://github.com/llvm/llvm-project/pull/81331
___
cfe-commits mailing list
cfe-commits@lists.llvm
@@ -312,6 +312,12 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) {
CI->replaceAllUsesWith(ConstantInt::get(Type::getInt64Ty(Context), 0));
break;
}
+ case Intrinsic::readfixedtimer: {
+errs() << "WARNING: this target does not support the llvm.readfix
https://github.com/arsenm edited https://github.com/llvm/llvm-project/pull/81331
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
https://github.com/arsenm edited https://github.com/llvm/llvm-project/pull/79035
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
https://github.com/arsenm commented:
Needs documentation in AMDGPUUsage. Should also clarify behavior of 0
https://github.com/llvm/llvm-project/pull/79035
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/l
@@ -356,6 +356,19 @@ void AMDGPUTargetCodeGenInfo::setFunctionDeclAttributes(
if (NumVGPR != 0)
F->addFnAttr("amdgpu-num-vgpr", llvm::utostr(NumVGPR));
}
+
+ if (const auto *Attr = FD->getAttr()) {
+uint32_t X = Attr->getNumWorkGroupsX();
+uint32_t Y = Attr
@@ -1108,3 +1108,8 @@ void
GCNUserSGPRUsageInfo::allocKernargPreloadSGPRs(unsigned NumSGPRs) {
unsigned GCNUserSGPRUsageInfo::getNumFreeUserSGPRs() {
return AMDGPU::getMaxNumUserSGPRs(ST) - NumUsedUserSGPRs;
}
+
+SmallVector
arsenm wrote:
std::array<3>?
ht
@@ -0,0 +1,84 @@
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s
| FileCheck %s
arsenm wrote:
Don't need -verify-machineinstrs
https://github.com/llvm/llvm-project/pull/79035
___
cfe-co
@@ -356,6 +356,19 @@ void AMDGPUTargetCodeGenInfo::setFunctionDeclAttributes(
if (NumVGPR != 0)
F->addFnAttr("amdgpu-num-vgpr", llvm::utostr(NumVGPR));
}
+
+ if (const auto *Attr = FD->getAttr()) {
+uint32_t X = Attr->getNumWorkGroupsX();
+uint32_t Y = Attr
@@ -2031,6 +2031,13 @@ def AMDGPUNumVGPR : InheritableAttr {
let Subjects = SubjectList<[Function], ErrorDiag, "kernel functions">;
}
+def AMDGPUNumWorkGroups : InheritableAttr {
+ let Spellings = [Clang<"amdgpu_num_work_groups", 0>];
arsenm wrote:
max_num
arsenm wrote:
Also should have follow up patch to propagate in AMDGPUAttributor, and another
to lower to !range in AMDGPULowerKernelAttributes
https://github.com/llvm/llvm-project/pull/79035
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http
https://github.com/arsenm commented:
Add to release notes?
https://github.com/llvm/llvm-project/pull/81331
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
https://github.com/arsenm closed https://github.com/llvm/llvm-project/pull/81108
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
@@ -356,6 +356,19 @@ void AMDGPUTargetCodeGenInfo::setFunctionDeclAttributes(
if (NumVGPR != 0)
F->addFnAttr("amdgpu-num-vgpr", llvm::utostr(NumVGPR));
}
+
+ if (const auto *Attr = FD->getAttr()) {
+uint32_t X = Attr->getNumWorkGroupsX();
+uint32_t Y = Attr
@@ -1453,9 +1457,11 @@ void AtomicInfo::EmitAtomicLoadLibcall(llvm::Value
*AddForLoaded,
}
llvm::Value *AtomicInfo::EmitAtomicLoadOp(llvm::AtomicOrdering AO,
- bool IsVolatile) {
+ bool IsVolati
@@ -1453,9 +1457,11 @@ void AtomicInfo::EmitAtomicLoadLibcall(llvm::Value
*AddForLoaded,
}
llvm::Value *AtomicInfo::EmitAtomicLoadOp(llvm::AtomicOrdering AO,
- bool IsVolatile) {
+ bool IsVolati
@@ -0,0 +1,84 @@
+// RUN: %clang_cc1 -triple s390x-linux-gnu -O1 -emit-llvm %s -o - | FileCheck
%s
+//
+// Test that floating point atomic stores and loads do not get casted to/from
+// integer.
+
+#include
+
+_Atomic float Af;
+_Atomic double Ad;
+_Atomic long double Ald;
+
+//
@@ -18057,6 +18057,14 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned
BuiltinID,
/*ReturnType*/ Op0->getType(), Intrinsic::dx_frac,
ArrayRef{Op0}, nullptr, "dx.frac");
}
+ case Builtin::BI__builtin_hlsl_elementwise_rcp: {
+Value *Op0 = EmitScalar
@@ -6826,6 +6826,10 @@ def warn_floatingpoint_eq : Warning<
"comparing floating point with == or != is unsafe">,
InGroup>, DefaultIgnore;
+def warn_fenv_access : Warning<
+ "floating point environment access without #pragma STDC FENV_ACCESS set ON">,
+ InGroup>;
@@ -494,6 +494,14 @@ MetadataStreamerMsgPackV4::getHSAKernelProps(const
MachineFunction &MF,
Kern[".max_flat_workgroup_size"] =
Kern.getDocument()->getNode(MFI.getMaxFlatWorkGroupSize());
+ unsigned NumWGX = MFI.getMaxNumWorkGroupsX();
+ unsigned NumWGY = MFI.getMax
@@ -1312,6 +1312,11 @@ The AMDGPU backend supports the following LLVM IR
attributes.
the frame. This is an internal
detail of how LDS variables are lowered,
language front ends should not
@@ -137,6 +137,12 @@ Removed Compiler Flags
Attribute Changes in Clang
--
+- Introduced a new function attribute
``__attribute__((amdgpu_max_num_work_groups(x, y, z)))`` or
arsenm wrote:
s/work_groups/workgroup/
https://github.com/ll
@@ -137,6 +137,12 @@ Removed Compiler Flags
Attribute Changes in Clang
--
+- Introduced a new function attribute
``__attribute__((amdgpu_max_num_work_groups(x, y, z)))`` or
arsenm wrote:
Ugh. The ISA manuals usually use "workgroup". r
@@ -137,6 +137,12 @@ Removed Compiler Flags
Attribute Changes in Clang
--
+- Introduced a new function attribute
``__attribute__((amdgpu_max_num_work_groups(x, y, z)))`` or
arsenm wrote:
The backend facing parts seem more consistently
@@ -137,6 +137,12 @@ Removed Compiler Flags
Attribute Changes in Clang
--
+- Introduced a new function attribute
``__attribute__((amdgpu_max_num_work_groups(x, y, z)))`` or
arsenm wrote:
I think some of the AMDGPUUsage work-groups are
https://github.com/arsenm approved this pull request.
https://github.com/llvm/llvm-project/pull/83446
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
https://github.com/arsenm edited https://github.com/llvm/llvm-project/pull/83446
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
@@ -1953,13 +1966,22 @@ void CodeGenFunction::EmitAtomicStore(RValue rvalue,
LValue dest,
}
// Okay, we're doing this natively.
-llvm::Value *intValue = atomics.convertRValueToInt(rvalue);
+llvm::Value *ValToStore =
+atomics.convertRValueToInt(rvalue,
Nathan =?utf-8?q?Gauër?= ,
Nathan =?utf-8?q?Gauër?=
Message-ID:
In-Reply-To:
@@ -1130,8 +1130,96 @@ struct BitTest {
static BitTest decodeBitTestBuiltin(unsigned BuiltinID);
};
+
+// Returns the first convergence entry/loop/anchor instruction found in |BB|.
+// std::nullo
Nathan =?utf-8?q?Gauër?= ,
Nathan =?utf-8?q?Gauër?=
Message-ID:
In-Reply-To:
@@ -1130,8 +1130,96 @@ struct BitTest {
static BitTest decodeBitTestBuiltin(unsigned BuiltinID);
};
+
+// Returns the first convergence entry/loop/anchor instruction found in |BB|.
+// std::nullo
Nathan =?utf-8?q?Gauër?= ,
Nathan =?utf-8?q?Gauër?=
Message-ID:
In-Reply-To:
@@ -5686,6 +5686,10 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo
&CallInfo,
if (!CI->getType()->isVoidTy())
CI->setName("call");
+ if (getTarget().getTriple().isSPIRVLogical() &
Nathan =?utf-8?q?Gauër?= ,
Nathan =?utf-8?q?Gauër?=
Message-ID:
In-Reply-To:
@@ -1130,8 +1130,96 @@ struct BitTest {
static BitTest decodeBitTestBuiltin(unsigned BuiltinID);
};
+
+// Returns the first convergence entry/loop/anchor instruction found in |BB|.
+// std::nullo
Nathan =?utf-8?q?Gauër?= ,
Nathan =?utf-8?q?Gauër?=
Message-ID:
In-Reply-To:
@@ -1130,8 +1130,96 @@ struct BitTest {
static BitTest decodeBitTestBuiltin(unsigned BuiltinID);
};
+
+// Returns the first convergence entry/loop/anchor instruction found in |BB|.
+// std::nullo
@@ -137,6 +137,12 @@ Removed Compiler Flags
Attribute Changes in Clang
--
+- Introduced a new function attribute
``__attribute__((amdgpu_max_num_work_groups(x, y, z)))`` or
arsenm wrote:
I think ".max_num_workgroups" "amdgpu-max-num-w
https://github.com/arsenm approved this pull request.
https://github.com/llvm/llvm-project/pull/79035
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
Nathan =?utf-8?q?Gauër?= ,
Nathan =?utf-8?q?Gauër?= ,
Nathan =?utf-8?q?Gauër?= ,
Nathan =?utf-8?q?Gauër?=
Message-ID:
In-Reply-To:
@@ -4554,6 +4554,13 @@ def HLSLWaveActiveCountBits : LangBuiltin<"HLSL_LANG"> {
let Prototype = "unsigned int(bool)";
}
+// HLSL
+def HLSLWav
@@ -2067,6 +2067,10 @@ Constant *ConstantExpr::getBitCast(Constant *C, Type
*DstTy,
Constant *ConstantExpr::getAddrSpaceCast(Constant *C, Type *DstTy,
bool OnlyIfReduced) {
+ // Skip cast if types are identical
arsenm
https://github.com/arsenm approved this pull request.
https://github.com/llvm/llvm-project/pull/78759
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
https://github.com/arsenm closed https://github.com/llvm/llvm-project/pull/78759
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
https://github.com/arsenm commented:
Title should be rephrased; this doesn't have anything to do with inlining
https://github.com/llvm/llvm-project/pull/81058
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailm
arsenm wrote:
> So overall, the practical effect of the `denormal-fp-math` attribute being
> set incorrectly doesn't appear to matter.
It matters more for AMDGPU, where we need to care because some instructions
just don't respect denormals. We legalize some operations differently depending
on
@@ -0,0 +1,698 @@
+//===-- ExpandVariadicsPass.cpp *- C++ -*-=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache
@@ -2714,7 +2714,17 @@ static void setLinkageForGV(llvm::GlobalValue *GV, const
NamedDecl *ND) {
void CodeGenModule::CreateFunctionTypeMetadataForIcall(const FunctionDecl *FD,
llvm::Function *F) {
- // Only if we are che
https://github.com/arsenm edited https://github.com/llvm/llvm-project/pull/79035
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
@@ -814,6 +814,15 @@ bool shouldEmitConstantsToTextSection(const Triple &TT);
/// to integer.
int getIntegerAttribute(const Function &F, StringRef Name, int Default);
+/// \returns Unsigned Integer value requested using \p F's \p Name attribute.
+///
+/// \returns \p Default i
@@ -356,6 +356,24 @@ void AMDGPUTargetCodeGenInfo::setFunctionDeclAttributes(
if (NumVGPR != 0)
F->addFnAttr("amdgpu-num-vgpr", llvm::utostr(NumVGPR));
}
+
+ if (const auto *Attr = FD->getAttr()) {
+uint32_t X = Attr->getMaxNumWorkGroupsX()
+
@@ -0,0 +1,84 @@
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck %s
+
+; Attribute not specified.
+; CHECK-LABEL: {{^}}empty_no_attribute:
+define amdgpu_kernel void @empty_no_attribute() {
+entry:
+ ret void
+}
+
+; Ignore if number of work groups for x dime
@@ -139,6 +139,36 @@ kernel void
reqd_work_group_size_32_2_1_flat_work_group_size_16_128() {
// CHECK: define{{.*}} amdgpu_kernel void
@reqd_work_group_size_32_2_1_flat_work_group_size_16_128()
[[FLAT_WORK_GROUP_SIZE_16_128:#[0-9]+]]
}
+__attribute__((amdgpu_max_num_work_gr
@@ -194,3 +204,105 @@ __global__ void non_cexpr_waves_per_eu_2() {}
// expected-error@+1{{'amdgpu_waves_per_eu' attribute requires parameter 1 to
be an integer constant}}
__attribute__((amdgpu_waves_per_eu(2, ipow2(2
__global__ void non_cexpr_waves_per_eu_2_4() {}
+
+// ex
@@ -0,0 +1,84 @@
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck %s
+
arsenm wrote:
Missing a test for the various IR attribute parsing error conditions
https://github.com/llvm/llvm-project/pull/79035
___
https://github.com/arsenm requested changes to this pull request.
https://github.com/llvm/llvm-project/pull/79035
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
@@ -137,6 +137,11 @@ Removed Compiler Flags
Attribute Changes in Clang
--
+- Introduced a new function attribute
``__attribute__((amdgpu_max_num_work_groups(x, y, z)))`` or
+ ``[[clang::amdgpu_max_num_work_groups(x, y, z)]]`` for the AMDGPU target.
T
arsenm wrote:
> But the shared library stuff isn't an issue for AMDGPU, right?
No, we don't support shared library linking yet
https://github.com/llvm/llvm-project/pull/80475
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.o
arsenm wrote:
> In this case, MMRAs would only help in the sense that you won't need any new
> attributes and can just add an MMRA such as `atomic-lowering:fine-grained`.
> It's not really what MMRAs were made for (because this attribute doesn't
> affect semantics, just lowering style I think?
arsenm wrote:
reverse ping
https://github.com/llvm/llvm-project/pull/71019
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
https://github.com/arsenm edited https://github.com/llvm/llvm-project/pull/72556
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
@@ -2550,6 +2550,11 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl
GD, unsigned BuiltinID,
&getTarget().getLongDoubleFormat() == &llvm::APFloat::IEEEquad())
BuiltinID = mutateLongDoubleBuiltin(BuiltinID);
+ // Mutate the printf builtin ID so that we us
@@ -3616,6 +3617,12 @@ unsigned FunctionDecl::getBuiltinID(bool
ConsiderWrapperFunctions) const {
if (!ConsiderWrapperFunctions && getStorageClass() == SC_Static)
return 0;
+ // AMDGCN implementation supports printf as a builtin
+ // for OpenCL
+ if (Context.getTarge
@@ -406,5 +410,9 @@ TARGET_BUILTIN(__builtin_amdgcn_cvt_pk_fp8_f32, "iffiIb",
"nc", "fp8-insts")
TARGET_BUILTIN(__builtin_amdgcn_cvt_sr_bf8_f32, "ifiiIi", "nc", "fp8-insts")
TARGET_BUILTIN(__builtin_amdgcn_cvt_sr_fp8_f32, "ifiiIi", "nc", "fp8-insts")
+// OpenCL
+LANGBUILTIN(p
https://github.com/arsenm requested changes to this pull request.
https://github.com/llvm/llvm-project/pull/72556
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
@@ -202,12 +207,20 @@ RValue
CodeGenFunction::EmitAMDGPUDevicePrintfCallExpr(const CallExpr *E) {
Args.push_back(Arg);
}
- llvm::IRBuilder<> IRB(Builder.GetInsertBlock(), Builder.GetInsertPoint());
- IRB.SetCurrentDebugLocation(Builder.getCurrentDebugLocation());
+ a
@@ -202,12 +207,20 @@ RValue
CodeGenFunction::EmitAMDGPUDevicePrintfCallExpr(const CallExpr *E) {
Args.push_back(Arg);
}
- llvm::IRBuilder<> IRB(Builder.GetInsertBlock(), Builder.GetInsertPoint());
- IRB.SetCurrentDebugLocation(Builder.getCurrentDebugLocation());
+ a
@@ -170,20 +173,46 @@ static Value *appendString(IRBuilder<> &Builder, Value
*Desc, Value *Arg,
return callAppendStringN(Builder, Desc, Arg, Length, IsLast);
}
+static Value *appendVectorArg(IRBuilder<> &Builder, Value *Desc, Value *Arg,
arsenm wrote:
Thes
arsenm wrote:
> @arsenm That makes sense, I don't think MMRA fits the fine-grained use case
> either. Does that mean we can stick with the approach from this PR? @b-sumner
> mentioned there was another similar approach being worked on.
Something like this, but the naming and direction of this
@@ -2561,6 +2567,70 @@ bool SIMemoryLegalizer::expandAtomicCmpxchgOrRmw(const
SIMemOpInfo &MOI,
return Changed;
}
+bool SIMemoryLegalizer::GFX9InsertWaitcntForPreciseMem(MachineFunction &MF) {
+ const GCNSubtarget &ST = MF.getSubtarget();
+ const SIInstrInfo *TII = ST.get
https://github.com/arsenm commented:
I don't really understand all of these manual scheduling control intrinsic.
This brings the total up to 4? I think effort would be better spent making the
scheduler better instead of giving users more footguns to shoot themselves (and
the compiler) with
ht
@@ -1410,13 +1414,14 @@ RValue
AtomicInfo::ConvertIntToValueOrAtomic(llvm::Value *IntVal,
auto *ValTy = AsValue
? CGF.ConvertTypeForMem(ValueTy)
: getAtomicAddress().getElementType();
-if (ValTy->isIntegerTy()) {
- asser
@@ -1410,13 +1414,14 @@ RValue
AtomicInfo::ConvertIntToValueOrAtomic(llvm::Value *IntVal,
auto *ValTy = AsValue
? CGF.ConvertTypeForMem(ValueTy)
: getAtomicAddress().getElementType();
-if (ValTy->isIntegerTy()) {
- asser
@@ -292,8 +292,14 @@ void AMDGPUTargetInfo::getTargetDefines(const LangOptions
&Opts,
}
Builder.defineMacro(Twine("__") + Twine(CanonName) + Twine("__"));
+
+ // Don't emit feature macros in host code because in such cases the
+ // feature list is not accurate.
+ if (I
@@ -6,32 +6,32 @@
// R600-based processors.
//
-// RUN: %clang -E -dM -target r600 -mcpu=r600 %s 2>&1 | FileCheck
--check-prefixes=ARCH-R600,R600 %s -DCPU=r600
-// RUN: %clang -E -dM -target r600 -mcpu=rv630 %s 2>&1 | FileCheck
--check-prefixes=ARCH-R600,R600 %s -DCPU=r600
-
@@ -839,6 +839,18 @@ unsigned test_wavefrontsize() {
return __builtin_amdgcn_wavefrontsize();
}
+// CHECK-LABEL test_get_fpenv(
arsenm wrote:
Ideally we would also warn if you used these without fenv access enabled
https://github.com/llvm/llvm-project/pul
@@ -839,6 +839,18 @@ unsigned test_wavefrontsize() {
return __builtin_amdgcn_wavefrontsize();
}
+// CHECK-LABEL test_get_fpenv(
arsenm wrote:
It's a standard C concept. You need to enable #pragma STDC FENV_ACCESS ON to do
anything valid with the floating p
@@ -839,6 +839,18 @@ unsigned test_wavefrontsize() {
return __builtin_amdgcn_wavefrontsize();
}
+// CHECK-LABEL test_get_fpenv(
arsenm wrote:
This interface is 1 level removed from the hardware, but this cannot be
legitimately used without fenv access enab
https://github.com/arsenm commented:
I think we would be better off teaching an IR optimizer pass to recognize the
divide pattern and remap it to the load from the location, rather than forcing
the complexity into every frontend
https://github.com/llvm/llvm-project/pull/83927
https://github.com/arsenm edited https://github.com/llvm/llvm-project/pull/83927
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
@@ -139,6 +139,36 @@ kernel void
reqd_work_group_size_32_2_1_flat_work_group_size_16_128() {
// CHECK: define{{.*}} amdgpu_kernel void
@reqd_work_group_size_32_2_1_flat_work_group_size_16_128()
[[FLAT_WORK_GROUP_SIZE_16_128:#[0-9]+]]
}
+__attribute__((amdgpu_max_num_work_gr
@@ -194,3 +204,105 @@ __global__ void non_cexpr_waves_per_eu_2() {}
// expected-error@+1{{'amdgpu_waves_per_eu' attribute requires parameter 1 to
be an integer constant}}
__attribute__((amdgpu_waves_per_eu(2, ipow2(2
__global__ void non_cexpr_waves_per_eu_2_4() {}
+
+// ex
@@ -0,0 +1,77 @@
+; RUN: not llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s 2>&1 | FileCheck
--check-prefix=ERROR %s
+
+; ERROR: error: can't parse integer attribute -1 in amdgpu-max-num-work-groups
+define amdgpu_kernel void @empty_max_num_work_groups_neg_num1() #21 {
+entry:
@@ -6826,6 +6826,10 @@ def warn_floatingpoint_eq : Warning<
"comparing floating point with == or != is unsafe">,
InGroup>, DefaultIgnore;
+def warn_fenv_access : Warning<
+ "floating point environment access without #pragma STDC FENV_ACCESS set ON">,
+ InGroup>;
https://github.com/arsenm commented:
Also should get a run line that errors due to wavesize?
https://github.com/llvm/llvm-project/pull/79980
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-co
@@ -25,4 +25,4 @@ entry:
}
!llvm.module.flags = !{!0}
-!0 = !{i32 1, !"amdgpu_code_object_version", i32 500}
+!0 = !{i32 1, !"amdhsa_code_object_version", i32 500}
arsenm wrote:
Separate would be better
https://github.com/llvm/llvm-project/pull/79905
__
@@ -175,6 +175,8 @@ Predefined Macros
- Defined when the GPU default stream is set to per-thread mode.
* - ``HIP_API_PER_THREAD_DEFAULT_STREAM``
- Alias to ``__HIP_API_PER_THREAD_DEFAULT_STREAM__``. Deprecated.
+ * - ``__AMDGCN_WAVEFRONT_SIZE__``
https://github.com/arsenm updated
https://github.com/llvm/llvm-project/pull/80102
>From b64f7ba4afc6cbb3e5e34757e6979a0d5ee73e2b Mon Sep 17 00:00:00 2001
From: Sameer Sahasrabuddhe
Date: Tue, 30 Jan 2024 11:26:53 +0530
Subject: [PATCH] [AMDGPU] Every convergent operation needs post-isel
proces
@@ -151,7 +151,7 @@ BUILTIN(__builtin_amdgcn_mqsad_u32_u8, "V4UiWUiUiV4Ui",
"nc")
//===--===//
TARGET_BUILTIN(__builtin_amdgcn_ballot_w32, "ZUib", "nc", "wavefrontsize32")
-TARGET_BUILTIN(__builtin_amdgcn_ba
@@ -4,13 +4,10 @@
// RUN: %clang_cc1 -triple amdgcn-- -target-cpu gfx1010 -target-feature
-wavefrontsize64 -verify -S -o - %s
// RUN: %clang_cc1 -triple amdgcn-- -target-cpu gfx1010 -verify -S -o - %s
+// expected-no-diagnostics
+
typedef unsigned long ulong;
void test_ba
https://github.com/arsenm updated
https://github.com/llvm/llvm-project/pull/71019
>From 2477ae87e7bb82b4551e42b8255dfe93dadff453 Mon Sep 17 00:00:00 2001
From: Pravin Jagtap
Date: Thu, 2 Nov 2023 01:05:35 -0400
Subject: [PATCH 1/6] [AMDGPU] Add code model (#70760) test for amdgpu target.
---
arsenm wrote:
> LGTM. Please update PR title before merging
So this was only supposed to add the test, or implement this too?
https://github.com/llvm/llvm-project/pull/71019
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.or
https://github.com/arsenm approved this pull request.
https://github.com/llvm/llvm-project/pull/79980
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
https://github.com/arsenm approved this pull request.
https://github.com/llvm/llvm-project/pull/80303
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
@@ -0,0 +1,21 @@
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -emit-llvm -disable-llvm-optzns
-mprintf-kind=hostcall -fno-builtin-printf -fcuda-is-device \
+// RUN: -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -emi
arsenm wrote:
> > It looks reasonable to me, although I'm not really an AMDGPU person. /me
> > summons @arsenm ?
>
> AMDGPU backend relies on LLVM passes to translate printf at IR level.
For the OpenCL case only, not for HIP/OpenMP
https://github.com/llvm/llvm-project/pull/68515
_
arsenm wrote:
> I wonder if, instead, we should just have `-ffast-math` always downgrade
> `-fdenormal-fp-math=ieee` to `-fdenormal-fp-math=preserve-sign`, under the
> rationale of "you asked for fast math, and preserve-sign mode might let the
> compiler generate faster code"?
This could also
@@ -1127,10 +1131,16 @@ void AMDGPUAsmPrinter::emitPALFunctionMetadata(const
MachineFunction &MF) {
MD->setFunctionScratchSize(FnName, MFI.getStackSize());
const GCNSubtarget &ST = MF.getSubtarget();
- // Set compute registers
- MD->setRsrc1(CallingConv::AMDGPU_CS,
-
301 - 400 of 2453 matches
Mail list logo