@@ -1,12 +1,605 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
UTC_ARGS: --version 5
arsenm wrote:
I guess this doesn't apply if it was a fail test before
https://github.com/llvm/llvm-project/pull/98331
__
https://github.com/arsenm approved this pull request.
https://github.com/llvm/llvm-project/pull/98331
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
https://github.com/arsenm edited https://github.com/llvm/llvm-project/pull/98331
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
@@ -0,0 +1,117 @@
+// REQUIRES: nvptx-registered-target
+//
+// RUN: %clang_cc1 -ffp-contract=off -triple nvptx-unknown-unknown -target-cpu
\
+// RUN: sm_86 -target-feature +ptx72 -fcuda-is-device -x cuda -emit-llvm -o
- %s \
arsenm wrote:
I assume you don't
https://github.com/arsenm closed https://github.com/llvm/llvm-project/pull/83131
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
Author: Matt Arsenault
Date: 2024-07-14T19:15:54+04:00
New Revision: 677cc15e0ff2e0e6aa30538eb187990a6a8f53c0
URL:
https://github.com/llvm/llvm-project/commit/677cc15e0ff2e0e6aa30538eb187990a6a8f53c0
DIFF:
https://github.com/llvm/llvm-project/commit/677cc15e0ff2e0e6aa30538eb187990a6a8f53c0.diff
@@ -1,9 +0,0 @@
-; RUN: llvm-as < %s | llvm-dis | FileCheck %s
-; RUN: verify-uselistorder %s
-; Basic smoke test for x86_mmx type.
-
-; CHECK: define x86_mmx @sh16
-define x86_mmx @sh16(x86_mmx %A) {
arsenm wrote:
I'd expect this to require movement to a bitcod
arsenm wrote:
> Hey @arsenm this broke all AMDGPU OpenMP Offload buildbots (e.g.,
> https://lab.llvm.org/buildbot/#/builders/30).
>
> Any chance you can fix these issues?
Can you attach before/after IR and just XFAIL them for now?
https://github.com/llvm/llvm-project/pull/83131
__
@@ -5892,8 +5892,6 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl
GD, unsigned BuiltinID,
getTarget().getTriple().isAMDGCN() ||
(getTarget().getTriple().isSPIRV() &&
getTarget().getTriple().getVendor() == Triple::VendorType::AMD)) {
-
@@ -316,3 +316,82 @@ define <2 x i32> @test_trunc_both_reversed_vector(<2 x
i64> %a) {
%res = trunc nsw nuw <2 x i64> %a to <2 x i32>
ret <2 x i32> %res
}
+
+define ptr @gep_nuw(ptr %p, i64 %idx) {
+; CHECK: %gep = getelementptr nuw i8, ptr %p, i64 %idx
+ %gep = getelemen
arsenm wrote:
> I'm now wondering if adding a new builtin is needed at all, or if it should
> just be part of the original builtin? It's an additive change.
Maybe?
>
> Should we also rename the MMRA to `amdgpu-fence-as` (remove OpenCL from the
> name) ?
>
I definitely do not want to mainta
@@ -6091,6 +5982,70 @@ static SDValue lowerBALLOTIntrinsic(const
SITargetLowering &TLI, SDNode *N,
DAG.getConstant(0, SL, MVT::i32), DAG.getCondCode(ISD::SETNE));
}
+static SDValue lowerLaneOp(const SITargetLowering &TLI, SDNode *N,
+ Selection
@@ -5386,6 +5386,94 @@ bool
AMDGPULegalizerInfo::legalizeDSAtomicFPIntrinsic(LegalizerHelper &Helper,
return true;
}
+bool AMDGPULegalizerInfo::legalizeLaneOp(LegalizerHelper &Helper,
+ MachineInstr &MI,
+
@@ -6091,6 +5982,70 @@ static SDValue lowerBALLOTIntrinsic(const
SITargetLowering &TLI, SDNode *N,
DAG.getConstant(0, SL, MVT::i32), DAG.getCondCode(ISD::SETNE));
}
+static SDValue lowerLaneOp(const SITargetLowering &TLI, SDNode *N,
+ Selection
@@ -5386,6 +5386,94 @@ bool
AMDGPULegalizerInfo::legalizeDSAtomicFPIntrinsic(LegalizerHelper &Helper,
return true;
}
+bool AMDGPULegalizerInfo::legalizeLaneOp(LegalizerHelper &Helper,
+ MachineInstr &MI,
+
@@ -504,3 +508,15 @@ def AMDGPUdiv_fmas : PatFrags<(ops node:$src0, node:$src1,
node:$src2, node:$vcc
def AMDGPUperm : PatFrags<(ops node:$src0, node:$src1, node:$src2),
[(int_amdgcn_perm node:$src0, node:$src1, node:$src2),
(AMDGPUperm_impl node:$src0, node:$src1, node:$
@@ -5386,6 +5386,94 @@ bool
AMDGPULegalizerInfo::legalizeDSAtomicFPIntrinsic(LegalizerHelper &Helper,
return true;
}
+bool AMDGPULegalizerInfo::legalizeLaneOp(LegalizerHelper &Helper,
+ MachineInstr &MI,
+
https://github.com/arsenm approved this pull request.
https://github.com/llvm/llvm-project/pull/90994
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
arsenm wrote:
> ping Ping Do you have another review comment?
This has now confused me. You should roll back to the case where you only
changed the scalar behavior. Any vector behavior change should be a separate
PR, if that is even correct. I would still like to know what the gcc behavior
is
@@ -0,0 +1,25 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -disable-O0-optnone
-emit-llvm \
+// RUN: %s -o - | opt -S -passes=mem2reg | FileCheck %s
+
+// CHECK-LABEL: define dso_local half @test_convert_from_bf16_to_fp16(
+// CHECK-SAME: bfloat noundef [[A:%.*]]) #[
@@ -5386,6 +5386,130 @@ bool
AMDGPULegalizerInfo::legalizeDSAtomicFPIntrinsic(LegalizerHelper &Helper,
return true;
}
+bool AMDGPULegalizerInfo::legalizeLaneOp(LegalizerHelper &Helper,
+ MachineInstr &MI,
+
@@ -5386,6 +5386,130 @@ bool
AMDGPULegalizerInfo::legalizeDSAtomicFPIntrinsic(LegalizerHelper &Helper,
return true;
}
+bool AMDGPULegalizerInfo::legalizeLaneOp(LegalizerHelper &Helper,
+ MachineInstr &MI,
+
@@ -5982,6 +5982,68 @@ static SDValue lowerBALLOTIntrinsic(const
SITargetLowering &TLI, SDNode *N,
DAG.getConstant(0, SL, MVT::i32), DAG.getCondCode(ISD::SETNE));
}
+static SDValue lowerLaneOp(const SITargetLowering &TLI, SDNode *N,
+ Selection
@@ -5386,6 +5386,130 @@ bool
AMDGPULegalizerInfo::legalizeDSAtomicFPIntrinsic(LegalizerHelper &Helper,
return true;
}
+bool AMDGPULegalizerInfo::legalizeLaneOp(LegalizerHelper &Helper,
+ MachineInstr &MI,
+
@@ -5386,6 +5386,130 @@ bool
AMDGPULegalizerInfo::legalizeDSAtomicFPIntrinsic(LegalizerHelper &Helper,
return true;
}
+bool AMDGPULegalizerInfo::legalizeLaneOp(LegalizerHelper &Helper,
+ MachineInstr &MI,
+
@@ -504,3 +508,16 @@ def AMDGPUdiv_fmas : PatFrags<(ops node:$src0, node:$src1,
node:$src2, node:$vcc
def AMDGPUperm : PatFrags<(ops node:$src0, node:$src1, node:$src2),
[(int_amdgcn_perm node:$src0, node:$src1, node:$src2),
(AMDGPUperm_impl node:$src0, node:$src1, node:$
Nathan =?utf-8?q?Gauër?= ,
Nathan =?utf-8?q?Gauër?= ,
Nathan =?utf-8?q?Gauër?= ,
Nathan =?utf-8?q?Gauër?=
Message-ID:
In-Reply-To:
https://github.com/arsenm edited https://github.com/llvm/llvm-project/pull/88918
___
cfe-commits mailing list
cfe-commit
Nathan =?utf-8?q?Gauër?= ,
Nathan =?utf-8?q?Gauër?= ,
Nathan =?utf-8?q?Gauër?= ,
Nathan =?utf-8?q?Gauër?=
Message-ID:
In-Reply-To:
https://github.com/arsenm edited https://github.com/llvm/llvm-project/pull/88918
___
cfe-commits mailing list
cfe-commit
Nathan =?utf-8?q?Gau=C3=ABr?= ,
Nathan =?utf-8?q?Gau=C3=ABr?= ,
Nathan =?utf-8?q?Gau=C3=ABr?= ,
Nathan =?utf-8?q?Gau=C3=ABr?=
Message-ID:
In-Reply-To:
https://github.com/arsenm approved this pull request.
https://github.com/llvm/llvm-project/pull/88918
Nathan =?utf-8?q?Gau=C3=ABr?= ,
Nathan =?utf-8?q?Gau=C3=ABr?= ,
Nathan =?utf-8?q?Gau=C3=ABr?= ,
Nathan =?utf-8?q?Gau=C3=ABr?=
Message-ID:
In-Reply-To:
@@ -1586,6 +1586,12 @@ class CodeGenModule : public CodeGenTypeCache {
void AddGlobalDtor(llvm::Function *Dtor, int Priority
https://github.com/arsenm updated
https://github.com/llvm/llvm-project/pull/90925
>From 4760ebce0ff7725f4bb75f5107f551d867e4db6d Mon Sep 17 00:00:00 2001
From: Ellis Hoag
Date: Thu, 2 May 2024 17:47:38 -0700
Subject: [PATCH 1/4] [modules] Accept equivalent module caches from different
symlink
@@ -1,22 +1,113 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
UTC_ARGS: --version 4
// REQUIRES: amdgpu-registered-target
// RUN: %clang_cc1 %s -emit-llvm -O0 -o - \
-// RUN: -triple=amdgcn-amd-amdhsa | opt -S | FileCheck %s
+// RUN: -tr
@@ -4408,6 +4409,42 @@ Target-Specific Extensions
Clang supports some language features conditionally on some targets.
+AMDGPU Language Extensions
+--
+
+__builtin_amdgcn_fence
+^^
+
+``__builtin_amdgcn_fence`` emits a fence.
+
+* `
@@ -0,0 +1,55 @@
+/*=== __clang_hip_device_macro_guards.h - guards for HIP device macros -===
+ *
+ * Part of the LLVM Project, under the Apache License v2.0 with LLVM
Exceptions.
+ * See https://llvm.org/LICENSE.txt for license information.
+ * SPDX-License-Identifier: Apach
@@ -24,6 +24,7 @@ MODULE_PASS("amdgpu-lower-ctor-dtor",
AMDGPUCtorDtorLoweringPass())
MODULE_PASS("amdgpu-lower-module-lds", AMDGPULowerModuleLDSPass(*this))
MODULE_PASS("amdgpu-printf-runtime-binding", AMDGPUPrintfRuntimeBindingPass())
MODULE_PASS("amdgpu-unify-metadata", AMD
@@ -157,7 +157,7 @@ llvm::Value
*CodeGen::emitRoundPointerUpToAlignment(CodeGenFunction &CGF,
llvm::Value *RoundUp = CGF.Builder.CreateConstInBoundsGEP1_32(
CGF.Builder.getInt8Ty(), Ptr, Align.getQuantity() - 1);
return CGF.Builder.CreateIntrinsic(
- llvm::Intri
@@ -247,7 +247,7 @@ Address CodeGen::emitMergePHI(CodeGenFunction &CGF, Address
Addr1,
bool CodeGen::isEmptyField(ASTContext &Context, const FieldDecl *FD,
bool AllowArrays, bool AsIfNoUniqueAddr) {
- if (FD->isUnnamedBitField())
+ if (FD->isUnnam
https://github.com/arsenm commented:
I don't understand how anyone is supposed to use this. This is exposing
extremely specific, random low level details of the scheduling. Users claim
they want scheduling controls, but what they actually want is the scheduler to
just do the right thing. We sh
@@ -1284,7 +1284,29 @@ The AMDGPU backend implements the following LLVM IR
intrinsics.
| ``// 5 MFMA``
|
``__builtin_amdgcn_sched_group_barrier(8, 5, 0)``
- llvm.amdgcn.i
https://github.com/arsenm edited https://github.com/llvm/llvm-project/pull/85304
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
@@ -2658,21 +2676,102 @@
IGroupLPDAGMutation::invertSchedBarrierMask(SchedGroupMask Mask) const {
return InvertedMask;
}
+void IGroupLPDAGMutation::addSchedGroupBarrierRules() {
+
+ /// Whether or not the instruction has no true data predecessors
+ /// with opcode \p Opc.
@@ -0,0 +1,111 @@
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -triple spirv64-amd-amdhsa -fsyntax-only -verify %s
+
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable
+
+kernel void test () {
+
+ int sgpr = 0, vgpr = 0, imm = 0;
+
+ // sgpr constraints
+ __asm__ ("s_
@@ -0,0 +1,111 @@
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -triple spirv64-amd-amdhsa -fsyntax-only -verify %s
+
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable
+
+kernel void test () {
+
+ int sgpr = 0, vgpr = 0, imm = 0;
+
+ // sgpr constraints
+ __asm__ ("s_
@@ -0,0 +1,111 @@
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -triple spirv64-amd-amdhsa -fsyntax-only -verify %s
+
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable
+
+kernel void test () {
+
+ int sgpr = 0, vgpr = 0, imm = 0;
+
+ // sgpr constraints
+ __asm__ ("s_
@@ -5386,6 +5386,153 @@ bool
AMDGPULegalizerInfo::legalizeDSAtomicFPIntrinsic(LegalizerHelper &Helper,
return true;
}
+bool AMDGPULegalizerInfo::legalizeLaneOp(LegalizerHelper &Helper,
+ MachineInstr &MI,
+
@@ -5386,6 +5386,153 @@ bool
AMDGPULegalizerInfo::legalizeDSAtomicFPIntrinsic(LegalizerHelper &Helper,
return true;
}
+bool AMDGPULegalizerInfo::legalizeLaneOp(LegalizerHelper &Helper,
+ MachineInstr &MI,
+
@@ -5386,6 +5386,153 @@ bool
AMDGPULegalizerInfo::legalizeDSAtomicFPIntrinsic(LegalizerHelper &Helper,
return true;
}
+bool AMDGPULegalizerInfo::legalizeLaneOp(LegalizerHelper &Helper,
+ MachineInstr &MI,
+
@@ -5386,6 +5386,153 @@ bool
AMDGPULegalizerInfo::legalizeDSAtomicFPIntrinsic(LegalizerHelper &Helper,
return true;
}
+bool AMDGPULegalizerInfo::legalizeLaneOp(LegalizerHelper &Helper,
+ MachineInstr &MI,
+
@@ -2176,26 +2176,23 @@ def int_amdgcn_wave_reduce_umin : AMDGPUWaveReduce;
def int_amdgcn_wave_reduce_umax : AMDGPUWaveReduce;
def int_amdgcn_readfirstlane :
- ClangBuiltin<"__builtin_amdgcn_readfirstlane">,
- Intrinsic<[llvm_i32_ty], [llvm_i32_ty],
+ Intrinsic<[llvm_any_t
arsenm wrote:
> > (You can even place `.quad sym[0].hash; .long sym[0].size` in a section
> > `SHF_LINK_ORDER` linking to the global variable for linker garbage
> > collection.)
> > The runtime can build a map correlating hashes to sizes, which can be used
> > to answer variable size queries.
https://github.com/arsenm commented:
amdgpu changes lgtm
https://github.com/llvm/llvm-project/pull/91854
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
@@ -4408,6 +4409,42 @@ Target-Specific Extensions
Clang supports some language features conditionally on some targets.
+AMDGPU Language Extensions
+--
+
+__builtin_amdgcn_fence
+^^
+
+``__builtin_amdgcn_fence`` emits a fence.
+
+* `
@@ -58,7 +58,7 @@ CHECK-CNT3-NOT: {{^}}this is duplicate
CHECK-CNT4-COUNT-5: this is duplicate
CHECK-CNT4-EMPTY:
-Many-label:
+Many-LABEL:
arsenm wrote:
I would be careful about touching FileCheck tests. The point might be the wrong
label
https://githu
@@ -5386,6 +5386,153 @@ bool
AMDGPULegalizerInfo::legalizeDSAtomicFPIntrinsic(LegalizerHelper &Helper,
return true;
}
+bool AMDGPULegalizerInfo::legalizeLaneOp(LegalizerHelper &Helper,
+ MachineInstr &MI,
+
@@ -5387,6 +5387,212 @@ bool
AMDGPULegalizerInfo::legalizeDSAtomicFPIntrinsic(LegalizerHelper &Helper,
return true;
}
+bool AMDGPULegalizerInfo::legalizeLaneOp(LegalizerHelper &Helper,
+ MachineInstr &MI,
+
@@ -5387,6 +5387,212 @@ bool
AMDGPULegalizerInfo::legalizeDSAtomicFPIntrinsic(LegalizerHelper &Helper,
return true;
}
+bool AMDGPULegalizerInfo::legalizeLaneOp(LegalizerHelper &Helper,
+ MachineInstr &MI,
+
@@ -3400,7 +3400,7 @@ def : GCNPat<
// FIXME: Should also do this for readlane, but tablegen crashes on
// the ignored src1.
def : GCNPat<
- (int_amdgcn_readfirstlane (i32 imm:$src)),
+ (i32 (AMDGPUreadfirstlane (i32 imm:$src))),
arsenm wrote:
We might need
@@ -6086,6 +6086,68 @@ static SDValue lowerBALLOTIntrinsic(const
SITargetLowering &TLI, SDNode *N,
DAG.getConstant(0, SL, MVT::i32), DAG.getCondCode(ISD::SETNE));
}
+static SDValue lowerLaneOp(const SITargetLowering &TLI, SDNode *N,
+ Selection
Nathan =?utf-8?q?Gauër?= ,
Nathan =?utf-8?q?Gauër?= ,
Nathan =?utf-8?q?Gauër?= ,
Nathan =?utf-8?q?Gauër?=
Message-ID:
In-Reply-To:
@@ -1586,6 +1586,12 @@ class CodeGenModule : public CodeGenTypeCache {
void AddGlobalDtor(llvm::Function *Dtor, int Priority = 65535,
https://github.com/arsenm approved this pull request.
https://github.com/llvm/llvm-project/pull/92232
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
arsenm wrote:
> It's still used:
>
> ```
> /work/kparzysz/git/llvm.org/mlir/lib/Target/LLVM/ROCDL/Target.cpp: In member
> function ‘std::optional >
> mlir::ROCDL::SerializeGPUModuleBase::assembleIsa(llvm::StringRef)’:
> /work/kparzysz/git/llvm.org/mlir/lib/Target/LLVM/ROCDL/Target.cpp:302:15:
https://github.com/arsenm approved this pull request.
https://github.com/llvm/llvm-project/pull/92294
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
@@ -243,11 +243,16 @@ def VOP_READFIRSTLANE : VOPProfile <[i32, i32, untyped,
untyped]> {
// FIXME: Specify SchedRW for READFIRSTLANE_B32
// TODO: There is VOP3 encoding also
def V_READFIRSTLANE_B32 : VOP1_Pseudo <"v_readfirstlane_b32",
VOP_READFIRSTLANE,
-
@@ -780,14 +780,22 @@ defm V_SUBREV_U32 : VOP2Inst <"v_subrev_u32",
VOP_I32_I32_I32_ARITH, null_frag,
// These are special and do not read the exec mask.
let isConvergent = 1, Uses = [] in {
-def V_READLANE_B32 : VOP2_Pseudo<"v_readlane_b32", VOP_READLANE,
- [(set i32:$vdst,
@@ -243,11 +243,16 @@ def VOP_READFIRSTLANE : VOPProfile <[i32, i32, untyped,
untyped]> {
// FIXME: Specify SchedRW for READFIRSTLANE_B32
// TODO: There is VOP3 encoding also
def V_READFIRSTLANE_B32 : VOP1_Pseudo <"v_readfirstlane_b32",
VOP_READFIRSTLANE,
-
@@ -243,11 +243,16 @@ def VOP_READFIRSTLANE : VOPProfile <[i32, i32, untyped,
untyped]> {
// FIXME: Specify SchedRW for READFIRSTLANE_B32
// TODO: There is VOP3 encoding also
def V_READFIRSTLANE_B32 : VOP1_Pseudo <"v_readfirstlane_b32",
VOP_READFIRSTLANE,
-
@@ -243,11 +243,16 @@ def VOP_READFIRSTLANE : VOPProfile <[i32, i32, untyped,
untyped]> {
// FIXME: Specify SchedRW for READFIRSTLANE_B32
// TODO: There is VOP3 encoding also
def V_READFIRSTLANE_B32 : VOP1_Pseudo <"v_readfirstlane_b32",
VOP_READFIRSTLANE,
-
@@ -243,11 +243,16 @@ def VOP_READFIRSTLANE : VOPProfile <[i32, i32, untyped,
untyped]> {
// FIXME: Specify SchedRW for READFIRSTLANE_B32
// TODO: There is VOP3 encoding also
def V_READFIRSTLANE_B32 : VOP1_Pseudo <"v_readfirstlane_b32",
VOP_READFIRSTLANE,
-
@@ -6086,6 +6086,62 @@ static SDValue lowerBALLOTIntrinsic(const
SITargetLowering &TLI, SDNode *N,
DAG.getConstant(0, SL, MVT::i32), DAG.getCondCode(ISD::SETNE));
}
+static SDValue lowerLaneOp(const SITargetLowering &TLI, SDNode *N,
+ Selection
@@ -0,0 +1,25 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
UTC_ARGS: --version 4
+//RUN: %clang_cc1 %s -emit-llvm -O1 -o - | FileCheck %s
arsenm wrote:
codegen tests need an explicit target
https://github.com/llvm/llvm-proj
https://github.com/arsenm edited https://github.com/llvm/llvm-project/pull/89217
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
@@ -5387,6 +5387,192 @@ bool
AMDGPULegalizerInfo::legalizeDSAtomicFPIntrinsic(LegalizerHelper &Helper,
return true;
}
+bool AMDGPULegalizerInfo::legalizeLaneOp(LegalizerHelper &Helper,
+ MachineInstr &MI,
+
@@ -5387,6 +5387,192 @@ bool
AMDGPULegalizerInfo::legalizeDSAtomicFPIntrinsic(LegalizerHelper &Helper,
return true;
}
+bool AMDGPULegalizerInfo::legalizeLaneOp(LegalizerHelper &Helper,
+ MachineInstr &MI,
+
@@ -6086,6 +6086,62 @@ static SDValue lowerBALLOTIntrinsic(const
SITargetLowering &TLI, SDNode *N,
DAG.getConstant(0, SL, MVT::i32), DAG.getCondCode(ISD::SETNE));
}
+static SDValue lowerLaneOp(const SITargetLowering &TLI, SDNode *N,
+ Selection
https://github.com/arsenm requested changes to this pull request.
There should be no need to introduce same-sized value casts, whether bitcast or
ptrtoint in either legalizer
https://github.com/llvm/llvm-project/pull/89217
___
cfe-commits mailing lis
@@ -5387,6 +5387,192 @@ bool
AMDGPULegalizerInfo::legalizeDSAtomicFPIntrinsic(LegalizerHelper &Helper,
return true;
}
+bool AMDGPULegalizerInfo::legalizeLaneOp(LegalizerHelper &Helper,
+ MachineInstr &MI,
+
@@ -5387,6 +5387,192 @@ bool
AMDGPULegalizerInfo::legalizeDSAtomicFPIntrinsic(LegalizerHelper &Helper,
return true;
}
+bool AMDGPULegalizerInfo::legalizeLaneOp(LegalizerHelper &Helper,
+ MachineInstr &MI,
+
https://github.com/arsenm edited https://github.com/llvm/llvm-project/pull/92725
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
https://github.com/arsenm commented:
On this and the previous, can you add a section to AMDGPUUsage for the
intrinsics and what types they support
https://github.com/llvm/llvm-project/pull/92725
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
@@ -18479,6 +18479,25 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned
BuiltinID,
CGM.getIntrinsic(Intrinsic::amdgcn_update_dpp, Args[0]->getType());
return Builder.CreateCall(F, Args);
}
+ case AMDGPU::BI__builtin_amdgcn_permlane16:
+ case AMDGPU::BI_
@@ -18479,6 +18479,25 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned
BuiltinID,
CGM.getIntrinsic(Intrinsic::amdgcn_update_dpp, Args[0]->getType());
return Builder.CreateCall(F, Args);
}
+ case AMDGPU::BI__builtin_amdgcn_permlane16:
+ case AMDGPU::BI_
@@ -5433,7 +5450,16 @@ bool AMDGPULegalizerInfo::legalizeLaneOp(LegalizerHelper
&Helper,
? Src0
: B.buildBitcast(LLT::scalar(Size),
Src0).getReg(0);
Src0 = B.buildAnyExt(S32, Src0Cast).getReg(0);
-if (Src2.isVali
arsenm wrote:
> @arsenm Should we use `image` or `private`? We could allow both in the
> frontend, and only use `private` as the canonical MMRA.
I don't understand why image would imply private. I would just keep at as
private throughout
https://github.com/llvm/llvm-project/pull/78572
__
arsenm wrote:
> I thought image memory = private. It's unclear to me, what AS does OpenCL
> IMAGE memory map to in our backend? (But otherwise, yes, MMRA should just
> have the backend names, the mapping of the OpenCL IMAGE to a backend AS
> should be in the device-lib)
Images are global memo
@@ -240,6 +240,7 @@ TARGET_BUILTIN(__builtin_amdgcn_flat_atomic_fadd_v2bf16,
"V2sV2s*0V2s", "t", "at
TARGET_BUILTIN(__builtin_amdgcn_global_atomic_fadd_v2bf16, "V2sV2s*1V2s", "t",
"atomic-global-pk-add-bf16-inst")
TARGET_BUILTIN(__builtin_amdgcn_ds_atomic_fadd_v2bf16, "V2sV2s*
arsenm wrote:
> Then I guess the MMRA should just have "global" and "local" for now, we can
> always add more later if needed. What do you think?
Yes, we don't have specific image counters. They are just vcmnt
https://github.com/llvm/llvm-project/pull/78572
___
https://github.com/arsenm approved this pull request.
https://github.com/llvm/llvm-project/pull/78572
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
https://github.com/arsenm edited https://github.com/llvm/llvm-project/pull/78572
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
@@ -678,6 +680,49 @@ class SIMemoryLegalizer final : public MachineFunctionPass
{
bool runOnMachineFunction(MachineFunction &MF) override;
};
+static const StringMap ASNames = {{
+{"global", SIAtomicAddrSpace::GLOBAL},
+{"local", SIAtomicAddrSpace::LDS},
+}};
+
+voi
@@ -19040,6 +19040,48 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned
BuiltinID,
CGM.getIntrinsic(Intrinsic::amdgcn_s_sendmsg_rtn, {ResultType});
return Builder.CreateCall(F, {Arg});
}
+ case AMDGPU::BI__builtin_amdgcn_global_load_lds: {
+SmallVect
@@ -19040,6 +19040,48 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned
BuiltinID,
CGM.getIntrinsic(Intrinsic::amdgcn_s_sendmsg_rtn, {ResultType});
return Builder.CreateCall(F, {Arg});
}
+ case AMDGPU::BI__builtin_amdgcn_global_load_lds: {
+SmallVect
@@ -19040,6 +19040,48 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned
BuiltinID,
CGM.getIntrinsic(Intrinsic::amdgcn_s_sendmsg_rtn, {ResultType});
return Builder.CreateCall(F, {Arg});
}
+ case AMDGPU::BI__builtin_amdgcn_global_load_lds: {
+SmallVect
@@ -0,0 +1,9 @@
+// RUN: %clang_cc1 -cl-std=CL2.0 -O0 -triple amdgcn-unknown-unknown
-target-cpu gfx940 -S -verify -o - %s
+// REQUIRES: amdgpu-registered-target
arsenm wrote:
Test belongs in SemaOpenCL
https://github.com/llvm/llvm-project/pull/93064
_
@@ -19040,6 +19040,48 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned
BuiltinID,
CGM.getIntrinsic(Intrinsic::amdgcn_s_sendmsg_rtn, {ResultType});
return Builder.CreateCall(F, {Arg});
}
+ case AMDGPU::BI__builtin_amdgcn_global_load_lds: {
+SmallVect
@@ -2537,6 +2537,47 @@ static RValue
EmitHipStdParUnsupportedBuiltin(CodeGenFunction *CGF,
return RValue::get(CGF->Builder.CreateCall(UBF, Args));
}
+static void buildInstrinsicCallArgs(CodeGenFunction &CGF, const CallExpr *E,
arsenm wrote:
Shouldn't need
@@ -0,0 +1,9 @@
+// RUN: %clang_cc1 -cl-std=CL2.0 -O0 -triple amdgcn-unknown-unknown
-target-cpu gfx940 -S -verify -o - %s
+// REQUIRES: amdgpu-registered-target
+
+typedef unsigned int u32;
+
+void test_global_load_lds_unsupported_size(global u32* src, local u32 *dst,
u32 size)
@@ -0,0 +1,9 @@
+// RUN: %clang_cc1 -cl-std=CL2.0 -O0 -triple amdgcn-unknown-unknown
-target-cpu gfx940 -S -verify -o - %s
+// REQUIRES: amdgpu-registered-target
+
+typedef unsigned int u32;
+
+void test_global_load_lds_unsupported_size(global u32* src, local u32 *dst,
u32 size)
https://github.com/arsenm requested changes to this pull request.
You cannot encode language standards in this. We should simply have different
operations that provide the range of semantics and not make the IR modal
https://github.com/llvm/llvm-project/pull/93159
_
@@ -5456,43 +5444,32 @@ bool
AMDGPULegalizerInfo::legalizeLaneOp(LegalizerHelper &Helper,
if ((Size % 32) == 0) {
SmallVector PartialRes;
unsigned NumParts = Size / 32;
-auto IsS16Vec = Ty.isVector() && Ty.getElementType() == S16;
+bool IsS16Vec = Ty.isVector
@@ -6086,6 +6086,62 @@ static SDValue lowerBALLOTIntrinsic(const
SITargetLowering &TLI, SDNode *N,
DAG.getConstant(0, SL, MVT::i32), DAG.getCondCode(ISD::SETNE));
}
+static SDValue lowerLaneOp(const SITargetLowering &TLI, SDNode *N,
+ Selection
1201 - 1300 of 2827 matches
Mail list logo