@@ -2928,12 +2928,13 @@ static void emitUsed(CodeGenModule &CGM, StringRef Name,
for (unsigned i = 0, e = List.size(); i != e; ++i) {
UsedArray[i] =
llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(
-cast(&*List[i]), CGM.Int8PtrTy);
---
@@ -2047,9 +2047,9 @@ void CodeGenModule::EmitCtorList(CtorList &Fns, const
char *GlobalName) {
llvm::Type *CtorPFTy = llvm::PointerType::get(CtorFTy,
TheModule.getDataLayout().getProgramAddressSpace());
- // Get the type of a ctor entry, { i32, void ()*, i8* }.
+ /
@@ -0,0 +1,180 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
UTC_ARGS: --function-signature
+
+// Simple calls to known variadic functions that are completely elided when
+// optimisations are on This is a functional check that the expand-varia
@@ -103,19 +104,27 @@ void AMDGPUABIInfo::computeInfo(CGFunctionInfo &FI) const
{
if (!getCXXABI().classifyReturnType(FI))
FI.getReturnInfo() = classifyReturnType(FI.getReturnType());
+ unsigned ArgumentIndex = 0;
+ const unsigned numFixedArguments = FI.getNumRequired
@@ -1170,6 +1170,23 @@ The AMDGPU backend implements the following LLVM IR
intrinsics.
:ref:`llvm.set.fpenv` Sets the floating point
environment to the specifies state.
+ llvm.amdgcn.readfirstlaneProvides direct access to
v_readfirstl
@@ -5387,6 +5387,124 @@ bool
AMDGPULegalizerInfo::legalizeDSAtomicFPIntrinsic(LegalizerHelper &Helper,
return true;
}
+// TODO: Fix pointer type handling
+bool AMDGPULegalizerInfo::legalizeLaneOp(LegalizerHelper &Helper,
+ MachineInst
@@ -6086,6 +6086,63 @@ static SDValue lowerBALLOTIntrinsic(const
SITargetLowering &TLI, SDNode *N,
DAG.getConstant(0, SL, MVT::i32), DAG.getCondCode(ISD::SETNE));
}
+static SDValue lowerLaneOp(const SITargetLowering &TLI, SDNode *N,
+ Selection
@@ -1170,6 +1170,23 @@ The AMDGPU backend implements the following LLVM IR
intrinsics.
:ref:`llvm.set.fpenv` Sets the floating point
environment to the specifies state.
+ llvm.amdgcn.readfirstlaneProvides direct access to
v_readfirstl
@@ -5387,6 +5387,124 @@ bool
AMDGPULegalizerInfo::legalizeDSAtomicFPIntrinsic(LegalizerHelper &Helper,
return true;
}
+// TODO: Fix pointer type handling
+bool AMDGPULegalizerInfo::legalizeLaneOp(LegalizerHelper &Helper,
+ MachineInst
@@ -6086,6 +6086,63 @@ static SDValue lowerBALLOTIntrinsic(const
SITargetLowering &TLI, SDNode *N,
DAG.getConstant(0, SL, MVT::i32), DAG.getCondCode(ISD::SETNE));
}
+static SDValue lowerLaneOp(const SITargetLowering &TLI, SDNode *N,
+ Selection
@@ -2928,12 +2928,13 @@ static void emitUsed(CodeGenModule &CGM, StringRef Name,
for (unsigned i = 0, e = List.size(); i != e; ++i) {
UsedArray[i] =
llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(
-cast(&*List[i]), CGM.Int8PtrTy);
---
@@ -6086,6 +6086,63 @@ static SDValue lowerBALLOTIntrinsic(const
SITargetLowering &TLI, SDNode *N,
DAG.getConstant(0, SL, MVT::i32), DAG.getCondCode(ISD::SETNE));
}
+static SDValue lowerLaneOp(const SITargetLowering &TLI, SDNode *N,
+ Selection
@@ -18479,6 +18479,28 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned
BuiltinID,
CGM.getIntrinsic(Intrinsic::amdgcn_update_dpp, Args[0]->getType());
return Builder.CreateCall(F, Args);
}
+ case AMDGPU::BI__builtin_amdgcn_permlane16:
+ case AMDGPU::BI_
@@ -285,6 +285,9 @@ def SanitizeHWAddress : EnumAttr<"sanitize_hwaddress",
[FnAttr]>;
/// MemTagSanitizer is on.
def SanitizeMemTag : EnumAttr<"sanitize_memtag", [FnAttr]>;
+/// NumericalStabilitySanitizer is on.
+def SanitizeNumericalStability : EnumAttr<"sanitize_numericals
@@ -2928,12 +2928,13 @@ static void emitUsed(CodeGenModule &CGM, StringRef Name,
for (unsigned i = 0, e = List.size(); i != e; ++i) {
UsedArray[i] =
llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(
-cast(&*List[i]), CGM.Int8PtrTy);
---
https://github.com/arsenm edited https://github.com/llvm/llvm-project/pull/93601
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
@@ -1,6 +1,8 @@
// RUN: %clang_cc1 %s -triple x86_64-apple-darwin -emit-llvm -o - | FileCheck
%s
+// RUN: %clang_cc1 %s -triple amdgcn-amd-amdhsa -emit-llvm -o - | FileCheck %s
--check-prefix=GLOBALAS
// CHECK: @llvm.used = appending global [2 x ptr] [ptr @foo, ptr @X], sect
@@ -2047,9 +2047,9 @@ void CodeGenModule::EmitCtorList(CtorList &Fns, const
char *GlobalName) {
llvm::Type *CtorPFTy = llvm::PointerType::get(CtorFTy,
TheModule.getDataLayout().getProgramAddressSpace());
- // Get the type of a ctor entry, { i32, void ()*, i8* }.
https://github.com/arsenm commented:
lgtm with a few nits
https://github.com/llvm/llvm-project/pull/89217
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
https://github.com/arsenm edited https://github.com/llvm/llvm-project/pull/89217
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
@@ -1170,6 +1170,23 @@ The AMDGPU backend implements the following LLVM IR
intrinsics.
:ref:`llvm.set.fpenv` Sets the floating point
environment to the specifies state.
+ llvm.amdgcn.readfirstlaneProvides direct access to
v_readfirstl
@@ -5387,6 +5387,98 @@ bool
AMDGPULegalizerInfo::legalizeDSAtomicFPIntrinsic(LegalizerHelper &Helper,
return true;
}
+// TODO: Fix pointer type handling
+bool AMDGPULegalizerInfo::legalizeLaneOp(LegalizerHelper &Helper,
+ MachineInstr
@@ -1208,7 +1225,7 @@ The AMDGPU backend implements the following LLVM IR
intrinsics.
the output.
llvm.amdgcn.sdot2Provides direct access to
v_dot2_i32_i16 across targets which
-
@@ -5461,8 +5461,7 @@ bool AMDGPULegalizerInfo::legalizeLaneOp(LegalizerHelper
&Helper,
SmallVector PartialRes;
unsigned NumParts = Size / 32;
- MachineInstrBuilder Src0Parts, Src2Parts;
- Src0Parts = B.buildUnmerge(PartialResTy, Src0);
+ MachineInstrBuilder Src0Parts
https://github.com/arsenm approved this pull request.
https://github.com/llvm/llvm-project/pull/89217
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
arsenm wrote:
> Does this need IR autoupgrade?
This type of auto upgrade is free, it just happens
https://github.com/llvm/llvm-project/pull/89217
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/
https://github.com/arsenm commented:
> 3. PowerPC: has some interaction with the behavior of `minnum/maxnum`: need
> define `fcanonicalize`.
AMDGPU has the same handling. This is to break the signaling nan handling from
IEEE to the broken old glibc libm behavior. If we fix the definition to ma
@@ -3636,6 +3648,22 @@ def Fmin : FPMathTemplate, LibBuiltin<"math.h"> {
let OnlyBuiltinPrefixedAliasIsConstexpr = 1;
}
+def FmaximumNum : FPMathTemplate, LibBuiltin<"math.h"> {
arsenm wrote:
I'd prefer to split the clang changes into a separate change
ht
https://github.com/arsenm edited https://github.com/llvm/llvm-project/pull/93841
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
@@ -16049,6 +16094,84 @@ of the two arguments. -0.0 is considered to be less
than +0.0 for this
intrinsic. Note that these are the semantics specified in the draft of
IEEE 754-2019.
+.. _i_minimumnum:
+
+'``llvm.minimumnum.*``' Intrinsic
+^
+
+
@@ -5005,8 +5007,11 @@ void computeKnownFPClass(const Value *V, const APInt
&DemandedElts,
// If either operand is not NaN, the result is not NaN.
if (NeverNaN && (IID == Intrinsic::minnum || IID == Intrinsic::maxnum))
Known.knownNot(fcNan);
+ if (Neve
arsenm wrote:
> I think the comments here are fed into #93362 successfully, will go through
> the list again to check.
So #93362 is the replacement, and not the sequential next piece? Can we close
this one then?
https://github.com/llvm/llvm-project/pull/89007
@@ -2928,12 +2928,13 @@ static void emitUsed(CodeGenModule &CGM, StringRef Name,
for (unsigned i = 0, e = List.size(); i != e; ++i) {
UsedArray[i] =
llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(
-cast(&*List[i]), CGM.Int8PtrTy);
---
arsenm wrote:
> The third argument here is like for llvm.used, it's a way to associate the
> entry with a global or function. If the corresponding global or function is
> omitted from the output then the entry will be removed. It isn't used for
> anything at run time. So I think there should b
@@ -0,0 +1,1023 @@
+//===-- ExpandVariadicsPass.cpp *- C++ -*-=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apach
@@ -0,0 +1,1023 @@
+//===-- ExpandVariadicsPass.cpp *- C++ -*-=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apach
arsenm wrote:
> Perhaps an alternative is to tweak LangRef wording to say that that these are
> always emitted as unqualified ptrs, and that their ephemeral nature implies
> that their AS is meaningless?
I think this is the correct way to handle it. Also we'll need a few
stripPointerCasts add
arsenm wrote:
You should add the mentioned convergence-tokens.ll test function
https://github.com/llvm/llvm-project/pull/89217
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
@@ -32,27 +32,29 @@ class StoreInst;
/// These are the kinds of recurrences that we support.
enum class RecurKind {
- None, ///< Not a recurrence.
- Add, ///< Sum of integers.
- Mul, ///< Product of integers.
- Or, ///< Bitwise or logical OR of integers
@@ -197,12 +202,20 @@ ABIArgInfo
AMDGPUABIInfo::classifyKernelArgumentType(QualType Ty) const {
return ABIArgInfo::getDirect(LTy, 0, nullptr, false);
}
-ABIArgInfo AMDGPUABIInfo::classifyArgumentType(QualType Ty,
+ABIArgInfo AMDGPUABIInfo::classifyArgumentType(QualType Ty,
@@ -0,0 +1,293 @@
+// REQUIRES: amdgpu-registered-target
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
UTC_ARGS: --function-signature
+// RUN: %clang_cc1 -cc1 -std=c23 -triple amdgcn-amd-amdhsa -emit-llvm -O1 %s
-o - | FileCheck %s
+
+void sink_0
@@ -197,12 +202,20 @@ ABIArgInfo
AMDGPUABIInfo::classifyKernelArgumentType(QualType Ty) const {
return ABIArgInfo::getDirect(LTy, 0, nullptr, false);
}
-ABIArgInfo AMDGPUABIInfo::classifyArgumentType(QualType Ty,
+ABIArgInfo AMDGPUABIInfo::classifyArgumentType(QualType Ty,
https://github.com/arsenm commented:
Missing non-constant tests for each parameter?
https://github.com/llvm/llvm-project/pull/94376
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
@@ -4987,7 +4990,11 @@ class CodeGenFunction : public CodeGenTypeCache {
const llvm::Twine &Name = "");
// Adds a convergence_ctrl token to |Input| and emits the required parent
// convergence instructions.
- llvm::CallBase *addControll
@@ -4824,6 +4824,9 @@ llvm::CallInst
*CodeGenFunction::EmitRuntimeCall(llvm::FunctionCallee callee,
llvm::CallInst *call = Builder.CreateCall(
callee, args, getBundlesForFunclet(callee.getCallee()), name);
call->setCallingConv(getRuntimeCC());
+
+ if (getTarget().ge
https://github.com/arsenm approved this pull request.
https://github.com/llvm/llvm-project/pull/88922
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
arsenm wrote:
> I don't know what the policy is for promoting intrinsics from experimental to
> first-class or if it's documented anywhere (?), but I would expect this to be
> accompanied with an RFC / announcement on Discourse.
I don't remember any intrinsic ever making the move out of experi
Nathan =?utf-8?q?Gauër?=
Message-ID:
In-Reply-To:
@@ -1109,6 +1124,10 @@ void CodeGenFunction::EmitForStmt(const ForStmt &S,
llvm::BasicBlock *CondBlock = CondDest.getBlock();
EmitBlock(CondBlock);
+ if (getTarget().getTriple().isSPIRVLogical())
arsenm
Nathan =?utf-8?q?Gauër?=
Message-ID:
In-Reply-To:
@@ -4824,6 +4824,9 @@ llvm::CallInst
*CodeGenFunction::EmitRuntimeCall(llvm::FunctionCallee callee,
llvm::CallInst *call = Builder.CreateCall(
callee, args, getBundlesForFunclet(callee.getCallee()), name);
call->se
@@ -154,11 +154,20 @@ llvm::Value
*CodeGen::emitRoundPointerUpToAlignment(CodeGenFunction &CGF,
llvm::Value *Ptr,
CharUnits Align) {
// OverflowArgArea = (OverflowArgArea
@@ -154,11 +154,20 @@ llvm::Value
*CodeGen::emitRoundPointerUpToAlignment(CodeGenFunction &CGF,
llvm::Value *Ptr,
CharUnits Align) {
// OverflowArgArea = (OverflowArgArea
@@ -115,7 +115,13 @@ void AMDGPUABIInfo::computeInfo(CGFunctionInfo &FI) const {
Address AMDGPUABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
QualType Ty) const {
- llvm_unreachable("AMDGPU does not support varargs");
+ const bo
@@ -0,0 +1,43 @@
+//===- ExpandVariadics.h - expand variadic functions *- C++
-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apa
@@ -0,0 +1,1056 @@
+//===-- ExpandVariadicsPass.cpp *- C++ -*-=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apach
@@ -0,0 +1,1056 @@
+//===-- ExpandVariadicsPass.cpp *- C++ -*-=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apach
@@ -0,0 +1,1056 @@
+//===-- ExpandVariadicsPass.cpp *- C++ -*-=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apach
@@ -0,0 +1,1056 @@
+//===-- ExpandVariadicsPass.cpp *- C++ -*-=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apach
@@ -0,0 +1,1056 @@
+//===-- ExpandVariadicsPass.cpp *- C++ -*-=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apach
@@ -0,0 +1,1056 @@
+//===-- ExpandVariadicsPass.cpp *- C++ -*-=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apach
@@ -0,0 +1,1056 @@
+//===-- ExpandVariadicsPass.cpp *- C++ -*-=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apach
@@ -0,0 +1,1056 @@
+//===-- ExpandVariadicsPass.cpp *- C++ -*-=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apach
@@ -0,0 +1,1056 @@
+//===-- ExpandVariadicsPass.cpp *- C++ -*-=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apach
@@ -0,0 +1,1056 @@
+//===-- ExpandVariadicsPass.cpp *- C++ -*-=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apach
@@ -0,0 +1,1056 @@
+//===-- ExpandVariadicsPass.cpp *- C++ -*-=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apach
@@ -0,0 +1,1056 @@
+//===-- ExpandVariadicsPass.cpp *- C++ -*-=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apach
@@ -115,7 +115,13 @@ void AMDGPUABIInfo::computeInfo(CGFunctionInfo &FI) const {
Address AMDGPUABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
QualType Ty) const {
- llvm_unreachable("AMDGPU does not support varargs");
+ const bo
https://github.com/arsenm commented:
This appears to just assert today, but interpreting this as bitcast doesn't
make sense. I would expect this to emit a pair of casts, fpext to float, and
fptrunc down to half
https://github.com/llvm/llvm-project/pull/89051
__
@@ -0,0 +1,14 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
UTC_ARGS: --version 4
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -target-feature +fullbf16
-S -emit-llvm %s -o - | FileCheck %s
+// CHECK-LABEL: define dso_local half @test_
https://github.com/arsenm edited https://github.com/llvm/llvm-project/pull/89051
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
arsenm wrote:
> This appears to just assert today, but interpreting this as bitcast doesn't
> make sense. I would expect this to emit a pair of casts, fpext to float, and
> fptrunc down to half
If we don't just reject it as an invalid cast
https://github.com/llvm/llvm-project/pull/89051
@@ -1,3 +1,6 @@
-__kernel void foo(int *i) {
+// RUN: %clang -emit-llvm -S -o - %s | FileCheck %s
arsenm wrote:
The target absolutely should be explicit in any testing files. Pretending these
tests can be generic is going to be an intractable problem
https://g
https://github.com/arsenm edited https://github.com/llvm/llvm-project/pull/87989
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
@@ -1,3 +1,6 @@
-__kernel void foo(int *i) {
+// RUN: %clang -emit-llvm -S -o - %s | FileCheck %s
arsenm wrote:
This just requires additional REQUIRES support in lit for the built targets
when running the test, the same as other built-backend dependent codegen t
@@ -1,3 +1,6 @@
-__kernel void foo(int *i) {
+// RUN: %clang -emit-llvm -S -o - %s | FileCheck %s
arsenm wrote:
You can have different target coexist in the same test files when appropriate
and just multi-list REQUIRES. Clang does this regularly (OpenMP in parti
@@ -0,0 +1,109 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
UTC_ARGS: --version 4
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -target-feature +fullbf16
-S -emit-llvm %s -o - | FileCheck %s
+// CHECK-LABEL: define dso_local half @test
https://github.com/arsenm requested changes to this pull request.
Bitcast is not the correct behavior
https://github.com/llvm/llvm-project/pull/89051
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listin
https://github.com/arsenm edited https://github.com/llvm/llvm-project/pull/89051
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
@@ -0,0 +1,109 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
UTC_ARGS: --version 4
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -target-feature +fullbf16
-S -emit-llvm %s -o - | FileCheck %s
+// CHECK-LABEL: define dso_local half @test
@@ -39,6 +39,10 @@ function(compile_to_bc)
set( TARGET_ARG "-target" ${ARG_TRIPLE} )
endif()
+ # Ensure the directory we are told to output to exists
+ get_filename_component( ARG_OUTPUT_DIR ${ARG_OUTPUT} DIRECTORY )
arsenm wrote:
I thought there was
https://github.com/arsenm approved this pull request.
https://github.com/llvm/llvm-project/pull/89147
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
arsenm wrote:
> But In some target, it supply a HW instruction to complete the process
> (fp16->float32->bf16) . so it just supply a intrinsic (fp16 -> bf16)
Which is not a bitcast. The correct IR representation of this conversion is
fpext+fptrunc
https://github.com/llvm/llvm-project/pull/89
https://github.com/arsenm edited https://github.com/llvm/llvm-project/pull/89473
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
@@ -318,12 +320,12 @@
// RUN: %clang -### -fassociative-math -freciprocal-math -fno-signed-zeros \
// RUN: -fno-trapping-math -ftrapping-math -c %s 2>&1 \
-// RUN: | FileCheck --check-prefix=CHECK-NO-REASSOC-NO-UNSAFE-MATH %s
+// RUN: | FileCheck --check-prefix=CHECK-N
@@ -271,30 +271,32 @@
// RUN: 2>&1 | FileCheck --check-prefix=CHECK-NO-UNSAFE-MATH %s
// RUN: %clang -### -funsafe-math-optimizations -fno-reciprocal-math -c %s \
-// RUN: 2>&1 | FileCheck --check-prefix=CHECK-NO-UNSAFE-MATH %s
+// RUN: 2>&1 | FileCheck --check-prefix=CH
https://github.com/arsenm edited https://github.com/llvm/llvm-project/pull/89217
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
@@ -4822,6 +4822,111 @@ static MachineBasicBlock *lowerWaveReduce(MachineInstr
&MI,
return RetBB;
}
+static MachineBasicBlock *lowerPseudoLaneOp(MachineInstr &MI,
arsenm wrote:
You should try to do this before selection. Doing it after just adds a lot of
@@ -18410,6 +18410,24 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned
BuiltinID,
CGM.getIntrinsic(Intrinsic::amdgcn_update_dpp, Args[0]->getType());
return Builder.CreateCall(F, Args);
}
+ case AMDGPU::BI__builtin_amdgcn_readlane:
+ case AMDGPU::BI__b
@@ -18410,6 +18410,24 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned
BuiltinID,
CGM.getIntrinsic(Intrinsic::amdgcn_update_dpp, Args[0]->getType());
return Builder.CreateCall(F, Args);
}
+ case AMDGPU::BI__builtin_amdgcn_readlane:
+ case AMDGPU::BI__b
https://github.com/arsenm commented:
In a separate patch should have AMDGPUInstCombineIntrinsic try to fold bitcasts
into the intrinsic
https://github.com/llvm/llvm-project/pull/89217
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://li
arsenm wrote:
For AMDGPU 64 is probably right
https://github.com/llvm/llvm-project/pull/89446
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
@@ -0,0 +1,194 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
UTC_ARGS: --version 4
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -target-feature +fullbf16
-S -emit-llvm %s -o - | FileCheck %s
+// CHECK-LABEL: define dso_local half @test
@@ -0,0 +1,194 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
UTC_ARGS: --version 4
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -target-feature +fullbf16
-S -emit-llvm %s -o - | FileCheck %s
+// CHECK-LABEL: define dso_local half @test
@@ -0,0 +1,194 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
UTC_ARGS: --version 4
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -target-feature +fullbf16
-S -emit-llvm %s -o - | FileCheck %s
+// CHECK-LABEL: define dso_local half @test
Author: Matt Arsenault
Date: 2024-04-22T11:35:09+02:00
New Revision: bd84f5d5d71ee26d9552a9cd96ef058cfb8a39fc
URL:
https://github.com/llvm/llvm-project/commit/bd84f5d5d71ee26d9552a9cd96ef058cfb8a39fc
DIFF:
https://github.com/llvm/llvm-project/commit/bd84f5d5d71ee26d9552a9cd96ef058cfb8a39fc.diff
@@ -4822,6 +4822,111 @@ static MachineBasicBlock *lowerWaveReduce(MachineInstr
&MI,
return RetBB;
}
+static MachineBasicBlock *lowerPseudoLaneOp(MachineInstr &MI,
arsenm wrote:
No, that's a generic pass. I would directly handle this in the legalizer, in
S
https://github.com/arsenm edited https://github.com/llvm/llvm-project/pull/89217
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
https://github.com/arsenm approved this pull request.
I think this works for the test. I'm slightly confused by the PR not-stacking
with the test changes on top of the other PR
https://github.com/llvm/llvm-project/pull/89687
___
cfe-commits mailing l
@@ -0,0 +1,194 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
UTC_ARGS: --version 4
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -target-feature +fullbf16
-S -emit-llvm %s -o - | FileCheck %s
+// CHECK-LABEL: define dso_local half @test
https://github.com/arsenm edited https://github.com/llvm/llvm-project/pull/89051
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
https://github.com/arsenm approved this pull request.
LGTM. Would be good to verify the vector case is "correct" in as far as it's
what GCC does
https://github.com/llvm/llvm-project/pull/89051
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
h
1001 - 1100 of 2830 matches
Mail list logo