from:"Sameer Sahasrabuddhe via cfe\-commits"

[clang] 06bdffb - [AMDGPU] Expose llvm fence instruction as clang intrinsic

2020-04-26 Thread Sameer Sahasrabuddhe via cfe-commits


Author: Saiyedul Islam
Date: 2020-04-27T09:39:03+05:30
New Revision: 06bdffb2bb45d8666ec86782d21214ef545a71fd

URL: 
https://github.com/llvm/llvm-project/commit/06bdffb2bb45d8666ec86782d21214ef545a71fd
DIFF: 
https://github.com/llvm/llvm-project/commit/06bdffb2bb45d8666ec86782d21214ef545a71fd.diff

LOG: [AMDGPU] Expose llvm fence instruction as clang intrinsic

Expose llvm fence instruction as clang builtin for AMDGPU target

__builtin_amdgcn_fence(unsigned int memoryOrdering, const char *syncScope)

The first argument of this builtin is one of the memory-ordering specifiers
__ATOMIC_ACQUIRE, __ATOMIC_RELEASE, __ATOMIC_ACQ_REL, or __ATOMIC_SEQ_CST
following C++11 memory model semantics. This is mapped to corresponding
LLVM atomic memory ordering for the fence instruction using LLVM atomic C
ABI. The second argument is an AMDGPU-specific synchronization scope
defined as string.

Reviewed By: sameerds

Differential Revision: https://reviews.llvm.org/D75917

Added: 
clang/test/CodeGenCXX/builtin-amdgcn-fence.cpp
clang/test/Sema/builtin-amdgcn-fence-failure.cpp

Modified: 
clang/include/clang/Basic/BuiltinsAMDGPU.def
clang/include/clang/Sema/Sema.h
clang/lib/CodeGen/CGBuiltin.cpp
clang/lib/Sema/SemaChecking.cpp
clang/test/SemaOpenCL/builtins-amdgcn-error.cl

Removed: 




diff  --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def 
b/clang/include/clang/Basic/BuiltinsAMDGPU.def
index b42c8a77c4bc..5633ccd5d744 100644
--- a/clang/include/clang/Basic/BuiltinsAMDGPU.def
+++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def
@@ -57,6 +57,7 @@ BUILTIN(__builtin_amdgcn_ds_gws_barrier, "vUiUi", "n")
 BUILTIN(__builtin_amdgcn_ds_gws_sema_v, "vUi", "n")
 BUILTIN(__builtin_amdgcn_ds_gws_sema_br, "vUiUi", "n")
 BUILTIN(__builtin_amdgcn_ds_gws_sema_p, "vUi", "n")
+BUILTIN(__builtin_amdgcn_fence, "vUicC*", "n")
 
 // FIXME: Need to disallow constant address space.
 BUILTIN(__builtin_amdgcn_div_scale, "dddbb*", "n")

diff  --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h
index 5cd75b176761..8a0dd4a1d96f 100644
--- a/clang/include/clang/Sema/Sema.h
+++ b/clang/include/clang/Sema/Sema.h
@@ -11992,6 +11992,7 @@ class Sema final {
   bool CheckX86BuiltinGatherScatterScale(unsigned BuiltinID, CallExpr 
*TheCall);
   bool CheckX86BuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall);
   bool CheckPPCBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall);
+  bool CheckAMDGCNBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall);
 
   bool SemaBuiltinVAStart(unsigned BuiltinID, CallExpr *TheCall);
   bool SemaBuiltinVAStartARMMicrosoft(CallExpr *Call);

diff  --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 58965efd5c44..ba5ea9d94023 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -28,6 +28,7 @@
 #include "clang/CodeGen/CGFunctionInfo.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/StringExtras.h"
+#include "llvm/Analysis/ValueTracking.h"
 #include "llvm/IR/DataLayout.h"
 #include "llvm/IR/InlineAsm.h"
 #include "llvm/IR/Intrinsics.h"
@@ -14131,6 +14132,43 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned 
BuiltinID,
 Function *F = CGM.getIntrinsic(Intrinsic::fshr, Src0->getType());
 return Builder.CreateCall(F, { Src0, Src1, Src2 });
   }
+
+  case AMDGPU::BI__builtin_amdgcn_fence: {
+llvm::AtomicOrdering AO = llvm::AtomicOrdering::SequentiallyConsistent;
+llvm::SyncScope::ID SSID;
+Value *Order = EmitScalarExpr(E->getArg(0));
+Value *Scope = EmitScalarExpr(E->getArg(1));
+
+if (isa(Order)) {
+  int ord = cast(Order)->getZExtValue();
+
+  // Map C11/C++11 memory ordering to LLVM memory ordering
+  switch (static_cast(ord)) {
+  case llvm::AtomicOrderingCABI::acquire:
+AO = llvm::AtomicOrdering::Acquire;
+break;
+  case llvm::AtomicOrderingCABI::release:
+AO = llvm::AtomicOrdering::Release;
+break;
+  case llvm::AtomicOrderingCABI::acq_rel:
+AO = llvm::AtomicOrdering::AcquireRelease;
+break;
+  case llvm::AtomicOrderingCABI::seq_cst:
+AO = llvm::AtomicOrdering::SequentiallyConsistent;
+break;
+  case llvm::AtomicOrderingCABI::consume: // not supported by LLVM fence
+  case llvm::AtomicOrderingCABI::relaxed: // not supported by LLVM fence
+break;
+  }
+
+  StringRef scp;
+  llvm::getConstantStringInfo(Scope, scp);
+  SSID = getLLVMContext().getOrInsertSyncScopeID(scp);
+
+  return Builder.CreateFence(AO, SSID);
+}
+LLVM_FALLTHROUGH;
+  }
   default:
 return nullptr;
   }

diff  --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index a88db3324ef3..478a534ab71c 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -1920,6 +1920,10 @@ Sema::CheckBuiltinFunctionCall(FunctionDecl *FDecl,

[llvm] [clang] Enable OpenCL hostcall printf (WIP) (PR #72556)

2023-11-19 Thread Sameer Sahasrabuddhe via cfe-commits



@@ -26,28 +26,31 @@ using namespace llvm;
 
 #define DEBUG_TYPE "amdgpu-emit-printf"
 
-static Value *fitArgInto64Bits(IRBuilder<> &Builder, Value *Arg) {
+static Value *fitArgInto64Bits(IRBuilder<> &Builder, Value *Arg,
+   bool IsBuffered) {
+  const DataLayout &DL = 
Builder.GetInsertBlock()->getModule()->getDataLayout();
   auto Int64Ty = Builder.getInt64Ty();
   auto Ty = Arg->getType();
 
   if (auto IntTy = dyn_cast(Ty)) {
-switch (IntTy->getBitWidth()) {
-case 32:
-  return Builder.CreateZExt(Arg, Int64Ty);
-case 64:
-  return Arg;
+if (IntTy->getBitWidth() < 64) {
+  return Builder.CreateZExt(Arg, Builder.getInt64Ty());
 }
   }
 
-  if (Ty->getTypeID() == Type::DoubleTyID) {
+  if (Ty->isFloatingPointTy()) {
+if (DL.getTypeAllocSize(Ty) < 8)
+  Arg = Builder.CreateFPExt(Arg, Builder.getDoubleTy());
+if (IsBuffered)
+  return Arg;
 return Builder.CreateBitCast(Arg, Int64Ty);
   }
 
-  if (isa(Ty)) {
+  if (!IsBuffered && isa(Ty)) {
 return Builder.CreatePtrToInt(Arg, Int64Ty);

ssahasra wrote:

How are pointers handled in the buffered case? 

https://github.com/llvm/llvm-project/pull/72556
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[llvm] [clang] Enable OpenCL hostcall printf (WIP) (PR #72556)

2023-11-19 Thread Sameer Sahasrabuddhe via cfe-commits



@@ -406,5 +410,9 @@ TARGET_BUILTIN(__builtin_amdgcn_cvt_pk_fp8_f32, "iffiIb", 
"nc", "fp8-insts")
 TARGET_BUILTIN(__builtin_amdgcn_cvt_sr_bf8_f32, "ifiiIi", "nc", "fp8-insts")
 TARGET_BUILTIN(__builtin_amdgcn_cvt_sr_fp8_f32, "ifiiIi", "nc", "fp8-insts")
 
+// OpenCL
+LANGBUILTIN(printf, "icC*4.", "fp:0:", ALL_OCL_LANGUAGES)

ssahasra wrote:

I don't understand why this is necessary for the current task. What does it fix 
in the parsing of OpenCL builtins?

https://github.com/llvm/llvm-project/pull/72556
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [llvm] Enable OpenCL hostcall printf (WIP) (PR #72556)

2023-11-19 Thread Sameer Sahasrabuddhe via cfe-commits



@@ -4742,6 +4742,16 @@ void Clang::ConstructJob(Compilation &C, const JobAction 
&JA,
 Args.ClaimAllArgs(options::OPT_gen_cdb_fragment_path);
   }
 
+  if (TC.getTriple().isAMDGPU() && types::isOpenCL(Input.getType())) {
+if (Args.getLastArg(options::OPT_mprintf_kind_EQ)) {
+  CmdArgs.push_back(Args.MakeArgString(
+  "-mprintf-kind=" +
+  Args.getLastArgValue(options::OPT_mprintf_kind_EQ)));
+  // Force compiler error on invalid conversion specifiers
+  
CmdArgs.push_back(Args.MakeArgString("-Werror=format-invalid-specifier"));

ssahasra wrote:

Why is this necessary here?

https://github.com/llvm/llvm-project/pull/72556
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [llvm] Enable OpenCL hostcall printf (WIP) (PR #72556)

2023-11-19 Thread Sameer Sahasrabuddhe via cfe-commits



@@ -26,28 +26,31 @@ using namespace llvm;
 
 #define DEBUG_TYPE "amdgpu-emit-printf"
 
-static Value *fitArgInto64Bits(IRBuilder<> &Builder, Value *Arg) {
+static Value *fitArgInto64Bits(IRBuilder<> &Builder, Value *Arg,
+   bool IsBuffered) {
+  const DataLayout &DL = 
Builder.GetInsertBlock()->getModule()->getDataLayout();
   auto Int64Ty = Builder.getInt64Ty();
   auto Ty = Arg->getType();
 
   if (auto IntTy = dyn_cast(Ty)) {
-switch (IntTy->getBitWidth()) {
-case 32:
-  return Builder.CreateZExt(Arg, Int64Ty);
-case 64:
-  return Arg;
+if (IntTy->getBitWidth() < 64) {
+  return Builder.CreateZExt(Arg, Builder.getInt64Ty());
 }
   }
 
-  if (Ty->getTypeID() == Type::DoubleTyID) {
+  if (Ty->isFloatingPointTy()) {
+if (DL.getTypeAllocSize(Ty) < 8)
+  Arg = Builder.CreateFPExt(Arg, Builder.getDoubleTy());

ssahasra wrote:

This typecast should not be necessary. Default argument promotions in C++ for 
variadic functions ensure that all floating point arguments are promoted to 
double. If that is not happening, can you demonstrate with a test?

https://github.com/llvm/llvm-project/pull/72556
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [llvm] Enable OpenCL hostcall printf (WIP) (PR #72556)

2023-11-19 Thread Sameer Sahasrabuddhe via cfe-commits



@@ -170,20 +173,49 @@ static Value *appendString(IRBuilder<> &Builder, Value 
*Desc, Value *Arg,
   return callAppendStringN(Builder, Desc, Arg, Length, IsLast);
 }
 
+static Value *appendVectorArg(IRBuilder<> &Builder, Value *Desc, Value *Arg,
+  bool IsLast, bool IsBuffered) {
+  assert(Arg->getType()->isVectorTy() && "incorrent append* function");
+  auto VectorTy = dyn_cast(Arg->getType());
+  auto Zero = Builder.getInt64(0);
+  if (VectorTy) {

ssahasra wrote:

So the argument is ignored if it is not a FixedVectorType?

https://github.com/llvm/llvm-project/pull/72556
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [llvm] Enable OpenCL hostcall printf (WIP) (PR #72556)

2023-11-19 Thread Sameer Sahasrabuddhe via cfe-commits



@@ -170,20 +173,49 @@ static Value *appendString(IRBuilder<> &Builder, Value 
*Desc, Value *Arg,
   return callAppendStringN(Builder, Desc, Arg, Length, IsLast);
 }
 
+static Value *appendVectorArg(IRBuilder<> &Builder, Value *Desc, Value *Arg,
+  bool IsLast, bool IsBuffered) {
+  assert(Arg->getType()->isVectorTy() && "incorrent append* function");
+  auto VectorTy = dyn_cast(Arg->getType());
+  auto Zero = Builder.getInt64(0);
+  if (VectorTy) {
+for (unsigned int i = 0; i < VectorTy->getNumElements() - 1; i++) {
+  auto Val = Builder.CreateExtractElement(Arg, i);
+  Desc = callAppendArgs(Builder, Desc, 1,
+fitArgInto64Bits(Builder, Val, IsBuffered), Zero,
+Zero, Zero, Zero, Zero, Zero, false);
+}
+
+Value* Val =
+Builder.CreateExtractElement(Arg, VectorTy->getNumElements() - 1);
+return callAppendArgs(Builder, Desc, 1,
+  fitArgInto64Bits(Builder, Val, IsBuffered), Zero,
+  Zero, Zero, Zero, Zero, Zero, IsLast);
+  }
+  return nullptr;
+}
+
 static Value *processArg(IRBuilder<> &Builder, Value *Desc, Value *Arg,
- bool SpecIsCString, bool IsLast) {
+ bool SpecIsCString, bool IsVector, bool IsLast,
+ bool IsBuffered) {
   if (SpecIsCString && isa(Arg->getType())) {
 return appendString(Builder, Desc, Arg, IsLast);
   }
-  // If the format specifies a string but the argument is not, the frontend 
will
-  // have printed a warning. We just rely on undefined behaviour and send the
-  // argument anyway.
-  return appendArg(Builder, Desc, Arg, IsLast);
+
+  if (IsVector) {
+return appendVectorArg(Builder, Desc, Arg, IsLast, IsBuffered);
+  } 
+
+  // If the format specifies a string but the argument is not, the frontend
+  // will have printed a warning. We just rely on undefined behaviour and send
+  // the argument anyway.

ssahasra wrote:

This is a pure whitespace change. Keeping the original formatting of the 
comment helps simplify the diff.

https://github.com/llvm/llvm-project/pull/72556
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [llvm] Enable OpenCL hostcall printf (WIP) (PR #72556)

2023-11-19 Thread Sameer Sahasrabuddhe via cfe-commits



@@ -194,6 +226,8 @@ static void locateCStrings(SparseBitVector<8> &BV, 
StringRef Str) {
   SpecPos += 2;
   continue;
 }
+if (Str.find_first_of("v", SpecPos) != StringRef::npos)

ssahasra wrote:

I don't think this will work as expected. It can clearly match a "v" that 
occurs after the data type. For example, it is supposed to match "%v2d", but it 
will also match "%d v". The match should be performed inside the "Spec" 
substring created below. 

https://github.com/llvm/llvm-project/pull/72556
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[llvm] [clang] Enable OpenCL hostcall printf (WIP) (PR #72556)

2023-11-19 Thread Sameer Sahasrabuddhe via cfe-commits



@@ -170,20 +173,49 @@ static Value *appendString(IRBuilder<> &Builder, Value 
*Desc, Value *Arg,
   return callAppendStringN(Builder, Desc, Arg, Length, IsLast);
 }
 
+static Value *appendVectorArg(IRBuilder<> &Builder, Value *Desc, Value *Arg,
+  bool IsLast, bool IsBuffered) {
+  assert(Arg->getType()->isVectorTy() && "incorrent append* function");
+  auto VectorTy = dyn_cast(Arg->getType());
+  auto Zero = Builder.getInt64(0);
+  if (VectorTy) {
+for (unsigned int i = 0; i < VectorTy->getNumElements() - 1; i++) {
+  auto Val = Builder.CreateExtractElement(Arg, i);
+  Desc = callAppendArgs(Builder, Desc, 1,
+fitArgInto64Bits(Builder, Val, IsBuffered), Zero,
+Zero, Zero, Zero, Zero, Zero, false);
+}
+
+Value* Val =
+Builder.CreateExtractElement(Arg, VectorTy->getNumElements() - 1);
+return callAppendArgs(Builder, Desc, 1,
+  fitArgInto64Bits(Builder, Val, IsBuffered), Zero,
+  Zero, Zero, Zero, Zero, Zero, IsLast);
+  }
+  return nullptr;
+}
+
 static Value *processArg(IRBuilder<> &Builder, Value *Desc, Value *Arg,
- bool SpecIsCString, bool IsLast) {
+ bool SpecIsCString, bool IsVector, bool IsLast,
+ bool IsBuffered) {
   if (SpecIsCString && isa(Arg->getType())) {
 return appendString(Builder, Desc, Arg, IsLast);
   }
-  // If the format specifies a string but the argument is not, the frontend 
will
-  // have printed a warning. We just rely on undefined behaviour and send the
-  // argument anyway.
-  return appendArg(Builder, Desc, Arg, IsLast);
+
+  if (IsVector) {
+return appendVectorArg(Builder, Desc, Arg, IsLast, IsBuffered);
+  } 
+
+  // If the format specifies a string but the argument is not, the frontend
+  // will have printed a warning. We just rely on undefined behaviour and send
+  // the argument anyway.
+  return appendArg(Builder, Desc, Arg, IsLast, IsBuffered);
 }
 
 // Scan the format string to locate all specifiers, and mark the ones that
 // specify a string, i.e, the "%s" specifier with optional '*' characters.
-static void locateCStrings(SparseBitVector<8> &BV, StringRef Str) {
+static void locateCStringsAndVectors(SparseBitVector<8> &BV,

ssahasra wrote:

update the comment above the function

https://github.com/llvm/llvm-project/pull/72556
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[llvm] [clang] Enable OpenCL hostcall printf (WIP) (PR #72556)

2023-11-19 Thread Sameer Sahasrabuddhe via cfe-commits



@@ -1,12 +1,68 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
-// RUN: %clang_cc1 -cl-std=CL1.2 -triple amdgcn-amd-amdhsa 
-disable-llvm-passes -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -cl-std=CL1.2 -triple amdgcn-amd-amdhsa 
-mprintf-kind=buffered -disable-llvm-passes -emit-llvm -o - %s | FileCheck 
--check-prefix=CHECK_BUFFERED %s
+// RUN: %clang_cc1 -cl-std=CL1.2 -triple amdgcn-amd-amdhsa 
-mprintf-kind=hostcall -disable-llvm-passes -emit-llvm -o - %s | FileCheck 
--check-prefix=CHECK_HOSTCALL %s
 
 int printf(__constant const char* st, ...) __attribute__((format(printf, 1, 
2)));

ssahasra wrote:

There should be tests that use the "v" modifier inside a format specifier, such 
as "%v2d", and also tests that use it outside one, such as "%dv".

https://github.com/llvm/llvm-project/pull/72556
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [AMDGPU] Treat printf as builtin for OpenCL (PR #72554)

2023-11-19 Thread Sameer Sahasrabuddhe via cfe-commits



@@ -406,5 +410,9 @@ TARGET_BUILTIN(__builtin_amdgcn_cvt_pk_fp8_f32, "iffiIb", 
"nc", "fp8-insts")
 TARGET_BUILTIN(__builtin_amdgcn_cvt_sr_bf8_f32, "ifiiIi", "nc", "fp8-insts")
 TARGET_BUILTIN(__builtin_amdgcn_cvt_sr_fp8_f32, "ifiiIi", "nc", "fp8-insts")
 
+// OpenCL
+LANGBUILTIN(printf, "icC*4.", "fp:0:", ALL_OCL_LANGUAGES)

ssahasra wrote:

I still don't see why this is necessary. A target-defined language-specific 
builtin is a whole new beast. What is missing in the current parsing of OpenCL 
printf?

https://github.com/llvm/llvm-project/pull/72554
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[llvm] [clang] [AMDGPU] Enable OpenCL hostcall printf (WIP) (PR #72556)

2023-11-29 Thread Sameer Sahasrabuddhe via cfe-commits


ssahasra wrote:

> ping

Some comments still need to be addressed.

https://github.com/llvm/llvm-project/pull/72556
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[llvm] [clang] [AMDGPU] Enable OpenCL hostcall printf (WIP) (PR #72556)

2023-11-29 Thread Sameer Sahasrabuddhe via cfe-commits



@@ -406,5 +410,9 @@ TARGET_BUILTIN(__builtin_amdgcn_cvt_pk_fp8_f32, "iffiIb", 
"nc", "fp8-insts")
 TARGET_BUILTIN(__builtin_amdgcn_cvt_sr_bf8_f32, "ifiiIi", "nc", "fp8-insts")
 TARGET_BUILTIN(__builtin_amdgcn_cvt_sr_fp8_f32, "ifiiIi", "nc", "fp8-insts")
 
+// OpenCL
+LANGBUILTIN(printf, "icC*4.", "fp:0:", ALL_OCL_LANGUAGES)

ssahasra wrote:

Although we talked about this offline, the explanation needs to be added here. 
In fact, the motivation for having this builtin should be added as a comment to 
the source itself for future reference.

https://github.com/llvm/llvm-project/pull/72556
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[llvm] [clang] [AMDGPU] Enable OpenCL hostcall printf (WIP) (PR #72556)

2023-11-29 Thread Sameer Sahasrabuddhe via cfe-commits



@@ -198,6 +229,10 @@ static void locateCStrings(SparseBitVector<8> &BV, 
StringRef Str) {
 if (SpecEnd == StringRef::npos)
   return;
 auto Spec = Str.slice(SpecPos, SpecEnd + 1);
+
+if ((Spec.find_first_of("v")) != StringRef::npos)

ssahasra wrote:

Just ".find()" should be sufficient?

https://github.com/llvm/llvm-project/pull/72556
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[llvm] [clang] [AMDGPU] Enable OpenCL hostcall printf (WIP) (PR #72556)

2023-11-29 Thread Sameer Sahasrabuddhe via cfe-commits



@@ -26,28 +26,31 @@ using namespace llvm;
 
 #define DEBUG_TYPE "amdgpu-emit-printf"
 
-static Value *fitArgInto64Bits(IRBuilder<> &Builder, Value *Arg) {
+static Value *fitArgInto64Bits(IRBuilder<> &Builder, Value *Arg,
+   bool IsBuffered) {
+  const DataLayout &DL = 
Builder.GetInsertBlock()->getModule()->getDataLayout();
   auto Int64Ty = Builder.getInt64Ty();
   auto Ty = Arg->getType();
 
   if (auto IntTy = dyn_cast(Ty)) {
-switch (IntTy->getBitWidth()) {
-case 32:
-  return Builder.CreateZExt(Arg, Int64Ty);
-case 64:
-  return Arg;
+if (IntTy->getBitWidth() < 64) {
+  return Builder.CreateZExt(Arg, Builder.getInt64Ty());
 }
   }
 
-  if (Ty->getTypeID() == Type::DoubleTyID) {
+  if (Ty->isFloatingPointTy()) {
+if (DL.getTypeAllocSize(Ty) < 8)
+  Arg = Builder.CreateFPExt(Arg, Builder.getDoubleTy());
+if (IsBuffered)
+  return Arg;
 return Builder.CreateBitCast(Arg, Int64Ty);
   }
 
-  if (isa(Ty)) {
+  if (!IsBuffered && isa(Ty)) {
 return Builder.CreatePtrToInt(Arg, Int64Ty);

ssahasra wrote:

Information like this should be written in comments in the source code itself.

https://github.com/llvm/llvm-project/pull/72556
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [llvm] [AMDGPU] Enable OpenCL hostcall printf (WIP) (PR #72556)

2023-11-29 Thread Sameer Sahasrabuddhe via cfe-commits



@@ -1,12 +1,68 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
-// RUN: %clang_cc1 -cl-std=CL1.2 -triple amdgcn-amd-amdhsa 
-disable-llvm-passes -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -cl-std=CL1.2 -triple amdgcn-amd-amdhsa 
-mprintf-kind=buffered -disable-llvm-passes -emit-llvm -o - %s | FileCheck 
--check-prefix=CHECK_BUFFERED %s
+// RUN: %clang_cc1 -cl-std=CL1.2 -triple amdgcn-amd-amdhsa 
-mprintf-kind=hostcall -disable-llvm-passes -emit-llvm -o - %s | FileCheck 
--check-prefix=CHECK_HOSTCALL %s
 
 int printf(__constant const char* st, ...) __attribute__((format(printf, 1, 
2)));

ssahasra wrote:

I only see tests with the "v" correctly used as a vector specifier. What about 
tests like "%dv", and other cases where either the "v" is wrong, or it's just 
part of the text being printed? Given that the first attempt at detecting "v" 
had errors in it, I think it will be good to cover all corner cases where a "v" 
is actually a vector specifier and where it is not.

https://github.com/llvm/llvm-project/pull/72556
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [clang][AMDGPU] precommit test for ballot on Windows (PR #73920)

2023-11-30 Thread Sameer Sahasrabuddhe via cfe-commits


https://github.com/ssahasra created 
https://github.com/llvm/llvm-project/pull/73920

The Clang declaration of the wave-64 builtin uses "UL" as the return type, 
which is interpreted as a 32-bit unsigned integer on Windows. This emits an 
incorrect LLVM declaration with i32 return type instead of i64. The clang 
declaration needs to be fixed to use "WU" instead.

>From d5945a3926567c67b0c355a95b4bc98446f2d764 Mon Sep 17 00:00:00 2001
From: Sameer Sahasrabuddhe 
Date: Thu, 30 Nov 2023 12:13:23 +0530
Subject: [PATCH] [clang][AMDGPU] precommit test for ballot on Windows

The Clang declaration of the wave-64 builtin uses "UL" as the return type, which
is interpreted as a 32-bit unsigned integer on Windows. This emits an incorrect
LLVM declaration with i32 return type instead of i64. The clang declaration
needs to be fixed to use "WU" instead.
---
 clang/test/CodeGenHIP/ballot.cpp | 21 +
 1 file changed, 21 insertions(+)
 create mode 100644 clang/test/CodeGenHIP/ballot.cpp

diff --git a/clang/test/CodeGenHIP/ballot.cpp b/clang/test/CodeGenHIP/ballot.cpp
new file mode 100644
index 000..823b1583a42fc02
--- /dev/null
+++ b/clang/test/CodeGenHIP/ballot.cpp
@@ -0,0 +1,21 @@
+// REQUIRES: amdgpu-registered-target
+// XFAIL: system-windows
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -target-cpu gfx900 -x hip 
-emit-llvm -fcuda-is-device -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -target-cpu gfx900 -x hip -S 
-fcuda-is-device -o - %s | FileCheck %s --check-prefix=GFX9
+
+// FIXME: The Clang declaration of the wave-64 builtin uses "UL" as the return
+// type, which is interpreted as a 32-bit unsigned integer on Windows. This
+// emits an incorrect LLVM declaration with i32 return type instead of i64. The
+// clang declaration needs to be fixed to use "WU" instead.
+
+// CHECK-LABEL: @_Z3fooi
+// CHECK: call i64 @llvm.amdgcn.ballot.i64
+
+// GFX9-LABEL: _Z3fooi:
+// GFX9: v_cmp_ne_u32_e64
+
+#define __device__ __attribute__((device))
+
+__device__ unsigned long long foo(int p) {
+  return __builtin_amdgcn_ballot_w64(p);
+}

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [clang][AMDGPU] precommit test for ballot on Windows (PR #73920)

2023-11-30 Thread Sameer Sahasrabuddhe via cfe-commits


https://github.com/ssahasra updated 
https://github.com/llvm/llvm-project/pull/73920

>From 6b87550b48f5fae5c34304a14a302d37e81a Mon Sep 17 00:00:00 2001
From: Sameer Sahasrabuddhe 
Date: Fri, 1 Dec 2023 11:49:02 +0530
Subject: [PATCH] [clang][AMDGPU] precommit test for ballot on Windows

The Clang declaration of the wave-64 builtin uses "UL" as the return type, which
is interpreted as a 32-bit unsigned integer on Windows. This emits an incorrect
LLVM declaration with i32 return type instead of i64. The clang declaration
needs to be fixed to use "WU" instead.
---
 clang/test/CodeGenHIP/ballot.cpp | 21 +
 1 file changed, 21 insertions(+)
 create mode 100644 clang/test/CodeGenHIP/ballot.cpp

diff --git a/clang/test/CodeGenHIP/ballot.cpp b/clang/test/CodeGenHIP/ballot.cpp
new file mode 100644
index 000..d229b7027a7b1a8
--- /dev/null
+++ b/clang/test/CodeGenHIP/ballot.cpp
@@ -0,0 +1,21 @@
+// REQUIRES: amdgpu-registered-target
+// XFAIL: system-windows
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -aux-triple 
x86_64-pc-windows-msvc -target-cpu gfx900 -x hip -emit-llvm -fcuda-is-device -o 
- %s | FileCheck %s
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -aux-triple 
x86_64-pc-windows-msvc -target-cpu gfx900 -x hip -S -fcuda-is-device -o - %s | 
FileCheck %s --check-prefix=GFX9
+
+// FIXME: The Clang declaration of the wave-64 builtin uses "UL" as the return
+// type, which is interpreted as a 32-bit unsigned integer on Windows. This
+// emits an incorrect LLVM declaration with i32 return type instead of i64. The
+// clang declaration needs to be fixed to use "WU" instead.
+
+// CHECK-LABEL: @_Z3fooi
+// CHECK: call i64 @llvm.amdgcn.ballot.i64
+
+// GFX9-LABEL: _Z3fooi:
+// GFX9: v_cmp_ne_u32_e64
+
+#define __device__ __attribute__((device))
+
+__device__ unsigned long long foo(int p) {
+  return __builtin_amdgcn_ballot_w64(p);
+}

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [clang][AMDGPU] precommit test for ballot on Windows (PR #73920)

2023-12-01 Thread Sameer Sahasrabuddhe via cfe-commits


https://github.com/ssahasra updated 
https://github.com/llvm/llvm-project/pull/73920

>From 8ecb6310a4912de50628cf3db5ff8488fa919bb1 Mon Sep 17 00:00:00 2001
From: Sameer Sahasrabuddhe 
Date: Fri, 1 Dec 2023 14:24:30 +0530
Subject: [PATCH] [clang][AMDGPU] precommit test for ballot on Windows

The Clang declaration of the wave-64 builtin uses "UL" as the return type, which
is interpreted as a 32-bit unsigned integer on Windows. This emits an incorrect
LLVM declaration with i32 return type instead of i64. The clang declaration
needs to be fixed to use "WU" instead.
---
 clang/test/CodeGenHIP/ballot.cpp | 27 +++
 1 file changed, 27 insertions(+)
 create mode 100644 clang/test/CodeGenHIP/ballot.cpp

diff --git a/clang/test/CodeGenHIP/ballot.cpp b/clang/test/CodeGenHIP/ballot.cpp
new file mode 100644
index 000..6e1cbbdfc7af170
--- /dev/null
+++ b/clang/test/CodeGenHIP/ballot.cpp
@@ -0,0 +1,27 @@
+// REQUIRES: amdgpu-registered-target
+// XFAIL: *
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -aux-triple 
x86_64-pc-windows-msvc -target-cpu gfx900 -x hip -emit-llvm -fcuda-is-device -o 
- %s | FileCheck %s
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -aux-triple 
x86_64-pc-windows-msvc -target-cpu gfx900 -x hip -S -fcuda-is-device -o - %s | 
FileCheck %s --check-prefix=GFX9
+
+// Unlike OpenCL, HIP depends on the C++ interpration of "unsigned long", which
+// is 64 bits long on Linux and 32 bits long on Windows. The return type of the
+// ballot intrinsic needs to be a 64 bit integer on both platforms. This test
+// cross-compiles to Windows to confirm that the return type is indeed 64 bits
+// on Windows.
+
+// FIXME: The Clang declaration of the wave-64 builtin uses "UL" as the return
+// type, which is interpreted as a 32-bit unsigned integer on Windows. This
+// emits an incorrect LLVM declaration with i32 return type instead of i64. The
+// clang declaration needs to be fixed to use "WU" instead.
+
+// CHECK-LABEL: @_Z3fooi
+// CHECK: call i64 @llvm.amdgcn.ballot.i64
+
+// GFX9-LABEL: _Z3fooi:
+// GFX9: v_cmp_ne_u32_e64
+
+#define __device__ __attribute__((device))
+
+__device__ unsigned long long foo(int p) {
+  return __builtin_amdgcn_ballot_w64(p);
+}

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [clang][AMDGPU] fix the return type for ballot (PR #73906)

2023-12-01 Thread Sameer Sahasrabuddhe via cfe-commits


https://github.com/ssahasra updated 
https://github.com/llvm/llvm-project/pull/73906

>From 8ecb6310a4912de50628cf3db5ff8488fa919bb1 Mon Sep 17 00:00:00 2001
From: Sameer Sahasrabuddhe 
Date: Fri, 1 Dec 2023 14:24:30 +0530
Subject: [PATCH 1/2] [clang][AMDGPU] precommit test for ballot on Windows

The Clang declaration of the wave-64 builtin uses "UL" as the return type, which
is interpreted as a 32-bit unsigned integer on Windows. This emits an incorrect
LLVM declaration with i32 return type instead of i64. The clang declaration
needs to be fixed to use "WU" instead.
---
 clang/test/CodeGenHIP/ballot.cpp | 27 +++
 1 file changed, 27 insertions(+)
 create mode 100644 clang/test/CodeGenHIP/ballot.cpp

diff --git a/clang/test/CodeGenHIP/ballot.cpp b/clang/test/CodeGenHIP/ballot.cpp
new file mode 100644
index 000..6e1cbbdfc7af170
--- /dev/null
+++ b/clang/test/CodeGenHIP/ballot.cpp
@@ -0,0 +1,27 @@
+// REQUIRES: amdgpu-registered-target
+// XFAIL: *
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -aux-triple 
x86_64-pc-windows-msvc -target-cpu gfx900 -x hip -emit-llvm -fcuda-is-device -o 
- %s | FileCheck %s
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -aux-triple 
x86_64-pc-windows-msvc -target-cpu gfx900 -x hip -S -fcuda-is-device -o - %s | 
FileCheck %s --check-prefix=GFX9
+
+// Unlike OpenCL, HIP depends on the C++ interpration of "unsigned long", which
+// is 64 bits long on Linux and 32 bits long on Windows. The return type of the
+// ballot intrinsic needs to be a 64 bit integer on both platforms. This test
+// cross-compiles to Windows to confirm that the return type is indeed 64 bits
+// on Windows.
+
+// FIXME: The Clang declaration of the wave-64 builtin uses "UL" as the return
+// type, which is interpreted as a 32-bit unsigned integer on Windows. This
+// emits an incorrect LLVM declaration with i32 return type instead of i64. The
+// clang declaration needs to be fixed to use "WU" instead.
+
+// CHECK-LABEL: @_Z3fooi
+// CHECK: call i64 @llvm.amdgcn.ballot.i64
+
+// GFX9-LABEL: _Z3fooi:
+// GFX9: v_cmp_ne_u32_e64
+
+#define __device__ __attribute__((device))
+
+__device__ unsigned long long foo(int p) {
+  return __builtin_amdgcn_ballot_w64(p);
+}

>From bfcff343a601923da554cafda26568a445fc39b0 Mon Sep 17 00:00:00 2001
From: Sameer Sahasrabuddhe 
Date: Thu, 30 Nov 2023 12:14:38 +0530
Subject: [PATCH 2/2] [clang][AMDGPU] fix the return type for ballot

In the builtins declaration, "ULi" is a 32-bit integer on Windows. Use "WUi"
instead to ensure a 64-bit integer on all platforms.
---
 clang/include/clang/Basic/BuiltinsAMDGPU.def | 4 ++--
 clang/test/CodeGenHIP/ballot.cpp | 6 --
 2 files changed, 2 insertions(+), 8 deletions(-)

diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def 
b/clang/include/clang/Basic/BuiltinsAMDGPU.def
index a19c8bd5f219ec6..8b59b3790d7bc66 100644
--- a/clang/include/clang/Basic/BuiltinsAMDGPU.def
+++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def
@@ -150,8 +150,8 @@ BUILTIN(__builtin_amdgcn_mqsad_u32_u8, "V4UiWUiUiV4Ui", 
"nc")
 // Ballot builtins.
 
//===--===//
 
-TARGET_BUILTIN(__builtin_amdgcn_ballot_w32, "Uib", "nc", "wavefrontsize32")
-TARGET_BUILTIN(__builtin_amdgcn_ballot_w64, "LUib", "nc", "wavefrontsize64")
+TARGET_BUILTIN(__builtin_amdgcn_ballot_w32, "ZUib", "nc", "wavefrontsize32")
+TARGET_BUILTIN(__builtin_amdgcn_ballot_w64, "WUib", "nc", "wavefrontsize64")
 
 // Deprecated intrinsics in favor of __builtin_amdgn_ballot_{w32|w64}
 BUILTIN(__builtin_amdgcn_uicmp, "WUiUiUiIi", "nc")
diff --git a/clang/test/CodeGenHIP/ballot.cpp b/clang/test/CodeGenHIP/ballot.cpp
index 6e1cbbdfc7af170..a1c23e2136c7153 100644
--- a/clang/test/CodeGenHIP/ballot.cpp
+++ b/clang/test/CodeGenHIP/ballot.cpp
@@ -1,5 +1,4 @@
 // REQUIRES: amdgpu-registered-target
-// XFAIL: *
 // RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -aux-triple 
x86_64-pc-windows-msvc -target-cpu gfx900 -x hip -emit-llvm -fcuda-is-device -o 
- %s | FileCheck %s
 // RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -aux-triple 
x86_64-pc-windows-msvc -target-cpu gfx900 -x hip -S -fcuda-is-device -o - %s | 
FileCheck %s --check-prefix=GFX9
 
@@ -9,11 +8,6 @@
 // cross-compiles to Windows to confirm that the return type is indeed 64 bits
 // on Windows.
 
-// FIXME: The Clang declaration of the wave-64 builtin uses "UL" as the return
-// type, which is interpreted as a 32-bit unsigned integer on Windows. This
-// emits an incorrect LLVM declaration with i32 return type instead of i64. The
-// clang declaration needs to be fixed to use "WU" instead.
-
 // CHECK-LABEL: @_Z3fooi
 // CHECK: call i64 @llvm.amdgcn.ballot.i64
 

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [clang][AMDGPU] fix the return type for ballot (PR #73906)

2023-12-01 Thread Sameer Sahasrabuddhe via cfe-commits



@@ -150,8 +150,8 @@ BUILTIN(__builtin_amdgcn_mqsad_u32_u8, "V4UiWUiUiV4Ui", 
"nc")
 // Ballot builtins.
 
//===--===//
 
-TARGET_BUILTIN(__builtin_amdgcn_ballot_w32, "Uib", "nc", "wavefrontsize32")
-TARGET_BUILTIN(__builtin_amdgcn_ballot_w64, "LUib", "nc", "wavefrontsize64")
+TARGET_BUILTIN(__builtin_amdgcn_ballot_w32, "ZUib", "nc", "wavefrontsize32")
+TARGET_BUILTIN(__builtin_amdgcn_ballot_w64, "WUib", "nc", "wavefrontsize64")

ssahasra wrote:

I checked now. The tell is whether the builtin uses "L" in its type descriptor. 
None of them do except ballot.

https://github.com/llvm/llvm-project/pull/73906
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [clang][AMDGPU] fix the return type for ballot (PR #73906)

2023-12-01 Thread Sameer Sahasrabuddhe via cfe-commits



@@ -0,0 +1,15 @@
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -target-cpu gfx900 -x hip 
-emit-llvm -fcuda-is-device -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -target-cpu gfx900 -x hip -S 
-fcuda-is-device -o - %s | FileCheck %s --check-prefix=GFX9

ssahasra wrote:

You're right. The precommit test needed fixing. The updated version uses an 
aux-triple to specify a Windows host. Now the test exposes the problem even on 
a Linux build when trying to cross-compile to Windows.

Added another note in the test: The same problem does not arise with existing 
OpenCL tests because the language fully specifies that "long" is 64 bits on all 
platforms.

https://github.com/llvm/llvm-project/pull/73906
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [clang][AMDGPU] precommit test for ballot on Windows (PR #73920)

2023-12-01 Thread Sameer Sahasrabuddhe via cfe-commits



@@ -0,0 +1,27 @@
+// REQUIRES: amdgpu-registered-target
+// XFAIL: *
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -aux-triple 
x86_64-pc-windows-msvc -target-cpu gfx900 -x hip -emit-llvm -fcuda-is-device -o 
- %s | FileCheck %s
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -aux-triple 
x86_64-pc-windows-msvc -target-cpu gfx900 -x hip -S -fcuda-is-device -o - %s | 
FileCheck %s --check-prefix=GFX9
+
+// Unlike OpenCL, HIP depends on the C++ interpration of "unsigned long", which
+// is 64 bits long on Linux and 32 bits long on Windows. The return type of the
+// ballot intrinsic needs to be a 64 bit integer on both platforms. This test
+// cross-compiles to Windows to confirm that the return type is indeed 64 bits
+// on Windows.
+
+// FIXME: The Clang declaration of the wave-64 builtin uses "UL" as the return
+// type, which is interpreted as a 32-bit unsigned integer on Windows. This
+// emits an incorrect LLVM declaration with i32 return type instead of i64. The
+// clang declaration needs to be fixed to use "WU" instead.
+
+// CHECK-LABEL: @_Z3fooi
+// CHECK: call i64 @llvm.amdgcn.ballot.i64
+
+// GFX9-LABEL: _Z3fooi:
+// GFX9: v_cmp_ne_u32_e64

ssahasra wrote:

Yeah, we should check codegen because ultimately that was the symptom ... llc 
crashed when running hipcc on Windows.

https://github.com/llvm/llvm-project/pull/73920
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [AMDGPU] Treat printf as builtin for OpenCL (PR #72554)

2023-12-01 Thread Sameer Sahasrabuddhe via cfe-commits



@@ -406,5 +410,9 @@ TARGET_BUILTIN(__builtin_amdgcn_cvt_pk_fp8_f32, "iffiIb", 
"nc", "fp8-insts")
 TARGET_BUILTIN(__builtin_amdgcn_cvt_sr_bf8_f32, "ifiiIi", "nc", "fp8-insts")
 TARGET_BUILTIN(__builtin_amdgcn_cvt_sr_fp8_f32, "ifiiIi", "nc", "fp8-insts")
 
+// OpenCL
+LANGBUILTIN(printf, "icC*4.", "fp:0:", ALL_OCL_LANGUAGES)

ssahasra wrote:

I think what @vikramRH is saying is that the magic number "4" for OpenCL 
address space "__constant" is specific to AMDGPU.

https://github.com/llvm/llvm-project/pull/72554
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [clang][AMDGPU] precommit test for ballot on Windows (PR #73920)

2023-12-03 Thread Sameer Sahasrabuddhe via cfe-commits


https://github.com/ssahasra closed 
https://github.com/llvm/llvm-project/pull/73920
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[libcxx] [compiler-rt] [clang] [libc] [clang-tools-extra] [flang] [llvm] [clang][AMDGPU] fix the return type for ballot (PR #73906)

2023-12-03 Thread Sameer Sahasrabuddhe via cfe-commits


https://github.com/ssahasra updated 
https://github.com/llvm/llvm-project/pull/73906

>From 8ecb6310a4912de50628cf3db5ff8488fa919bb1 Mon Sep 17 00:00:00 2001
From: Sameer Sahasrabuddhe 
Date: Fri, 1 Dec 2023 14:24:30 +0530
Subject: [PATCH 1/2] [clang][AMDGPU] precommit test for ballot on Windows

The Clang declaration of the wave-64 builtin uses "UL" as the return type, which
is interpreted as a 32-bit unsigned integer on Windows. This emits an incorrect
LLVM declaration with i32 return type instead of i64. The clang declaration
needs to be fixed to use "WU" instead.
---
 clang/test/CodeGenHIP/ballot.cpp | 27 +++
 1 file changed, 27 insertions(+)
 create mode 100644 clang/test/CodeGenHIP/ballot.cpp

diff --git a/clang/test/CodeGenHIP/ballot.cpp b/clang/test/CodeGenHIP/ballot.cpp
new file mode 100644
index 0..6e1cbbdfc7af1
--- /dev/null
+++ b/clang/test/CodeGenHIP/ballot.cpp
@@ -0,0 +1,27 @@
+// REQUIRES: amdgpu-registered-target
+// XFAIL: *
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -aux-triple 
x86_64-pc-windows-msvc -target-cpu gfx900 -x hip -emit-llvm -fcuda-is-device -o 
- %s | FileCheck %s
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -aux-triple 
x86_64-pc-windows-msvc -target-cpu gfx900 -x hip -S -fcuda-is-device -o - %s | 
FileCheck %s --check-prefix=GFX9
+
+// Unlike OpenCL, HIP depends on the C++ interpration of "unsigned long", which
+// is 64 bits long on Linux and 32 bits long on Windows. The return type of the
+// ballot intrinsic needs to be a 64 bit integer on both platforms. This test
+// cross-compiles to Windows to confirm that the return type is indeed 64 bits
+// on Windows.
+
+// FIXME: The Clang declaration of the wave-64 builtin uses "UL" as the return
+// type, which is interpreted as a 32-bit unsigned integer on Windows. This
+// emits an incorrect LLVM declaration with i32 return type instead of i64. The
+// clang declaration needs to be fixed to use "WU" instead.
+
+// CHECK-LABEL: @_Z3fooi
+// CHECK: call i64 @llvm.amdgcn.ballot.i64
+
+// GFX9-LABEL: _Z3fooi:
+// GFX9: v_cmp_ne_u32_e64
+
+#define __device__ __attribute__((device))
+
+__device__ unsigned long long foo(int p) {
+  return __builtin_amdgcn_ballot_w64(p);
+}

>From bfcff343a601923da554cafda26568a445fc39b0 Mon Sep 17 00:00:00 2001
From: Sameer Sahasrabuddhe 
Date: Thu, 30 Nov 2023 12:14:38 +0530
Subject: [PATCH 2/2] [clang][AMDGPU] fix the return type for ballot

In the builtins declaration, "ULi" is a 32-bit integer on Windows. Use "WUi"
instead to ensure a 64-bit integer on all platforms.
---
 clang/include/clang/Basic/BuiltinsAMDGPU.def | 4 ++--
 clang/test/CodeGenHIP/ballot.cpp | 6 --
 2 files changed, 2 insertions(+), 8 deletions(-)

diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def 
b/clang/include/clang/Basic/BuiltinsAMDGPU.def
index a19c8bd5f219e..8b59b3790d7bc 100644
--- a/clang/include/clang/Basic/BuiltinsAMDGPU.def
+++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def
@@ -150,8 +150,8 @@ BUILTIN(__builtin_amdgcn_mqsad_u32_u8, "V4UiWUiUiV4Ui", 
"nc")
 // Ballot builtins.
 
//===--===//
 
-TARGET_BUILTIN(__builtin_amdgcn_ballot_w32, "Uib", "nc", "wavefrontsize32")
-TARGET_BUILTIN(__builtin_amdgcn_ballot_w64, "LUib", "nc", "wavefrontsize64")
+TARGET_BUILTIN(__builtin_amdgcn_ballot_w32, "ZUib", "nc", "wavefrontsize32")
+TARGET_BUILTIN(__builtin_amdgcn_ballot_w64, "WUib", "nc", "wavefrontsize64")
 
 // Deprecated intrinsics in favor of __builtin_amdgn_ballot_{w32|w64}
 BUILTIN(__builtin_amdgcn_uicmp, "WUiUiUiIi", "nc")
diff --git a/clang/test/CodeGenHIP/ballot.cpp b/clang/test/CodeGenHIP/ballot.cpp
index 6e1cbbdfc7af1..a1c23e2136c71 100644
--- a/clang/test/CodeGenHIP/ballot.cpp
+++ b/clang/test/CodeGenHIP/ballot.cpp
@@ -1,5 +1,4 @@
 // REQUIRES: amdgpu-registered-target
-// XFAIL: *
 // RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -aux-triple 
x86_64-pc-windows-msvc -target-cpu gfx900 -x hip -emit-llvm -fcuda-is-device -o 
- %s | FileCheck %s
 // RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -aux-triple 
x86_64-pc-windows-msvc -target-cpu gfx900 -x hip -S -fcuda-is-device -o - %s | 
FileCheck %s --check-prefix=GFX9
 
@@ -9,11 +8,6 @@
 // cross-compiles to Windows to confirm that the return type is indeed 64 bits
 // on Windows.
 
-// FIXME: The Clang declaration of the wave-64 builtin uses "UL" as the return
-// type, which is interpreted as a 32-bit unsigned integer on Windows. This
-// emits an incorrect LLVM declaration with i32 return type instead of i64. The
-// clang declaration needs to be fixed to use "WU" instead.
-
 // CHECK-LABEL: @_Z3fooi
 // CHECK: call i64 @llvm.amdgcn.ballot.i64
 

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[flang] [libcxx] [llvm] [libc] [compiler-rt] [clang-tools-extra] [clang] [clang][AMDGPU] fix the return type for ballot (PR #73906)

2023-12-04 Thread Sameer Sahasrabuddhe via cfe-commits


https://github.com/ssahasra closed 
https://github.com/llvm/llvm-project/pull/73906
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [llvm] [AMDGPU] Enable OpenCL hostcall printf (WIP) (PR #72556)

2023-12-04 Thread Sameer Sahasrabuddhe via cfe-commits



@@ -170,20 +173,49 @@ static Value *appendString(IRBuilder<> &Builder, Value 
*Desc, Value *Arg,
   return callAppendStringN(Builder, Desc, Arg, Length, IsLast);
 }
 
+static Value *appendVectorArg(IRBuilder<> &Builder, Value *Desc, Value *Arg,
+  bool IsLast, bool IsBuffered) {
+  assert(Arg->getType()->isVectorTy() && "incorrent append* function");
+  auto VectorTy = dyn_cast(Arg->getType());
+  auto Zero = Builder.getInt64(0);
+  if (VectorTy) {
+for (unsigned int i = 0; i < VectorTy->getNumElements() - 1; i++) {
+  auto Val = Builder.CreateExtractElement(Arg, i);
+  Desc = callAppendArgs(Builder, Desc, 1,
+fitArgInto64Bits(Builder, Val, IsBuffered), Zero,
+Zero, Zero, Zero, Zero, Zero, false);
+}
+
+Value* Val =
+Builder.CreateExtractElement(Arg, VectorTy->getNumElements() - 1);
+return callAppendArgs(Builder, Desc, 1,
+  fitArgInto64Bits(Builder, Val, IsBuffered), Zero,
+  Zero, Zero, Zero, Zero, Zero, IsLast);
+  }
+  return nullptr;
+}
+
 static Value *processArg(IRBuilder<> &Builder, Value *Desc, Value *Arg,
- bool SpecIsCString, bool IsLast) {
+ bool SpecIsCString, bool IsVector, bool IsLast,
+ bool IsBuffered) {
   if (SpecIsCString && isa(Arg->getType())) {
 return appendString(Builder, Desc, Arg, IsLast);
   }
-  // If the format specifies a string but the argument is not, the frontend 
will
-  // have printed a warning. We just rely on undefined behaviour and send the
-  // argument anyway.
-  return appendArg(Builder, Desc, Arg, IsLast);
+
+  if (IsVector) {
+return appendVectorArg(Builder, Desc, Arg, IsLast, IsBuffered);
+  } 
+
+  // If the format specifies a string but the argument is not, the frontend
+  // will have printed a warning. We just rely on undefined behaviour and send
+  // the argument anyway.

ssahasra wrote:

Bump. Please restore the original comment if there is no change in the actual 
words.

https://github.com/llvm/llvm-project/pull/72556
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[llvm] [clang] [AMDGPU] Enable OpenCL hostcall printf (WIP) (PR #72556)

2023-12-04 Thread Sameer Sahasrabuddhe via cfe-commits



@@ -4742,6 +4742,16 @@ void Clang::ConstructJob(Compilation &C, const JobAction 
&JA,
 Args.ClaimAllArgs(options::OPT_gen_cdb_fragment_path);
   }
 
+  if (TC.getTriple().isAMDGPU() && types::isOpenCL(Input.getType())) {
+if (Args.getLastArg(options::OPT_mprintf_kind_EQ)) {
+  CmdArgs.push_back(Args.MakeArgString(
+  "-mprintf-kind=" +
+  Args.getLastArgValue(options::OPT_mprintf_kind_EQ)));
+  // Force compiler error on invalid conversion specifiers
+  
CmdArgs.push_back(Args.MakeArgString("-Werror=format-invalid-specifier"));

ssahasra wrote:

Bump. I am not convinced that we should force errors on invalid specifiers. 
What is the rationale for that?

https://github.com/llvm/llvm-project/pull/72556
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[llvm] [clang] [AMDGPU] Enable OpenCL hostcall printf (WIP) (PR #72556)

2023-12-04 Thread Sameer Sahasrabuddhe via cfe-commits



@@ -4742,6 +4742,16 @@ void Clang::ConstructJob(Compilation &C, const JobAction 
&JA,
 Args.ClaimAllArgs(options::OPT_gen_cdb_fragment_path);
   }
 
+  if (TC.getTriple().isAMDGPU() && types::isOpenCL(Input.getType())) {
+if (Args.getLastArg(options::OPT_mprintf_kind_EQ)) {
+  CmdArgs.push_back(Args.MakeArgString(
+  "-mprintf-kind=" +
+  Args.getLastArgValue(options::OPT_mprintf_kind_EQ)));
+  // Force compiler error on invalid conversion specifiers
+  
CmdArgs.push_back(Args.MakeArgString("-Werror=format-invalid-specifier"));

ssahasra wrote:

Actually this is important. Converting a warning into an error can cause 
surprises when users build existing applications. It is up to the user to decide 
if they want this to be an error, which is why it is a command-line option. 
Even if the spec says undefined behaviour, we should just do something 
reasonable and accept whatever the user wrote without forcing an error. There 
is a lot of freedom in deciding what is "something reasonable" ... we could 
choose to print nothing, or a default value, or a placeholder instead of the 
actual printf format string, etc.

https://github.com/llvm/llvm-project/pull/72556
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] d8f99bb - [AMDGPU] replace hostcall module flag with function attribute

2022-02-11 Thread Sameer Sahasrabuddhe via cfe-commits


Author: Sameer Sahasrabuddhe
Date: 2022-02-11T22:51:56+05:30
New Revision: d8f99bb6e0641474b6bc1728295b40a8fa279f9a

URL: 
https://github.com/llvm/llvm-project/commit/d8f99bb6e0641474b6bc1728295b40a8fa279f9a
DIFF: 
https://github.com/llvm/llvm-project/commit/d8f99bb6e0641474b6bc1728295b40a8fa279f9a.diff

LOG: [AMDGPU] replace hostcall module flag with function attribute

The module flag to indicate use of hostcall is insufficient to catch
all cases where hostcall might be in use by a kernel. This is now
replaced by a function attribute that gets propagated to top-level
kernel functions via their respective call-graph.

If the attribute "amdgpu-no-hostcall-ptr" is absent on a kernel, the
default behaviour is to emit kernel metadata indicating that the
kernel uses the hostcall buffer pointer passed as an implicit
argument.

The attribute may be placed explicitly by the user, or inferred by the
AMDGPU attributor by examining the call-graph. The attribute is
inferred only if the function is not being sanitized, and the
implicitarg_ptr does not result in a load of any byte in the hostcall
pointer argument.

Reviewed By: jdoerfert, arsenm, kpyzhov

Differential Revision: https://reviews.llvm.org/D119216

Added: 
llvm/test/CodeGen/AMDGPU/hsa-metadata-hostcall-v3.ll
llvm/test/CodeGen/AMDGPU/hsa-metadata-hostcall-v5.ll

Modified: 
clang/lib/CodeGen/CodeGenModule.cpp
clang/test/CodeGenCUDA/amdgpu-asan.cu
llvm/lib/Target/AMDGPU/AMDGPUAttributes.def
llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp
llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
llvm/lib/Transforms/Utils/AMDGPUEmitPrintf.cpp
llvm/test/CodeGen/AMDGPU/addrspacecast-constantexpr.ll
llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll
llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa.ll
llvm/test/CodeGen/AMDGPU/annotate-kernel-features.ll
llvm/test/CodeGen/AMDGPU/direct-indirect-call.ll
llvm/test/CodeGen/AMDGPU/duplicate-attribute-indirect.ll
llvm/test/CodeGen/AMDGPU/hsa-metadata-enqueue-kernel-v3.ll
llvm/test/CodeGen/AMDGPU/hsa-metadata-enqueue-kernel.ll
llvm/test/CodeGen/AMDGPU/hsa-metadata-hidden-args-v3.ll
llvm/test/CodeGen/AMDGPU/hsa-metadata-hidden-args-v5.ll
llvm/test/CodeGen/AMDGPU/hsa-metadata-hidden-args.ll
llvm/test/CodeGen/AMDGPU/hsa-metadata-hostcall-present-v3-asan.ll
llvm/test/CodeGen/AMDGPU/propagate-flat-work-group-size.ll
llvm/test/CodeGen/AMDGPU/simple-indirect-call.ll
llvm/test/CodeGen/AMDGPU/uniform-work-group-attribute-missing.ll
llvm/test/CodeGen/AMDGPU/uniform-work-group-multistep.ll
llvm/test/CodeGen/AMDGPU/uniform-work-group-nested-function-calls.ll
llvm/test/CodeGen/AMDGPU/uniform-work-group-prevent-attribute-propagation.ll
llvm/test/CodeGen/AMDGPU/uniform-work-group-recursion-test.ll
llvm/test/CodeGen/AMDGPU/uniform-work-group-test.ll
mlir/lib/Dialect/GPU/Transforms/SerializeToHsaco.cpp

Removed: 
clang/test/CodeGenCUDA/amdgpu-asan-printf.cu
llvm/test/CodeGen/AMDGPU/hsa-metadata-hostcall-absent-v3.ll
llvm/test/CodeGen/AMDGPU/hsa-metadata-hostcall-absent.ll
llvm/test/CodeGen/AMDGPU/hsa-metadata-hostcall-present-v3.ll
llvm/test/CodeGen/AMDGPU/hsa-metadata-hostcall-present.ll



diff  --git a/clang/lib/CodeGen/CodeGenModule.cpp 
b/clang/lib/CodeGen/CodeGenModule.cpp
index 8a7345a9f494a..0d89cb723c76b 100644
--- a/clang/lib/CodeGen/CodeGenModule.cpp
+++ b/clang/lib/CodeGen/CodeGenModule.cpp
@@ -566,9 +566,6 @@ void CodeGenModule::Release() {
   "__amdgpu_device_library_preserve_asan_functions_ptr", nullptr,
   llvm::GlobalVariable::NotThreadLocal);
   addCompilerUsedGlobal(Var);
-  if (!getModule().getModuleFlag("amdgpu_hostcall")) {
-getModule().addModuleFlag(llvm::Module::Override, "amdgpu_hostcall", 
1);
-  }
 }
 // Emit amdgpu_code_object_version module flag, which is code object 
version
 // times 100.

diff  --git a/clang/test/CodeGenCUDA/amdgpu-asan-printf.cu 
b/clang/test/CodeGenCUDA/amdgpu-asan-printf.cu
deleted file mode 100644
index 69246f9ce7af1..0
--- a/clang/test/CodeGenCUDA/amdgpu-asan-printf.cu
+++ /dev/null
@@ -1,17 +0,0 @@
-// RUN: %clang_cc1 %s -emit-llvm -o - -triple=amdgcn-amd-amdhsa \
-// RUN:   -fcuda-is-device -target-cpu gfx906 -fsanitize=address \
-// RUN:   -O3 -x hip | FileCheck -check-prefixes=MFCHECK %s
-
-// MFCHECK: !{{.*}} = !{i32 4, !"amdgpu_hostcall", i32 1}
-
-// Test to check hostcall module flag metadata is generated correctly
-// when a program has printf call and compiled with -fsanitize=address.
-#include "Inputs/cuda.h"
-__device__ void non_kernel() {
-  printf("sanitized device function");
-}
-
-__global_

[clang] [llvm] [clang][HLSL][SPRI-V] Add convergence intrinsics (PR #80680)

2024-03-28 Thread Sameer Sahasrabuddhe via cfe-commits

Nathan Gauër,
Nathan Gauër,
Nathan Gauër,
Nathan Gauër,
Nathan Gauër,
Nathan Gauër,
Nathan Gauër
Message-ID:
In-Reply-To: 


https://github.com/ssahasra commented:

LGTM, with a few nits.

For the record, I did not have the bandwidth to comment on the translation from 
HLSL to SPIR-V. But given that the generated IR passes the verifier, this seems 
like a correct and very interesting first use of convergence control.

https://github.com/llvm/llvm-project/pull/80680
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [llvm] [clang][HLSL][SPRI-V] Add convergence intrinsics (PR #80680)

2024-03-28 Thread Sameer Sahasrabuddhe via cfe-commits

Nathan Gauër,
Nathan Gauër,
Nathan Gauër,
Nathan Gauër,
Nathan Gauër,
Nathan Gauër,
Nathan Gauër
Message-ID:
In-Reply-To: 



@@ -1130,8 +1130,92 @@ struct BitTest {
 
   static BitTest decodeBitTestBuiltin(unsigned BuiltinID);
 };
+
+// Returns the first convergence entry/loop/anchor instruction found in |BB|.
+// std::nullptr otherwise.
+llvm::IntrinsicInst *getConvergenceToken(llvm::BasicBlock *BB) {
+  for (auto &I : *BB) {
+auto *II = dyn_cast(&I);
+if (II && isConvergenceControlIntrinsic(II->getIntrinsicID()))
+  return II;
+  }
+  return nullptr;
+}
+
 } // namespace
 
+llvm::CallBase *
+CodeGenFunction::AddConvergenceControlAttr(llvm::CallBase *Input,

ssahasra wrote:

In this and other places, "ConvergenceControlToken" is a better name than 
"Attr" ... it's not really an attribute.

https://github.com/llvm/llvm-project/pull/80680
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [llvm] [clang][HLSL][SPRI-V] Add convergence intrinsics (PR #80680)

2024-03-28 Thread Sameer Sahasrabuddhe via cfe-commits

Nathan Gauër,
Nathan Gauër,
Nathan Gauër,
Nathan Gauër,
Nathan Gauër,
Nathan Gauër,
Nathan Gauër
Message-ID:
In-Reply-To: 



@@ -1130,8 +1130,92 @@ struct BitTest {
 
   static BitTest decodeBitTestBuiltin(unsigned BuiltinID);
 };
+
+// Returns the first convergence entry/loop/anchor instruction found in |BB|.
+// std::nullptr otherwise.
+llvm::IntrinsicInst *getConvergenceToken(llvm::BasicBlock *BB) {
+  for (auto &I : *BB) {
+auto *II = dyn_cast(&I);
+if (II && isConvergenceControlIntrinsic(II->getIntrinsicID()))
+  return II;
+  }
+  return nullptr;
+}
+
 } // namespace
 
+llvm::CallBase *
+CodeGenFunction::AddConvergenceControlAttr(llvm::CallBase *Input,

ssahasra wrote:

Also function names need to start with lower case.

https://github.com/llvm/llvm-project/pull/80680
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [llvm] [clang][HLSL][SPRI-V] Add convergence intrinsics (PR #80680)

2024-03-28 Thread Sameer Sahasrabuddhe via cfe-commits

Nathan Gauër,
Nathan Gauër,
Nathan Gauër,
Nathan Gauër,
Nathan Gauër,
Nathan Gauër,
Nathan Gauër
Message-ID:
In-Reply-To: 



@@ -1130,8 +1130,92 @@ struct BitTest {
 
   static BitTest decodeBitTestBuiltin(unsigned BuiltinID);
 };
+
+// Returns the first convergence entry/loop/anchor instruction found in |BB|.
+// std::nullptr otherwise.
+llvm::IntrinsicInst *getConvergenceToken(llvm::BasicBlock *BB) {
+  for (auto &I : *BB) {
+auto *II = dyn_cast(&I);
+if (II && isConvergenceControlIntrinsic(II->getIntrinsicID()))
+  return II;
+  }
+  return nullptr;
+}
+
 } // namespace
 
+llvm::CallBase *
+CodeGenFunction::AddConvergenceControlAttr(llvm::CallBase *Input,
+   llvm::Value *ParentToken) {
+  llvm::Value *bundleArgs[] = {ParentToken};
+  llvm::OperandBundleDef OB("convergencectrl", bundleArgs);
+  auto Output = llvm::CallBase::addOperandBundle(
+  Input, llvm::LLVMContext::OB_convergencectrl, OB, Input);
+  Input->replaceAllUsesWith(Output);
+  Input->eraseFromParent();
+  return Output;
+}
+
+llvm::IntrinsicInst *
+CodeGenFunction::EmitConvergenceLoop(llvm::BasicBlock *BB,

ssahasra wrote:

Other places use "LoopToken". Ending with "Token" certainly feels more 
consistent.

https://github.com/llvm/llvm-project/pull/80680
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [llvm] [clang][HLSL][SPRI-V] Add convergence intrinsics (PR #80680)

2024-03-28 Thread Sameer Sahasrabuddhe via cfe-commits

Nathan Gauër,
Nathan Gauër,
Nathan Gauër,
Nathan Gauër,
Nathan Gauër,
Nathan Gauër,
Nathan Gauër
Message-ID:
In-Reply-To: 


https://github.com/ssahasra edited 
https://github.com/llvm/llvm-project/pull/80680
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [llvm] [clang][HLSL][SPRI-V] Add convergence intrinsics (PR #80680)

2024-03-28 Thread Sameer Sahasrabuddhe via cfe-commits

Nathan Gauër,
Nathan Gauër,
Nathan Gauër,
Nathan Gauër,
Nathan Gauër,
Nathan Gauër,
Nathan Gauër
Message-ID:
In-Reply-To: 


https://github.com/ssahasra approved this pull request.


https://github.com/llvm/llvm-project/pull/80680
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [llvm] [clang][HLSL][SPRI-V] Add convergence intrinsics (PR #80680)

2024-03-12 Thread Sameer Sahasrabuddhe via cfe-commits

Nathan Gauër,
Nathan Gauër,
Nathan Gauër,
Nathan Gauër
Message-ID:
In-Reply-To: 



@@ -1130,8 +1130,96 @@ struct BitTest {
 
   static BitTest decodeBitTestBuiltin(unsigned BuiltinID);
 };
+
+// Returns the first convergence entry/loop/anchor instruction found in |BB|.
+// std::nullopt otherwise.
+std::optional getConvergenceToken(llvm::BasicBlock *BB) 
{
+  for (auto &I : *BB) {

ssahasra wrote:

This kind of map will be needed in more places as we continue to introduce 
uses of these convergence tokens. I have a simple Analysis in my upstreaming 
pipeline that produces such a map, but it's really very simple. You can wait 
for that, or just feel free to start your own.

https://github.com/llvm/llvm-project/pull/80680
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [llvm] [AMDGPU] Enable OpenCL hostcall printf (WIP) (PR #72556)

2024-02-25 Thread Sameer Sahasrabuddhe via cfe-commits


https://github.com/ssahasra approved this pull request.


https://github.com/llvm/llvm-project/pull/72556
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [llvm] [AMDGPU] Enable OpenCL hostcall printf (WIP) (PR #72556)

2024-03-02 Thread Sameer Sahasrabuddhe via cfe-commits



@@ -3616,6 +3617,12 @@ unsigned FunctionDecl::getBuiltinID(bool 
ConsiderWrapperFunctions) const {
   if (!ConsiderWrapperFunctions && getStorageClass() == SC_Static)
 return 0;
 
+  // AMDGCN implementation supports printf as a builtin
+  // for OpenCL
+  if (Context.getTargetInfo().getTriple().isAMDGCN() &&
+  Context.getLangOpts().OpenCL && BuiltinID == AMDGPU::BIprintf)
+return BuiltinID;

ssahasra wrote:

I thought this had been clarified earlier too. It's quite imprecise to just say 
that "signatures differ". Perhaps the following detailed explanation might move 
the conversation forward. The problem is that the OpenCL printf expects a 
format string in the constant address space, which has no representation in 
Clang builtin. What we do have is the ability to specify an address-space 
number in the builtin declaration. But this number is target-specific, which 
makes the whole builtin target-specific. Is there a way around that magic 
number 4?


https://github.com/llvm/llvm-project/pull/72556
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[llvm] [clang] [AMDGPU] Enable OpenCL hostcall printf (WIP) (PR #72556)

2024-01-21 Thread Sameer Sahasrabuddhe via cfe-commits


https://github.com/ssahasra edited 
https://github.com/llvm/llvm-project/pull/72556
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [llvm] [AMDGPU] Enable OpenCL hostcall printf (WIP) (PR #72556)

2024-01-21 Thread Sameer Sahasrabuddhe via cfe-commits


https://github.com/ssahasra requested changes to this pull request.


https://github.com/llvm/llvm-project/pull/72556
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[llvm] [clang] [AMDGPU] Enable OpenCL hostcall printf (WIP) (PR #72556)

2024-01-21 Thread Sameer Sahasrabuddhe via cfe-commits



@@ -26,28 +26,34 @@ using namespace llvm;
 
 #define DEBUG_TYPE "amdgpu-emit-printf"
 
-static Value *fitArgInto64Bits(IRBuilder<> &Builder, Value *Arg) {
+static Value *fitArgInto64Bits(IRBuilder<> &Builder, Value *Arg,
+   bool IsBuffered) {
+  const DataLayout &DL = 
Builder.GetInsertBlock()->getModule()->getDataLayout();
   auto Int64Ty = Builder.getInt64Ty();
   auto Ty = Arg->getType();
 
   if (auto IntTy = dyn_cast(Ty)) {
-switch (IntTy->getBitWidth()) {
-case 32:
-  return Builder.CreateZExt(Arg, Int64Ty);
-case 64:
-  return Arg;
+if (IntTy->getBitWidth() < 64) {
+  return Builder.CreateZExt(Arg, Builder.getInt64Ty());
 }
   }
 
-  if (Ty->getTypeID() == Type::DoubleTyID) {
+  if (Ty->isFloatingPointTy()) {
+if (DL.getTypeAllocSize(Ty) < 8)
+  Arg = Builder.CreateFPExt(Arg, Builder.getDoubleTy());
+if (IsBuffered)
+  return Arg;
 return Builder.CreateBitCast(Arg, Int64Ty);
   }
 
-  if (isa(Ty)) {
+  // The cast is necessary for the hostcall case 
+  // for the argument to be compatible with device lib 
+  // functions.
+  if (!IsBuffered && isa(Ty)) {
 return Builder.CreatePtrToInt(Arg, Int64Ty);
   }
 
-  llvm_unreachable("unexpected type");

ssahasra wrote:

This llvm_unreachable is preferred. It's clear documentation that all supported 
types have been handled by this point. Each if-block for integer, floating and 
pointer types should have its own default "return Arg".

https://github.com/llvm/llvm-project/pull/72556
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [llvm] [AMDGPU] Enable OpenCL hostcall printf (WIP) (PR #72556)

2024-01-21 Thread Sameer Sahasrabuddhe via cfe-commits



@@ -4742,6 +4742,16 @@ void Clang::ConstructJob(Compilation &C, const JobAction 
&JA,
 Args.ClaimAllArgs(options::OPT_gen_cdb_fragment_path);
   }
 
+  if (TC.getTriple().isAMDGPU() && types::isOpenCL(Input.getType())) {
+if (Args.getLastArg(options::OPT_mprintf_kind_EQ)) {
+  CmdArgs.push_back(Args.MakeArgString(
+  "-mprintf-kind=" +
+  Args.getLastArgValue(options::OPT_mprintf_kind_EQ)));
+  // Force compiler error on invalid conversion specifiers
+  
CmdArgs.push_back(Args.MakeArgString("-Werror=format-invalid-specifier"));

ssahasra wrote:

Bump! Remove this command-line flag.

https://github.com/llvm/llvm-project/pull/72556
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [llvm] [AMDGPU] Enable OpenCL hostcall printf (WIP) (PR #72556)

2024-01-21 Thread Sameer Sahasrabuddhe via cfe-commits



@@ -170,20 +173,49 @@ static Value *appendString(IRBuilder<> &Builder, Value 
*Desc, Value *Arg,
   return callAppendStringN(Builder, Desc, Arg, Length, IsLast);
 }
 
+static Value *appendVectorArg(IRBuilder<> &Builder, Value *Desc, Value *Arg,
+  bool IsLast, bool IsBuffered) {
+  assert(Arg->getType()->isVectorTy() && "incorrent append* function");
+  auto VectorTy = dyn_cast(Arg->getType());
+  auto Zero = Builder.getInt64(0);
+  if (VectorTy) {
+for (unsigned int i = 0; i < VectorTy->getNumElements() - 1; i++) {
+  auto Val = Builder.CreateExtractElement(Arg, i);
+  Desc = callAppendArgs(Builder, Desc, 1,
+fitArgInto64Bits(Builder, Val, IsBuffered), Zero,
+Zero, Zero, Zero, Zero, Zero, false);
+}
+
+Value* Val =
+Builder.CreateExtractElement(Arg, VectorTy->getNumElements() - 1);
+return callAppendArgs(Builder, Desc, 1,
+  fitArgInto64Bits(Builder, Val, IsBuffered), Zero,
+  Zero, Zero, Zero, Zero, Zero, IsLast);
+  }
+  return nullptr;
+}
+
 static Value *processArg(IRBuilder<> &Builder, Value *Desc, Value *Arg,
- bool SpecIsCString, bool IsLast) {
+ bool SpecIsCString, bool IsVector, bool IsLast,
+ bool IsBuffered) {
   if (SpecIsCString && isa(Arg->getType())) {
 return appendString(Builder, Desc, Arg, IsLast);
   }
-  // If the format specifies a string but the argument is not, the frontend 
will
-  // have printed a warning. We just rely on undefined behaviour and send the
-  // argument anyway.
-  return appendArg(Builder, Desc, Arg, IsLast);
+
+  if (IsVector) {
+return appendVectorArg(Builder, Desc, Arg, IsLast, IsBuffered);
+  } 
+
+  // If the format specifies a string but the argument is not, the frontend
+  // will have printed a warning. We just rely on undefined behaviour and send
+  // the argument anyway.

ssahasra wrote:

Bump!

https://github.com/llvm/llvm-project/pull/72556
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [llvm] [AMDGPU] Enable OpenCL hostcall printf (WIP) (PR #72556)

2024-01-21 Thread Sameer Sahasrabuddhe via cfe-commits



@@ -168,20 +174,48 @@ static Value *appendString(IRBuilder<> &Builder, Value 
*Desc, Value *Arg,
   return callAppendStringN(Builder, Desc, Arg, Length, IsLast);
 }
 
+static Value *appendVectorArg(IRBuilder<> &Builder, Value *Desc, Value *Arg,
+  bool IsLast, bool IsBuffered) {
+  assert(Arg->getType()->isVectorTy() && "incorrect append* function");
+  auto VectorTy = cast(Arg->getType());
+  auto Zero = Builder.getInt64(0);
+  for (unsigned int i = 0; i < VectorTy->getNumElements() - 1; i++) {
+auto Val = Builder.CreateExtractElement(Arg, i);
+Desc = callAppendArgs(Builder, Desc, 1,
+  fitArgInto64Bits(Builder, Val, IsBuffered), Zero,
+  Zero, Zero, Zero, Zero, Zero, false);
+  }
+
+  Value *Val =
+  Builder.CreateExtractElement(Arg, VectorTy->getNumElements() - 1);
+  return callAppendArgs(Builder, Desc, 1,
+fitArgInto64Bits(Builder, Val, IsBuffered), Zero, Zero,
+Zero, Zero, Zero, Zero, IsLast);
+}
+
 static Value *processArg(IRBuilder<> &Builder, Value *Desc, Value *Arg,
- bool SpecIsCString, bool IsLast) {
+ bool SpecIsCString, bool IsVector, bool IsLast,
+ bool IsBuffered) {
   if (SpecIsCString && isa(Arg->getType())) {
 return appendString(Builder, Desc, Arg, IsLast);
   }
-  // If the format specifies a string but the argument is not, the frontend 
will
-  // have printed a warning. We just rely on undefined behaviour and send the
-  // argument anyway.
-  return appendArg(Builder, Desc, Arg, IsLast);
+
+  if (IsVector) {
+return appendVectorArg(Builder, Desc, Arg, IsLast, IsBuffered);
+  } 
+
+  // If the format specifies a string but the argument is not, the frontend
+  // will have printed a warning. We just rely on undefined behaviour and send
+  // the argument anyway.
+  return appendArg(Builder, Desc, Arg, IsLast, IsBuffered);
 }
 
-// Scan the format string to locate all specifiers, and mark the ones that
-// specify a string, i.e, the "%s" specifier with optional '*' characters.
-static void locateCStrings(SparseBitVector<8> &BV, StringRef Str) {
+// Scan the format string to locate all specifiers and OCL vectors,

ssahasra wrote:

"all specifiers" is enough ... there is no need to say "OCL vectors". The rest 
of the sentence is the one which correctly says "string or vector".

https://github.com/llvm/llvm-project/pull/72556
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [llvm] [AMDGPU] Enable OpenCL hostcall printf (WIP) (PR #72556)

2024-01-22 Thread Sameer Sahasrabuddhe via cfe-commits



@@ -198,15 +213,31 @@ RValue 
CodeGenFunction::EmitAMDGPUDevicePrintfCallExpr(const CallExpr *E) {
 }
 
 llvm::Value *Arg = A.getRValue(*this).getScalarVal();
+if (isString(A.getType().getTypePtr()) && CGM.getLangOpts().OpenCL)
+  Arg = Builder.CreateAddrSpaceCast(Arg, CGM.Int8PtrTy);
 Args.push_back(Arg);
   }
 
-  llvm::IRBuilder<> IRB(Builder.GetInsertBlock(), Builder.GetInsertPoint());
-  IRB.SetCurrentDebugLocation(Builder.getCurrentDebugLocation());
+  auto PFK = CGM.getTarget().getTargetOpts().AMDGPUPrintfKindVal;
+  bool isBuffered =
+   (PFK == clang::TargetOptions::AMDGPUPrintfKind::Buffered);
+
+  StringRef FmtStr;
+  if (llvm::getConstantStringInfo(Args[0], FmtStr)) {
+if (FmtStr.empty())
+  FmtStr = StringRef("", 1);
+  } else {
+if (CGM.getLangOpts().OpenCL) {

ssahasra wrote:

This looks like the wrong place for a diagnostic. For an OpenCL program, 
shouldn't Sema have already verified that the arguments match the required 
types, such as "constant address space" for the format string?

https://github.com/llvm/llvm-project/pull/72556
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] ed181ef - [HIP][AMDGPU] expand printf when compiling HIP to AMDGPU

2020-01-16 Thread Sameer Sahasrabuddhe via cfe-commits


Author: Sameer Sahasrabuddhe
Date: 2020-01-16T15:15:38+05:30
New Revision: ed181efa175d3e0acc134e6cd161914e64c7195e

URL: 
https://github.com/llvm/llvm-project/commit/ed181efa175d3e0acc134e6cd161914e64c7195e
DIFF: 
https://github.com/llvm/llvm-project/commit/ed181efa175d3e0acc134e6cd161914e64c7195e.diff

LOG: [HIP][AMDGPU] expand printf when compiling HIP to AMDGPU

Summary:
This change implements the expansion in two parts:
- Add a utility function emitAMDGPUPrintfCall() in LLVM.
- Invoke the above function from Clang CodeGen, when processing a HIP
  program for the AMDGPU target.

The printf expansion has undefined behaviour if the format string is
not a compile-time constant. As a sufficient condition, the HIP
ToolChain now emits -Werror=format-nonliteral.

Reviewed By: arsenm

Differential Revision: https://reviews.llvm.org/D71365

Added: 
clang/test/CodeGenHIP/printf-aggregate.cpp
clang/test/CodeGenHIP/printf.cpp
clang/test/Driver/hip-printf.hip
llvm/include/llvm/Transforms/Utils/AMDGPUEmitPrintf.h
llvm/lib/Transforms/Utils/AMDGPUEmitPrintf.cpp

Modified: 
clang/lib/CodeGen/CGBuiltin.cpp
clang/lib/CodeGen/CGGPUBuiltin.cpp
clang/lib/CodeGen/CodeGenFunction.h
clang/lib/Driver/ToolChains/HIP.cpp
llvm/lib/Transforms/Utils/CMakeLists.txt

Removed: 




diff  --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 09fd3087b494..4decaa593a59 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -4115,6 +4115,9 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl 
GD, unsigned BuiltinID,
   case Builtin::BIprintf:
 if (getTarget().getTriple().isNVPTX())
   return EmitNVPTXDevicePrintfCallExpr(E, ReturnValue);
+if (getTarget().getTriple().getArch() == Triple::amdgcn &&
+getLangOpts().HIP)
+  return EmitAMDGPUDevicePrintfCallExpr(E, ReturnValue);
 break;
   case Builtin::BI__builtin_canonicalize:
   case Builtin::BI__builtin_canonicalizef:

diff  --git a/clang/lib/CodeGen/CGGPUBuiltin.cpp 
b/clang/lib/CodeGen/CGGPUBuiltin.cpp
index d7e267630762..bccce7dd7ff4 100644
--- a/clang/lib/CodeGen/CGGPUBuiltin.cpp
+++ b/clang/lib/CodeGen/CGGPUBuiltin.cpp
@@ -16,6 +16,7 @@
 #include "llvm/IR/DataLayout.h"
 #include "llvm/IR/Instruction.h"
 #include "llvm/Support/MathExtras.h"
+#include "llvm/Transforms/Utils/AMDGPUEmitPrintf.h"
 
 using namespace clang;
 using namespace CodeGen;
@@ -120,3 +121,36 @@ CodeGenFunction::EmitNVPTXDevicePrintfCallExpr(const 
CallExpr *E,
   return RValue::get(Builder.CreateCall(
   VprintfFunc, {Args[0].getRValue(*this).getScalarVal(), BufferPtr}));
 }
+
+RValue
+CodeGenFunction::EmitAMDGPUDevicePrintfCallExpr(const CallExpr *E,
+ReturnValueSlot ReturnValue) {
+  assert(getTarget().getTriple().getArch() == llvm::Triple::amdgcn);
+  assert(E->getBuiltinCallee() == Builtin::BIprintf ||
+ E->getBuiltinCallee() == Builtin::BI__builtin_printf);
+  assert(E->getNumArgs() >= 1); // printf always has at least one arg.
+
+  CallArgList CallArgs;
+  EmitCallArgs(CallArgs,
+   E->getDirectCallee()->getType()->getAs(),
+   E->arguments(), E->getDirectCallee(),
+   /* ParamsToSkip = */ 0);
+
+  SmallVector Args;
+  for (auto A : CallArgs) {
+// We don't know how to emit non-scalar varargs.
+if (!A.getRValue(*this).isScalar()) {
+  CGM.ErrorUnsupported(E, "non-scalar arg to printf");
+  return RValue::get(llvm::ConstantInt::get(IntTy, -1));
+}
+
+llvm::Value *Arg = A.getRValue(*this).getScalarVal();
+Args.push_back(Arg);
+  }
+
+  llvm::IRBuilder<> IRB(Builder.GetInsertBlock(), Builder.GetInsertPoint());
+  IRB.SetCurrentDebugLocation(Builder.getCurrentDebugLocation());
+  auto Printf = llvm::emitAMDGPUPrintfCall(IRB, Args);
+  Builder.SetInsertPoint(IRB.GetInsertBlock(), IRB.GetInsertPoint());
+  return RValue::get(Printf);
+}

diff  --git a/clang/lib/CodeGen/CodeGenFunction.h 
b/clang/lib/CodeGen/CodeGenFunction.h
index 3d8bc93eb965..5ab15bf74a23 100644
--- a/clang/lib/CodeGen/CodeGenFunction.h
+++ b/clang/lib/CodeGen/CodeGenFunction.h
@@ -3722,6 +3722,8 @@ class CodeGenFunction : public CodeGenTypeCache {
 
   RValue EmitNVPTXDevicePrintfCallExpr(const CallExpr *E,
ReturnValueSlot ReturnValue);
+  RValue EmitAMDGPUDevicePrintfCallExpr(const CallExpr *E,
+ReturnValueSlot ReturnValue);
 
   RValue EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
  const CallExpr *E, ReturnValueSlot ReturnValue);

diff  --git a/clang/lib/Driver/ToolChains/HIP.cpp 
b/clang/lib/Driver/ToolChains/HIP.cpp
index f89e648948ab..d4b015a7e873 100644
--- a/clang/lib/Driver/ToolChains/HIP.cpp
+++ b/clang/lib/Driver/ToolChains/HIP.cpp
@@ -436,6 +436,7 @@ Tool *HIPToolChain::buildLinker() cons

[clang] 280593b - [Clang] [NFC] fix CHECK lines for convergent attribute tests

2021-06-28 Thread Sameer Sahasrabuddhe via cfe-commits


Author: Sameer Sahasrabuddhe
Date: 2021-06-29T00:21:07+05:30
New Revision: 280593bd3ff1db6d19ccb8182698dd9c816734e2

URL: 
https://github.com/llvm/llvm-project/commit/280593bd3ff1db6d19ccb8182698dd9c816734e2
DIFF: 
https://github.com/llvm/llvm-project/commit/280593bd3ff1db6d19ccb8182698dd9c816734e2.diff

LOG: [Clang] [NFC] fix CHECK lines for convergent attribute tests

Added: 


Modified: 
clang/test/CodeGen/convergent-functions.cpp
clang/test/CodeGenCUDA/convergent.cu
clang/test/CodeGenCUDA/dft-func-attr-skip-intrinsic.hip

Removed: 




diff  --git a/clang/test/CodeGen/convergent-functions.cpp 
b/clang/test/CodeGen/convergent-functions.cpp
index 7ddb8d3f94501..cb8682474f931 100644
--- a/clang/test/CodeGen/convergent-functions.cpp
+++ b/clang/test/CodeGen/convergent-functions.cpp
@@ -1,8 +1,10 @@
-// RUN: %clang_cc1 -triple i386-pc-win32 -emit-llvm -fconvergent-functions -o 
- < %s | FileCheck -check-prefix=CONVFUNC %s
-// RUN: %clang_cc1 -triple i386-pc-win32 -emit-llvm -o - < %s | FileCheck 
-check-prefix=NOCONVFUNC %s
+// RUN: %clang_cc1 -triple i386-pc-win32 -emit-llvm -fconvergent-functions -o 
- < %s | FileCheck -check-prefixes=CHECK,CONVFUNC %s
+// RUN: %clang_cc1 -triple i386-pc-win32 -emit-llvm -o - < %s | FileCheck 
-check-prefixes=CHECK,NOCONVFUNC %s
 
 // Test that the -fconvergent-functions flag works
 
-// CONVFUNC: attributes #0 = { convergent {{.*}} }
+// CHECK: attributes #0 = {
 // NOCONVFUNC-NOT: convergent
+// CONVFUNC-SAME: convergent
+// CHECK-SAME: }
 void func() { }

diff  --git a/clang/test/CodeGenCUDA/convergent.cu 
b/clang/test/CodeGenCUDA/convergent.cu
index ff18f92ef1eae..5d98d4ba69262 100644
--- a/clang/test/CodeGenCUDA/convergent.cu
+++ b/clang/test/CodeGenCUDA/convergent.cu
@@ -42,4 +42,4 @@ __host__ __device__ void bar() {
 // HOST: declare void @_Z3bazv() [[BAZ_ATTR:#[0-9]+]]
 // HOST: attributes [[BAZ_ATTR]] = {
 // HOST-NOT: convergent
-// NOST-SAME: }
+// HOST-SAME: }

diff  --git a/clang/test/CodeGenCUDA/dft-func-attr-skip-intrinsic.hip 
b/clang/test/CodeGenCUDA/dft-func-attr-skip-intrinsic.hip
index 9e3e436200fc3..ee4c585cb5d7c 100644
--- a/clang/test/CodeGenCUDA/dft-func-attr-skip-intrinsic.hip
+++ b/clang/test/CodeGenCUDA/dft-func-attr-skip-intrinsic.hip
@@ -15,4 +15,4 @@ __device__ float foo(float x) {
 // CHECK: attributes [[ATTR1]] = { convergent
 // CHECK: attributes [[ATTR2]] = {
 // CHECK-NOT: convergent
-// CHECK: }
+// CHECK-SAME: }



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] bb48aa2 - [Clang][NFC] Prevent lit tests from matching substrings in current path

2022-12-12 Thread Sameer Sahasrabuddhe via cfe-commits


Author: Sameer Sahasrabuddhe
Date: 2022-12-12T16:32:58+05:30
New Revision: bb48aa20e761e26226c6f909a07246781d68ba41

URL: 
https://github.com/llvm/llvm-project/commit/bb48aa20e761e26226c6f909a07246781d68ba41
DIFF: 
https://github.com/llvm/llvm-project/commit/bb48aa20e761e26226c6f909a07246781d68ba41.diff

LOG: [Clang][NFC] Prevent lit tests from matching substrings in current path

Added: 


Modified: 
clang/test/CodeGenCXX/2004-01-11-DynamicInitializedConstant.cpp
clang/test/CodeGenCXX/ignored-bitfield-conditional.cpp

Removed: 




diff  --git a/clang/test/CodeGenCXX/2004-01-11-DynamicInitializedConstant.cpp 
b/clang/test/CodeGenCXX/2004-01-11-DynamicInitializedConstant.cpp
index 0c9333fb6d7a..9bbf16f55fab 100644
--- a/clang/test/CodeGenCXX/2004-01-11-DynamicInitializedConstant.cpp
+++ b/clang/test/CodeGenCXX/2004-01-11-DynamicInitializedConstant.cpp
@@ -1,6 +1,11 @@
 // RUN: %clang_cc1 -emit-llvm -o - %s | FileCheck %s
 
+// Catch the beginning and the end of the IR. This prevents the CHECK- from
+// matching a spurious "constant" string in file paths printed later.
+
+// CHECK-LABEL: target triple
 // CHECK-NOT: constant
+// CHECK-LABEL: attributes
 extern int X;
 const int Y = X;
 const int* foo() { return &Y; }

diff  --git a/clang/test/CodeGenCXX/ignored-bitfield-conditional.cpp 
b/clang/test/CodeGenCXX/ignored-bitfield-conditional.cpp
index 7700e97ae9d5..c9880f91a517 100644
--- a/clang/test/CodeGenCXX/ignored-bitfield-conditional.cpp
+++ b/clang/test/CodeGenCXX/ignored-bitfield-conditional.cpp
@@ -7,7 +7,7 @@ struct S {
 };
 
 void use(bool cond, struct S s1, struct S s2, int val1, int val2) {
-  // CHECK: define {{.*}}use{{.*}}(
+  // CHECK-LABEL: define {{.*}}use{{.*}}(
   // CHECK: %[[S1:.+]] = alloca %struct.S
   // CHECK: %[[S2:.+]] = alloca %struct.S
   // CHECK: %[[COND:.+]] = alloca i8
@@ -43,7 +43,7 @@ void use(bool cond, struct S s1, struct S s2, int val1, int 
val2) {
   // CHECK: store i16 %[[BF_SET]], ptr %[[S1]]
   // CHECK: br label %[[END:.+]]
 
-  // CHECK: [[END]]:
+  // CHECK-LABEL: [[END]]:
   // There is nothing in the 'end' block associated with this, but it is the
   // 'continuation' block for the rest of the function.
 
@@ -77,7 +77,7 @@ void use(bool cond, struct S s1, struct S s2, int val1, int 
val2) {
   // CHECK: store i16 %[[BF_SET]], ptr %[[S2]]
   // CHECK: br label %[[END:.+]]
 
-  // CHECK: [[END]]:
+  // CHECK-LABEL: [[END]]:
   // CHECK-NOT: phi
   // There is nothing in the 'end' block associated with this, but it is the
   // 'continuation' block for the rest of the function.
@@ -86,7 +86,7 @@ void use(bool cond, struct S s1, struct S s2, int val1, int 
val2) {
 
 
 void use2(bool cond1, bool cond2, struct S s1, int val1, int val2, int val3) {
-  // CHECK: define {{.*}}use2{{.*}}(
+  // CHECK-LABEL: define {{.*}}use2{{.*}}(
   // CHECK: %[[S1:.+]] = alloca %struct.S
   // CHECK: %[[COND1:.+]] = alloca i8
   // CHECK: %[[COND2:.+]] = alloca i8
@@ -141,7 +141,10 @@ void use2(bool cond1, bool cond2, struct S s1, int val1, 
int val2, int val3) {
   // CHECK: store i16 %[[BF_SET]], ptr %[[S1]]
   // CHECK: br label %[[END:.+]]
 
-  // CHECK[[END]]:
+  // CHECK-LABEL: [[END]]:
   // CHECK-NOT: phi
   // Nothing left to do here.
 }
+// Catch the end of the IR. This prevents the CHECK-NOT above from matching a
+// spurious "phi" in file paths printed later.
+// CHECK-LABEL: attributes



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] cd50f91 - [Clang][NFC] Prevent lit tests from matching substrings in current path

2022-12-12 Thread Sameer Sahasrabuddhe via cfe-commits


Author: Sameer Sahasrabuddhe
Date: 2022-12-13T11:18:39+05:30
New Revision: cd50f910f4d1a6bb54fd8968f067febbc7320f28

URL: 
https://github.com/llvm/llvm-project/commit/cd50f910f4d1a6bb54fd8968f067febbc7320f28
DIFF: 
https://github.com/llvm/llvm-project/commit/cd50f910f4d1a6bb54fd8968f067febbc7320f28.diff

LOG: [Clang][NFC] Prevent lit tests from matching substrings in current path

Added: 


Modified: 
clang/test/CodeGenCXX/2004-01-11-DynamicInitializedConstant.cpp
clang/test/CodeGenCXX/ignored-bitfield-conditional.cpp

Removed: 




diff  --git a/clang/test/CodeGenCXX/2004-01-11-DynamicInitializedConstant.cpp 
b/clang/test/CodeGenCXX/2004-01-11-DynamicInitializedConstant.cpp
index 0c9333fb6d7a..0cd8419185ae 100644
--- a/clang/test/CodeGenCXX/2004-01-11-DynamicInitializedConstant.cpp
+++ b/clang/test/CodeGenCXX/2004-01-11-DynamicInitializedConstant.cpp
@@ -1,6 +1,12 @@
 // RUN: %clang_cc1 -emit-llvm -o - %s | FileCheck %s
 
+// Catch the beginning and the end of the IR. This prevents the CHECK-NOT from
+// matching a spurious "constant" string in file paths printed in the output.
+//
+// CHECK-LABEL: target triple
 // CHECK-NOT: constant
+// CHECK-LABEL: attributes
+
 extern int X;
 const int Y = X;
 const int* foo() { return &Y; }

diff  --git a/clang/test/CodeGenCXX/ignored-bitfield-conditional.cpp 
b/clang/test/CodeGenCXX/ignored-bitfield-conditional.cpp
index 7700e97ae9d5..d5763f49f954 100644
--- a/clang/test/CodeGenCXX/ignored-bitfield-conditional.cpp
+++ b/clang/test/CodeGenCXX/ignored-bitfield-conditional.cpp
@@ -7,7 +7,7 @@ struct S {
 };
 
 void use(bool cond, struct S s1, struct S s2, int val1, int val2) {
-  // CHECK: define {{.*}}use{{.*}}(
+  // CHECK-LABEL: define {{.*}}use{{.*}}(
   // CHECK: %[[S1:.+]] = alloca %struct.S
   // CHECK: %[[S2:.+]] = alloca %struct.S
   // CHECK: %[[COND:.+]] = alloca i8
@@ -86,7 +86,7 @@ void use(bool cond, struct S s1, struct S s2, int val1, int 
val2) {
 
 
 void use2(bool cond1, bool cond2, struct S s1, int val1, int val2, int val3) {
-  // CHECK: define {{.*}}use2{{.*}}(
+  // CHECK-LABEL: define {{.*}}use2{{.*}}(
   // CHECK: %[[S1:.+]] = alloca %struct.S
   // CHECK: %[[COND1:.+]] = alloca i8
   // CHECK: %[[COND2:.+]] = alloca i8
@@ -141,7 +141,11 @@ void use2(bool cond1, bool cond2, struct S s1, int val1, 
int val2, int val3) {
   // CHECK: store i16 %[[BF_SET]], ptr %[[S1]]
   // CHECK: br label %[[END:.+]]
 
-  // CHECK[[END]]:
+  // CHECK: [[END]]:
   // CHECK-NOT: phi
   // Nothing left to do here.
 }
+
+// Catch the end of the IR. This prevents the CHECK-NOT above from matching a
+// spurious "phi" in file paths printed in the output.
+// CHECK-LABEL: attributes



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [llvm] [AMDGPU] Enable OpenCL hostcall printf (WIP) (PR #72556)

2024-02-20 Thread Sameer Sahasrabuddhe via cfe-commits



@@ -178,17 +181,29 @@ RValue 
CodeGenFunction::EmitNVPTXDevicePrintfCallExpr(const CallExpr *E) {
   E, this, GetVprintfDeclaration(CGM.getModule()), false);
 }
 
+// Deterimines if an argument is a string
+static bool isString(const clang::Type *argXTy) {

ssahasra wrote:

This could be called "MayBeString()" at best. It's not necessary that a char* 
argument type is a C-style string.

https://github.com/llvm/llvm-project/pull/72556
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [llvm] [AMDGPU] Enable OpenCL hostcall printf (WIP) (PR #72556)

2024-02-20 Thread Sameer Sahasrabuddhe via cfe-commits



@@ -198,15 +213,31 @@ RValue 
CodeGenFunction::EmitAMDGPUDevicePrintfCallExpr(const CallExpr *E) {
 }
 
 llvm::Value *Arg = A.getRValue(*this).getScalarVal();
+if (isString(A.getType().getTypePtr()) && CGM.getLangOpts().OpenCL)
+  Arg = Builder.CreateAddrSpaceCast(Arg, CGM.Int8PtrTy);
 Args.push_back(Arg);
   }
 
-  llvm::IRBuilder<> IRB(Builder.GetInsertBlock(), Builder.GetInsertPoint());
-  IRB.SetCurrentDebugLocation(Builder.getCurrentDebugLocation());
+  auto PFK = CGM.getTarget().getTargetOpts().AMDGPUPrintfKindVal;
+  bool isBuffered =
+   (PFK == clang::TargetOptions::AMDGPUPrintfKind::Buffered);
+
+  StringRef FmtStr;
+  if (llvm::getConstantStringInfo(Args[0], FmtStr)) {
+if (FmtStr.empty())
+  FmtStr = StringRef("", 1);
+  } else {
+if (CGM.getLangOpts().OpenCL) {

ssahasra wrote:

The diagnostic should be replaced with an assert() or an llvm_unreachable(). 
The OpenCL spec says that the format string should be resolvable at compile 
time, but this is not the right place to check that. By now, the frontend or 
sema should have rejected the program as ill-formed.

https://github.com/llvm/llvm-project/pull/72556
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [llvm] [AMDGPU] Enable OpenCL hostcall printf (WIP) (PR #72556)

2024-02-20 Thread Sameer Sahasrabuddhe via cfe-commits



@@ -199,15 +214,31 @@ RValue 
CodeGenFunction::EmitAMDGPUDevicePrintfCallExpr(const CallExpr *E) {
 }
 
 llvm::Value *Arg = A.getRValue(*this).getScalarVal();
+if (isString(A.getType().getTypePtr()) && CGM.getLangOpts().OpenCL)

ssahasra wrote:

The typecast can be inserted later when the arguments are actually processed. 
At that point, we already know which args are strings because we have parsed 
the format string.

https://github.com/llvm/llvm-project/pull/72556
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [llvm] [AMDGPU] Extend readlane, writelane and readfirstlane intrinsic lowering for generic types (PR #89217)

2024-06-12 Thread Sameer Sahasrabuddhe via cfe-commits

ssahasra wrote:

> > @jayfoad's testcase fails and the same test should be repeated for all 3 
> > intrinsics
> 
> added MIR tests for 3 intrinsics. The issue is that I'm not able to attach the 
> glue nodes to newly created laneop pieces since they fail at selection. 
> #87509 should enable this,

I am not really comfortable waiting for #87509 to fix convergence tokens in 
this expansion. Is it really true that this expansion cannot be fixed 
independent of future work on `CONVERGENCE_GLUE`? There is no way to manually 
handle the same glue operands??

https://github.com/llvm/llvm-project/pull/89217
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [llvm] [AMDGPU] Extend readlane, writelane and readfirstlane intrinsic lowering for generic types (PR #89217)

2024-06-12 Thread Sameer Sahasrabuddhe via cfe-commits



@@ -0,0 +1,46 @@
+# RUN: not --crash llc -mtriple=amdgcn -run-pass=none -verify-machineinstrs -o 
/dev/null %s 2>&1 | FileCheck %s

ssahasra wrote:

All it needs is one new file in `test/CodeGen/AMDGPU` where 64-bit lane ops are 
used with a convergence token. Mark that as XFAIL. When the issue is fixed, 
that file can be merged into the existing tests. We don't need to test each of 
the convergence control intrinsics. It's enough to just have a token on a 
readlane.

https://github.com/llvm/llvm-project/pull/89217
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [llvm] [AMDGPU] Add OpenCL-specific fence address space masks (PR #78572)

2024-05-03 Thread Sameer Sahasrabuddhe via cfe-commits



@@ -18365,6 +18366,30 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned 
BuiltinID,
   return nullptr;
 }
 
+void CodeGenFunction::AddAMDGCNAddressSpaceMMRA(llvm::Instruction *Inst,
+const CallExpr *E,
+unsigned FirstASNameIdx) {
+  constexpr const char *Tag = "opencl-fence-mem";
+
+  LLVMContext &Ctx = Inst->getContext();
+  SmallVector MMRAs;
+  for (unsigned K = FirstASNameIdx; K < E->getNumArgs(); ++K) {
+llvm::Value *V = EmitScalarExpr(E->getArg(K));
+StringRef AS;
+if (llvm::getConstantStringInfo(V, AS) &&
+(AS == "local" || AS == "global" || AS == "image")) {

ssahasra wrote:

Can these magic strings be declared somewhere? Ideally some sort of 
target-specific mechanism that Clang will use to find such sets of valid 
strings?

https://github.com/llvm/llvm-project/pull/78572
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [llvm] [AMDGPU] Add OpenCL-specific fence address space masks (PR #78572)

2024-05-03 Thread Sameer Sahasrabuddhe via cfe-commits



@@ -18365,6 +18366,30 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned 
BuiltinID,
   return nullptr;
 }
 
+void CodeGenFunction::AddAMDGCNAddressSpaceMMRA(llvm::Instruction *Inst,
+const CallExpr *E,
+unsigned FirstASNameIdx) {
+  constexpr const char *Tag = "opencl-fence-mem";

ssahasra wrote:

Definitely drop the "opencl" prefix.

https://github.com/llvm/llvm-project/pull/78572
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [llvm] [AMDGPU] Add OpenCL-specific fence address space masks (PR #78572)

2024-05-03 Thread Sameer Sahasrabuddhe via cfe-commits



@@ -4408,6 +4409,54 @@ Target-Specific Extensions
 
 Clang supports some language features conditionally on some targets.
 
+AMDGPU Language Extensions
+--
+
+__builtin_amdgcn_fence
+^^
+
+``__builtin_amdgcn_fence`` emits a fence for all address spaces
+and takes the following arguments:
+
+* ``unsigned`` atomic ordering, e.g. ``__ATOMIC_ACQUIRE``
+* ``const char *`` synchronization scope, e.g. ``workgroup``
+
+.. code-block:: c++
+
+  __builtin_amdgcn_fence(__ATOMIC_SEQ_CST, "workgroup");
+  __builtin_amdgcn_fence(__ATOMIC_ACQUIRE, "agent");
+
+__builtin_amdgcn_masked_fence
+^
+
+``__builtin_amdgcn_masked_fence`` emits a fence for one or more address
+spaces and takes the following arguments:
+
+* ``unsigned`` atomic ordering, e.g. ``__ATOMIC_ACQUIRE``
+* ``const char *`` synchronization scope, e.g. ``workgroup``
+* Zero or more ``const char *`` address spaces.
+
+The address spaces arguments must be string literals with known values, such 
as:
+
+* ``"local"``
+* ``"global"``
+* ``"image"``
+
+If there are no address spaces specified, this fence behaves like

ssahasra wrote:

To answer comments elsewhere, this documentation makes it really obvious that 
the two fences are really just one.

https://github.com/llvm/llvm-project/pull/78572
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [llvm] [AMDGPU] Add OpenCL-specific fence address space masks (PR #78572)

2024-05-03 Thread Sameer Sahasrabuddhe via cfe-commits

ssahasra wrote:

> Should we also rename the MMRA to `amdgpu-fence-as` (remove OpenCL from the 
> name) ?

Even the "fence" prefix is not entirely correct. The same tags also make sense 
on a load-acquire or store-release, which are "fence like" instructions, or 
"operations with implicit fences". Why not just "as:global", "as:local", etc? 
The fact that they are used as !mmra on a fence-like instruction makes it clear 
that they represent the address spaces that are caused to be synchronized by 
that instruction, and not incidentally the address space that the load-acquire 
or store-release itself wants to access.

https://github.com/llvm/llvm-project/pull/78572
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [llvm] [AMDGPU] Add amdgpu-as MMRA for fences (PR #78572)

2024-05-05 Thread Sameer Sahasrabuddhe via cfe-commits


https://github.com/ssahasra approved this pull request.

The frontend changes and the MMRA emitted in LLVM IR look good to me. The 
backend changes also look okay, but please see if anyone else has comments 
about that.

https://github.com/llvm/llvm-project/pull/78572
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [llvm] [AMDGPU] Add amdgpu-as MMRA for fences (PR #78572)

2024-05-05 Thread Sameer Sahasrabuddhe via cfe-commits



@@ -4408,6 +4409,42 @@ Target-Specific Extensions
 
 Clang supports some language features conditionally on some targets.
 
+AMDGPU Language Extensions
+--
+
+__builtin_amdgcn_fence
+^^
+
+``__builtin_amdgcn_fence`` emits a fence.
+
+* ``unsigned`` atomic ordering, e.g. ``__ATOMIC_ACQUIRE``
+* ``const char *`` synchronization scope, e.g. ``workgroup``
+* Zero or more ``const char *`` address spaces names.
+
+The address spaces arguments must be string literals with known values, such 
as:
+
+* ``"local"``
+* ``"global"``
+* ``"image"``
+
+If one or more address space name are provided, the code generator will attempt
+to emit potentially faster instructions that only fence those address spaces.

ssahasra wrote:

This use of "fence" as a verb seems a bit too informal. Reword it to say 
"instructions that order access to at least those address spaces"? (Note the 
addition of "at least" to signify a lower bound)

https://github.com/llvm/llvm-project/pull/78572
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [llvm] [AMDGPU] Add amdgpu-as MMRA for fences (PR #78572)

2024-05-05 Thread Sameer Sahasrabuddhe via cfe-commits


https://github.com/ssahasra edited 
https://github.com/llvm/llvm-project/pull/78572
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [llvm] [AMDGPU] Add amdgpu-as MMRA for fences (PR #78572)

2024-05-05 Thread Sameer Sahasrabuddhe via cfe-commits



@@ -18365,6 +18366,28 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned 
BuiltinID,
   return nullptr;
 }
 
+void CodeGenFunction::AddAMDGCNFenceAddressSpaceMMRA(llvm::Instruction *Inst,

ssahasra wrote:

The function immediately below this uses "AMDGPU" in its name. I think that's 
the newer practice. I don't have a strong opinion on this, because the same 
file also has functions which say "AMDGCN" instead. I am not sure which way the 
naming convention is leaning.

https://github.com/llvm/llvm-project/pull/78572
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [llvm] [AMDGPU] Add amdgpu-as MMRA for fences (PR #78572)

2024-05-05 Thread Sameer Sahasrabuddhe via cfe-commits



@@ -678,6 +679,59 @@ class SIMemoryLegalizer final : public MachineFunctionPass 
{
   bool runOnMachineFunction(MachineFunction &MF) override;
 };
 
+static std::array, 3> ASNames = {{
+{"global", SIAtomicAddrSpace::GLOBAL},
+{"local", SIAtomicAddrSpace::LDS},
+{"image", SIAtomicAddrSpace::SCRATCH},
+}};
+
+void diagnoseUnknownMMRAASName(const MachineInstr &MI, StringRef AS) {
+  const MachineFunction *MF = MI.getMF();
+  const Function &Fn = MF->getFunction();
+  std::string Str;
+  raw_string_ostream OS(Str);
+  OS << "unknown address space '" << AS << "'; expected one of ";
+  bool IsFirst = true;

ssahasra wrote:

Use ListSeparator from StringExtras.h

https://github.com/llvm/llvm-project/pull/78572
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [llvm] [AMDGPU] Add amdgpu-as MMRA for fences (PR #78572)

2024-05-05 Thread Sameer Sahasrabuddhe via cfe-commits



@@ -18365,6 +18366,28 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned 
BuiltinID,
   return nullptr;
 }
 
+void CodeGenFunction::AddAMDGCNFenceAddressSpaceMMRA(llvm::Instruction *Inst,
+ const CallExpr *E) {
+  constexpr const char *Tag = "amdgpu-as";

ssahasra wrote:

Just bikeshedding a bit, but do we really need the "amdgpu" prefix on the tag? 
Clang will only generate these for AMDGPU anyway. It's not a blocker, but feels 
like we are being cautious for no reason.

https://github.com/llvm/llvm-project/pull/78572
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [llvm] [AMDGPU] Add amdgpu-as MMRA for fences (PR #78572)

2024-05-13 Thread Sameer Sahasrabuddhe via cfe-commits



@@ -678,6 +680,54 @@ class SIMemoryLegalizer final : public MachineFunctionPass 
{
   bool runOnMachineFunction(MachineFunction &MF) override;
 };
 
+static std::array, 3> ASNames = {{

ssahasra wrote:

Use StringMap for this?

https://github.com/llvm/llvm-project/pull/78572
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [llvm] [AMDGPU] Add amdgpu-as MMRA for fences (PR #78572)

2024-05-13 Thread Sameer Sahasrabuddhe via cfe-commits



@@ -678,6 +680,54 @@ class SIMemoryLegalizer final : public MachineFunctionPass 
{
   bool runOnMachineFunction(MachineFunction &MF) override;
 };
 
+static std::array, 3> ASNames = {{
+{"global", SIAtomicAddrSpace::GLOBAL},
+{"local", SIAtomicAddrSpace::LDS},
+{"image", SIAtomicAddrSpace::SCRATCH},
+}};
+
+void diagnoseUnknownMMRAASName(const MachineInstr &MI, StringRef AS) {
+  const MachineFunction *MF = MI.getMF();
+  const Function &Fn = MF->getFunction();
+  std::string Str;
+  raw_string_ostream OS(Str);
+  OS << "unknown address space '" << AS << "'; expected one of ";
+  ListSeparator LS;
+  for (const auto &[Name, Val] : ASNames)
+OS << LS << '\'' << Name << '\'';
+  DiagnosticInfoUnsupported BadTag(Fn, Str, MI.getDebugLoc(), DS_Warning);
+  Fn.getContext().diagnose(BadTag);
+}
+
+/// Reads \p MI's MMRAs to parse the "amdgpu-as" MMRA.
+/// If this tag isn't present, or if it has no meaningful values, returns \p
+/// Default. Otherwise returns all the address spaces concerned by the MMRA.
+static SIAtomicAddrSpace getFenceAddrSpaceMMRA(const MachineInstr &MI,
+   SIAtomicAddrSpace Default) {
+  static constexpr StringLiteral FenceASPrefix = "amdgpu-as";
+
+  auto MMRA = MMRAMetadata(MI.getMMRAMetadata());
+  if (!MMRA)
+return Default;
+
+  SIAtomicAddrSpace Result = SIAtomicAddrSpace::NONE;
+  for (const auto &[Prefix, Suffix] : MMRA) {
+if (Prefix != FenceASPrefix)
+  continue;
+
+auto It = find_if(ASNames, [Suffix = Suffix](auto &Pair) {

ssahasra wrote:

I didn't understand the default assignment of Suffix. Shouldn't the lambda 
capture it simply because the outer symbol is used inside? But anyway, this use 
of find_if shouldn't be necessary with StringMap.

https://github.com/llvm/llvm-project/pull/78572
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [llvm] [AMDGPU] Add amdgpu-as MMRA for fences (PR #78572)

2024-05-14 Thread Sameer Sahasrabuddhe via cfe-commits


https://github.com/ssahasra edited 
https://github.com/llvm/llvm-project/pull/78572
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [llvm] [AMDGPU] Add amdgpu-as MMRA for fences (PR #78572)

2024-05-14 Thread Sameer Sahasrabuddhe via cfe-commits



@@ -678,6 +680,50 @@ class SIMemoryLegalizer final : public MachineFunctionPass 
{
   bool runOnMachineFunction(MachineFunction &MF) override;
 };
 
+static const StringMap<SIAtomicAddrSpace> ASNames = {{
+{"global", SIAtomicAddrSpace::GLOBAL},
+{"local", SIAtomicAddrSpace::LDS},
+{"image", SIAtomicAddrSpace::SCRATCH},
+}};
+
+void diagnoseUnknownMMRAASName(const MachineInstr &MI, StringRef AS) {
+  const MachineFunction *MF = MI.getMF();
+  const Function &Fn = MF->getFunction();
+  std::string Str;
+  raw_string_ostream OS(Str);
+  OS << "unknown address space '" << AS << "'; expected one of ";
+  ListSeparator LS;
+  for (const auto &[Name, Val] : ASNames)
+OS << LS << '\'' << Name << '\'';
+  DiagnosticInfoUnsupported BadTag(Fn, Str, MI.getDebugLoc(), DS_Warning);
+  Fn.getContext().diagnose(BadTag);
+}
+
+/// Reads \p MI's MMRAs to parse the "amdgpu-as" MMRA.
+/// If this tag isn't present, or if it has no meaningful values, returns \p
+/// Default. Otherwise returns all the address spaces concerned by the MMRA.
+static SIAtomicAddrSpace getFenceAddrSpaceMMRA(const MachineInstr &MI,
+   SIAtomicAddrSpace Default) {
+  static constexpr StringLiteral FenceASPrefix = "amdgpu-as";
+
+  auto MMRA = MMRAMetadata(MI.getMMRAMetadata());
+  if (!MMRA)
+return Default;
+
+  SIAtomicAddrSpace Result = SIAtomicAddrSpace::NONE;
+  for (const auto &[Prefix, Suffix] : MMRA) {
+if (Prefix != FenceASPrefix)
+  continue;
+
+if (auto It = ASNames.find(Suffix); It != ASNames.end())

ssahasra wrote:

Wow, I have never considered using a semicolon like this before. But it makes 
so much sense! :)

https://github.com/llvm/llvm-project/pull/78572
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [llvm] [AMDGPU] Add amdgpu-as MMRA for fences (PR #78572)

2024-05-14 Thread Sameer Sahasrabuddhe via cfe-commits


https://github.com/ssahasra approved this pull request.

Looks good to me. But I have no opinion on the discussion about whether 
"image" should be available for explicit use!

https://github.com/llvm/llvm-project/pull/78572
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [clang][CUDA] Add 'noconvergent' function and statement attribute (PR #100637)

2024-07-31 Thread Sameer Sahasrabuddhe via cfe-commits


ssahasra wrote:

> > Please also update the documentation for the attribute and the release 
> > notes.
> 
> It looks like you may have overlooked the request to add release notes for 
> this new feature.

@darkbuck, please revert and address the documentation!

https://github.com/llvm/llvm-project/pull/100637
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [SPIR-V] strengthen some lit tests (PR #111636)

2024-10-08 Thread Sameer Sahasrabuddhe via cfe-commits


https://github.com/ssahasra created 
https://github.com/llvm/llvm-project/pull/111636

These tests were failing spuriously with unrelated changes under development.

>From 107a8819e02c9a5eaf7db5a520543666ea3c3a91 Mon Sep 17 00:00:00 2001
From: Sameer Sahasrabuddhe 
Date: Wed, 9 Oct 2024 11:30:37 +0530
Subject: [PATCH] [SPIR-V] strengthen some lit tests

These tests were failing spuriously with unrelated changes under development.
---
 .../CodeGenHLSL/convergence/do.while.hlsl | 23 +++-
 clang/test/CodeGenHLSL/convergence/for.hlsl   | 35 +++
 clang/test/CodeGenHLSL/convergence/while.hlsl | 18 ++
 3 files changed, 47 insertions(+), 29 deletions(-)

diff --git a/clang/test/CodeGenHLSL/convergence/do.while.hlsl 
b/clang/test/CodeGenHLSL/convergence/do.while.hlsl
index ea5a45ba8fd780..934fe3ea9eb7a0 100644
--- a/clang/test/CodeGenHLSL/convergence/do.while.hlsl
+++ b/clang/test/CodeGenHLSL/convergence/do.while.hlsl
@@ -8,7 +8,8 @@ void test1() {
   do {
   } while (cond());
 }
-// CHECK: define spir_func void @_Z5test1v() [[A0:#[0-9]+]] {
+// CHECK-LABEL: define spir_func void @_Z5test1v()
+// CHECK-SAME: [[A0:#[0-9]+]] {
 // CHECK: entry:
 // CHECK:   [[T0:%[0-9]+]] = call token @llvm.experimental.convergence.entry()
 // CHECK: do.body:
@@ -21,14 +22,15 @@ void test2() {
 foo();
   } while (cond());
 }
-// CHECK: define spir_func void @_Z5test2v() [[A0:#[0-9]+]] {
+// CHECK-LABEL: define spir_func void @_Z5test2v()
+// CHECK-SAME: [[A0]] {
 // CHECK: entry:
 // CHECK:   [[T0:%[0-9]+]] = call token @llvm.experimental.convergence.entry()
 // CHECK: do.body:
 // CHECK:   [[T1:%[0-9]+]] = call token @llvm.experimental.convergence.loop() 
[ "convergencectrl"(token [[T0]]) ]
 // CHECK:call spir_func void @_Z3foov() [[A3]] [ 
"convergencectrl"(token [[T1]]) ]
 // CHECK: do.cond:
-// CHECK:call spir_func noundef i1 @_Z4condv() 
[[A3:#[0-9]+]] [ "convergencectrl"(token [[T1]]) ]
+// CHECK:call spir_func noundef i1 @_Z4condv() [[A3]] [ 
"convergencectrl"(token [[T1]]) ]
 
 void test3() {
   do {
@@ -36,7 +38,8 @@ void test3() {
   foo();
   } while (cond());
 }
-// CHECK: define spir_func void @_Z5test3v() [[A0:#[0-9]+]] {
+// CHECK-LABEL: define spir_func void @_Z5test3v()
+// CHECK-SAME: [[A0]] {
 // CHECK: entry:
 // CHECK:   [[T0:%[0-9]+]] = call token @llvm.experimental.convergence.entry()
 // CHECK: do.body:
@@ -44,7 +47,7 @@ void test3() {
 // CHECK: if.then:
 // CHECK:call spir_func void @_Z3foov() [[A3]] [ 
"convergencectrl"(token [[T1]]) ]
 // CHECK: do.cond:
-// CHECK:call spir_func noundef i1 @_Z4condv() 
[[A3:#[0-9]+]] [ "convergencectrl"(token [[T1]]) ]
+// CHECK:call spir_func noundef i1 @_Z4condv() [[A3]] [ 
"convergencectrl"(token [[T1]]) ]
 
 void test4() {
   do {
@@ -54,7 +57,8 @@ void test4() {
 }
   } while (cond());
 }
-// CHECK: define spir_func void @_Z5test4v() [[A0:#[0-9]+]] {
+// CHECK-LABEL: define spir_func void @_Z5test4v()
+// CHECK-SAME: [[A0]] {
 // CHECK: entry:
 // CHECK:   [[T0:%[0-9]+]] = call token @llvm.experimental.convergence.entry()
 // CHECK: do.body:
@@ -62,7 +66,7 @@ void test4() {
 // CHECK: if.then:
 // CHECK:call spir_func void @_Z3foov() [[A3]] [ 
"convergencectrl"(token [[T1]]) ]
 // CHECK: do.cond:
-// CHECK:call spir_func noundef i1 @_Z4condv() 
[[A3:#[0-9]+]] [ "convergencectrl"(token [[T1]]) ]
+// CHECK:call spir_func noundef i1 @_Z4condv() [[A3]] [ 
"convergencectrl"(token [[T1]]) ]
 
 void test5() {
   do {
@@ -74,7 +78,8 @@ void test5() {
 }
   } while (cond());
 }
-// CHECK: define spir_func void @_Z5test5v() [[A0:#[0-9]+]] {
+// CHECK-LABEL: define spir_func void @_Z5test5v()
+// CHECK-SAME: [[A0]] {
 // CHECK: entry:
 // CHECK:   [[T0:%[0-9]+]] = call token @llvm.experimental.convergence.entry()
 // CHECK: do.body:
@@ -84,7 +89,7 @@ void test5() {
 // CHECK: if.then:
 // CHECK:call spir_func void @_Z3foov() [[A3]] [ 
"convergencectrl"(token [[T2]]) ]
 // CHECK: do.cond:
-// CHECK:call spir_func noundef i1 @_Z4condv() 
[[A3:#[0-9]+]] [ "convergencectrl"(token [[T1]]) ]
+// CHECK:call spir_func noundef i1 @_Z4condv() [[A3]] [ 
"convergencectrl"(token [[T1]]) ]
 
 // CHECK-DAG: attributes [[A0]] = { {{.*}}convergent{{.*}} }
 // CHECK-DAG: attributes [[A3]] = { {{.*}}convergent{{.*}} }
diff --git a/clang/test/CodeGenHLSL/convergence/for.hlsl 
b/clang/test/CodeGenHLSL/convergence/for.hlsl
index 95f9a196bdb676..363c6a48839b56 100644
--- a/clang/test/CodeGenHLSL/convergence/for.hlsl
+++ b/clang/test/CodeGenHLSL/convergence/for.hlsl
@@ -10,7 +10,8 @@ void test1() {
 foo();
   }
 }
-// CHECK: define spir_func void @_Z5test1v() [[A0:#[0-9]+]] {
+// CHECK-LABEL: define spir_func void @_Z5test1v()
+// CHECK-SAME: [[A0:#[0-9]+]] {
 // CHECK: entry:
 // CHECK:   [[T0:%[0-9]+]] = call

[clang] [SPIR-V] strengthen some lit tests (PR #111636)

2024-10-09 Thread Sameer Sahasrabuddhe via cfe-commits


https://github.com/ssahasra closed 
https://github.com/llvm/llvm-project/pull/111636
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [clang][NFC] clean up the handling of convergence control tokens (PR #121738)

2025-01-06 Thread Sameer Sahasrabuddhe via cfe-commits


https://github.com/ssahasra created 
https://github.com/llvm/llvm-project/pull/121738

None

>From e1611a9dbfe7a8239b93b84fa7682e68dc727f0f Mon Sep 17 00:00:00 2001
From: Sameer Sahasrabuddhe 
Date: Mon, 6 Jan 2025 14:01:49 +0530
Subject: [PATCH] [clang][NFC] clean up the handling of convergence control
 tokens

---
 clang/lib/CodeGen/CGCall.cpp|  4 +--
 clang/lib/CodeGen/CGStmt.cpp| 46 +
 clang/lib/CodeGen/CodeGenFunction.h | 23 +--
 3 files changed, 29 insertions(+), 44 deletions(-)

diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp
index f139c30f3dfd44..89e2eace9120bf 100644
--- a/clang/lib/CodeGen/CGCall.cpp
+++ b/clang/lib/CodeGen/CGCall.cpp
@@ -4871,7 +4871,7 @@ llvm::CallInst 
*CodeGenFunction::EmitRuntimeCall(llvm::FunctionCallee callee,
   call->setCallingConv(getRuntimeCC());
 
   if (CGM.shouldEmitConvergenceTokens() && call->isConvergent())
-return addControlledConvergenceToken(call);
+return cast<llvm::CallInst>(addConvergenceControlToken(call));
   return call;
 }
 
@@ -5787,7 +5787,7 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo 
&CallInfo,
 CI->setName("call");
 
   if (CGM.shouldEmitConvergenceTokens() && CI->isConvergent())
-CI = addControlledConvergenceToken(CI);
+CI = addConvergenceControlToken(CI);
 
   // Update largest vector width from the return type.
   LargestVectorWidth =
diff --git a/clang/lib/CodeGen/CGStmt.cpp b/clang/lib/CodeGen/CGStmt.cpp
index 3974739d2abb47..7904e17dbebb81 100644
--- a/clang/lib/CodeGen/CGStmt.cpp
+++ b/clang/lib/CodeGen/CGStmt.cpp
@@ -1024,8 +1024,8 @@ void CodeGenFunction::EmitWhileStmt(const WhileStmt &S,
   EmitBlock(LoopHeader.getBlock());
 
   if (CGM.shouldEmitConvergenceTokens())
-ConvergenceTokenStack.push_back(emitConvergenceLoopToken(
-LoopHeader.getBlock(), ConvergenceTokenStack.back()));
+ConvergenceTokenStack.push_back(
+emitConvergenceLoopToken(LoopHeader.getBlock()));
 
   // Create an exit block for when the condition fails, which will
   // also become the break target.
@@ -1152,8 +1152,7 @@ void CodeGenFunction::EmitDoStmt(const DoStmt &S,
 EmitBlockWithFallThrough(LoopBody, &S);
 
   if (CGM.shouldEmitConvergenceTokens())
-ConvergenceTokenStack.push_back(
-emitConvergenceLoopToken(LoopBody, ConvergenceTokenStack.back()));
+ConvergenceTokenStack.push_back(emitConvergenceLoopToken(LoopBody));
 
   {
 RunCleanupsScope BodyScope(*this);
@@ -1231,8 +1230,7 @@ void CodeGenFunction::EmitForStmt(const ForStmt &S,
   EmitBlock(CondBlock);
 
   if (CGM.shouldEmitConvergenceTokens())
-ConvergenceTokenStack.push_back(
-emitConvergenceLoopToken(CondBlock, ConvergenceTokenStack.back()));
+ConvergenceTokenStack.push_back(emitConvergenceLoopToken(CondBlock));
 
   const SourceRange &R = S.getSourceRange();
   LoopStack.push(CondBlock, CGM.getContext(), CGM.getCodeGenOpts(), ForAttrs,
@@ -1369,8 +1367,7 @@ CodeGenFunction::EmitCXXForRangeStmt(const 
CXXForRangeStmt &S,
   EmitBlock(CondBlock);
 
   if (CGM.shouldEmitConvergenceTokens())
-ConvergenceTokenStack.push_back(
-emitConvergenceLoopToken(CondBlock, ConvergenceTokenStack.back()));
+ConvergenceTokenStack.push_back(emitConvergenceLoopToken(CondBlock));
 
   const SourceRange &R = S.getSourceRange();
   LoopStack.push(CondBlock, CGM.getContext(), CGM.getCodeGenOpts(), ForAttrs,
@@ -3245,35 +3242,32 @@ CodeGenFunction::GenerateCapturedStmtFunction(const 
CapturedStmt &S) {
   return F;
 }
 
-namespace {
 // Returns the first convergence entry/loop/anchor instruction found in |BB|.
 // std::nullptr otherwise.
-llvm::IntrinsicInst *getConvergenceToken(llvm::BasicBlock *BB) {
+static llvm::ConvergenceControlInst *getConvergenceToken(llvm::BasicBlock *BB) 
{
   for (auto &I : *BB) {
-auto *II = dyn_cast<llvm::IntrinsicInst>(&I);
-if (II && llvm::isConvergenceControlIntrinsic(II->getIntrinsicID()))
-  return II;
+if (auto *CI = dyn_cast<llvm::ConvergenceControlInst>(&I))
+  return CI;
   }
   return nullptr;
 }
 
-} // namespace
-
 llvm::CallBase *
-CodeGenFunction::addConvergenceControlToken(llvm::CallBase *Input,
-llvm::Value *ParentToken) {
+CodeGenFunction::addConvergenceControlToken(llvm::CallBase *Input) {
+  llvm::ConvergenceControlInst *ParentToken = ConvergenceTokenStack.back();
+  assert(ParentToken);
+
   llvm::Value *bundleArgs[] = {ParentToken};
   llvm::OperandBundleDef OB("convergencectrl", bundleArgs);
-  auto Output = llvm::CallBase::addOperandBundle(
+  auto *Output = llvm::CallBase::addOperandBundle(
   Input, llvm::LLVMContext::OB_convergencectrl, OB, Input->getIterator());
   Input->replaceAllUsesWith(Output);
   Input->eraseFromParent();
   return Output;
 }
 
-llvm::IntrinsicInst *
-CodeGenFunction::emitConvergenceLoopToken(llvm::BasicBlock *BB,
-  llvm::Value *ParentToken) {
+llvm::ConvergenceControlInst *
+CodeGenFunction::emitConvergenceLoopToken(llvm

[clang] [clang][NFC] clean up the handling of convergence control tokens (PR #121738)

2025-01-06 Thread Sameer Sahasrabuddhe via cfe-commits


https://github.com/ssahasra closed 
https://github.com/llvm/llvm-project/pull/121738
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [clang] [NFC] explicitly check if ParentMap contains key (PR #121736)

2025-01-06 Thread Sameer Sahasrabuddhe via cfe-commits


https://github.com/ssahasra updated 
https://github.com/llvm/llvm-project/pull/121736

>From 2cae10eb0b1e94729c26299af018216e729607de Mon Sep 17 00:00:00 2001
From: Sameer Sahasrabuddhe 
Date: Thu, 2 Jan 2025 14:30:07 +0530
Subject: [PATCH 1/2] [clang] explicitly check if ParentMap contains key

The implementation of ParentMap assumes that the key is absent if it is mapped
to nullptr. This breaks when trying to store a tuple as the value type. Remove
this assumption by explicit uses of `contains()` and `erase()`.
---
 clang/lib/AST/ParentMap.cpp | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/clang/lib/AST/ParentMap.cpp b/clang/lib/AST/ParentMap.cpp
index fd749b02b758c9..ada7b19487a782 100644
--- a/clang/lib/AST/ParentMap.cpp
+++ b/clang/lib/AST/ParentMap.cpp
@@ -34,13 +34,13 @@ static void BuildParentMap(MapTy& M, Stmt* S,
   case Stmt::PseudoObjectExprClass: {
 PseudoObjectExpr *POE = cast<PseudoObjectExpr>(S);
 
-if (OVMode == OV_Opaque && M[POE->getSyntacticForm()])
+if (OVMode == OV_Opaque && M.contains(POE->getSyntacticForm()))
   break;
 
 // If we are rebuilding the map, clear out any existing state.
-if (M[POE->getSyntacticForm()])
+if (M.contains(POE->getSyntacticForm()))
   for (Stmt *SubStmt : S->children())
-M[SubStmt] = nullptr;
+M.erase(SubStmt);
 
 M[POE->getSyntacticForm()] = S;
 BuildParentMap(M, POE->getSyntacticForm(), OV_Transparent);
@@ -78,7 +78,7 @@ static void BuildParentMap(MapTy& M, Stmt* S,
 // The right thing to do is to give the OpaqueValueExpr its syntactic
 // parent, then not reassign that when traversing the semantic expressions.
 OpaqueValueExpr *OVE = cast<OpaqueValueExpr>(S);
-if (OVMode == OV_Transparent || !M[OVE->getSourceExpr()]) {
+if (OVMode == OV_Transparent || !M.contains(OVE->getSourceExpr())) {
   M[OVE->getSourceExpr()] = S;
   BuildParentMap(M, OVE->getSourceExpr(), OV_Transparent);
 }

>From b3777e594b509fa1fc6222ca4fae8ca89cc1c8eb Mon Sep 17 00:00:00 2001
From: Sameer Sahasrabuddhe 
Date: Tue, 7 Jan 2025 11:26:07 +0530
Subject: [PATCH 2/2] eliminate multiple lookups

---
 clang/lib/AST/ParentMap.cpp | 31 +++
 1 file changed, 19 insertions(+), 12 deletions(-)

diff --git a/clang/lib/AST/ParentMap.cpp b/clang/lib/AST/ParentMap.cpp
index ada7b19487a782..60e35f24cb2ef4 100644
--- a/clang/lib/AST/ParentMap.cpp
+++ b/clang/lib/AST/ParentMap.cpp
@@ -33,17 +33,19 @@ static void BuildParentMap(MapTy& M, Stmt* S,
   switch (S->getStmtClass()) {
   case Stmt::PseudoObjectExprClass: {
 PseudoObjectExpr *POE = cast<PseudoObjectExpr>(S);
-
-if (OVMode == OV_Opaque && M.contains(POE->getSyntacticForm()))
-  break;
-
-// If we are rebuilding the map, clear out any existing state.
-if (M.contains(POE->getSyntacticForm()))
+Expr * SF = POE->getSyntacticForm();
+
+auto [Iter, Inserted] = M.try_emplace(SF, S);
+if (!Inserted) {
+  // Nothing more to do in opaque mode if we are updating an existing map.
+  if (OVMode == OV_Opaque)
+break;
+  // Update the entry in transparent mode, and clear existing state.
+  Iter->second = SF;
   for (Stmt *SubStmt : S->children())
 M.erase(SubStmt);
-
-M[POE->getSyntacticForm()] = S;
-BuildParentMap(M, POE->getSyntacticForm(), OV_Transparent);
+}
+BuildParentMap(M, SF, OV_Transparent);
 
 for (PseudoObjectExpr::semantics_iterator I = POE->semantics_begin(),
   E = POE->semantics_end();
@@ -78,10 +80,15 @@ static void BuildParentMap(MapTy& M, Stmt* S,
 // The right thing to do is to give the OpaqueValueExpr its syntactic
 // parent, then not reassign that when traversing the semantic expressions.
 OpaqueValueExpr *OVE = cast<OpaqueValueExpr>(S);
-if (OVMode == OV_Transparent || !M.contains(OVE->getSourceExpr())) {
-  M[OVE->getSourceExpr()] = S;
-  BuildParentMap(M, OVE->getSourceExpr(), OV_Transparent);
+Expr *SrcExpr = OVE->getSourceExpr();
+auto [Iter, Inserted] = M.try_emplace(SrcExpr, S);
+// Force update in transparent mode.
+if (!Inserted && OVMode == OV_Transparent) {
+  Iter->second = S;
+  Inserted = true;
 }
+if (Inserted)
+  BuildParentMap(M, SrcExpr, OV_Transparent);
 break;
   }
   case Stmt::CapturedStmtClass:

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [clang] [NFC] explicitly check if ParentMap contains key (PR #121736)

2025-01-06 Thread Sameer Sahasrabuddhe via cfe-commits


https://github.com/ssahasra updated 
https://github.com/llvm/llvm-project/pull/121736

>From 2cae10eb0b1e94729c26299af018216e729607de Mon Sep 17 00:00:00 2001
From: Sameer Sahasrabuddhe 
Date: Thu, 2 Jan 2025 14:30:07 +0530
Subject: [PATCH 1/2] [clang] explicitly check if ParentMap contains key

The implementation of ParentMap assumes that the key is absent if it is mapped
to nullptr. This breaks when trying to store a tuple as the value type. Remove
this assumption by explicit uses of `contains()` and `erase()`.
---
 clang/lib/AST/ParentMap.cpp | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/clang/lib/AST/ParentMap.cpp b/clang/lib/AST/ParentMap.cpp
index fd749b02b758c9..ada7b19487a782 100644
--- a/clang/lib/AST/ParentMap.cpp
+++ b/clang/lib/AST/ParentMap.cpp
@@ -34,13 +34,13 @@ static void BuildParentMap(MapTy& M, Stmt* S,
   case Stmt::PseudoObjectExprClass: {
 PseudoObjectExpr *POE = cast<PseudoObjectExpr>(S);
 
-if (OVMode == OV_Opaque && M[POE->getSyntacticForm()])
+if (OVMode == OV_Opaque && M.contains(POE->getSyntacticForm()))
   break;
 
 // If we are rebuilding the map, clear out any existing state.
-if (M[POE->getSyntacticForm()])
+if (M.contains(POE->getSyntacticForm()))
   for (Stmt *SubStmt : S->children())
-M[SubStmt] = nullptr;
+M.erase(SubStmt);
 
 M[POE->getSyntacticForm()] = S;
 BuildParentMap(M, POE->getSyntacticForm(), OV_Transparent);
@@ -78,7 +78,7 @@ static void BuildParentMap(MapTy& M, Stmt* S,
 // The right thing to do is to give the OpaqueValueExpr its syntactic
 // parent, then not reassign that when traversing the semantic expressions.
 OpaqueValueExpr *OVE = cast<OpaqueValueExpr>(S);
-if (OVMode == OV_Transparent || !M[OVE->getSourceExpr()]) {
+if (OVMode == OV_Transparent || !M.contains(OVE->getSourceExpr())) {
   M[OVE->getSourceExpr()] = S;
   BuildParentMap(M, OVE->getSourceExpr(), OV_Transparent);
 }

>From 028e4644b9e71e61acdeb05ed95692f67e4463d7 Mon Sep 17 00:00:00 2001
From: Sameer Sahasrabuddhe 
Date: Tue, 7 Jan 2025 12:23:32 +0530
Subject: [PATCH 2/2] eliminate multiple lookups

---
 clang/lib/AST/ParentMap.cpp | 31 +++
 1 file changed, 19 insertions(+), 12 deletions(-)

diff --git a/clang/lib/AST/ParentMap.cpp b/clang/lib/AST/ParentMap.cpp
index ada7b19487a782..58c1d4334c6d5f 100644
--- a/clang/lib/AST/ParentMap.cpp
+++ b/clang/lib/AST/ParentMap.cpp
@@ -33,17 +33,19 @@ static void BuildParentMap(MapTy& M, Stmt* S,
   switch (S->getStmtClass()) {
   case Stmt::PseudoObjectExprClass: {
 PseudoObjectExpr *POE = cast<PseudoObjectExpr>(S);
-
-if (OVMode == OV_Opaque && M.contains(POE->getSyntacticForm()))
-  break;
-
-// If we are rebuilding the map, clear out any existing state.
-if (M.contains(POE->getSyntacticForm()))
+Expr *SF = POE->getSyntacticForm();
+
+auto [Iter, Inserted] = M.try_emplace(SF, S);
+if (!Inserted) {
+  // Nothing more to do in opaque mode if we are updating an existing map.
+  if (OVMode == OV_Opaque)
+break;
+  // Update the entry in transparent mode, and clear existing state.
+  Iter->second = SF;
   for (Stmt *SubStmt : S->children())
 M.erase(SubStmt);
-
-M[POE->getSyntacticForm()] = S;
-BuildParentMap(M, POE->getSyntacticForm(), OV_Transparent);
+}
+BuildParentMap(M, SF, OV_Transparent);
 
 for (PseudoObjectExpr::semantics_iterator I = POE->semantics_begin(),
   E = POE->semantics_end();
@@ -78,10 +80,15 @@ static void BuildParentMap(MapTy& M, Stmt* S,
 // The right thing to do is to give the OpaqueValueExpr its syntactic
 // parent, then not reassign that when traversing the semantic expressions.
 OpaqueValueExpr *OVE = cast<OpaqueValueExpr>(S);
-if (OVMode == OV_Transparent || !M.contains(OVE->getSourceExpr())) {
-  M[OVE->getSourceExpr()] = S;
-  BuildParentMap(M, OVE->getSourceExpr(), OV_Transparent);
+Expr *SrcExpr = OVE->getSourceExpr();
+auto [Iter, Inserted] = M.try_emplace(SrcExpr, S);
+// Force update in transparent mode.
+if (!Inserted && OVMode == OV_Transparent) {
+  Iter->second = S;
+  Inserted = true;
 }
+if (Inserted)
+  BuildParentMap(M, SrcExpr, OV_Transparent);
 break;
   }
   case Stmt::CapturedStmtClass:

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [clang] [NFC] explicitly check if ParentMap contains key (PR #121736)

2025-01-06 Thread Sameer Sahasrabuddhe via cfe-commits



@@ -78,7 +78,7 @@ static void BuildParentMap(MapTy& M, Stmt* S,
 // The right thing to do is to give the OpaqueValueExpr its syntactic
 // parent, then not reassign that when traversing the semantic expressions.
 OpaqueValueExpr *OVE = cast<OpaqueValueExpr>(S);
-if (OVMode == OV_Transparent || !M[OVE->getSourceExpr()]) {
+if (OVMode == OV_Transparent || !M.contains(OVE->getSourceExpr())) {
   M[OVE->getSourceExpr()] = S;

ssahasra wrote:

Fixed with try_emplace() to avoid the second lookup as well, which inserts when 
the key is already present.


https://github.com/llvm/llvm-project/pull/121736
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [clang] [NFC] explicitly check if ParentMap contains key (PR #121736)

2025-01-06 Thread Sameer Sahasrabuddhe via cfe-commits



@@ -34,13 +34,13 @@ static void BuildParentMap(MapTy& M, Stmt* S,
   case Stmt::PseudoObjectExprClass: {
 PseudoObjectExpr *POE = cast<PseudoObjectExpr>(S);
 
-if (OVMode == OV_Opaque && M[POE->getSyntacticForm()])
+if (OVMode == OV_Opaque && M.contains(POE->getSyntacticForm()))

ssahasra wrote:

Fixed with try_emplace() to avoid the second lookup as well, which inserts when 
the key is already present.

https://github.com/llvm/llvm-project/pull/121736
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [clang] explicitly check if ParentMap contains key (PR #121736)

2025-01-06 Thread Sameer Sahasrabuddhe via cfe-commits


https://github.com/ssahasra created 
https://github.com/llvm/llvm-project/pull/121736

The implementation of ParentMap assumes that the key is absent if it is mapped 
to nullptr. This breaks when trying to store a tuple as the value type. Remove 
this assumption by explicit uses of `contains()` and `erase()`.

>From 2cae10eb0b1e94729c26299af018216e729607de Mon Sep 17 00:00:00 2001
From: Sameer Sahasrabuddhe 
Date: Thu, 2 Jan 2025 14:30:07 +0530
Subject: [PATCH] [clang] explicitly check if ParentMap contains key

The implementation of ParentMap assumes that the key is absent if it is mapped
to nullptr. This breaks when trying to store a tuple as the value type. Remove
this assumption by explicit uses of `contains()` and `erase()`.
---
 clang/lib/AST/ParentMap.cpp | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/clang/lib/AST/ParentMap.cpp b/clang/lib/AST/ParentMap.cpp
index fd749b02b758c9..ada7b19487a782 100644
--- a/clang/lib/AST/ParentMap.cpp
+++ b/clang/lib/AST/ParentMap.cpp
@@ -34,13 +34,13 @@ static void BuildParentMap(MapTy& M, Stmt* S,
   case Stmt::PseudoObjectExprClass: {
 PseudoObjectExpr *POE = cast<PseudoObjectExpr>(S);
 
-if (OVMode == OV_Opaque && M[POE->getSyntacticForm()])
+if (OVMode == OV_Opaque && M.contains(POE->getSyntacticForm()))
   break;
 
 // If we are rebuilding the map, clear out any existing state.
-if (M[POE->getSyntacticForm()])
+if (M.contains(POE->getSyntacticForm()))
   for (Stmt *SubStmt : S->children())
-M[SubStmt] = nullptr;
+M.erase(SubStmt);
 
 M[POE->getSyntacticForm()] = S;
 BuildParentMap(M, POE->getSyntacticForm(), OV_Transparent);
@@ -78,7 +78,7 @@ static void BuildParentMap(MapTy& M, Stmt* S,
 // The right thing to do is to give the OpaqueValueExpr its syntactic
 // parent, then not reassign that when traversing the semantic expressions.
 OpaqueValueExpr *OVE = cast<OpaqueValueExpr>(S);
-if (OVMode == OV_Transparent || !M[OVE->getSourceExpr()]) {
+if (OVMode == OV_Transparent || !M.contains(OVE->getSourceExpr())) {
   M[OVE->getSourceExpr()] = S;
   BuildParentMap(M, OVE->getSourceExpr(), OV_Transparent);
 }

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [clang] [NFC] explicitly check if ParentMap contains key (PR #121736)

2025-01-06 Thread Sameer Sahasrabuddhe via cfe-commits


https://github.com/ssahasra edited 
https://github.com/llvm/llvm-project/pull/121736
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] 61b806f - [clang] assign the correct parent in update to ParentMap

2025-01-07 Thread Sameer Sahasrabuddhe via cfe-commits


Author: Sameer Sahasrabuddhe
Date: 2025-01-08T12:03:25+05:30
New Revision: 61b806f43b2d6b3673a8f91393a28c98521472a8

URL: 
https://github.com/llvm/llvm-project/commit/61b806f43b2d6b3673a8f91393a28c98521472a8
DIFF: 
https://github.com/llvm/llvm-project/commit/61b806f43b2d6b3673a8f91393a28c98521472a8.diff

LOG: [clang] assign the correct parent in update to ParentMap

This fixes a bug that slipped into #121736.

Added: 


Modified: 
clang/lib/AST/ParentMap.cpp

Removed: 




diff  --git a/clang/lib/AST/ParentMap.cpp b/clang/lib/AST/ParentMap.cpp
index 58c1d4334c6d5f..e62e71bf5a5145 100644
--- a/clang/lib/AST/ParentMap.cpp
+++ b/clang/lib/AST/ParentMap.cpp
@@ -41,7 +41,7 @@ static void BuildParentMap(MapTy& M, Stmt* S,
   if (OVMode == OV_Opaque)
 break;
   // Update the entry in transparent mode, and clear existing state.
-  Iter->second = SF;
+  Iter->second = S;
   for (Stmt *SubStmt : S->children())
 M.erase(SubStmt);
 }



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [llvm] [llvm] Create() functions for ConvergenceControlInst (PR #125627)

2025-02-03 Thread Sameer Sahasrabuddhe via cfe-commits


https://github.com/ssahasra created 
https://github.com/llvm/llvm-project/pull/125627

None

>From 5d6d4fbbfabf5e33ec366ea113a0e6c93ba46bf4 Mon Sep 17 00:00:00 2001
From: Sameer Sahasrabuddhe 
Date: Thu, 9 Jan 2025 13:36:20 +0530
Subject: [PATCH] [llvm] Create() functions for ConvergenceControlInst

---
 clang/lib/CodeGen/CGStmt.cpp | 25 -
 llvm/include/llvm/IR/IntrinsicInst.h |  4 
 llvm/lib/IR/IntrinsicInst.cpp| 23 +++
 3 files changed, 31 insertions(+), 21 deletions(-)

diff --git a/clang/lib/CodeGen/CGStmt.cpp b/clang/lib/CodeGen/CGStmt.cpp
index 7c944fe85a352d5..f7aea9da9177a4a 100644
--- a/clang/lib/CodeGen/CGStmt.cpp
+++ b/clang/lib/CodeGen/CGStmt.cpp
@@ -3303,18 +3303,9 @@ 
CodeGenFunction::addConvergenceControlToken(llvm::CallBase *Input) {
 
 llvm::ConvergenceControlInst *
 CodeGenFunction::emitConvergenceLoopToken(llvm::BasicBlock *BB) {
-  CGBuilderTy::InsertPoint IP = Builder.saveIP();
-  if (BB->empty())
-Builder.SetInsertPoint(BB);
-  else
-Builder.SetInsertPoint(BB->getFirstInsertionPt());
-
-  llvm::CallBase *CB = Builder.CreateIntrinsic(
-  llvm::Intrinsic::experimental_convergence_loop, {}, {});
-  Builder.restoreIP(IP);
-
-  CB = addConvergenceControlToken(CB);
-  return cast<llvm::ConvergenceControlInst>(CB);
+  llvm::ConvergenceControlInst *ParentToken = ConvergenceTokenStack.back();
+  assert(ParentToken);
+  return llvm::ConvergenceControlInst::CreateLoop(*BB, ParentToken);
 }
 
 llvm::ConvergenceControlInst *
@@ -3327,13 +3318,5 @@ 
CodeGenFunction::getOrEmitConvergenceEntryToken(llvm::Function *F) {
   // Adding a convergence token requires the function to be marked as
   // convergent.
   F->setConvergent();
-
-  CGBuilderTy::InsertPoint IP = Builder.saveIP();
-  Builder.SetInsertPoint(&BB->front());
-  llvm::CallBase *I = Builder.CreateIntrinsic(
-  llvm::Intrinsic::experimental_convergence_entry, {}, {});
-  assert(isa(I));
-  Builder.restoreIP(IP);
-
-  return cast<llvm::ConvergenceControlInst>(I);
+  return llvm::ConvergenceControlInst::CreateEntry(*BB);
 }
diff --git a/llvm/include/llvm/IR/IntrinsicInst.h 
b/llvm/include/llvm/IR/IntrinsicInst.h
index 6ccbb6b185c7d96..ba7a0219ebcaefb 100644
--- a/llvm/include/llvm/IR/IntrinsicInst.h
+++ b/llvm/include/llvm/IR/IntrinsicInst.h
@@ -1882,6 +1882,10 @@ class ConvergenceControlInst : public IntrinsicInst {
   bool isLoop() const {
 return getIntrinsicID() == Intrinsic::experimental_convergence_loop;
   }
+
+  static ConvergenceControlInst* CreateAnchor(BasicBlock &BB);
+  static ConvergenceControlInst* CreateEntry(BasicBlock &BB);
+  static ConvergenceControlInst* CreateLoop(BasicBlock &BB, 
ConvergenceControlInst *Parent);
 };
 
 } // end namespace llvm
diff --git a/llvm/lib/IR/IntrinsicInst.cpp b/llvm/lib/IR/IntrinsicInst.cpp
index ad174b1487a6435..eb358b9fdea1e1c 100644
--- a/llvm/lib/IR/IntrinsicInst.cpp
+++ b/llvm/lib/IR/IntrinsicInst.cpp
@@ -885,3 +885,26 @@ Value *GCRelocateInst::getDerivedPtr() const {
 return *(Opt->Inputs.begin() + getDerivedPtrIndex());
   return *(GCInst->arg_begin() + getDerivedPtrIndex());
 }
+
+ConvergenceControlInst *ConvergenceControlInst::CreateAnchor(BasicBlock &BB) {
+  Module *M = BB.getModule();
+  Function *Fn = Intrinsic::getOrInsertDeclaration(M, 
llvm::Intrinsic::experimental_convergence_anchor);
+  auto *Call = CallInst::Create(Fn, "", BB.getFirstInsertionPt());
+  return cast<ConvergenceControlInst>(Call);
+}
+
+ConvergenceControlInst *ConvergenceControlInst::CreateEntry(BasicBlock &BB) {
+  Module *M = BB.getModule();
+  Function *Fn = Intrinsic::getOrInsertDeclaration(M, 
llvm::Intrinsic::experimental_convergence_entry);
+  auto *Call = CallInst::Create(Fn, "", BB.getFirstInsertionPt());
+  return cast<ConvergenceControlInst>(Call);
+}
+
+ConvergenceControlInst *ConvergenceControlInst::CreateLoop(BasicBlock &BB, 
ConvergenceControlInst *ParentToken) {
+  Module *M = BB.getModule();
+  Function *Fn = Intrinsic::getOrInsertDeclaration(M, 
llvm::Intrinsic::experimental_convergence_loop);
+  llvm::Value *BundleArgs[] = {ParentToken};
+  llvm::OperandBundleDef OB("convergencectrl", BundleArgs);
+  auto *Call = CallInst::Create(Fn, {}, {OB}, "", BB.getFirstInsertionPt());
+  return cast<ConvergenceControlInst>(Call);
+}

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [llvm] [llvm] Create() functions for ConvergenceControlInst (PR #125627)

2025-02-03 Thread Sameer Sahasrabuddhe via cfe-commits


https://github.com/ssahasra updated 
https://github.com/llvm/llvm-project/pull/125627

>From 5d6d4fbbfabf5e33ec366ea113a0e6c93ba46bf4 Mon Sep 17 00:00:00 2001
From: Sameer Sahasrabuddhe 
Date: Thu, 9 Jan 2025 13:36:20 +0530
Subject: [PATCH 1/2] [llvm] Create() functions for ConvergenceControlInst

---
 clang/lib/CodeGen/CGStmt.cpp | 25 -
 llvm/include/llvm/IR/IntrinsicInst.h |  4 
 llvm/lib/IR/IntrinsicInst.cpp| 23 +++
 3 files changed, 31 insertions(+), 21 deletions(-)

diff --git a/clang/lib/CodeGen/CGStmt.cpp b/clang/lib/CodeGen/CGStmt.cpp
index 7c944fe85a352d..f7aea9da9177a4 100644
--- a/clang/lib/CodeGen/CGStmt.cpp
+++ b/clang/lib/CodeGen/CGStmt.cpp
@@ -3303,18 +3303,9 @@ 
CodeGenFunction::addConvergenceControlToken(llvm::CallBase *Input) {
 
 llvm::ConvergenceControlInst *
 CodeGenFunction::emitConvergenceLoopToken(llvm::BasicBlock *BB) {
-  CGBuilderTy::InsertPoint IP = Builder.saveIP();
-  if (BB->empty())
-Builder.SetInsertPoint(BB);
-  else
-Builder.SetInsertPoint(BB->getFirstInsertionPt());
-
-  llvm::CallBase *CB = Builder.CreateIntrinsic(
-  llvm::Intrinsic::experimental_convergence_loop, {}, {});
-  Builder.restoreIP(IP);
-
-  CB = addConvergenceControlToken(CB);
-  return cast<llvm::ConvergenceControlInst>(CB);
+  llvm::ConvergenceControlInst *ParentToken = ConvergenceTokenStack.back();
+  assert(ParentToken);
+  return llvm::ConvergenceControlInst::CreateLoop(*BB, ParentToken);
 }
 
 llvm::ConvergenceControlInst *
@@ -3327,13 +3318,5 @@ 
CodeGenFunction::getOrEmitConvergenceEntryToken(llvm::Function *F) {
   // Adding a convergence token requires the function to be marked as
   // convergent.
   F->setConvergent();
-
-  CGBuilderTy::InsertPoint IP = Builder.saveIP();
-  Builder.SetInsertPoint(&BB->front());
-  llvm::CallBase *I = Builder.CreateIntrinsic(
-  llvm::Intrinsic::experimental_convergence_entry, {}, {});
-  assert(isa(I));
-  Builder.restoreIP(IP);
-
-  return cast<llvm::ConvergenceControlInst>(I);
+  return llvm::ConvergenceControlInst::CreateEntry(*BB);
 }
diff --git a/llvm/include/llvm/IR/IntrinsicInst.h 
b/llvm/include/llvm/IR/IntrinsicInst.h
index 6ccbb6b185c7d9..ba7a0219ebcaef 100644
--- a/llvm/include/llvm/IR/IntrinsicInst.h
+++ b/llvm/include/llvm/IR/IntrinsicInst.h
@@ -1882,6 +1882,10 @@ class ConvergenceControlInst : public IntrinsicInst {
   bool isLoop() const {
 return getIntrinsicID() == Intrinsic::experimental_convergence_loop;
   }
+
+  static ConvergenceControlInst* CreateAnchor(BasicBlock &BB);
+  static ConvergenceControlInst* CreateEntry(BasicBlock &BB);
+  static ConvergenceControlInst* CreateLoop(BasicBlock &BB, 
ConvergenceControlInst *Parent);
 };
 
 } // end namespace llvm
diff --git a/llvm/lib/IR/IntrinsicInst.cpp b/llvm/lib/IR/IntrinsicInst.cpp
index ad174b1487a643..eb358b9fdea1e1 100644
--- a/llvm/lib/IR/IntrinsicInst.cpp
+++ b/llvm/lib/IR/IntrinsicInst.cpp
@@ -885,3 +885,26 @@ Value *GCRelocateInst::getDerivedPtr() const {
 return *(Opt->Inputs.begin() + getDerivedPtrIndex());
   return *(GCInst->arg_begin() + getDerivedPtrIndex());
 }
+
+ConvergenceControlInst *ConvergenceControlInst::CreateAnchor(BasicBlock &BB) {
+  Module *M = BB.getModule();
+  Function *Fn = Intrinsic::getOrInsertDeclaration(M, 
llvm::Intrinsic::experimental_convergence_anchor);
+  auto *Call = CallInst::Create(Fn, "", BB.getFirstInsertionPt());
+  return cast<ConvergenceControlInst>(Call);
+}
+
+ConvergenceControlInst *ConvergenceControlInst::CreateEntry(BasicBlock &BB) {
+  Module *M = BB.getModule();
+  Function *Fn = Intrinsic::getOrInsertDeclaration(M, 
llvm::Intrinsic::experimental_convergence_entry);
+  auto *Call = CallInst::Create(Fn, "", BB.getFirstInsertionPt());
+  return cast<ConvergenceControlInst>(Call);
+}
+
+ConvergenceControlInst *ConvergenceControlInst::CreateLoop(BasicBlock &BB, 
ConvergenceControlInst *ParentToken) {
+  Module *M = BB.getModule();
+  Function *Fn = Intrinsic::getOrInsertDeclaration(M, 
llvm::Intrinsic::experimental_convergence_loop);
+  llvm::Value *BundleArgs[] = {ParentToken};
+  llvm::OperandBundleDef OB("convergencectrl", BundleArgs);
+  auto *Call = CallInst::Create(Fn, {}, {OB}, "", BB.getFirstInsertionPt());
+  return cast<ConvergenceControlInst>(Call);
+}

>From 0dd249c05d480fedbb823d9a5f8a5350c79e6f44 Mon Sep 17 00:00:00 2001
From: Sameer Sahasrabuddhe 
Date: Tue, 4 Feb 2025 12:26:15 +0530
Subject: [PATCH 2/2] clang format

---
 llvm/include/llvm/IR/IntrinsicInst.h |  7 ---
 llvm/lib/IR/IntrinsicInst.cpp| 13 +
 2 files changed, 13 insertions(+), 7 deletions(-)

diff --git a/llvm/include/llvm/IR/IntrinsicInst.h 
b/llvm/include/llvm/IR/IntrinsicInst.h
index ba7a0219ebcaef..93750d6e3845ef 100644
--- a/llvm/include/llvm/IR/IntrinsicInst.h
+++ b/llvm/include/llvm/IR/IntrinsicInst.h
@@ -1883,9 +1883,10 @@ class ConvergenceControlInst : public IntrinsicInst {
 return getIntrinsicID() == Intrinsic::experimental_convergence_loop;
   }
 
-  static ConvergenceControlInst* CreateAnchor(BasicBlock &BB);
-  static ConvergenceControlInst* CreateEntry(BasicBlock &

[clang] [llvm] [llvm] Create() functions for ConvergenceControlInst (PR #125627)

2025-02-04 Thread Sameer Sahasrabuddhe via cfe-commits


https://github.com/ssahasra closed 
https://github.com/llvm/llvm-project/pull/125627
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [llvm] [llvm] Create() functions for ConvergenceControlInst (PR #125627)

2025-02-04 Thread Sameer Sahasrabuddhe via cfe-commits


https://github.com/ssahasra updated 
https://github.com/llvm/llvm-project/pull/125627

>From eb432f46aa1033432930e94f7db4ffc708a6f2a9 Mon Sep 17 00:00:00 2001
From: Sameer Sahasrabuddhe 
Date: Thu, 9 Jan 2025 13:36:20 +0530
Subject: [PATCH] [llvm] Create() functions for ConvergenceControlInst

---
 clang/lib/CodeGen/CGStmt.cpp | 25 -
 llvm/include/llvm/IR/IntrinsicInst.h |  5 +
 llvm/lib/IR/IntrinsicInst.cpp| 28 
 3 files changed, 37 insertions(+), 21 deletions(-)

diff --git a/clang/lib/CodeGen/CGStmt.cpp b/clang/lib/CodeGen/CGStmt.cpp
index e2ae1046c084a8e..c96301c306d4123 100644
--- a/clang/lib/CodeGen/CGStmt.cpp
+++ b/clang/lib/CodeGen/CGStmt.cpp
@@ -3305,18 +3305,9 @@ 
CodeGenFunction::addConvergenceControlToken(llvm::CallBase *Input) {
 
 llvm::ConvergenceControlInst *
 CodeGenFunction::emitConvergenceLoopToken(llvm::BasicBlock *BB) {
-  CGBuilderTy::InsertPoint IP = Builder.saveIP();
-  if (BB->empty())
-Builder.SetInsertPoint(BB);
-  else
-Builder.SetInsertPoint(BB->getFirstInsertionPt());
-
-  llvm::CallBase *CB = Builder.CreateIntrinsic(
-  llvm::Intrinsic::experimental_convergence_loop, {}, {});
-  Builder.restoreIP(IP);
-
-  CB = addConvergenceControlToken(CB);
-  return cast<llvm::ConvergenceControlInst>(CB);
+  llvm::ConvergenceControlInst *ParentToken = ConvergenceTokenStack.back();
+  assert(ParentToken);
+  return llvm::ConvergenceControlInst::CreateLoop(*BB, ParentToken);
 }
 
 llvm::ConvergenceControlInst *
@@ -3329,13 +3320,5 @@ 
CodeGenFunction::getOrEmitConvergenceEntryToken(llvm::Function *F) {
   // Adding a convergence token requires the function to be marked as
   // convergent.
   F->setConvergent();
-
-  CGBuilderTy::InsertPoint IP = Builder.saveIP();
-  Builder.SetInsertPoint(&BB->front());
-  llvm::CallBase *I = Builder.CreateIntrinsic(
-  llvm::Intrinsic::experimental_convergence_entry, {}, {});
-  assert(isa(I));
-  Builder.restoreIP(IP);
-
-  return cast<llvm::ConvergenceControlInst>(I);
+  return llvm::ConvergenceControlInst::CreateEntry(*BB);
 }
diff --git a/llvm/include/llvm/IR/IntrinsicInst.h 
b/llvm/include/llvm/IR/IntrinsicInst.h
index 6ccbb6b185c7d96..93750d6e3845efd 100644
--- a/llvm/include/llvm/IR/IntrinsicInst.h
+++ b/llvm/include/llvm/IR/IntrinsicInst.h
@@ -1882,6 +1882,11 @@ class ConvergenceControlInst : public IntrinsicInst {
   bool isLoop() const {
 return getIntrinsicID() == Intrinsic::experimental_convergence_loop;
   }
+
+  static ConvergenceControlInst *CreateAnchor(BasicBlock &BB);
+  static ConvergenceControlInst *CreateEntry(BasicBlock &BB);
+  static ConvergenceControlInst *CreateLoop(BasicBlock &BB,
+ConvergenceControlInst *Parent);
 };
 
 } // end namespace llvm
diff --git a/llvm/lib/IR/IntrinsicInst.cpp b/llvm/lib/IR/IntrinsicInst.cpp
index ad174b1487a6435..256bce1abe71fad 100644
--- a/llvm/lib/IR/IntrinsicInst.cpp
+++ b/llvm/lib/IR/IntrinsicInst.cpp
@@ -885,3 +885,31 @@ Value *GCRelocateInst::getDerivedPtr() const {
 return *(Opt->Inputs.begin() + getDerivedPtrIndex());
   return *(GCInst->arg_begin() + getDerivedPtrIndex());
 }
+
+ConvergenceControlInst *ConvergenceControlInst::CreateAnchor(BasicBlock &BB) {
+  Module *M = BB.getModule();
+  Function *Fn = Intrinsic::getOrInsertDeclaration(
+  M, llvm::Intrinsic::experimental_convergence_anchor);
+  auto *Call = CallInst::Create(Fn, "", BB.getFirstInsertionPt());
+  return cast<ConvergenceControlInst>(Call);
+}
+
+ConvergenceControlInst *ConvergenceControlInst::CreateEntry(BasicBlock &BB) {
+  Module *M = BB.getModule();
+  Function *Fn = Intrinsic::getOrInsertDeclaration(
+  M, llvm::Intrinsic::experimental_convergence_entry);
+  auto *Call = CallInst::Create(Fn, "", BB.getFirstInsertionPt());
+  return cast<ConvergenceControlInst>(Call);
+}
+
+ConvergenceControlInst *
+ConvergenceControlInst::CreateLoop(BasicBlock &BB,
+   ConvergenceControlInst *ParentToken) {
+  Module *M = BB.getModule();
+  Function *Fn = Intrinsic::getOrInsertDeclaration(
+  M, llvm::Intrinsic::experimental_convergence_loop);
+  llvm::Value *BundleArgs[] = {ParentToken};
+  llvm::OperandBundleDef OB("convergencectrl", BundleArgs);
+  auto *Call = CallInst::Create(Fn, {}, {OB}, "", BB.getFirstInsertionPt());
+  return cast<ConvergenceControlInst>(Call);
+}

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [clang] [NFC] explicitly check if ParentMap contains key (PR #121736)

2025-01-07 Thread Sameer Sahasrabuddhe via cfe-commits


https://github.com/ssahasra closed 
https://github.com/llvm/llvm-project/pull/121736
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [llvm] [IR] Mark convergence intrins as has-side-effect (PR #134844)

2025-04-09 Thread Sameer Sahasrabuddhe via cfe-commits


ssahasra wrote:

To take this to its logical conclusion, when convergence tokens are in use, the 
`convergent` attribute is redundant. All we need is a `noconvergent` attribute 
for function declarations. A function definition is convergent iff the body 
contains a call to the `entry` intrinsic, and a function declaration is assumed 
to be convergent unless it has a `noconvergent` attribute on it. So the 
immediate impact on the specification is that everywhere including the 
verifier, it is always okay to ignore the `convergent` attribute on a function 
definition if convergence tokens are in use.

https://github.com/llvm/llvm-project/pull/134844
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [llvm] [IR] Mark convergence intrins as has-side-effect (PR #134844)

2025-04-09 Thread Sameer Sahasrabuddhe via cfe-commits


ssahasra wrote:

From the spec for convergence control tokens:
https://llvm.org/docs/ConvergentOperations.html#inferring-non-convergence

> An optimizer may remove the convergent attribute on a function if it can 
> prove that the function does not contain a call to 
> `llvm.experimental.convergence.entry`, or any uncontrolled convergent 
> operations.

https://github.com/llvm/llvm-project/pull/134844
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [llvm] [IR] Mark convergence intrins as has-side-effect (PR #134844)

2025-04-09 Thread Sameer Sahasrabuddhe via cfe-commits


ssahasra wrote:

> When a callee is marked as convergent, some targets like HLSL/SPIR-V add a 
> convergent token to the call.
> This is valid if both functions are marked as convergent.

I didn't understand the validity part. Why is the caller required to be 
convergent in order to add a token to a callsite?

> ADCE/BDCE and other DCE passes were allowed to remove convergence intrinsics 
> when their token were unused. This meant a leaf function could lose all its 
> convergence intrinsics. This would allow further optimization to remove the 
> convergent attribute from the callee.

Seems right to me.

> Issue was the caller was not updated, and we now had a convergence token 
> attached to a call function calling a non-convergent function.

Seems correct again.

> Would you be OK with me patching the several DCE functions to not drop 
> convergence intrinsics instead?

Why is DCE dropping convergence intrinsics? Is it because it cannot see the 
operand bundles as legit uses?

https://github.com/llvm/llvm-project/pull/134844
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [clang] Define convergence in C++ languages such as HIP, CUDA, OpenCL (PR #136280)

2025-04-18 Thread Sameer Sahasrabuddhe via cfe-commits


https://github.com/ssahasra created 
https://github.com/llvm/llvm-project/pull/136280

The proposed definition closely follows the existing definition of convergence 
in LLVM IR, but using C++ terms to describe language constructs.

There is no undefined behaviour. For each situation, convergence is either 
fully specified or implementation-defined.

Two important limitations where LLVM IR requires convergence control tokens to 
correctly express the convergence specified here:

1. Some combinations of loops, continue and break statements have different 
convergence specified for the statements inside that region of code, but result 
in the same loops in LLVM IR, thus producing ambiguous convergence in LLVM IR.

2. When a divergent condition inside a loop contains a convergent call followed 
by a break statement, these statements are lexically inside the loop, but in 
LLVM IR, they are outside the corresponding CFG loop.

>From 4ddf344b77cc01282571c643d621af34e6a7d8ad Mon Sep 17 00:00:00 2001
From: Sameer Sahasrabuddhe 
Date: Fri, 18 Apr 2025 12:21:36 +0530
Subject: [PATCH] [clang] Define convergence in C++ languages such as HIP,
 CUDA, OpenCL

The proposed definition closely follows the existing definition of convergence
in LLVM IR, but using C++ terms to describe language constructs.

There is no undefined behaviour. For each situation, convergence is either fully
specified or implementation-defined.

Two important limitations where LLVM IR requires convergence control tokens to
correctly express the convergence specified here:

1. Some combinations of loops, continue and break statements have different
   convergence specified for the statements inside that region of code, but
   result in the same loops in LLVM IR, thus producing ambiguous convergence in
   LLVM IR.

2. When a divergent condition inside a loop contains a convergent call followed
   by a break statement, these statements are lexically inside the loop, but in
   LLVM IR, they are outside the corresponding CFG loop.
---
 clang/docs/ThreadConvergence.rst  | 795 ++
 clang/docs/conf.py|   4 +
 clang/docs/index.rst  |   1 +
 clang/include/clang/AST/ParentMap.h   |  14 +-
 .../Analysis/Analyses/ConvergenceCheck.h  |  25 +
 clang/include/clang/Analysis/CFG.h|   2 +
 clang/include/clang/Basic/AttrDocs.td |  16 +-
 clang/include/clang/Basic/DiagnosticGroups.td |   3 +
 .../clang/Basic/DiagnosticSemaKinds.td|  10 +
 clang/lib/AST/ParentMap.cpp   |  65 +-
 clang/lib/Analysis/AnalysisDeclContext.cpp|   2 +-
 clang/lib/Analysis/CMakeLists.txt |   1 +
 clang/lib/Analysis/ConvergenceCheck.cpp   | 119 +++
 clang/lib/Sema/AnalysisBasedWarnings.cpp  |   7 +-
 clang/test/SemaHIP/convergence-warnings.hip   | 473 +++
 15 files changed, 1494 insertions(+), 43 deletions(-)
 create mode 100644 clang/docs/ThreadConvergence.rst
 create mode 100644 clang/include/clang/Analysis/Analyses/ConvergenceCheck.h
 create mode 100644 clang/lib/Analysis/ConvergenceCheck.cpp
 create mode 100644 clang/test/SemaHIP/convergence-warnings.hip

diff --git a/clang/docs/ThreadConvergence.rst b/clang/docs/ThreadConvergence.rst
new file mode 100644
index 0..d872ab9cb77f5
--- /dev/null
+++ b/clang/docs/ThreadConvergence.rst
@@ -0,0 +1,795 @@
+==================
+Thread Convergence
+==================
+
+.. contents::
+   :local:
+
+Revisions
+=========
+
+- 2025/04/14 --- Created
+
+Introduction
+
+
+Some languages such as OpenCL, CUDA and HIP execute threads in groups (typically
+on a GPU) that allow efficient communication within the group using special
+*crosslane* primitives. The outcome of a crosslane communication
+is sensitive to the set of threads that execute it "together", i.e.,
+`convergently`__. When control flow *diverges*, i.e., threads of the same group
+follow different paths through the program, not all threads of the group may be
+available to participate in this communication.
+
+__ https://llvm.org/docs/ConvergenceAndUniformity.html
+
+Crosslane Operations
+
+
+A *crosslane operation* is an expression whose evaluation by multiple threads
+produces a side-effect visible to all those threads in a manner that does not
+depend on volatile objects, library I/O functions or memory. The set of threads
+which participate in this communication is implicitly affected by control flow.
+
+For example, in the following GPU compute kernel, communication during the
+crosslane operation is expected to occur precisely among an environment-defined
+set of threads (such as workgroup or subgroup) for which ``condition`` is true:
+
+.. code-block:: c++
+   :caption: A crosslane operation
+   :name: convergence-example-crosslane-operation
+
+   void example_kernel() {
+  ...
+  if (condition)
+  crosslane_operation();
+  ...
+   }
+
+Thread Convergence
+------------------

[clang] [clang] convergent attribute does not require "all threads" (PR #135803)

2025-04-16 Thread Sameer Sahasrabuddhe via cfe-commits


https://github.com/ssahasra closed 
https://github.com/llvm/llvm-project/pull/135803
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [clang] Define convergence in C++ languages such as HIP, CUDA, OpenCL (PR #136280)

2025-04-21 Thread Sameer Sahasrabuddhe via cfe-commits


https://github.com/ssahasra edited 
https://github.com/llvm/llvm-project/pull/136280
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [clang] Define convergence in C++ languages such as HIP, CUDA, OpenCL (PR #136280)

2025-04-21 Thread Sameer Sahasrabuddhe via cfe-commits


https://github.com/ssahasra edited 
https://github.com/llvm/llvm-project/pull/136280
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [llvm] [IR] Mark convergence intrins as has-side-effect (PR #134844)

2025-04-10 Thread Sameer Sahasrabuddhe via cfe-commits


ssahasra wrote:

Thanks for the example! I think #133684 is a correct incremental step forward, 
but maybe just going the whole way bottom-up is better. It is also correct to 
remove the verifier check. It's not a well-formedness error or even a semantic 
error to pass a convergence control token to a non-convergent call. It is also 
not UB. The only outcome is that the user will not get the convergence they 
expected, and this is not even observable since there are no convergent 
operations that will be affected by this unused token.

@nhaehnle does this look okay to you?

https://github.com/llvm/llvm-project/pull/134844
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [clang] convergent attribute does not require "all threads" (PR #135803)

2025-04-16 Thread Sameer Sahasrabuddhe via cfe-commits


ssahasra wrote:

Taking the liberty to commit this change without waiting for a review. The 
change is self-contained, and actually does eliminate a legacy description that 
is best left to each language spec. 

https://github.com/llvm/llvm-project/pull/135803
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [clang] convergent attribute does not require "all threads" (PR #135803)

2025-04-15 Thread Sameer Sahasrabuddhe via cfe-commits


https://github.com/ssahasra created 
https://github.com/llvm/llvm-project/pull/135803

The documentation for the `convergent` attribute claims that OpenCL and CUDA 
require "all threads" in a group to call the same convergent operation. This is 
true only for OpenCL, and in general, the `convergent` attribute is used in 
LLVM IR on operations that have no such constraint.

>From 5024a4e5a9dc92744296cf59db7363602bca1807 Mon Sep 17 00:00:00 2001
From: Sameer Sahasrabuddhe 
Date: Tue, 15 Apr 2025 21:43:35 +0530
Subject: [PATCH] [clang] convergent attribute does not require "all threads"

The documentation for the `convergent` attribute claims that OpenCL and CUDA
require "all threads" in a group to call the same convergent operation. This is
true only for OpenCL, and in general, the `convergent` attribute is used in LLVM
IR on operations that have no such constraint.
---
 clang/include/clang/Basic/AttrDocs.td | 4 
 1 file changed, 4 deletions(-)

diff --git a/clang/include/clang/Basic/AttrDocs.td 
b/clang/include/clang/Basic/AttrDocs.td
index 97a5f24d35d7d..41e1918b4336f 100644
--- a/clang/include/clang/Basic/AttrDocs.td
+++ b/clang/include/clang/Basic/AttrDocs.td
@@ -1680,10 +1680,6 @@ translated into the LLVM ``convergent`` attribute, which 
indicates that the call
 instructions of a function with this attribute cannot be made control-dependent
 on any additional values.
 
-In languages designed for SPMD/SIMT programming model, e.g. OpenCL or CUDA,
-the call instructions of a function with this attribute must be executed by
-all work items or threads in a work group or sub group.
-
 This attribute is different from ``noduplicate`` because it allows duplicating
 function calls if it can be proved that the duplicated function calls are
 not made control-dependent on any additional values, e.g., unrolling a loop

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [llvm] AMDGPU: Move enqueued block handling into clang (PR #128519)

2025-03-10 Thread Sameer Sahasrabuddhe via cfe-commits


https://github.com/ssahasra approved this pull request.


https://github.com/llvm/llvm-project/pull/128519
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [llvm] AMDGPU: Move enqueued block handling into clang (PR #128519)

2025-03-10 Thread Sameer Sahasrabuddhe via cfe-commits



@@ -1,23 +1,23 @@
-//===- AMDGPUOpenCLEnqueuedBlockLowering.h ---*- 
C++-*-===//
+//===- AMDGPUExportKernelRuntimeHandles.h ---*- C++-*-===//

ssahasra wrote:

Do we need the filename and the emacs marking on the first line anymore? I 
would be very happy to see it dropped.

https://github.com/llvm/llvm-project/pull/128519
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

1 2 >

1 - 100 of 107 matches

Mail list logo