[clang] Fix lit test clang/test/CodeGenHIP/dpp-const-fold.hip. added (PR #71656)

2023-11-08 Thread Vikram Hegde via cfe-commits

https://github.com/vikramRH approved this pull request.


https://github.com/llvm/llvm-project/pull/71656
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[llvm] [clang] [WIP][AMDGPU] Enable hostcall printf for OpenCL (PR #70932)

2023-11-10 Thread Vikram Hegde via cfe-commits

vikramRH wrote:

@b-sumner, @ssahasra , what are your thoughts about making the hostcall default 
throughout ?
Also I do not think splitting this into separate patches is feasible since 
changes are interdependent, however I could split changes into separate commits 
to make it easier to review.

https://github.com/llvm/llvm-project/pull/70932
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[llvm] [clang] [WIP][AMDGPU] Enable hostcall printf for OpenCL (PR #70932)

2023-11-16 Thread Vikram Hegde via cfe-commits

https://github.com/vikramRH closed 
https://github.com/llvm/llvm-project/pull/70932
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [AMDGPU] Treat printf as builtin for OpenCL (PR #72554)

2023-11-16 Thread Vikram Hegde via cfe-commits

https://github.com/vikramRH created 
https://github.com/llvm/llvm-project/pull/72554

This is a prerequisite to enable opencl hostcall printf.  ensures that AMDGPU 
printf calls are lowered at clang CodeGen for both HIP and OCL.

>From 6ace9d0a51064be189093ca3bb42416aafadb7f6 Mon Sep 17 00:00:00 2001
From: Vikram 
Date: Fri, 10 Nov 2023 09:39:41 +
Subject: [PATCH] [AMDGPU] Treat printf as builtin for OpenCL

---
 clang/include/clang/Basic/BuiltinsAMDGPU.def | 8 
 clang/lib/AST/Decl.cpp   | 7 +++
 clang/lib/Basic/Targets/AMDGPU.cpp   | 2 ++
 clang/lib/CodeGen/CGBuiltin.cpp  | 5 +
 4 files changed, 22 insertions(+)

diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def 
b/clang/include/clang/Basic/BuiltinsAMDGPU.def
index a19c8bd5f219ec6..1799c72806bfdd4 100644
--- a/clang/include/clang/Basic/BuiltinsAMDGPU.def
+++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def
@@ -21,6 +21,10 @@
 #if defined(BUILTIN) && !defined(TARGET_BUILTIN)
 #   define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE) BUILTIN(ID, TYPE, ATTRS)
 #endif
+
+#if defined(BUILTIN) && !defined(LANGBUILTIN)
+#define LANGBUILTIN(ID, TYPE, ATTRS, BUILTIN_LANG) BUILTIN(ID, TYPE, ATTRS)
+#endif
 
//===--===//
 // SI+ only builtins.
 
//===--===//
@@ -406,5 +410,9 @@ TARGET_BUILTIN(__builtin_amdgcn_cvt_pk_fp8_f32, "iffiIb", 
"nc", "fp8-insts")
 TARGET_BUILTIN(__builtin_amdgcn_cvt_sr_bf8_f32, "ifiiIi", "nc", "fp8-insts")
 TARGET_BUILTIN(__builtin_amdgcn_cvt_sr_fp8_f32, "ifiiIi", "nc", "fp8-insts")
 
+// OpenCL
+LANGBUILTIN(printf, "icC*4.", "fp:0:", ALL_OCL_LANGUAGES)
+
 #undef BUILTIN
 #undef TARGET_BUILTIN
+#undef LANGBUILTIN
diff --git a/clang/lib/AST/Decl.cpp b/clang/lib/AST/Decl.cpp
index c5c2edf1bfe3aba..2597422bdd521a0 100644
--- a/clang/lib/AST/Decl.cpp
+++ b/clang/lib/AST/Decl.cpp
@@ -49,6 +49,7 @@
 #include "clang/Basic/SourceLocation.h"
 #include "clang/Basic/SourceManager.h"
 #include "clang/Basic/Specifiers.h"
+#include "clang/Basic/TargetBuiltins.h"
 #include "clang/Basic/TargetCXXABI.h"
 #include "clang/Basic/TargetInfo.h"
 #include "clang/Basic/Visibility.h"
@@ -3598,6 +3599,12 @@ unsigned FunctionDecl::getBuiltinID(bool 
ConsiderWrapperFunctions) const {
   if (!ConsiderWrapperFunctions && getStorageClass() == SC_Static)
 return 0;
 
+  // AMDGCN implementation supports printf as a builtin
+  // for OpenCL
+  if (Context.getTargetInfo().getTriple().isAMDGCN() &&
+  Context.getLangOpts().OpenCL && BuiltinID == AMDGPU::BIprintf)
+return BuiltinID;
+
   // OpenCL v1.2 s6.9.f - The library functions defined in
   // the C99 standard headers are not available.
   if (Context.getLangOpts().OpenCL &&
diff --git a/clang/lib/Basic/Targets/AMDGPU.cpp 
b/clang/lib/Basic/Targets/AMDGPU.cpp
index 409ae32ab424215..307cfa49f54e926 100644
--- a/clang/lib/Basic/Targets/AMDGPU.cpp
+++ b/clang/lib/Basic/Targets/AMDGPU.cpp
@@ -91,6 +91,8 @@ static constexpr Builtin::Info BuiltinInfo[] = {
   {#ID, TYPE, ATTRS, nullptr, HeaderDesc::NO_HEADER, ALL_LANGUAGES},
 #define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE)   
\
   {#ID, TYPE, ATTRS, FEATURE, HeaderDesc::NO_HEADER, ALL_LANGUAGES},
+#define LANGBUILTIN(ID, TYPE, ATTRS, LANG) 
\
+  {#ID, TYPE, ATTRS, nullptr, HeaderDesc::NO_HEADER, LANG},
 #include "clang/Basic/BuiltinsAMDGPU.def"
 };
 
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 09309a3937fb613..987909b5a62e11b 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -2458,6 +2458,11 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl 
GD, unsigned BuiltinID,
   &getTarget().getLongDoubleFormat() == &llvm::APFloat::IEEEquad())
 BuiltinID = mutateLongDoubleBuiltin(BuiltinID);
 
+   // Mutate the printf builtin ID so that we use the same CodeGen path for
+   // HIP and OpenCL with AMDGPU targets.
+   if (getTarget().getTriple().isAMDGCN() && BuiltinID == AMDGPU::BIprintf)
+ BuiltinID = Builtin::BIprintf;
+
   // If the builtin has been declared explicitly with an assembler label,
   // disable the specialized emitting below. Ideally we should communicate the
   // rename in IR, or at least avoid generating the intrinsic calls that are

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] Enable OpenCL hostcall printf (WIP) (PR #72556)

2023-11-16 Thread Vikram Hegde via cfe-commits

https://github.com/vikramRH created 
https://github.com/llvm/llvm-project/pull/72556

Kindly review top commit here, The builtin specific changes are up for in a 
seperate patch (https://github.com/llvm/llvm-project/pull/72554)

Few implementation details,
1. Hostcall printf is now default for both HIP and OpenCL.
2. The implementation adds vector processing support both for hostcall and 
buffered cases. The vector elements are extracted and pushed onto the buffer 
individually (each alingned to 8 byte boundary)
3. for OpenCL hostcall case, The format string pointer is addrspace casted to 
generic address space to be compatible with hostcall device lib functions.

>From 6ace9d0a51064be189093ca3bb42416aafadb7f6 Mon Sep 17 00:00:00 2001
From: Vikram 
Date: Fri, 10 Nov 2023 09:39:41 +
Subject: [PATCH 1/2] [AMDGPU] Treat printf as builtin for OpenCL

---
 clang/include/clang/Basic/BuiltinsAMDGPU.def | 8 
 clang/lib/AST/Decl.cpp   | 7 +++
 clang/lib/Basic/Targets/AMDGPU.cpp   | 2 ++
 clang/lib/CodeGen/CGBuiltin.cpp  | 5 +
 4 files changed, 22 insertions(+)

diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def 
b/clang/include/clang/Basic/BuiltinsAMDGPU.def
index a19c8bd5f219ec6..1799c72806bfdd4 100644
--- a/clang/include/clang/Basic/BuiltinsAMDGPU.def
+++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def
@@ -21,6 +21,10 @@
 #if defined(BUILTIN) && !defined(TARGET_BUILTIN)
 #   define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE) BUILTIN(ID, TYPE, ATTRS)
 #endif
+
+#if defined(BUILTIN) && !defined(LANGBUILTIN)
+#define LANGBUILTIN(ID, TYPE, ATTRS, BUILTIN_LANG) BUILTIN(ID, TYPE, ATTRS)
+#endif
 
//===--===//
 // SI+ only builtins.
 
//===--===//
@@ -406,5 +410,9 @@ TARGET_BUILTIN(__builtin_amdgcn_cvt_pk_fp8_f32, "iffiIb", 
"nc", "fp8-insts")
 TARGET_BUILTIN(__builtin_amdgcn_cvt_sr_bf8_f32, "ifiiIi", "nc", "fp8-insts")
 TARGET_BUILTIN(__builtin_amdgcn_cvt_sr_fp8_f32, "ifiiIi", "nc", "fp8-insts")
 
+// OpenCL
+LANGBUILTIN(printf, "icC*4.", "fp:0:", ALL_OCL_LANGUAGES)
+
 #undef BUILTIN
 #undef TARGET_BUILTIN
+#undef LANGBUILTIN
diff --git a/clang/lib/AST/Decl.cpp b/clang/lib/AST/Decl.cpp
index c5c2edf1bfe3aba..2597422bdd521a0 100644
--- a/clang/lib/AST/Decl.cpp
+++ b/clang/lib/AST/Decl.cpp
@@ -49,6 +49,7 @@
 #include "clang/Basic/SourceLocation.h"
 #include "clang/Basic/SourceManager.h"
 #include "clang/Basic/Specifiers.h"
+#include "clang/Basic/TargetBuiltins.h"
 #include "clang/Basic/TargetCXXABI.h"
 #include "clang/Basic/TargetInfo.h"
 #include "clang/Basic/Visibility.h"
@@ -3598,6 +3599,12 @@ unsigned FunctionDecl::getBuiltinID(bool 
ConsiderWrapperFunctions) const {
   if (!ConsiderWrapperFunctions && getStorageClass() == SC_Static)
 return 0;
 
+  // AMDGCN implementation supports printf as a builtin
+  // for OpenCL
+  if (Context.getTargetInfo().getTriple().isAMDGCN() &&
+  Context.getLangOpts().OpenCL && BuiltinID == AMDGPU::BIprintf)
+return BuiltinID;
+
   // OpenCL v1.2 s6.9.f - The library functions defined in
   // the C99 standard headers are not available.
   if (Context.getLangOpts().OpenCL &&
diff --git a/clang/lib/Basic/Targets/AMDGPU.cpp 
b/clang/lib/Basic/Targets/AMDGPU.cpp
index 409ae32ab424215..307cfa49f54e926 100644
--- a/clang/lib/Basic/Targets/AMDGPU.cpp
+++ b/clang/lib/Basic/Targets/AMDGPU.cpp
@@ -91,6 +91,8 @@ static constexpr Builtin::Info BuiltinInfo[] = {
   {#ID, TYPE, ATTRS, nullptr, HeaderDesc::NO_HEADER, ALL_LANGUAGES},
 #define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE)   
\
   {#ID, TYPE, ATTRS, FEATURE, HeaderDesc::NO_HEADER, ALL_LANGUAGES},
+#define LANGBUILTIN(ID, TYPE, ATTRS, LANG) 
\
+  {#ID, TYPE, ATTRS, nullptr, HeaderDesc::NO_HEADER, LANG},
 #include "clang/Basic/BuiltinsAMDGPU.def"
 };
 
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 09309a3937fb613..987909b5a62e11b 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -2458,6 +2458,11 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl 
GD, unsigned BuiltinID,
   &getTarget().getLongDoubleFormat() == &llvm::APFloat::IEEEquad())
 BuiltinID = mutateLongDoubleBuiltin(BuiltinID);
 
+   // Mutate the printf builtin ID so that we use the same CodeGen path for
+   // HIP and OpenCL with AMDGPU targets.
+   if (getTarget().getTriple().isAMDGCN() && BuiltinID == AMDGPU::BIprintf)
+ BuiltinID = Builtin::BIprintf;
+
   // If the builtin has been declared explicitly with an assembler label,
   // disable the specialized emitting below. Ideally we should communicate the
   // rename in IR, or at least avoid generating the intrinsic calls that are

>From f9329597564d4e3390f6d0d3a08e4a6f66b52de4 Mon Sep 17 00:00:00 2001
From: Vikram 
Date: Wed, 15 Nov 2023 01:20:55 -0500
Subject: [P

[clang] [llvm] [WIP][AMDGPU] Enable hostcall printf for OpenCL (PR #70932)

2023-11-16 Thread Vikram Hegde via cfe-commits

vikramRH wrote:

Two different patch sets have been created here, 

https://github.com/llvm/llvm-project/pull/72554
https://github.com/llvm/llvm-project/pull/72556


https://github.com/llvm/llvm-project/pull/70932
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [AMDGPU] Treat printf as builtin for OpenCL (PR #72554)

2023-11-16 Thread Vikram Hegde via cfe-commits


@@ -2458,6 +2458,11 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl 
GD, unsigned BuiltinID,
   &getTarget().getLongDoubleFormat() == &llvm::APFloat::IEEEquad())
 BuiltinID = mutateLongDoubleBuiltin(BuiltinID);
 
+   // Mutate the printf builtin ID so that we use the same CodeGen path for
+   // HIP and OpenCL with AMDGPU targets.
+   if (getTarget().getTriple().isAMDGCN() && BuiltinID == AMDGPU::BIprintf)
+ BuiltinID = Builtin::BIprintf;

vikramRH wrote:

@jhuber6 , I had your implementation in mind when I wrote this, The printf wont 
be expanded by clang with "-fno-builtin" and users would still have an option 
to use a lib variant if need be.  This also makes this more elegant as we would 
not have to hack the "fno-builtin" handling into the implementation, this is 
part of clang builtin handling.

https://github.com/llvm/llvm-project/pull/72554
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [AMDGPU] Treat printf as builtin for OpenCL (PR #72554)

2023-11-16 Thread Vikram Hegde via cfe-commits


@@ -406,5 +410,9 @@ TARGET_BUILTIN(__builtin_amdgcn_cvt_pk_fp8_f32, "iffiIb", 
"nc", "fp8-insts")
 TARGET_BUILTIN(__builtin_amdgcn_cvt_sr_bf8_f32, "ifiiIi", "nc", "fp8-insts")
 TARGET_BUILTIN(__builtin_amdgcn_cvt_sr_fp8_f32, "ifiiIi", "nc", "fp8-insts")
 
+// OpenCL
+LANGBUILTIN(printf, "icC*4.", "fp:0:", ALL_OCL_LANGUAGES)

vikramRH wrote:

This is specifically to recognize the OpenCL version of printf (where fmt 
string arg is a pointer to const address space) as a builtin. The hack to 
generic builtin is just a option that I had as I did not want to add a new case 
to builtin expansion code (since the API used by both OpenCL and HIP are same 
), however Im okay with adding a new case too if you feel it makes more sense.

https://github.com/llvm/llvm-project/pull/72554
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [AMDGPU] Treat printf as builtin for OpenCL (PR #72554)

2023-11-16 Thread Vikram Hegde via cfe-commits

vikramRH wrote:

> Any tests? Can you explain why it's not sufficient to do this lowering in the 
> AMDGPU pass?

I intended these changes to be part of 
https://github.com/llvm/llvm-project/pull/72556, but it seemed too many changes 
at one place, so I extracted this part out for ease of review. This cannot be 
merged standalone and has to be with 72556 , the tests are also part of that 
patch (This really should have been a stack of patches :( ). 

Also for AMDGPU pass, I plan to remove that altogether and handle all printf 
lowering at one place during clang Codegen. since we now use a compiler option 
to switch between different implementations, This makes a lot more sense I feel.

https://github.com/llvm/llvm-project/pull/72554
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [AMDGPU] Treat printf as builtin for OpenCL (PR #72554)

2023-11-16 Thread Vikram Hegde via cfe-commits

https://github.com/vikramRH updated 
https://github.com/llvm/llvm-project/pull/72554

>From 9833353ab6d7bb9716883b89f4e8b90285c1a60c Mon Sep 17 00:00:00 2001
From: Vikram 
Date: Fri, 10 Nov 2023 09:39:41 +
Subject: [PATCH] [AMDGPU] Treat printf as builtin for OpenCL

---
 clang/include/clang/Basic/BuiltinsAMDGPU.def | 8 
 clang/lib/AST/Decl.cpp   | 7 +++
 clang/lib/Basic/Targets/AMDGPU.cpp   | 3 +++
 clang/lib/CodeGen/CGBuiltin.cpp  | 5 +
 4 files changed, 23 insertions(+)

diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def 
b/clang/include/clang/Basic/BuiltinsAMDGPU.def
index a19c8bd5f219ec6..1799c72806bfdd4 100644
--- a/clang/include/clang/Basic/BuiltinsAMDGPU.def
+++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def
@@ -21,6 +21,10 @@
 #if defined(BUILTIN) && !defined(TARGET_BUILTIN)
 #   define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE) BUILTIN(ID, TYPE, ATTRS)
 #endif
+
+#if defined(BUILTIN) && !defined(LANGBUILTIN)
+#define LANGBUILTIN(ID, TYPE, ATTRS, BUILTIN_LANG) BUILTIN(ID, TYPE, ATTRS)
+#endif
 
//===--===//
 // SI+ only builtins.
 
//===--===//
@@ -406,5 +410,9 @@ TARGET_BUILTIN(__builtin_amdgcn_cvt_pk_fp8_f32, "iffiIb", 
"nc", "fp8-insts")
 TARGET_BUILTIN(__builtin_amdgcn_cvt_sr_bf8_f32, "ifiiIi", "nc", "fp8-insts")
 TARGET_BUILTIN(__builtin_amdgcn_cvt_sr_fp8_f32, "ifiiIi", "nc", "fp8-insts")
 
+// OpenCL
+LANGBUILTIN(printf, "icC*4.", "fp:0:", ALL_OCL_LANGUAGES)
+
 #undef BUILTIN
 #undef TARGET_BUILTIN
+#undef LANGBUILTIN
diff --git a/clang/lib/AST/Decl.cpp b/clang/lib/AST/Decl.cpp
index c5c2edf1bfe3aba..2597422bdd521a0 100644
--- a/clang/lib/AST/Decl.cpp
+++ b/clang/lib/AST/Decl.cpp
@@ -49,6 +49,7 @@
 #include "clang/Basic/SourceLocation.h"
 #include "clang/Basic/SourceManager.h"
 #include "clang/Basic/Specifiers.h"
+#include "clang/Basic/TargetBuiltins.h"
 #include "clang/Basic/TargetCXXABI.h"
 #include "clang/Basic/TargetInfo.h"
 #include "clang/Basic/Visibility.h"
@@ -3598,6 +3599,12 @@ unsigned FunctionDecl::getBuiltinID(bool 
ConsiderWrapperFunctions) const {
   if (!ConsiderWrapperFunctions && getStorageClass() == SC_Static)
 return 0;
 
+  // AMDGCN implementation supports printf as a builtin
+  // for OpenCL
+  if (Context.getTargetInfo().getTriple().isAMDGCN() &&
+  Context.getLangOpts().OpenCL && BuiltinID == AMDGPU::BIprintf)
+return BuiltinID;
+
   // OpenCL v1.2 s6.9.f - The library functions defined in
   // the C99 standard headers are not available.
   if (Context.getLangOpts().OpenCL &&
diff --git a/clang/lib/Basic/Targets/AMDGPU.cpp 
b/clang/lib/Basic/Targets/AMDGPU.cpp
index 409ae32ab424215..0cf6daee87c4a4c 100644
--- a/clang/lib/Basic/Targets/AMDGPU.cpp
+++ b/clang/lib/Basic/Targets/AMDGPU.cpp
@@ -91,6 +91,9 @@ static constexpr Builtin::Info BuiltinInfo[] = {
   {#ID, TYPE, ATTRS, nullptr, HeaderDesc::NO_HEADER, ALL_LANGUAGES},
 #define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE)   
\
   {#ID, TYPE, ATTRS, FEATURE, HeaderDesc::NO_HEADER, ALL_LANGUAGES},
+#define LANGBUILTIN(ID, TYPE, ATTRS, LANG) 
\
+  { #ID, TYPE, ATTRS, nullptr, HeaderDesc::NO_HEADER, LANG }   
\
+  ,
 #include "clang/Basic/BuiltinsAMDGPU.def"
 };
 
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 09309a3937fb613..d7a4b895f3432ca 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -2458,6 +2458,11 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl 
GD, unsigned BuiltinID,
   &getTarget().getLongDoubleFormat() == &llvm::APFloat::IEEEquad())
 BuiltinID = mutateLongDoubleBuiltin(BuiltinID);
 
+  // Mutate the printf builtin ID so that we use the same CodeGen path for
+  // HIP and OpenCL with AMDGPU targets.
+  if (getTarget().getTriple().isAMDGCN() && BuiltinID == AMDGPU::BIprintf)
+BuiltinID = Builtin::BIprintf;
+
   // If the builtin has been declared explicitly with an assembler label,
   // disable the specialized emitting below. Ideally we should communicate the
   // rename in IR, or at least avoid generating the intrinsic calls that are

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] Enable OpenCL hostcall printf (WIP) (PR #72556)

2023-11-16 Thread Vikram Hegde via cfe-commits

https://github.com/vikramRH edited 
https://github.com/llvm/llvm-project/pull/72556
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [AMDGPU] Treat printf as builtin for OpenCL (PR #72554)

2023-11-17 Thread Vikram Hegde via cfe-commits

https://github.com/vikramRH edited 
https://github.com/llvm/llvm-project/pull/72554
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] Enable OpenCL hostcall printf (WIP) (PR #72556)

2023-11-19 Thread Vikram Hegde via cfe-commits

https://github.com/vikramRH updated 
https://github.com/llvm/llvm-project/pull/72556

>From 6ace9d0a51064be189093ca3bb42416aafadb7f6 Mon Sep 17 00:00:00 2001
From: Vikram 
Date: Fri, 10 Nov 2023 09:39:41 +
Subject: [PATCH 1/3] [AMDGPU] Treat printf as builtin for OpenCL

---
 clang/include/clang/Basic/BuiltinsAMDGPU.def | 8 
 clang/lib/AST/Decl.cpp   | 7 +++
 clang/lib/Basic/Targets/AMDGPU.cpp   | 2 ++
 clang/lib/CodeGen/CGBuiltin.cpp  | 5 +
 4 files changed, 22 insertions(+)

diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def 
b/clang/include/clang/Basic/BuiltinsAMDGPU.def
index a19c8bd5f219ec6..1799c72806bfdd4 100644
--- a/clang/include/clang/Basic/BuiltinsAMDGPU.def
+++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def
@@ -21,6 +21,10 @@
 #if defined(BUILTIN) && !defined(TARGET_BUILTIN)
 #   define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE) BUILTIN(ID, TYPE, ATTRS)
 #endif
+
+#if defined(BUILTIN) && !defined(LANGBUILTIN)
+#define LANGBUILTIN(ID, TYPE, ATTRS, BUILTIN_LANG) BUILTIN(ID, TYPE, ATTRS)
+#endif
 
//===--===//
 // SI+ only builtins.
 
//===--===//
@@ -406,5 +410,9 @@ TARGET_BUILTIN(__builtin_amdgcn_cvt_pk_fp8_f32, "iffiIb", 
"nc", "fp8-insts")
 TARGET_BUILTIN(__builtin_amdgcn_cvt_sr_bf8_f32, "ifiiIi", "nc", "fp8-insts")
 TARGET_BUILTIN(__builtin_amdgcn_cvt_sr_fp8_f32, "ifiiIi", "nc", "fp8-insts")
 
+// OpenCL
+LANGBUILTIN(printf, "icC*4.", "fp:0:", ALL_OCL_LANGUAGES)
+
 #undef BUILTIN
 #undef TARGET_BUILTIN
+#undef LANGBUILTIN
diff --git a/clang/lib/AST/Decl.cpp b/clang/lib/AST/Decl.cpp
index c5c2edf1bfe3aba..2597422bdd521a0 100644
--- a/clang/lib/AST/Decl.cpp
+++ b/clang/lib/AST/Decl.cpp
@@ -49,6 +49,7 @@
 #include "clang/Basic/SourceLocation.h"
 #include "clang/Basic/SourceManager.h"
 #include "clang/Basic/Specifiers.h"
+#include "clang/Basic/TargetBuiltins.h"
 #include "clang/Basic/TargetCXXABI.h"
 #include "clang/Basic/TargetInfo.h"
 #include "clang/Basic/Visibility.h"
@@ -3598,6 +3599,12 @@ unsigned FunctionDecl::getBuiltinID(bool 
ConsiderWrapperFunctions) const {
   if (!ConsiderWrapperFunctions && getStorageClass() == SC_Static)
 return 0;
 
+  // AMDGCN implementation supports printf as a builtin
+  // for OpenCL
+  if (Context.getTargetInfo().getTriple().isAMDGCN() &&
+  Context.getLangOpts().OpenCL && BuiltinID == AMDGPU::BIprintf)
+return BuiltinID;
+
   // OpenCL v1.2 s6.9.f - The library functions defined in
   // the C99 standard headers are not available.
   if (Context.getLangOpts().OpenCL &&
diff --git a/clang/lib/Basic/Targets/AMDGPU.cpp 
b/clang/lib/Basic/Targets/AMDGPU.cpp
index 409ae32ab424215..307cfa49f54e926 100644
--- a/clang/lib/Basic/Targets/AMDGPU.cpp
+++ b/clang/lib/Basic/Targets/AMDGPU.cpp
@@ -91,6 +91,8 @@ static constexpr Builtin::Info BuiltinInfo[] = {
   {#ID, TYPE, ATTRS, nullptr, HeaderDesc::NO_HEADER, ALL_LANGUAGES},
 #define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE)   
\
   {#ID, TYPE, ATTRS, FEATURE, HeaderDesc::NO_HEADER, ALL_LANGUAGES},
+#define LANGBUILTIN(ID, TYPE, ATTRS, LANG) 
\
+  {#ID, TYPE, ATTRS, nullptr, HeaderDesc::NO_HEADER, LANG},
 #include "clang/Basic/BuiltinsAMDGPU.def"
 };
 
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 09309a3937fb613..987909b5a62e11b 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -2458,6 +2458,11 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl 
GD, unsigned BuiltinID,
   &getTarget().getLongDoubleFormat() == &llvm::APFloat::IEEEquad())
 BuiltinID = mutateLongDoubleBuiltin(BuiltinID);
 
+   // Mutate the printf builtin ID so that we use the same CodeGen path for
+   // HIP and OpenCL with AMDGPU targets.
+   if (getTarget().getTriple().isAMDGCN() && BuiltinID == AMDGPU::BIprintf)
+ BuiltinID = Builtin::BIprintf;
+
   // If the builtin has been declared explicitly with an assembler label,
   // disable the specialized emitting below. Ideally we should communicate the
   // rename in IR, or at least avoid generating the intrinsic calls that are

>From 040a28deef5fe7a5d9e357a898b50335992e708d Mon Sep 17 00:00:00 2001
From: Vikram 
Date: Mon, 20 Nov 2023 05:26:27 +
Subject: [PATCH 2/3] [AMDGPU] Enable OpenCL printf expansion at clang CodeGen

---
 clang/lib/CodeGen/CGBuiltin.cpp   |  3 ++-
 clang/lib/CodeGen/CGGPUBuiltin.cpp| 25 +++--
 clang/lib/Driver/ToolChains/Clang.cpp | 10 ++
 3 files changed, 31 insertions(+), 7 deletions(-)

diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 987909b5a62e11b..8d51df24c7872b7 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -5622,7 +5622,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl 
GD, unsig

[clang] [llvm] Enable OpenCL hostcall printf (WIP) (PR #72556)

2023-11-19 Thread Vikram Hegde via cfe-commits


@@ -170,20 +173,46 @@ static Value *appendString(IRBuilder<> &Builder, Value 
*Desc, Value *Arg,
   return callAppendStringN(Builder, Desc, Arg, Length, IsLast);
 }
 
+static Value *appendVectorArg(IRBuilder<> &Builder, Value *Desc, Value *Arg,
+  bool IsLast, bool IsBuffered) {
+  assert(Arg->getType()->isVectorTy() && "incorrent append* function");
+  auto VectorTy = dyn_cast(Arg->getType());
+  auto Zero = Builder.getInt64(0);
+  if (VectorTy) {
+for (unsigned int i = 0; i < VectorTy->getNumElements() - 1; i++) {
+  auto Val = Builder.CreateExtractElement(Arg, i);
+  Desc = callAppendArgs(Builder, Desc, 1,
+fitArgInto64Bits(Builder, Val, IsBuffered), Zero,
+Zero, Zero, Zero, Zero, Zero, false);
+}
+
+auto Val =

vikramRH wrote:

Done

https://github.com/llvm/llvm-project/pull/72556
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] Enable OpenCL hostcall printf (WIP) (PR #72556)

2023-11-19 Thread Vikram Hegde via cfe-commits


@@ -170,20 +173,46 @@ static Value *appendString(IRBuilder<> &Builder, Value 
*Desc, Value *Arg,
   return callAppendStringN(Builder, Desc, Arg, Length, IsLast);
 }
 
+static Value *appendVectorArg(IRBuilder<> &Builder, Value *Desc, Value *Arg,
+  bool IsLast, bool IsBuffered) {
+  assert(Arg->getType()->isVectorTy() && "incorrent append* function");
+  auto VectorTy = dyn_cast(Arg->getType());
+  auto Zero = Builder.getInt64(0);
+  if (VectorTy) {
+for (unsigned int i = 0; i < VectorTy->getNumElements() - 1; i++) {
+  auto Val = Builder.CreateExtractElement(Arg, i);
+  Desc = callAppendArgs(Builder, Desc, 1,
+fitArgInto64Bits(Builder, Val, IsBuffered), Zero,
+Zero, Zero, Zero, Zero, Zero, false);
+}
+
+auto Val =
+Builder.CreateExtractElement(Arg, VectorTy->getNumElements() - 1);
+return callAppendArgs(Builder, Desc, 1,
+  fitArgInto64Bits(Builder, Val, IsBuffered), Zero,
+  Zero, Zero, Zero, Zero, Zero, IsLast);
+  }
+  return nullptr;
+}
+
 static Value *processArg(IRBuilder<> &Builder, Value *Desc, Value *Arg,
- bool SpecIsCString, bool IsLast) {
+ bool SpecIsCString, bool IsVector, bool IsLast,
+ bool IsBuffered) {
   if (SpecIsCString && isa(Arg->getType())) {
 return appendString(Builder, Desc, Arg, IsLast);
-  }
-  // If the format specifies a string but the argument is not, the frontend 
will
-  // have printed a warning. We just rely on undefined behaviour and send the
-  // argument anyway.
-  return appendArg(Builder, Desc, Arg, IsLast);
+  } else if (IsVector) {

vikramRH wrote:

Done

https://github.com/llvm/llvm-project/pull/72556
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] Enable OpenCL hostcall printf (WIP) (PR #72556)

2023-11-19 Thread Vikram Hegde via cfe-commits


@@ -170,20 +173,46 @@ static Value *appendString(IRBuilder<> &Builder, Value 
*Desc, Value *Arg,
   return callAppendStringN(Builder, Desc, Arg, Length, IsLast);
 }
 
+static Value *appendVectorArg(IRBuilder<> &Builder, Value *Desc, Value *Arg,

vikramRH wrote:

Done

https://github.com/llvm/llvm-project/pull/72556
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] Enable OpenCL hostcall printf (WIP) (PR #72556)

2023-11-19 Thread Vikram Hegde via cfe-commits


@@ -278,7 +310,13 @@ static Value *callBufferedPrintfStart(
 StringData(StringRef(), LenWithNull, LenWithNullAligned, false));
   }
 } else {
-  int AllocSize = M->getDataLayout().getTypeAllocSize(Args[i]->getType());
+  int AllocSize = 0;
+  if (OCLVectors.test(i)) {
+auto VecArg = dyn_cast(Args[i]->getType());
+assert(VecArg && "invalid vector specifier");

vikramRH wrote:

Done

https://github.com/llvm/llvm-project/pull/72556
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[llvm] [clang] Enable OpenCL hostcall printf (WIP) (PR #72556)

2023-11-19 Thread Vikram Hegde via cfe-commits


@@ -278,7 +310,13 @@ static Value *callBufferedPrintfStart(
 StringData(StringRef(), LenWithNull, LenWithNullAligned, false));
   }
 } else {
-  int AllocSize = M->getDataLayout().getTypeAllocSize(Args[i]->getType());
+  int AllocSize = 0;
+  if (OCLVectors.test(i)) {
+auto VecArg = dyn_cast(Args[i]->getType());
+assert(VecArg && "invalid vector specifier");
+AllocSize = VecArg->getNumElements() * 8;
+  } else
+AllocSize = M->getDataLayout().getTypeAllocSize(Args[i]->getType());

vikramRH wrote:

I extract individual vector elements, expand them to 8 bytes and store them 
onto the buffer. The "getTypeAllocSize" would not give me the actual occupied 
size in the buffer in this case.

https://github.com/llvm/llvm-project/pull/72556
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [WIP][AMDGPU] Enable hostcall printf for OpenCL (PR #70932)

2023-11-01 Thread Vikram Hegde via cfe-commits

https://github.com/vikramRH created 
https://github.com/llvm/llvm-project/pull/70932

Attempt to enable OpenCL hostcall printf (SWDEV-204804).  I would like to have 
some inputs regarding few key points listed here,

1. We continue to use "-mprintf-kind" option to decide the lowering scheme. It 
now supports a new value "none" that just makes compiler use default lowering 
scheme. (hostcalls for HIP, Buffered for OpenCL)
2. OpenCL now treats printf as a builtin so that the printf expansion happens 
via clang CodeGen. This would also mean that the "AMDGPUPrintfRuntimeBinding" 
pass would no longer be required since all printf calls would be expanded 
earlier.
3. The implementation adds vector processing support both for hostcall and 
buffered cases. The vector elements are extracted and pushed onto the buffer 
individually (each alingned to 8 byte boundary)
4. for OpenCL hostcall case, The format string pointer is addrspace casted to 
generic address space to be compatible with hostcall device lib functions.


>From 9eec2462d07122f8376f1e60b2e679cc69814430 Mon Sep 17 00:00:00 2001
From: Vikram 
Date: Wed, 4 Oct 2023 05:41:47 -0400
Subject: [PATCH] [WIP][AMDGPU] hostcall printf support for OpenCL

---
 clang/include/clang/Basic/BuiltinsAMDGPU.def  |   8 +
 clang/include/clang/Basic/TargetOptions.h |  10 +-
 clang/include/clang/Driver/Options.td |   8 +-
 clang/lib/AST/Decl.cpp|   7 +
 clang/lib/Basic/Targets/AMDGPU.cpp|   2 +
 clang/lib/CodeGen/CGBuiltin.cpp   |   8 +-
 clang/lib/CodeGen/CGGPUBuiltin.cpp|  27 ++-
 clang/lib/CodeGen/CodeGenModule.cpp   |   4 +-
 clang/lib/Driver/ToolChains/Clang.cpp |  10 +
 .../CodeGenHIP/printf-kind-module-flag.hip|   6 +-
 clang/test/CodeGenOpenCL/amdgpu-printf.cl | 205 +-
 .../lib/Transforms/Utils/AMDGPUEmitPrintf.cpp | 135 +++-
 12 files changed, 359 insertions(+), 71 deletions(-)

diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def 
b/clang/include/clang/Basic/BuiltinsAMDGPU.def
index 532a91fd903e87c..b5e8be145b03a0d 100644
--- a/clang/include/clang/Basic/BuiltinsAMDGPU.def
+++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def
@@ -21,6 +21,10 @@
 #if defined(BUILTIN) && !defined(TARGET_BUILTIN)
 #   define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE) BUILTIN(ID, TYPE, ATTRS)
 #endif
+
+#if defined(BUILTIN) && !defined(LANGBUILTIN)
+#define LANGBUILTIN(ID, TYPE, ATTRS, BUILTIN_LANG) BUILTIN(ID, TYPE, ATTRS)
+#endif
 
//===--===//
 // SI+ only builtins.
 
//===--===//
@@ -402,5 +406,9 @@ TARGET_BUILTIN(__builtin_amdgcn_cvt_pk_fp8_f32, "iffiIb", 
"nc", "fp8-insts")
 TARGET_BUILTIN(__builtin_amdgcn_cvt_sr_bf8_f32, "ifiiIi", "nc", "fp8-insts")
 TARGET_BUILTIN(__builtin_amdgcn_cvt_sr_fp8_f32, "ifiiIi", "nc", "fp8-insts")
 
+// OpenCL
+LANGBUILTIN(printf, "icC*4.", "fp:0:", ALL_OCL_LANGUAGES)
+
 #undef BUILTIN
 #undef TARGET_BUILTIN
+#undef LANGBUILTIN
diff --git a/clang/include/clang/Basic/TargetOptions.h 
b/clang/include/clang/Basic/TargetOptions.h
index 8bb03249b7f8308..8ff07783b0dd5d1 100644
--- a/clang/include/clang/Basic/TargetOptions.h
+++ b/clang/include/clang/Basic/TargetOptions.h
@@ -92,16 +92,20 @@ class TargetOptions {
 
   /// \brief Enumeration values for AMDGPU printf lowering scheme
   enum class AMDGPUPrintfKind {
+/// Use deafult lowering scheme, HIP programs use hostcall and OpenCL uses
+/// buffered by default,
+None = 0,
+
 /// printf lowering scheme involving hostcalls, currently used by HIP
 /// programs by default
-Hostcall = 0,
+Hostcall = 1,
 
 /// printf lowering scheme involving implicit printf buffers,
-Buffered = 1,
+Buffered = 2,
   };
 
   /// \brief AMDGPU Printf lowering scheme
-  AMDGPUPrintfKind AMDGPUPrintfKindVal = AMDGPUPrintfKind::Hostcall;
+  AMDGPUPrintfKind AMDGPUPrintfKindVal = AMDGPUPrintfKind::None;
 
   // The code model to be used as specified by the user. Corresponds to
   // CodeModel::Model enum defined in include/llvm/Support/CodeGen.h, plus
diff --git a/clang/include/clang/Driver/Options.td 
b/clang/include/clang/Driver/Options.td
index c8b730e0f7ecd84..0fb7758845ff868 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -1251,15 +1251,17 @@ def emit_static_lib : Flag<["--"], "emit-static-lib">,
 
 def mprintf_kind_EQ : Joined<["-"], "mprintf-kind=">, Group,
   HelpText<"Specify the printf lowering scheme (AMDGPU only), allowed values 
are "
+  "\"none\" (Use default lowering scheme for a language, HIP uses hostcalls 
and "
+  "OpenCL uses Buffered scheme), "
   "\"hostcall\"(printing happens during kernel execution, this scheme "
   "relies on hostcalls which require system to support pcie atomics) "
   "and \"buffered\"(printing happens after all kernel threads exit, "
   "this uses a pri

[clang] [llvm] [WIP][AMDGPU] Enable hostcall printf for OpenCL (PR #70932)

2023-11-01 Thread Vikram Hegde via cfe-commits

vikramRH wrote:

@arsenm, The choice of defaults is based on current state of printf.  HIP uses 
hostcalls and OpenCL uses buffered variant by default. However I'm willing to 
make one of the two variants consistent (preferably hostcalls for me). Do note 
that this would make all OpenCL printf to be lowered to hostcalls from this 
patch onwards. @b-sumner , are you okay with this ?

Also regarding point 2, The current behaviour is that with HIP,  printf is 
lowered at clang CodeGen (since it treats printf to be a builtin) while OpenCL 
uses the IR pass. This patch will make it so that we could remove the IR pass 
and handle all lowering at clang CodeGen

https://github.com/llvm/llvm-project/pull/70932
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [WIP][AMDGPU] Enable hostcall printf for OpenCL (PR #70932)

2023-11-01 Thread Vikram Hegde via cfe-commits

https://github.com/vikramRH updated 
https://github.com/llvm/llvm-project/pull/70932

>From 4c0467078b2f38e814569ad351f86129d1c1d5ee Mon Sep 17 00:00:00 2001
From: Vikram 
Date: Wed, 4 Oct 2023 05:41:47 -0400
Subject: [PATCH] [WIP][AMDGPU] hostcall printf support for OpenCL

---
 clang/include/clang/Basic/BuiltinsAMDGPU.def  |   8 +
 clang/include/clang/Basic/TargetOptions.h |  10 +-
 clang/include/clang/Driver/Options.td |   8 +-
 clang/lib/AST/Decl.cpp|   7 +
 clang/lib/Basic/Targets/AMDGPU.cpp|   2 +
 clang/lib/CodeGen/CGBuiltin.cpp   |   8 +-
 clang/lib/CodeGen/CGGPUBuiltin.cpp|  27 ++-
 clang/lib/CodeGen/CodeGenModule.cpp   |   4 +-
 clang/lib/Driver/ToolChains/Clang.cpp |  10 +
 .../CodeGenHIP/printf-kind-module-flag.hip|   6 +-
 clang/test/CodeGenOpenCL/amdgpu-printf.cl | 205 +-
 .../lib/Transforms/Utils/AMDGPUEmitPrintf.cpp | 135 +++-
 12 files changed, 359 insertions(+), 71 deletions(-)

diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def 
b/clang/include/clang/Basic/BuiltinsAMDGPU.def
index 532a91fd903e87c..b5e8be145b03a0d 100644
--- a/clang/include/clang/Basic/BuiltinsAMDGPU.def
+++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def
@@ -21,6 +21,10 @@
 #if defined(BUILTIN) && !defined(TARGET_BUILTIN)
 #   define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE) BUILTIN(ID, TYPE, ATTRS)
 #endif
+
+#if defined(BUILTIN) && !defined(LANGBUILTIN)
+#define LANGBUILTIN(ID, TYPE, ATTRS, BUILTIN_LANG) BUILTIN(ID, TYPE, ATTRS)
+#endif
 
//===--===//
 // SI+ only builtins.
 
//===--===//
@@ -402,5 +406,9 @@ TARGET_BUILTIN(__builtin_amdgcn_cvt_pk_fp8_f32, "iffiIb", 
"nc", "fp8-insts")
 TARGET_BUILTIN(__builtin_amdgcn_cvt_sr_bf8_f32, "ifiiIi", "nc", "fp8-insts")
 TARGET_BUILTIN(__builtin_amdgcn_cvt_sr_fp8_f32, "ifiiIi", "nc", "fp8-insts")
 
+// OpenCL
+LANGBUILTIN(printf, "icC*4.", "fp:0:", ALL_OCL_LANGUAGES)
+
 #undef BUILTIN
 #undef TARGET_BUILTIN
+#undef LANGBUILTIN
diff --git a/clang/include/clang/Basic/TargetOptions.h 
b/clang/include/clang/Basic/TargetOptions.h
index 8bb03249b7f8308..8ff07783b0dd5d1 100644
--- a/clang/include/clang/Basic/TargetOptions.h
+++ b/clang/include/clang/Basic/TargetOptions.h
@@ -92,16 +92,20 @@ class TargetOptions {
 
   /// \brief Enumeration values for AMDGPU printf lowering scheme
   enum class AMDGPUPrintfKind {
+/// Use deafult lowering scheme, HIP programs use hostcall and OpenCL uses
+/// buffered by default,
+None = 0,
+
 /// printf lowering scheme involving hostcalls, currently used by HIP
 /// programs by default
-Hostcall = 0,
+Hostcall = 1,
 
 /// printf lowering scheme involving implicit printf buffers,
-Buffered = 1,
+Buffered = 2,
   };
 
   /// \brief AMDGPU Printf lowering scheme
-  AMDGPUPrintfKind AMDGPUPrintfKindVal = AMDGPUPrintfKind::Hostcall;
+  AMDGPUPrintfKind AMDGPUPrintfKindVal = AMDGPUPrintfKind::None;
 
   // The code model to be used as specified by the user. Corresponds to
   // CodeModel::Model enum defined in include/llvm/Support/CodeGen.h, plus
diff --git a/clang/include/clang/Driver/Options.td 
b/clang/include/clang/Driver/Options.td
index b1229b2f4562379..d62cfe8961db90a 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -1251,15 +1251,17 @@ def emit_static_lib : Flag<["--"], "emit-static-lib">,
 
 def mprintf_kind_EQ : Joined<["-"], "mprintf-kind=">, Group,
   HelpText<"Specify the printf lowering scheme (AMDGPU only), allowed values 
are "
+  "\"none\" (Use default lowering scheme for a language, HIP uses hostcalls 
and "
+  "OpenCL uses Buffered scheme), "
   "\"hostcall\"(printing happens during kernel execution, this scheme "
   "relies on hostcalls which require system to support pcie atomics) "
   "and \"buffered\"(printing happens after all kernel threads exit, "
   "this uses a printf buffer and does not rely on pcie atomic support)">,
   Visibility<[ClangOption, CC1Option]>,
-  Values<"hostcall,buffered">,
+  Values<"none,hostcall,buffered">,
   NormalizedValuesScope<"TargetOptions::AMDGPUPrintfKind">,
-  NormalizedValues<["Hostcall", "Buffered"]>,
-  MarshallingInfoEnum, "Hostcall">;
+  NormalizedValues<["None", "Hostcall", "Buffered"]>,
+  MarshallingInfoEnum, "None">;
 
 // HIP options
 let Group = hip_Group in {
diff --git a/clang/lib/AST/Decl.cpp b/clang/lib/AST/Decl.cpp
index 6efc177d61c03ba..b99376e42b8e7ba 100644
--- a/clang/lib/AST/Decl.cpp
+++ b/clang/lib/AST/Decl.cpp
@@ -49,6 +49,7 @@
 #include "clang/Basic/SourceLocation.h"
 #include "clang/Basic/SourceManager.h"
 #include "clang/Basic/Specifiers.h"
+#include "clang/Basic/TargetBuiltins.h"
 #include "clang/Basic/TargetCXXABI.h"
 #include "clang/Basic/TargetInfo.h"
 #include "clang/Basic/Visibility.h"
@@ -3585,6 +3586,12 @@ unsigned Functio

[clang] [llvm] Enable OpenCL hostcall printf (WIP) (PR #72556)

2023-11-24 Thread Vikram Hegde via cfe-commits

https://github.com/vikramRH updated 
https://github.com/llvm/llvm-project/pull/72556

>From 6ace9d0a51064be189093ca3bb42416aafadb7f6 Mon Sep 17 00:00:00 2001
From: Vikram 
Date: Fri, 10 Nov 2023 09:39:41 +
Subject: [PATCH 1/4] [AMDGPU] Treat printf as builtin for OpenCL

---
 clang/include/clang/Basic/BuiltinsAMDGPU.def | 8 
 clang/lib/AST/Decl.cpp   | 7 +++
 clang/lib/Basic/Targets/AMDGPU.cpp   | 2 ++
 clang/lib/CodeGen/CGBuiltin.cpp  | 5 +
 4 files changed, 22 insertions(+)

diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def 
b/clang/include/clang/Basic/BuiltinsAMDGPU.def
index a19c8bd5f219ec6..1799c72806bfdd4 100644
--- a/clang/include/clang/Basic/BuiltinsAMDGPU.def
+++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def
@@ -21,6 +21,10 @@
 #if defined(BUILTIN) && !defined(TARGET_BUILTIN)
 #   define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE) BUILTIN(ID, TYPE, ATTRS)
 #endif
+
+#if defined(BUILTIN) && !defined(LANGBUILTIN)
+#define LANGBUILTIN(ID, TYPE, ATTRS, BUILTIN_LANG) BUILTIN(ID, TYPE, ATTRS)
+#endif
 
//===--===//
 // SI+ only builtins.
 
//===--===//
@@ -406,5 +410,9 @@ TARGET_BUILTIN(__builtin_amdgcn_cvt_pk_fp8_f32, "iffiIb", 
"nc", "fp8-insts")
 TARGET_BUILTIN(__builtin_amdgcn_cvt_sr_bf8_f32, "ifiiIi", "nc", "fp8-insts")
 TARGET_BUILTIN(__builtin_amdgcn_cvt_sr_fp8_f32, "ifiiIi", "nc", "fp8-insts")
 
+// OpenCL
+LANGBUILTIN(printf, "icC*4.", "fp:0:", ALL_OCL_LANGUAGES)
+
 #undef BUILTIN
 #undef TARGET_BUILTIN
+#undef LANGBUILTIN
diff --git a/clang/lib/AST/Decl.cpp b/clang/lib/AST/Decl.cpp
index c5c2edf1bfe3aba..2597422bdd521a0 100644
--- a/clang/lib/AST/Decl.cpp
+++ b/clang/lib/AST/Decl.cpp
@@ -49,6 +49,7 @@
 #include "clang/Basic/SourceLocation.h"
 #include "clang/Basic/SourceManager.h"
 #include "clang/Basic/Specifiers.h"
+#include "clang/Basic/TargetBuiltins.h"
 #include "clang/Basic/TargetCXXABI.h"
 #include "clang/Basic/TargetInfo.h"
 #include "clang/Basic/Visibility.h"
@@ -3598,6 +3599,12 @@ unsigned FunctionDecl::getBuiltinID(bool 
ConsiderWrapperFunctions) const {
   if (!ConsiderWrapperFunctions && getStorageClass() == SC_Static)
 return 0;
 
+  // AMDGCN implementation supports printf as a builtin
+  // for OpenCL
+  if (Context.getTargetInfo().getTriple().isAMDGCN() &&
+  Context.getLangOpts().OpenCL && BuiltinID == AMDGPU::BIprintf)
+return BuiltinID;
+
   // OpenCL v1.2 s6.9.f - The library functions defined in
   // the C99 standard headers are not available.
   if (Context.getLangOpts().OpenCL &&
diff --git a/clang/lib/Basic/Targets/AMDGPU.cpp 
b/clang/lib/Basic/Targets/AMDGPU.cpp
index 409ae32ab424215..307cfa49f54e926 100644
--- a/clang/lib/Basic/Targets/AMDGPU.cpp
+++ b/clang/lib/Basic/Targets/AMDGPU.cpp
@@ -91,6 +91,8 @@ static constexpr Builtin::Info BuiltinInfo[] = {
   {#ID, TYPE, ATTRS, nullptr, HeaderDesc::NO_HEADER, ALL_LANGUAGES},
 #define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE)   
\
   {#ID, TYPE, ATTRS, FEATURE, HeaderDesc::NO_HEADER, ALL_LANGUAGES},
+#define LANGBUILTIN(ID, TYPE, ATTRS, LANG) 
\
+  {#ID, TYPE, ATTRS, nullptr, HeaderDesc::NO_HEADER, LANG},
 #include "clang/Basic/BuiltinsAMDGPU.def"
 };
 
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 09309a3937fb613..987909b5a62e11b 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -2458,6 +2458,11 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl 
GD, unsigned BuiltinID,
   &getTarget().getLongDoubleFormat() == &llvm::APFloat::IEEEquad())
 BuiltinID = mutateLongDoubleBuiltin(BuiltinID);
 
+   // Mutate the printf builtin ID so that we use the same CodeGen path for
+   // HIP and OpenCL with AMDGPU targets.
+   if (getTarget().getTriple().isAMDGCN() && BuiltinID == AMDGPU::BIprintf)
+ BuiltinID = Builtin::BIprintf;
+
   // If the builtin has been declared explicitly with an assembler label,
   // disable the specialized emitting below. Ideally we should communicate the
   // rename in IR, or at least avoid generating the intrinsic calls that are

>From 040a28deef5fe7a5d9e357a898b50335992e708d Mon Sep 17 00:00:00 2001
From: Vikram 
Date: Mon, 20 Nov 2023 05:26:27 +
Subject: [PATCH 2/4] [AMDGPU] Enable OpenCL printf expansion at clang CodeGen

---
 clang/lib/CodeGen/CGBuiltin.cpp   |  3 ++-
 clang/lib/CodeGen/CGGPUBuiltin.cpp| 25 +++--
 clang/lib/Driver/ToolChains/Clang.cpp | 10 ++
 3 files changed, 31 insertions(+), 7 deletions(-)

diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 987909b5a62e11b..8d51df24c7872b7 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -5622,7 +5622,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl 
GD, unsig

[clang] [llvm] Enable OpenCL hostcall printf (WIP) (PR #72556)

2023-11-24 Thread Vikram Hegde via cfe-commits


@@ -170,20 +173,49 @@ static Value *appendString(IRBuilder<> &Builder, Value 
*Desc, Value *Arg,
   return callAppendStringN(Builder, Desc, Arg, Length, IsLast);
 }
 
+static Value *appendVectorArg(IRBuilder<> &Builder, Value *Desc, Value *Arg,
+  bool IsLast, bool IsBuffered) {
+  assert(Arg->getType()->isVectorTy() && "incorrent append* function");
+  auto VectorTy = dyn_cast(Arg->getType());
+  auto Zero = Builder.getInt64(0);
+  if (VectorTy) {
+for (unsigned int i = 0; i < VectorTy->getNumElements() - 1; i++) {
+  auto Val = Builder.CreateExtractElement(Arg, i);
+  Desc = callAppendArgs(Builder, Desc, 1,
+fitArgInto64Bits(Builder, Val, IsBuffered), Zero,
+Zero, Zero, Zero, Zero, Zero, false);
+}
+
+Value* Val =
+Builder.CreateExtractElement(Arg, VectorTy->getNumElements() - 1);
+return callAppendArgs(Builder, Desc, 1,
+  fitArgInto64Bits(Builder, Val, IsBuffered), Zero,
+  Zero, Zero, Zero, Zero, Zero, IsLast);
+  }
+  return nullptr;
+}
+
 static Value *processArg(IRBuilder<> &Builder, Value *Desc, Value *Arg,
- bool SpecIsCString, bool IsLast) {
+ bool SpecIsCString, bool IsVector, bool IsLast,
+ bool IsBuffered) {
   if (SpecIsCString && isa(Arg->getType())) {
 return appendString(Builder, Desc, Arg, IsLast);
   }
-  // If the format specifies a string but the argument is not, the frontend 
will
-  // have printed a warning. We just rely on undefined behaviour and send the
-  // argument anyway.
-  return appendArg(Builder, Desc, Arg, IsLast);
+
+  if (IsVector) {
+return appendVectorArg(Builder, Desc, Arg, IsLast, IsBuffered);
+  } 
+
+  // If the format specifies a string but the argument is not, the frontend
+  // will have printed a warning. We just rely on undefined behaviour and send
+  // the argument anyway.
+  return appendArg(Builder, Desc, Arg, IsLast, IsBuffered);
 }
 
 // Scan the format string to locate all specifiers, and mark the ones that
 // specify a string, i.e, the "%s" specifier with optional '*' characters.
-static void locateCStrings(SparseBitVector<8> &BV, StringRef Str) {
+static void locateCStringsAndVectors(SparseBitVector<8> &BV,

vikramRH wrote:

Done

https://github.com/llvm/llvm-project/pull/72556
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] Enable OpenCL hostcall printf (WIP) (PR #72556)

2023-11-24 Thread Vikram Hegde via cfe-commits


@@ -1,12 +1,68 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
-// RUN: %clang_cc1 -cl-std=CL1.2 -triple amdgcn-amd-amdhsa 
-disable-llvm-passes -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -cl-std=CL1.2 -triple amdgcn-amd-amdhsa 
-mprintf-kind=buffered -disable-llvm-passes -emit-llvm -o - %s | FileCheck 
--check-prefix=CHECK_BUFFERED %s
+// RUN: %clang_cc1 -cl-std=CL1.2 -triple amdgcn-amd-amdhsa 
-mprintf-kind=hostcall -disable-llvm-passes -emit-llvm -o - %s | FileCheck 
--check-prefix=CHECK_HOSTCALL %s
 
 int printf(__constant const char* st, ...) __attribute__((format(printf, 1, 
2)));

vikramRH wrote:

Done

https://github.com/llvm/llvm-project/pull/72556
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] Enable OpenCL hostcall printf (WIP) (PR #72556)

2023-11-24 Thread Vikram Hegde via cfe-commits


@@ -194,6 +226,8 @@ static void locateCStrings(SparseBitVector<8> &BV, 
StringRef Str) {
   SpecPos += 2;
   continue;
 }
+if (Str.find_first_of("v", SpecPos) != StringRef::npos)

vikramRH wrote:

Fixed

https://github.com/llvm/llvm-project/pull/72556
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] Enable OpenCL hostcall printf (WIP) (PR #72556)

2023-11-24 Thread Vikram Hegde via cfe-commits


@@ -170,20 +173,49 @@ static Value *appendString(IRBuilder<> &Builder, Value 
*Desc, Value *Arg,
   return callAppendStringN(Builder, Desc, Arg, Length, IsLast);
 }
 
+static Value *appendVectorArg(IRBuilder<> &Builder, Value *Desc, Value *Arg,
+  bool IsLast, bool IsBuffered) {
+  assert(Arg->getType()->isVectorTy() && "incorrent append* function");
+  auto VectorTy = dyn_cast(Arg->getType());
+  auto Zero = Builder.getInt64(0);
+  if (VectorTy) {

vikramRH wrote:

I have changed this code a little now so that only FixedVectorTypes are 
handled. This should be okay since the OCL specs specifically say only vectors 
of length 2,3,4,8 and 16 are supported for printf.

https://github.com/llvm/llvm-project/pull/72556
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] Enable OpenCL hostcall printf (WIP) (PR #72556)

2023-11-24 Thread Vikram Hegde via cfe-commits


@@ -26,28 +26,31 @@ using namespace llvm;
 
 #define DEBUG_TYPE "amdgpu-emit-printf"
 
-static Value *fitArgInto64Bits(IRBuilder<> &Builder, Value *Arg) {
+static Value *fitArgInto64Bits(IRBuilder<> &Builder, Value *Arg,
+   bool IsBuffered) {
+  const DataLayout &DL = 
Builder.GetInsertBlock()->getModule()->getDataLayout();
   auto Int64Ty = Builder.getInt64Ty();
   auto Ty = Arg->getType();
 
   if (auto IntTy = dyn_cast(Ty)) {
-switch (IntTy->getBitWidth()) {
-case 32:
-  return Builder.CreateZExt(Arg, Int64Ty);
-case 64:
-  return Arg;
+if (IntTy->getBitWidth() < 64) {
+  return Builder.CreateZExt(Arg, Builder.getInt64Ty());
 }
   }
 
-  if (Ty->getTypeID() == Type::DoubleTyID) {
+  if (Ty->isFloatingPointTy()) {
+if (DL.getTypeAllocSize(Ty) < 8)
+  Arg = Builder.CreateFPExt(Arg, Builder.getDoubleTy());
+if (IsBuffered)
+  return Arg;
 return Builder.CreateBitCast(Arg, Int64Ty);
   }
 
-  if (isa(Ty)) {
+  if (!IsBuffered && isa(Ty)) {
 return Builder.CreatePtrToInt(Arg, Int64Ty);

vikramRH wrote:

The pointer is just pushed onto the buffer. The cast is necessary for the 
hostcall case to be compatible with device lib functions

https://github.com/llvm/llvm-project/pull/72556
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] Enable OpenCL hostcall printf (WIP) (PR #72556)

2023-11-24 Thread Vikram Hegde via cfe-commits


@@ -26,28 +26,31 @@ using namespace llvm;
 
 #define DEBUG_TYPE "amdgpu-emit-printf"
 
-static Value *fitArgInto64Bits(IRBuilder<> &Builder, Value *Arg) {
+static Value *fitArgInto64Bits(IRBuilder<> &Builder, Value *Arg,
+   bool IsBuffered) {
+  const DataLayout &DL = 
Builder.GetInsertBlock()->getModule()->getDataLayout();
   auto Int64Ty = Builder.getInt64Ty();
   auto Ty = Arg->getType();
 
   if (auto IntTy = dyn_cast(Ty)) {
-switch (IntTy->getBitWidth()) {
-case 32:
-  return Builder.CreateZExt(Arg, Int64Ty);
-case 64:
-  return Arg;
+if (IntTy->getBitWidth() < 64) {
+  return Builder.CreateZExt(Arg, Builder.getInt64Ty());
 }
   }
 
-  if (Ty->getTypeID() == Type::DoubleTyID) {
+  if (Ty->isFloatingPointTy()) {
+if (DL.getTypeAllocSize(Ty) < 8)
+  Arg = Builder.CreateFPExt(Arg, Builder.getDoubleTy());

vikramRH wrote:

The type cast is necessary  for types such as _Float16, which is not handled at 
argument promotion. I have added a test case to show the same

https://github.com/llvm/llvm-project/pull/72556
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [AMDGPU] Treat printf as builtin for OpenCL (PR #72554)

2023-11-24 Thread Vikram Hegde via cfe-commits


@@ -406,5 +410,9 @@ TARGET_BUILTIN(__builtin_amdgcn_cvt_pk_fp8_f32, "iffiIb", 
"nc", "fp8-insts")
 TARGET_BUILTIN(__builtin_amdgcn_cvt_sr_bf8_f32, "ifiiIi", "nc", "fp8-insts")
 TARGET_BUILTIN(__builtin_amdgcn_cvt_sr_fp8_f32, "ifiiIi", "nc", "fp8-insts")
 
+// OpenCL
+LANGBUILTIN(printf, "icC*4.", "fp:0:", ALL_OCL_LANGUAGES)

vikramRH wrote:

@ssahasra , I still feel this is the way to move here since I dont see a way to 
access the printf option at IR level and thus decide version of printf to use. 
It has to be at clang CodeGen. I ask other reviewers too if they feel there are 
major concerns with adding such a builtin variant (i.e AMDGPU and OCL 
specific). I might have to look for alternative approaches if so. 

https://github.com/llvm/llvm-project/pull/72554
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [AMDGPU] Treat printf as builtin for OpenCL (PR #72554)

2023-11-24 Thread Vikram Hegde via cfe-commits

https://github.com/vikramRH edited 
https://github.com/llvm/llvm-project/pull/72554
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] Enable OpenCL hostcall printf (WIP) (PR #72556)

2023-11-27 Thread Vikram Hegde via cfe-commits

https://github.com/vikramRH ready_for_review 
https://github.com/llvm/llvm-project/pull/72556
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [AMDGPU] Enable OpenCL hostcall printf (WIP) (PR #72556)

2023-11-27 Thread Vikram Hegde via cfe-commits

https://github.com/vikramRH edited 
https://github.com/llvm/llvm-project/pull/72556
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [AMDGPU] Enable OpenCL hostcall printf (WIP) (PR #72556)

2023-11-28 Thread Vikram Hegde via cfe-commits

vikramRH wrote:

ping

https://github.com/llvm/llvm-project/pull/72556
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[llvm] [clang] [AMDGPU] Enable OpenCL hostcall printf (WIP) (PR #72556)

2023-11-28 Thread Vikram Hegde via cfe-commits

vikramRH wrote:

@arsenm , apologies for the trouble here. I should have based this out of my 
earlier commit. currently I do not see a way to base this patch off of my 
earlier commit and it might get too confusing for other reviewers if I close 
this and raise another review. would the individual commit details suffice ?

https://github.com/llvm/llvm-project/pull/72556
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [AMDGPU] Treat printf as builtin for OpenCL (PR #72554)

2023-11-30 Thread Vikram Hegde via cfe-commits


@@ -406,5 +410,9 @@ TARGET_BUILTIN(__builtin_amdgcn_cvt_pk_fp8_f32, "iffiIb", 
"nc", "fp8-insts")
 TARGET_BUILTIN(__builtin_amdgcn_cvt_sr_bf8_f32, "ifiiIi", "nc", "fp8-insts")
 TARGET_BUILTIN(__builtin_amdgcn_cvt_sr_fp8_f32, "ifiiIi", "nc", "fp8-insts")
 
+// OpenCL
+LANGBUILTIN(printf, "icC*4.", "fp:0:", ALL_OCL_LANGUAGES)

vikramRH wrote:

OpenCL spec says printf format string should be in constant address space. This 
makes the printf signature target specific and hence we would need a target 
specific builtinID to recognize this. Im not sure I understand how we can go 
ahead with generic "BIPrinf" here ?

https://github.com/llvm/llvm-project/pull/72554
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [AMDGPU] Treat printf as builtin for OpenCL (PR #72554)

2023-11-30 Thread Vikram Hegde via cfe-commits

https://github.com/vikramRH edited 
https://github.com/llvm/llvm-project/pull/72554
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [AMDGPU] Enable OpenCL hostcall printf (WIP) (PR #72556)

2023-12-03 Thread Vikram Hegde via cfe-commits

vikramRH wrote:

> Is there a separate PR open for "Add vector processing support to AMDGPU 
> printf"? I think it's easiest to move this part forward first

@arsenm , you are right. I just want to make sure we are good on runtime 
changes too now since there seems to be a blocker. The changes here are not 
necessary unless we are okay with runtime changes.

https://github.com/llvm/llvm-project/pull/72556
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[llvm] [clang] [AMDGPU] Enable OpenCL hostcall printf (WIP) (PR #72556)

2023-12-04 Thread Vikram Hegde via cfe-commits


@@ -4742,6 +4742,16 @@ void Clang::ConstructJob(Compilation &C, const JobAction 
&JA,
 Args.ClaimAllArgs(options::OPT_gen_cdb_fragment_path);
   }
 
+  if (TC.getTriple().isAMDGPU() && types::isOpenCL(Input.getType())) {
+if (Args.getLastArg(options::OPT_mprintf_kind_EQ)) {
+  CmdArgs.push_back(Args.MakeArgString(
+  "-mprintf-kind=" +
+  Args.getLastArgValue(options::OPT_mprintf_kind_EQ)));
+  // Force compiler error on invalid conversion specifiers
+  
CmdArgs.push_back(Args.MakeArgString("-Werror=format-invalid-specifier"));

vikramRH wrote:

This was just so that we could error out instead of undefined behaviors due to 
wrong specifiers. I have no preference here and would be okay to change it if 
you feel so.

https://github.com/llvm/llvm-project/pull/72556
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [Clang] support vector subscript expressions in constant evaluator (PR #76379)

2023-12-25 Thread Vikram Hegde via cfe-commits

https://github.com/vikramRH created 
https://github.com/llvm/llvm-project/pull/76379

Feel free to add additional reviewers as relevant,

I'm yet to update float test cases as I'm not sure whether it would be safe to 
directly compare float results in static assert. Would it okay to integer cast 
the results and compare them ?

>From 181a4629f08e7a0e7ec5b3a2406519c26c0d476b Mon Sep 17 00:00:00 2001
From: Vikram 
Date: Wed, 20 Dec 2023 05:36:40 +
Subject: [PATCH] [Clang] support vector subscript expressions in constant
 evaluator

---
 clang/lib/AST/ExprConstant.cpp   |  61 +-
 clang/test/CodeGenCXX/temporaries.cpp|  12 +-
 clang/test/SemaCXX/constexpr-vectors.cpp | 746 ++-
 3 files changed, 668 insertions(+), 151 deletions(-)

diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index f6aeee1a4e935d..0074b8aa00fc75 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -226,6 +226,12 @@ namespace {
 ArraySize = 0;
 MostDerivedLength = I + 1;
 IsArray = false;
+  } else if (Type->isVectorType()) {
+const VectorType *CT = Type->castAs();
+Type = CT->getElementType();
+ArraySize = CT->getNumElements();
+MostDerivedLength = I + 1;
+IsArray = true;
   } else {
 // Path[I] describes a base class.
 ArraySize = 0;
@@ -437,6 +443,15 @@ namespace {
   MostDerivedArraySize = 2;
   MostDerivedPathLength = Entries.size();
 }
+/// Update this designator to refer to the given vector component.
+void addVectorUnchecked(const VectorType *VecTy) {
+  Entries.push_back(PathEntry::ArrayIndex(0));
+
+  MostDerivedType = VecTy->getElementType();
+  MostDerivedIsArrayElement = true;
+  MostDerivedArraySize = VecTy->getNumElements();
+  MostDerivedPathLength = Entries.size();
+}
 void diagnoseUnsizedArrayPointerArithmetic(EvalInfo &Info, const Expr *E);
 void diagnosePointerArithmetic(EvalInfo &Info, const Expr *E,
const APSInt &N);
@@ -1732,6 +1747,10 @@ namespace {
   if (checkSubobject(Info, E, Imag ? CSK_Imag : CSK_Real))
 Designator.addComplexUnchecked(EltTy, Imag);
 }
+void addVector(EvalInfo &Info, const Expr *E, const VectorType *VecTy) {
+  if (checkSubobject(Info, E, CSK_ArrayIndex))
+Designator.addVectorUnchecked(VecTy);
+}
 void clearIsNullPointer() {
   IsNullPtr = false;
 }
@@ -1890,6 +1909,8 @@ static bool EvaluateFixedPointOrInteger(const Expr *E, 
APFixedPoint &Result,
 static bool EvaluateFixedPoint(const Expr *E, APFixedPoint &Result,
EvalInfo &Info);
 
+static bool EvaluateVector(const Expr *E, APValue &Result, EvalInfo &Info);
+
 
//===--===//
 // Misc utilities
 
//===--===//
@@ -3278,6 +3299,19 @@ static bool HandleLValueComplexElement(EvalInfo &Info, 
const Expr *E,
   return true;
 }
 
+static bool HandeLValueVectorComponent(EvalInfo &Info, const Expr *E,
+   LValue &LVal, const VectorType *VecTy,
+   APSInt &Adjustment) {
+  LVal.addVector(Info, E, VecTy);
+
+  CharUnits SizeOfComponent;
+  if (!HandleSizeof(Info, E->getExprLoc(), VecTy->getElementType(),
+SizeOfComponent))
+return false;
+  LVal.adjustOffsetAndIndex(Info, E, Adjustment, SizeOfComponent);
+  return true;
+}
+
 /// Try to evaluate the initializer for a variable declaration.
 ///
 /// \param Info   Information about the ongoing evaluation.
@@ -3718,7 +3752,8 @@ findSubobject(EvalInfo &Info, const Expr *E, const 
CompleteObject &Obj,
 }
 
 // If this is our last pass, check that the final object type is OK.
-if (I == N || (I == N - 1 && ObjType->isAnyComplexType())) {
+if (I == N || (I == N - 1 &&
+   (ObjType->isAnyComplexType() || ObjType->isVectorType( {
   // Accesses to volatile objects are prohibited.
   if (ObjType.isVolatileQualified() && isFormalAccess(handler.AccessKind)) 
{
 if (Info.getLangOpts().CPlusPlus) {
@@ -3823,6 +3858,10 @@ findSubobject(EvalInfo &Info, const Expr *E, const 
CompleteObject &Obj,
 return handler.found(Index ? O->getComplexFloatImag()
: O->getComplexFloatReal(), ObjType);
   }
+} else if (ObjType->isVectorType()) {
+  // Next Subobject is a vector element
+  uint64_t Index = Sub.Entries[I].getAsArrayIndex();
+  O = &O->getVectorElt(Index);
 } else if (const FieldDecl *Field = getAsField(Sub.Entries[I])) {
   if (Field->isMutable() &&
   !Obj.mayAccessMutableMembers(Info, handler.AccessKind)) {
@@ -8756,14 +8795,28 @@ bool LValueExprEvaluator::VisitMemberExpr(const 
MemberExpr *E) {
 }
 
 bool LValueExprEvaluat

[clang] [Clang] support vector subscript expressions in constant evaluator (WIP) (PR #76379)

2023-12-25 Thread Vikram Hegde via cfe-commits

https://github.com/vikramRH edited 
https://github.com/llvm/llvm-project/pull/76379
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [Clang] support vector subscript expressions in constant evaluator (WIP) (PR #76379)

2023-12-25 Thread Vikram Hegde via cfe-commits

vikramRH wrote:

It seems there are few crashes with systemZ vectors. Looking into them

https://github.com/llvm/llvm-project/pull/76379
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [Clang] support vector subscript expressions in constant evaluator (WIP) (PR #76379)

2023-12-26 Thread Vikram Hegde via cfe-commits

https://github.com/vikramRH updated 
https://github.com/llvm/llvm-project/pull/76379

>From 89c79eea31d1a9ec0656fbf5c4eacf75b2471034 Mon Sep 17 00:00:00 2001
From: Vikram 
Date: Wed, 20 Dec 2023 05:36:40 +
Subject: [PATCH] [Clang] support vector subscript expressions in constant
 evaluator

---
 clang/lib/AST/ExprConstant.cpp   |  61 +-
 clang/test/CodeGenCXX/temporaries.cpp|  12 +-
 clang/test/SemaCXX/constexpr-vectors.cpp | 746 ++-
 3 files changed, 668 insertions(+), 151 deletions(-)

diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index f6aeee1a4e935d..390c5aef477105 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -221,6 +221,12 @@ namespace {
 ArraySize = 2;
 MostDerivedLength = I + 1;
 IsArray = true;
+  } else if (Type->isVectorType()) {
+const VectorType *CT = Type->castAs();
+Type = CT->getElementType();
+ArraySize = CT->getNumElements();
+MostDerivedLength = I + 1;
+IsArray = true;
   } else if (const FieldDecl *FD = getAsField(Path[I])) {
 Type = FD->getType();
 ArraySize = 0;
@@ -437,6 +443,15 @@ namespace {
   MostDerivedArraySize = 2;
   MostDerivedPathLength = Entries.size();
 }
+/// Update this designator to refer to the given vector component.
+void addVectorUnchecked(const VectorType *VecTy) {
+  Entries.push_back(PathEntry::ArrayIndex(0));
+
+  MostDerivedType = VecTy->getElementType();
+  MostDerivedIsArrayElement = true;
+  MostDerivedArraySize = VecTy->getNumElements();
+  MostDerivedPathLength = Entries.size();
+}
 void diagnoseUnsizedArrayPointerArithmetic(EvalInfo &Info, const Expr *E);
 void diagnosePointerArithmetic(EvalInfo &Info, const Expr *E,
const APSInt &N);
@@ -1732,6 +1747,10 @@ namespace {
   if (checkSubobject(Info, E, Imag ? CSK_Imag : CSK_Real))
 Designator.addComplexUnchecked(EltTy, Imag);
 }
+void addVector(EvalInfo &Info, const Expr *E, const VectorType *VecTy) {
+  if (checkSubobject(Info, E, CSK_ArrayIndex))
+Designator.addVectorUnchecked(VecTy);
+}
 void clearIsNullPointer() {
   IsNullPtr = false;
 }
@@ -1890,6 +1909,8 @@ static bool EvaluateFixedPointOrInteger(const Expr *E, 
APFixedPoint &Result,
 static bool EvaluateFixedPoint(const Expr *E, APFixedPoint &Result,
EvalInfo &Info);
 
+static bool EvaluateVector(const Expr *E, APValue &Result, EvalInfo &Info);
+
 
//===--===//
 // Misc utilities
 
//===--===//
@@ -3278,6 +3299,19 @@ static bool HandleLValueComplexElement(EvalInfo &Info, 
const Expr *E,
   return true;
 }
 
+static bool HandeLValueVectorComponent(EvalInfo &Info, const Expr *E,
+   LValue &LVal, const VectorType *VecTy,
+   APSInt &Adjustment) {
+  LVal.addVector(Info, E, VecTy);
+
+  CharUnits SizeOfComponent;
+  if (!HandleSizeof(Info, E->getExprLoc(), VecTy->getElementType(),
+SizeOfComponent))
+return false;
+  LVal.adjustOffsetAndIndex(Info, E, Adjustment, SizeOfComponent);
+  return true;
+}
+
 /// Try to evaluate the initializer for a variable declaration.
 ///
 /// \param Info   Information about the ongoing evaluation.
@@ -3718,7 +3752,8 @@ findSubobject(EvalInfo &Info, const Expr *E, const 
CompleteObject &Obj,
 }
 
 // If this is our last pass, check that the final object type is OK.
-if (I == N || (I == N - 1 && ObjType->isAnyComplexType())) {
+if (I == N || (I == N - 1 &&
+   (ObjType->isAnyComplexType() || ObjType->isVectorType( {
   // Accesses to volatile objects are prohibited.
   if (ObjType.isVolatileQualified() && isFormalAccess(handler.AccessKind)) 
{
 if (Info.getLangOpts().CPlusPlus) {
@@ -3823,6 +3858,10 @@ findSubobject(EvalInfo &Info, const Expr *E, const 
CompleteObject &Obj,
 return handler.found(Index ? O->getComplexFloatImag()
: O->getComplexFloatReal(), ObjType);
   }
+} else if (ObjType->isVectorType()) {
+  // Next Subobject is a vector element
+  uint64_t Index = Sub.Entries[I].getAsArrayIndex();
+  O = &O->getVectorElt(Index);
 } else if (const FieldDecl *Field = getAsField(Sub.Entries[I])) {
   if (Field->isMutable() &&
   !Obj.mayAccessMutableMembers(Info, handler.AccessKind)) {
@@ -8756,14 +8795,28 @@ bool LValueExprEvaluator::VisitMemberExpr(const 
MemberExpr *E) {
 }
 
 bool LValueExprEvaluator::VisitArraySubscriptExpr(const ArraySubscriptExpr *E) 
{
-  // FIXME: Deal with vectors as array subscript bases.
-  if (E->getBase()->getType()->isVectorType() ||
-  E->getBase()->getType()->isSveVLSBuil

[clang] [Clang] support vector subscript expressions in constant evaluator (WIP) (PR #76379)

2023-12-27 Thread Vikram Hegde via cfe-commits

https://github.com/vikramRH closed 
https://github.com/llvm/llvm-project/pull/76379
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [Clang] support vector subscript expressions in constant evaluator (WIP) (PR #76379)

2023-12-27 Thread Vikram Hegde via cfe-commits

vikramRH wrote:

Putting this on hold hold as @yuanfang-chen already has a PR

https://github.com/llvm/llvm-project/pull/76379
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [AMDGPU] Enable OpenCL hostcall printf (WIP) (PR #72556)

2024-03-14 Thread Vikram Hegde via cfe-commits


@@ -202,12 +207,20 @@ RValue 
CodeGenFunction::EmitAMDGPUDevicePrintfCallExpr(const CallExpr *E) {
 Args.push_back(Arg);
   }
 
-  llvm::IRBuilder<> IRB(Builder.GetInsertBlock(), Builder.GetInsertPoint());
-  IRB.SetCurrentDebugLocation(Builder.getCurrentDebugLocation());
+  auto PFK = CGM.getTarget().getTargetOpts().AMDGPUPrintfKindVal;
+  bool isBuffered = (PFK == clang::TargetOptions::AMDGPUPrintfKind::Buffered);
+
+  StringRef FmtStr;
+  if (llvm::getConstantStringInfo(Args[0], FmtStr)) {
+if (FmtStr.empty())
+  FmtStr = StringRef("", 1);
+  } else {
+assert(!CGM.getLangOpts().OpenCL &&
+   "OpenCL needs compile time resolvable format string");

vikramRH wrote:

Done

https://github.com/llvm/llvm-project/pull/72556
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [AMDGPU] Enable OpenCL hostcall printf (WIP) (PR #72556)

2024-03-14 Thread Vikram Hegde via cfe-commits


@@ -3616,6 +3617,12 @@ unsigned FunctionDecl::getBuiltinID(bool 
ConsiderWrapperFunctions) const {
   if (!ConsiderWrapperFunctions && getStorageClass() == SC_Static)
 return 0;
 
+  // AMDGCN implementation supports printf as a builtin
+  // for OpenCL
+  if (Context.getTargetInfo().getTriple().isAMDGCN() &&
+  Context.getLangOpts().OpenCL && BuiltinID == AMDGPU::BIprintf)
+return BuiltinID;

vikramRH wrote:

Only other alternative I see currently is to modify Sema (probably 
ActOnFunctionDeclarator) so that we map the ocl printf declaration to C printf 
builtin ID. This would be a really hacky solution and I would prefer this 
implementation.

https://github.com/llvm/llvm-project/pull/72556
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [AMDGPU] Enable OpenCL hostcall printf (WIP) (PR #72556)

2024-03-15 Thread Vikram Hegde via cfe-commits


@@ -3616,6 +3617,12 @@ unsigned FunctionDecl::getBuiltinID(bool 
ConsiderWrapperFunctions) const {
   if (!ConsiderWrapperFunctions && getStorageClass() == SC_Static)
 return 0;
 
+  // AMDGCN implementation supports printf as a builtin
+  // for OpenCL
+  if (Context.getTargetInfo().getTriple().isAMDGCN() &&
+  Context.getLangOpts().OpenCL && BuiltinID == AMDGPU::BIprintf)
+return BuiltinID;

vikramRH wrote:

ping @arsenm 

https://github.com/llvm/llvm-project/pull/72556
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [AMDGPU] Enable OpenCL hostcall printf (WIP) (PR #72556)

2024-03-18 Thread Vikram Hegde via cfe-commits


@@ -3616,6 +3617,12 @@ unsigned FunctionDecl::getBuiltinID(bool 
ConsiderWrapperFunctions) const {
   if (!ConsiderWrapperFunctions && getStorageClass() == SC_Static)
 return 0;
 
+  // AMDGCN implementation supports printf as a builtin
+  // for OpenCL
+  if (Context.getTargetInfo().getTriple().isAMDGCN() &&
+  Context.getLangOpts().OpenCL && BuiltinID == AMDGPU::BIprintf)
+return BuiltinID;

vikramRH wrote:

@arsenm, thanks for the info. CustomTypeChecking is a valid option. I'm not 
sure why OpenCL community did not consider this change despite OpenCL specs 
specifying the details. I could create a separate patch for this (probably 
folks from OCL community would provide further background). Meanwhile, can this 
go ahead as an AMDGPU specific workaround for now so that we have the intended 
feature in place ? (The frontend changes here can be reverted with that follow 
up patch )

PS :Also, I see another issue . OpenCL v1.2 s6.9.f states none of the functions 
defined in C99 headers are available. This would mean std printf is supposed to 
be treated differently than OpenCL builtins and consequently the builtin IDs 
assigned to them "need" to be different. If this understanding is correct, 
moving ahead with using same builtin ID as std printf is not the right way.

https://github.com/llvm/llvm-project/pull/72556
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [AMDGPU] Enable OpenCL hostcall printf (WIP) (PR #72556)

2024-03-18 Thread Vikram Hegde via cfe-commits

https://github.com/vikramRH edited 
https://github.com/llvm/llvm-project/pull/72556
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [AMDGPU] Enable OpenCL hostcall printf (WIP) (PR #72556)

2024-03-18 Thread Vikram Hegde via cfe-commits

https://github.com/vikramRH edited 
https://github.com/llvm/llvm-project/pull/72556
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [AMDGPU] Enable OpenCL hostcall printf (WIP) (PR #72556)

2024-03-18 Thread Vikram Hegde via cfe-commits


@@ -3616,6 +3617,12 @@ unsigned FunctionDecl::getBuiltinID(bool 
ConsiderWrapperFunctions) const {
   if (!ConsiderWrapperFunctions && getStorageClass() == SC_Static)
 return 0;
 
+  // AMDGCN implementation supports printf as a builtin
+  // for OpenCL
+  if (Context.getTargetInfo().getTriple().isAMDGCN() &&
+  Context.getLangOpts().OpenCL && BuiltinID == AMDGPU::BIprintf)
+return BuiltinID;

vikramRH wrote:

I was referring to 
https://github.com/llvm/llvm-project/blob/3e6db602918435b6a5ac476f63f8b259e7e73af4/clang/lib/AST/Decl.cpp#L3633.
 This essentially means that even if frontend attaches the printf builtin ID to 
the decl (even after custom type checks), this would revert.

https://github.com/llvm/llvm-project/pull/72556
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [RFC][Clang] Enable custom type checking for printf (PR #86801)

2024-03-27 Thread Vikram Hegde via cfe-commits

https://github.com/vikramRH created 
https://github.com/llvm/llvm-project/pull/86801

The motivation for this change comes from an ongoing PR (#72556 ) , which 
enables hostcall based printf lowering for AMDGPU target and OpenCL inputs. The 
OpenCL printf has a different signature than the C printf. the difference being 
the explicit address space specifier for format string arg as follows
  int printf(__constant const char* st, ...) __attribute__((format(printf, 1, 
2)));
This is not considered a builtin because of the type mismatch.

The patch #72556 tried to address this scenario by declaring OCL printf 
essentially as a target specific printf overload. However, the discussions 
there resulted in the decision that this should not be target-specific.
 
The idea in this patch is that the changes are NFC for current framework (i.e 
the semantic checks for printf are preserved) however the printf declarations 
are considered builtins now regardless of LangOpt. This would allow me to 
hanlde the printf CodeGen without any target specific hacks.

PS: feel free to add additional reviewers, I'm not aware of others who could 
comment here.


>From a2731d056cee9c4e75d49f8d4fa3325dc532b207 Mon Sep 17 00:00:00 2001
From: Vikram 
Date: Wed, 27 Mar 2024 04:24:10 -0400
Subject: [PATCH] [Clang] Implement custom type checking for printf

---
 clang/include/clang/Basic/Builtins.td |  4 +--
 clang/include/clang/Sema/Sema.h   |  1 +
 clang/lib/AST/Decl.cpp|  6 +++-
 clang/lib/Basic/Builtins.cpp  |  3 +-
 clang/lib/CodeGen/CGBuiltin.cpp   |  2 +-
 clang/lib/Sema/SemaChecking.cpp   | 51 +++
 6 files changed, 62 insertions(+), 5 deletions(-)

diff --git a/clang/include/clang/Basic/Builtins.td 
b/clang/include/clang/Basic/Builtins.td
index 52c0dd52c28b11..f795c7c42c7b25 100644
--- a/clang/include/clang/Basic/Builtins.td
+++ b/clang/include/clang/Basic/Builtins.td
@@ -2816,14 +2816,14 @@ def StrLen : LibBuiltin<"string.h"> {
 // FIXME: This list is incomplete.
 def Printf : LibBuiltin<"stdio.h"> {
   let Spellings = ["printf"];
-  let Attributes = [PrintfFormat<0>];
+  let Attributes = [PrintfFormat<0>, CustomTypeChecking];
   let Prototype = "int(char const*, ...)";
 }
 
 // FIXME: The builtin and library function should have the same signature.
 def BuiltinPrintf : Builtin {
   let Spellings = ["__builtin_printf"];
-  let Attributes = [NoThrow, PrintfFormat<0>, FunctionWithBuiltinPrefix];
+  let Attributes = [NoThrow, PrintfFormat<0>, FunctionWithBuiltinPrefix, 
CustomTypeChecking];
   let Prototype = "int(char const* restrict, ...)";
 }
 
diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h
index 5ecd2f9eb2881f..b18b208a75bdf4 100644
--- a/clang/include/clang/Sema/Sema.h
+++ b/clang/include/clang/Sema/Sema.h
@@ -2245,6 +2245,7 @@ class Sema final {
 
   bool SemaBuiltinVAStart(unsigned BuiltinID, CallExpr *TheCall);
   bool SemaBuiltinVAStartARMMicrosoft(CallExpr *Call);
+  bool SemaBuiltinPrintf(FunctionDecl *FDecl, CallExpr *TheCall);
   bool SemaBuiltinUnorderedCompare(CallExpr *TheCall, unsigned BuiltinID);
   bool SemaBuiltinFPClassification(CallExpr *TheCall, unsigned NumArgs,
unsigned BuiltinID);
diff --git a/clang/lib/AST/Decl.cpp b/clang/lib/AST/Decl.cpp
index 131f82985e903b..298223f874cda3 100644
--- a/clang/lib/AST/Decl.cpp
+++ b/clang/lib/AST/Decl.cpp
@@ -3629,8 +3629,12 @@ unsigned FunctionDecl::getBuiltinID(bool 
ConsiderWrapperFunctions) const {
   // OpenCL v1.2 s6.9.f - The library functions defined in
   // the C99 standard headers are not available.
   if (Context.getLangOpts().OpenCL &&
-  Context.BuiltinInfo.isPredefinedLibFunction(BuiltinID))
+  Context.BuiltinInfo.isPredefinedLibFunction(BuiltinID)) {
+if (Context.getLangOpts().getOpenCLCompatibleVersion() >= 120 &&
+(BuiltinID == Builtin::BIprintf))
+  return BuiltinID;
 return 0;
+  }
 
   // CUDA does not have device-side standard library. printf and malloc are the
   // only special cases that are supported by device-side runtime.
diff --git a/clang/lib/Basic/Builtins.cpp b/clang/lib/Basic/Builtins.cpp
index 3467847ac1672e..25590ed9299e8b 100644
--- a/clang/lib/Basic/Builtins.cpp
+++ b/clang/lib/Basic/Builtins.cpp
@@ -235,7 +235,8 @@ bool Builtin::Context::performsCallback(unsigned ID,
 
 bool Builtin::Context::canBeRedeclared(unsigned ID) const {
   return ID == Builtin::NotBuiltin || ID == Builtin::BI__va_start ||
- ID == Builtin::BI__builtin_assume_aligned ||
+ ID == Builtin::BI__builtin_assume_aligned || ID == Builtin::BIprintf 
||
+ ID == Builtin::BI__builtin_printf ||
  (!hasReferenceArgsOrResult(ID) && !hasCustomTypechecking(ID)) ||
  isInStdNamespace(ID);
 }
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 3cfdb261a0eac0..baed36acb12437 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp

[clang] [AMDGPU] Treat printf as builtin for OpenCL (PR #72554)

2024-03-27 Thread Vikram Hegde via cfe-commits

https://github.com/vikramRH closed 
https://github.com/llvm/llvm-project/pull/72554
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [AMDGPU] Treat printf as builtin for OpenCL (PR #72554)

2024-03-27 Thread Vikram Hegde via cfe-commits

vikramRH wrote:

closing this in favour of https://github.com/llvm/llvm-project/pull/86801

https://github.com/llvm/llvm-project/pull/72554
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [RFC][Clang] Enable custom type checking for printf (PR #86801)

2024-03-27 Thread Vikram Hegde via cfe-commits

https://github.com/vikramRH ready_for_review 
https://github.com/llvm/llvm-project/pull/86801
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [RFC][Clang] Enable custom type checking for printf (PR #86801)

2024-03-28 Thread Vikram Hegde via cfe-commits

vikramRH wrote:

Thanks for the comments @AaronBallman. The core issue here is that the current 
builtin handling design does not allow multiple overloads for the same 
identifier to coexist  (ref. 
https://github.com/llvm/llvm-project/blob/eacda36c7dd842cb15c0c954eda74b67d0c73814/clang/include/clang/Basic/Builtins.h#L66),
 unless the builtins are defined in target specific namespaces which is what I 
tried in my original patch . If we want change this approach, I currently think 
of couple of ways at a top level
1. As you said, we could have OCL specific LibBuiltin and LangBuiltin TableGen 
classes (and corresponding macros in Buitlins.inc). To make this work they 
would need new builtin ID's  of different form (say "BI_OCL##"). 
This is very Language specific.
2. Probably change the current Builtin Info structure to allow vector of 
possible signatures for an identifier. The builtin type decoder could choose 
the appropriate signature based on LangOpt. (This wording is vague and could be 
a separate discussion in itself )

either way, changes in current design are required. printf is the only current 
use case I know that can benefit out of this (since OpenCL v1.2 s6.9.f says 
other library functions defined in C standard header are not available ,so 🤷‍♂️ 
 ). But I guess we could have more use cases in future. can this be a separate 
discussion ? This patch would unblock my current work for now. 
 


https://github.com/llvm/llvm-project/pull/86801
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [RFC][Clang] Enable custom type checking for printf (PR #86801)

2024-03-30 Thread Vikram Hegde via cfe-commits

vikramRH wrote:

> I looked at the OpenCL spec for C standard library support and was surprised 
> that 1) it's only talking about C99 so it's unclear what happens for C11 
> (clause 6 says "This document describes the modifications and restrictions to 
> C99 and C11 in OpenCL C" but 6.11 only talks about C99 headers and leaves 
> `iso646.h`, `math.h`, `stdbool.h`, `stddef.h`, (all in C99) as well as 
> `stdalign.h`, `stdatomic.h`, `stdnoreturn.h`, `threads.h`, and `uchar.h` 
> available?), and 2) OpenCL's `printf` is not really the same function as C's 
> `printf` 
> (https://registry.khronos.org/OpenCL/specs/3.0-unified/html/OpenCL_C.html#differences-between-opencl-c-and-c99-printf).
> 
> #1 is probably more of an oversight than anything, at least with the C11 
> headers. So maybe this isn't a super slippery slope, but maybe C23 will 
> change that (I can imagine `stdbit.h` being of use in OpenCL for bit-bashing 
> operations). However, the fact that the builtin isn't really `printf` but is 
> `printf`-like makes me think we should make it a separate builtin to avoid 
> surprises (we do diagnostics based on builtin IDs and we have special 
> checking logic that we perhaps should be exempting in some cases).

Understood. Then I propose the following. 
1. Currently Builtin TableGen does not seem to support specifying lang address 
spaces in function prototypes. this needs to be implemented first if not 
already in development.
2. We could have two new macro variants probably named "OCL_BUILTIN" and 
"OCL_LIB_BUILTIN" which will take the ID's of the form 
"BI_OCL##". we would also need corresponding TableGen classes 
(probably named similar to the macros) which can expose such overloaded 
prototypes when required.

How does this sound ?


https://github.com/llvm/llvm-project/pull/86801
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [AMDGPU] Enable OpenCL hostcall printf (WIP) (PR #72556)

2024-03-01 Thread Vikram Hegde via cfe-commits


@@ -3616,6 +3617,12 @@ unsigned FunctionDecl::getBuiltinID(bool 
ConsiderWrapperFunctions) const {
   if (!ConsiderWrapperFunctions && getStorageClass() == SC_Static)
 return 0;
 
+  // AMDGCN implementation supports printf as a builtin
+  // for OpenCL
+  if (Context.getTargetInfo().getTriple().isAMDGCN() &&
+  Context.getLangOpts().OpenCL && BuiltinID == AMDGPU::BIprintf)
+return BuiltinID;

vikramRH wrote:

The signatures of C-printf and OCL printf differ and I dont think generic 
builtin handling provides a way to register overloaded builtins with "shared" 
builtin ID's. do you have any alternate suggestions here ?

https://github.com/llvm/llvm-project/pull/72556
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [AMDGPU] Enable OpenCL hostcall printf (WIP) (PR #72556)

2024-03-01 Thread Vikram Hegde via cfe-commits


@@ -2550,6 +2550,11 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl 
GD, unsigned BuiltinID,
   &getTarget().getLongDoubleFormat() == &llvm::APFloat::IEEEquad())
 BuiltinID = mutateLongDoubleBuiltin(BuiltinID);
 
+  // Mutate the printf builtin ID so that we use the same CodeGen path for
+  // HIP and OpenCL with AMDGPU targets.
+  if (getTarget().getTriple().isAMDGCN() && BuiltinID == AMDGPU::BIprintf)
+BuiltinID = Builtin::BIprintf;

vikramRH wrote:

This can be removed if you feel so, probably we would need a new case in Expr 
CodeGen

https://github.com/llvm/llvm-project/pull/72556
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [AMDGPU] Enable OpenCL hostcall printf (WIP) (PR #72556)

2024-03-01 Thread Vikram Hegde via cfe-commits


@@ -202,12 +207,20 @@ RValue 
CodeGenFunction::EmitAMDGPUDevicePrintfCallExpr(const CallExpr *E) {
 Args.push_back(Arg);
   }
 
-  llvm::IRBuilder<> IRB(Builder.GetInsertBlock(), Builder.GetInsertPoint());
-  IRB.SetCurrentDebugLocation(Builder.getCurrentDebugLocation());
+  auto PFK = CGM.getTarget().getTargetOpts().AMDGPUPrintfKindVal;
+  bool isBuffered = (PFK == clang::TargetOptions::AMDGPUPrintfKind::Buffered);
+
+  StringRef FmtStr;
+  if (llvm::getConstantStringInfo(Args[0], FmtStr)) {
+if (FmtStr.empty())
+  FmtStr = StringRef("", 1);

vikramRH wrote:

not really. This is just to say the format string is not really empty (i.e size 
= 0) when the user input is an empty format string (a weird corner case)

https://github.com/llvm/llvm-project/pull/72556
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [AMDGPU] Enable OpenCL hostcall printf (WIP) (PR #72556)

2024-02-20 Thread Vikram Hegde via cfe-commits

vikramRH wrote:

The new set of changes adds following changes,
1. The iteration over vector elements now happens using vector size from the 
format specifier as reference, this is inline with runtime implementation and 
helps handling undefined behavior when we have a mismatch.
2. The error flag "-Werror=format-invalid-specifier" has been removed.

https://github.com/llvm/llvm-project/pull/72556
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [AMDGPU] Enable OpenCL hostcall printf (WIP) (PR #72556)

2024-02-21 Thread Vikram Hegde via cfe-commits


@@ -178,17 +181,29 @@ RValue 
CodeGenFunction::EmitNVPTXDevicePrintfCallExpr(const CallExpr *E) {
   E, this, GetVprintfDeclaration(CGM.getModule()), false);
 }
 
+// Deterimines if an argument is a string
+static bool isString(const clang::Type *argXTy) {

vikramRH wrote:

I have removed this, addrspace cast is done during arg processing.

https://github.com/llvm/llvm-project/pull/72556
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [AMDGPU] Extend readlane, writelane and readfirstlane intrinsic lowering for generic types (PR #89217)

2024-06-12 Thread Vikram Hegde via cfe-commits

vikramRH wrote:

> @jayfoad's testcase fails and the same test should be repeated for all 3 
> intrinsics

added MIR tests for 3 intrinsics. The issue is that Im not able to attach the 
glue nodes to newly created laneop pieces since they fail at selection. 
https://github.com/llvm/llvm-project/pull/87509 should enable this,

https://github.com/llvm/llvm-project/pull/89217
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [AMDGPU] Extend readlane, writelane and readfirstlane intrinsic lowering for generic types (PR #89217)

2024-06-12 Thread Vikram Hegde via cfe-commits


@@ -0,0 +1,46 @@
+# RUN: not --crash llc -mtriple=amdgcn -run-pass=none -verify-machineinstrs -o 
/dev/null %s 2>&1 | FileCheck %s

vikramRH wrote:

Okay, I'll update with IR's

https://github.com/llvm/llvm-project/pull/89217
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [AMDGPU] Extend readlane, writelane and readfirstlane intrinsic lowering for generic types (PR #89217)

2024-06-12 Thread Vikram Hegde via cfe-commits

vikramRH wrote:

> > > @jayfoad's testcase fails and the same test should be repeated for all 3 
> > > intrinsics
> > 
> > 
> > added MIR tests for 3 intrinsics. The issue is that Im not able to attach 
> > the glue nodes to newly created laneop pieces since they fail at selection. 
> > #87509 should enable this,
> 
> I am not really comfortable waiting for #87509 to fix convergence tokens in 
> this expansion. Is it really true that this expansion cannot be fixed 
> independent of future work on `CONVERGENCE_GLUE`? There is no way to manually 
> handle the same glue operands??

I guess one way would be to have custom selection for each of the new node type 
introduced, but would this be a proper way forward ? (this would be in general 
for all convergent SDNodes i guess if selection is not made generic)

https://github.com/llvm/llvm-project/pull/89217
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [AMDGPU] Extend readlane, writelane and readfirstlane intrinsic lowering for generic types (PR #89217)

2024-06-12 Thread Vikram Hegde via cfe-commits

vikramRH wrote:

> > > > @jayfoad's testcase fails and the same test should be repeated for all 
> > > > 3 intrinsics
> > > 
> > > 
> > > added MIR tests for 3 intrinsics. The issue is that Im not able to attach 
> > > the glue nodes to newly created laneop pieces since they fail at 
> > > selection. #87509 should enable this,
> > 
> > 
> > I am not really comfortable waiting for #87509 to fix convergence tokens in 
> > this expansion. Is it really true that this expansion cannot be fixed 
> > independent of future work on `CONVERGENCE_GLUE`? There is no way to 
> > manually handle the same glue operands??
> 
> I guess one way would be to have custom selection for each of the new node 
> type introduced, but would this be a proper way forward ? (this would be in 
> general for all convergent SDNodes i guess if selection is not made generic)

Or drop the new nodes altogether and legelaize to intrinsics directly ?

https://github.com/llvm/llvm-project/pull/89217
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [AMDGPU] Extend readlane, writelane and readfirstlane intrinsic lowering for generic types (PR #89217)

2024-06-14 Thread Vikram Hegde via cfe-commits

vikramRH wrote:





> That's another option. The only real plus to the intermediate is it's 
> slightly less annoying to write combines for. But there are limited combining 
> opportunities for these

we now legalize to intrinsics directly. The SDAG lowering uses a new helper to 
unroll vector cases while also handling convergence tokens

https://github.com/llvm/llvm-project/pull/89217
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [AMDGPU] Extend readlane, writelane and readfirstlane intrinsic lowering for generic types (PR #89217)

2024-06-14 Thread Vikram Hegde via cfe-commits


@@ -0,0 +1,65 @@
+; RUN: llc -stop-after=amdgpu-isel -mtriple=amdgcn-- -mcpu=gfx1100 
-verify-machineinstrs -o - %s | FileCheck --check-prefixes=CHECK,ISEL %s
+
+; CHECK-LABEL: name:basic_readfirstlane_i64
+;   CHECK:[[TOKEN:%[0-9]+]]{{[^ ]*}} = CONVERGENCECTRL_ANCHOR

vikramRH wrote:

I currently see machine verifier failure which is not related to this patch. An 
 i32 example with trunc here, https://godbolt.org/z/he8asMe77. 
This is also seen with wider type legalizations that we do now, so I cannot 
integrate these with existing tests just yet. am I missing something here ?

https://github.com/llvm/llvm-project/pull/89217
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [AMDGPU] Extend readlane, writelane and readfirstlane intrinsic lowering for generic types (PR #89217)

2024-06-14 Thread Vikram Hegde via cfe-commits


@@ -0,0 +1,65 @@
+; RUN: llc -stop-after=amdgpu-isel -mtriple=amdgcn-- -mcpu=gfx1100 
-verify-machineinstrs -o - %s | FileCheck --check-prefixes=CHECK,ISEL %s
+
+; CHECK-LABEL: name:basic_readfirstlane_i64
+;   CHECK:[[TOKEN:%[0-9]+]]{{[^ ]*}} = CONVERGENCECTRL_ANCHOR

vikramRH wrote:

this is a preexisting error, and the failure is further down the pipeline. 
(after sreg alloc now i guess), does it make sense to have it as xfail now 
rather then stopping after isel? 

https://github.com/llvm/llvm-project/pull/89217
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [AMDGPU] Extend readlane, writelane and readfirstlane intrinsic lowering for generic types (PR #89217)

2024-06-14 Thread Vikram Hegde via cfe-commits

https://github.com/vikramRH edited 
https://github.com/llvm/llvm-project/pull/89217
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [AMDGPU] Extend readlane, writelane and readfirstlane intrinsic lowering for generic types (PR #89217)

2024-06-16 Thread Vikram Hegde via cfe-commits


@@ -0,0 +1,65 @@
+; RUN: llc -stop-after=amdgpu-isel -mtriple=amdgcn-- -mcpu=gfx1100 
-verify-machineinstrs -o - %s | FileCheck --check-prefixes=CHECK,ISEL %s
+
+; CHECK-LABEL: name:basic_readfirstlane_i64
+;   CHECK:[[TOKEN:%[0-9]+]]{{[^ ]*}} = CONVERGENCECTRL_ANCHOR

vikramRH wrote:

Makes sense, updated.

https://github.com/llvm/llvm-project/pull/89217
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [clang][ExprConst] allow single element access of vector object to be constant expression (PR #72607)

2024-06-16 Thread Vikram Hegde via cfe-commits

vikramRH wrote:

@yuanfang-chen , @AaronBallman, @shafik,  are we still actively looking into 
this ? (I would be willing to commandeer this if its not high on your priority 
list)

https://github.com/llvm/llvm-project/pull/72607
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [AMDGPU][WIP] Extend permlane16, permlanex16 and permlane64 intrinsic lowering for generic types (PR #92725)

2024-06-17 Thread Vikram Hegde via cfe-commits


@@ -18479,6 +18479,28 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned 
BuiltinID,
 CGM.getIntrinsic(Intrinsic::amdgcn_update_dpp, Args[0]->getType());
 return Builder.CreateCall(F, Args);
   }
+  case AMDGPU::BI__builtin_amdgcn_permlane16:
+  case AMDGPU::BI__builtin_amdgcn_permlanex16: {
+llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));

vikramRH wrote:

added a new helper

https://github.com/llvm/llvm-project/pull/92725
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [AMDGPU][WIP] Extend permlane16, permlanex16 and permlane64 intrinsic lowering for generic types (PR #92725)

2024-06-17 Thread Vikram Hegde via cfe-commits

vikramRH wrote:

Updated this PR to be in sync with #89217, However still plan is to land this 
land this only after changes in #89217 are accepted.

https://github.com/llvm/llvm-project/pull/92725
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [clang][ExprConst] allow single element access of vector object to be constant expression (PR #72607)

2024-06-18 Thread Vikram Hegde via cfe-commits

vikramRH wrote:

> Hello @vikramRH, please feel free to commandeer this.

Thanks @yuanfang-chen. Also, clang already rejects expressions like &V[0] 
(https://godbolt.org/z/eGcxzGo66), which is also true with constexprs and this 
PR. What's the specific concern here ?

https://github.com/llvm/llvm-project/pull/72607
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [AMDGPU] Extend readlane, writelane and readfirstlane intrinsic lowering for generic types (PR #89217)

2024-05-28 Thread Vikram Hegde via cfe-commits


@@ -6086,6 +6086,63 @@ static SDValue lowerBALLOTIntrinsic(const 
SITargetLowering &TLI, SDNode *N,
   DAG.getConstant(0, SL, MVT::i32), DAG.getCondCode(ISD::SETNE));
 }
 
+static SDValue lowerLaneOp(const SITargetLowering &TLI, SDNode *N,
+   SelectionDAG &DAG) {
+  EVT VT = N->getValueType(0);
+  unsigned ValSize = VT.getSizeInBits();
+  unsigned IntrinsicID = N->getConstantOperandVal(0);
+  SDValue Src0 = N->getOperand(1);
+  SDLoc SL(N);
+  MVT IntVT = MVT::getIntegerVT(ValSize);
+
+  auto createLaneOp = [&DAG, &SL](SDValue Src0, SDValue Src1, SDValue Src2,
+  MVT VT) -> SDValue {
+return (Src2 ? DAG.getNode(AMDGPUISD::WRITELANE, SL, VT, {Src0, Src1, 
Src2})
+: Src1 ? DAG.getNode(AMDGPUISD::READLANE, SL, VT, {Src0, Src1})
+   : DAG.getNode(AMDGPUISD::READFIRSTLANE, SL, VT, {Src0}));
+  };
+
+  SDValue Src1, Src2;
+  if (IntrinsicID == Intrinsic::amdgcn_readlane ||
+  IntrinsicID == Intrinsic::amdgcn_writelane) {
+Src1 = N->getOperand(2);
+if (IntrinsicID == Intrinsic::amdgcn_writelane)
+  Src2 = N->getOperand(3);
+  }
+
+  if (ValSize == 32) {
+// Already legal
+return SDValue();
+  }
+
+  if (ValSize < 32) {
+bool IsFloat = VT.isFloatingPoint();
+Src0 = DAG.getAnyExtOrTrunc(IsFloat ? DAG.getBitcast(IntVT, Src0) : Src0,
+SL, MVT::i32);
+if (Src2.getNode()) {
+  Src2 = DAG.getAnyExtOrTrunc(IsFloat ? DAG.getBitcast(IntVT, Src2) : Src2,
+  SL, MVT::i32);
+}
+SDValue LaneOp = createLaneOp(Src0, Src1, Src2, MVT::i32);
+SDValue Trunc = DAG.getAnyExtOrTrunc(LaneOp, SL, IntVT);
+return IsFloat ? DAG.getBitcast(VT, Trunc) : Trunc;
+  }
+
+  if ((ValSize % 32) == 0) {
+MVT VecVT = MVT::getVectorVT(MVT::i32, ValSize / 32);
+Src0 = DAG.getBitcast(VecVT, Src0);
+
+if (Src2.getNode())
+  Src2 = DAG.getBitcast(VecVT, Src2);
+
+SDValue LaneOp = createLaneOp(Src0, Src1, Src2, VecVT);
+SDValue UnrolledLaneOp = DAG.UnrollVectorOp(LaneOp.getNode());
+return DAG.getBitcast(VT, UnrolledLaneOp);

vikramRH wrote:

```suggestion
  MVT LaneOpT =
VT.isVector() && VT.getVectorElementType().getSizeInBits() == 16
? MVT::v2i16
: MVT::i32;
SDValue Src0SubReg, Src2SubReg;
SmallVector LaneOps;
LaneOps.push_back(DAG.getTargetConstant(
TLI.getRegClassFor(VT.getSimpleVT(), N->isDivergent())->getID(), SL,
MVT::i32));
for (unsigned i = 0; i < (ValSize / 32); i++) {
  unsigned SubRegIdx = SIRegisterInfo::getSubRegFromChannel(i);
  Src0SubReg = DAG.getTargetExtractSubreg(SubRegIdx, SL, LaneOpT, Src0);
  if (Src2)
Src2SubReg = DAG.getTargetExtractSubreg(SubRegIdx, SL, LaneOpT, Src2);
  LaneOps.push_back(createLaneOp(Src0SubReg, Src1, Src2SubReg, LaneOpT));
  LaneOps.push_back(DAG.getTargetConstant(SubRegIdx, SL, MVT::i32));
}
return SDValue(
DAG.getMachineNode(TargetOpcode::REG_SEQUENCE, SL, VT, LaneOps), 0);
```

@arsenm , @jayfoad , an alternate idea here that is much closer in logic to the 
GIsel implementation and doesn't rely on bitcasts. how does this look ? 

https://github.com/llvm/llvm-project/pull/89217
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [AMDGPU][WIP] Extend permlane16, permlanex16 and permlane64 intrinsic lowering for generic types (PR #92725)

2024-05-29 Thread Vikram Hegde via cfe-commits

https://github.com/vikramRH edited 
https://github.com/llvm/llvm-project/pull/92725
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [AMDGPU][WIP] Extend permlane16, permlanex16 and permlane64 intrinsic lowering for generic types (PR #92725)

2024-05-29 Thread Vikram Hegde via cfe-commits

vikramRH wrote:

1. Added/updated tests for permlanex16, permlane64
2. This needs https://github.com/llvm/llvm-project/pull/89217 to land first so 
that only incremental changes can be reviewed. 

https://github.com/llvm/llvm-project/pull/92725
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [AMDGPU] Extend readlane, writelane and readfirstlane intrinsic lowering for generic types (PR #89217)

2024-05-29 Thread Vikram Hegde via cfe-commits

https://github.com/vikramRH edited 
https://github.com/llvm/llvm-project/pull/89217
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [AMDGPU] Extend readlane, writelane and readfirstlane intrinsic lowering for generic types (PR #89217)

2024-05-29 Thread Vikram Hegde via cfe-commits


@@ -6086,6 +6086,63 @@ static SDValue lowerBALLOTIntrinsic(const 
SITargetLowering &TLI, SDNode *N,
   DAG.getConstant(0, SL, MVT::i32), DAG.getCondCode(ISD::SETNE));
 }
 
+static SDValue lowerLaneOp(const SITargetLowering &TLI, SDNode *N,
+   SelectionDAG &DAG) {
+  EVT VT = N->getValueType(0);
+  unsigned ValSize = VT.getSizeInBits();
+  unsigned IntrinsicID = N->getConstantOperandVal(0);
+  SDValue Src0 = N->getOperand(1);
+  SDLoc SL(N);
+  MVT IntVT = MVT::getIntegerVT(ValSize);
+
+  auto createLaneOp = [&DAG, &SL](SDValue Src0, SDValue Src1, SDValue Src2,
+  MVT VT) -> SDValue {
+return (Src2 ? DAG.getNode(AMDGPUISD::WRITELANE, SL, VT, {Src0, Src1, 
Src2})
+: Src1 ? DAG.getNode(AMDGPUISD::READLANE, SL, VT, {Src0, Src1})
+   : DAG.getNode(AMDGPUISD::READFIRSTLANE, SL, VT, {Src0}));
+  };
+
+  SDValue Src1, Src2;
+  if (IntrinsicID == Intrinsic::amdgcn_readlane ||
+  IntrinsicID == Intrinsic::amdgcn_writelane) {
+Src1 = N->getOperand(2);
+if (IntrinsicID == Intrinsic::amdgcn_writelane)
+  Src2 = N->getOperand(3);
+  }
+
+  if (ValSize == 32) {
+// Already legal
+return SDValue();
+  }
+
+  if (ValSize < 32) {
+bool IsFloat = VT.isFloatingPoint();
+Src0 = DAG.getAnyExtOrTrunc(IsFloat ? DAG.getBitcast(IntVT, Src0) : Src0,
+SL, MVT::i32);
+if (Src2.getNode()) {
+  Src2 = DAG.getAnyExtOrTrunc(IsFloat ? DAG.getBitcast(IntVT, Src2) : Src2,
+  SL, MVT::i32);
+}
+SDValue LaneOp = createLaneOp(Src0, Src1, Src2, MVT::i32);
+SDValue Trunc = DAG.getAnyExtOrTrunc(LaneOp, SL, IntVT);
+return IsFloat ? DAG.getBitcast(VT, Trunc) : Trunc;
+  }
+
+  if ((ValSize % 32) == 0) {
+MVT VecVT = MVT::getVectorVT(MVT::i32, ValSize / 32);

vikramRH wrote:

Understood. Thanks !

https://github.com/llvm/llvm-project/pull/89217
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [AMDGPU] Extend readlane, writelane and readfirstlane intrinsic lowering for generic types (PR #89217)

2024-05-30 Thread Vikram Hegde via cfe-commits


@@ -5387,6 +5387,124 @@ bool 
AMDGPULegalizerInfo::legalizeDSAtomicFPIntrinsic(LegalizerHelper &Helper,
   return true;
 }
 
+// TODO: Fix pointer type handling
+bool AMDGPULegalizerInfo::legalizeLaneOp(LegalizerHelper &Helper,
+ MachineInstr &MI,
+ Intrinsic::ID IID) const {
+
+  MachineIRBuilder &B = Helper.MIRBuilder;
+  MachineRegisterInfo &MRI = *B.getMRI();
+
+  Register DstReg = MI.getOperand(0).getReg();
+  Register Src0 = MI.getOperand(2).getReg();
+
+  auto createLaneOp = [&](Register Src0, Register Src1,
+  Register Src2) -> Register {
+auto LaneOp = B.buildIntrinsic(IID, {S32}).addUse(Src0);
+switch (IID) {
+case Intrinsic::amdgcn_readfirstlane:
+  return LaneOp.getReg(0);
+case Intrinsic::amdgcn_readlane:
+  return LaneOp.addUse(Src1).getReg(0);
+case Intrinsic::amdgcn_writelane:
+  return LaneOp.addUse(Src1).addUse(Src2).getReg(0);
+default:
+  llvm_unreachable("unhandled lane op");
+}
+  };
+
+  Register Src1, Src2;
+  if (IID == Intrinsic::amdgcn_readlane || IID == Intrinsic::amdgcn_writelane) 
{
+Src1 = MI.getOperand(3).getReg();
+if (IID == Intrinsic::amdgcn_writelane) {
+  Src2 = MI.getOperand(4).getReg();
+}
+  }
+
+  LLT Ty = MRI.getType(DstReg);
+  unsigned Size = Ty.getSizeInBits();
+
+  if (Size == 32) {
+// Already legal
+return true;
+  }
+
+  if (Size < 32) {
+Src0 = B.buildAnyExt(S32, Src0).getReg(0);
+if (Src2.isValid())
+  Src2 = B.buildAnyExt(LLT::scalar(32), Src2).getReg(0);
+
+Register LaneOpDst = createLaneOp(Src0, Src1, Src2);
+B.buildTrunc(DstReg, LaneOpDst);
+
+MI.eraseFromParent();
+return true;
+  }
+
+  if ((Size % 32) == 0) {
+SmallVector PartialRes;
+unsigned NumParts = Size / 32;
+LLT PartialResTy =
+Ty.isVector() && Ty.getElementType() == S16 ? V2S16 : S32;

vikramRH wrote:

Done

https://github.com/llvm/llvm-project/pull/89217
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [AMDGPU] Extend readlane, writelane and readfirstlane intrinsic lowering for generic types (PR #89217)

2024-05-30 Thread Vikram Hegde via cfe-commits


@@ -1170,6 +1170,23 @@ The AMDGPU backend implements the following LLVM IR 
intrinsics.
 
   :ref:`llvm.set.fpenv` Sets the floating point 
environment to the specifies state.
 
+  llvm.amdgcn.readfirstlaneProvides direct access to 
v_readfirstlane_b32. Returns the value in
+   the lowest active lane of 
the input operand. Currently 
+   implemented for i16, i32, 
float, half, bf16, v2i16, v2f16 and types 
+   whose sizes are multiples 
of 32-bit.
+
+  llvm.amdgcn.readlane Provides direct access to 
v_readlane_b32. Returns the value in the 
+   specified lane of the first 
input operand. The second operand 
+   specifies the lane to read 
from. Currently implemented
+   for i16, i32, float, half, 
bf16, v2i16, v2f16 and types whose sizes

vikramRH wrote:

Updated

https://github.com/llvm/llvm-project/pull/89217
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [AMDGPU] Extend readlane, writelane and readfirstlane intrinsic lowering for generic types (PR #89217)

2024-05-30 Thread Vikram Hegde via cfe-commits


@@ -6086,6 +6086,63 @@ static SDValue lowerBALLOTIntrinsic(const 
SITargetLowering &TLI, SDNode *N,
   DAG.getConstant(0, SL, MVT::i32), DAG.getCondCode(ISD::SETNE));
 }
 
+static SDValue lowerLaneOp(const SITargetLowering &TLI, SDNode *N,
+   SelectionDAG &DAG) {
+  EVT VT = N->getValueType(0);
+  unsigned ValSize = VT.getSizeInBits();
+  unsigned IntrinsicID = N->getConstantOperandVal(0);
+  SDValue Src0 = N->getOperand(1);
+  SDLoc SL(N);
+  MVT IntVT = MVT::getIntegerVT(ValSize);
+
+  auto createLaneOp = [&DAG, &SL](SDValue Src0, SDValue Src1, SDValue Src2,
+  MVT VT) -> SDValue {
+return (Src2 ? DAG.getNode(AMDGPUISD::WRITELANE, SL, VT, {Src0, Src1, 
Src2})
+: Src1 ? DAG.getNode(AMDGPUISD::READLANE, SL, VT, {Src0, Src1})
+   : DAG.getNode(AMDGPUISD::READFIRSTLANE, SL, VT, {Src0}));
+  };
+
+  SDValue Src1, Src2;
+  if (IntrinsicID == Intrinsic::amdgcn_readlane ||
+  IntrinsicID == Intrinsic::amdgcn_writelane) {
+Src1 = N->getOperand(2);
+if (IntrinsicID == Intrinsic::amdgcn_writelane)
+  Src2 = N->getOperand(3);
+  }
+
+  if (ValSize == 32) {
+// Already legal
+return SDValue();
+  }
+
+  if (ValSize < 32) {
+bool IsFloat = VT.isFloatingPoint();
+Src0 = DAG.getAnyExtOrTrunc(IsFloat ? DAG.getBitcast(IntVT, Src0) : Src0,
+SL, MVT::i32);
+if (Src2.getNode()) {
+  Src2 = DAG.getAnyExtOrTrunc(IsFloat ? DAG.getBitcast(IntVT, Src2) : Src2,
+  SL, MVT::i32);
+}
+SDValue LaneOp = createLaneOp(Src0, Src1, Src2, MVT::i32);
+SDValue Trunc = DAG.getAnyExtOrTrunc(LaneOp, SL, IntVT);
+return IsFloat ? DAG.getBitcast(VT, Trunc) : Trunc;
+  }
+
+  if ((ValSize % 32) == 0) {
+MVT VecVT = MVT::getVectorVT(MVT::i32, ValSize / 32);

vikramRH wrote:

Updated

https://github.com/llvm/llvm-project/pull/89217
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [AMDGPU] Extend readlane, writelane and readfirstlane intrinsic lowering for generic types (PR #89217)

2024-05-30 Thread Vikram Hegde via cfe-commits


@@ -1170,6 +1170,23 @@ The AMDGPU backend implements the following LLVM IR 
intrinsics.
 
   :ref:`llvm.set.fpenv` Sets the floating point 
environment to the specifies state.
 
+  llvm.amdgcn.readfirstlaneProvides direct access to 
v_readfirstlane_b32. Returns the value in
+   the lowest active lane of 
the input operand. Currently implemented
+   for i16, i32, float, half, 
bf16, <2 x i16>, <2 x half>, <2 x bfloat>,

vikramRH wrote:

done

https://github.com/llvm/llvm-project/pull/89217
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [AMDGPU] Extend readlane, writelane and readfirstlane intrinsic lowering for generic types (PR #89217)

2024-05-30 Thread Vikram Hegde via cfe-commits


@@ -5461,8 +5461,7 @@ bool AMDGPULegalizerInfo::legalizeLaneOp(LegalizerHelper 
&Helper,
 
   SmallVector PartialRes;
   unsigned NumParts = Size / 32;
-  MachineInstrBuilder Src0Parts, Src2Parts;
-  Src0Parts = B.buildUnmerge(PartialResTy, Src0);
+  MachineInstrBuilder Src0Parts = B.buildUnmerge(PartialResTy, Src0), 
Src2Parts;

vikramRH wrote:

Done

https://github.com/llvm/llvm-project/pull/89217
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [AMDGPU] Extend readlane, writelane and readfirstlane intrinsic lowering for generic types (PR #89217)

2024-05-31 Thread Vikram Hegde via cfe-commits


@@ -5496,6 +5496,9 @@ const char* 
AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
   NODE_NAME_CASE(LDS)
   NODE_NAME_CASE(FPTRUNC_ROUND_UPWARD)
   NODE_NAME_CASE(FPTRUNC_ROUND_DOWNWARD)
+  NODE_NAME_CASE(READLANE)
+  NODE_NAME_CASE(READFIRSTLANE)

vikramRH wrote:

done

https://github.com/llvm/llvm-project/pull/89217
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [AMDGPU] Extend readlane, writelane and readfirstlane intrinsic lowering for generic types (PR #89217)

2024-06-03 Thread Vikram Hegde via cfe-commits

vikramRH wrote:

> You should add the mentioned convergence-tokens.ll test function

Added the test in a separate test file

https://github.com/llvm/llvm-project/pull/89217
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [AMDGPU][WIP] Add support for i64/f64 readlane, writelane and readfirstlane operations. (PR #89217)

2024-04-19 Thread Vikram Hegde via cfe-commits

vikramRH wrote:

Added/updated tests for readfirstlane and writelane ops

https://github.com/llvm/llvm-project/pull/89217
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [AMDGPU][WIP] Add support for i64/f64 readlane, writelane and readfirstlane operations. (PR #89217)

2024-04-19 Thread Vikram Hegde via cfe-commits

vikramRH wrote:

Gentle ping :)

https://github.com/llvm/llvm-project/pull/89217
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [AMDGPU][WIP] Add support for i64/f64 readlane, writelane and readfirstlane operations. (PR #89217)

2024-04-22 Thread Vikram Hegde via cfe-commits


@@ -4822,6 +4822,111 @@ static MachineBasicBlock *lowerWaveReduce(MachineInstr 
&MI,
   return RetBB;
 }
 
+static MachineBasicBlock *lowerPseudoLaneOp(MachineInstr &MI,

vikramRH wrote:

@arsenm, would "PreISelIntrinsicLowering" be a proper place for this ?

https://github.com/llvm/llvm-project/pull/89217
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [clang][ExprConst] allow single element access of vector object to be constant expression (PR #101126)

2024-07-29 Thread Vikram Hegde via cfe-commits

https://github.com/vikramRH created 
https://github.com/llvm/llvm-project/pull/101126

None

>From 690901f2370381285afa7cf7c2f7401d89e568f6 Mon Sep 17 00:00:00 2001
From: Vikram 
Date: Mon, 29 Jul 2024 08:56:07 -0400
Subject: [PATCH] [clang][ExprConst] allow single element access of vector
 object to be constant expression

---
 clang/docs/ReleaseNotes.rst   |   3 +
 clang/lib/AST/ExprConstant.cpp| 102 +-
 clang/lib/AST/Interp/State.h  |   3 +-
 clang/test/AST/Interp/builtin-functions.cpp   |  26 ++---
 clang/test/AST/Interp/vectors.cpp |  50 -
 clang/test/CodeGenCXX/temporaries.cpp |  41 ---
 .../constexpr-vectors-access-elements.cpp |  29 +
 7 files changed, 190 insertions(+), 64 deletions(-)
 create mode 100644 clang/test/SemaCXX/constexpr-vectors-access-elements.cpp

diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index ddad083571eb1..2179aaea12387 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -64,6 +64,9 @@ sections with improvements to Clang's support for those 
languages.
 
 C++ Language Changes
 
+- Allow single element access of vector object to be constant expression.
+  Supports the `V.xyzw` syntax and other tidbits as seen in OpenCL.
+  Selecting multiple elements is left as a future work.
 
 C++17 Feature Support
 ^
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index 558e20ed3e423..08f49ac896153 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -222,6 +222,11 @@ namespace {
 ArraySize = 2;
 MostDerivedLength = I + 1;
 IsArray = true;
+  } else if (const auto *VT = Type->getAs()) {
+Type = VT->getElementType();
+ArraySize = VT->getNumElements();
+MostDerivedLength = I + 1;
+IsArray = true;
   } else if (const FieldDecl *FD = getAsField(Path[I])) {
 Type = FD->getType();
 ArraySize = 0;
@@ -268,7 +273,6 @@ namespace {
 /// If the current array is an unsized array, the value of this is
 /// undefined.
 uint64_t MostDerivedArraySize;
-
 /// The type of the most derived object referred to by this address.
 QualType MostDerivedType;
 
@@ -442,6 +446,16 @@ namespace {
   MostDerivedArraySize = 2;
   MostDerivedPathLength = Entries.size();
 }
+
+void addVectorElementUnchecked(QualType EltTy, uint64_t Size,
+   uint64_t Idx) {
+  Entries.push_back(PathEntry::ArrayIndex(Idx));
+  MostDerivedType = EltTy;
+  MostDerivedPathLength = Entries.size();
+  MostDerivedArraySize = 0;
+  MostDerivedIsArrayElement = false;
+}
+
 void diagnoseUnsizedArrayPointerArithmetic(EvalInfo &Info, const Expr *E);
 void diagnosePointerArithmetic(EvalInfo &Info, const Expr *E,
const APSInt &N);
@@ -1737,6 +1751,11 @@ namespace {
   if (checkSubobject(Info, E, Imag ? CSK_Imag : CSK_Real))
 Designator.addComplexUnchecked(EltTy, Imag);
 }
+void addVectorElement(EvalInfo &Info, const Expr *E, QualType EltTy,
+  uint64_t Size, uint64_t Idx) {
+  if (checkSubobject(Info, E, CSK_VectorElement))
+Designator.addVectorElementUnchecked(EltTy, Size, Idx);
+}
 void clearIsNullPointer() {
   IsNullPtr = false;
 }
@@ -3310,6 +3329,19 @@ static bool HandleLValueComplexElement(EvalInfo &Info, 
const Expr *E,
   return true;
 }
 
+static bool HandleLValueVectorElement(EvalInfo &Info, const Expr *E,
+  LValue &LVal, QualType EltTy,
+  uint64_t Size, uint64_t Idx) {
+  if (Idx) {
+CharUnits SizeOfElement;
+if (!HandleSizeof(Info, E->getExprLoc(), EltTy, SizeOfElement))
+  return false;
+LVal.Offset += SizeOfElement * Idx;
+  }
+  LVal.addVectorElement(Info, E, EltTy, Size, Idx);
+  return true;
+}
+
 /// Try to evaluate the initializer for a variable declaration.
 ///
 /// \param Info   Information about the ongoing evaluation.
@@ -3855,6 +3887,19 @@ findSubobject(EvalInfo &Info, const Expr *E, const 
CompleteObject &Obj,
 return handler.found(Index ? O->getComplexFloatImag()
: O->getComplexFloatReal(), ObjType);
   }
+} else if (const auto *VT = ObjType->getAs()) {
+  uint64_t Index = Sub.Entries[I].getAsArrayIndex();
+  if (Index >= VT->getNumElements()) {
+if (Info.getLangOpts().CPlusPlus11)
+  Info.FFDiag(E, diag::note_constexpr_access_past_end)
+  << handler.AccessKind;
+else
+  Info.FFDiag(E);
+return handler.failed();
+  }
+  ObjType = VT->getElementType();
+  assert(I == N - 1 && "extracting subobject of scalar?");
+  return handler.found(O->getVectorElt(Index), ObjType);
 } else 

[clang] [clang][ExprConst] allow single element access of vector object to be constant expression (PR #101126)

2024-07-29 Thread Vikram Hegde via cfe-commits

vikramRH wrote:

* **#101126** https://app.graphite.dev/github/pr/llvm/llvm-project/101126?utm_source=stack-comment-icon";
 target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" 
width="10px" height="10px"/> 👈
* `main`

This stack of pull requests is managed by Graphite. https://stacking.dev/?utm_source=stack-comment";>Learn more about 
stacking.


 Join @vikramRH and the rest of your teammates on https://graphite.dev?utm-source=stack-comment";>https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" 
width="11px" height="11px"/> Graphite
  

https://github.com/llvm/llvm-project/pull/101126
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [clang][ExprConst] allow single element access of vector object to be constant expression (PR #101126)

2024-07-29 Thread Vikram Hegde via cfe-commits

https://github.com/vikramRH edited 
https://github.com/llvm/llvm-project/pull/101126
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [clang][ExprConst] allow single element access of vector object to be constant expression (PR #101126)

2024-07-29 Thread Vikram Hegde via cfe-commits

https://github.com/vikramRH ready_for_review 
https://github.com/llvm/llvm-project/pull/101126
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [clang][ExprConst] allow single element access of vector object to be constant expression (PR #101126)

2024-07-30 Thread Vikram Hegde via cfe-commits


@@ -442,6 +446,16 @@ namespace {
   MostDerivedArraySize = 2;
   MostDerivedPathLength = Entries.size();
 }
+
+void addVectorElementUnchecked(QualType EltTy, uint64_t Size,
+   uint64_t Idx) {
+  Entries.push_back(PathEntry::ArrayIndex(Idx));

vikramRH wrote:

Yes, I thought about having a new accessor of the sort "vectorIndex" but all it 
seems to achieve is just adding new API that returns does the exact same thing 
as array (other than perhaps adding a new meaning to PathEntry value). I will 
update it if you feel this makes sense.

https://github.com/llvm/llvm-project/pull/101126
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [clang][ExprConst] allow single element access of vector object to be constant expression (PR #101126)

2024-07-30 Thread Vikram Hegde via cfe-commits


@@ -3,40 +3,40 @@
 
 typedef int __attribute__((vector_size(16))) VI4;
 constexpr VI4 A = {1,2,3,4};
-static_assert(A[0] == 1, ""); // ref-error {{not an integral constant 
expression}}
-static_assert(A[1] == 2, ""); // ref-error {{not an integral constant 
expression}}
-static_assert(A[2] == 3, ""); // ref-error {{not an integral constant 
expression}}
-static_assert(A[3] == 4, ""); // ref-error {{not an integral constant 
expression}}
+static_assert(A[0] == 1, "");
+static_assert(A[1] == 2, "");
+static_assert(A[2] == 3, "");
+static_assert(A[3] == 4, "");
 
 
 /// FIXME: It would be nice if the note said 'vector' instead of 'array'.
-static_assert(A[12] == 4, ""); // ref-error {{not an integral constant 
expression}} \
-   // expected-error {{not an integral constant 
expression}} \
-   // expected-note {{cannot refer to element 12 
of array of 4 elements in a constant expression}}
+static_assert(A[12] == 4, ""); // both-error {{not an integral constant 
expression}} \
+   // expected-note {{cannot refer to element 12 
of array of 4 elements in a constant expression}} \
+   // ref-note {{read of dereferenced 
one-past-the-end pointer is not allowed in a constant expression}}

vikramRH wrote:

I just kept the original version of the PR, but the message "cannot refer to 
element 12 of array of 4 elements" seems correct here. I shall update this

https://github.com/llvm/llvm-project/pull/101126
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [AMDGPU][WIP] Add support for i64/f64 readlane, writelane and readfirstlane operations. (PR #89217)

2024-05-02 Thread Vikram Hegde via cfe-commits

vikramRH wrote:

new commit extends @jayfoad's implementation with GIsel support.  yet to add 
tests for half, floats and some vectors

https://github.com/llvm/llvm-project/pull/89217
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [AMDGPU][WIP] Add support for i64/f64 readlane, writelane and readfirstlane operations. (PR #89217)

2024-05-05 Thread Vikram Hegde via cfe-commits

vikramRH wrote:

1. Review comments
2. improve GIsel lowering
3. add tests for half, bfloat, float2, ptr, vector of ptr and int
4. removed gfx700 checks from writelane test since it caused issues with f16 
legalization. is this required ?

https://github.com/llvm/llvm-project/pull/89217
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [AMDGPU][WIP] Extend readlane, writelane and readfirstlane intrinsic lowering for generic types (PR #89217)

2024-05-05 Thread Vikram Hegde via cfe-commits

https://github.com/vikramRH edited 
https://github.com/llvm/llvm-project/pull/89217
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [AMDGPU][WIP] Extend readlane, writelane and readfirstlane intrinsic lowering for generic types (PR #89217)

2024-05-09 Thread Vikram Hegde via cfe-commits


@@ -5386,6 +5386,130 @@ bool 
AMDGPULegalizerInfo::legalizeDSAtomicFPIntrinsic(LegalizerHelper &Helper,
   return true;
 }
 
+bool AMDGPULegalizerInfo::legalizeLaneOp(LegalizerHelper &Helper,
+ MachineInstr &MI,
+ Intrinsic::ID IID) const {
+
+  MachineIRBuilder &B = Helper.MIRBuilder;
+  MachineRegisterInfo &MRI = *B.getMRI();
+
+  Register DstReg = MI.getOperand(0).getReg();
+  Register Src0 = MI.getOperand(2).getReg();
+
+  auto createLaneOp = [&](Register &Src0, Register &Src1,
+  Register &Src2) -> Register {
+auto LaneOpDst = B.buildIntrinsic(IID, {S32}).addUse(Src0);
+if (Src2.isValid())
+  return (LaneOpDst.addUse(Src1).addUse(Src2)).getReg(0);
+if (Src1.isValid())
+  return (LaneOpDst.addUse(Src1)).getReg(0);
+return LaneOpDst.getReg(0);
+  };
+
+  Register Src1, Src2, Src0Valid, Src2Valid;
+  if (IID == Intrinsic::amdgcn_readlane || IID == Intrinsic::amdgcn_writelane) 
{
+Src1 = MI.getOperand(3).getReg();
+if (IID == Intrinsic::amdgcn_writelane) {
+  Src2 = MI.getOperand(4).getReg();
+}
+  }
+
+  LLT Ty = MRI.getType(DstReg);
+  unsigned Size = Ty.getSizeInBits();
+
+  if (Size == 32) {
+if (Ty.isScalar())
+  // Already legal
+  return true;
+
+Register Src0Valid = B.buildBitcast(S32, Src0).getReg(0);
+if (Src2.isValid())
+  Src2Valid = B.buildBitcast(S32, Src2).getReg(0);
+Register LaneOp = createLaneOp(Src0Valid, Src1, Src2Valid);
+B.buildBitcast(DstReg, LaneOp);
+MI.eraseFromParent();
+return true;
+  }
+
+  if (Size < 32) {
+Register Src0Cast = MRI.getType(Src0).isScalar()
+? Src0
+: B.buildBitcast(LLT::scalar(Size), 
Src0).getReg(0);
+Src0Valid = B.buildAnyExt(S32, Src0Cast).getReg(0);
+
+if (Src2.isValid()) {
+  Register Src2Cast =
+  MRI.getType(Src2).isScalar()
+  ? Src2
+  : B.buildBitcast(LLT::scalar(Size), Src2).getReg(0);
+  Src2Valid = B.buildAnyExt(LLT::scalar(32), Src2Cast).getReg(0);
+}
+Register LaneOp = createLaneOp(Src0Valid, Src1, Src2Valid);
+if (Ty.isScalar())
+  B.buildTrunc(DstReg, LaneOp);
+else {
+  auto Trunc = B.buildTrunc(LLT::scalar(Size), LaneOp);
+  B.buildBitcast(DstReg, Trunc);
+}
+
+MI.eraseFromParent();
+return true;
+  }
+
+  if ((Size % 32) == 0) {
+SmallVector PartialRes;
+unsigned NumParts = Size / 32;
+auto Src0Parts = B.buildUnmerge(S32, Src0);
+
+switch (IID) {
+case Intrinsic::amdgcn_readlane: {
+  Register Src1 = MI.getOperand(3).getReg();
+  for (unsigned i = 0; i < NumParts; ++i)
+PartialRes.push_back(
+(B.buildIntrinsic(Intrinsic::amdgcn_readlane, {S32})
+ .addUse(Src0Parts.getReg(i))
+ .addUse(Src1))
+.getReg(0));

vikramRH wrote:

should this be a seperate change that addresses other such instances too ?

https://github.com/llvm/llvm-project/pull/89217
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


  1   2   >