[clang] Fix lit test clang/test/CodeGenHIP/dpp-const-fold.hip. added (PR #71656)
https://github.com/vikramRH approved this pull request. https://github.com/llvm/llvm-project/pull/71656 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[llvm] [clang] [WIP][AMDGPU] Enable hostcall printf for OpenCL (PR #70932)
vikramRH wrote: @b-sumner, @ssahasra , what are your thoughts about making the hostcall default throughout ? Also I do not think splitting this into separate patches is feasible since changes are interdependent, however I could split changes into separate commits to make it easier to review. https://github.com/llvm/llvm-project/pull/70932 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[llvm] [clang] [WIP][AMDGPU] Enable hostcall printf for OpenCL (PR #70932)
https://github.com/vikramRH closed https://github.com/llvm/llvm-project/pull/70932 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [AMDGPU] Treat printf as builtin for OpenCL (PR #72554)
https://github.com/vikramRH created https://github.com/llvm/llvm-project/pull/72554 This is a prerequisite to enable opencl hostcall printf. ensures that AMDGPU printf calls are lowered at clang CodeGen for both HIP and OCL. >From 6ace9d0a51064be189093ca3bb42416aafadb7f6 Mon Sep 17 00:00:00 2001 From: Vikram Date: Fri, 10 Nov 2023 09:39:41 + Subject: [PATCH] [AMDGPU] Treat printf as builtin for OpenCL --- clang/include/clang/Basic/BuiltinsAMDGPU.def | 8 clang/lib/AST/Decl.cpp | 7 +++ clang/lib/Basic/Targets/AMDGPU.cpp | 2 ++ clang/lib/CodeGen/CGBuiltin.cpp | 5 + 4 files changed, 22 insertions(+) diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def b/clang/include/clang/Basic/BuiltinsAMDGPU.def index a19c8bd5f219ec6..1799c72806bfdd4 100644 --- a/clang/include/clang/Basic/BuiltinsAMDGPU.def +++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def @@ -21,6 +21,10 @@ #if defined(BUILTIN) && !defined(TARGET_BUILTIN) # define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE) BUILTIN(ID, TYPE, ATTRS) #endif + +#if defined(BUILTIN) && !defined(LANGBUILTIN) +#define LANGBUILTIN(ID, TYPE, ATTRS, BUILTIN_LANG) BUILTIN(ID, TYPE, ATTRS) +#endif //===--===// // SI+ only builtins. //===--===// @@ -406,5 +410,9 @@ TARGET_BUILTIN(__builtin_amdgcn_cvt_pk_fp8_f32, "iffiIb", "nc", "fp8-insts") TARGET_BUILTIN(__builtin_amdgcn_cvt_sr_bf8_f32, "ifiiIi", "nc", "fp8-insts") TARGET_BUILTIN(__builtin_amdgcn_cvt_sr_fp8_f32, "ifiiIi", "nc", "fp8-insts") +// OpenCL +LANGBUILTIN(printf, "icC*4.", "fp:0:", ALL_OCL_LANGUAGES) + #undef BUILTIN #undef TARGET_BUILTIN +#undef LANGBUILTIN diff --git a/clang/lib/AST/Decl.cpp b/clang/lib/AST/Decl.cpp index c5c2edf1bfe3aba..2597422bdd521a0 100644 --- a/clang/lib/AST/Decl.cpp +++ b/clang/lib/AST/Decl.cpp @@ -49,6 +49,7 @@ #include "clang/Basic/SourceLocation.h" #include "clang/Basic/SourceManager.h" #include "clang/Basic/Specifiers.h" +#include "clang/Basic/TargetBuiltins.h" #include "clang/Basic/TargetCXXABI.h" #include "clang/Basic/TargetInfo.h" #include "clang/Basic/Visibility.h" @@ -3598,6 +3599,12 @@ unsigned FunctionDecl::getBuiltinID(bool ConsiderWrapperFunctions) const { if (!ConsiderWrapperFunctions && getStorageClass() == SC_Static) return 0; + // AMDGCN implementation supports printf as a builtin + // for OpenCL + if (Context.getTargetInfo().getTriple().isAMDGCN() && + Context.getLangOpts().OpenCL && BuiltinID == AMDGPU::BIprintf) +return BuiltinID; + // OpenCL v1.2 s6.9.f - The library functions defined in // the C99 standard headers are not available. if (Context.getLangOpts().OpenCL && diff --git a/clang/lib/Basic/Targets/AMDGPU.cpp b/clang/lib/Basic/Targets/AMDGPU.cpp index 409ae32ab424215..307cfa49f54e926 100644 --- a/clang/lib/Basic/Targets/AMDGPU.cpp +++ b/clang/lib/Basic/Targets/AMDGPU.cpp @@ -91,6 +91,8 @@ static constexpr Builtin::Info BuiltinInfo[] = { {#ID, TYPE, ATTRS, nullptr, HeaderDesc::NO_HEADER, ALL_LANGUAGES}, #define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE) \ {#ID, TYPE, ATTRS, FEATURE, HeaderDesc::NO_HEADER, ALL_LANGUAGES}, +#define LANGBUILTIN(ID, TYPE, ATTRS, LANG) \ + {#ID, TYPE, ATTRS, nullptr, HeaderDesc::NO_HEADER, LANG}, #include "clang/Basic/BuiltinsAMDGPU.def" }; diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 09309a3937fb613..987909b5a62e11b 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -2458,6 +2458,11 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, &getTarget().getLongDoubleFormat() == &llvm::APFloat::IEEEquad()) BuiltinID = mutateLongDoubleBuiltin(BuiltinID); + // Mutate the printf builtin ID so that we use the same CodeGen path for + // HIP and OpenCL with AMDGPU targets. + if (getTarget().getTriple().isAMDGCN() && BuiltinID == AMDGPU::BIprintf) + BuiltinID = Builtin::BIprintf; + // If the builtin has been declared explicitly with an assembler label, // disable the specialized emitting below. Ideally we should communicate the // rename in IR, or at least avoid generating the intrinsic calls that are ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] Enable OpenCL hostcall printf (WIP) (PR #72556)
https://github.com/vikramRH created https://github.com/llvm/llvm-project/pull/72556 Kindly review top commit here, The builtin specific changes are up for in a seperate patch (https://github.com/llvm/llvm-project/pull/72554) Few implementation details, 1. Hostcall printf is now default for both HIP and OpenCL. 2. The implementation adds vector processing support both for hostcall and buffered cases. The vector elements are extracted and pushed onto the buffer individually (each alingned to 8 byte boundary) 3. for OpenCL hostcall case, The format string pointer is addrspace casted to generic address space to be compatible with hostcall device lib functions. >From 6ace9d0a51064be189093ca3bb42416aafadb7f6 Mon Sep 17 00:00:00 2001 From: Vikram Date: Fri, 10 Nov 2023 09:39:41 + Subject: [PATCH 1/2] [AMDGPU] Treat printf as builtin for OpenCL --- clang/include/clang/Basic/BuiltinsAMDGPU.def | 8 clang/lib/AST/Decl.cpp | 7 +++ clang/lib/Basic/Targets/AMDGPU.cpp | 2 ++ clang/lib/CodeGen/CGBuiltin.cpp | 5 + 4 files changed, 22 insertions(+) diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def b/clang/include/clang/Basic/BuiltinsAMDGPU.def index a19c8bd5f219ec6..1799c72806bfdd4 100644 --- a/clang/include/clang/Basic/BuiltinsAMDGPU.def +++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def @@ -21,6 +21,10 @@ #if defined(BUILTIN) && !defined(TARGET_BUILTIN) # define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE) BUILTIN(ID, TYPE, ATTRS) #endif + +#if defined(BUILTIN) && !defined(LANGBUILTIN) +#define LANGBUILTIN(ID, TYPE, ATTRS, BUILTIN_LANG) BUILTIN(ID, TYPE, ATTRS) +#endif //===--===// // SI+ only builtins. //===--===// @@ -406,5 +410,9 @@ TARGET_BUILTIN(__builtin_amdgcn_cvt_pk_fp8_f32, "iffiIb", "nc", "fp8-insts") TARGET_BUILTIN(__builtin_amdgcn_cvt_sr_bf8_f32, "ifiiIi", "nc", "fp8-insts") TARGET_BUILTIN(__builtin_amdgcn_cvt_sr_fp8_f32, "ifiiIi", "nc", "fp8-insts") +// OpenCL +LANGBUILTIN(printf, "icC*4.", "fp:0:", ALL_OCL_LANGUAGES) + #undef BUILTIN #undef TARGET_BUILTIN +#undef LANGBUILTIN diff --git a/clang/lib/AST/Decl.cpp b/clang/lib/AST/Decl.cpp index c5c2edf1bfe3aba..2597422bdd521a0 100644 --- a/clang/lib/AST/Decl.cpp +++ b/clang/lib/AST/Decl.cpp @@ -49,6 +49,7 @@ #include "clang/Basic/SourceLocation.h" #include "clang/Basic/SourceManager.h" #include "clang/Basic/Specifiers.h" +#include "clang/Basic/TargetBuiltins.h" #include "clang/Basic/TargetCXXABI.h" #include "clang/Basic/TargetInfo.h" #include "clang/Basic/Visibility.h" @@ -3598,6 +3599,12 @@ unsigned FunctionDecl::getBuiltinID(bool ConsiderWrapperFunctions) const { if (!ConsiderWrapperFunctions && getStorageClass() == SC_Static) return 0; + // AMDGCN implementation supports printf as a builtin + // for OpenCL + if (Context.getTargetInfo().getTriple().isAMDGCN() && + Context.getLangOpts().OpenCL && BuiltinID == AMDGPU::BIprintf) +return BuiltinID; + // OpenCL v1.2 s6.9.f - The library functions defined in // the C99 standard headers are not available. if (Context.getLangOpts().OpenCL && diff --git a/clang/lib/Basic/Targets/AMDGPU.cpp b/clang/lib/Basic/Targets/AMDGPU.cpp index 409ae32ab424215..307cfa49f54e926 100644 --- a/clang/lib/Basic/Targets/AMDGPU.cpp +++ b/clang/lib/Basic/Targets/AMDGPU.cpp @@ -91,6 +91,8 @@ static constexpr Builtin::Info BuiltinInfo[] = { {#ID, TYPE, ATTRS, nullptr, HeaderDesc::NO_HEADER, ALL_LANGUAGES}, #define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE) \ {#ID, TYPE, ATTRS, FEATURE, HeaderDesc::NO_HEADER, ALL_LANGUAGES}, +#define LANGBUILTIN(ID, TYPE, ATTRS, LANG) \ + {#ID, TYPE, ATTRS, nullptr, HeaderDesc::NO_HEADER, LANG}, #include "clang/Basic/BuiltinsAMDGPU.def" }; diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 09309a3937fb613..987909b5a62e11b 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -2458,6 +2458,11 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, &getTarget().getLongDoubleFormat() == &llvm::APFloat::IEEEquad()) BuiltinID = mutateLongDoubleBuiltin(BuiltinID); + // Mutate the printf builtin ID so that we use the same CodeGen path for + // HIP and OpenCL with AMDGPU targets. + if (getTarget().getTriple().isAMDGCN() && BuiltinID == AMDGPU::BIprintf) + BuiltinID = Builtin::BIprintf; + // If the builtin has been declared explicitly with an assembler label, // disable the specialized emitting below. Ideally we should communicate the // rename in IR, or at least avoid generating the intrinsic calls that are >From f9329597564d4e3390f6d0d3a08e4a6f66b52de4 Mon Sep 17 00:00:00 2001 From: Vikram Date: Wed, 15 Nov 2023 01:20:55 -0500 Subject: [P
[clang] [llvm] [WIP][AMDGPU] Enable hostcall printf for OpenCL (PR #70932)
vikramRH wrote: Two different patch sets have been created here, https://github.com/llvm/llvm-project/pull/72554 https://github.com/llvm/llvm-project/pull/72556 https://github.com/llvm/llvm-project/pull/70932 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [AMDGPU] Treat printf as builtin for OpenCL (PR #72554)
@@ -2458,6 +2458,11 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, &getTarget().getLongDoubleFormat() == &llvm::APFloat::IEEEquad()) BuiltinID = mutateLongDoubleBuiltin(BuiltinID); + // Mutate the printf builtin ID so that we use the same CodeGen path for + // HIP and OpenCL with AMDGPU targets. + if (getTarget().getTriple().isAMDGCN() && BuiltinID == AMDGPU::BIprintf) + BuiltinID = Builtin::BIprintf; vikramRH wrote: @jhuber6 , I had your implementation in mind when I wrote this, The printf wont be expanded by clang with "-fno-builtin" and users would still have an option to use a lib variant if need be. This also makes this more elegant as we would not have to hack the "fno-builtin" handling into the implementation, this is part of clang builtin handling. https://github.com/llvm/llvm-project/pull/72554 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [AMDGPU] Treat printf as builtin for OpenCL (PR #72554)
@@ -406,5 +410,9 @@ TARGET_BUILTIN(__builtin_amdgcn_cvt_pk_fp8_f32, "iffiIb", "nc", "fp8-insts") TARGET_BUILTIN(__builtin_amdgcn_cvt_sr_bf8_f32, "ifiiIi", "nc", "fp8-insts") TARGET_BUILTIN(__builtin_amdgcn_cvt_sr_fp8_f32, "ifiiIi", "nc", "fp8-insts") +// OpenCL +LANGBUILTIN(printf, "icC*4.", "fp:0:", ALL_OCL_LANGUAGES) vikramRH wrote: This is specifically to recognize the OpenCL version of printf (where fmt string arg is a pointer to const address space) as a builtin. The hack to generic builtin is just a option that I had as I did not want to add a new case to builtin expansion code (since the API used by both OpenCL and HIP are same ), however Im okay with adding a new case too if you feel it makes more sense. https://github.com/llvm/llvm-project/pull/72554 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [AMDGPU] Treat printf as builtin for OpenCL (PR #72554)
vikramRH wrote: > Any tests? Can you explain why it's not sufficient to do this lowering in the > AMDGPU pass? I intended these changes to be part of https://github.com/llvm/llvm-project/pull/72556, but it seemed too many changes at one place, so I extracted this part out for ease of review. This cannot be merged standalone and has to be with 72556 , the tests are also part of that patch (This really should have been a stack of patches :( ). Also for AMDGPU pass, I plan to remove that altogether and handle all printf lowering at one place during clang Codegen. since we now use a compiler option to switch between different implementations, This makes a lot more sense I feel. https://github.com/llvm/llvm-project/pull/72554 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [AMDGPU] Treat printf as builtin for OpenCL (PR #72554)
https://github.com/vikramRH updated https://github.com/llvm/llvm-project/pull/72554 >From 9833353ab6d7bb9716883b89f4e8b90285c1a60c Mon Sep 17 00:00:00 2001 From: Vikram Date: Fri, 10 Nov 2023 09:39:41 + Subject: [PATCH] [AMDGPU] Treat printf as builtin for OpenCL --- clang/include/clang/Basic/BuiltinsAMDGPU.def | 8 clang/lib/AST/Decl.cpp | 7 +++ clang/lib/Basic/Targets/AMDGPU.cpp | 3 +++ clang/lib/CodeGen/CGBuiltin.cpp | 5 + 4 files changed, 23 insertions(+) diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def b/clang/include/clang/Basic/BuiltinsAMDGPU.def index a19c8bd5f219ec6..1799c72806bfdd4 100644 --- a/clang/include/clang/Basic/BuiltinsAMDGPU.def +++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def @@ -21,6 +21,10 @@ #if defined(BUILTIN) && !defined(TARGET_BUILTIN) # define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE) BUILTIN(ID, TYPE, ATTRS) #endif + +#if defined(BUILTIN) && !defined(LANGBUILTIN) +#define LANGBUILTIN(ID, TYPE, ATTRS, BUILTIN_LANG) BUILTIN(ID, TYPE, ATTRS) +#endif //===--===// // SI+ only builtins. //===--===// @@ -406,5 +410,9 @@ TARGET_BUILTIN(__builtin_amdgcn_cvt_pk_fp8_f32, "iffiIb", "nc", "fp8-insts") TARGET_BUILTIN(__builtin_amdgcn_cvt_sr_bf8_f32, "ifiiIi", "nc", "fp8-insts") TARGET_BUILTIN(__builtin_amdgcn_cvt_sr_fp8_f32, "ifiiIi", "nc", "fp8-insts") +// OpenCL +LANGBUILTIN(printf, "icC*4.", "fp:0:", ALL_OCL_LANGUAGES) + #undef BUILTIN #undef TARGET_BUILTIN +#undef LANGBUILTIN diff --git a/clang/lib/AST/Decl.cpp b/clang/lib/AST/Decl.cpp index c5c2edf1bfe3aba..2597422bdd521a0 100644 --- a/clang/lib/AST/Decl.cpp +++ b/clang/lib/AST/Decl.cpp @@ -49,6 +49,7 @@ #include "clang/Basic/SourceLocation.h" #include "clang/Basic/SourceManager.h" #include "clang/Basic/Specifiers.h" +#include "clang/Basic/TargetBuiltins.h" #include "clang/Basic/TargetCXXABI.h" #include "clang/Basic/TargetInfo.h" #include "clang/Basic/Visibility.h" @@ -3598,6 +3599,12 @@ unsigned FunctionDecl::getBuiltinID(bool ConsiderWrapperFunctions) const { if (!ConsiderWrapperFunctions && getStorageClass() == SC_Static) return 0; + // AMDGCN implementation supports printf as a builtin + // for OpenCL + if (Context.getTargetInfo().getTriple().isAMDGCN() && + Context.getLangOpts().OpenCL && BuiltinID == AMDGPU::BIprintf) +return BuiltinID; + // OpenCL v1.2 s6.9.f - The library functions defined in // the C99 standard headers are not available. if (Context.getLangOpts().OpenCL && diff --git a/clang/lib/Basic/Targets/AMDGPU.cpp b/clang/lib/Basic/Targets/AMDGPU.cpp index 409ae32ab424215..0cf6daee87c4a4c 100644 --- a/clang/lib/Basic/Targets/AMDGPU.cpp +++ b/clang/lib/Basic/Targets/AMDGPU.cpp @@ -91,6 +91,9 @@ static constexpr Builtin::Info BuiltinInfo[] = { {#ID, TYPE, ATTRS, nullptr, HeaderDesc::NO_HEADER, ALL_LANGUAGES}, #define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE) \ {#ID, TYPE, ATTRS, FEATURE, HeaderDesc::NO_HEADER, ALL_LANGUAGES}, +#define LANGBUILTIN(ID, TYPE, ATTRS, LANG) \ + { #ID, TYPE, ATTRS, nullptr, HeaderDesc::NO_HEADER, LANG } \ + , #include "clang/Basic/BuiltinsAMDGPU.def" }; diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 09309a3937fb613..d7a4b895f3432ca 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -2458,6 +2458,11 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, &getTarget().getLongDoubleFormat() == &llvm::APFloat::IEEEquad()) BuiltinID = mutateLongDoubleBuiltin(BuiltinID); + // Mutate the printf builtin ID so that we use the same CodeGen path for + // HIP and OpenCL with AMDGPU targets. + if (getTarget().getTriple().isAMDGCN() && BuiltinID == AMDGPU::BIprintf) +BuiltinID = Builtin::BIprintf; + // If the builtin has been declared explicitly with an assembler label, // disable the specialized emitting below. Ideally we should communicate the // rename in IR, or at least avoid generating the intrinsic calls that are ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] Enable OpenCL hostcall printf (WIP) (PR #72556)
https://github.com/vikramRH edited https://github.com/llvm/llvm-project/pull/72556 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [AMDGPU] Treat printf as builtin for OpenCL (PR #72554)
https://github.com/vikramRH edited https://github.com/llvm/llvm-project/pull/72554 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] Enable OpenCL hostcall printf (WIP) (PR #72556)
https://github.com/vikramRH updated https://github.com/llvm/llvm-project/pull/72556 >From 6ace9d0a51064be189093ca3bb42416aafadb7f6 Mon Sep 17 00:00:00 2001 From: Vikram Date: Fri, 10 Nov 2023 09:39:41 + Subject: [PATCH 1/3] [AMDGPU] Treat printf as builtin for OpenCL --- clang/include/clang/Basic/BuiltinsAMDGPU.def | 8 clang/lib/AST/Decl.cpp | 7 +++ clang/lib/Basic/Targets/AMDGPU.cpp | 2 ++ clang/lib/CodeGen/CGBuiltin.cpp | 5 + 4 files changed, 22 insertions(+) diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def b/clang/include/clang/Basic/BuiltinsAMDGPU.def index a19c8bd5f219ec6..1799c72806bfdd4 100644 --- a/clang/include/clang/Basic/BuiltinsAMDGPU.def +++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def @@ -21,6 +21,10 @@ #if defined(BUILTIN) && !defined(TARGET_BUILTIN) # define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE) BUILTIN(ID, TYPE, ATTRS) #endif + +#if defined(BUILTIN) && !defined(LANGBUILTIN) +#define LANGBUILTIN(ID, TYPE, ATTRS, BUILTIN_LANG) BUILTIN(ID, TYPE, ATTRS) +#endif //===--===// // SI+ only builtins. //===--===// @@ -406,5 +410,9 @@ TARGET_BUILTIN(__builtin_amdgcn_cvt_pk_fp8_f32, "iffiIb", "nc", "fp8-insts") TARGET_BUILTIN(__builtin_amdgcn_cvt_sr_bf8_f32, "ifiiIi", "nc", "fp8-insts") TARGET_BUILTIN(__builtin_amdgcn_cvt_sr_fp8_f32, "ifiiIi", "nc", "fp8-insts") +// OpenCL +LANGBUILTIN(printf, "icC*4.", "fp:0:", ALL_OCL_LANGUAGES) + #undef BUILTIN #undef TARGET_BUILTIN +#undef LANGBUILTIN diff --git a/clang/lib/AST/Decl.cpp b/clang/lib/AST/Decl.cpp index c5c2edf1bfe3aba..2597422bdd521a0 100644 --- a/clang/lib/AST/Decl.cpp +++ b/clang/lib/AST/Decl.cpp @@ -49,6 +49,7 @@ #include "clang/Basic/SourceLocation.h" #include "clang/Basic/SourceManager.h" #include "clang/Basic/Specifiers.h" +#include "clang/Basic/TargetBuiltins.h" #include "clang/Basic/TargetCXXABI.h" #include "clang/Basic/TargetInfo.h" #include "clang/Basic/Visibility.h" @@ -3598,6 +3599,12 @@ unsigned FunctionDecl::getBuiltinID(bool ConsiderWrapperFunctions) const { if (!ConsiderWrapperFunctions && getStorageClass() == SC_Static) return 0; + // AMDGCN implementation supports printf as a builtin + // for OpenCL + if (Context.getTargetInfo().getTriple().isAMDGCN() && + Context.getLangOpts().OpenCL && BuiltinID == AMDGPU::BIprintf) +return BuiltinID; + // OpenCL v1.2 s6.9.f - The library functions defined in // the C99 standard headers are not available. if (Context.getLangOpts().OpenCL && diff --git a/clang/lib/Basic/Targets/AMDGPU.cpp b/clang/lib/Basic/Targets/AMDGPU.cpp index 409ae32ab424215..307cfa49f54e926 100644 --- a/clang/lib/Basic/Targets/AMDGPU.cpp +++ b/clang/lib/Basic/Targets/AMDGPU.cpp @@ -91,6 +91,8 @@ static constexpr Builtin::Info BuiltinInfo[] = { {#ID, TYPE, ATTRS, nullptr, HeaderDesc::NO_HEADER, ALL_LANGUAGES}, #define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE) \ {#ID, TYPE, ATTRS, FEATURE, HeaderDesc::NO_HEADER, ALL_LANGUAGES}, +#define LANGBUILTIN(ID, TYPE, ATTRS, LANG) \ + {#ID, TYPE, ATTRS, nullptr, HeaderDesc::NO_HEADER, LANG}, #include "clang/Basic/BuiltinsAMDGPU.def" }; diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 09309a3937fb613..987909b5a62e11b 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -2458,6 +2458,11 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, &getTarget().getLongDoubleFormat() == &llvm::APFloat::IEEEquad()) BuiltinID = mutateLongDoubleBuiltin(BuiltinID); + // Mutate the printf builtin ID so that we use the same CodeGen path for + // HIP and OpenCL with AMDGPU targets. + if (getTarget().getTriple().isAMDGCN() && BuiltinID == AMDGPU::BIprintf) + BuiltinID = Builtin::BIprintf; + // If the builtin has been declared explicitly with an assembler label, // disable the specialized emitting below. Ideally we should communicate the // rename in IR, or at least avoid generating the intrinsic calls that are >From 040a28deef5fe7a5d9e357a898b50335992e708d Mon Sep 17 00:00:00 2001 From: Vikram Date: Mon, 20 Nov 2023 05:26:27 + Subject: [PATCH 2/3] [AMDGPU] Enable OpenCL printf expansion at clang CodeGen --- clang/lib/CodeGen/CGBuiltin.cpp | 3 ++- clang/lib/CodeGen/CGGPUBuiltin.cpp| 25 +++-- clang/lib/Driver/ToolChains/Clang.cpp | 10 ++ 3 files changed, 31 insertions(+), 7 deletions(-) diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 987909b5a62e11b..8d51df24c7872b7 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -5622,7 +5622,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsig
[clang] [llvm] Enable OpenCL hostcall printf (WIP) (PR #72556)
@@ -170,20 +173,46 @@ static Value *appendString(IRBuilder<> &Builder, Value *Desc, Value *Arg, return callAppendStringN(Builder, Desc, Arg, Length, IsLast); } +static Value *appendVectorArg(IRBuilder<> &Builder, Value *Desc, Value *Arg, + bool IsLast, bool IsBuffered) { + assert(Arg->getType()->isVectorTy() && "incorrent append* function"); + auto VectorTy = dyn_cast(Arg->getType()); + auto Zero = Builder.getInt64(0); + if (VectorTy) { +for (unsigned int i = 0; i < VectorTy->getNumElements() - 1; i++) { + auto Val = Builder.CreateExtractElement(Arg, i); + Desc = callAppendArgs(Builder, Desc, 1, +fitArgInto64Bits(Builder, Val, IsBuffered), Zero, +Zero, Zero, Zero, Zero, Zero, false); +} + +auto Val = vikramRH wrote: Done https://github.com/llvm/llvm-project/pull/72556 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] Enable OpenCL hostcall printf (WIP) (PR #72556)
@@ -170,20 +173,46 @@ static Value *appendString(IRBuilder<> &Builder, Value *Desc, Value *Arg, return callAppendStringN(Builder, Desc, Arg, Length, IsLast); } +static Value *appendVectorArg(IRBuilder<> &Builder, Value *Desc, Value *Arg, + bool IsLast, bool IsBuffered) { + assert(Arg->getType()->isVectorTy() && "incorrent append* function"); + auto VectorTy = dyn_cast(Arg->getType()); + auto Zero = Builder.getInt64(0); + if (VectorTy) { +for (unsigned int i = 0; i < VectorTy->getNumElements() - 1; i++) { + auto Val = Builder.CreateExtractElement(Arg, i); + Desc = callAppendArgs(Builder, Desc, 1, +fitArgInto64Bits(Builder, Val, IsBuffered), Zero, +Zero, Zero, Zero, Zero, Zero, false); +} + +auto Val = +Builder.CreateExtractElement(Arg, VectorTy->getNumElements() - 1); +return callAppendArgs(Builder, Desc, 1, + fitArgInto64Bits(Builder, Val, IsBuffered), Zero, + Zero, Zero, Zero, Zero, Zero, IsLast); + } + return nullptr; +} + static Value *processArg(IRBuilder<> &Builder, Value *Desc, Value *Arg, - bool SpecIsCString, bool IsLast) { + bool SpecIsCString, bool IsVector, bool IsLast, + bool IsBuffered) { if (SpecIsCString && isa(Arg->getType())) { return appendString(Builder, Desc, Arg, IsLast); - } - // If the format specifies a string but the argument is not, the frontend will - // have printed a warning. We just rely on undefined behaviour and send the - // argument anyway. - return appendArg(Builder, Desc, Arg, IsLast); + } else if (IsVector) { vikramRH wrote: Done https://github.com/llvm/llvm-project/pull/72556 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] Enable OpenCL hostcall printf (WIP) (PR #72556)
@@ -170,20 +173,46 @@ static Value *appendString(IRBuilder<> &Builder, Value *Desc, Value *Arg, return callAppendStringN(Builder, Desc, Arg, Length, IsLast); } +static Value *appendVectorArg(IRBuilder<> &Builder, Value *Desc, Value *Arg, vikramRH wrote: Done https://github.com/llvm/llvm-project/pull/72556 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] Enable OpenCL hostcall printf (WIP) (PR #72556)
@@ -278,7 +310,13 @@ static Value *callBufferedPrintfStart( StringData(StringRef(), LenWithNull, LenWithNullAligned, false)); } } else { - int AllocSize = M->getDataLayout().getTypeAllocSize(Args[i]->getType()); + int AllocSize = 0; + if (OCLVectors.test(i)) { +auto VecArg = dyn_cast(Args[i]->getType()); +assert(VecArg && "invalid vector specifier"); vikramRH wrote: Done https://github.com/llvm/llvm-project/pull/72556 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[llvm] [clang] Enable OpenCL hostcall printf (WIP) (PR #72556)
@@ -278,7 +310,13 @@ static Value *callBufferedPrintfStart( StringData(StringRef(), LenWithNull, LenWithNullAligned, false)); } } else { - int AllocSize = M->getDataLayout().getTypeAllocSize(Args[i]->getType()); + int AllocSize = 0; + if (OCLVectors.test(i)) { +auto VecArg = dyn_cast(Args[i]->getType()); +assert(VecArg && "invalid vector specifier"); +AllocSize = VecArg->getNumElements() * 8; + } else +AllocSize = M->getDataLayout().getTypeAllocSize(Args[i]->getType()); vikramRH wrote: I extract individual vector elements, expand them to 8 bytes and store them onto the buffer. The "getTypeAllocSize" would not give me the actual occupied size in the buffer in this case. https://github.com/llvm/llvm-project/pull/72556 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [WIP][AMDGPU] Enable hostcall printf for OpenCL (PR #70932)
https://github.com/vikramRH created https://github.com/llvm/llvm-project/pull/70932 Attempt to enable OpenCL hostcall printf (SWDEV-204804). I would like to have some inputs regarding few key points listed here, 1. We continue to use "-mprintf-kind" option to decide the lowering scheme. It now supports a new value "none" that just makes compiler use default lowering scheme. (hostcalls for HIP, Buffered for OpenCL) 2. OpenCL now treats printf as a builtin so that the printf expansion happens via clang CodeGen. This would also mean that the "AMDGPUPrintfRuntimeBinding" pass would no longer be required since all printf calls would be expanded earlier. 3. The implementation adds vector processing support both for hostcall and buffered cases. The vector elements are extracted and pushed onto the buffer individually (each alingned to 8 byte boundary) 4. for OpenCL hostcall case, The format string pointer is addrspace casted to generic address space to be compatible with hostcall device lib functions. >From 9eec2462d07122f8376f1e60b2e679cc69814430 Mon Sep 17 00:00:00 2001 From: Vikram Date: Wed, 4 Oct 2023 05:41:47 -0400 Subject: [PATCH] [WIP][AMDGPU] hostcall printf support for OpenCL --- clang/include/clang/Basic/BuiltinsAMDGPU.def | 8 + clang/include/clang/Basic/TargetOptions.h | 10 +- clang/include/clang/Driver/Options.td | 8 +- clang/lib/AST/Decl.cpp| 7 + clang/lib/Basic/Targets/AMDGPU.cpp| 2 + clang/lib/CodeGen/CGBuiltin.cpp | 8 +- clang/lib/CodeGen/CGGPUBuiltin.cpp| 27 ++- clang/lib/CodeGen/CodeGenModule.cpp | 4 +- clang/lib/Driver/ToolChains/Clang.cpp | 10 + .../CodeGenHIP/printf-kind-module-flag.hip| 6 +- clang/test/CodeGenOpenCL/amdgpu-printf.cl | 205 +- .../lib/Transforms/Utils/AMDGPUEmitPrintf.cpp | 135 +++- 12 files changed, 359 insertions(+), 71 deletions(-) diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def b/clang/include/clang/Basic/BuiltinsAMDGPU.def index 532a91fd903e87c..b5e8be145b03a0d 100644 --- a/clang/include/clang/Basic/BuiltinsAMDGPU.def +++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def @@ -21,6 +21,10 @@ #if defined(BUILTIN) && !defined(TARGET_BUILTIN) # define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE) BUILTIN(ID, TYPE, ATTRS) #endif + +#if defined(BUILTIN) && !defined(LANGBUILTIN) +#define LANGBUILTIN(ID, TYPE, ATTRS, BUILTIN_LANG) BUILTIN(ID, TYPE, ATTRS) +#endif //===--===// // SI+ only builtins. //===--===// @@ -402,5 +406,9 @@ TARGET_BUILTIN(__builtin_amdgcn_cvt_pk_fp8_f32, "iffiIb", "nc", "fp8-insts") TARGET_BUILTIN(__builtin_amdgcn_cvt_sr_bf8_f32, "ifiiIi", "nc", "fp8-insts") TARGET_BUILTIN(__builtin_amdgcn_cvt_sr_fp8_f32, "ifiiIi", "nc", "fp8-insts") +// OpenCL +LANGBUILTIN(printf, "icC*4.", "fp:0:", ALL_OCL_LANGUAGES) + #undef BUILTIN #undef TARGET_BUILTIN +#undef LANGBUILTIN diff --git a/clang/include/clang/Basic/TargetOptions.h b/clang/include/clang/Basic/TargetOptions.h index 8bb03249b7f8308..8ff07783b0dd5d1 100644 --- a/clang/include/clang/Basic/TargetOptions.h +++ b/clang/include/clang/Basic/TargetOptions.h @@ -92,16 +92,20 @@ class TargetOptions { /// \brief Enumeration values for AMDGPU printf lowering scheme enum class AMDGPUPrintfKind { +/// Use deafult lowering scheme, HIP programs use hostcall and OpenCL uses +/// buffered by default, +None = 0, + /// printf lowering scheme involving hostcalls, currently used by HIP /// programs by default -Hostcall = 0, +Hostcall = 1, /// printf lowering scheme involving implicit printf buffers, -Buffered = 1, +Buffered = 2, }; /// \brief AMDGPU Printf lowering scheme - AMDGPUPrintfKind AMDGPUPrintfKindVal = AMDGPUPrintfKind::Hostcall; + AMDGPUPrintfKind AMDGPUPrintfKindVal = AMDGPUPrintfKind::None; // The code model to be used as specified by the user. Corresponds to // CodeModel::Model enum defined in include/llvm/Support/CodeGen.h, plus diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index c8b730e0f7ecd84..0fb7758845ff868 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -1251,15 +1251,17 @@ def emit_static_lib : Flag<["--"], "emit-static-lib">, def mprintf_kind_EQ : Joined<["-"], "mprintf-kind=">, Group, HelpText<"Specify the printf lowering scheme (AMDGPU only), allowed values are " + "\"none\" (Use default lowering scheme for a language, HIP uses hostcalls and " + "OpenCL uses Buffered scheme), " "\"hostcall\"(printing happens during kernel execution, this scheme " "relies on hostcalls which require system to support pcie atomics) " "and \"buffered\"(printing happens after all kernel threads exit, " "this uses a pri
[clang] [llvm] [WIP][AMDGPU] Enable hostcall printf for OpenCL (PR #70932)
vikramRH wrote: @arsenm, The choice of defaults is based on current state of printf. HIP uses hostcalls and OpenCL uses buffered variant by default. However I'm willing to make one of the two variants consistent (preferably hostcalls for me). Do note that this would make all OpenCL printf to be lowered to hostcalls from this patch onwards. @b-sumner , are you okay with this ? Also regarding point 2, The current behaviour is that with HIP, printf is lowered at clang CodeGen (since it treats printf to be a builtin) while OpenCL uses the IR pass. This patch will make it so that we could remove the IR pass and handle all lowering at clang CodeGen https://github.com/llvm/llvm-project/pull/70932 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [WIP][AMDGPU] Enable hostcall printf for OpenCL (PR #70932)
https://github.com/vikramRH updated https://github.com/llvm/llvm-project/pull/70932 >From 4c0467078b2f38e814569ad351f86129d1c1d5ee Mon Sep 17 00:00:00 2001 From: Vikram Date: Wed, 4 Oct 2023 05:41:47 -0400 Subject: [PATCH] [WIP][AMDGPU] hostcall printf support for OpenCL --- clang/include/clang/Basic/BuiltinsAMDGPU.def | 8 + clang/include/clang/Basic/TargetOptions.h | 10 +- clang/include/clang/Driver/Options.td | 8 +- clang/lib/AST/Decl.cpp| 7 + clang/lib/Basic/Targets/AMDGPU.cpp| 2 + clang/lib/CodeGen/CGBuiltin.cpp | 8 +- clang/lib/CodeGen/CGGPUBuiltin.cpp| 27 ++- clang/lib/CodeGen/CodeGenModule.cpp | 4 +- clang/lib/Driver/ToolChains/Clang.cpp | 10 + .../CodeGenHIP/printf-kind-module-flag.hip| 6 +- clang/test/CodeGenOpenCL/amdgpu-printf.cl | 205 +- .../lib/Transforms/Utils/AMDGPUEmitPrintf.cpp | 135 +++- 12 files changed, 359 insertions(+), 71 deletions(-) diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def b/clang/include/clang/Basic/BuiltinsAMDGPU.def index 532a91fd903e87c..b5e8be145b03a0d 100644 --- a/clang/include/clang/Basic/BuiltinsAMDGPU.def +++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def @@ -21,6 +21,10 @@ #if defined(BUILTIN) && !defined(TARGET_BUILTIN) # define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE) BUILTIN(ID, TYPE, ATTRS) #endif + +#if defined(BUILTIN) && !defined(LANGBUILTIN) +#define LANGBUILTIN(ID, TYPE, ATTRS, BUILTIN_LANG) BUILTIN(ID, TYPE, ATTRS) +#endif //===--===// // SI+ only builtins. //===--===// @@ -402,5 +406,9 @@ TARGET_BUILTIN(__builtin_amdgcn_cvt_pk_fp8_f32, "iffiIb", "nc", "fp8-insts") TARGET_BUILTIN(__builtin_amdgcn_cvt_sr_bf8_f32, "ifiiIi", "nc", "fp8-insts") TARGET_BUILTIN(__builtin_amdgcn_cvt_sr_fp8_f32, "ifiiIi", "nc", "fp8-insts") +// OpenCL +LANGBUILTIN(printf, "icC*4.", "fp:0:", ALL_OCL_LANGUAGES) + #undef BUILTIN #undef TARGET_BUILTIN +#undef LANGBUILTIN diff --git a/clang/include/clang/Basic/TargetOptions.h b/clang/include/clang/Basic/TargetOptions.h index 8bb03249b7f8308..8ff07783b0dd5d1 100644 --- a/clang/include/clang/Basic/TargetOptions.h +++ b/clang/include/clang/Basic/TargetOptions.h @@ -92,16 +92,20 @@ class TargetOptions { /// \brief Enumeration values for AMDGPU printf lowering scheme enum class AMDGPUPrintfKind { +/// Use deafult lowering scheme, HIP programs use hostcall and OpenCL uses +/// buffered by default, +None = 0, + /// printf lowering scheme involving hostcalls, currently used by HIP /// programs by default -Hostcall = 0, +Hostcall = 1, /// printf lowering scheme involving implicit printf buffers, -Buffered = 1, +Buffered = 2, }; /// \brief AMDGPU Printf lowering scheme - AMDGPUPrintfKind AMDGPUPrintfKindVal = AMDGPUPrintfKind::Hostcall; + AMDGPUPrintfKind AMDGPUPrintfKindVal = AMDGPUPrintfKind::None; // The code model to be used as specified by the user. Corresponds to // CodeModel::Model enum defined in include/llvm/Support/CodeGen.h, plus diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index b1229b2f4562379..d62cfe8961db90a 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -1251,15 +1251,17 @@ def emit_static_lib : Flag<["--"], "emit-static-lib">, def mprintf_kind_EQ : Joined<["-"], "mprintf-kind=">, Group, HelpText<"Specify the printf lowering scheme (AMDGPU only), allowed values are " + "\"none\" (Use default lowering scheme for a language, HIP uses hostcalls and " + "OpenCL uses Buffered scheme), " "\"hostcall\"(printing happens during kernel execution, this scheme " "relies on hostcalls which require system to support pcie atomics) " "and \"buffered\"(printing happens after all kernel threads exit, " "this uses a printf buffer and does not rely on pcie atomic support)">, Visibility<[ClangOption, CC1Option]>, - Values<"hostcall,buffered">, + Values<"none,hostcall,buffered">, NormalizedValuesScope<"TargetOptions::AMDGPUPrintfKind">, - NormalizedValues<["Hostcall", "Buffered"]>, - MarshallingInfoEnum, "Hostcall">; + NormalizedValues<["None", "Hostcall", "Buffered"]>, + MarshallingInfoEnum, "None">; // HIP options let Group = hip_Group in { diff --git a/clang/lib/AST/Decl.cpp b/clang/lib/AST/Decl.cpp index 6efc177d61c03ba..b99376e42b8e7ba 100644 --- a/clang/lib/AST/Decl.cpp +++ b/clang/lib/AST/Decl.cpp @@ -49,6 +49,7 @@ #include "clang/Basic/SourceLocation.h" #include "clang/Basic/SourceManager.h" #include "clang/Basic/Specifiers.h" +#include "clang/Basic/TargetBuiltins.h" #include "clang/Basic/TargetCXXABI.h" #include "clang/Basic/TargetInfo.h" #include "clang/Basic/Visibility.h" @@ -3585,6 +3586,12 @@ unsigned Functio
[clang] [llvm] Enable OpenCL hostcall printf (WIP) (PR #72556)
https://github.com/vikramRH updated https://github.com/llvm/llvm-project/pull/72556 >From 6ace9d0a51064be189093ca3bb42416aafadb7f6 Mon Sep 17 00:00:00 2001 From: Vikram Date: Fri, 10 Nov 2023 09:39:41 + Subject: [PATCH 1/4] [AMDGPU] Treat printf as builtin for OpenCL --- clang/include/clang/Basic/BuiltinsAMDGPU.def | 8 clang/lib/AST/Decl.cpp | 7 +++ clang/lib/Basic/Targets/AMDGPU.cpp | 2 ++ clang/lib/CodeGen/CGBuiltin.cpp | 5 + 4 files changed, 22 insertions(+) diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def b/clang/include/clang/Basic/BuiltinsAMDGPU.def index a19c8bd5f219ec6..1799c72806bfdd4 100644 --- a/clang/include/clang/Basic/BuiltinsAMDGPU.def +++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def @@ -21,6 +21,10 @@ #if defined(BUILTIN) && !defined(TARGET_BUILTIN) # define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE) BUILTIN(ID, TYPE, ATTRS) #endif + +#if defined(BUILTIN) && !defined(LANGBUILTIN) +#define LANGBUILTIN(ID, TYPE, ATTRS, BUILTIN_LANG) BUILTIN(ID, TYPE, ATTRS) +#endif //===--===// // SI+ only builtins. //===--===// @@ -406,5 +410,9 @@ TARGET_BUILTIN(__builtin_amdgcn_cvt_pk_fp8_f32, "iffiIb", "nc", "fp8-insts") TARGET_BUILTIN(__builtin_amdgcn_cvt_sr_bf8_f32, "ifiiIi", "nc", "fp8-insts") TARGET_BUILTIN(__builtin_amdgcn_cvt_sr_fp8_f32, "ifiiIi", "nc", "fp8-insts") +// OpenCL +LANGBUILTIN(printf, "icC*4.", "fp:0:", ALL_OCL_LANGUAGES) + #undef BUILTIN #undef TARGET_BUILTIN +#undef LANGBUILTIN diff --git a/clang/lib/AST/Decl.cpp b/clang/lib/AST/Decl.cpp index c5c2edf1bfe3aba..2597422bdd521a0 100644 --- a/clang/lib/AST/Decl.cpp +++ b/clang/lib/AST/Decl.cpp @@ -49,6 +49,7 @@ #include "clang/Basic/SourceLocation.h" #include "clang/Basic/SourceManager.h" #include "clang/Basic/Specifiers.h" +#include "clang/Basic/TargetBuiltins.h" #include "clang/Basic/TargetCXXABI.h" #include "clang/Basic/TargetInfo.h" #include "clang/Basic/Visibility.h" @@ -3598,6 +3599,12 @@ unsigned FunctionDecl::getBuiltinID(bool ConsiderWrapperFunctions) const { if (!ConsiderWrapperFunctions && getStorageClass() == SC_Static) return 0; + // AMDGCN implementation supports printf as a builtin + // for OpenCL + if (Context.getTargetInfo().getTriple().isAMDGCN() && + Context.getLangOpts().OpenCL && BuiltinID == AMDGPU::BIprintf) +return BuiltinID; + // OpenCL v1.2 s6.9.f - The library functions defined in // the C99 standard headers are not available. if (Context.getLangOpts().OpenCL && diff --git a/clang/lib/Basic/Targets/AMDGPU.cpp b/clang/lib/Basic/Targets/AMDGPU.cpp index 409ae32ab424215..307cfa49f54e926 100644 --- a/clang/lib/Basic/Targets/AMDGPU.cpp +++ b/clang/lib/Basic/Targets/AMDGPU.cpp @@ -91,6 +91,8 @@ static constexpr Builtin::Info BuiltinInfo[] = { {#ID, TYPE, ATTRS, nullptr, HeaderDesc::NO_HEADER, ALL_LANGUAGES}, #define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE) \ {#ID, TYPE, ATTRS, FEATURE, HeaderDesc::NO_HEADER, ALL_LANGUAGES}, +#define LANGBUILTIN(ID, TYPE, ATTRS, LANG) \ + {#ID, TYPE, ATTRS, nullptr, HeaderDesc::NO_HEADER, LANG}, #include "clang/Basic/BuiltinsAMDGPU.def" }; diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 09309a3937fb613..987909b5a62e11b 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -2458,6 +2458,11 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, &getTarget().getLongDoubleFormat() == &llvm::APFloat::IEEEquad()) BuiltinID = mutateLongDoubleBuiltin(BuiltinID); + // Mutate the printf builtin ID so that we use the same CodeGen path for + // HIP and OpenCL with AMDGPU targets. + if (getTarget().getTriple().isAMDGCN() && BuiltinID == AMDGPU::BIprintf) + BuiltinID = Builtin::BIprintf; + // If the builtin has been declared explicitly with an assembler label, // disable the specialized emitting below. Ideally we should communicate the // rename in IR, or at least avoid generating the intrinsic calls that are >From 040a28deef5fe7a5d9e357a898b50335992e708d Mon Sep 17 00:00:00 2001 From: Vikram Date: Mon, 20 Nov 2023 05:26:27 + Subject: [PATCH 2/4] [AMDGPU] Enable OpenCL printf expansion at clang CodeGen --- clang/lib/CodeGen/CGBuiltin.cpp | 3 ++- clang/lib/CodeGen/CGGPUBuiltin.cpp| 25 +++-- clang/lib/Driver/ToolChains/Clang.cpp | 10 ++ 3 files changed, 31 insertions(+), 7 deletions(-) diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 987909b5a62e11b..8d51df24c7872b7 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -5622,7 +5622,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsig
[clang] [llvm] Enable OpenCL hostcall printf (WIP) (PR #72556)
@@ -170,20 +173,49 @@ static Value *appendString(IRBuilder<> &Builder, Value *Desc, Value *Arg, return callAppendStringN(Builder, Desc, Arg, Length, IsLast); } +static Value *appendVectorArg(IRBuilder<> &Builder, Value *Desc, Value *Arg, + bool IsLast, bool IsBuffered) { + assert(Arg->getType()->isVectorTy() && "incorrent append* function"); + auto VectorTy = dyn_cast(Arg->getType()); + auto Zero = Builder.getInt64(0); + if (VectorTy) { +for (unsigned int i = 0; i < VectorTy->getNumElements() - 1; i++) { + auto Val = Builder.CreateExtractElement(Arg, i); + Desc = callAppendArgs(Builder, Desc, 1, +fitArgInto64Bits(Builder, Val, IsBuffered), Zero, +Zero, Zero, Zero, Zero, Zero, false); +} + +Value* Val = +Builder.CreateExtractElement(Arg, VectorTy->getNumElements() - 1); +return callAppendArgs(Builder, Desc, 1, + fitArgInto64Bits(Builder, Val, IsBuffered), Zero, + Zero, Zero, Zero, Zero, Zero, IsLast); + } + return nullptr; +} + static Value *processArg(IRBuilder<> &Builder, Value *Desc, Value *Arg, - bool SpecIsCString, bool IsLast) { + bool SpecIsCString, bool IsVector, bool IsLast, + bool IsBuffered) { if (SpecIsCString && isa(Arg->getType())) { return appendString(Builder, Desc, Arg, IsLast); } - // If the format specifies a string but the argument is not, the frontend will - // have printed a warning. We just rely on undefined behaviour and send the - // argument anyway. - return appendArg(Builder, Desc, Arg, IsLast); + + if (IsVector) { +return appendVectorArg(Builder, Desc, Arg, IsLast, IsBuffered); + } + + // If the format specifies a string but the argument is not, the frontend + // will have printed a warning. We just rely on undefined behaviour and send + // the argument anyway. + return appendArg(Builder, Desc, Arg, IsLast, IsBuffered); } // Scan the format string to locate all specifiers, and mark the ones that // specify a string, i.e, the "%s" specifier with optional '*' characters. -static void locateCStrings(SparseBitVector<8> &BV, StringRef Str) { +static void locateCStringsAndVectors(SparseBitVector<8> &BV, vikramRH wrote: Done https://github.com/llvm/llvm-project/pull/72556 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] Enable OpenCL hostcall printf (WIP) (PR #72556)
@@ -1,12 +1,68 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py -// RUN: %clang_cc1 -cl-std=CL1.2 -triple amdgcn-amd-amdhsa -disable-llvm-passes -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -cl-std=CL1.2 -triple amdgcn-amd-amdhsa -mprintf-kind=buffered -disable-llvm-passes -emit-llvm -o - %s | FileCheck --check-prefix=CHECK_BUFFERED %s +// RUN: %clang_cc1 -cl-std=CL1.2 -triple amdgcn-amd-amdhsa -mprintf-kind=hostcall -disable-llvm-passes -emit-llvm -o - %s | FileCheck --check-prefix=CHECK_HOSTCALL %s int printf(__constant const char* st, ...) __attribute__((format(printf, 1, 2))); vikramRH wrote: Done https://github.com/llvm/llvm-project/pull/72556 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] Enable OpenCL hostcall printf (WIP) (PR #72556)
@@ -194,6 +226,8 @@ static void locateCStrings(SparseBitVector<8> &BV, StringRef Str) { SpecPos += 2; continue; } +if (Str.find_first_of("v", SpecPos) != StringRef::npos) vikramRH wrote: Fixed https://github.com/llvm/llvm-project/pull/72556 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] Enable OpenCL hostcall printf (WIP) (PR #72556)
@@ -170,20 +173,49 @@ static Value *appendString(IRBuilder<> &Builder, Value *Desc, Value *Arg, return callAppendStringN(Builder, Desc, Arg, Length, IsLast); } +static Value *appendVectorArg(IRBuilder<> &Builder, Value *Desc, Value *Arg, + bool IsLast, bool IsBuffered) { + assert(Arg->getType()->isVectorTy() && "incorrent append* function"); + auto VectorTy = dyn_cast(Arg->getType()); + auto Zero = Builder.getInt64(0); + if (VectorTy) { vikramRH wrote: I have changed this code a little now so that only FixedVectorTypes are handled. This should be okay since the OCL specs specifically say only vectors of length 2,3,4,8 and 16 are supported for printf. https://github.com/llvm/llvm-project/pull/72556 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] Enable OpenCL hostcall printf (WIP) (PR #72556)
@@ -26,28 +26,31 @@ using namespace llvm; #define DEBUG_TYPE "amdgpu-emit-printf" -static Value *fitArgInto64Bits(IRBuilder<> &Builder, Value *Arg) { +static Value *fitArgInto64Bits(IRBuilder<> &Builder, Value *Arg, + bool IsBuffered) { + const DataLayout &DL = Builder.GetInsertBlock()->getModule()->getDataLayout(); auto Int64Ty = Builder.getInt64Ty(); auto Ty = Arg->getType(); if (auto IntTy = dyn_cast(Ty)) { -switch (IntTy->getBitWidth()) { -case 32: - return Builder.CreateZExt(Arg, Int64Ty); -case 64: - return Arg; +if (IntTy->getBitWidth() < 64) { + return Builder.CreateZExt(Arg, Builder.getInt64Ty()); } } - if (Ty->getTypeID() == Type::DoubleTyID) { + if (Ty->isFloatingPointTy()) { +if (DL.getTypeAllocSize(Ty) < 8) + Arg = Builder.CreateFPExt(Arg, Builder.getDoubleTy()); +if (IsBuffered) + return Arg; return Builder.CreateBitCast(Arg, Int64Ty); } - if (isa(Ty)) { + if (!IsBuffered && isa(Ty)) { return Builder.CreatePtrToInt(Arg, Int64Ty); vikramRH wrote: The pointer is just pushed onto the buffer. The cast is necessary for the hostcall case to be compatible with device lib functions https://github.com/llvm/llvm-project/pull/72556 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] Enable OpenCL hostcall printf (WIP) (PR #72556)
@@ -26,28 +26,31 @@ using namespace llvm; #define DEBUG_TYPE "amdgpu-emit-printf" -static Value *fitArgInto64Bits(IRBuilder<> &Builder, Value *Arg) { +static Value *fitArgInto64Bits(IRBuilder<> &Builder, Value *Arg, + bool IsBuffered) { + const DataLayout &DL = Builder.GetInsertBlock()->getModule()->getDataLayout(); auto Int64Ty = Builder.getInt64Ty(); auto Ty = Arg->getType(); if (auto IntTy = dyn_cast(Ty)) { -switch (IntTy->getBitWidth()) { -case 32: - return Builder.CreateZExt(Arg, Int64Ty); -case 64: - return Arg; +if (IntTy->getBitWidth() < 64) { + return Builder.CreateZExt(Arg, Builder.getInt64Ty()); } } - if (Ty->getTypeID() == Type::DoubleTyID) { + if (Ty->isFloatingPointTy()) { +if (DL.getTypeAllocSize(Ty) < 8) + Arg = Builder.CreateFPExt(Arg, Builder.getDoubleTy()); vikramRH wrote: The type cast is necessary for types such as _Float16, which is not handled at argument promotion. I have added a test case to show the same https://github.com/llvm/llvm-project/pull/72556 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [AMDGPU] Treat printf as builtin for OpenCL (PR #72554)
@@ -406,5 +410,9 @@ TARGET_BUILTIN(__builtin_amdgcn_cvt_pk_fp8_f32, "iffiIb", "nc", "fp8-insts") TARGET_BUILTIN(__builtin_amdgcn_cvt_sr_bf8_f32, "ifiiIi", "nc", "fp8-insts") TARGET_BUILTIN(__builtin_amdgcn_cvt_sr_fp8_f32, "ifiiIi", "nc", "fp8-insts") +// OpenCL +LANGBUILTIN(printf, "icC*4.", "fp:0:", ALL_OCL_LANGUAGES) vikramRH wrote: @ssahasra , I still feel this is the way to move here since I dont see a way to access the printf option at IR level and thus decide version of printf to use. It has to be at clang CodeGen. I ask other reviewers too if they feel there are major concerns with adding such a builtin variant (i.e AMDGPU and OCL specific). I might have to look for alternative approaches if so. https://github.com/llvm/llvm-project/pull/72554 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [AMDGPU] Treat printf as builtin for OpenCL (PR #72554)
https://github.com/vikramRH edited https://github.com/llvm/llvm-project/pull/72554 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] Enable OpenCL hostcall printf (WIP) (PR #72556)
https://github.com/vikramRH ready_for_review https://github.com/llvm/llvm-project/pull/72556 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [AMDGPU] Enable OpenCL hostcall printf (WIP) (PR #72556)
https://github.com/vikramRH edited https://github.com/llvm/llvm-project/pull/72556 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [AMDGPU] Enable OpenCL hostcall printf (WIP) (PR #72556)
vikramRH wrote: ping https://github.com/llvm/llvm-project/pull/72556 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[llvm] [clang] [AMDGPU] Enable OpenCL hostcall printf (WIP) (PR #72556)
vikramRH wrote: @arsenm , apologies for the trouble here. I should have based this out of my earlier commit. currently I do not see a way to base this patch off of my earlier commit and it might get too confusing for other reviewers if I close this and raise another review. would the individual commit details suffice ? https://github.com/llvm/llvm-project/pull/72556 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [AMDGPU] Treat printf as builtin for OpenCL (PR #72554)
@@ -406,5 +410,9 @@ TARGET_BUILTIN(__builtin_amdgcn_cvt_pk_fp8_f32, "iffiIb", "nc", "fp8-insts") TARGET_BUILTIN(__builtin_amdgcn_cvt_sr_bf8_f32, "ifiiIi", "nc", "fp8-insts") TARGET_BUILTIN(__builtin_amdgcn_cvt_sr_fp8_f32, "ifiiIi", "nc", "fp8-insts") +// OpenCL +LANGBUILTIN(printf, "icC*4.", "fp:0:", ALL_OCL_LANGUAGES) vikramRH wrote: OpenCL spec says printf format string should be in constant address space. This makes the printf signature target specific and hence we would need a target specific builtinID to recognize this. Im not sure I understand how we can go ahead with generic "BIPrinf" here ? https://github.com/llvm/llvm-project/pull/72554 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [AMDGPU] Treat printf as builtin for OpenCL (PR #72554)
https://github.com/vikramRH edited https://github.com/llvm/llvm-project/pull/72554 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [AMDGPU] Enable OpenCL hostcall printf (WIP) (PR #72556)
vikramRH wrote: > Is there a separate PR open for "Add vector processing support to AMDGPU > printf"? I think it's easiest to move this part forward first @arsenm , you are right. I just want to make sure we are good on runtime changes too now since there seems to be a blocker. The changes here are not necessary unless we are okay with runtime changes. https://github.com/llvm/llvm-project/pull/72556 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[llvm] [clang] [AMDGPU] Enable OpenCL hostcall printf (WIP) (PR #72556)
@@ -4742,6 +4742,16 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, Args.ClaimAllArgs(options::OPT_gen_cdb_fragment_path); } + if (TC.getTriple().isAMDGPU() && types::isOpenCL(Input.getType())) { +if (Args.getLastArg(options::OPT_mprintf_kind_EQ)) { + CmdArgs.push_back(Args.MakeArgString( + "-mprintf-kind=" + + Args.getLastArgValue(options::OPT_mprintf_kind_EQ))); + // Force compiler error on invalid conversion specifiers + CmdArgs.push_back(Args.MakeArgString("-Werror=format-invalid-specifier")); vikramRH wrote: This was just so that we could error out instead of undefined behaviors due to wrong specifiers. I have no preference here and would be okay to change it if you feel so. https://github.com/llvm/llvm-project/pull/72556 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [Clang] support vector subscript expressions in constant evaluator (PR #76379)
https://github.com/vikramRH created https://github.com/llvm/llvm-project/pull/76379 Feel free to add additional reviewers as relevant, I'm yet to update float test cases as I'm not sure whether it would be safe to directly compare float results in static assert. Would it okay to integer cast the results and compare them ? >From 181a4629f08e7a0e7ec5b3a2406519c26c0d476b Mon Sep 17 00:00:00 2001 From: Vikram Date: Wed, 20 Dec 2023 05:36:40 + Subject: [PATCH] [Clang] support vector subscript expressions in constant evaluator --- clang/lib/AST/ExprConstant.cpp | 61 +- clang/test/CodeGenCXX/temporaries.cpp| 12 +- clang/test/SemaCXX/constexpr-vectors.cpp | 746 ++- 3 files changed, 668 insertions(+), 151 deletions(-) diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index f6aeee1a4e935d..0074b8aa00fc75 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -226,6 +226,12 @@ namespace { ArraySize = 0; MostDerivedLength = I + 1; IsArray = false; + } else if (Type->isVectorType()) { +const VectorType *CT = Type->castAs(); +Type = CT->getElementType(); +ArraySize = CT->getNumElements(); +MostDerivedLength = I + 1; +IsArray = true; } else { // Path[I] describes a base class. ArraySize = 0; @@ -437,6 +443,15 @@ namespace { MostDerivedArraySize = 2; MostDerivedPathLength = Entries.size(); } +/// Update this designator to refer to the given vector component. +void addVectorUnchecked(const VectorType *VecTy) { + Entries.push_back(PathEntry::ArrayIndex(0)); + + MostDerivedType = VecTy->getElementType(); + MostDerivedIsArrayElement = true; + MostDerivedArraySize = VecTy->getNumElements(); + MostDerivedPathLength = Entries.size(); +} void diagnoseUnsizedArrayPointerArithmetic(EvalInfo &Info, const Expr *E); void diagnosePointerArithmetic(EvalInfo &Info, const Expr *E, const APSInt &N); @@ -1732,6 +1747,10 @@ namespace { if (checkSubobject(Info, E, Imag ? CSK_Imag : CSK_Real)) Designator.addComplexUnchecked(EltTy, Imag); } +void addVector(EvalInfo &Info, const Expr *E, const VectorType *VecTy) { + if (checkSubobject(Info, E, CSK_ArrayIndex)) +Designator.addVectorUnchecked(VecTy); +} void clearIsNullPointer() { IsNullPtr = false; } @@ -1890,6 +1909,8 @@ static bool EvaluateFixedPointOrInteger(const Expr *E, APFixedPoint &Result, static bool EvaluateFixedPoint(const Expr *E, APFixedPoint &Result, EvalInfo &Info); +static bool EvaluateVector(const Expr *E, APValue &Result, EvalInfo &Info); + //===--===// // Misc utilities //===--===// @@ -3278,6 +3299,19 @@ static bool HandleLValueComplexElement(EvalInfo &Info, const Expr *E, return true; } +static bool HandeLValueVectorComponent(EvalInfo &Info, const Expr *E, + LValue &LVal, const VectorType *VecTy, + APSInt &Adjustment) { + LVal.addVector(Info, E, VecTy); + + CharUnits SizeOfComponent; + if (!HandleSizeof(Info, E->getExprLoc(), VecTy->getElementType(), +SizeOfComponent)) +return false; + LVal.adjustOffsetAndIndex(Info, E, Adjustment, SizeOfComponent); + return true; +} + /// Try to evaluate the initializer for a variable declaration. /// /// \param Info Information about the ongoing evaluation. @@ -3718,7 +3752,8 @@ findSubobject(EvalInfo &Info, const Expr *E, const CompleteObject &Obj, } // If this is our last pass, check that the final object type is OK. -if (I == N || (I == N - 1 && ObjType->isAnyComplexType())) { +if (I == N || (I == N - 1 && + (ObjType->isAnyComplexType() || ObjType->isVectorType( { // Accesses to volatile objects are prohibited. if (ObjType.isVolatileQualified() && isFormalAccess(handler.AccessKind)) { if (Info.getLangOpts().CPlusPlus) { @@ -3823,6 +3858,10 @@ findSubobject(EvalInfo &Info, const Expr *E, const CompleteObject &Obj, return handler.found(Index ? O->getComplexFloatImag() : O->getComplexFloatReal(), ObjType); } +} else if (ObjType->isVectorType()) { + // Next Subobject is a vector element + uint64_t Index = Sub.Entries[I].getAsArrayIndex(); + O = &O->getVectorElt(Index); } else if (const FieldDecl *Field = getAsField(Sub.Entries[I])) { if (Field->isMutable() && !Obj.mayAccessMutableMembers(Info, handler.AccessKind)) { @@ -8756,14 +8795,28 @@ bool LValueExprEvaluator::VisitMemberExpr(const MemberExpr *E) { } bool LValueExprEvaluat
[clang] [Clang] support vector subscript expressions in constant evaluator (WIP) (PR #76379)
https://github.com/vikramRH edited https://github.com/llvm/llvm-project/pull/76379 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [Clang] support vector subscript expressions in constant evaluator (WIP) (PR #76379)
vikramRH wrote: It seems there are few crashes with systemZ vectors. Looking into them https://github.com/llvm/llvm-project/pull/76379 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [Clang] support vector subscript expressions in constant evaluator (WIP) (PR #76379)
https://github.com/vikramRH updated https://github.com/llvm/llvm-project/pull/76379 >From 89c79eea31d1a9ec0656fbf5c4eacf75b2471034 Mon Sep 17 00:00:00 2001 From: Vikram Date: Wed, 20 Dec 2023 05:36:40 + Subject: [PATCH] [Clang] support vector subscript expressions in constant evaluator --- clang/lib/AST/ExprConstant.cpp | 61 +- clang/test/CodeGenCXX/temporaries.cpp| 12 +- clang/test/SemaCXX/constexpr-vectors.cpp | 746 ++- 3 files changed, 668 insertions(+), 151 deletions(-) diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index f6aeee1a4e935d..390c5aef477105 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -221,6 +221,12 @@ namespace { ArraySize = 2; MostDerivedLength = I + 1; IsArray = true; + } else if (Type->isVectorType()) { +const VectorType *CT = Type->castAs(); +Type = CT->getElementType(); +ArraySize = CT->getNumElements(); +MostDerivedLength = I + 1; +IsArray = true; } else if (const FieldDecl *FD = getAsField(Path[I])) { Type = FD->getType(); ArraySize = 0; @@ -437,6 +443,15 @@ namespace { MostDerivedArraySize = 2; MostDerivedPathLength = Entries.size(); } +/// Update this designator to refer to the given vector component. +void addVectorUnchecked(const VectorType *VecTy) { + Entries.push_back(PathEntry::ArrayIndex(0)); + + MostDerivedType = VecTy->getElementType(); + MostDerivedIsArrayElement = true; + MostDerivedArraySize = VecTy->getNumElements(); + MostDerivedPathLength = Entries.size(); +} void diagnoseUnsizedArrayPointerArithmetic(EvalInfo &Info, const Expr *E); void diagnosePointerArithmetic(EvalInfo &Info, const Expr *E, const APSInt &N); @@ -1732,6 +1747,10 @@ namespace { if (checkSubobject(Info, E, Imag ? CSK_Imag : CSK_Real)) Designator.addComplexUnchecked(EltTy, Imag); } +void addVector(EvalInfo &Info, const Expr *E, const VectorType *VecTy) { + if (checkSubobject(Info, E, CSK_ArrayIndex)) +Designator.addVectorUnchecked(VecTy); +} void clearIsNullPointer() { IsNullPtr = false; } @@ -1890,6 +1909,8 @@ static bool EvaluateFixedPointOrInteger(const Expr *E, APFixedPoint &Result, static bool EvaluateFixedPoint(const Expr *E, APFixedPoint &Result, EvalInfo &Info); +static bool EvaluateVector(const Expr *E, APValue &Result, EvalInfo &Info); + //===--===// // Misc utilities //===--===// @@ -3278,6 +3299,19 @@ static bool HandleLValueComplexElement(EvalInfo &Info, const Expr *E, return true; } +static bool HandeLValueVectorComponent(EvalInfo &Info, const Expr *E, + LValue &LVal, const VectorType *VecTy, + APSInt &Adjustment) { + LVal.addVector(Info, E, VecTy); + + CharUnits SizeOfComponent; + if (!HandleSizeof(Info, E->getExprLoc(), VecTy->getElementType(), +SizeOfComponent)) +return false; + LVal.adjustOffsetAndIndex(Info, E, Adjustment, SizeOfComponent); + return true; +} + /// Try to evaluate the initializer for a variable declaration. /// /// \param Info Information about the ongoing evaluation. @@ -3718,7 +3752,8 @@ findSubobject(EvalInfo &Info, const Expr *E, const CompleteObject &Obj, } // If this is our last pass, check that the final object type is OK. -if (I == N || (I == N - 1 && ObjType->isAnyComplexType())) { +if (I == N || (I == N - 1 && + (ObjType->isAnyComplexType() || ObjType->isVectorType( { // Accesses to volatile objects are prohibited. if (ObjType.isVolatileQualified() && isFormalAccess(handler.AccessKind)) { if (Info.getLangOpts().CPlusPlus) { @@ -3823,6 +3858,10 @@ findSubobject(EvalInfo &Info, const Expr *E, const CompleteObject &Obj, return handler.found(Index ? O->getComplexFloatImag() : O->getComplexFloatReal(), ObjType); } +} else if (ObjType->isVectorType()) { + // Next Subobject is a vector element + uint64_t Index = Sub.Entries[I].getAsArrayIndex(); + O = &O->getVectorElt(Index); } else if (const FieldDecl *Field = getAsField(Sub.Entries[I])) { if (Field->isMutable() && !Obj.mayAccessMutableMembers(Info, handler.AccessKind)) { @@ -8756,14 +8795,28 @@ bool LValueExprEvaluator::VisitMemberExpr(const MemberExpr *E) { } bool LValueExprEvaluator::VisitArraySubscriptExpr(const ArraySubscriptExpr *E) { - // FIXME: Deal with vectors as array subscript bases. - if (E->getBase()->getType()->isVectorType() || - E->getBase()->getType()->isSveVLSBuil
[clang] [Clang] support vector subscript expressions in constant evaluator (WIP) (PR #76379)
https://github.com/vikramRH closed https://github.com/llvm/llvm-project/pull/76379 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [Clang] support vector subscript expressions in constant evaluator (WIP) (PR #76379)
vikramRH wrote: Putting this on hold hold as @yuanfang-chen already has a PR https://github.com/llvm/llvm-project/pull/76379 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [AMDGPU] Enable OpenCL hostcall printf (WIP) (PR #72556)
@@ -202,12 +207,20 @@ RValue CodeGenFunction::EmitAMDGPUDevicePrintfCallExpr(const CallExpr *E) { Args.push_back(Arg); } - llvm::IRBuilder<> IRB(Builder.GetInsertBlock(), Builder.GetInsertPoint()); - IRB.SetCurrentDebugLocation(Builder.getCurrentDebugLocation()); + auto PFK = CGM.getTarget().getTargetOpts().AMDGPUPrintfKindVal; + bool isBuffered = (PFK == clang::TargetOptions::AMDGPUPrintfKind::Buffered); + + StringRef FmtStr; + if (llvm::getConstantStringInfo(Args[0], FmtStr)) { +if (FmtStr.empty()) + FmtStr = StringRef("", 1); + } else { +assert(!CGM.getLangOpts().OpenCL && + "OpenCL needs compile time resolvable format string"); vikramRH wrote: Done https://github.com/llvm/llvm-project/pull/72556 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [AMDGPU] Enable OpenCL hostcall printf (WIP) (PR #72556)
@@ -3616,6 +3617,12 @@ unsigned FunctionDecl::getBuiltinID(bool ConsiderWrapperFunctions) const { if (!ConsiderWrapperFunctions && getStorageClass() == SC_Static) return 0; + // AMDGCN implementation supports printf as a builtin + // for OpenCL + if (Context.getTargetInfo().getTriple().isAMDGCN() && + Context.getLangOpts().OpenCL && BuiltinID == AMDGPU::BIprintf) +return BuiltinID; vikramRH wrote: Only other alternative I see currently is to modify Sema (probably ActOnFunctionDeclarator) so that we map the ocl printf declaration to C printf builtin ID. This would be a really hacky solution and I would prefer this implementation. https://github.com/llvm/llvm-project/pull/72556 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [AMDGPU] Enable OpenCL hostcall printf (WIP) (PR #72556)
@@ -3616,6 +3617,12 @@ unsigned FunctionDecl::getBuiltinID(bool ConsiderWrapperFunctions) const { if (!ConsiderWrapperFunctions && getStorageClass() == SC_Static) return 0; + // AMDGCN implementation supports printf as a builtin + // for OpenCL + if (Context.getTargetInfo().getTriple().isAMDGCN() && + Context.getLangOpts().OpenCL && BuiltinID == AMDGPU::BIprintf) +return BuiltinID; vikramRH wrote: ping @arsenm https://github.com/llvm/llvm-project/pull/72556 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [AMDGPU] Enable OpenCL hostcall printf (WIP) (PR #72556)
@@ -3616,6 +3617,12 @@ unsigned FunctionDecl::getBuiltinID(bool ConsiderWrapperFunctions) const { if (!ConsiderWrapperFunctions && getStorageClass() == SC_Static) return 0; + // AMDGCN implementation supports printf as a builtin + // for OpenCL + if (Context.getTargetInfo().getTriple().isAMDGCN() && + Context.getLangOpts().OpenCL && BuiltinID == AMDGPU::BIprintf) +return BuiltinID; vikramRH wrote: @arsenm, thanks for the info. CustomTypeChecking is a valid option. I'm not sure why OpenCL community did not consider this change despite OpenCL specs specifying the details. I could create a separate patch for this (probably folks from OCL community would provide further background). Meanwhile, can this go ahead as an AMDGPU specific workaround for now so that we have the intended feature in place ? (The frontend changes here can be reverted with that follow up patch ) PS :Also, I see another issue . OpenCL v1.2 s6.9.f states none of the functions defined in C99 headers are available. This would mean std printf is supposed to be treated differently than OpenCL builtins and consequently the builtin IDs assigned to them "need" to be different. If this understanding is correct, moving ahead with using same builtin ID as std printf is not the right way. https://github.com/llvm/llvm-project/pull/72556 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [AMDGPU] Enable OpenCL hostcall printf (WIP) (PR #72556)
https://github.com/vikramRH edited https://github.com/llvm/llvm-project/pull/72556 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [AMDGPU] Enable OpenCL hostcall printf (WIP) (PR #72556)
https://github.com/vikramRH edited https://github.com/llvm/llvm-project/pull/72556 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [AMDGPU] Enable OpenCL hostcall printf (WIP) (PR #72556)
@@ -3616,6 +3617,12 @@ unsigned FunctionDecl::getBuiltinID(bool ConsiderWrapperFunctions) const { if (!ConsiderWrapperFunctions && getStorageClass() == SC_Static) return 0; + // AMDGCN implementation supports printf as a builtin + // for OpenCL + if (Context.getTargetInfo().getTriple().isAMDGCN() && + Context.getLangOpts().OpenCL && BuiltinID == AMDGPU::BIprintf) +return BuiltinID; vikramRH wrote: I was referring to https://github.com/llvm/llvm-project/blob/3e6db602918435b6a5ac476f63f8b259e7e73af4/clang/lib/AST/Decl.cpp#L3633. This essentially means that even if frontend attaches the printf builtin ID to the decl (even after custom type checks), this would revert. https://github.com/llvm/llvm-project/pull/72556 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [RFC][Clang] Enable custom type checking for printf (PR #86801)
https://github.com/vikramRH created https://github.com/llvm/llvm-project/pull/86801 The motivation for this change comes from an ongoing PR (#72556 ) , which enables hostcall based printf lowering for AMDGPU target and OpenCL inputs. The OpenCL printf has a different signature than the C printf. the difference being the explicit address space specifier for format string arg as follows int printf(__constant const char* st, ...) __attribute__((format(printf, 1, 2))); This is not considered a builtin because of the type mismatch. The patch #72556 tried to address this scenario by declaring OCL printf essentially as a target specific printf overload. However, the discussions there resulted in the decision that this should not be target-specific. The idea in this patch is that the changes are NFC for current framework (i.e the semantic checks for printf are preserved) however the printf declarations are considered builtins now regardless of LangOpt. This would allow me to hanlde the printf CodeGen without any target specific hacks. PS: feel free to add additional reviewers, I'm not aware of others who could comment here. >From a2731d056cee9c4e75d49f8d4fa3325dc532b207 Mon Sep 17 00:00:00 2001 From: Vikram Date: Wed, 27 Mar 2024 04:24:10 -0400 Subject: [PATCH] [Clang] Implement custom type checking for printf --- clang/include/clang/Basic/Builtins.td | 4 +-- clang/include/clang/Sema/Sema.h | 1 + clang/lib/AST/Decl.cpp| 6 +++- clang/lib/Basic/Builtins.cpp | 3 +- clang/lib/CodeGen/CGBuiltin.cpp | 2 +- clang/lib/Sema/SemaChecking.cpp | 51 +++ 6 files changed, 62 insertions(+), 5 deletions(-) diff --git a/clang/include/clang/Basic/Builtins.td b/clang/include/clang/Basic/Builtins.td index 52c0dd52c28b11..f795c7c42c7b25 100644 --- a/clang/include/clang/Basic/Builtins.td +++ b/clang/include/clang/Basic/Builtins.td @@ -2816,14 +2816,14 @@ def StrLen : LibBuiltin<"string.h"> { // FIXME: This list is incomplete. def Printf : LibBuiltin<"stdio.h"> { let Spellings = ["printf"]; - let Attributes = [PrintfFormat<0>]; + let Attributes = [PrintfFormat<0>, CustomTypeChecking]; let Prototype = "int(char const*, ...)"; } // FIXME: The builtin and library function should have the same signature. def BuiltinPrintf : Builtin { let Spellings = ["__builtin_printf"]; - let Attributes = [NoThrow, PrintfFormat<0>, FunctionWithBuiltinPrefix]; + let Attributes = [NoThrow, PrintfFormat<0>, FunctionWithBuiltinPrefix, CustomTypeChecking]; let Prototype = "int(char const* restrict, ...)"; } diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h index 5ecd2f9eb2881f..b18b208a75bdf4 100644 --- a/clang/include/clang/Sema/Sema.h +++ b/clang/include/clang/Sema/Sema.h @@ -2245,6 +2245,7 @@ class Sema final { bool SemaBuiltinVAStart(unsigned BuiltinID, CallExpr *TheCall); bool SemaBuiltinVAStartARMMicrosoft(CallExpr *Call); + bool SemaBuiltinPrintf(FunctionDecl *FDecl, CallExpr *TheCall); bool SemaBuiltinUnorderedCompare(CallExpr *TheCall, unsigned BuiltinID); bool SemaBuiltinFPClassification(CallExpr *TheCall, unsigned NumArgs, unsigned BuiltinID); diff --git a/clang/lib/AST/Decl.cpp b/clang/lib/AST/Decl.cpp index 131f82985e903b..298223f874cda3 100644 --- a/clang/lib/AST/Decl.cpp +++ b/clang/lib/AST/Decl.cpp @@ -3629,8 +3629,12 @@ unsigned FunctionDecl::getBuiltinID(bool ConsiderWrapperFunctions) const { // OpenCL v1.2 s6.9.f - The library functions defined in // the C99 standard headers are not available. if (Context.getLangOpts().OpenCL && - Context.BuiltinInfo.isPredefinedLibFunction(BuiltinID)) + Context.BuiltinInfo.isPredefinedLibFunction(BuiltinID)) { +if (Context.getLangOpts().getOpenCLCompatibleVersion() >= 120 && +(BuiltinID == Builtin::BIprintf)) + return BuiltinID; return 0; + } // CUDA does not have device-side standard library. printf and malloc are the // only special cases that are supported by device-side runtime. diff --git a/clang/lib/Basic/Builtins.cpp b/clang/lib/Basic/Builtins.cpp index 3467847ac1672e..25590ed9299e8b 100644 --- a/clang/lib/Basic/Builtins.cpp +++ b/clang/lib/Basic/Builtins.cpp @@ -235,7 +235,8 @@ bool Builtin::Context::performsCallback(unsigned ID, bool Builtin::Context::canBeRedeclared(unsigned ID) const { return ID == Builtin::NotBuiltin || ID == Builtin::BI__va_start || - ID == Builtin::BI__builtin_assume_aligned || + ID == Builtin::BI__builtin_assume_aligned || ID == Builtin::BIprintf || + ID == Builtin::BI__builtin_printf || (!hasReferenceArgsOrResult(ID) && !hasCustomTypechecking(ID)) || isInStdNamespace(ID); } diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 3cfdb261a0eac0..baed36acb12437 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp
[clang] [AMDGPU] Treat printf as builtin for OpenCL (PR #72554)
https://github.com/vikramRH closed https://github.com/llvm/llvm-project/pull/72554 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [AMDGPU] Treat printf as builtin for OpenCL (PR #72554)
vikramRH wrote: closing this in favour of https://github.com/llvm/llvm-project/pull/86801 https://github.com/llvm/llvm-project/pull/72554 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [RFC][Clang] Enable custom type checking for printf (PR #86801)
https://github.com/vikramRH ready_for_review https://github.com/llvm/llvm-project/pull/86801 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [RFC][Clang] Enable custom type checking for printf (PR #86801)
vikramRH wrote: Thanks for the comments @AaronBallman. The core issue here is that the current builtin handling design does not allow multiple overloads for the same identifier to coexist (ref. https://github.com/llvm/llvm-project/blob/eacda36c7dd842cb15c0c954eda74b67d0c73814/clang/include/clang/Basic/Builtins.h#L66), unless the builtins are defined in target specific namespaces which is what I tried in my original patch . If we want change this approach, I currently think of couple of ways at a top level 1. As you said, we could have OCL specific LibBuiltin and LangBuiltin TableGen classes (and corresponding macros in Buitlins.inc). To make this work they would need new builtin ID's of different form (say "BI_OCL##"). This is very Language specific. 2. Probably change the current Builtin Info structure to allow vector of possible signatures for an identifier. The builtin type decoder could choose the appropriate signature based on LangOpt. (This wording is vague and could be a separate discussion in itself ) either way, changes in current design are required. printf is the only current use case I know that can benefit out of this (since OpenCL v1.2 s6.9.f says other library functions defined in C standard header are not available ,so 🤷♂️ ). But I guess we could have more use cases in future. can this be a separate discussion ? This patch would unblock my current work for now. https://github.com/llvm/llvm-project/pull/86801 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [RFC][Clang] Enable custom type checking for printf (PR #86801)
vikramRH wrote: > I looked at the OpenCL spec for C standard library support and was surprised > that 1) it's only talking about C99 so it's unclear what happens for C11 > (clause 6 says "This document describes the modifications and restrictions to > C99 and C11 in OpenCL C" but 6.11 only talks about C99 headers and leaves > `iso646.h`, `math.h`, `stdbool.h`, `stddef.h`, (all in C99) as well as > `stdalign.h`, `stdatomic.h`, `stdnoreturn.h`, `threads.h`, and `uchar.h` > available?), and 2) OpenCL's `printf` is not really the same function as C's > `printf` > (https://registry.khronos.org/OpenCL/specs/3.0-unified/html/OpenCL_C.html#differences-between-opencl-c-and-c99-printf). > > #1 is probably more of an oversight than anything, at least with the C11 > headers. So maybe this isn't a super slippery slope, but maybe C23 will > change that (I can imagine `stdbit.h` being of use in OpenCL for bit-bashing > operations). However, the fact that the builtin isn't really `printf` but is > `printf`-like makes me think we should make it a separate builtin to avoid > surprises (we do diagnostics based on builtin IDs and we have special > checking logic that we perhaps should be exempting in some cases). Understood. Then I propose the following. 1. Currently Builtin TableGen does not seem to support specifying lang address spaces in function prototypes. this needs to be implemented first if not already in development. 2. We could have two new macro variants probably named "OCL_BUILTIN" and "OCL_LIB_BUILTIN" which will take the ID's of the form "BI_OCL##". we would also need corresponding TableGen classes (probably named similar to the macros) which can expose such overloaded prototypes when required. How does this sound ? https://github.com/llvm/llvm-project/pull/86801 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [AMDGPU] Enable OpenCL hostcall printf (WIP) (PR #72556)
@@ -3616,6 +3617,12 @@ unsigned FunctionDecl::getBuiltinID(bool ConsiderWrapperFunctions) const { if (!ConsiderWrapperFunctions && getStorageClass() == SC_Static) return 0; + // AMDGCN implementation supports printf as a builtin + // for OpenCL + if (Context.getTargetInfo().getTriple().isAMDGCN() && + Context.getLangOpts().OpenCL && BuiltinID == AMDGPU::BIprintf) +return BuiltinID; vikramRH wrote: The signatures of C-printf and OCL printf differ and I dont think generic builtin handling provides a way to register overloaded builtins with "shared" builtin ID's. do you have any alternate suggestions here ? https://github.com/llvm/llvm-project/pull/72556 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [AMDGPU] Enable OpenCL hostcall printf (WIP) (PR #72556)
@@ -2550,6 +2550,11 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, &getTarget().getLongDoubleFormat() == &llvm::APFloat::IEEEquad()) BuiltinID = mutateLongDoubleBuiltin(BuiltinID); + // Mutate the printf builtin ID so that we use the same CodeGen path for + // HIP and OpenCL with AMDGPU targets. + if (getTarget().getTriple().isAMDGCN() && BuiltinID == AMDGPU::BIprintf) +BuiltinID = Builtin::BIprintf; vikramRH wrote: This can be removed if you feel so, probably we would need a new case in Expr CodeGen https://github.com/llvm/llvm-project/pull/72556 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [AMDGPU] Enable OpenCL hostcall printf (WIP) (PR #72556)
@@ -202,12 +207,20 @@ RValue CodeGenFunction::EmitAMDGPUDevicePrintfCallExpr(const CallExpr *E) { Args.push_back(Arg); } - llvm::IRBuilder<> IRB(Builder.GetInsertBlock(), Builder.GetInsertPoint()); - IRB.SetCurrentDebugLocation(Builder.getCurrentDebugLocation()); + auto PFK = CGM.getTarget().getTargetOpts().AMDGPUPrintfKindVal; + bool isBuffered = (PFK == clang::TargetOptions::AMDGPUPrintfKind::Buffered); + + StringRef FmtStr; + if (llvm::getConstantStringInfo(Args[0], FmtStr)) { +if (FmtStr.empty()) + FmtStr = StringRef("", 1); vikramRH wrote: not really. This is just to say the format string is not really empty (i.e size = 0) when the user input is an empty format string (a weird corner case) https://github.com/llvm/llvm-project/pull/72556 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [AMDGPU] Enable OpenCL hostcall printf (WIP) (PR #72556)
vikramRH wrote: The new set of changes adds following changes, 1. The iteration over vector elements now happens using vector size from the format specifier as reference, this is inline with runtime implementation and helps handling undefined behavior when we have a mismatch. 2. The error flag "-Werror=format-invalid-specifier" has been removed. https://github.com/llvm/llvm-project/pull/72556 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [AMDGPU] Enable OpenCL hostcall printf (WIP) (PR #72556)
@@ -178,17 +181,29 @@ RValue CodeGenFunction::EmitNVPTXDevicePrintfCallExpr(const CallExpr *E) { E, this, GetVprintfDeclaration(CGM.getModule()), false); } +// Deterimines if an argument is a string +static bool isString(const clang::Type *argXTy) { vikramRH wrote: I have removed this, addrspace cast is done during arg processing. https://github.com/llvm/llvm-project/pull/72556 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [AMDGPU] Extend readlane, writelane and readfirstlane intrinsic lowering for generic types (PR #89217)
vikramRH wrote: > @jayfoad's testcase fails and the same test should be repeated for all 3 > intrinsics added MIR tests for 3 intrinsics. The issue is that Im not able to attach the glue nodes to newly created laneop pieces since they fail at selection. https://github.com/llvm/llvm-project/pull/87509 should enable this, https://github.com/llvm/llvm-project/pull/89217 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [AMDGPU] Extend readlane, writelane and readfirstlane intrinsic lowering for generic types (PR #89217)
@@ -0,0 +1,46 @@ +# RUN: not --crash llc -mtriple=amdgcn -run-pass=none -verify-machineinstrs -o /dev/null %s 2>&1 | FileCheck %s vikramRH wrote: Okay, I'll update with IR's https://github.com/llvm/llvm-project/pull/89217 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [AMDGPU] Extend readlane, writelane and readfirstlane intrinsic lowering for generic types (PR #89217)
vikramRH wrote: > > > @jayfoad's testcase fails and the same test should be repeated for all 3 > > > intrinsics > > > > > > added MIR tests for 3 intrinsics. The issue is that Im not able to attach > > the glue nodes to newly created laneop pieces since they fail at selection. > > #87509 should enable this, > > I am not really comfortable waiting for #87509 to fix convergence tokens in > this expansion. Is it really true that this expansion cannot be fixed > independent of future work on `CONVERGENCE_GLUE`? There is no way to manually > handle the same glue operands?? I guess one way would be to have custom selection for each of the new node type introduced, but would this be a proper way forward ? (this would be in general for all convergent SDNodes i guess if selection is not made generic) https://github.com/llvm/llvm-project/pull/89217 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [AMDGPU] Extend readlane, writelane and readfirstlane intrinsic lowering for generic types (PR #89217)
vikramRH wrote: > > > > @jayfoad's testcase fails and the same test should be repeated for all > > > > 3 intrinsics > > > > > > > > > added MIR tests for 3 intrinsics. The issue is that Im not able to attach > > > the glue nodes to newly created laneop pieces since they fail at > > > selection. #87509 should enable this, > > > > > > I am not really comfortable waiting for #87509 to fix convergence tokens in > > this expansion. Is it really true that this expansion cannot be fixed > > independent of future work on `CONVERGENCE_GLUE`? There is no way to > > manually handle the same glue operands?? > > I guess one way would be to have custom selection for each of the new node > type introduced, but would this be a proper way forward ? (this would be in > general for all convergent SDNodes i guess if selection is not made generic) Or drop the new nodes altogether and legelaize to intrinsics directly ? https://github.com/llvm/llvm-project/pull/89217 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [AMDGPU] Extend readlane, writelane and readfirstlane intrinsic lowering for generic types (PR #89217)
vikramRH wrote: > That's another option. The only real plus to the intermediate is it's > slightly less annoying to write combines for. But there are limited combining > opportunities for these we now legalize to intrinsics directly. The SDAG lowering uses a new helper to unroll vector cases while also handling convergence tokens https://github.com/llvm/llvm-project/pull/89217 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [AMDGPU] Extend readlane, writelane and readfirstlane intrinsic lowering for generic types (PR #89217)
@@ -0,0 +1,65 @@ +; RUN: llc -stop-after=amdgpu-isel -mtriple=amdgcn-- -mcpu=gfx1100 -verify-machineinstrs -o - %s | FileCheck --check-prefixes=CHECK,ISEL %s + +; CHECK-LABEL: name:basic_readfirstlane_i64 +; CHECK:[[TOKEN:%[0-9]+]]{{[^ ]*}} = CONVERGENCECTRL_ANCHOR vikramRH wrote: I currently see machine verifier failure which is not related to this patch. An i32 example with trunc here, https://godbolt.org/z/he8asMe77. This is also seen with wider type legalizations that we do now, so I cannot integrate these with existing tests just yet. am I missing something here ? https://github.com/llvm/llvm-project/pull/89217 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [AMDGPU] Extend readlane, writelane and readfirstlane intrinsic lowering for generic types (PR #89217)
@@ -0,0 +1,65 @@ +; RUN: llc -stop-after=amdgpu-isel -mtriple=amdgcn-- -mcpu=gfx1100 -verify-machineinstrs -o - %s | FileCheck --check-prefixes=CHECK,ISEL %s + +; CHECK-LABEL: name:basic_readfirstlane_i64 +; CHECK:[[TOKEN:%[0-9]+]]{{[^ ]*}} = CONVERGENCECTRL_ANCHOR vikramRH wrote: this is a preexisting error, and the failure is further down the pipeline. (after sreg alloc now i guess), does it make sense to have it as xfail now rather then stopping after isel? https://github.com/llvm/llvm-project/pull/89217 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [AMDGPU] Extend readlane, writelane and readfirstlane intrinsic lowering for generic types (PR #89217)
https://github.com/vikramRH edited https://github.com/llvm/llvm-project/pull/89217 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [AMDGPU] Extend readlane, writelane and readfirstlane intrinsic lowering for generic types (PR #89217)
@@ -0,0 +1,65 @@ +; RUN: llc -stop-after=amdgpu-isel -mtriple=amdgcn-- -mcpu=gfx1100 -verify-machineinstrs -o - %s | FileCheck --check-prefixes=CHECK,ISEL %s + +; CHECK-LABEL: name:basic_readfirstlane_i64 +; CHECK:[[TOKEN:%[0-9]+]]{{[^ ]*}} = CONVERGENCECTRL_ANCHOR vikramRH wrote: Makes sense, updated. https://github.com/llvm/llvm-project/pull/89217 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [clang][ExprConst] allow single element access of vector object to be constant expression (PR #72607)
vikramRH wrote: @yuanfang-chen , @AaronBallman, @shafik, are we still actively looking into this ? (I would be willing to commandeer this if its not high on your priority list) https://github.com/llvm/llvm-project/pull/72607 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [AMDGPU][WIP] Extend permlane16, permlanex16 and permlane64 intrinsic lowering for generic types (PR #92725)
@@ -18479,6 +18479,28 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, CGM.getIntrinsic(Intrinsic::amdgcn_update_dpp, Args[0]->getType()); return Builder.CreateCall(F, Args); } + case AMDGPU::BI__builtin_amdgcn_permlane16: + case AMDGPU::BI__builtin_amdgcn_permlanex16: { +llvm::Value *Src0 = EmitScalarExpr(E->getArg(0)); vikramRH wrote: added a new helper https://github.com/llvm/llvm-project/pull/92725 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [AMDGPU][WIP] Extend permlane16, permlanex16 and permlane64 intrinsic lowering for generic types (PR #92725)
vikramRH wrote: Updated this PR to be in sync with #89217, However still plan is to land this land this only after changes in #89217 are accepted. https://github.com/llvm/llvm-project/pull/92725 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [clang][ExprConst] allow single element access of vector object to be constant expression (PR #72607)
vikramRH wrote: > Hello @vikramRH, please feel free to commandeer this. Thanks @yuanfang-chen. Also, clang already rejects expressions like &V[0] (https://godbolt.org/z/eGcxzGo66), which is also true with constexprs and this PR. What's the specific concern here ? https://github.com/llvm/llvm-project/pull/72607 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [AMDGPU] Extend readlane, writelane and readfirstlane intrinsic lowering for generic types (PR #89217)
@@ -6086,6 +6086,63 @@ static SDValue lowerBALLOTIntrinsic(const SITargetLowering &TLI, SDNode *N, DAG.getConstant(0, SL, MVT::i32), DAG.getCondCode(ISD::SETNE)); } +static SDValue lowerLaneOp(const SITargetLowering &TLI, SDNode *N, + SelectionDAG &DAG) { + EVT VT = N->getValueType(0); + unsigned ValSize = VT.getSizeInBits(); + unsigned IntrinsicID = N->getConstantOperandVal(0); + SDValue Src0 = N->getOperand(1); + SDLoc SL(N); + MVT IntVT = MVT::getIntegerVT(ValSize); + + auto createLaneOp = [&DAG, &SL](SDValue Src0, SDValue Src1, SDValue Src2, + MVT VT) -> SDValue { +return (Src2 ? DAG.getNode(AMDGPUISD::WRITELANE, SL, VT, {Src0, Src1, Src2}) +: Src1 ? DAG.getNode(AMDGPUISD::READLANE, SL, VT, {Src0, Src1}) + : DAG.getNode(AMDGPUISD::READFIRSTLANE, SL, VT, {Src0})); + }; + + SDValue Src1, Src2; + if (IntrinsicID == Intrinsic::amdgcn_readlane || + IntrinsicID == Intrinsic::amdgcn_writelane) { +Src1 = N->getOperand(2); +if (IntrinsicID == Intrinsic::amdgcn_writelane) + Src2 = N->getOperand(3); + } + + if (ValSize == 32) { +// Already legal +return SDValue(); + } + + if (ValSize < 32) { +bool IsFloat = VT.isFloatingPoint(); +Src0 = DAG.getAnyExtOrTrunc(IsFloat ? DAG.getBitcast(IntVT, Src0) : Src0, +SL, MVT::i32); +if (Src2.getNode()) { + Src2 = DAG.getAnyExtOrTrunc(IsFloat ? DAG.getBitcast(IntVT, Src2) : Src2, + SL, MVT::i32); +} +SDValue LaneOp = createLaneOp(Src0, Src1, Src2, MVT::i32); +SDValue Trunc = DAG.getAnyExtOrTrunc(LaneOp, SL, IntVT); +return IsFloat ? DAG.getBitcast(VT, Trunc) : Trunc; + } + + if ((ValSize % 32) == 0) { +MVT VecVT = MVT::getVectorVT(MVT::i32, ValSize / 32); +Src0 = DAG.getBitcast(VecVT, Src0); + +if (Src2.getNode()) + Src2 = DAG.getBitcast(VecVT, Src2); + +SDValue LaneOp = createLaneOp(Src0, Src1, Src2, VecVT); +SDValue UnrolledLaneOp = DAG.UnrollVectorOp(LaneOp.getNode()); +return DAG.getBitcast(VT, UnrolledLaneOp); vikramRH wrote: ```suggestion MVT LaneOpT = VT.isVector() && VT.getVectorElementType().getSizeInBits() == 16 ? MVT::v2i16 : MVT::i32; SDValue Src0SubReg, Src2SubReg; SmallVector LaneOps; LaneOps.push_back(DAG.getTargetConstant( TLI.getRegClassFor(VT.getSimpleVT(), N->isDivergent())->getID(), SL, MVT::i32)); for (unsigned i = 0; i < (ValSize / 32); i++) { unsigned SubRegIdx = SIRegisterInfo::getSubRegFromChannel(i); Src0SubReg = DAG.getTargetExtractSubreg(SubRegIdx, SL, LaneOpT, Src0); if (Src2) Src2SubReg = DAG.getTargetExtractSubreg(SubRegIdx, SL, LaneOpT, Src2); LaneOps.push_back(createLaneOp(Src0SubReg, Src1, Src2SubReg, LaneOpT)); LaneOps.push_back(DAG.getTargetConstant(SubRegIdx, SL, MVT::i32)); } return SDValue( DAG.getMachineNode(TargetOpcode::REG_SEQUENCE, SL, VT, LaneOps), 0); ``` @arsenm , @jayfoad , an alternate idea here that is much closer in logic to the GIsel implementation and doesn't rely on bitcasts. how does this look ? https://github.com/llvm/llvm-project/pull/89217 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [AMDGPU][WIP] Extend permlane16, permlanex16 and permlane64 intrinsic lowering for generic types (PR #92725)
https://github.com/vikramRH edited https://github.com/llvm/llvm-project/pull/92725 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [AMDGPU][WIP] Extend permlane16, permlanex16 and permlane64 intrinsic lowering for generic types (PR #92725)
vikramRH wrote: 1. Added/updated tests for permlanex16, permlane64 2. This needs https://github.com/llvm/llvm-project/pull/89217 to land first so that only incremental changes can be reviewed. https://github.com/llvm/llvm-project/pull/92725 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [AMDGPU] Extend readlane, writelane and readfirstlane intrinsic lowering for generic types (PR #89217)
https://github.com/vikramRH edited https://github.com/llvm/llvm-project/pull/89217 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [AMDGPU] Extend readlane, writelane and readfirstlane intrinsic lowering for generic types (PR #89217)
@@ -6086,6 +6086,63 @@ static SDValue lowerBALLOTIntrinsic(const SITargetLowering &TLI, SDNode *N, DAG.getConstant(0, SL, MVT::i32), DAG.getCondCode(ISD::SETNE)); } +static SDValue lowerLaneOp(const SITargetLowering &TLI, SDNode *N, + SelectionDAG &DAG) { + EVT VT = N->getValueType(0); + unsigned ValSize = VT.getSizeInBits(); + unsigned IntrinsicID = N->getConstantOperandVal(0); + SDValue Src0 = N->getOperand(1); + SDLoc SL(N); + MVT IntVT = MVT::getIntegerVT(ValSize); + + auto createLaneOp = [&DAG, &SL](SDValue Src0, SDValue Src1, SDValue Src2, + MVT VT) -> SDValue { +return (Src2 ? DAG.getNode(AMDGPUISD::WRITELANE, SL, VT, {Src0, Src1, Src2}) +: Src1 ? DAG.getNode(AMDGPUISD::READLANE, SL, VT, {Src0, Src1}) + : DAG.getNode(AMDGPUISD::READFIRSTLANE, SL, VT, {Src0})); + }; + + SDValue Src1, Src2; + if (IntrinsicID == Intrinsic::amdgcn_readlane || + IntrinsicID == Intrinsic::amdgcn_writelane) { +Src1 = N->getOperand(2); +if (IntrinsicID == Intrinsic::amdgcn_writelane) + Src2 = N->getOperand(3); + } + + if (ValSize == 32) { +// Already legal +return SDValue(); + } + + if (ValSize < 32) { +bool IsFloat = VT.isFloatingPoint(); +Src0 = DAG.getAnyExtOrTrunc(IsFloat ? DAG.getBitcast(IntVT, Src0) : Src0, +SL, MVT::i32); +if (Src2.getNode()) { + Src2 = DAG.getAnyExtOrTrunc(IsFloat ? DAG.getBitcast(IntVT, Src2) : Src2, + SL, MVT::i32); +} +SDValue LaneOp = createLaneOp(Src0, Src1, Src2, MVT::i32); +SDValue Trunc = DAG.getAnyExtOrTrunc(LaneOp, SL, IntVT); +return IsFloat ? DAG.getBitcast(VT, Trunc) : Trunc; + } + + if ((ValSize % 32) == 0) { +MVT VecVT = MVT::getVectorVT(MVT::i32, ValSize / 32); vikramRH wrote: Understood. Thanks ! https://github.com/llvm/llvm-project/pull/89217 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [AMDGPU] Extend readlane, writelane and readfirstlane intrinsic lowering for generic types (PR #89217)
@@ -5387,6 +5387,124 @@ bool AMDGPULegalizerInfo::legalizeDSAtomicFPIntrinsic(LegalizerHelper &Helper, return true; } +// TODO: Fix pointer type handling +bool AMDGPULegalizerInfo::legalizeLaneOp(LegalizerHelper &Helper, + MachineInstr &MI, + Intrinsic::ID IID) const { + + MachineIRBuilder &B = Helper.MIRBuilder; + MachineRegisterInfo &MRI = *B.getMRI(); + + Register DstReg = MI.getOperand(0).getReg(); + Register Src0 = MI.getOperand(2).getReg(); + + auto createLaneOp = [&](Register Src0, Register Src1, + Register Src2) -> Register { +auto LaneOp = B.buildIntrinsic(IID, {S32}).addUse(Src0); +switch (IID) { +case Intrinsic::amdgcn_readfirstlane: + return LaneOp.getReg(0); +case Intrinsic::amdgcn_readlane: + return LaneOp.addUse(Src1).getReg(0); +case Intrinsic::amdgcn_writelane: + return LaneOp.addUse(Src1).addUse(Src2).getReg(0); +default: + llvm_unreachable("unhandled lane op"); +} + }; + + Register Src1, Src2; + if (IID == Intrinsic::amdgcn_readlane || IID == Intrinsic::amdgcn_writelane) { +Src1 = MI.getOperand(3).getReg(); +if (IID == Intrinsic::amdgcn_writelane) { + Src2 = MI.getOperand(4).getReg(); +} + } + + LLT Ty = MRI.getType(DstReg); + unsigned Size = Ty.getSizeInBits(); + + if (Size == 32) { +// Already legal +return true; + } + + if (Size < 32) { +Src0 = B.buildAnyExt(S32, Src0).getReg(0); +if (Src2.isValid()) + Src2 = B.buildAnyExt(LLT::scalar(32), Src2).getReg(0); + +Register LaneOpDst = createLaneOp(Src0, Src1, Src2); +B.buildTrunc(DstReg, LaneOpDst); + +MI.eraseFromParent(); +return true; + } + + if ((Size % 32) == 0) { +SmallVector PartialRes; +unsigned NumParts = Size / 32; +LLT PartialResTy = +Ty.isVector() && Ty.getElementType() == S16 ? V2S16 : S32; vikramRH wrote: Done https://github.com/llvm/llvm-project/pull/89217 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [AMDGPU] Extend readlane, writelane and readfirstlane intrinsic lowering for generic types (PR #89217)
@@ -1170,6 +1170,23 @@ The AMDGPU backend implements the following LLVM IR intrinsics. :ref:`llvm.set.fpenv` Sets the floating point environment to the specifies state. + llvm.amdgcn.readfirstlaneProvides direct access to v_readfirstlane_b32. Returns the value in + the lowest active lane of the input operand. Currently + implemented for i16, i32, float, half, bf16, v2i16, v2f16 and types + whose sizes are multiples of 32-bit. + + llvm.amdgcn.readlane Provides direct access to v_readlane_b32. Returns the value in the + specified lane of the first input operand. The second operand + specifies the lane to read from. Currently implemented + for i16, i32, float, half, bf16, v2i16, v2f16 and types whose sizes vikramRH wrote: Updated https://github.com/llvm/llvm-project/pull/89217 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [AMDGPU] Extend readlane, writelane and readfirstlane intrinsic lowering for generic types (PR #89217)
@@ -6086,6 +6086,63 @@ static SDValue lowerBALLOTIntrinsic(const SITargetLowering &TLI, SDNode *N, DAG.getConstant(0, SL, MVT::i32), DAG.getCondCode(ISD::SETNE)); } +static SDValue lowerLaneOp(const SITargetLowering &TLI, SDNode *N, + SelectionDAG &DAG) { + EVT VT = N->getValueType(0); + unsigned ValSize = VT.getSizeInBits(); + unsigned IntrinsicID = N->getConstantOperandVal(0); + SDValue Src0 = N->getOperand(1); + SDLoc SL(N); + MVT IntVT = MVT::getIntegerVT(ValSize); + + auto createLaneOp = [&DAG, &SL](SDValue Src0, SDValue Src1, SDValue Src2, + MVT VT) -> SDValue { +return (Src2 ? DAG.getNode(AMDGPUISD::WRITELANE, SL, VT, {Src0, Src1, Src2}) +: Src1 ? DAG.getNode(AMDGPUISD::READLANE, SL, VT, {Src0, Src1}) + : DAG.getNode(AMDGPUISD::READFIRSTLANE, SL, VT, {Src0})); + }; + + SDValue Src1, Src2; + if (IntrinsicID == Intrinsic::amdgcn_readlane || + IntrinsicID == Intrinsic::amdgcn_writelane) { +Src1 = N->getOperand(2); +if (IntrinsicID == Intrinsic::amdgcn_writelane) + Src2 = N->getOperand(3); + } + + if (ValSize == 32) { +// Already legal +return SDValue(); + } + + if (ValSize < 32) { +bool IsFloat = VT.isFloatingPoint(); +Src0 = DAG.getAnyExtOrTrunc(IsFloat ? DAG.getBitcast(IntVT, Src0) : Src0, +SL, MVT::i32); +if (Src2.getNode()) { + Src2 = DAG.getAnyExtOrTrunc(IsFloat ? DAG.getBitcast(IntVT, Src2) : Src2, + SL, MVT::i32); +} +SDValue LaneOp = createLaneOp(Src0, Src1, Src2, MVT::i32); +SDValue Trunc = DAG.getAnyExtOrTrunc(LaneOp, SL, IntVT); +return IsFloat ? DAG.getBitcast(VT, Trunc) : Trunc; + } + + if ((ValSize % 32) == 0) { +MVT VecVT = MVT::getVectorVT(MVT::i32, ValSize / 32); vikramRH wrote: Updated https://github.com/llvm/llvm-project/pull/89217 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [AMDGPU] Extend readlane, writelane and readfirstlane intrinsic lowering for generic types (PR #89217)
@@ -1170,6 +1170,23 @@ The AMDGPU backend implements the following LLVM IR intrinsics. :ref:`llvm.set.fpenv` Sets the floating point environment to the specifies state. + llvm.amdgcn.readfirstlaneProvides direct access to v_readfirstlane_b32. Returns the value in + the lowest active lane of the input operand. Currently implemented + for i16, i32, float, half, bf16, <2 x i16>, <2 x half>, <2 x bfloat>, vikramRH wrote: done https://github.com/llvm/llvm-project/pull/89217 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [AMDGPU] Extend readlane, writelane and readfirstlane intrinsic lowering for generic types (PR #89217)
@@ -5461,8 +5461,7 @@ bool AMDGPULegalizerInfo::legalizeLaneOp(LegalizerHelper &Helper, SmallVector PartialRes; unsigned NumParts = Size / 32; - MachineInstrBuilder Src0Parts, Src2Parts; - Src0Parts = B.buildUnmerge(PartialResTy, Src0); + MachineInstrBuilder Src0Parts = B.buildUnmerge(PartialResTy, Src0), Src2Parts; vikramRH wrote: Done https://github.com/llvm/llvm-project/pull/89217 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [AMDGPU] Extend readlane, writelane and readfirstlane intrinsic lowering for generic types (PR #89217)
@@ -5496,6 +5496,9 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const { NODE_NAME_CASE(LDS) NODE_NAME_CASE(FPTRUNC_ROUND_UPWARD) NODE_NAME_CASE(FPTRUNC_ROUND_DOWNWARD) + NODE_NAME_CASE(READLANE) + NODE_NAME_CASE(READFIRSTLANE) vikramRH wrote: done https://github.com/llvm/llvm-project/pull/89217 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [AMDGPU] Extend readlane, writelane and readfirstlane intrinsic lowering for generic types (PR #89217)
vikramRH wrote: > You should add the mentioned convergence-tokens.ll test function Added the test in a separate test file https://github.com/llvm/llvm-project/pull/89217 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [AMDGPU][WIP] Add support for i64/f64 readlane, writelane and readfirstlane operations. (PR #89217)
vikramRH wrote: Added/updated tests for readfirstlane and writelane ops https://github.com/llvm/llvm-project/pull/89217 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [AMDGPU][WIP] Add support for i64/f64 readlane, writelane and readfirstlane operations. (PR #89217)
vikramRH wrote: Gentle ping :) https://github.com/llvm/llvm-project/pull/89217 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [AMDGPU][WIP] Add support for i64/f64 readlane, writelane and readfirstlane operations. (PR #89217)
@@ -4822,6 +4822,111 @@ static MachineBasicBlock *lowerWaveReduce(MachineInstr &MI, return RetBB; } +static MachineBasicBlock *lowerPseudoLaneOp(MachineInstr &MI, vikramRH wrote: @arsenm, would "PreISelIntrinsicLowering" be a proper place for this ? https://github.com/llvm/llvm-project/pull/89217 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [clang][ExprConst] allow single element access of vector object to be constant expression (PR #101126)
https://github.com/vikramRH created https://github.com/llvm/llvm-project/pull/101126 None >From 690901f2370381285afa7cf7c2f7401d89e568f6 Mon Sep 17 00:00:00 2001 From: Vikram Date: Mon, 29 Jul 2024 08:56:07 -0400 Subject: [PATCH] [clang][ExprConst] allow single element access of vector object to be constant expression --- clang/docs/ReleaseNotes.rst | 3 + clang/lib/AST/ExprConstant.cpp| 102 +- clang/lib/AST/Interp/State.h | 3 +- clang/test/AST/Interp/builtin-functions.cpp | 26 ++--- clang/test/AST/Interp/vectors.cpp | 50 - clang/test/CodeGenCXX/temporaries.cpp | 41 --- .../constexpr-vectors-access-elements.cpp | 29 + 7 files changed, 190 insertions(+), 64 deletions(-) create mode 100644 clang/test/SemaCXX/constexpr-vectors-access-elements.cpp diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index ddad083571eb1..2179aaea12387 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -64,6 +64,9 @@ sections with improvements to Clang's support for those languages. C++ Language Changes +- Allow single element access of vector object to be constant expression. + Supports the `V.xyzw` syntax and other tidbits as seen in OpenCL. + Selecting multiple elements is left as a future work. C++17 Feature Support ^ diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index 558e20ed3e423..08f49ac896153 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -222,6 +222,11 @@ namespace { ArraySize = 2; MostDerivedLength = I + 1; IsArray = true; + } else if (const auto *VT = Type->getAs()) { +Type = VT->getElementType(); +ArraySize = VT->getNumElements(); +MostDerivedLength = I + 1; +IsArray = true; } else if (const FieldDecl *FD = getAsField(Path[I])) { Type = FD->getType(); ArraySize = 0; @@ -268,7 +273,6 @@ namespace { /// If the current array is an unsized array, the value of this is /// undefined. uint64_t MostDerivedArraySize; - /// The type of the most derived object referred to by this address. QualType MostDerivedType; @@ -442,6 +446,16 @@ namespace { MostDerivedArraySize = 2; MostDerivedPathLength = Entries.size(); } + +void addVectorElementUnchecked(QualType EltTy, uint64_t Size, + uint64_t Idx) { + Entries.push_back(PathEntry::ArrayIndex(Idx)); + MostDerivedType = EltTy; + MostDerivedPathLength = Entries.size(); + MostDerivedArraySize = 0; + MostDerivedIsArrayElement = false; +} + void diagnoseUnsizedArrayPointerArithmetic(EvalInfo &Info, const Expr *E); void diagnosePointerArithmetic(EvalInfo &Info, const Expr *E, const APSInt &N); @@ -1737,6 +1751,11 @@ namespace { if (checkSubobject(Info, E, Imag ? CSK_Imag : CSK_Real)) Designator.addComplexUnchecked(EltTy, Imag); } +void addVectorElement(EvalInfo &Info, const Expr *E, QualType EltTy, + uint64_t Size, uint64_t Idx) { + if (checkSubobject(Info, E, CSK_VectorElement)) +Designator.addVectorElementUnchecked(EltTy, Size, Idx); +} void clearIsNullPointer() { IsNullPtr = false; } @@ -3310,6 +3329,19 @@ static bool HandleLValueComplexElement(EvalInfo &Info, const Expr *E, return true; } +static bool HandleLValueVectorElement(EvalInfo &Info, const Expr *E, + LValue &LVal, QualType EltTy, + uint64_t Size, uint64_t Idx) { + if (Idx) { +CharUnits SizeOfElement; +if (!HandleSizeof(Info, E->getExprLoc(), EltTy, SizeOfElement)) + return false; +LVal.Offset += SizeOfElement * Idx; + } + LVal.addVectorElement(Info, E, EltTy, Size, Idx); + return true; +} + /// Try to evaluate the initializer for a variable declaration. /// /// \param Info Information about the ongoing evaluation. @@ -3855,6 +3887,19 @@ findSubobject(EvalInfo &Info, const Expr *E, const CompleteObject &Obj, return handler.found(Index ? O->getComplexFloatImag() : O->getComplexFloatReal(), ObjType); } +} else if (const auto *VT = ObjType->getAs()) { + uint64_t Index = Sub.Entries[I].getAsArrayIndex(); + if (Index >= VT->getNumElements()) { +if (Info.getLangOpts().CPlusPlus11) + Info.FFDiag(E, diag::note_constexpr_access_past_end) + << handler.AccessKind; +else + Info.FFDiag(E); +return handler.failed(); + } + ObjType = VT->getElementType(); + assert(I == N - 1 && "extracting subobject of scalar?"); + return handler.found(O->getVectorElt(Index), ObjType); } else
[clang] [clang][ExprConst] allow single element access of vector object to be constant expression (PR #101126)
vikramRH wrote: * **#101126** https://app.graphite.dev/github/pr/llvm/llvm-project/101126?utm_source=stack-comment-icon"; target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" width="10px" height="10px"/> 👈 * `main` This stack of pull requests is managed by Graphite. https://stacking.dev/?utm_source=stack-comment";>Learn more about stacking. Join @vikramRH and the rest of your teammates on https://graphite.dev?utm-source=stack-comment";>https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" width="11px" height="11px"/> Graphite https://github.com/llvm/llvm-project/pull/101126 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [clang][ExprConst] allow single element access of vector object to be constant expression (PR #101126)
https://github.com/vikramRH edited https://github.com/llvm/llvm-project/pull/101126 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [clang][ExprConst] allow single element access of vector object to be constant expression (PR #101126)
https://github.com/vikramRH ready_for_review https://github.com/llvm/llvm-project/pull/101126 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [clang][ExprConst] allow single element access of vector object to be constant expression (PR #101126)
@@ -442,6 +446,16 @@ namespace { MostDerivedArraySize = 2; MostDerivedPathLength = Entries.size(); } + +void addVectorElementUnchecked(QualType EltTy, uint64_t Size, + uint64_t Idx) { + Entries.push_back(PathEntry::ArrayIndex(Idx)); vikramRH wrote: Yes, I thought about having a new accessor of the sort "vectorIndex" but all it seems to achieve is just adding new API that returns does the exact same thing as array (other than perhaps adding a new meaning to PathEntry value). I will update it if you feel this makes sense. https://github.com/llvm/llvm-project/pull/101126 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [clang][ExprConst] allow single element access of vector object to be constant expression (PR #101126)
@@ -3,40 +3,40 @@ typedef int __attribute__((vector_size(16))) VI4; constexpr VI4 A = {1,2,3,4}; -static_assert(A[0] == 1, ""); // ref-error {{not an integral constant expression}} -static_assert(A[1] == 2, ""); // ref-error {{not an integral constant expression}} -static_assert(A[2] == 3, ""); // ref-error {{not an integral constant expression}} -static_assert(A[3] == 4, ""); // ref-error {{not an integral constant expression}} +static_assert(A[0] == 1, ""); +static_assert(A[1] == 2, ""); +static_assert(A[2] == 3, ""); +static_assert(A[3] == 4, ""); /// FIXME: It would be nice if the note said 'vector' instead of 'array'. -static_assert(A[12] == 4, ""); // ref-error {{not an integral constant expression}} \ - // expected-error {{not an integral constant expression}} \ - // expected-note {{cannot refer to element 12 of array of 4 elements in a constant expression}} +static_assert(A[12] == 4, ""); // both-error {{not an integral constant expression}} \ + // expected-note {{cannot refer to element 12 of array of 4 elements in a constant expression}} \ + // ref-note {{read of dereferenced one-past-the-end pointer is not allowed in a constant expression}} vikramRH wrote: I just kept the original version of the PR, but the message "cannot refer to element 12 of array of 4 elements" seems correct here. I shall update this https://github.com/llvm/llvm-project/pull/101126 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [AMDGPU][WIP] Add support for i64/f64 readlane, writelane and readfirstlane operations. (PR #89217)
vikramRH wrote: new commit extends @jayfoad's implementation with GIsel support. yet to add tests for half, floats and some vectors https://github.com/llvm/llvm-project/pull/89217 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [AMDGPU][WIP] Add support for i64/f64 readlane, writelane and readfirstlane operations. (PR #89217)
vikramRH wrote: 1. Review comments 2. improve GIsel lowering 3. add tests for half, bfloat, float2, ptr, vector of ptr and int 4. removed gfx700 checks from writelane test since it caused issues with f16 legalization. is this required ? https://github.com/llvm/llvm-project/pull/89217 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [AMDGPU][WIP] Extend readlane, writelane and readfirstlane intrinsic lowering for generic types (PR #89217)
https://github.com/vikramRH edited https://github.com/llvm/llvm-project/pull/89217 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [AMDGPU][WIP] Extend readlane, writelane and readfirstlane intrinsic lowering for generic types (PR #89217)
@@ -5386,6 +5386,130 @@ bool AMDGPULegalizerInfo::legalizeDSAtomicFPIntrinsic(LegalizerHelper &Helper, return true; } +bool AMDGPULegalizerInfo::legalizeLaneOp(LegalizerHelper &Helper, + MachineInstr &MI, + Intrinsic::ID IID) const { + + MachineIRBuilder &B = Helper.MIRBuilder; + MachineRegisterInfo &MRI = *B.getMRI(); + + Register DstReg = MI.getOperand(0).getReg(); + Register Src0 = MI.getOperand(2).getReg(); + + auto createLaneOp = [&](Register &Src0, Register &Src1, + Register &Src2) -> Register { +auto LaneOpDst = B.buildIntrinsic(IID, {S32}).addUse(Src0); +if (Src2.isValid()) + return (LaneOpDst.addUse(Src1).addUse(Src2)).getReg(0); +if (Src1.isValid()) + return (LaneOpDst.addUse(Src1)).getReg(0); +return LaneOpDst.getReg(0); + }; + + Register Src1, Src2, Src0Valid, Src2Valid; + if (IID == Intrinsic::amdgcn_readlane || IID == Intrinsic::amdgcn_writelane) { +Src1 = MI.getOperand(3).getReg(); +if (IID == Intrinsic::amdgcn_writelane) { + Src2 = MI.getOperand(4).getReg(); +} + } + + LLT Ty = MRI.getType(DstReg); + unsigned Size = Ty.getSizeInBits(); + + if (Size == 32) { +if (Ty.isScalar()) + // Already legal + return true; + +Register Src0Valid = B.buildBitcast(S32, Src0).getReg(0); +if (Src2.isValid()) + Src2Valid = B.buildBitcast(S32, Src2).getReg(0); +Register LaneOp = createLaneOp(Src0Valid, Src1, Src2Valid); +B.buildBitcast(DstReg, LaneOp); +MI.eraseFromParent(); +return true; + } + + if (Size < 32) { +Register Src0Cast = MRI.getType(Src0).isScalar() +? Src0 +: B.buildBitcast(LLT::scalar(Size), Src0).getReg(0); +Src0Valid = B.buildAnyExt(S32, Src0Cast).getReg(0); + +if (Src2.isValid()) { + Register Src2Cast = + MRI.getType(Src2).isScalar() + ? Src2 + : B.buildBitcast(LLT::scalar(Size), Src2).getReg(0); + Src2Valid = B.buildAnyExt(LLT::scalar(32), Src2Cast).getReg(0); +} +Register LaneOp = createLaneOp(Src0Valid, Src1, Src2Valid); +if (Ty.isScalar()) + B.buildTrunc(DstReg, LaneOp); +else { + auto Trunc = B.buildTrunc(LLT::scalar(Size), LaneOp); + B.buildBitcast(DstReg, Trunc); +} + +MI.eraseFromParent(); +return true; + } + + if ((Size % 32) == 0) { +SmallVector PartialRes; +unsigned NumParts = Size / 32; +auto Src0Parts = B.buildUnmerge(S32, Src0); + +switch (IID) { +case Intrinsic::amdgcn_readlane: { + Register Src1 = MI.getOperand(3).getReg(); + for (unsigned i = 0; i < NumParts; ++i) +PartialRes.push_back( +(B.buildIntrinsic(Intrinsic::amdgcn_readlane, {S32}) + .addUse(Src0Parts.getReg(i)) + .addUse(Src1)) +.getReg(0)); vikramRH wrote: should this be a seperate change that addresses other such instances too ? https://github.com/llvm/llvm-project/pull/89217 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits