joey updated this revision to Diff 108452.
joey added a comment.
Addressed all of your review comments and rebased.
Sorry for the long delay.
https://reviews.llvm.org/D33945
Files:
CodeGen/CGBuiltin.cpp
CodeGenOpenCL/cl20-device-side-enqueue.cl
CodeGenOpenCL/pipe_builtin.cl
Sema/SemaChecking.cpp
SemaOpenCL/cl20-device-side-enqueue.cl
SemaOpenCL/invalid-pipe-builtin-cl2.0.cl
clang/Basic/Builtins.def
Index: SemaOpenCL/invalid-pipe-builtin-cl2.0.cl
===================================================================
--- SemaOpenCL/invalid-pipe-builtin-cl2.0.cl
+++ SemaOpenCL/invalid-pipe-builtin-cl2.0.cl
@@ -1,5 +1,7 @@
// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL2.0
+#pragma OPENCL EXTENSION cl_khr_subgroups : enable
+
void test1(read_only pipe int p, global int* ptr){
int tmp;
reserve_id_t rid;
Index: SemaOpenCL/cl20-device-side-enqueue.cl
===================================================================
--- SemaOpenCL/cl20-device-side-enqueue.cl
+++ SemaOpenCL/cl20-device-side-enqueue.cl
@@ -209,3 +209,35 @@
size = get_kernel_preferred_work_group_size_multiple(1); // expected-error{{expected block argument}}
size = get_kernel_preferred_work_group_size_multiple(block_A, 1); // expected-error{{too many arguments to function call, expected 1, have 2}}
}
+
+#pragma OPENCL EXTENSION cl_khr_subgroups : enable
+
+kernel void foo(global int *buf)
+{
+ ndrange_t n;
+ buf[0] = get_kernel_max_sub_group_size_for_ndrange(n, ^(){});
+ buf[0] = get_kernel_max_sub_group_size_for_ndrange(0, ^(){}); // expected-error{{illegal call to 'get_kernel_max_sub_group_size_for_ndrange', expected 'ndrange_t' argument type}}
+ buf[0] = get_kernel_max_sub_group_size_for_ndrange(n, 1); // expected-error{{illegal call to 'get_kernel_max_sub_group_size_for_ndrange', expected block argument type}}
+}
+
+kernel void bar(global int *buf)
+{
+ ndrange_t n;
+ buf[0] = get_kernel_sub_group_count_for_ndrange(n, ^(){});
+ buf[0] = get_kernel_sub_group_count_for_ndrange(0, ^(){}); // expected-error{{illegal call to 'get_kernel_sub_group_count_for_ndrange', expected 'ndrange_t' argument type}}
+ buf[0] = get_kernel_sub_group_count_for_ndrange(n, 1); // expected-error{{illegal call to 'get_kernel_sub_group_count_for_ndrange', expected block argument type}}
+}
+
+#pragma OPENCL EXTENSION cl_khr_subgroups : disable
+
+kernel void foo1(global int *buf)
+{
+ ndrange_t n;
+ buf[0] = get_kernel_max_sub_group_size_for_ndrange(n, ^(){}); // expected-error {{use of declaration 'get_kernel_max_sub_group_size_for_ndrange' requires cl_khr_subgroups extension to be enabled}}
+}
+
+kernel void bar1(global int *buf)
+{
+ ndrange_t n;
+ buf[0] = get_kernel_sub_group_count_for_ndrange(n, ^(){}); // expected-error {{use of declaration 'get_kernel_sub_group_count_for_ndrange' requires cl_khr_subgroups extension to be enabled}}
+}
Index: CodeGenOpenCL/pipe_builtin.cl
===================================================================
--- CodeGenOpenCL/pipe_builtin.cl
+++ CodeGenOpenCL/pipe_builtin.cl
@@ -3,6 +3,8 @@
// CHECK: %opencl.pipe_t = type opaque
// CHECK: %opencl.reserve_id_t = type opaque
+#pragma OPENCL EXTENSION cl_khr_subgroups : enable
+
void test1(read_only pipe int p, global int *ptr) {
// CHECK: call i32 @__read_pipe_2(%opencl.pipe_t* %{{.*}}, i8* %{{.*}}, i32 4, i32 4)
read_pipe(p, ptr);
Index: CodeGenOpenCL/cl20-device-side-enqueue.cl
===================================================================
--- CodeGenOpenCL/cl20-device-side-enqueue.cl
+++ CodeGenOpenCL/cl20-device-side-enqueue.cl
@@ -1,6 +1,8 @@
// RUN: %clang_cc1 %s -cl-std=CL2.0 -ffake-address-space-map -O0 -emit-llvm -o - -triple "spir-unknown-unknown" | FileCheck %s --check-prefix=COMMON --check-prefix=B32
// RUN: %clang_cc1 %s -cl-std=CL2.0 -ffake-address-space-map -O0 -emit-llvm -o - -triple "spir64-unknown-unknown" | FileCheck %s --check-prefix=COMMON --check-prefix=B64
+#pragma OPENCL EXTENSION cl_khr_subgroups : enable
+
typedef void (^bl_t)(local void *);
typedef struct {int a;} ndrange_t;
@@ -138,4 +140,9 @@
size = get_kernel_preferred_work_group_size_multiple(block_A);
// COMMON: call i32 @__get_kernel_preferred_work_group_multiple_impl(i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i8**, i32, i32, i8*, %struct.__block_descriptor addrspace(2)* } addrspace(1)* [[BL_GLOBAL]] to i8 addrspace(1)*) to i8 addrspace(4)*))
size = get_kernel_preferred_work_group_size_multiple(block_G);
+
+ // COMMON: call i32 @__get_kernel_max_sub_group_size_for_ndrange_impl(%struct.ndrange_t* {{.*}}, i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i8**, i32, i32, i8*, %struct.__block_descriptor addrspace(2)* } addrspace(1)* {{.*}} to i8 addrspace(1)*) to i8 addrspace(4)*))
+ size = get_kernel_max_sub_group_size_for_ndrange(ndrange, ^(){});
+ // COMMON: call i32 @__get_kernel_sub_group_count_for_ndrange_impl(%struct.ndrange_t* {{.*}}, i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i8**, i32, i32, i8*, %struct.__block_descriptor addrspace(2)* } addrspace(1)* {{.*}} to i8 addrspace(1)*) to i8 addrspace(4)*))
+ size = get_kernel_sub_group_count_for_ndrange(ndrange, ^(){});
}
Index: Sema/SemaChecking.cpp
===================================================================
--- Sema/SemaChecking.cpp
+++ Sema/SemaChecking.cpp
@@ -299,6 +299,41 @@
return IllegalParams;
}
+/// Diagnose \p Call if cl_khr_subgroups is disabled; returns true on error.
+static bool checkOpenCLSubgroupExt(Sema &S, CallExpr *Call) {
+ if (S.getOpenCLOptions().isEnabled("cl_khr_subgroups"))
+ return false; // Extension is enabled: nothing to report.
+ S.Diag(Call->getLocStart(), diag::err_opencl_requires_extension)
+ << 1 << Call->getDirectCallee() << "cl_khr_subgroups";
+ return true; // Diagnostic emitted.
+}
+
+/// Check an (ndrange_t, block) sub-group query call; returns true on error.
+static bool SemaOpenCLBuiltinNDRangeAndBlock(Sema &S, CallExpr *TheCall) {
+ if (checkArgCount(S, TheCall, 2))
+ return true;
+
+ if (checkOpenCLSubgroupExt(S, TheCall))
+ return true;
+
+ // First argument must be an ndrange_t; ignore qualifiers such as 'const'.
+ Expr *NDRangeArg = TheCall->getArg(0);
+ if (NDRangeArg->getType().getUnqualifiedType().getAsString() != "ndrange_t") {
+ S.Diag(NDRangeArg->getLocStart(), diag::err_opencl_builtin_expected_type)
+ << TheCall->getDirectCallee() << "'ndrange_t'";
+ return true;
+ }
+
+ // Second argument must be a block.
+ Expr *BlockArg = TheCall->getArg(1);
+ if (!isBlockPointer(BlockArg)) {
+ S.Diag(BlockArg->getLocStart(), diag::err_opencl_builtin_expected_type)
+ << TheCall->getDirectCallee() << "block";
+ return true;
+ }
+ return checkOpenCLBlockArgs(S, BlockArg);
+}
+
/// OpenCL C v2.0, s6.13.17.6 - Check the argument to the
/// get_kernel_work_group_size
/// and get_kernel_preferred_work_group_size_multiple builtin functions.
@@ -647,6 +682,11 @@
return true;
}
+ // The return type of the reserve_read/write_pipe built-in functions is
+ // reserve_id_t, which is not defined in the builtin def file, so they are
+ // declared as returning int and the return type is overridden here.
+ Call->setType(S.Context.OCLReserveIDTy);
+
return false;
}
@@ -1048,22 +1088,26 @@
case Builtin::BIreserve_write_pipe:
case Builtin::BIwork_group_reserve_read_pipe:
case Builtin::BIwork_group_reserve_write_pipe:
+ if (SemaBuiltinReserveRWPipe(*this, TheCall))
+ return ExprError();
+ break;
case Builtin::BIsub_group_reserve_read_pipe:
case Builtin::BIsub_group_reserve_write_pipe:
- if (SemaBuiltinReserveRWPipe(*this, TheCall))
+ if (checkOpenCLSubgroupExt(*this, TheCall) ||
+ SemaBuiltinReserveRWPipe(*this, TheCall))
return ExprError();
- // Since return type of reserve_read/write_pipe built-in function is
- // reserve_id_t, which is not defined in the builtin def file , we used int
- // as return type and need to override the return type of these functions.
- TheCall->setType(Context.OCLReserveIDTy);
break;
case Builtin::BIcommit_read_pipe:
case Builtin::BIcommit_write_pipe:
case Builtin::BIwork_group_commit_read_pipe:
case Builtin::BIwork_group_commit_write_pipe:
+ if (SemaBuiltinCommitRWPipe(*this, TheCall))
+ return ExprError();
+ break;
case Builtin::BIsub_group_commit_read_pipe:
case Builtin::BIsub_group_commit_write_pipe:
- if (SemaBuiltinCommitRWPipe(*this, TheCall))
+ if (checkOpenCLSubgroupExt(*this, TheCall) ||
+ SemaBuiltinCommitRWPipe(*this, TheCall))
return ExprError();
break;
case Builtin::BIget_pipe_num_packets:
@@ -1088,6 +1132,12 @@
if (SemaOpenCLBuiltinKernelWorkGroupSize(*this, TheCall))
return ExprError();
break;
+ // Sub-group kernel queries: validated as an (ndrange_t, block) call.
+ case Builtin::BIget_kernel_max_sub_group_size_for_ndrange:
+ case Builtin::BIget_kernel_sub_group_count_for_ndrange:
+ if (SemaOpenCLBuiltinNDRangeAndBlock(*this, TheCall))
+ return ExprError();
+ break;
case Builtin::BI__builtin_os_log_format:
case Builtin::BI__builtin_os_log_format_buffer_size:
if (SemaBuiltinOSLogFormat(TheCall)) {
Index: CodeGen/CGBuiltin.cpp
===================================================================
--- CodeGen/CGBuiltin.cpp
+++ CodeGen/CGBuiltin.cpp
@@ -2338,7 +2338,6 @@
Value *F = CGM.getIntrinsic(Intrinsic::coro_size, T);
return RValue::get(Builder.CreateCall(F));
}
-
case Builtin::BI__builtin_coro_id:
return EmitCoroutineIntrinsic(E, Intrinsic::coro_id);
case Builtin::BI__builtin_coro_promise:
@@ -2684,6 +2683,25 @@
"__get_kernel_preferred_work_group_multiple_impl"),
Arg));
}
+ case Builtin::BIget_kernel_max_sub_group_size_for_ndrange:
+ case Builtin::BIget_kernel_sub_group_count_for_ndrange: {
+ // Emit a call to the matching *_impl runtime function, passing the
+ // address of the ndrange_t and the block cast to a generic i8 pointer.
+ llvm::Type *BlockPtrTy = Builder.getInt8PtrTy(
+ getContext().getTargetAddressSpace(LangAS::opencl_generic));
+ llvm::Value *RangePtr =
+ EmitAggExprToLValue(E->getArg(0)).getAddress().getPointer();
+ Value *BlockPtr =
+ Builder.CreatePointerCast(EmitScalarExpr(E->getArg(1)), BlockPtrTy);
+ bool IsMaxSize =
+ BuiltinID == Builtin::BIget_kernel_max_sub_group_size_for_ndrange;
+ llvm::FunctionType *ImplTy = llvm::FunctionType::get(
+ IntTy, {RangePtr->getType(), BlockPtrTy}, false);
+ llvm::Constant *Impl = CGM.CreateRuntimeFunction(
+ ImplTy, IsMaxSize ? "__get_kernel_max_sub_group_size_for_ndrange_impl"
+ : "__get_kernel_sub_group_count_for_ndrange_impl");
+ return RValue::get(Builder.CreateCall(Impl, {RangePtr, BlockPtr}));
+ }
case Builtin::BIprintf:
if (getTarget().getTriple().isNVPTX())
return EmitNVPTXDevicePrintfCallExpr(E, ReturnValue);
Index: clang/Basic/Builtins.def
===================================================================
--- clang/Basic/Builtins.def
+++ clang/Basic/Builtins.def
@@ -1398,8 +1398,10 @@
// OpenCL v2.0 s6.13.17 - Enqueue kernel functions.
// Custom builtin check allows to perform special check of passed block arguments.
LANGBUILTIN(enqueue_kernel, "i.", "tn", OCLC20_LANG)
-LANGBUILTIN(get_kernel_work_group_size, "i.", "tn", OCLC20_LANG)
-LANGBUILTIN(get_kernel_preferred_work_group_size_multiple, "i.", "tn", OCLC20_LANG)
+LANGBUILTIN(get_kernel_work_group_size, "Ui.", "tn", OCLC20_LANG)
+LANGBUILTIN(get_kernel_preferred_work_group_size_multiple, "Ui.", "tn", OCLC20_LANG)
+LANGBUILTIN(get_kernel_max_sub_group_size_for_ndrange, "Ui.", "tn", OCLC20_LANG)
+LANGBUILTIN(get_kernel_sub_group_count_for_ndrange, "Ui.", "tn", OCLC20_LANG)
// OpenCL v2.0 s6.13.9 - Address space qualifier functions.
LANGBUILTIN(to_global, "v*v*", "tn", OCLC20_LANG)
_______________________________________________
cfe-commits mailing list
[email protected]
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits