arsenm created this revision. Herald added subscribers: t-tye, tpr, dstuttard, nhaehnle, wdng.
https://reviews.llvm.org/D38770

Files:
  include/clang/Basic/TargetInfo.h
  lib/Basic/Targets/AMDGPU.cpp
  lib/Basic/Targets/AMDGPU.h
  lib/CodeGen/CGBuiltin.cpp
  test/CodeGenOpenCL/builtins-amdgcn.cl
  test/CodeGenOpenCL/builtins-r600.cl
Index: test/CodeGenOpenCL/builtins-r600.cl =================================================================== --- test/CodeGenOpenCL/builtins-r600.cl +++ test/CodeGenOpenCL/builtins-r600.cl @@ -52,4 +52,4 @@ } } -// CHECK-DAG: [[WI_RANGE]] = !{i32 0, i32 1024} +// CHECK-DAG: [[WI_RANGE]] = !{i32 0, i32 256} Index: test/CodeGenOpenCL/builtins-amdgcn.cl =================================================================== --- test/CodeGenOpenCL/builtins-amdgcn.cl +++ test/CodeGenOpenCL/builtins-amdgcn.cl @@ -507,7 +507,7 @@ *out = __builtin_amdgcn_s_getpc(); } -// CHECK-DAG: [[WI_RANGE]] = !{i32 0, i32 1024} +// CHECK-DAG: [[WI_RANGE]] = !{i32 0, i32 256} // CHECK-DAG: attributes #[[NOUNWIND_READONLY:[0-9]+]] = { nounwind readonly } // CHECK-DAG: attributes #[[READ_EXEC_ATTRS]] = { convergent } // CHECK-DAG: ![[EXEC]] = !{!"exec"} Index: lib/CodeGen/CGBuiltin.cpp =================================================================== --- lib/CodeGen/CGBuiltin.cpp +++ lib/CodeGen/CGBuiltin.cpp @@ -9115,22 +9115,28 @@ // amdgcn workitem case AMDGPU::BI__builtin_amdgcn_workitem_id_x: - return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_x, 0, 1024); + return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_x, 0, + getContext().getTargetInfo().getOpenCLMaxWorkGroupSize(0)); case AMDGPU::BI__builtin_amdgcn_workitem_id_y: - return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_y, 0, 1024); + return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_y, 0, + getContext().getTargetInfo().getOpenCLMaxWorkGroupSize(1)); case AMDGPU::BI__builtin_amdgcn_workitem_id_z: - return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_z, 0, 1024); + return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_z, 0, + getContext().getTargetInfo().getOpenCLMaxWorkGroupSize(2)); // r600 intrinsics case AMDGPU::BI__builtin_r600_recipsqrt_ieee: case AMDGPU::BI__builtin_r600_recipsqrt_ieeef: return emitUnaryBuiltin(*this, E, Intrinsic::r600_recipsqrt_ieee); case 
AMDGPU::BI__builtin_r600_read_tidig_x: - return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_x, 0, 1024); + return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_x, 0, + getContext().getTargetInfo().getOpenCLMaxWorkGroupSize(0)); case AMDGPU::BI__builtin_r600_read_tidig_y: - return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_y, 0, 1024); + return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_y, 0, + getContext().getTargetInfo().getOpenCLMaxWorkGroupSize(1)); case AMDGPU::BI__builtin_r600_read_tidig_z: - return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_z, 0, 1024); + return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_z, 0, + getContext().getTargetInfo().getOpenCLMaxWorkGroupSize(2)); default: return nullptr; } Index: lib/Basic/Targets/AMDGPU.h =================================================================== --- lib/Basic/Targets/AMDGPU.h +++ lib/Basic/Targets/AMDGPU.h @@ -70,6 +70,10 @@ bool hasLDEXPF : 1; const AddrSpace AS; + // The hardware limit is really 1024 or 2048, but the runtime currently only + // supports 256. + unsigned MaxWorkGroupSize = 1024; + static bool hasFullSpeedFMAF32(StringRef GPUName) { return parseAMDGCNName(GPUName) >= GK_GFX9; } @@ -279,6 +283,10 @@ } } + unsigned getOpenCLMaxWorkGroupSize(unsigned Dim) const override { + return MaxWorkGroupSize; + } + llvm::Optional<unsigned> getConstantAddressSpace() const override { return LangAS::FirstTargetAddressSpace + AS.Constant; } Index: lib/Basic/Targets/AMDGPU.cpp =================================================================== --- lib/Basic/Targets/AMDGPU.cpp +++ lib/Basic/Targets/AMDGPU.cpp @@ -340,6 +340,10 @@ void AMDGPUTargetInfo::adjust(LangOptions &Opts) { TargetInfo::adjust(Opts); setAddressSpaceMap(Opts.OpenCL || !isAMDGCN(getTriple())); + + // TODO: Add option to force hardware maximum. 
+ if (Opts.OpenCL) + MaxWorkGroupSize = 256; } ArrayRef<Builtin::Info> AMDGPUTargetInfo::getTargetBuiltins() const { Index: include/clang/Basic/TargetInfo.h =================================================================== --- include/clang/Basic/TargetInfo.h +++ include/clang/Basic/TargetInfo.h @@ -1060,6 +1060,11 @@ /// \brief Get address space for OpenCL type. virtual LangAS::ID getOpenCLTypeAddrSpace(const Type *T) const; + /// \returns Maximum device supported OpenCL workgroup size. + virtual unsigned getOpenCLMaxWorkGroupSize(unsigned Dim) const { + return 0; + } + /// \returns Target specific vtbl ptr address space. virtual unsigned getVtblPtrAddressSpace() const { return 0;
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits