Need change llvm_sampler_fix.cpp's author and comment. The others of this patchset LGTM.
> -----Original Message----- > From: Beignet [mailto:[email protected]] On Behalf Of > Zhigang Gong > Sent: Monday, December 15, 2014 09:02 > To: [email protected] > Cc: Gong, Zhigang > Subject: [Beignet] [PATCH 1/3] GBE: switch to CLANG native sampler_t. > > CLANG has sampler_t support since LLVM 3.3, let's switch to that type rather > than the old hacky way. One major problem is the sampler static checking. As > Gen platform has some hardware restrication and if the sampler value is a > const defined at kernel side, we need to use the value to optimize the code > path. Now the sampler_t becomes an obaque type now, the CLANG doesn't > support any arithmatic operations on it. > So we have to introduce a new pass to do this optimization. > > Signed-off-by: Zhigang Gong <[email protected]> > --- > backend/src/CMakeLists.txt | 1 + > backend/src/ir/function.hpp | 5 ++ > backend/src/ir/sampler.cpp | 6 +- > backend/src/libocl/include/ocl_types.h | 4 +- > backend/src/libocl/src/ocl_image.cl | 27 ++++--- > backend/src/llvm/llvm_gen_backend.cpp | 8 +- > backend/src/llvm/llvm_gen_backend.hpp | 1 + > backend/src/llvm/llvm_sampler_fix.cpp | 144 > +++++++++++++++++++++++++++++++++ > backend/src/llvm/llvm_to_gen.cpp | 1 + > src/cl_kernel.c | 12 ++- > 10 files changed, 184 insertions(+), 25 deletions(-) create mode 100644 > backend/src/llvm/llvm_sampler_fix.cpp > > diff --git a/backend/src/CMakeLists.txt b/backend/src/CMakeLists.txt index > b4555f1..deba230 100644 > --- a/backend/src/CMakeLists.txt > +++ b/backend/src/CMakeLists.txt > @@ -73,6 +73,7 @@ set (GBE_SRC > backend/program.cpp > backend/program.hpp > backend/program.h > + llvm/llvm_sampler_fix.cpp > llvm/llvm_bitcode_link.cpp > llvm/llvm_gen_backend.cpp > llvm/llvm_passes.cpp > diff --git a/backend/src/ir/function.hpp b/backend/src/ir/function.hpp index > 0f86fef..4aea087 100644 > --- a/backend/src/ir/function.hpp > +++ b/backend/src/ir/function.hpp > @@ -204,6 +204,11 @@ namespace ir { > return isImage1dT() || isImage1dArrayT() || isImage1dBufferT() || > isImage2dT() || isImage2dArrayT() || isImage3dT(); > } > + > + bool isSamplerType() const { > + return typeName.compare("sampler_t") == 0; > + } > + > }; > > /*! Create a function input argument */ diff --git > a/backend/src/ir/sampler.cpp b/backend/src/ir/sampler.cpp index > ba42acb..e4accca 100644 > --- a/backend/src/ir/sampler.cpp > +++ b/backend/src/ir/sampler.cpp > @@ -49,11 +49,7 @@ namespace ir { > ir::FunctionArgument *arg = ctx->getFunction().getArg(samplerReg); > GBE_ASSERT(arg != NULL); > > - // XXX As LLVM 3.2/3.1 doesn't have a new data type for the sampler_t, > we have to fix up the argument > - // type here. Once we switch to the LLVM and use the new data type > sampler_t, we can remove this > - // work around. > - arg->type = ir::FunctionArgument::SAMPLER; > - arg->info.typeName = "sampler_t"; > + GBE_ASSERT(arg->type == ir::FunctionArgument::SAMPLER); > int32_t id = ctx->getFunction().getArgID(arg); > GBE_ASSERT(id < (1 << __CLK_SAMPLER_ARG_BITS)); > > diff --git a/backend/src/libocl/include/ocl_types.h > b/backend/src/libocl/include/ocl_types.h > index 49ac907..7798ee1 100644 > --- a/backend/src/libocl/include/ocl_types.h > +++ b/backend/src/libocl/include/ocl_types.h > @@ -87,8 +87,8 @@ DEF(double); > // FIXME: > // This is a transitional hack to bypass the LLVM 3.3 built-in types. > // See the Khronos SPIR specification for handling of these types. > -#define sampler_t __sampler_t > -typedef const ushort __sampler_t; > +//#define sampler_t __sampler_t > +//typedef const ushort __sampler_t; > > ///////////////////////////////////////////////////////////////////////////// > // OpenCL built-in event types > diff --git a/backend/src/libocl/src/ocl_image.cl > b/backend/src/libocl/src/ocl_image.cl > index c4ca2f8..6da8e90 100644 > --- a/backend/src/libocl/src/ocl_image.cl > +++ b/backend/src/libocl/src/ocl_image.cl > @@ -136,18 +136,24 @@ GEN_VALIDATE_ARRAY_INDEX(int, > image1d_buffer_t) // integer type surfaces correctly with > CLK_ADDRESS_CLAMP and CLK_FILTER_NEAREST. > // The work around is to use a LD message instead of normal sample > message. > > ////////////////////////////////////////////////////////////////////////////// > / > + > +bool __gen_ocl_sampler_need_fix(sampler_t); > +bool __gen_ocl_sampler_need_rounding_fix(sampler_t); > + > bool __gen_sampler_need_fix(const sampler_t sampler) { > - return (((sampler & __CLK_ADDRESS_MASK) == CLK_ADDRESS_CLAMP) > && > - ((sampler & __CLK_FILTER_MASK) == CLK_FILTER_NEAREST)); > + return __gen_ocl_sampler_need_fix(sampler); > + > +// return (((sampler & __CLK_ADDRESS_MASK) == CLK_ADDRESS_CLAMP) > && > +// ((sampler & __CLK_FILTER_MASK) == CLK_FILTER_NEAREST)); > } > > bool __gen_sampler_need_rounding_fix(const sampler_t sampler) { > - return ((sampler & CLK_NORMALIZED_COORDS_TRUE) == 0); > + return __gen_ocl_sampler_need_rounding_fix(sampler); > +// return ((sampler & CLK_NORMALIZED_COORDS_TRUE) == 0); > } > > - > INLINE_OVERLOADABLE float __gen_fixup_float_coord(float tmpCoord) { > if (tmpCoord < 0 && tmpCoord > -0x1p-20f) @@ -311,7 +317,7 @@ > INLINE_OVERLOADABLE float3 __gen_fixup_neg_boundary(float3 coord) > __gen_sampler_need_rounding_fix(sampler)) > \ > tmpCoord = __gen_fixup_float_coord(tmpCoord); > \ > if (int_clamping_fix) { > \ > - if (sampler & CLK_NORMALIZED_COORDS_TRUE) > \ > + if (!__gen_sampler_need_rounding_fix(sampler)) > \ > tmpCoord = __gen_denormalize_coord(cl_image, tmpCoord); > \ > tmpCoord = __gen_fixup_neg_boundary(tmpCoord); > \ > return __gen_ocl_read_image ##suffix( > \ > @@ -328,9 +334,10 @@ INLINE_OVERLOADABLE float3 > __gen_fixup_neg_boundary(float3 coord) > coord_type coord) > \ > { > \ > coord = __gen_validate_array_index(coord, cl_image); > \ > + sampler_t defaultSampler = CLK_NORMALIZED_COORDS_FALSE | > CLK_ADDRESS_NONE \ > + | CLK_FILTER_NEAREST; > \ > return __gen_ocl_read_image ##suffix( > \ > - cl_image, CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_NONE > \ > - | CLK_FILTER_NEAREST, coord, 0); > \ > + cl_image, defaultSampler, coord, 0); > \ > } > > #define DECL_WRITE_IMAGE(image_type, image_data_type, suffix, > coord_type) \ > @@ -419,15 +426,15 @@ INLINE_OVERLOADABLE int4 > __gen_fixup_1darray_coord(int2 coord, image1d_array_t i > __gen_sampler_need_rounding_fix(sampler)) > \ > tmpCoord = __gen_fixup_float_coord(tmpCoord); > \ > if (int_clamping_fix) { > \ > - if (sampler & CLK_NORMALIZED_COORDS_TRUE) > \ > + if (!__gen_sampler_need_rounding_fix(sampler)) > \ > tmpCoord = __gen_denormalize_coord(cl_image, tmpCoord); > \ > float4 newCoord = __gen_fixup_1darray_coord(tmpCoord, cl_image); > \ > return __gen_ocl_read_image ##suffix( > \ > - cl_image, sampler, newCoord, 2); \ > + cl_image, sampler, newCoord, 2); > \ > } > \ > } > \ > } > \ > - return __gen_ocl_read_image ##suffix(cl_image, sampler, tmpCoord, 0); > \ > + return __gen_ocl_read_image ##suffix(cl_image, sampler, tmpCoord, 0); > \ > } > > #define DECL_IMAGE_1DArray(int_clamping_fix, image_data_type, suffix) > \ > diff --git a/backend/src/llvm/llvm_gen_backend.cpp > b/backend/src/llvm/llvm_gen_backend.cpp > index 512f437..36a6cb3 100644 > --- a/backend/src/llvm/llvm_gen_backend.cpp > +++ b/backend/src/llvm/llvm_gen_backend.cpp > @@ -1591,6 +1591,12 @@ namespace gbe > continue; > } > > + if (llvmInfo.isSamplerType()) { > + ctx.input(argName, ir::FunctionArgument::SAMPLER, reg, llvmInfo, > getTypeByteSize(unit, type), getAlignmentByte(unit, type), 0); > + (void)ctx.getFunction().getSamplerSet()->append(reg, &ctx); > + continue; > + } > + > if (type->isPointerTy() == false) > ctx.input(argName, ir::FunctionArgument::VALUE, reg, llvmInfo, > getTypeByteSize(unit, type), getAlignmentByte(unit, type), 0); > else { > @@ -3013,7 +3019,7 @@ namespace gbe > // This is not a kernel argument sampler, we need to append it to > sampler > set, > // and allocate a sampler slot for it. > const ir::Immediate &x = processConstantImm(CPV); > - GBE_ASSERTM(x.getType() == ir::TYPE_U16 || x.getType() == > ir::TYPE_S16, "Invalid sampler type"); > + GBE_ASSERTM(x.getType() == ir::TYPE_U32 || x.getType() == > + ir::TYPE_S32, "Invalid sampler type"); > > index = ctx.getFunction().getSamplerSet()->append(x.getIntegerValue(), > &ctx); > } else { > diff --git a/backend/src/llvm/llvm_gen_backend.hpp > b/backend/src/llvm/llvm_gen_backend.hpp > index ed7a57e..a496c16 100644 > --- a/backend/src/llvm/llvm_gen_backend.hpp > +++ b/backend/src/llvm/llvm_gen_backend.hpp > @@ -129,6 +129,7 @@ namespace gbe > /* customized loop unrolling pass. */ > llvm::LoopPass *createCustomLoopUnrollPass(); #endif > + llvm::FunctionPass* createSamplerFixPass(); > > /*! Add all the function call of ocl to our bitcode. */ > llvm::Module* runBitCodeLinker(llvm::Module *mod, bool strictMath); diff > --git a/backend/src/llvm/llvm_sampler_fix.cpp > b/backend/src/llvm/llvm_sampler_fix.cpp > new file mode 100644 > index 0000000..ec498d0 > --- /dev/null > +++ b/backend/src/llvm/llvm_sampler_fix.cpp > @@ -0,0 +1,144 @@ > +/* > + * Copyright © 2012 Intel Corporation > + * > + * This library is free software; you can redistribute it and/or > + * modify it under the terms of the GNU Lesser General Public > + * License as published by the Free Software Foundation; either > + * version 2.1 of the License, or (at your option) any later version. > + * > + * This library is distributed in the hope that it will be useful, > + * but WITHOUT ANY WARRANTY; without even the implied warranty of > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the > GNU > + * Lesser General Public License for more details. > + * > + * You should have received a copy of the GNU Lesser General Public > + * License along with this library. If not, see > <http://www.gnu.org/licenses/>. > + * > + * Author: Ruiling, Song <[email protected]> > + * > + * Legalize unsupported integer data type i128/i256/... > + * right now, the implementation only consider little-endian system. > + * > + */ > +#include "llvm/IR/Instructions.h" > +#include "llvm/Pass.h" > +#include "llvm/PassManager.h" > + > +#include "llvm/Config/llvm-config.h" > +#include "llvm/ADT/DenseMap.h" > +#include "llvm/ADT/PostOrderIterator.h" > +#include "llvm/IR/Function.h" > +#include "llvm/IR/InstrTypes.h" > +#include "llvm/IR/Instructions.h" > +#include "llvm/IR/IntrinsicInst.h" > +#include "llvm/IR/Module.h" > +#include "llvm/Pass.h" > +#include "llvm/IR/IRBuilder.h" > +#if LLVM_VERSION_MINOR >= 5 > +#include "llvm/IR/CFG.h" > +#else > +#include "llvm/Support/CFG.h" > +#endif > + > +#include "llvm/Analysis/ConstantsScanner.h" > + > +#include "llvm_gen_backend.hpp" > +#include "ocl_common_defines.h" > + > +using namespace llvm; > + > +namespace gbe { > + > + class SamplerFix : public FunctionPass { > + public: > + SamplerFix() : FunctionPass(ID) { > +#if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 5 > + > +initializeDominatorTreeWrapperPassPass(*PassRegistry::getPassRegistry() > +); > +#else > + initializeDominatorTreePass(*PassRegistry::getPassRegistry()); > +#endif > + } > + > + bool visitCallInst(CallInst *I) { > + Value *Callee = I->getCalledValue(); > + const std::string fnName = Callee->getName(); > + bool changed = false; > + Type *boolTy = IntegerType::get(I->getContext(), 1); > + Type *i32Ty = IntegerType::get(I->getContext(), 32); > + > + if (fnName.compare("__gen_ocl_sampler_need_fix") == 0) { > + > + // return (((sampler & __CLK_ADDRESS_MASK) == > CLK_ADDRESS_CLAMP) && > + // ((sampler & __CLK_FILTER_MASK) == CLK_FILTER_NEAREST)); > + bool needFix = true; > + Value *needFixVal; > + if (dyn_cast<ConstantInt>(I->getOperand(0))) { > + const ConstantInt *ci = dyn_cast<ConstantInt>(I->getOperand(0)); > + uint32_t samplerInt = ci->getZExtValue(); > + needFix = ((samplerInt & __CLK_ADDRESS_MASK) == > CLK_ADDRESS_CLAMP && > + (samplerInt & __CLK_FILTER_MASK) == CLK_FILTER_NEAREST); > + needFixVal = ConstantInt::get(boolTy, needFix); > + } else { > + IRBuilder<> Builder(I->getParent()); > + > + Builder.SetInsertPoint(I); > + Value *addressMask = ConstantInt::get(i32Ty, > __CLK_ADDRESS_MASK); > + Value *addressMode = Builder.CreateAnd(I->getOperand(0), > addressMask); > + Value *clampInt = ConstantInt::get(i32Ty, CLK_ADDRESS_CLAMP); > + Value *isClampMode = Builder.CreateICmpEQ(addressMode, > clampInt); > + Value *filterMask = ConstantInt::get(i32Ty, __CLK_FILTER_MASK); > + Value *filterMode = Builder.CreateAnd(I->getOperand(0), > filterMask); > + Value *nearestInt = ConstantInt::get(i32Ty, CLK_FILTER_NEAREST); > + Value *isNearestMode = Builder.CreateICmpEQ(filterMode, > nearestInt); > + needFixVal = Builder.CreateAnd(isClampMode, isNearestMode); > + } > + > + I->replaceAllUsesWith(needFixVal); > + changed = true; > + } else if (fnName.compare("__gen_ocl_sampler_need_rounding_fix") > + == 0) { > + > + // return ((sampler & CLK_NORMALIZED_COORDS_TRUE) == 0); > + bool needFix = true; > + Value *needFixVal; > + if (dyn_cast<ConstantInt>(I->getOperand(0))) { > + const ConstantInt *ci = dyn_cast<ConstantInt>(I->getOperand(0)); > + uint32_t samplerInt = ci->getZExtValue(); > + needFix = samplerInt & CLK_NORMALIZED_COORDS_TRUE; > + needFixVal = ConstantInt::get(boolTy, needFix); > + } else { > + IRBuilder<> Builder(I->getParent()); > + Builder.SetInsertPoint(I); > + Value *normalizeMask = ConstantInt::get(i32Ty, > CLK_NORMALIZED_COORDS_TRUE); > + Value *normalizeMode = Builder.CreateAnd(I->getOperand(0), > normalizeMask); > + needFixVal = Builder.CreateICmpEQ(normalizeMode, > ConstantInt::get(i32Ty, 0)); > + } > + I->replaceAllUsesWith(needFixVal); > + changed = true; > + } > + return changed; > + } > + > + bool runOnFunction(Function& F) { > + bool changed = false; > + std::set<Instruction*> deadInsnSet; > + for (inst_iterator I = inst_begin(&F), E = inst_end(&F); I != E; ++I) { > + if (dyn_cast<CallInst>(&*I)) { > + if (visitCallInst(dyn_cast<CallInst>(&*I))) { > + changed = true; > + deadInsnSet.insert(&*I); > + } > + } > + } > + for (auto it: deadInsnSet) > + it->eraseFromParent(); > + return changed; > + } > + > + static char ID; > + }; > + > + FunctionPass* createSamplerFixPass() { > + return new SamplerFix(); > + } > + char SamplerFix::ID = 0; > +}; > diff --git a/backend/src/llvm/llvm_to_gen.cpp > b/backend/src/llvm/llvm_to_gen.cpp > index e1bf12f..1c247b8 100644 > --- a/backend/src/llvm/llvm_to_gen.cpp > +++ b/backend/src/llvm/llvm_to_gen.cpp > @@ -121,6 +121,7 @@ namespace gbe > MPM.add(createTypeBasedAliasAnalysisPass()); > MPM.add(createBasicAliasAnalysisPass()); > MPM.add(createIntrinsicLoweringPass()); > + MPM.add(createSamplerFixPass()); > MPM.add(createGlobalOptimizerPass()); // Optimize out global vars > > MPM.add(createIPSCCPPass()); // IP SCCP > diff --git a/src/cl_kernel.c b/src/cl_kernel.c index a869515..177cb00 100644 > --- a/src/cl_kernel.c > +++ b/src/cl_kernel.c > @@ -114,11 +114,8 @@ cl_kernel_set_arg(cl_kernel k, cl_uint index, size_t > sz, const void *value) > arg_sz = interp_kernel_get_arg_size(k->opaque, index); > > if (UNLIKELY(arg_type != GBE_ARG_LOCAL_PTR && arg_sz != sz)) { > - if (arg_sz == 2 && arg_type == GBE_ARG_VALUE && sz == > sizeof(cl_sampler)) { > - /* FIXME, this is a workaround for the case when a kernel arg > - defined a sampler_t but doesn't use it.*/ > - arg_type = GBE_ARG_SAMPLER; > - } else > + if (arg_type != GBE_ARG_SAMPLER || > + (arg_type == GBE_ARG_SAMPLER && sz != sizeof(cl_sampler))) > return CL_INVALID_ARG_SIZE; > } > > @@ -182,8 +179,9 @@ cl_kernel_set_arg(cl_kernel k, cl_uint index, size_t sz, > const void *value) > k->args[index].sampler = sampler; > cl_set_sampler_arg_slot(k, index, sampler); > offset = interp_kernel_get_curbe_offset(k->opaque, > GBE_CURBE_KERNEL_ARGUMENT, index); > - assert(offset + 2 <= k->curbe_sz); > - memcpy(k->curbe + offset, &sampler->clkSamplerValue, 2); > + //assert(arg_sz == 4); > + assert(offset + 4 <= k->curbe_sz); > + memcpy(k->curbe + offset, &sampler->clkSamplerValue, 4); > return CL_SUCCESS; > } > > -- > 1.8.3.2 > > _______________________________________________ > Beignet mailing list > [email protected] > http://lists.freedesktop.org/mailman/listinfo/beignet _______________________________________________ Beignet mailing list [email protected] http://lists.freedesktop.org/mailman/listinfo/beignet
