Thanks for the comment, will fix those and push latter.
On Thu, Dec 18, 2014 at 07:43:20AM +0000, Yang, Rong R wrote: > Need change llvm_sampler_fix.cpp's author and comment. > The others of this patchset LGTM. > > > -----Original Message----- > > From: Beignet [mailto:[email protected]] On Behalf Of > > Zhigang Gong > > Sent: Monday, December 15, 2014 09:02 > > To: [email protected] > > Cc: Gong, Zhigang > > Subject: [Beignet] [PATCH 1/3] GBE: switch to CLANG native sampler_t. > > > > CLANG has sampler_t support since LLVM 3.3, let's switch to that type rather > > than the old hacky way. One major problem is the sampler static checking. As > > Gen platform has some hardware restrication and if the sampler value is a > > const defined at kernel side, we need to use the value to optimize the code > > path. Now the sampler_t becomes an obaque type now, the CLANG doesn't > > support any arithmatic operations on it. > > So we have to introduce a new pass to do this optimization. > > > > Signed-off-by: Zhigang Gong <[email protected]> > > --- > > backend/src/CMakeLists.txt | 1 + > > backend/src/ir/function.hpp | 5 ++ > > backend/src/ir/sampler.cpp | 6 +- > > backend/src/libocl/include/ocl_types.h | 4 +- > > backend/src/libocl/src/ocl_image.cl | 27 ++++--- > > backend/src/llvm/llvm_gen_backend.cpp | 8 +- > > backend/src/llvm/llvm_gen_backend.hpp | 1 + > > backend/src/llvm/llvm_sampler_fix.cpp | 144 > > +++++++++++++++++++++++++++++++++ > > backend/src/llvm/llvm_to_gen.cpp | 1 + > > src/cl_kernel.c | 12 ++- > > 10 files changed, 184 insertions(+), 25 deletions(-) create mode 100644 > > backend/src/llvm/llvm_sampler_fix.cpp > > > > diff --git a/backend/src/CMakeLists.txt b/backend/src/CMakeLists.txt index > > b4555f1..deba230 100644 > > --- a/backend/src/CMakeLists.txt > > +++ b/backend/src/CMakeLists.txt > > @@ -73,6 +73,7 @@ set (GBE_SRC > > backend/program.cpp > > backend/program.hpp > > backend/program.h > > + llvm/llvm_sampler_fix.cpp > > llvm/llvm_bitcode_link.cpp > > llvm/llvm_gen_backend.cpp > > llvm/llvm_passes.cpp > > diff --git a/backend/src/ir/function.hpp b/backend/src/ir/function.hpp index > > 0f86fef..4aea087 100644 > > --- a/backend/src/ir/function.hpp > > +++ b/backend/src/ir/function.hpp > > @@ -204,6 +204,11 @@ namespace ir { > > return isImage1dT() || isImage1dArrayT() || isImage1dBufferT() || > > isImage2dT() || isImage2dArrayT() || isImage3dT(); > > } > > + > > + bool isSamplerType() const { > > + return typeName.compare("sampler_t") == 0; > > + } > > + > > }; > > > > /*! Create a function input argument */ diff --git > > a/backend/src/ir/sampler.cpp b/backend/src/ir/sampler.cpp index > > ba42acb..e4accca 100644 > > --- a/backend/src/ir/sampler.cpp > > +++ b/backend/src/ir/sampler.cpp > > @@ -49,11 +49,7 @@ namespace ir { > > ir::FunctionArgument *arg = ctx->getFunction().getArg(samplerReg); > > GBE_ASSERT(arg != NULL); > > > > - // XXX As LLVM 3.2/3.1 doesn't have a new data type for the sampler_t, > > we have to fix up the argument > > - // type here. Once we switch to the LLVM and use the new data type > > sampler_t, we can remove this > > - // work around. > > - arg->type = ir::FunctionArgument::SAMPLER; > > - arg->info.typeName = "sampler_t"; > > + GBE_ASSERT(arg->type == ir::FunctionArgument::SAMPLER); > > int32_t id = ctx->getFunction().getArgID(arg); > > GBE_ASSERT(id < (1 << __CLK_SAMPLER_ARG_BITS)); > > > > diff --git a/backend/src/libocl/include/ocl_types.h > > b/backend/src/libocl/include/ocl_types.h > > index 49ac907..7798ee1 100644 > > --- a/backend/src/libocl/include/ocl_types.h > > +++ b/backend/src/libocl/include/ocl_types.h > > @@ -87,8 +87,8 @@ DEF(double); > > // FIXME: > > // This is a transitional hack to bypass the LLVM 3.3 built-in types. > > // See the Khronos SPIR specification for handling of these types. > > -#define sampler_t __sampler_t > > -typedef const ushort __sampler_t; > > +//#define sampler_t __sampler_t > > +//typedef const ushort __sampler_t; > > > > > > ///////////////////////////////////////////////////////////////////////////// > > // OpenCL built-in event types > > diff --git a/backend/src/libocl/src/ocl_image.cl > > b/backend/src/libocl/src/ocl_image.cl > > index c4ca2f8..6da8e90 100644 > > --- a/backend/src/libocl/src/ocl_image.cl > > +++ b/backend/src/libocl/src/ocl_image.cl > > @@ -136,18 +136,24 @@ GEN_VALIDATE_ARRAY_INDEX(int, > > image1d_buffer_t) // integer type surfaces correctly with > > CLK_ADDRESS_CLAMP and CLK_FILTER_NEAREST. > > // The work around is to use a LD message instead of normal sample > > message. > > > > ////////////////////////////////////////////////////////////////////////////// > > / > > + > > +bool __gen_ocl_sampler_need_fix(sampler_t); > > +bool __gen_ocl_sampler_need_rounding_fix(sampler_t); > > + > > bool __gen_sampler_need_fix(const sampler_t sampler) { > > - return (((sampler & __CLK_ADDRESS_MASK) == CLK_ADDRESS_CLAMP) > > && > > - ((sampler & __CLK_FILTER_MASK) == CLK_FILTER_NEAREST)); > > + return __gen_ocl_sampler_need_fix(sampler); > > + > > +// return (((sampler & __CLK_ADDRESS_MASK) == CLK_ADDRESS_CLAMP) > > && > > +// ((sampler & __CLK_FILTER_MASK) == CLK_FILTER_NEAREST)); > > } > > > > bool __gen_sampler_need_rounding_fix(const sampler_t sampler) { > > - return ((sampler & CLK_NORMALIZED_COORDS_TRUE) == 0); > > + return __gen_ocl_sampler_need_rounding_fix(sampler); > > +// return ((sampler & CLK_NORMALIZED_COORDS_TRUE) == 0); > > } > > > > - > > INLINE_OVERLOADABLE float __gen_fixup_float_coord(float tmpCoord) { > > if (tmpCoord < 0 && tmpCoord > -0x1p-20f) @@ -311,7 +317,7 @@ > > INLINE_OVERLOADABLE float3 __gen_fixup_neg_boundary(float3 coord) > > __gen_sampler_need_rounding_fix(sampler)) > > \ > > tmpCoord = __gen_fixup_float_coord(tmpCoord); > > \ > > if (int_clamping_fix) { > > \ > > - if (sampler & CLK_NORMALIZED_COORDS_TRUE) > > \ > > + if (!__gen_sampler_need_rounding_fix(sampler)) > > \ > > tmpCoord = __gen_denormalize_coord(cl_image, tmpCoord); > > \ > > tmpCoord = __gen_fixup_neg_boundary(tmpCoord); > > \ > > return __gen_ocl_read_image ##suffix( > > \ > > @@ -328,9 +334,10 @@ INLINE_OVERLOADABLE float3 > > __gen_fixup_neg_boundary(float3 coord) > > coord_type coord) > > \ > > { > > \ > > coord = __gen_validate_array_index(coord, cl_image); > > \ > > + sampler_t defaultSampler = CLK_NORMALIZED_COORDS_FALSE | > > CLK_ADDRESS_NONE \ > > + | CLK_FILTER_NEAREST; > > \ > > return __gen_ocl_read_image ##suffix( > > \ > > - cl_image, CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_NONE > > \ > > - | CLK_FILTER_NEAREST, coord, 0); > > \ > > + cl_image, defaultSampler, coord, 0); > > \ > > } > > > > #define DECL_WRITE_IMAGE(image_type, image_data_type, suffix, > > coord_type) \ > > @@ -419,15 +426,15 @@ INLINE_OVERLOADABLE int4 > > __gen_fixup_1darray_coord(int2 coord, image1d_array_t i > > __gen_sampler_need_rounding_fix(sampler)) > > \ > > tmpCoord = __gen_fixup_float_coord(tmpCoord); > > \ > > if (int_clamping_fix) { > > \ > > - if (sampler & CLK_NORMALIZED_COORDS_TRUE) > > \ > > + if (!__gen_sampler_need_rounding_fix(sampler)) > > \ > > tmpCoord = __gen_denormalize_coord(cl_image, tmpCoord); > > \ > > float4 newCoord = __gen_fixup_1darray_coord(tmpCoord, > > cl_image); > > \ > > return __gen_ocl_read_image ##suffix( > > \ > > - cl_image, sampler, newCoord, 2); > > \ > > + cl_image, sampler, newCoord, 2); > > \ > > } > > \ > > } > > \ > > } > > \ > > - return __gen_ocl_read_image ##suffix(cl_image, sampler, tmpCoord, 0); > > \ > > + return __gen_ocl_read_image ##suffix(cl_image, sampler, tmpCoord, 0); > > \ > > } > > > > #define DECL_IMAGE_1DArray(int_clamping_fix, image_data_type, suffix) > > \ > > diff --git a/backend/src/llvm/llvm_gen_backend.cpp > > b/backend/src/llvm/llvm_gen_backend.cpp > > index 512f437..36a6cb3 100644 > > --- a/backend/src/llvm/llvm_gen_backend.cpp > > +++ b/backend/src/llvm/llvm_gen_backend.cpp > > @@ -1591,6 +1591,12 @@ namespace gbe > > continue; > > } > > > > + if (llvmInfo.isSamplerType()) { > > + ctx.input(argName, ir::FunctionArgument::SAMPLER, reg, llvmInfo, > > getTypeByteSize(unit, type), getAlignmentByte(unit, type), 0); > > + (void)ctx.getFunction().getSamplerSet()->append(reg, &ctx); > > + continue; > > + } > > + > > if (type->isPointerTy() == false) > > ctx.input(argName, ir::FunctionArgument::VALUE, reg, llvmInfo, > > getTypeByteSize(unit, type), getAlignmentByte(unit, type), 0); > > else { > > @@ -3013,7 +3019,7 @@ namespace gbe > > // This is not a kernel argument sampler, we need to append it to > > sampler > > set, > > // and allocate a sampler slot for it. > > const ir::Immediate &x = processConstantImm(CPV); > > - GBE_ASSERTM(x.getType() == ir::TYPE_U16 || x.getType() == > > ir::TYPE_S16, "Invalid sampler type"); > > + GBE_ASSERTM(x.getType() == ir::TYPE_U32 || x.getType() == > > + ir::TYPE_S32, "Invalid sampler type"); > > > > index = > > ctx.getFunction().getSamplerSet()->append(x.getIntegerValue(), > > &ctx); > > } else { > > diff --git a/backend/src/llvm/llvm_gen_backend.hpp > > b/backend/src/llvm/llvm_gen_backend.hpp > > index ed7a57e..a496c16 100644 > > --- a/backend/src/llvm/llvm_gen_backend.hpp > > +++ b/backend/src/llvm/llvm_gen_backend.hpp > > @@ -129,6 +129,7 @@ namespace gbe > > /* customized loop unrolling pass. */ > > llvm::LoopPass *createCustomLoopUnrollPass(); #endif > > + llvm::FunctionPass* createSamplerFixPass(); > > > > /*! Add all the function call of ocl to our bitcode. */ > > llvm::Module* runBitCodeLinker(llvm::Module *mod, bool strictMath); diff > > --git a/backend/src/llvm/llvm_sampler_fix.cpp > > b/backend/src/llvm/llvm_sampler_fix.cpp > > new file mode 100644 > > index 0000000..ec498d0 > > --- /dev/null > > +++ b/backend/src/llvm/llvm_sampler_fix.cpp > > @@ -0,0 +1,144 @@ > > +/* > > + * Copyright © 2012 Intel Corporation > > + * > > + * This library is free software; you can redistribute it and/or > > + * modify it under the terms of the GNU Lesser General Public > > + * License as published by the Free Software Foundation; either > > + * version 2.1 of the License, or (at your option) any later version. > > + * > > + * This library is distributed in the hope that it will be useful, > > + * but WITHOUT ANY WARRANTY; without even the implied warranty of > > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the > > GNU > > + * Lesser General Public License for more details. > > + * > > + * You should have received a copy of the GNU Lesser General Public > > + * License along with this library. If not, see > > <http://www.gnu.org/licenses/>. > > + * > > + * Author: Ruiling, Song <[email protected]> > > + * > > + * Legalize unsupported integer data type i128/i256/... > > + * right now, the implementation only consider little-endian system. > > + * > > + */ > > +#include "llvm/IR/Instructions.h" > > +#include "llvm/Pass.h" > > +#include "llvm/PassManager.h" > > + > > +#include "llvm/Config/llvm-config.h" > > +#include "llvm/ADT/DenseMap.h" > > +#include "llvm/ADT/PostOrderIterator.h" > > +#include "llvm/IR/Function.h" > > +#include "llvm/IR/InstrTypes.h" > > +#include "llvm/IR/Instructions.h" > > +#include "llvm/IR/IntrinsicInst.h" > > +#include "llvm/IR/Module.h" > > +#include "llvm/Pass.h" > > +#include "llvm/IR/IRBuilder.h" > > +#if LLVM_VERSION_MINOR >= 5 > > +#include "llvm/IR/CFG.h" > > +#else > > +#include "llvm/Support/CFG.h" > > +#endif > > + > > +#include "llvm/Analysis/ConstantsScanner.h" > > + > > +#include "llvm_gen_backend.hpp" > > +#include "ocl_common_defines.h" > > + > > +using namespace llvm; > > + > > +namespace gbe { > > + > > + class SamplerFix : public FunctionPass { > > + public: > > + SamplerFix() : FunctionPass(ID) { > > +#if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 5 > > + > > +initializeDominatorTreeWrapperPassPass(*PassRegistry::getPassRegistry() > > +); > > +#else > > + initializeDominatorTreePass(*PassRegistry::getPassRegistry()); > > +#endif > > + } > > + > > + bool visitCallInst(CallInst *I) { > > + Value *Callee = I->getCalledValue(); > > + const std::string fnName = Callee->getName(); > > + bool changed = false; > > + Type *boolTy = IntegerType::get(I->getContext(), 1); > > + Type *i32Ty = IntegerType::get(I->getContext(), 32); > > + > > + if (fnName.compare("__gen_ocl_sampler_need_fix") == 0) { > > + > > + // return (((sampler & __CLK_ADDRESS_MASK) == > > CLK_ADDRESS_CLAMP) && > > + // ((sampler & __CLK_FILTER_MASK) == CLK_FILTER_NEAREST)); > > + bool needFix = true; > > + Value *needFixVal; > > + if (dyn_cast<ConstantInt>(I->getOperand(0))) { > > + const ConstantInt *ci = dyn_cast<ConstantInt>(I->getOperand(0)); > > + uint32_t samplerInt = ci->getZExtValue(); > > + needFix = ((samplerInt & __CLK_ADDRESS_MASK) == > > CLK_ADDRESS_CLAMP && > > + (samplerInt & __CLK_FILTER_MASK) == > > CLK_FILTER_NEAREST); > > + needFixVal = ConstantInt::get(boolTy, needFix); > > + } else { > > + IRBuilder<> Builder(I->getParent()); > > + > > + Builder.SetInsertPoint(I); > > + Value *addressMask = ConstantInt::get(i32Ty, > > __CLK_ADDRESS_MASK); > > + Value *addressMode = Builder.CreateAnd(I->getOperand(0), > > addressMask); > > + Value *clampInt = ConstantInt::get(i32Ty, CLK_ADDRESS_CLAMP); > > + Value *isClampMode = Builder.CreateICmpEQ(addressMode, > > clampInt); > > + Value *filterMask = ConstantInt::get(i32Ty, __CLK_FILTER_MASK); > > + Value *filterMode = Builder.CreateAnd(I->getOperand(0), > > filterMask); > > + Value *nearestInt = ConstantInt::get(i32Ty, CLK_FILTER_NEAREST); > > + Value *isNearestMode = Builder.CreateICmpEQ(filterMode, > > nearestInt); > > + needFixVal = Builder.CreateAnd(isClampMode, isNearestMode); > > + } > > + > > + I->replaceAllUsesWith(needFixVal); > > + changed = true; > > + } else if (fnName.compare("__gen_ocl_sampler_need_rounding_fix") > > + == 0) { > > + > > + // return ((sampler & CLK_NORMALIZED_COORDS_TRUE) == 0); > > + bool needFix = true; > > + Value *needFixVal; > > + if (dyn_cast<ConstantInt>(I->getOperand(0))) { > > + const ConstantInt *ci = dyn_cast<ConstantInt>(I->getOperand(0)); > > + uint32_t samplerInt = ci->getZExtValue(); > > + needFix = samplerInt & CLK_NORMALIZED_COORDS_TRUE; > > + needFixVal = ConstantInt::get(boolTy, needFix); > > + } else { > > + IRBuilder<> Builder(I->getParent()); > > + Builder.SetInsertPoint(I); > > + Value *normalizeMask = ConstantInt::get(i32Ty, > > CLK_NORMALIZED_COORDS_TRUE); > > + Value *normalizeMode = Builder.CreateAnd(I->getOperand(0), > > normalizeMask); > > + needFixVal = Builder.CreateICmpEQ(normalizeMode, > > ConstantInt::get(i32Ty, 0)); > > + } > > + I->replaceAllUsesWith(needFixVal); > > + changed = true; > > + } > > + return changed; > > + } > > + > > + bool runOnFunction(Function& F) { > > + bool changed = false; > > + std::set<Instruction*> deadInsnSet; > > + for (inst_iterator I = inst_begin(&F), E = inst_end(&F); I != E; > > ++I) { > > + if (dyn_cast<CallInst>(&*I)) { > > + if (visitCallInst(dyn_cast<CallInst>(&*I))) { > > + changed = true; > > + deadInsnSet.insert(&*I); > > + } > > + } > > + } > > + for (auto it: deadInsnSet) > > + it->eraseFromParent(); > > + return changed; > > + } > > + > > + static char ID; > > + }; > > + > > + FunctionPass* createSamplerFixPass() { > > + return new SamplerFix(); > > + } > > + char SamplerFix::ID = 0; > > +}; > > diff --git a/backend/src/llvm/llvm_to_gen.cpp > > b/backend/src/llvm/llvm_to_gen.cpp > > index e1bf12f..1c247b8 100644 > > --- a/backend/src/llvm/llvm_to_gen.cpp > > +++ b/backend/src/llvm/llvm_to_gen.cpp > > @@ -121,6 +121,7 @@ namespace gbe > > MPM.add(createTypeBasedAliasAnalysisPass()); > > MPM.add(createBasicAliasAnalysisPass()); > > MPM.add(createIntrinsicLoweringPass()); > > + MPM.add(createSamplerFixPass()); > > MPM.add(createGlobalOptimizerPass()); // Optimize out global vars > > > > MPM.add(createIPSCCPPass()); // IP SCCP > > diff --git a/src/cl_kernel.c b/src/cl_kernel.c index a869515..177cb00 100644 > > --- a/src/cl_kernel.c > > +++ b/src/cl_kernel.c > > @@ -114,11 +114,8 @@ cl_kernel_set_arg(cl_kernel k, cl_uint index, size_t > > sz, const void *value) > > arg_sz = interp_kernel_get_arg_size(k->opaque, index); > > > > if (UNLIKELY(arg_type != GBE_ARG_LOCAL_PTR && arg_sz != sz)) { > > - if (arg_sz == 2 && arg_type == GBE_ARG_VALUE && sz == > > sizeof(cl_sampler)) { > > - /* FIXME, this is a workaround for the case when a kernel arg > > - defined a sampler_t but doesn't use it.*/ > > - arg_type = GBE_ARG_SAMPLER; > > - } else > > + if (arg_type != GBE_ARG_SAMPLER || > > + (arg_type == GBE_ARG_SAMPLER && sz != sizeof(cl_sampler))) > > return CL_INVALID_ARG_SIZE; > > } > > > > @@ -182,8 +179,9 @@ cl_kernel_set_arg(cl_kernel k, cl_uint index, size_t sz, > > const void *value) > > k->args[index].sampler = sampler; > > cl_set_sampler_arg_slot(k, index, sampler); > > offset = interp_kernel_get_curbe_offset(k->opaque, > > GBE_CURBE_KERNEL_ARGUMENT, index); > > - assert(offset + 2 <= k->curbe_sz); > > - memcpy(k->curbe + offset, &sampler->clkSamplerValue, 2); > > + //assert(arg_sz == 4); > > + assert(offset + 4 <= k->curbe_sz); > > + memcpy(k->curbe + offset, &sampler->clkSamplerValue, 4); > > return CL_SUCCESS; > > } > > > > -- > > 1.8.3.2 > > > > _______________________________________________ > > Beignet mailing list > > [email protected] > > http://lists.freedesktop.org/mailman/listinfo/beignet > _______________________________________________ > Beignet mailing list > [email protected] > http://lists.freedesktop.org/mailman/listinfo/beignet _______________________________________________ Beignet mailing list [email protected] http://lists.freedesktop.org/mailman/listinfo/beignet
