LGTM, pushed, thanks.
> -----Original Message----- > From: Beignet [mailto:[email protected]] On Behalf Of > Pan Xiuli > Sent: Tuesday, November 24, 2015 15:52 > To: [email protected] > Cc: Pan, Xiuli > Subject: [Beignet] [PATCH V4 2/2] Backend: add debugwait function > > Use wait function to extend a debug function: > void debugwait(void) > This function hangs the GPU until the GPU is reset or the host sends something to let > it go. > EXTREMELY DANGEROUS for machines with hangcheck turned off > > v2: > Fix some bugs, and add setting predicate and execwidth, also modify some > inst scheduling > > v3: > Add push and pop in instruction selection, and set nomask with execwidth. > > v4: > Fix barrier predicate setting bugs, and rebase the patch > > Signed-off-by: Pan Xiuli <[email protected]> > --- > backend/src/backend/gen_context.cpp | 3 ++- > backend/src/backend/gen_encoder.cpp | 1 + > backend/src/backend/gen_insn_scheduling.cpp | 3 ++- > backend/src/backend/gen_insn_selection.cpp | 28 > +++++++++++++++++++++++-- > backend/src/backend/gen_insn_selection.hpp | 1 + > backend/src/ir/instruction.cpp | 32 > ++++++++++++++++++++++++++++- > backend/src/ir/instruction.hpp | 10 +++++++++ > backend/src/ir/instruction.hxx | 1 + > backend/src/libocl/include/ocl_sync.h | 1 + > backend/src/libocl/src/ocl_barrier.ll | 6 ++++++ > backend/src/libocl/src/ocl_sync.cl | 1 + > backend/src/llvm/llvm_gen_backend.cpp | 6 ++++++ > backend/src/llvm/llvm_gen_ocl_function.hxx | 3 +++ > 13 files changed, 91 insertions(+), 5 deletions(-) > > diff --git a/backend/src/backend/gen_context.cpp > b/backend/src/backend/gen_context.cpp > index 41fe72d..43fa7fa 100644 > --- a/backend/src/backend/gen_context.cpp > +++ b/backend/src/backend/gen_context.cpp > @@ -1804,7 +1804,7 @@ namespace gbe > } > > void GenContext::emitWaitInstruction(const SelectionInstruction &insn) { > - p->WAIT(); > + p->WAIT(insn.extra.waitType); > } > > void GenContext::emitBarrierInstruction(const SelectionInstruction &insn) > { @@ -1829,6 +1829,7 @@ namespace gbe 
> p->BARRIER(src); > p->curr.execWidth = 1; > // Now we wait for the other threads > + p->curr.predicate = GEN_PREDICATE_NONE; > p->WAIT(); > p->pop(); > } > diff --git a/backend/src/backend/gen_encoder.cpp > b/backend/src/backend/gen_encoder.cpp > index 1ad4f01..7c4357a 100644 > --- a/backend/src/backend/gen_encoder.cpp > +++ b/backend/src/backend/gen_encoder.cpp > @@ -996,6 +996,7 @@ namespace gbe > > void GenEncoder::WAIT(uint32_t n) { > GenNativeInstruction *insn = this->next(GEN_OPCODE_WAIT); > + GBE_ASSERT(curr.predicate == GEN_PREDICATE_NONE); > GenRegister src = GenRegister::notification0(n); > this->setDst(insn, GenRegister::null()); > this->setSrc0(insn, src); > diff --git a/backend/src/backend/gen_insn_scheduling.cpp > b/backend/src/backend/gen_insn_scheduling.cpp > index 43f67c9..8111e0c 100644 > --- a/backend/src/backend/gen_insn_scheduling.cpp > +++ b/backend/src/backend/gen_insn_scheduling.cpp > @@ -591,7 +591,8 @@ namespace gbe > || node->insn.opcode == SEL_OP_READ_ARF > || node->insn.opcode == SEL_OP_BARRIER > || node->insn.opcode == SEL_OP_CALC_TIMESTAMP > - || node->insn.opcode == SEL_OP_STORE_PROFILING) > + || node->insn.opcode == SEL_OP_STORE_PROFILING > + || node->insn.opcode == SEL_OP_WAIT) > tracker.makeBarrier(insnID, insnNum); > } > > diff --git a/backend/src/backend/gen_insn_selection.cpp > b/backend/src/backend/gen_insn_selection.cpp > index e1cf6f7..ed7514c 100644 > --- a/backend/src/backend/gen_insn_selection.cpp > +++ b/backend/src/backend/gen_insn_selection.cpp > @@ -619,7 +619,7 @@ namespace gbe > /*! No-op */ > void NOP(void); > /*! Wait instruction (used for the barrier) */ > - void WAIT(void); > + void WAIT(uint32_t n = 0); > /*! Atomic instruction */ > void ATOMIC(Reg dst, uint32_t function, uint32_t srcNum, Reg src0, Reg > src1, Reg src2, GenRegister bti, vector<GenRegister> temps); > /*! 
Read 64 bits float/int array */ @@ -1293,7 +1293,11 @@ namespace > gbe > > void Selection::Opaque::EOT(void) { this->appendInsn(SEL_OP_EOT, 0, 0); } > void Selection::Opaque::NOP(void) { this->appendInsn(SEL_OP_NOP, 0, 0); } > - void Selection::Opaque::WAIT(void) { this->appendInsn(SEL_OP_WAIT, 0, > 0); } > + void Selection::Opaque::WAIT(uint32_t n) { > + SelectionInstruction *insn = this->appendInsn(SEL_OP_WAIT, 0, 0); > + insn->extra.waitType = n; > + } > > void Selection::Opaque::READ64(Reg addr, > const GenRegister *dst, @@ -3465,6 +3469,25 > @@ > namespace gbe > DECL_CTOR(SyncInstruction, 1,1); > }; > > + /*! Wait instruction */ > + DECL_PATTERN(WaitInstruction) > + { > + INLINE bool emitOne(Selection::Opaque &sel, const ir::WaitInstruction > &insn, bool &markChildren) const > + { > + using namespace ir; > + // Debugwait will use reg 1, which is different from barrier > + sel.push(); > + sel.curr.noMask = 1; > + sel.curr.execWidth = 1; > + sel.curr.predicate = GEN_PREDICATE_NONE; > + sel.WAIT(1); > + sel.pop(); > + return true; > + } > + > + DECL_CTOR(WaitInstruction, 1,1); > + }; > + > INLINE uint32_t getByteScatterGatherSize(Selection::Opaque &sel, ir::Type > type) { > using namespace ir; > switch (type) { > @@ -5978,6 +6001,7 @@ namespace gbe > this->insert<CalcTimestampInstructionPattern>(); > this->insert<StoreProfilingInstructionPattern>(); > this->insert<NullaryInstructionPattern>(); > + this->insert<WaitInstructionPattern>(); > > // Sort all the patterns with the number of instructions they output > for (uint32_t op = 0; op < ir::OP_INVALID; ++op) diff --git > a/backend/src/backend/gen_insn_selection.hpp > b/backend/src/backend/gen_insn_selection.hpp > index 32e5ce2..0070ac2 100644 > --- a/backend/src/backend/gen_insn_selection.hpp > +++ b/backend/src/backend/gen_insn_selection.hpp > @@ -136,6 +136,7 @@ namespace gbe > uint16_t lut_sub:2; > }; > uint32_t barrierType; > + uint32_t waitType; > bool longjmp; > uint32_t indirect_offset; > struct { > diff 
--git a/backend/src/ir/instruction.cpp b/backend/src/ir/instruction.cpp > index 6ed0b89..c7facfb 100644 > --- a/backend/src/ir/instruction.cpp > +++ b/backend/src/ir/instruction.cpp > @@ -949,6 +949,21 @@ namespace ir { > Register dst[0], src[0]; > }; > > + /*! Wait instructions */ > + class ALIGNED_INSTRUCTION WaitInstruction : > + public BasePolicy, > + public NSrcPolicy<WaitInstruction, 0>, > + public NDstPolicy<WaitInstruction, 0> > + { > + public: > + INLINE WaitInstruction() { > + this->opcode = OP_WAIT; > + } > + INLINE bool wellFormed(const Function &fn, std::string &why) const; > + INLINE void out(std::ostream &out, const Function &fn) const; > + Register dst[0], src[0]; > + }; > + > #undef ALIGNED_INSTRUCTION > > ///////////////////////////////////////////////////////////////////////// > @@ -1247,6 +1262,8 @@ namespace ir { > { return true; } > INLINE bool GetImageInfoInstruction::wellFormed(const Function &fn, > std::string &why) const > { return true; } > + INLINE bool WaitInstruction::wellFormed(const Function &fn, std::string > &why) const > + { return true; } > > > // Ensure that types and register family match @@ -1531,6 +1548,9 @@ > namespace ir { > out << "." 
<< syncStr[field]; > } > > + INLINE void WaitInstruction::out(std::ostream &out, const Function &fn) > const { > + this->outOpcode(out); > + } > > } /* namespace internal */ > > @@ -1680,6 +1700,10 @@ START_INTROSPECTION(LabelInstruction) > #include "ir/instruction.hxx" > END_INTROSPECTION(LabelInstruction) > > +START_INTROSPECTION(WaitInstruction) > +#include "ir/instruction.hxx" > +END_INTROSPECTION(WaitInstruction) > + > START_INTROSPECTION(VmeInstruction) > #include "ir/instruction.hxx" > END_INTROSPECTION(VmeInstruction) > @@ -1829,7 +1853,8 @@ END_FUNCTION(Instruction, Register) > opcode == OP_SYNC || > opcode == OP_ATOMIC || > opcode == OP_CALC_TIMESTAMP || > - opcode == OP_STORE_PROFILING; > + opcode == OP_STORE_PROFILING || > + opcode == OP_WAIT; > } > > #define DECL_MEM_FN(CLASS, RET, PROTOTYPE, CALL) \ > @@ -2174,6 +2199,11 @@ DECL_MEM_FN(MemInstruction, void, > setBtiReg(Register reg), setBtiReg(reg)) > return internal::StoreProfilingInstruction(bti, profilingType).convert(); > } > > + // WAIT > + Instruction WAIT(void) { > + return internal::WaitInstruction().convert(); > + } > + > std::ostream &operator<< (std::ostream &out, const Instruction &insn) { > const Function &fn = insn.getFunction(); > const BasicBlock *bb = insn.getParent(); diff --git > a/backend/src/ir/instruction.hpp b/backend/src/ir/instruction.hpp index > 7862bbf..76ffd77 100644 > --- a/backend/src/ir/instruction.hpp > +++ b/backend/src/ir/instruction.hpp > @@ -576,6 +576,13 @@ namespace ir { > static bool isClassOf(const Instruction &insn); > }; > > + /*! Indirect Move instruction */ > + class WaitInstruction : public Instruction { > + public: > + /*! Return true if the given instruction is an instance of this class */ > + static bool isClassOf(const Instruction &insn); }; > + > /*! Specialize the instruction. Also performs typechecking first based on > the > * opcode. 
Crashes if it fails > */ > @@ -797,6 +804,9 @@ namespace ir { > Instruction CALC_TIMESTAMP(uint32_t pointNum, uint32_t tsType); > /*! calculate the execute timestamp for profiling */ > Instruction STORE_PROFILING(uint32_t bti, uint32_t Type); > + /*! wait */ > + Instruction WAIT(void); > + > } /* namespace ir */ > } /* namespace gbe */ > > diff --git a/backend/src/ir/instruction.hxx b/backend/src/ir/instruction.hxx > index 1282747..efdd4c5 100644 > --- a/backend/src/ir/instruction.hxx > +++ b/backend/src/ir/instruction.hxx > @@ -109,3 +109,4 @@ DECL_INSN(ELSE, BranchInstruction) > DECL_INSN(WHILE, BranchInstruction) DECL_INSN(CALC_TIMESTAMP, > CalcTimestampInstruction) DECL_INSN(STORE_PROFILING, > StoreProfilingInstruction) > +DECL_INSN(WAIT, WaitInstruction) > diff --git a/backend/src/libocl/include/ocl_sync.h > b/backend/src/libocl/include/ocl_sync.h > index 18090d5..1d90cae 100644 > --- a/backend/src/libocl/include/ocl_sync.h > +++ b/backend/src/libocl/include/ocl_sync.h > @@ -31,5 +31,6 @@ OVERLOADABLE void barrier(cl_mem_fence_flags flags); > void mem_fence(cl_mem_fence_flags flags); void > read_mem_fence(cl_mem_fence_flags flags); void > write_mem_fence(cl_mem_fence_flags flags); > +OVERLOADABLE void debugwait(void); > > #endif /* __OCL_SYNC_H__ */ > diff --git a/backend/src/libocl/src/ocl_barrier.ll > b/backend/src/libocl/src/ocl_barrier.ll > index 2765a71..9416f80 100644 > --- a/backend/src/libocl/src/ocl_barrier.ll > +++ b/backend/src/libocl/src/ocl_barrier.ll > @@ -12,6 +12,7 @@ declare i32 @_get_global_mem_fence() nounwind > alwaysinline declare void @__gen_ocl_barrier_local() nounwind alwaysinline > noduplicate declare void @__gen_ocl_barrier_global() nounwind > alwaysinline noduplicate declare void > @__gen_ocl_barrier_local_and_global() nounwind alwaysinline noduplicate > +declare void @__gen_ocl_debugwait() nounwind alwaysinline noduplicate > > define void @_Z7barrierj(i32 %flags) nounwind noduplicate alwaysinline { > %1 = icmp eq i32 %flags, 3 > @@ 
-40,3 +41,8 @@ barrier_global: > done: > ret void > } > + > +define void @_Z9debugwaitv() nounwind noduplicate alwaysinline { > + call void @__gen_ocl_debugwait() > + ret void > +} > diff --git a/backend/src/libocl/src/ocl_sync.cl > b/backend/src/libocl/src/ocl_sync.cl > index d008639..70d6f26 100644 > --- a/backend/src/libocl/src/ocl_sync.cl > +++ b/backend/src/libocl/src/ocl_sync.cl > @@ -20,6 +20,7 @@ > void __gen_ocl_barrier_local(void); > void __gen_ocl_barrier_global(void); > void __gen_ocl_barrier_local_and_global(void); > +void __gen_ocl_debugwait(void); > > void mem_fence(cl_mem_fence_flags flags) { } diff --git > a/backend/src/llvm/llvm_gen_backend.cpp > b/backend/src/llvm/llvm_gen_backend.cpp > index d1b6f98..a0b2262 100644 > --- a/backend/src/llvm/llvm_gen_backend.cpp > +++ b/backend/src/llvm/llvm_gen_backend.cpp > @@ -3601,6 +3601,7 @@ namespace gbe > case GEN_OCL_PRINTF: > case GEN_OCL_CALC_TIMESTAMP: > case GEN_OCL_STORE_PROFILING: > + case GEN_OCL_DEBUGWAIT: > break; > case GEN_OCL_NOT_FOUND: > default: > @@ -4394,6 +4395,11 @@ namespace gbe > ctx.SIMD_SHUFFLE(getType(ctx, I.getType()), dst, src0, src1); > break; > } > + case GEN_OCL_DEBUGWAIT: > + { > + ctx.WAIT(); > + break; > + } > default: break; > } > } > diff --git a/backend/src/llvm/llvm_gen_ocl_function.hxx > b/backend/src/llvm/llvm_gen_ocl_function.hxx > index 65bf0c1..d0e3614 100644 > --- a/backend/src/llvm/llvm_gen_ocl_function.hxx > +++ b/backend/src/llvm/llvm_gen_ocl_function.hxx > @@ -177,3 +177,6 @@ DECL_LLVM_GEN_FUNCTION(PRINTF, > __gen_ocl_printf) DECL_LLVM_GEN_FUNCTION(CALC_TIMESTAMP, > __gen_ocl_calc_timestamp) // store profiling info to the mem. 
> DECL_LLVM_GEN_FUNCTION(STORE_PROFILING, > __gen_ocl_store_profiling) > + > +// debug wait function > +DECL_LLVM_GEN_FUNCTION(DEBUGWAIT, __gen_ocl_debugwait) > -- > 2.1.4 > > _______________________________________________ > Beignet mailing list > [email protected] > http://lists.freedesktop.org/mailman/listinfo/beignet _______________________________________________ Beignet mailing list [email protected] http://lists.freedesktop.org/mailman/listinfo/beignet
