Right, the current implementation of all the memcpy/memset intrinsics is simply not good. In the future, we need to find a graceful way to expand those intrinsics and replace the current, very hacky method.
One question about your comment: does it apply to this patch only, or to the whole patchset? Thanks. On Mon, Dec 15, 2014 at 02:44:54AM +0000, Song, Ruiling wrote: > As a temporary solution, it looks good to me. > I think we need to re-write the memset/memcpy lowering pass to get rid of the > .ll files. We can use C++ code to generate those IRs and replace the > memset/memcpy calling. Although it is a little complex. > > > > -----Original Message----- > > From: Beignet [mailto:[email protected]] On Behalf Of > > Zhigang Gong > > Sent: Monday, December 15, 2014 9:18 AM > > To: Gong, Zhigang > > Cc: [email protected] > > Subject: Re: [Beignet] [PATCH 1/2] GBE: Add constant pointer in the memcpy > > intrinsic. > > > > Ping for review. > > > > On Thu, Dec 04, 2014 at 05:21:59PM +0800, Zhigang Gong wrote: > > > From: Zhigang Gong <[email protected]> > > > > > > Blender may generate such type of intrinsics. Now fix it. > > > Also fixed a previous typo which will not assert when it should > > > assert. 
> > > > > > Signed-off-by: Zhigang Gong <[email protected]> > > > --- > > > backend/src/libocl/src/ocl_memcpy.ll | 177 > > +++++++++++++++++++++++++++ > > > backend/src/llvm/llvm_bitcode_link.cpp | 7 ++ > > > backend/src/llvm/llvm_intrinsic_lowering.cpp | 4 +- > > > 3 files changed, 187 insertions(+), 1 deletion(-) > > > > > > diff --git a/backend/src/libocl/src/ocl_memcpy.ll > > > b/backend/src/libocl/src/ocl_memcpy.ll > > > index fbc44d1..b3fadb2 100644 > > > --- a/backend/src/libocl/src/ocl_memcpy.ll > > > +++ b/backend/src/libocl/src/ocl_memcpy.ll > > > @@ -550,3 +550,180 @@ > > while.body: ; preds > > = %entry, %while.body > > > while.end: ; preds > > = %while.body, %entry > > > ret void > > > } > > > + > > > +define void @__gen_memcpy_gc_align(i8 addrspace(1)* %dst, i8 > > > +addrspace(2)* %src, i32 %size) nounwind alwaysinline { > > > +entry: > > > + br label %while.cond > > > + > > > +while.cond: ; preds > > = %while.body, %entry > > > + %index.0 = phi i32 [ 0, %entry ], [ %add, %while.body ] > > > + %add = add i32 %index.0, 4 > > > + %cmp = icmp ugt i32 %add, %size > > > + br i1 %cmp, label %while.cond3, label %while.body > > > + > > > +while.body: ; preds > > = %while.cond > > > + %add.ptr = getelementptr inbounds i8 addrspace(2)* %src, i32 > > > +%index.0 > > > + %0 = bitcast i8 addrspace(2)* %add.ptr to i32 addrspace(2)* > > > + %1 = load i32 addrspace(2)* %0, align 4 > > > + %add.ptr1 = getelementptr inbounds i8 addrspace(1)* %dst, i32 > > > +%index.0 > > > + %2 = bitcast i8 addrspace(1)* %add.ptr1 to i32 addrspace(1)* > > > + store i32 %1, i32 addrspace(1)* %2, align 4 > > > + br label %while.cond > > > + > > > +while.cond3: ; preds > > = %while.cond, %while.body5 > > > + %index.1 = phi i32 [ %index.0, %while.cond ], [ %inc, %while.body5 > > > +] > > > + %cmp4 = icmp ult i32 %index.1, %size > > > + br i1 %cmp4, label %while.body5, label %while.end7 > > > + > > > +while.body5: ; preds > > = %while.cond3 > > > + %arrayidx = getelementptr inbounds i8 
addrspace(2)* %src, i32 > > > +%index.1 > > > + %3 = load i8 addrspace(2)* %arrayidx, align 1 > > > + %arrayidx6 = getelementptr inbounds i8 addrspace(1)* %dst, i32 > > > +%index.1 > > > + store i8 %3, i8 addrspace(1)* %arrayidx6, align 1 > > > + %inc = add i32 %index.1, 1 > > > + br label %while.cond3 > > > + > > > +while.end7: ; preds > > = %while.cond3 > > > + ret void > > > +} > > > + > > > +define void @__gen_memcpy_pc_align(i8 addrspace(0)* %dst, i8 > > > +addrspace(2)* %src, i32 %size) nounwind alwaysinline { > > > +entry: > > > + br label %while.cond > > > + > > > +while.cond: ; preds > > = %while.body, %entry > > > + %index.0 = phi i32 [ 0, %entry ], [ %add, %while.body ] > > > + %add = add i32 %index.0, 4 > > > + %cmp = icmp ugt i32 %add, %size > > > + br i1 %cmp, label %while.cond3, label %while.body > > > + > > > +while.body: ; preds > > = %while.cond > > > + %add.ptr = getelementptr inbounds i8 addrspace(2)* %src, i32 > > > +%index.0 > > > + %0 = bitcast i8 addrspace(2)* %add.ptr to i32 addrspace(2)* > > > + %1 = load i32 addrspace(2)* %0, align 4 > > > + %add.ptr1 = getelementptr inbounds i8 addrspace(0)* %dst, i32 > > > +%index.0 > > > + %2 = bitcast i8 addrspace(0)* %add.ptr1 to i32 addrspace(0)* > > > + store i32 %1, i32 addrspace(0)* %2, align 4 > > > + br label %while.cond > > > + > > > +while.cond3: ; preds > > = %while.cond, %while.body5 > > > + %index.1 = phi i32 [ %index.0, %while.cond ], [ %inc, %while.body5 > > > +] > > > + %cmp4 = icmp ult i32 %index.1, %size > > > + br i1 %cmp4, label %while.body5, label %while.end7 > > > + > > > +while.body5: ; preds > > = %while.cond3 > > > + %arrayidx = getelementptr inbounds i8 addrspace(2)* %src, i32 > > > +%index.1 > > > + %3 = load i8 addrspace(2)* %arrayidx, align 1 > > > + %arrayidx6 = getelementptr inbounds i8 addrspace(0)* %dst, i32 > > > +%index.1 > > > + store i8 %3, i8 addrspace(0)* %arrayidx6, align 1 > > > + %inc = add i32 %index.1, 1 > > > + br label %while.cond3 > > > + > > > 
+while.end7: ; preds > > = %while.cond3 > > > + ret void > > > +} > > > + > > > +define void @__gen_memcpy_lc_align(i8 addrspace(3)* %dst, i8 > > > +addrspace(2)* %src, i32 %size) nounwind alwaysinline { > > > +entry: > > > + br label %while.cond > > > + > > > +while.cond: ; preds > > = %while.body, %entry > > > + %index.0 = phi i32 [ 0, %entry ], [ %add, %while.body ] > > > + %add = add i32 %index.0, 4 > > > + %cmp = icmp ugt i32 %add, %size > > > + br i1 %cmp, label %while.cond3, label %while.body > > > + > > > +while.body: ; preds > > = %while.cond > > > + %add.ptr = getelementptr inbounds i8 addrspace(2)* %src, i32 > > > +%index.0 > > > + %0 = bitcast i8 addrspace(2)* %add.ptr to i32 addrspace(2)* > > > + %1 = load i32 addrspace(2)* %0, align 4 > > > + %add.ptr1 = getelementptr inbounds i8 addrspace(3)* %dst, i32 > > > +%index.0 > > > + %2 = bitcast i8 addrspace(3)* %add.ptr1 to i32 addrspace(3)* > > > + store i32 %1, i32 addrspace(3)* %2, align 4 > > > + br label %while.cond > > > + > > > +while.cond3: ; preds > > = %while.cond, %while.body5 > > > + %index.1 = phi i32 [ %index.0, %while.cond ], [ %inc, %while.body5 > > > +] > > > + %cmp4 = icmp ult i32 %index.1, %size > > > + br i1 %cmp4, label %while.body5, label %while.end7 > > > + > > > +while.body5: ; preds > > = %while.cond3 > > > + %arrayidx = getelementptr inbounds i8 addrspace(2)* %src, i32 > > > +%index.1 > > > + %3 = load i8 addrspace(2)* %arrayidx, align 1 > > > + %arrayidx6 = getelementptr inbounds i8 addrspace(3)* %dst, i32 > > > +%index.1 > > > + store i8 %3, i8 addrspace(3)* %arrayidx6, align 1 > > > + %inc = add i32 %index.1, 1 > > > + br label %while.cond3 > > > + > > > +while.end7: ; preds > > = %while.cond3 > > > + ret void > > > +} > > > + > > > +define void @__gen_memcpy_pc(i8 addrspace(0)* %dst, i8 addrspace(2)* > > > +%src, i32 %size) nounwind alwaysinline { > > > +entry: > > > + %cmp4 = icmp eq i32 %size, 0 > > > + br i1 %cmp4, label %while.end, label %while.body > > > + > > > 
+while.body: ; preds > > = %entry, %while.body > > > + %index.05 = phi i32 [ %inc, %while.body ], [ 0, %entry ] > > > + %0 = ptrtoint i8 addrspace(2)* %src to i32 > > > + %1 = add i32 %0, %index.05 > > > + %2 = inttoptr i32 %1 to i8 addrspace(2)* > > > + %3 = load i8 addrspace(2)* %2, align 1 > > > + %4 = ptrtoint i8 addrspace(0)* %dst to i32 > > > + %5 = add i32 %4, %index.05 > > > + %6 = inttoptr i32 %5 to i8 addrspace(0)* > > > + store i8 %3, i8 addrspace(0)* %6, align 1 > > > + %inc = add i32 %index.05, 1 > > > + %cmp = icmp ult i32 %inc, %size > > > + br i1 %cmp, label %while.body, label %while.end > > > + > > > +while.end: ; preds > > = %while.body, %entry > > > + ret void > > > +} > > > + > > > +define void @__gen_memcpy_gc(i8 addrspace(1)* %dst, i8 addrspace(2)* > > > +%src, i32 %size) nounwind alwaysinline { > > > +entry: > > > + %cmp4 = icmp eq i32 %size, 0 > > > + br i1 %cmp4, label %while.end, label %while.body > > > + > > > +while.body: ; preds > > = %entry, %while.body > > > + %index.05 = phi i32 [ %inc, %while.body ], [ 0, %entry ] > > > + %0 = ptrtoint i8 addrspace(2)* %src to i32 > > > + %1 = add i32 %0, %index.05 > > > + %2 = inttoptr i32 %1 to i8 addrspace(2)* > > > + %3 = load i8 addrspace(2)* %2, align 1 > > > + %4 = ptrtoint i8 addrspace(1)* %dst to i32 > > > + %5 = add i32 %4, %index.05 > > > + %6 = inttoptr i32 %5 to i8 addrspace(1)* > > > + store i8 %3, i8 addrspace(1)* %6, align 1 > > > + %inc = add i32 %index.05, 1 > > > + %cmp = icmp ult i32 %inc, %size > > > + br i1 %cmp, label %while.body, label %while.end > > > + > > > +while.end: ; preds > > = %while.body, %entry > > > + ret void > > > +} > > > + > > > +define void @__gen_memcpy_lc(i8 addrspace(3)* %dst, i8 addrspace(2)* > > > +%src, i32 %size) nounwind alwaysinline { > > > +entry: > > > + %cmp4 = icmp eq i32 %size, 0 > > > + br i1 %cmp4, label %while.end, label %while.body > > > + > > > +while.body: ; preds > > = %entry, %while.body > > > + %index.05 = phi i32 [ %inc, %while.body ], 
[ 0, %entry ] > > > + %0 = ptrtoint i8 addrspace(2)* %src to i32 > > > + %1 = add i32 %0, %index.05 > > > + %2 = inttoptr i32 %1 to i8 addrspace(2)* > > > + %3 = load i8 addrspace(2)* %2, align 1 > > > + %4 = ptrtoint i8 addrspace(3)* %dst to i32 > > > + %5 = add i32 %4, %index.05 > > > + %6 = inttoptr i32 %5 to i8 addrspace(3)* > > > + store i8 %3, i8 addrspace(3)* %6, align 1 > > > + %inc = add i32 %index.05, 1 > > > + %cmp = icmp ult i32 %inc, %size > > > + br i1 %cmp, label %while.body, label %while.end > > > + > > > +while.end: ; preds > > = %while.body, %entry > > > + ret void > > > +} > > > diff --git a/backend/src/llvm/llvm_bitcode_link.cpp > > > b/backend/src/llvm/llvm_bitcode_link.cpp > > > index f5e9f81..d3058d6 100644 > > > --- a/backend/src/llvm/llvm_bitcode_link.cpp > > > +++ b/backend/src/llvm/llvm_bitcode_link.cpp > > > @@ -170,6 +170,13 @@ namespace gbe > > > builtinFuncs.push_back("__gen_memset_g_align"); > > > builtinFuncs.push_back("__gen_memset_l_align"); > > > > > > + builtinFuncs.push_back("__gen_memcpy_pc"); > > > + builtinFuncs.push_back("__gen_memcpy_gc"); > > > + builtinFuncs.push_back("__gen_memcpy_lc"); > > > + > > > + builtinFuncs.push_back("__gen_memcpy_pc_align"); > > > + builtinFuncs.push_back("__gen_memcpy_gc_align"); > > > + builtinFuncs.push_back("__gen_memcpy_lc_align"); > > > > > > for (Module::iterator SF = mod->begin(), E = mod->end(); SF != E; > > ++SF) { > > > if (SF->isDeclaration()) continue; diff --git > > > a/backend/src/llvm/llvm_intrinsic_lowering.cpp > > > b/backend/src/llvm/llvm_intrinsic_lowering.cpp > > > index 52f99c1..7d1f8f0 100644 > > > --- a/backend/src/llvm/llvm_intrinsic_lowering.cpp > > > +++ b/backend/src/llvm/llvm_intrinsic_lowering.cpp > > > @@ -72,10 +72,12 @@ namespace gbe { > > > return 'p'; > > > case 1: > > > return 'g'; > > > + case 2: > > > + return 'c'; > > > case 3: > > > return 'l'; > > > default: > > > - assert("Non support address space"); > > > + assert(0 && "Non support address space"); > 
> > return '\0'; > > > } > > > } > > > -- > > > 1.8.3.2 > > > > > > _______________________________________________ > > > Beignet mailing list > > > [email protected] > > > http://lists.freedesktop.org/mailman/listinfo/beignet > > _______________________________________________ > > Beignet mailing list > > [email protected] > > http://lists.freedesktop.org/mailman/listinfo/beignet > _______________________________________________ > Beignet mailing list > [email protected] > http://lists.freedesktop.org/mailman/listinfo/beignet _______________________________________________ Beignet mailing list [email protected] http://lists.freedesktop.org/mailman/listinfo/beignet
