Sorry, I missed patch 2 :( Please give me some time to review it.
> -----Original Message----- > From: Zhigang Gong [mailto:[email protected]] > Sent: Monday, December 15, 2014 2:56 PM > To: Song, Ruiling > Cc: Gong, Zhigang; [email protected] > Subject: Re: [Beignet] [PATCH 1/2] GBE: Add constant pointer in the memcpy > intrinsic. > > Right, the current implementation for all the memcpy/memset intrinsics is > simply not good. > We need to find out a graceful way to expand those intrinsics to replace > current very hacky method in the future. > > One question on your comment is that is this comment for this patch only or > for the whole patchset? > Thanks. > > On Mon, Dec 15, 2014 at 02:44:54AM +0000, Song, Ruiling wrote: > > As a temporary solution, it looks good to me. > > I think we need to re-write the memset/memcpy lowering pass to get rid > of the .ll files. We can use C++ code to generate those IRs and replace the > memset/memcpy calling. Although it is a little complex. > > > > > > > -----Original Message----- > > > From: Beignet [mailto:[email protected]] On > > > Behalf Of Zhigang Gong > > > Sent: Monday, December 15, 2014 9:18 AM > > > To: Gong, Zhigang > > > Cc: [email protected] > > > Subject: Re: [Beignet] [PATCH 1/2] GBE: Add constant pointer in the > > > memcpy intrinsic. > > > > > > Ping for review. > > > > > > On Thu, Dec 04, 2014 at 05:21:59PM +0800, Zhigang Gong wrote: > > > > From: Zhigang Gong <[email protected]> > > > > > > > > Blender may generate such type of intrinsics. Now fix it. > > > > Also fixed a previous typo which will not assert when it should > > > > assert. 
> > > > > > > > Signed-off-by: Zhigang Gong <[email protected]> > > > > --- > > > > backend/src/libocl/src/ocl_memcpy.ll | 177 > > > +++++++++++++++++++++++++++ > > > > backend/src/llvm/llvm_bitcode_link.cpp | 7 ++ > > > > backend/src/llvm/llvm_intrinsic_lowering.cpp | 4 +- > > > > 3 files changed, 187 insertions(+), 1 deletion(-) > > > > > > > > diff --git a/backend/src/libocl/src/ocl_memcpy.ll > > > > b/backend/src/libocl/src/ocl_memcpy.ll > > > > index fbc44d1..b3fadb2 100644 > > > > --- a/backend/src/libocl/src/ocl_memcpy.ll > > > > +++ b/backend/src/libocl/src/ocl_memcpy.ll > > > > @@ -550,3 +550,180 @@ > > > while.body: ; preds > > > = %entry, %while.body > > > > while.end: ; preds > > > = %while.body, %entry > > > > ret void > > > > } > > > > + > > > > +define void @__gen_memcpy_gc_align(i8 addrspace(1)* %dst, i8 > > > > +addrspace(2)* %src, i32 %size) nounwind alwaysinline { > > > > +entry: > > > > + br label %while.cond > > > > + > > > > +while.cond: ; preds > > > = %while.body, %entry > > > > + %index.0 = phi i32 [ 0, %entry ], [ %add, %while.body ] %add = > > > > + add i32 %index.0, 4 %cmp = icmp ugt i32 %add, %size br i1 > > > > + %cmp, label %while.cond3, label %while.body > > > > + > > > > +while.body: ; preds > > > = %while.cond > > > > + %add.ptr = getelementptr inbounds i8 addrspace(2)* %src, i32 > > > > +%index.0 > > > > + %0 = bitcast i8 addrspace(2)* %add.ptr to i32 addrspace(2)* > > > > + %1 = load i32 addrspace(2)* %0, align 4 > > > > + %add.ptr1 = getelementptr inbounds i8 addrspace(1)* %dst, i32 > > > > +%index.0 > > > > + %2 = bitcast i8 addrspace(1)* %add.ptr1 to i32 addrspace(1)* > > > > + store i32 %1, i32 addrspace(1)* %2, align 4 > > > > + br label %while.cond > > > > + > > > > +while.cond3: ; preds > > > = %while.cond, %while.body5 > > > > + %index.1 = phi i32 [ %index.0, %while.cond ], [ %inc, > > > > +%while.body5 ] > > > > + %cmp4 = icmp ult i32 %index.1, %size > > > > + br i1 %cmp4, label %while.body5, label %while.end7 > > > > + 
> > > > +while.body5: ; preds > > > = %while.cond3 > > > > + %arrayidx = getelementptr inbounds i8 addrspace(2)* %src, i32 > > > > +%index.1 > > > > + %3 = load i8 addrspace(2)* %arrayidx, align 1 > > > > + %arrayidx6 = getelementptr inbounds i8 addrspace(1)* %dst, i32 > > > > +%index.1 > > > > + store i8 %3, i8 addrspace(1)* %arrayidx6, align 1 > > > > + %inc = add i32 %index.1, 1 > > > > + br label %while.cond3 > > > > + > > > > +while.end7: ; preds > > > = %while.cond3 > > > > + ret void > > > > +} > > > > + > > > > +define void @__gen_memcpy_pc_align(i8 addrspace(0)* %dst, i8 > > > > +addrspace(2)* %src, i32 %size) nounwind alwaysinline { > > > > +entry: > > > > + br label %while.cond > > > > + > > > > +while.cond: ; preds > > > = %while.body, %entry > > > > + %index.0 = phi i32 [ 0, %entry ], [ %add, %while.body ] %add = > > > > + add i32 %index.0, 4 %cmp = icmp ugt i32 %add, %size br i1 > > > > + %cmp, label %while.cond3, label %while.body > > > > + > > > > +while.body: ; preds > > > = %while.cond > > > > + %add.ptr = getelementptr inbounds i8 addrspace(2)* %src, i32 > > > > +%index.0 > > > > + %0 = bitcast i8 addrspace(2)* %add.ptr to i32 addrspace(2)* > > > > + %1 = load i32 addrspace(2)* %0, align 4 > > > > + %add.ptr1 = getelementptr inbounds i8 addrspace(0)* %dst, i32 > > > > +%index.0 > > > > + %2 = bitcast i8 addrspace(0)* %add.ptr1 to i32 addrspace(0)* > > > > + store i32 %1, i32 addrspace(0)* %2, align 4 > > > > + br label %while.cond > > > > + > > > > +while.cond3: ; preds > > > = %while.cond, %while.body5 > > > > + %index.1 = phi i32 [ %index.0, %while.cond ], [ %inc, > > > > +%while.body5 ] > > > > + %cmp4 = icmp ult i32 %index.1, %size > > > > + br i1 %cmp4, label %while.body5, label %while.end7 > > > > + > > > > +while.body5: ; preds > > > = %while.cond3 > > > > + %arrayidx = getelementptr inbounds i8 addrspace(2)* %src, i32 > > > > +%index.1 > > > > + %3 = load i8 addrspace(2)* %arrayidx, align 1 > > > > + %arrayidx6 = getelementptr inbounds i8 
addrspace(0)* %dst, i32 > > > > +%index.1 > > > > + store i8 %3, i8 addrspace(0)* %arrayidx6, align 1 > > > > + %inc = add i32 %index.1, 1 > > > > + br label %while.cond3 > > > > + > > > > +while.end7: ; preds > > > = %while.cond3 > > > > + ret void > > > > +} > > > > + > > > > +define void @__gen_memcpy_lc_align(i8 addrspace(3)* %dst, i8 > > > > +addrspace(2)* %src, i32 %size) nounwind alwaysinline { > > > > +entry: > > > > + br label %while.cond > > > > + > > > > +while.cond: ; preds > > > = %while.body, %entry > > > > + %index.0 = phi i32 [ 0, %entry ], [ %add, %while.body ] %add = > > > > + add i32 %index.0, 4 %cmp = icmp ugt i32 %add, %size br i1 > > > > + %cmp, label %while.cond3, label %while.body > > > > + > > > > +while.body: ; preds > > > = %while.cond > > > > + %add.ptr = getelementptr inbounds i8 addrspace(2)* %src, i32 > > > > +%index.0 > > > > + %0 = bitcast i8 addrspace(2)* %add.ptr to i32 addrspace(2)* > > > > + %1 = load i32 addrspace(2)* %0, align 4 > > > > + %add.ptr1 = getelementptr inbounds i8 addrspace(3)* %dst, i32 > > > > +%index.0 > > > > + %2 = bitcast i8 addrspace(3)* %add.ptr1 to i32 addrspace(3)* > > > > + store i32 %1, i32 addrspace(3)* %2, align 4 > > > > + br label %while.cond > > > > + > > > > +while.cond3: ; preds > > > = %while.cond, %while.body5 > > > > + %index.1 = phi i32 [ %index.0, %while.cond ], [ %inc, > > > > +%while.body5 ] > > > > + %cmp4 = icmp ult i32 %index.1, %size > > > > + br i1 %cmp4, label %while.body5, label %while.end7 > > > > + > > > > +while.body5: ; preds > > > = %while.cond3 > > > > + %arrayidx = getelementptr inbounds i8 addrspace(2)* %src, i32 > > > > +%index.1 > > > > + %3 = load i8 addrspace(2)* %arrayidx, align 1 > > > > + %arrayidx6 = getelementptr inbounds i8 addrspace(3)* %dst, i32 > > > > +%index.1 > > > > + store i8 %3, i8 addrspace(3)* %arrayidx6, align 1 > > > > + %inc = add i32 %index.1, 1 > > > > + br label %while.cond3 > > > > + > > > > +while.end7: ; preds > > > = %while.cond3 > > > > + ret 
void > > > > +} > > > > + > > > > +define void @__gen_memcpy_pc(i8 addrspace(0)* %dst, i8 > > > > +addrspace(2)* %src, i32 %size) nounwind alwaysinline { > > > > +entry: > > > > + %cmp4 = icmp eq i32 %size, 0 > > > > + br i1 %cmp4, label %while.end, label %while.body > > > > + > > > > +while.body: ; preds > > > = %entry, %while.body > > > > + %index.05 = phi i32 [ %inc, %while.body ], [ 0, %entry ] > > > > + %0 = ptrtoint i8 addrspace(2)* %src to i32 > > > > + %1 = add i32 %0, %index.05 > > > > + %2 = inttoptr i32 %1 to i8 addrspace(2)* > > > > + %3 = load i8 addrspace(2)* %2, align 1 > > > > + %4 = ptrtoint i8 addrspace(0)* %dst to i32 > > > > + %5 = add i32 %4, %index.05 > > > > + %6 = inttoptr i32 %5 to i8 addrspace(0)* store i8 %3, i8 > > > > + addrspace(0)* %6, align 1 %inc = add i32 %index.05, 1 %cmp = > > > > + icmp ult i32 %inc, %size br i1 %cmp, label %while.body, label > > > > + %while.end > > > > + > > > > +while.end: ; preds > > > = %while.body, %entry > > > > + ret void > > > > +} > > > > + > > > > +define void @__gen_memcpy_gc(i8 addrspace(1)* %dst, i8 > > > > +addrspace(2)* %src, i32 %size) nounwind alwaysinline { > > > > +entry: > > > > + %cmp4 = icmp eq i32 %size, 0 > > > > + br i1 %cmp4, label %while.end, label %while.body > > > > + > > > > +while.body: ; preds > > > = %entry, %while.body > > > > + %index.05 = phi i32 [ %inc, %while.body ], [ 0, %entry ] > > > > + %0 = ptrtoint i8 addrspace(2)* %src to i32 > > > > + %1 = add i32 %0, %index.05 > > > > + %2 = inttoptr i32 %1 to i8 addrspace(2)* > > > > + %3 = load i8 addrspace(2)* %2, align 1 > > > > + %4 = ptrtoint i8 addrspace(1)* %dst to i32 > > > > + %5 = add i32 %4, %index.05 > > > > + %6 = inttoptr i32 %5 to i8 addrspace(1)* store i8 %3, i8 > > > > + addrspace(1)* %6, align 1 %inc = add i32 %index.05, 1 %cmp = > > > > + icmp ult i32 %inc, %size br i1 %cmp, label %while.body, label > > > > + %while.end > > > > + > > > > +while.end: ; preds > > > = %while.body, %entry > > > > + ret void > > > > 
+} > > > > + > > > > +define void @__gen_memcpy_lc(i8 addrspace(3)* %dst, i8 > > > > +addrspace(2)* %src, i32 %size) nounwind alwaysinline { > > > > +entry: > > > > + %cmp4 = icmp eq i32 %size, 0 > > > > + br i1 %cmp4, label %while.end, label %while.body > > > > + > > > > +while.body: ; preds > > > = %entry, %while.body > > > > + %index.05 = phi i32 [ %inc, %while.body ], [ 0, %entry ] > > > > + %0 = ptrtoint i8 addrspace(2)* %src to i32 > > > > + %1 = add i32 %0, %index.05 > > > > + %2 = inttoptr i32 %1 to i8 addrspace(2)* > > > > + %3 = load i8 addrspace(2)* %2, align 1 > > > > + %4 = ptrtoint i8 addrspace(3)* %dst to i32 > > > > + %5 = add i32 %4, %index.05 > > > > + %6 = inttoptr i32 %5 to i8 addrspace(3)* store i8 %3, i8 > > > > + addrspace(3)* %6, align 1 %inc = add i32 %index.05, 1 %cmp = > > > > + icmp ult i32 %inc, %size br i1 %cmp, label %while.body, label > > > > + %while.end > > > > + > > > > +while.end: ; preds > > > = %while.body, %entry > > > > + ret void > > > > +} > > > > diff --git a/backend/src/llvm/llvm_bitcode_link.cpp > > > > b/backend/src/llvm/llvm_bitcode_link.cpp > > > > index f5e9f81..d3058d6 100644 > > > > --- a/backend/src/llvm/llvm_bitcode_link.cpp > > > > +++ b/backend/src/llvm/llvm_bitcode_link.cpp > > > > @@ -170,6 +170,13 @@ namespace gbe > > > > builtinFuncs.push_back("__gen_memset_g_align"); > > > > builtinFuncs.push_back("__gen_memset_l_align"); > > > > > > > > + builtinFuncs.push_back("__gen_memcpy_pc"); > > > > + builtinFuncs.push_back("__gen_memcpy_gc"); > > > > + builtinFuncs.push_back("__gen_memcpy_lc"); > > > > + > > > > + builtinFuncs.push_back("__gen_memcpy_pc_align"); > > > > + builtinFuncs.push_back("__gen_memcpy_gc_align"); > > > > + builtinFuncs.push_back("__gen_memcpy_lc_align"); > > > > > > > > for (Module::iterator SF = mod->begin(), E = mod->end(); SF > > > > != E; > > > ++SF) { > > > > if (SF->isDeclaration()) continue; diff --git > > > > a/backend/src/llvm/llvm_intrinsic_lowering.cpp > > > > 
b/backend/src/llvm/llvm_intrinsic_lowering.cpp > > > > index 52f99c1..7d1f8f0 100644 > > > > --- a/backend/src/llvm/llvm_intrinsic_lowering.cpp > > > > +++ b/backend/src/llvm/llvm_intrinsic_lowering.cpp > > > > @@ -72,10 +72,12 @@ namespace gbe { > > > > return 'p'; > > > > case 1: > > > > return 'g'; > > > > + case 2: > > > > + return 'c'; > > > > case 3: > > > > return 'l'; > > > > default: > > > > - assert("Non support address space"); > > > > + assert(0 && "Non support address space"); > > > > return '\0'; > > > > } > > > > } > > > > -- > > > > 1.8.3.2 > > > > > > > > _______________________________________________ > > > > Beignet mailing list > > > > [email protected] > > > > http://lists.freedesktop.org/mailman/listinfo/beignet > > > _______________________________________________ > > > Beignet mailing list > > > [email protected] > > > http://lists.freedesktop.org/mailman/listinfo/beignet > > _______________________________________________ > > Beignet mailing list > > [email protected] > > http://lists.freedesktop.org/mailman/listinfo/beignet _______________________________________________ Beignet mailing list [email protected] http://lists.freedesktop.org/mailman/listinfo/beignet
