From: Zhigang Gong <[email protected]> Blender may generate memcpy intrinsics whose source pointer is in address space 2 ('c'), which were not handled before. Add support for them. Also fix an earlier typo that kept an assert from firing when it should.
Signed-off-by: Zhigang Gong <[email protected]> --- backend/src/libocl/src/ocl_memcpy.ll | 177 +++++++++++++++++++++++++++ backend/src/llvm/llvm_bitcode_link.cpp | 7 ++ backend/src/llvm/llvm_intrinsic_lowering.cpp | 4 +- 3 files changed, 187 insertions(+), 1 deletion(-) diff --git a/backend/src/libocl/src/ocl_memcpy.ll b/backend/src/libocl/src/ocl_memcpy.ll index fbc44d1..b3fadb2 100644 --- a/backend/src/libocl/src/ocl_memcpy.ll +++ b/backend/src/libocl/src/ocl_memcpy.ll @@ -550,3 +550,180 @@ while.body: ; preds = %entry, %while.body while.end: ; preds = %while.body, %entry ret void } + +define void @__gen_memcpy_gc_align(i8 addrspace(1)* %dst, i8 addrspace(2)* %src, i32 %size) nounwind alwaysinline { +entry: + br label %while.cond + +while.cond: ; preds = %while.body, %entry + %index.0 = phi i32 [ 0, %entry ], [ %add, %while.body ] + %add = add i32 %index.0, 4 + %cmp = icmp ugt i32 %add, %size + br i1 %cmp, label %while.cond3, label %while.body + +while.body: ; preds = %while.cond + %add.ptr = getelementptr inbounds i8 addrspace(2)* %src, i32 %index.0 + %0 = bitcast i8 addrspace(2)* %add.ptr to i32 addrspace(2)* + %1 = load i32 addrspace(2)* %0, align 4 + %add.ptr1 = getelementptr inbounds i8 addrspace(1)* %dst, i32 %index.0 + %2 = bitcast i8 addrspace(1)* %add.ptr1 to i32 addrspace(1)* + store i32 %1, i32 addrspace(1)* %2, align 4 + br label %while.cond + +while.cond3: ; preds = %while.cond, %while.body5 + %index.1 = phi i32 [ %index.0, %while.cond ], [ %inc, %while.body5 ] + %cmp4 = icmp ult i32 %index.1, %size + br i1 %cmp4, label %while.body5, label %while.end7 + +while.body5: ; preds = %while.cond3 + %arrayidx = getelementptr inbounds i8 addrspace(2)* %src, i32 %index.1 + %3 = load i8 addrspace(2)* %arrayidx, align 1 + %arrayidx6 = getelementptr inbounds i8 addrspace(1)* %dst, i32 %index.1 + store i8 %3, i8 addrspace(1)* %arrayidx6, align 1 + %inc = add i32 %index.1, 1 + br label %while.cond3 + +while.end7: ; preds = %while.cond3 + ret void +} + +define void 
@__gen_memcpy_pc_align(i8 addrspace(0)* %dst, i8 addrspace(2)* %src, i32 %size) nounwind alwaysinline { +entry: + br label %while.cond + +while.cond: ; preds = %while.body, %entry + %index.0 = phi i32 [ 0, %entry ], [ %add, %while.body ] + %add = add i32 %index.0, 4 + %cmp = icmp ugt i32 %add, %size + br i1 %cmp, label %while.cond3, label %while.body + +while.body: ; preds = %while.cond + %add.ptr = getelementptr inbounds i8 addrspace(2)* %src, i32 %index.0 + %0 = bitcast i8 addrspace(2)* %add.ptr to i32 addrspace(2)* + %1 = load i32 addrspace(2)* %0, align 4 + %add.ptr1 = getelementptr inbounds i8 addrspace(0)* %dst, i32 %index.0 + %2 = bitcast i8 addrspace(0)* %add.ptr1 to i32 addrspace(0)* + store i32 %1, i32 addrspace(0)* %2, align 4 + br label %while.cond + +while.cond3: ; preds = %while.cond, %while.body5 + %index.1 = phi i32 [ %index.0, %while.cond ], [ %inc, %while.body5 ] + %cmp4 = icmp ult i32 %index.1, %size + br i1 %cmp4, label %while.body5, label %while.end7 + +while.body5: ; preds = %while.cond3 + %arrayidx = getelementptr inbounds i8 addrspace(2)* %src, i32 %index.1 + %3 = load i8 addrspace(2)* %arrayidx, align 1 + %arrayidx6 = getelementptr inbounds i8 addrspace(0)* %dst, i32 %index.1 + store i8 %3, i8 addrspace(0)* %arrayidx6, align 1 + %inc = add i32 %index.1, 1 + br label %while.cond3 + +while.end7: ; preds = %while.cond3 + ret void +} + +define void @__gen_memcpy_lc_align(i8 addrspace(3)* %dst, i8 addrspace(2)* %src, i32 %size) nounwind alwaysinline { +entry: + br label %while.cond + +while.cond: ; preds = %while.body, %entry + %index.0 = phi i32 [ 0, %entry ], [ %add, %while.body ] + %add = add i32 %index.0, 4 + %cmp = icmp ugt i32 %add, %size + br i1 %cmp, label %while.cond3, label %while.body + +while.body: ; preds = %while.cond + %add.ptr = getelementptr inbounds i8 addrspace(2)* %src, i32 %index.0 + %0 = bitcast i8 addrspace(2)* %add.ptr to i32 addrspace(2)* + %1 = load i32 addrspace(2)* %0, align 4 + %add.ptr1 = getelementptr inbounds i8 
addrspace(3)* %dst, i32 %index.0 + %2 = bitcast i8 addrspace(3)* %add.ptr1 to i32 addrspace(3)* + store i32 %1, i32 addrspace(3)* %2, align 4 + br label %while.cond + +while.cond3: ; preds = %while.cond, %while.body5 + %index.1 = phi i32 [ %index.0, %while.cond ], [ %inc, %while.body5 ] + %cmp4 = icmp ult i32 %index.1, %size + br i1 %cmp4, label %while.body5, label %while.end7 + +while.body5: ; preds = %while.cond3 + %arrayidx = getelementptr inbounds i8 addrspace(2)* %src, i32 %index.1 + %3 = load i8 addrspace(2)* %arrayidx, align 1 + %arrayidx6 = getelementptr inbounds i8 addrspace(3)* %dst, i32 %index.1 + store i8 %3, i8 addrspace(3)* %arrayidx6, align 1 + %inc = add i32 %index.1, 1 + br label %while.cond3 + +while.end7: ; preds = %while.cond3 + ret void +} + +define void @__gen_memcpy_pc(i8 addrspace(0)* %dst, i8 addrspace(2)* %src, i32 %size) nounwind alwaysinline { +entry: + %cmp4 = icmp eq i32 %size, 0 + br i1 %cmp4, label %while.end, label %while.body + +while.body: ; preds = %entry, %while.body + %index.05 = phi i32 [ %inc, %while.body ], [ 0, %entry ] + %0 = ptrtoint i8 addrspace(2)* %src to i32 + %1 = add i32 %0, %index.05 + %2 = inttoptr i32 %1 to i8 addrspace(2)* + %3 = load i8 addrspace(2)* %2, align 1 + %4 = ptrtoint i8 addrspace(0)* %dst to i32 + %5 = add i32 %4, %index.05 + %6 = inttoptr i32 %5 to i8 addrspace(0)* + store i8 %3, i8 addrspace(0)* %6, align 1 + %inc = add i32 %index.05, 1 + %cmp = icmp ult i32 %inc, %size + br i1 %cmp, label %while.body, label %while.end + +while.end: ; preds = %while.body, %entry + ret void +} + +define void @__gen_memcpy_gc(i8 addrspace(1)* %dst, i8 addrspace(2)* %src, i32 %size) nounwind alwaysinline { +entry: + %cmp4 = icmp eq i32 %size, 0 + br i1 %cmp4, label %while.end, label %while.body + +while.body: ; preds = %entry, %while.body + %index.05 = phi i32 [ %inc, %while.body ], [ 0, %entry ] + %0 = ptrtoint i8 addrspace(2)* %src to i32 + %1 = add i32 %0, %index.05 + %2 = inttoptr i32 %1 to i8 addrspace(2)* + %3 = 
load i8 addrspace(2)* %2, align 1 + %4 = ptrtoint i8 addrspace(1)* %dst to i32 + %5 = add i32 %4, %index.05 + %6 = inttoptr i32 %5 to i8 addrspace(1)* + store i8 %3, i8 addrspace(1)* %6, align 1 + %inc = add i32 %index.05, 1 + %cmp = icmp ult i32 %inc, %size + br i1 %cmp, label %while.body, label %while.end + +while.end: ; preds = %while.body, %entry + ret void +} + +define void @__gen_memcpy_lc(i8 addrspace(3)* %dst, i8 addrspace(2)* %src, i32 %size) nounwind alwaysinline { +entry: + %cmp4 = icmp eq i32 %size, 0 + br i1 %cmp4, label %while.end, label %while.body + +while.body: ; preds = %entry, %while.body + %index.05 = phi i32 [ %inc, %while.body ], [ 0, %entry ] + %0 = ptrtoint i8 addrspace(2)* %src to i32 + %1 = add i32 %0, %index.05 + %2 = inttoptr i32 %1 to i8 addrspace(2)* + %3 = load i8 addrspace(2)* %2, align 1 + %4 = ptrtoint i8 addrspace(3)* %dst to i32 + %5 = add i32 %4, %index.05 + %6 = inttoptr i32 %5 to i8 addrspace(3)* + store i8 %3, i8 addrspace(3)* %6, align 1 + %inc = add i32 %index.05, 1 + %cmp = icmp ult i32 %inc, %size + br i1 %cmp, label %while.body, label %while.end + +while.end: ; preds = %while.body, %entry + ret void +} diff --git a/backend/src/llvm/llvm_bitcode_link.cpp b/backend/src/llvm/llvm_bitcode_link.cpp index f5e9f81..d3058d6 100644 --- a/backend/src/llvm/llvm_bitcode_link.cpp +++ b/backend/src/llvm/llvm_bitcode_link.cpp @@ -170,6 +170,13 @@ namespace gbe builtinFuncs.push_back("__gen_memset_g_align"); builtinFuncs.push_back("__gen_memset_l_align"); + builtinFuncs.push_back("__gen_memcpy_pc"); + builtinFuncs.push_back("__gen_memcpy_gc"); + builtinFuncs.push_back("__gen_memcpy_lc"); + + builtinFuncs.push_back("__gen_memcpy_pc_align"); + builtinFuncs.push_back("__gen_memcpy_gc_align"); + builtinFuncs.push_back("__gen_memcpy_lc_align"); for (Module::iterator SF = mod->begin(), E = mod->end(); SF != E; ++SF) { if (SF->isDeclaration()) continue; diff --git a/backend/src/llvm/llvm_intrinsic_lowering.cpp 
b/backend/src/llvm/llvm_intrinsic_lowering.cpp index 52f99c1..7d1f8f0 100644 --- a/backend/src/llvm/llvm_intrinsic_lowering.cpp +++ b/backend/src/llvm/llvm_intrinsic_lowering.cpp @@ -72,10 +72,12 @@ namespace gbe { return 'p'; case 1: return 'g'; + case 2: + return 'c'; case 3: return 'l'; default: - assert("Non support address space"); + assert(0 && "Non support address space"); return '\0'; } } -- 1.8.3.2 _______________________________________________ Beignet mailing list [email protected] http://lists.freedesktop.org/mailman/listinfo/beignet
