From: Junyan He <[email protected]> Because the memcpy and memset function suites would otherwise be lost during module linkage, we need to add them explicitly to the module linkage so that they are not dropped.
Signed-off-by: Junyan He <[email protected]> --- backend/src/libocl/Makefile.in | 2 +- backend/src/libocl/lib/ocl_memcpy.ll | 336 +++++++++++++++++++++++++++++++++ backend/src/libocl/lib/ocl_memset.ll | 127 +++++++++++++ backend/src/llvm/llvm_bitcode_link.cpp | 15 ++ backend/src/ocl_memcpy.ll | 336 --------------------------------- backend/src/ocl_memset.ll | 127 ------------- 6 files changed, 479 insertions(+), 464 deletions(-) create mode 100644 backend/src/libocl/lib/ocl_memcpy.ll create mode 100644 backend/src/libocl/lib/ocl_memset.ll delete mode 100644 backend/src/ocl_memcpy.ll delete mode 100644 backend/src/ocl_memset.ll diff --git a/backend/src/libocl/Makefile.in b/backend/src/libocl/Makefile.in index 2b9de51..47a92f2 100644 --- a/backend/src/libocl/Makefile.in +++ b/backend/src/libocl/Makefile.in @@ -8,7 +8,7 @@ GENERATED_HEADERS=ocl_defines.h ocl_as.h ocl_convert.h ocl_common.h ocl_relation GENERATED_CL_SRCS=$(addprefix lib/, $(GENERATED_FILES)) GENERATED_CL_HEADERS=$(addprefix include/, $(GENERATED_HEADERS)) CL_FILE_NAMES=ocl_workitem.cl ocl_atom.cl ocl_async.cl ocl_sync.cl ocl_misc.cl ocl_vload.cl ocl_geometric.cl ocl_image.cl $(GENERATED_FILES) -LL_FILE_NAMES=ocl_barrier.ll +LL_FILE_NAMES=ocl_barrier.ll ocl_memcpy.ll ocl_memset.ll CL_SRCS=$(addprefix lib/, $(CL_FILE_NAMES)) LL_SRCS=$(addprefix lib/, $(LL_FILE_NAMES)) CL_BITCODES=$(patsubst %.cl, %.bc, $(CL_SRCS)) diff --git a/backend/src/libocl/lib/ocl_memcpy.ll b/backend/src/libocl/lib/ocl_memcpy.ll new file mode 100644 index 0000000..476033e --- /dev/null +++ b/backend/src/libocl/lib/ocl_memcpy.ll @@ -0,0 +1,336 @@ +;The memcpy's source code. 
+; INLINE_OVERLOADABLE void __gen_memcpy(uchar* dst, uchar* src, size_t size) { +; size_t index = 0; +; while((index + 4) >= size) { +; *((uint *)(dst + index)) = *((uint *)(src + index)); +; index += 4; +; } +; while(index < size) { +; dst[index] = src[index]; +; index++; +; } +; } + +define void @__gen_memcpy_gg(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i32 %size) nounwind alwaysinline { +entry: + br label %while.cond + +while.cond: ; preds = %while.body, %entry + %index.0 = phi i32 [ 0, %entry ], [ %add, %while.body ] + %add = add i32 %index.0, 4 + %cmp = icmp ult i32 %add, %size + br i1 %cmp, label %while.cond3, label %while.body + +while.body: ; preds = %while.cond + %add.ptr = getelementptr inbounds i8 addrspace(1)* %src, i32 %index.0 + %0 = bitcast i8 addrspace(1)* %add.ptr to i32 addrspace(1)* + %1 = load i32 addrspace(1)* %0, align 4 + %add.ptr1 = getelementptr inbounds i8 addrspace(1)* %dst, i32 %index.0 + %2 = bitcast i8 addrspace(1)* %add.ptr1 to i32 addrspace(1)* + store i32 %1, i32 addrspace(1)* %2, align 4 + br label %while.cond + +while.cond3: ; preds = %while.cond, %while.body5 + %index.1 = phi i32 [ %index.0, %while.cond ], [ %inc, %while.body5 ] + %cmp4 = icmp ult i32 %index.1, %size + br i1 %cmp4, label %while.body5, label %while.end7 + +while.body5: ; preds = %while.cond3 + %arrayidx = getelementptr inbounds i8 addrspace(1)* %src, i32 %index.1 + %3 = load i8 addrspace(1)* %arrayidx, align 1 + %arrayidx6 = getelementptr inbounds i8 addrspace(1)* %dst, i32 %index.1 + store i8 %3, i8 addrspace(1)* %arrayidx6, align 1 + %inc = add i32 %index.1, 1 + br label %while.cond3 + +while.end7: ; preds = %while.cond3 + ret void +} + +define void @__gen_memcpy_gp(i8 addrspace(1)* %dst, i8 addrspace(0)* %src, i32 %size) nounwind alwaysinline { +entry: + br label %while.cond + +while.cond: ; preds = %while.body, %entry + %index.0 = phi i32 [ 0, %entry ], [ %add, %while.body ] + %add = add i32 %index.0, 4 + %cmp = icmp ult i32 %add, %size + br i1 %cmp, label 
%while.cond3, label %while.body + +while.body: ; preds = %while.cond + %add.ptr = getelementptr inbounds i8 addrspace(0)* %src, i32 %index.0 + %0 = bitcast i8 addrspace(0)* %add.ptr to i32 addrspace(0)* + %1 = load i32 addrspace(0)* %0, align 4 + %add.ptr1 = getelementptr inbounds i8 addrspace(1)* %dst, i32 %index.0 + %2 = bitcast i8 addrspace(1)* %add.ptr1 to i32 addrspace(1)* + store i32 %1, i32 addrspace(1)* %2, align 4 + br label %while.cond + +while.cond3: ; preds = %while.cond, %while.body5 + %index.1 = phi i32 [ %index.0, %while.cond ], [ %inc, %while.body5 ] + %cmp4 = icmp ult i32 %index.1, %size + br i1 %cmp4, label %while.body5, label %while.end7 + +while.body5: ; preds = %while.cond3 + %arrayidx = getelementptr inbounds i8 addrspace(0)* %src, i32 %index.1 + %3 = load i8 addrspace(0)* %arrayidx, align 1 + %arrayidx6 = getelementptr inbounds i8 addrspace(1)* %dst, i32 %index.1 + store i8 %3, i8 addrspace(1)* %arrayidx6, align 1 + %inc = add i32 %index.1, 1 + br label %while.cond3 + +while.end7: ; preds = %while.cond3 + ret void +} + +define void @__gen_memcpy_gl(i8 addrspace(1)* %dst, i8 addrspace(3)* %src, i32 %size) nounwind alwaysinline { +entry: + br label %while.cond + +while.cond: ; preds = %while.body, %entry + %index.0 = phi i32 [ 0, %entry ], [ %add, %while.body ] + %add = add i32 %index.0, 4 + %cmp = icmp ult i32 %add, %size + br i1 %cmp, label %while.cond3, label %while.body + +while.body: ; preds = %while.cond + %add.ptr = getelementptr inbounds i8 addrspace(3)* %src, i32 %index.0 + %0 = bitcast i8 addrspace(3)* %add.ptr to i32 addrspace(3)* + %1 = load i32 addrspace(3)* %0, align 4 + %add.ptr1 = getelementptr inbounds i8 addrspace(1)* %dst, i32 %index.0 + %2 = bitcast i8 addrspace(1)* %add.ptr1 to i32 addrspace(1)* + store i32 %1, i32 addrspace(1)* %2, align 4 + br label %while.cond + +while.cond3: ; preds = %while.cond, %while.body5 + %index.1 = phi i32 [ %index.0, %while.cond ], [ %inc, %while.body5 ] + %cmp4 = icmp ult i32 %index.1, %size + 
br i1 %cmp4, label %while.body5, label %while.end7 + +while.body5: ; preds = %while.cond3 + %arrayidx = getelementptr inbounds i8 addrspace(3)* %src, i32 %index.1 + %3 = load i8 addrspace(3)* %arrayidx, align 1 + %arrayidx6 = getelementptr inbounds i8 addrspace(1)* %dst, i32 %index.1 + store i8 %3, i8 addrspace(1)* %arrayidx6, align 1 + %inc = add i32 %index.1, 1 + br label %while.cond3 + +while.end7: ; preds = %while.cond3 + ret void +} + +define void @__gen_memcpy_pg(i8 addrspace(0)* %dst, i8 addrspace(1)* %src, i32 %size) nounwind alwaysinline { +entry: + br label %while.cond + +while.cond: ; preds = %while.body, %entry + %index.0 = phi i32 [ 0, %entry ], [ %add, %while.body ] + %add = add i32 %index.0, 4 + %cmp = icmp ult i32 %add, %size + br i1 %cmp, label %while.cond3, label %while.body + +while.body: ; preds = %while.cond + %add.ptr = getelementptr inbounds i8 addrspace(1)* %src, i32 %index.0 + %0 = bitcast i8 addrspace(1)* %add.ptr to i32 addrspace(1)* + %1 = load i32 addrspace(1)* %0, align 4 + %add.ptr1 = getelementptr inbounds i8 addrspace(0)* %dst, i32 %index.0 + %2 = bitcast i8 addrspace(0)* %add.ptr1 to i32 addrspace(0)* + store i32 %1, i32 addrspace(0)* %2, align 4 + br label %while.cond + +while.cond3: ; preds = %while.cond, %while.body5 + %index.1 = phi i32 [ %index.0, %while.cond ], [ %inc, %while.body5 ] + %cmp4 = icmp ult i32 %index.1, %size + br i1 %cmp4, label %while.body5, label %while.end7 + +while.body5: ; preds = %while.cond3 + %arrayidx = getelementptr inbounds i8 addrspace(1)* %src, i32 %index.1 + %3 = load i8 addrspace(1)* %arrayidx, align 1 + %arrayidx6 = getelementptr inbounds i8 addrspace(0)* %dst, i32 %index.1 + store i8 %3, i8 addrspace(0)* %arrayidx6, align 1 + %inc = add i32 %index.1, 1 + br label %while.cond3 + +while.end7: ; preds = %while.cond3 + ret void +} + +define void @__gen_memcpy_pp(i8 addrspace(0)* %dst, i8 addrspace(0)* %src, i32 %size) nounwind alwaysinline { +entry: + br label %while.cond + +while.cond: ; preds = 
%while.body, %entry + %index.0 = phi i32 [ 0, %entry ], [ %add, %while.body ] + %add = add i32 %index.0, 4 + %cmp = icmp ult i32 %add, %size + br i1 %cmp, label %while.cond3, label %while.body + +while.body: ; preds = %while.cond + %add.ptr = getelementptr inbounds i8 addrspace(0)* %src, i32 %index.0 + %0 = bitcast i8 addrspace(0)* %add.ptr to i32 addrspace(0)* + %1 = load i32 addrspace(0)* %0, align 4 + %add.ptr1 = getelementptr inbounds i8 addrspace(0)* %dst, i32 %index.0 + %2 = bitcast i8 addrspace(0)* %add.ptr1 to i32 addrspace(0)* + store i32 %1, i32 addrspace(0)* %2, align 4 + br label %while.cond + +while.cond3: ; preds = %while.cond, %while.body5 + %index.1 = phi i32 [ %index.0, %while.cond ], [ %inc, %while.body5 ] + %cmp4 = icmp ult i32 %index.1, %size + br i1 %cmp4, label %while.body5, label %while.end7 + +while.body5: ; preds = %while.cond3 + %arrayidx = getelementptr inbounds i8 addrspace(0)* %src, i32 %index.1 + %3 = load i8 addrspace(0)* %arrayidx, align 1 + %arrayidx6 = getelementptr inbounds i8 addrspace(0)* %dst, i32 %index.1 + store i8 %3, i8 addrspace(0)* %arrayidx6, align 1 + %inc = add i32 %index.1, 1 + br label %while.cond3 + +while.end7: ; preds = %while.cond3 + ret void +} + +define void @__gen_memcpy_pl(i8 addrspace(0)* %dst, i8 addrspace(3)* %src, i32 %size) nounwind alwaysinline { +entry: + br label %while.cond + +while.cond: ; preds = %while.body, %entry + %index.0 = phi i32 [ 0, %entry ], [ %add, %while.body ] + %add = add i32 %index.0, 4 + %cmp = icmp ult i32 %add, %size + br i1 %cmp, label %while.cond3, label %while.body + +while.body: ; preds = %while.cond + %add.ptr = getelementptr inbounds i8 addrspace(3)* %src, i32 %index.0 + %0 = bitcast i8 addrspace(3)* %add.ptr to i32 addrspace(3)* + %1 = load i32 addrspace(3)* %0, align 4 + %add.ptr1 = getelementptr inbounds i8 addrspace(0)* %dst, i32 %index.0 + %2 = bitcast i8 addrspace(0)* %add.ptr1 to i32 addrspace(0)* + store i32 %1, i32 addrspace(0)* %2, align 4 + br label %while.cond + 
+while.cond3: ; preds = %while.cond, %while.body5 + %index.1 = phi i32 [ %index.0, %while.cond ], [ %inc, %while.body5 ] + %cmp4 = icmp ult i32 %index.1, %size + br i1 %cmp4, label %while.body5, label %while.end7 + +while.body5: ; preds = %while.cond3 + %arrayidx = getelementptr inbounds i8 addrspace(3)* %src, i32 %index.1 + %3 = load i8 addrspace(3)* %arrayidx, align 1 + %arrayidx6 = getelementptr inbounds i8 addrspace(0)* %dst, i32 %index.1 + store i8 %3, i8 addrspace(0)* %arrayidx6, align 1 + %inc = add i32 %index.1, 1 + br label %while.cond3 + +while.end7: ; preds = %while.cond3 + ret void +} + +define void @__gen_memcpy_lg(i8 addrspace(3)* %dst, i8 addrspace(1)* %src, i32 %size) nounwind alwaysinline { +entry: + br label %while.cond + +while.cond: ; preds = %while.body, %entry + %index.0 = phi i32 [ 0, %entry ], [ %add, %while.body ] + %add = add i32 %index.0, 4 + %cmp = icmp ult i32 %add, %size + br i1 %cmp, label %while.cond3, label %while.body + +while.body: ; preds = %while.cond + %add.ptr = getelementptr inbounds i8 addrspace(1)* %src, i32 %index.0 + %0 = bitcast i8 addrspace(1)* %add.ptr to i32 addrspace(1)* + %1 = load i32 addrspace(1)* %0, align 4 + %add.ptr1 = getelementptr inbounds i8 addrspace(3)* %dst, i32 %index.0 + %2 = bitcast i8 addrspace(3)* %add.ptr1 to i32 addrspace(3)* + store i32 %1, i32 addrspace(3)* %2, align 4 + br label %while.cond + +while.cond3: ; preds = %while.cond, %while.body5 + %index.1 = phi i32 [ %index.0, %while.cond ], [ %inc, %while.body5 ] + %cmp4 = icmp ult i32 %index.1, %size + br i1 %cmp4, label %while.body5, label %while.end7 + +while.body5: ; preds = %while.cond3 + %arrayidx = getelementptr inbounds i8 addrspace(1)* %src, i32 %index.1 + %3 = load i8 addrspace(1)* %arrayidx, align 1 + %arrayidx6 = getelementptr inbounds i8 addrspace(3)* %dst, i32 %index.1 + store i8 %3, i8 addrspace(3)* %arrayidx6, align 1 + %inc = add i32 %index.1, 1 + br label %while.cond3 + +while.end7: ; preds = %while.cond3 + ret void +} + +define 
void @__gen_memcpy_lp(i8 addrspace(3)* %dst, i8 addrspace(0)* %src, i32 %size) nounwind alwaysinline { +entry: + br label %while.cond + +while.cond: ; preds = %while.body, %entry + %index.0 = phi i32 [ 0, %entry ], [ %add, %while.body ] + %add = add i32 %index.0, 4 + %cmp = icmp ult i32 %add, %size + br i1 %cmp, label %while.cond3, label %while.body + +while.body: ; preds = %while.cond + %add.ptr = getelementptr inbounds i8 addrspace(0)* %src, i32 %index.0 + %0 = bitcast i8 addrspace(0)* %add.ptr to i32 addrspace(0)* + %1 = load i32 addrspace(0)* %0, align 4 + %add.ptr1 = getelementptr inbounds i8 addrspace(3)* %dst, i32 %index.0 + %2 = bitcast i8 addrspace(3)* %add.ptr1 to i32 addrspace(3)* + store i32 %1, i32 addrspace(3)* %2, align 4 + br label %while.cond + +while.cond3: ; preds = %while.cond, %while.body5 + %index.1 = phi i32 [ %index.0, %while.cond ], [ %inc, %while.body5 ] + %cmp4 = icmp ult i32 %index.1, %size + br i1 %cmp4, label %while.body5, label %while.end7 + +while.body5: ; preds = %while.cond3 + %arrayidx = getelementptr inbounds i8 addrspace(0)* %src, i32 %index.1 + %3 = load i8 addrspace(0)* %arrayidx, align 1 + %arrayidx6 = getelementptr inbounds i8 addrspace(3)* %dst, i32 %index.1 + store i8 %3, i8 addrspace(3)* %arrayidx6, align 1 + %inc = add i32 %index.1, 1 + br label %while.cond3 + +while.end7: ; preds = %while.cond3 + ret void +} + +define void @__gen_memcpy_ll(i8 addrspace(3)* %dst, i8 addrspace(3)* %src, i32 %size) nounwind alwaysinline { +entry: + br label %while.cond + +while.cond: ; preds = %while.body, %entry + %index.0 = phi i32 [ 0, %entry ], [ %add, %while.body ] + %add = add i32 %index.0, 4 + %cmp = icmp ult i32 %add, %size + br i1 %cmp, label %while.cond3, label %while.body + +while.body: ; preds = %while.cond + %add.ptr = getelementptr inbounds i8 addrspace(3)* %src, i32 %index.0 + %0 = bitcast i8 addrspace(3)* %add.ptr to i32 addrspace(3)* + %1 = load i32 addrspace(3)* %0, align 4 + %add.ptr1 = getelementptr inbounds i8 
addrspace(3)* %dst, i32 %index.0 + %2 = bitcast i8 addrspace(3)* %add.ptr1 to i32 addrspace(3)* + store i32 %1, i32 addrspace(3)* %2, align 4 + br label %while.cond + +while.cond3: ; preds = %while.cond, %while.body5 + %index.1 = phi i32 [ %index.0, %while.cond ], [ %inc, %while.body5 ] + %cmp4 = icmp ult i32 %index.1, %size + br i1 %cmp4, label %while.body5, label %while.end7 + +while.body5: ; preds = %while.cond3 + %arrayidx = getelementptr inbounds i8 addrspace(3)* %src, i32 %index.1 + %3 = load i8 addrspace(3)* %arrayidx, align 1 + %arrayidx6 = getelementptr inbounds i8 addrspace(3)* %dst, i32 %index.1 + store i8 %3, i8 addrspace(3)* %arrayidx6, align 1 + %inc = add i32 %index.1, 1 + br label %while.cond3 + +while.end7: ; preds = %while.cond3 + ret void +} diff --git a/backend/src/libocl/lib/ocl_memset.ll b/backend/src/libocl/lib/ocl_memset.ll new file mode 100644 index 0000000..addf9f5 --- /dev/null +++ b/backend/src/libocl/lib/ocl_memset.ll @@ -0,0 +1,127 @@ +;The memset's source code. 
+; INLINE_OVERLOADABLE void __gen_memset(uchar* dst, uchar val, size_t size) { +; size_t index = 0; +; uint v = (val << 24) | (val << 16) | (val << 8) | val; +; while((index + 4) >= size) { +; *((uint *)(dst + index)) = v; +; index += 4; +; } +; while(index < size) { +; dst[index] = val; +; index++; +; } +; } + +define void @__gen_memset_p(i8* %dst, i8 zeroext %val, i32 %size) nounwind alwaysinline { +entry: + %conv = zext i8 %val to i32 + %shl = shl nuw i32 %conv, 24 + %shl2 = shl nuw nsw i32 %conv, 16 + %or = or i32 %shl, %shl2 + %shl4 = shl nuw nsw i32 %conv, 8 + %or5 = or i32 %or, %shl4 + %or7 = or i32 %or5, %conv + br label %while.cond + +while.cond: ; preds = %while.body, %entry + %index.0 = phi i32 [ 0, %entry ], [ %add, %while.body ] + %add = add i32 %index.0, 4 + %cmp = icmp ult i32 %add, %size + br i1 %cmp, label %while.cond10, label %while.body + +while.body: ; preds = %while.cond + %add.ptr = getelementptr inbounds i8* %dst, i32 %index.0 + %0 = bitcast i8* %add.ptr to i32* + store i32 %or7, i32* %0, align 4 + br label %while.cond + +while.cond10: ; preds = %while.cond, %while.body13 + %index.1 = phi i32 [ %index.0, %while.cond ], [ %inc, %while.body13 ] + %cmp11 = icmp ult i32 %index.1, %size + br i1 %cmp11, label %while.body13, label %while.end14 + +while.body13: ; preds = %while.cond10 + %arrayidx = getelementptr inbounds i8* %dst, i32 %index.1 + store i8 %val, i8* %arrayidx, align 1 + %inc = add i32 %index.1, 1 + br label %while.cond10 + +while.end14: ; preds = %while.cond10 + ret void +} + +define void @__gen_memset_g(i8 addrspace(1)* %dst, i8 zeroext %val, i32 %size) nounwind alwaysinline { +entry: + %conv = zext i8 %val to i32 + %shl = shl nuw i32 %conv, 24 + %shl2 = shl nuw nsw i32 %conv, 16 + %or = or i32 %shl, %shl2 + %shl4 = shl nuw nsw i32 %conv, 8 + %or5 = or i32 %or, %shl4 + %or7 = or i32 %or5, %conv + br label %while.cond + +while.cond: ; preds = %while.body, %entry + %index.0 = phi i32 [ 0, %entry ], [ %add, %while.body ] + %add = add i32 
%index.0, 4 + %cmp = icmp ult i32 %add, %size + br i1 %cmp, label %while.cond10, label %while.body + +while.body: ; preds = %while.cond + %add.ptr = getelementptr inbounds i8 addrspace(1)* %dst, i32 %index.0 + %0 = bitcast i8 addrspace(1)* %add.ptr to i32 addrspace(1)* + store i32 %or7, i32 addrspace(1)* %0, align 4 + br label %while.cond + +while.cond10: ; preds = %while.cond, %while.body13 + %index.1 = phi i32 [ %index.0, %while.cond ], [ %inc, %while.body13 ] + %cmp11 = icmp ult i32 %index.1, %size + br i1 %cmp11, label %while.body13, label %while.end14 + +while.body13: ; preds = %while.cond10 + %arrayidx = getelementptr inbounds i8 addrspace(1)* %dst, i32 %index.1 + store i8 %val, i8 addrspace(1)* %arrayidx, align 1 + %inc = add i32 %index.1, 1 + br label %while.cond10 + +while.end14: ; preds = %while.cond10 + ret void +} + +define void @__gen_memset_l(i8 addrspace(3)* %dst, i8 zeroext %val, i32 %size) nounwind alwaysinline { +entry: + %conv = zext i8 %val to i32 + %shl = shl nuw i32 %conv, 24 + %shl2 = shl nuw nsw i32 %conv, 16 + %or = or i32 %shl, %shl2 + %shl4 = shl nuw nsw i32 %conv, 8 + %or5 = or i32 %or, %shl4 + %or7 = or i32 %or5, %conv + br label %while.cond + +while.cond: ; preds = %while.body, %entry + %index.0 = phi i32 [ 0, %entry ], [ %add, %while.body ] + %add = add i32 %index.0, 4 + %cmp = icmp ult i32 %add, %size + br i1 %cmp, label %while.cond10, label %while.body + +while.body: ; preds = %while.cond + %add.ptr = getelementptr inbounds i8 addrspace(3)* %dst, i32 %index.0 + %0 = bitcast i8 addrspace(3)* %add.ptr to i32 addrspace(3)* + store i32 %or7, i32 addrspace(3)* %0, align 4 + br label %while.cond + +while.cond10: ; preds = %while.cond, %while.body13 + %index.1 = phi i32 [ %index.0, %while.cond ], [ %inc, %while.body13 ] + %cmp11 = icmp ult i32 %index.1, %size + br i1 %cmp11, label %while.body13, label %while.end14 + +while.body13: ; preds = %while.cond10 + %arrayidx = getelementptr inbounds i8 addrspace(3)* %dst, i32 %index.1 + store i8 
%val, i8 addrspace(3)* %arrayidx, align 1 + %inc = add i32 %index.1, 1 + br label %while.cond10 + +while.end14: ; preds = %while.cond10 + ret void +} diff --git a/backend/src/llvm/llvm_bitcode_link.cpp b/backend/src/llvm/llvm_bitcode_link.cpp index 90ab4e1..615bbcb 100644 --- a/backend/src/llvm/llvm_bitcode_link.cpp +++ b/backend/src/llvm/llvm_bitcode_link.cpp @@ -99,6 +99,21 @@ namespace gbe return NULL; } + /* Add the memset and memcpy functions here. */ + kernels.push_back("__gen_memcpy_gg"); + kernels.push_back("__gen_memcpy_gp"); + kernels.push_back("__gen_memcpy_gl"); + kernels.push_back("__gen_memcpy_pg"); + kernels.push_back("__gen_memcpy_pp"); + kernels.push_back("__gen_memcpy_pl"); + kernels.push_back("__gen_memcpy_lg"); + kernels.push_back("__gen_memcpy_lp"); + kernels.push_back("__gen_memcpy_ll"); + + kernels.push_back("__gen_memset_p"); + kernels.push_back("__gen_memset_g"); + kernels.push_back("__gen_memset_l"); + Module* clonedLib = createOclBitCodeModule(ctx); assert(clonedLib && "Can not create the beignet bitcode\n"); diff --git a/backend/src/ocl_memcpy.ll b/backend/src/ocl_memcpy.ll deleted file mode 100644 index 476033e..0000000 --- a/backend/src/ocl_memcpy.ll +++ /dev/null @@ -1,336 +0,0 @@ -;The memcpy's source code. 
-; INLINE_OVERLOADABLE void __gen_memcpy(uchar* dst, uchar* src, size_t size) { -; size_t index = 0; -; while((index + 4) >= size) { -; *((uint *)(dst + index)) = *((uint *)(src + index)); -; index += 4; -; } -; while(index < size) { -; dst[index] = src[index]; -; index++; -; } -; } - -define void @__gen_memcpy_gg(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i32 %size) nounwind alwaysinline { -entry: - br label %while.cond - -while.cond: ; preds = %while.body, %entry - %index.0 = phi i32 [ 0, %entry ], [ %add, %while.body ] - %add = add i32 %index.0, 4 - %cmp = icmp ult i32 %add, %size - br i1 %cmp, label %while.cond3, label %while.body - -while.body: ; preds = %while.cond - %add.ptr = getelementptr inbounds i8 addrspace(1)* %src, i32 %index.0 - %0 = bitcast i8 addrspace(1)* %add.ptr to i32 addrspace(1)* - %1 = load i32 addrspace(1)* %0, align 4 - %add.ptr1 = getelementptr inbounds i8 addrspace(1)* %dst, i32 %index.0 - %2 = bitcast i8 addrspace(1)* %add.ptr1 to i32 addrspace(1)* - store i32 %1, i32 addrspace(1)* %2, align 4 - br label %while.cond - -while.cond3: ; preds = %while.cond, %while.body5 - %index.1 = phi i32 [ %index.0, %while.cond ], [ %inc, %while.body5 ] - %cmp4 = icmp ult i32 %index.1, %size - br i1 %cmp4, label %while.body5, label %while.end7 - -while.body5: ; preds = %while.cond3 - %arrayidx = getelementptr inbounds i8 addrspace(1)* %src, i32 %index.1 - %3 = load i8 addrspace(1)* %arrayidx, align 1 - %arrayidx6 = getelementptr inbounds i8 addrspace(1)* %dst, i32 %index.1 - store i8 %3, i8 addrspace(1)* %arrayidx6, align 1 - %inc = add i32 %index.1, 1 - br label %while.cond3 - -while.end7: ; preds = %while.cond3 - ret void -} - -define void @__gen_memcpy_gp(i8 addrspace(1)* %dst, i8 addrspace(0)* %src, i32 %size) nounwind alwaysinline { -entry: - br label %while.cond - -while.cond: ; preds = %while.body, %entry - %index.0 = phi i32 [ 0, %entry ], [ %add, %while.body ] - %add = add i32 %index.0, 4 - %cmp = icmp ult i32 %add, %size - br i1 %cmp, label 
%while.cond3, label %while.body - -while.body: ; preds = %while.cond - %add.ptr = getelementptr inbounds i8 addrspace(0)* %src, i32 %index.0 - %0 = bitcast i8 addrspace(0)* %add.ptr to i32 addrspace(0)* - %1 = load i32 addrspace(0)* %0, align 4 - %add.ptr1 = getelementptr inbounds i8 addrspace(1)* %dst, i32 %index.0 - %2 = bitcast i8 addrspace(1)* %add.ptr1 to i32 addrspace(1)* - store i32 %1, i32 addrspace(1)* %2, align 4 - br label %while.cond - -while.cond3: ; preds = %while.cond, %while.body5 - %index.1 = phi i32 [ %index.0, %while.cond ], [ %inc, %while.body5 ] - %cmp4 = icmp ult i32 %index.1, %size - br i1 %cmp4, label %while.body5, label %while.end7 - -while.body5: ; preds = %while.cond3 - %arrayidx = getelementptr inbounds i8 addrspace(0)* %src, i32 %index.1 - %3 = load i8 addrspace(0)* %arrayidx, align 1 - %arrayidx6 = getelementptr inbounds i8 addrspace(1)* %dst, i32 %index.1 - store i8 %3, i8 addrspace(1)* %arrayidx6, align 1 - %inc = add i32 %index.1, 1 - br label %while.cond3 - -while.end7: ; preds = %while.cond3 - ret void -} - -define void @__gen_memcpy_gl(i8 addrspace(1)* %dst, i8 addrspace(3)* %src, i32 %size) nounwind alwaysinline { -entry: - br label %while.cond - -while.cond: ; preds = %while.body, %entry - %index.0 = phi i32 [ 0, %entry ], [ %add, %while.body ] - %add = add i32 %index.0, 4 - %cmp = icmp ult i32 %add, %size - br i1 %cmp, label %while.cond3, label %while.body - -while.body: ; preds = %while.cond - %add.ptr = getelementptr inbounds i8 addrspace(3)* %src, i32 %index.0 - %0 = bitcast i8 addrspace(3)* %add.ptr to i32 addrspace(3)* - %1 = load i32 addrspace(3)* %0, align 4 - %add.ptr1 = getelementptr inbounds i8 addrspace(1)* %dst, i32 %index.0 - %2 = bitcast i8 addrspace(1)* %add.ptr1 to i32 addrspace(1)* - store i32 %1, i32 addrspace(1)* %2, align 4 - br label %while.cond - -while.cond3: ; preds = %while.cond, %while.body5 - %index.1 = phi i32 [ %index.0, %while.cond ], [ %inc, %while.body5 ] - %cmp4 = icmp ult i32 %index.1, %size - 
br i1 %cmp4, label %while.body5, label %while.end7 - -while.body5: ; preds = %while.cond3 - %arrayidx = getelementptr inbounds i8 addrspace(3)* %src, i32 %index.1 - %3 = load i8 addrspace(3)* %arrayidx, align 1 - %arrayidx6 = getelementptr inbounds i8 addrspace(1)* %dst, i32 %index.1 - store i8 %3, i8 addrspace(1)* %arrayidx6, align 1 - %inc = add i32 %index.1, 1 - br label %while.cond3 - -while.end7: ; preds = %while.cond3 - ret void -} - -define void @__gen_memcpy_pg(i8 addrspace(0)* %dst, i8 addrspace(1)* %src, i32 %size) nounwind alwaysinline { -entry: - br label %while.cond - -while.cond: ; preds = %while.body, %entry - %index.0 = phi i32 [ 0, %entry ], [ %add, %while.body ] - %add = add i32 %index.0, 4 - %cmp = icmp ult i32 %add, %size - br i1 %cmp, label %while.cond3, label %while.body - -while.body: ; preds = %while.cond - %add.ptr = getelementptr inbounds i8 addrspace(1)* %src, i32 %index.0 - %0 = bitcast i8 addrspace(1)* %add.ptr to i32 addrspace(1)* - %1 = load i32 addrspace(1)* %0, align 4 - %add.ptr1 = getelementptr inbounds i8 addrspace(0)* %dst, i32 %index.0 - %2 = bitcast i8 addrspace(0)* %add.ptr1 to i32 addrspace(0)* - store i32 %1, i32 addrspace(0)* %2, align 4 - br label %while.cond - -while.cond3: ; preds = %while.cond, %while.body5 - %index.1 = phi i32 [ %index.0, %while.cond ], [ %inc, %while.body5 ] - %cmp4 = icmp ult i32 %index.1, %size - br i1 %cmp4, label %while.body5, label %while.end7 - -while.body5: ; preds = %while.cond3 - %arrayidx = getelementptr inbounds i8 addrspace(1)* %src, i32 %index.1 - %3 = load i8 addrspace(1)* %arrayidx, align 1 - %arrayidx6 = getelementptr inbounds i8 addrspace(0)* %dst, i32 %index.1 - store i8 %3, i8 addrspace(0)* %arrayidx6, align 1 - %inc = add i32 %index.1, 1 - br label %while.cond3 - -while.end7: ; preds = %while.cond3 - ret void -} - -define void @__gen_memcpy_pp(i8 addrspace(0)* %dst, i8 addrspace(0)* %src, i32 %size) nounwind alwaysinline { -entry: - br label %while.cond - -while.cond: ; preds = 
%while.body, %entry - %index.0 = phi i32 [ 0, %entry ], [ %add, %while.body ] - %add = add i32 %index.0, 4 - %cmp = icmp ult i32 %add, %size - br i1 %cmp, label %while.cond3, label %while.body - -while.body: ; preds = %while.cond - %add.ptr = getelementptr inbounds i8 addrspace(0)* %src, i32 %index.0 - %0 = bitcast i8 addrspace(0)* %add.ptr to i32 addrspace(0)* - %1 = load i32 addrspace(0)* %0, align 4 - %add.ptr1 = getelementptr inbounds i8 addrspace(0)* %dst, i32 %index.0 - %2 = bitcast i8 addrspace(0)* %add.ptr1 to i32 addrspace(0)* - store i32 %1, i32 addrspace(0)* %2, align 4 - br label %while.cond - -while.cond3: ; preds = %while.cond, %while.body5 - %index.1 = phi i32 [ %index.0, %while.cond ], [ %inc, %while.body5 ] - %cmp4 = icmp ult i32 %index.1, %size - br i1 %cmp4, label %while.body5, label %while.end7 - -while.body5: ; preds = %while.cond3 - %arrayidx = getelementptr inbounds i8 addrspace(0)* %src, i32 %index.1 - %3 = load i8 addrspace(0)* %arrayidx, align 1 - %arrayidx6 = getelementptr inbounds i8 addrspace(0)* %dst, i32 %index.1 - store i8 %3, i8 addrspace(0)* %arrayidx6, align 1 - %inc = add i32 %index.1, 1 - br label %while.cond3 - -while.end7: ; preds = %while.cond3 - ret void -} - -define void @__gen_memcpy_pl(i8 addrspace(0)* %dst, i8 addrspace(3)* %src, i32 %size) nounwind alwaysinline { -entry: - br label %while.cond - -while.cond: ; preds = %while.body, %entry - %index.0 = phi i32 [ 0, %entry ], [ %add, %while.body ] - %add = add i32 %index.0, 4 - %cmp = icmp ult i32 %add, %size - br i1 %cmp, label %while.cond3, label %while.body - -while.body: ; preds = %while.cond - %add.ptr = getelementptr inbounds i8 addrspace(3)* %src, i32 %index.0 - %0 = bitcast i8 addrspace(3)* %add.ptr to i32 addrspace(3)* - %1 = load i32 addrspace(3)* %0, align 4 - %add.ptr1 = getelementptr inbounds i8 addrspace(0)* %dst, i32 %index.0 - %2 = bitcast i8 addrspace(0)* %add.ptr1 to i32 addrspace(0)* - store i32 %1, i32 addrspace(0)* %2, align 4 - br label %while.cond - 
-while.cond3: ; preds = %while.cond, %while.body5 - %index.1 = phi i32 [ %index.0, %while.cond ], [ %inc, %while.body5 ] - %cmp4 = icmp ult i32 %index.1, %size - br i1 %cmp4, label %while.body5, label %while.end7 - -while.body5: ; preds = %while.cond3 - %arrayidx = getelementptr inbounds i8 addrspace(3)* %src, i32 %index.1 - %3 = load i8 addrspace(3)* %arrayidx, align 1 - %arrayidx6 = getelementptr inbounds i8 addrspace(0)* %dst, i32 %index.1 - store i8 %3, i8 addrspace(0)* %arrayidx6, align 1 - %inc = add i32 %index.1, 1 - br label %while.cond3 - -while.end7: ; preds = %while.cond3 - ret void -} - -define void @__gen_memcpy_lg(i8 addrspace(3)* %dst, i8 addrspace(1)* %src, i32 %size) nounwind alwaysinline { -entry: - br label %while.cond - -while.cond: ; preds = %while.body, %entry - %index.0 = phi i32 [ 0, %entry ], [ %add, %while.body ] - %add = add i32 %index.0, 4 - %cmp = icmp ult i32 %add, %size - br i1 %cmp, label %while.cond3, label %while.body - -while.body: ; preds = %while.cond - %add.ptr = getelementptr inbounds i8 addrspace(1)* %src, i32 %index.0 - %0 = bitcast i8 addrspace(1)* %add.ptr to i32 addrspace(1)* - %1 = load i32 addrspace(1)* %0, align 4 - %add.ptr1 = getelementptr inbounds i8 addrspace(3)* %dst, i32 %index.0 - %2 = bitcast i8 addrspace(3)* %add.ptr1 to i32 addrspace(3)* - store i32 %1, i32 addrspace(3)* %2, align 4 - br label %while.cond - -while.cond3: ; preds = %while.cond, %while.body5 - %index.1 = phi i32 [ %index.0, %while.cond ], [ %inc, %while.body5 ] - %cmp4 = icmp ult i32 %index.1, %size - br i1 %cmp4, label %while.body5, label %while.end7 - -while.body5: ; preds = %while.cond3 - %arrayidx = getelementptr inbounds i8 addrspace(1)* %src, i32 %index.1 - %3 = load i8 addrspace(1)* %arrayidx, align 1 - %arrayidx6 = getelementptr inbounds i8 addrspace(3)* %dst, i32 %index.1 - store i8 %3, i8 addrspace(3)* %arrayidx6, align 1 - %inc = add i32 %index.1, 1 - br label %while.cond3 - -while.end7: ; preds = %while.cond3 - ret void -} - -define 
void @__gen_memcpy_lp(i8 addrspace(3)* %dst, i8 addrspace(0)* %src, i32 %size) nounwind alwaysinline { -entry: - br label %while.cond - -while.cond: ; preds = %while.body, %entry - %index.0 = phi i32 [ 0, %entry ], [ %add, %while.body ] - %add = add i32 %index.0, 4 - %cmp = icmp ult i32 %add, %size - br i1 %cmp, label %while.cond3, label %while.body - -while.body: ; preds = %while.cond - %add.ptr = getelementptr inbounds i8 addrspace(0)* %src, i32 %index.0 - %0 = bitcast i8 addrspace(0)* %add.ptr to i32 addrspace(0)* - %1 = load i32 addrspace(0)* %0, align 4 - %add.ptr1 = getelementptr inbounds i8 addrspace(3)* %dst, i32 %index.0 - %2 = bitcast i8 addrspace(3)* %add.ptr1 to i32 addrspace(3)* - store i32 %1, i32 addrspace(3)* %2, align 4 - br label %while.cond - -while.cond3: ; preds = %while.cond, %while.body5 - %index.1 = phi i32 [ %index.0, %while.cond ], [ %inc, %while.body5 ] - %cmp4 = icmp ult i32 %index.1, %size - br i1 %cmp4, label %while.body5, label %while.end7 - -while.body5: ; preds = %while.cond3 - %arrayidx = getelementptr inbounds i8 addrspace(0)* %src, i32 %index.1 - %3 = load i8 addrspace(0)* %arrayidx, align 1 - %arrayidx6 = getelementptr inbounds i8 addrspace(3)* %dst, i32 %index.1 - store i8 %3, i8 addrspace(3)* %arrayidx6, align 1 - %inc = add i32 %index.1, 1 - br label %while.cond3 - -while.end7: ; preds = %while.cond3 - ret void -} - -define void @__gen_memcpy_ll(i8 addrspace(3)* %dst, i8 addrspace(3)* %src, i32 %size) nounwind alwaysinline { -entry: - br label %while.cond - -while.cond: ; preds = %while.body, %entry - %index.0 = phi i32 [ 0, %entry ], [ %add, %while.body ] - %add = add i32 %index.0, 4 - %cmp = icmp ult i32 %add, %size - br i1 %cmp, label %while.cond3, label %while.body - -while.body: ; preds = %while.cond - %add.ptr = getelementptr inbounds i8 addrspace(3)* %src, i32 %index.0 - %0 = bitcast i8 addrspace(3)* %add.ptr to i32 addrspace(3)* - %1 = load i32 addrspace(3)* %0, align 4 - %add.ptr1 = getelementptr inbounds i8 
addrspace(3)* %dst, i32 %index.0 - %2 = bitcast i8 addrspace(3)* %add.ptr1 to i32 addrspace(3)* - store i32 %1, i32 addrspace(3)* %2, align 4 - br label %while.cond - -while.cond3: ; preds = %while.cond, %while.body5 - %index.1 = phi i32 [ %index.0, %while.cond ], [ %inc, %while.body5 ] - %cmp4 = icmp ult i32 %index.1, %size - br i1 %cmp4, label %while.body5, label %while.end7 - -while.body5: ; preds = %while.cond3 - %arrayidx = getelementptr inbounds i8 addrspace(3)* %src, i32 %index.1 - %3 = load i8 addrspace(3)* %arrayidx, align 1 - %arrayidx6 = getelementptr inbounds i8 addrspace(3)* %dst, i32 %index.1 - store i8 %3, i8 addrspace(3)* %arrayidx6, align 1 - %inc = add i32 %index.1, 1 - br label %while.cond3 - -while.end7: ; preds = %while.cond3 - ret void -} diff --git a/backend/src/ocl_memset.ll b/backend/src/ocl_memset.ll deleted file mode 100644 index addf9f5..0000000 --- a/backend/src/ocl_memset.ll +++ /dev/null @@ -1,127 +0,0 @@ -;The memset's source code. -; INLINE_OVERLOADABLE void __gen_memset(uchar* dst, uchar val, size_t size) { -; size_t index = 0; -; uint v = (val << 24) | (val << 16) | (val << 8) | val; -; while((index + 4) >= size) { -; *((uint *)(dst + index)) = v; -; index += 4; -; } -; while(index < size) { -; dst[index] = val; -; index++; -; } -; } - -define void @__gen_memset_p(i8* %dst, i8 zeroext %val, i32 %size) nounwind alwaysinline { -entry: - %conv = zext i8 %val to i32 - %shl = shl nuw i32 %conv, 24 - %shl2 = shl nuw nsw i32 %conv, 16 - %or = or i32 %shl, %shl2 - %shl4 = shl nuw nsw i32 %conv, 8 - %or5 = or i32 %or, %shl4 - %or7 = or i32 %or5, %conv - br label %while.cond - -while.cond: ; preds = %while.body, %entry - %index.0 = phi i32 [ 0, %entry ], [ %add, %while.body ] - %add = add i32 %index.0, 4 - %cmp = icmp ult i32 %add, %size - br i1 %cmp, label %while.cond10, label %while.body - -while.body: ; preds = %while.cond - %add.ptr = getelementptr inbounds i8* %dst, i32 %index.0 - %0 = bitcast i8* %add.ptr to i32* - store i32 %or7, 
i32* %0, align 4 - br label %while.cond - -while.cond10: ; preds = %while.cond, %while.body13 - %index.1 = phi i32 [ %index.0, %while.cond ], [ %inc, %while.body13 ] - %cmp11 = icmp ult i32 %index.1, %size - br i1 %cmp11, label %while.body13, label %while.end14 - -while.body13: ; preds = %while.cond10 - %arrayidx = getelementptr inbounds i8* %dst, i32 %index.1 - store i8 %val, i8* %arrayidx, align 1 - %inc = add i32 %index.1, 1 - br label %while.cond10 - -while.end14: ; preds = %while.cond10 - ret void -} - -define void @__gen_memset_g(i8 addrspace(1)* %dst, i8 zeroext %val, i32 %size) nounwind alwaysinline { -entry: - %conv = zext i8 %val to i32 - %shl = shl nuw i32 %conv, 24 - %shl2 = shl nuw nsw i32 %conv, 16 - %or = or i32 %shl, %shl2 - %shl4 = shl nuw nsw i32 %conv, 8 - %or5 = or i32 %or, %shl4 - %or7 = or i32 %or5, %conv - br label %while.cond - -while.cond: ; preds = %while.body, %entry - %index.0 = phi i32 [ 0, %entry ], [ %add, %while.body ] - %add = add i32 %index.0, 4 - %cmp = icmp ult i32 %add, %size - br i1 %cmp, label %while.cond10, label %while.body - -while.body: ; preds = %while.cond - %add.ptr = getelementptr inbounds i8 addrspace(1)* %dst, i32 %index.0 - %0 = bitcast i8 addrspace(1)* %add.ptr to i32 addrspace(1)* - store i32 %or7, i32 addrspace(1)* %0, align 4 - br label %while.cond - -while.cond10: ; preds = %while.cond, %while.body13 - %index.1 = phi i32 [ %index.0, %while.cond ], [ %inc, %while.body13 ] - %cmp11 = icmp ult i32 %index.1, %size - br i1 %cmp11, label %while.body13, label %while.end14 - -while.body13: ; preds = %while.cond10 - %arrayidx = getelementptr inbounds i8 addrspace(1)* %dst, i32 %index.1 - store i8 %val, i8 addrspace(1)* %arrayidx, align 1 - %inc = add i32 %index.1, 1 - br label %while.cond10 - -while.end14: ; preds = %while.cond10 - ret void -} - -define void @__gen_memset_l(i8 addrspace(3)* %dst, i8 zeroext %val, i32 %size) nounwind alwaysinline { -entry: - %conv = zext i8 %val to i32 - %shl = shl nuw i32 %conv, 24 - 
%shl2 = shl nuw nsw i32 %conv, 16 - %or = or i32 %shl, %shl2 - %shl4 = shl nuw nsw i32 %conv, 8 - %or5 = or i32 %or, %shl4 - %or7 = or i32 %or5, %conv - br label %while.cond - -while.cond: ; preds = %while.body, %entry - %index.0 = phi i32 [ 0, %entry ], [ %add, %while.body ] - %add = add i32 %index.0, 4 - %cmp = icmp ult i32 %add, %size - br i1 %cmp, label %while.cond10, label %while.body - -while.body: ; preds = %while.cond - %add.ptr = getelementptr inbounds i8 addrspace(3)* %dst, i32 %index.0 - %0 = bitcast i8 addrspace(3)* %add.ptr to i32 addrspace(3)* - store i32 %or7, i32 addrspace(3)* %0, align 4 - br label %while.cond - -while.cond10: ; preds = %while.cond, %while.body13 - %index.1 = phi i32 [ %index.0, %while.cond ], [ %inc, %while.body13 ] - %cmp11 = icmp ult i32 %index.1, %size - br i1 %cmp11, label %while.body13, label %while.end14 - -while.body13: ; preds = %while.cond10 - %arrayidx = getelementptr inbounds i8 addrspace(3)* %dst, i32 %index.1 - store i8 %val, i8 addrspace(3)* %arrayidx, align 1 - %inc = add i32 %index.1, 1 - br label %while.cond10 - -while.end14: ; preds = %while.cond10 - ret void -} -- 1.8.3.2 _______________________________________________ Beignet mailing list [email protected] http://lists.freedesktop.org/mailman/listinfo/beignet
