From: Junyan He <[email protected]> The python script is modified to genete the proto and define for header files and source files.
Signed-off-by: Junyan He <[email protected]> --- backend/src/libocl/Makefile.in | 18 +- backend/src/libocl/include/ocl_common.inh | 21 ++ backend/src/libocl/include/ocl_float.h | 79 ++++++ backend/src/libocl/lib/ocl_common.inc | 49 ++++ backend/src/libocl/script/gen_vector.py | 382 ++++++++++++++++++++++++++++++ backend/src/libocl/script/ocl_common.def | 22 ++ 6 files changed, 569 insertions(+), 2 deletions(-) create mode 100644 backend/src/libocl/include/ocl_common.inh create mode 100644 backend/src/libocl/include/ocl_float.h create mode 100644 backend/src/libocl/lib/ocl_common.inc create mode 100755 backend/src/libocl/script/gen_vector.py create mode 100644 backend/src/libocl/script/ocl_common.def diff --git a/backend/src/libocl/Makefile.in b/backend/src/libocl/Makefile.in index 5e020ab..78f8668 100644 --- a/backend/src/libocl/Makefile.in +++ b/backend/src/libocl/Makefile.in @@ -3,8 +3,8 @@ HEADER_INSTALL_PREFIX=@OCL_HEADER_DIR@ BITCODE_INSTALL_PREFIX=@OCL_BITCODE_DIR@ -GENERATED_FILES=ocl_as.cl ocl_convert.cl -GENERATED_HEADERS=ocl_defines.h ocl_as.h ocl_convert.h +GENERATED_FILES=ocl_as.cl ocl_convert.cl ocl_common.cl +GENERATED_HEADERS=ocl_defines.h ocl_as.h ocl_convert.h ocl_common.h GENERATED_CL_SRCS=$(addprefix lib/, $(GENERATED_FILES)) GENERATED_CL_HEADERS=$(addprefix include/, $(GENERATED_HEADERS)) CL_FILE_NAMES=ocl_workitem.cl ocl_atom.cl ocl_async.cl ocl_sync.cl $(GENERATED_FILES) @@ -45,6 +45,20 @@ include/ocl_defines.h:include/ocl_defines.inh @cat $< > $@ @cat ../ocl_common_defines.h >> $@ +%.h:%.inh + @echo "Generate the header: $@" + @rm -f $@ + @cat $< > $@ + @script/gen_vector.py script/$(notdir $(patsubst %.h, %.def, $@)) $@ 1 + @echo "#endif" >> $@ + +%.cl:%.inc + @echo "Generate the source: $@" + @rm -f $@ + @cat $< > $@ + @script/gen_vector.py script/$(notdir $(patsubst %.cl, %.def, $@)) $@ 0 + + %.bc:%.cl $(GENERATED_CL_HEADERS) clang -cc1 $(CLANG_OCL_FLAGS) -emit-llvm-bc -triple spir -o $@ -x cl $< diff --git a/backend/src/libocl/include/ocl_common.inh b/backend/src/libocl/include/ocl_common.inh new file mode 100644 index 0000000..9001a6f --- /dev/null +++ b/backend/src/libocl/include/ocl_common.inh @@ -0,0 +1,21 @@ +#ifndef __OCL_COMMON_H__ +#define __OCL_COMMON_H__ + +#include "ocl_types.h" + +///////////////////////////////////////////////////////////////////////////// +// Common Functions +///////////////////////////////////////////////////////////////////////////// +OVERLOADABLE float step(float edge, float x); +OVERLOADABLE float max(float a, float b); +OVERLOADABLE float min(float a, float b); +OVERLOADABLE float mix(float x, float y, float a); +OVERLOADABLE float clamp(float v, float l, float u); + +OVERLOADABLE float degrees(float radians); +OVERLOADABLE float radians(float degrees); +OVERLOADABLE float smoothstep(float e0, float e1, float x); + +OVERLOADABLE float sign(float x); + + diff --git a/backend/src/libocl/include/ocl_float.h b/backend/src/libocl/include/ocl_float.h new file mode 100644 index 0000000..ebf1800 --- /dev/null +++ b/backend/src/libocl/include/ocl_float.h @@ -0,0 +1,79 @@ +#ifndef __OCL_FLOAT_H__ +#define __OCL_FLOAT_H__ + +///////////////////////////////////////////////////////////////////////////// +// OpenCL floating-point macros and pragmas +///////////////////////////////////////////////////////////////////////////// +#define FLT_DIG 6 +#define FLT_MANT_DIG 24 +#define FLT_MAX_10_EXP +38 +#define FLT_MAX_EXP +128 +#define FLT_MIN_10_EXP -37 +#define FLT_MIN_EXP -125 +#define FLT_RADIX 2 +#define FLT_ONE 1.0000000000e+00 /* 0x3F800000 */ +#define FLT_MAX 0x1.fffffep127f +#define FLT_MIN 0x1.0p-126f +#define FLT_EPSILON 0x1.0p-23f + +#define MAXFLOAT 3.40282347e38F +INLINE_OVERLOADABLE float __ocl_inff(void) { + union { uint u; float f; } u; + u.u = 0x7F800000; + return u.f; +} +INLINE_OVERLOADABLE float __ocl_nanf(void) { + union { uint u; float f; } u; + u.u = 0x7F800001; + return u.f; +} +typedef union +{ + float value; + uint word; +} float_shape_type; + +/* Get a 32 bit int from a float. */ +#ifndef GEN_OCL_GET_FLOAT_WORD +# define GEN_OCL_GET_FLOAT_WORD(i,d) \ +do { \ + float_shape_type gf_u; \ + gf_u.value = (d); \ + (i) = gf_u.word; \ +} while (0) +#endif +/* Set a float from a 32 bit int. */ +#ifndef GEN_OCL_SET_FLOAT_WORD +# define GEN_OCL_SET_FLOAT_WORD(d,i) \ +do { \ + float_shape_type sf_u; \ + sf_u.word = (i); \ + (d) = sf_u.value; \ +} while (0) +#endif + +INLINE_OVERLOADABLE int __ocl_finitef (float x){ + unsigned ix; + GEN_OCL_GET_FLOAT_WORD (ix, x); + return (ix & 0x7fffffff) < 0x7f800000; +} + +#define HUGE_VALF (__ocl_inff()) +#define INFINITY (__ocl_inff()) +#define NAN (__ocl_nanf()) +#define M_E_F 2.718281828459045F +#define M_LOG2E_F 1.4426950408889634F +#define M_LOG10E_F 0.43429448190325176F +#define M_LN2_F 0.6931471805599453F +#define M_LN10_F 2.302585092994046F +#define M_PI_F 3.141592653589793F +#define M_PI_2_F 1.5707963267948966F +#define M_PI_4_F 0.7853981633974483F +#define M_1_PI_F 0.3183098861837907F +#define M_2_PI_F 0.6366197723675814F +#define M_2_SQRTPI_F 1.1283791670955126F +#define M_SQRT2_F 1.4142135623730951F +#define M_SQRT1_2_F 0.7071067811865476F + + +#endif /* __OCL_FLOAT_H__ */ diff --git a/backend/src/libocl/lib/ocl_common.inc b/backend/src/libocl/lib/ocl_common.inc new file mode 100644 index 0000000..0096b48 --- /dev/null +++ b/backend/src/libocl/lib/ocl_common.inc @@ -0,0 +1,49 @@ +#include "ocl_common.h" +#include "ocl_float.h" + +///////////////////////////////////////////////////////////////////////////// +// Common Functions +///////////////////////////////////////////////////////////////////////////// +PURE CONST float __gen_ocl_fmax(float a, float b); +PURE CONST float __gen_ocl_fmin(float a, float b); + +OVERLOADABLE float step(float edge, float x) { + return x < edge ? 0.0 : 1.0; +} + +OVERLOADABLE float max(float a, float b) { + return __gen_ocl_fmax(a, b); +} +OVERLOADABLE float min(float a, float b) { + return __gen_ocl_fmin(a, b); +} +OVERLOADABLE float mix(float x, float y, float a) { + return x + (y-x)*a; +} +OVERLOADABLE float clamp(float v, float l, float u) { + return max(min(v, u), l); +} + + +OVERLOADABLE float degrees(float radians) { + return (180 / M_PI_F) * radians; +} +OVERLOADABLE float radians(float degrees) { + return (M_PI_F / 180) * degrees; +} + +OVERLOADABLE float smoothstep(float e0, float e1, float x) { + x = clamp((x - e0) / (e1 - e0), 0.f, 1.f); + return x * x * (3 - 2 * x); +} + +OVERLOADABLE float sign(float x) { + if(x > 0) + return 1; + if(x < 0) + return -1; + if(x == -0.f) + return -0.f; + return 0.f; +} + diff --git a/backend/src/libocl/script/gen_vector.py b/backend/src/libocl/script/gen_vector.py new file mode 100755 index 0000000..a91dfcf --- /dev/null +++ b/backend/src/libocl/script/gen_vector.py @@ -0,0 +1,382 @@ +#!/usr/bin/env python +# +# Copyright (C) 2012 Intel Corporation +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library. If not, see <http://www.gnu.org/licenses/>. +# +# Author: Zhigang Gong <[email protected]> +#/ + +# This file is to generate inline code to lower down those builtin +# vector functions to scalar functions. +import re +import sys +import os + +if len(sys.argv) != 4: + print "Invalid argument {0}".format(sys.argv) + print "use {0} spec_file_name output_file_name just_proto".format(sys.argv[0]) + raise + +all_vector = 1,2,3,4,8,16 + +# generate generic type sets +def gen_vector_type(type_set, vector_set = all_vector): + ret = [] + for t in type_set: + for i in vector_set: + ret.append((t, i)) + return ret + +def set_vector_memspace(vector_type_set, memspace): + ret = [] + if memspace == '': + return vector_type_set + for t in vector_type_set: + ret.append((t[0], t[1], memspace)) + return ret + +# if we have 3 elements in the type tuple, we are a pointer with a memory space type +# at the third element. +def isPointer(t): + return len(t) == 3 + +all_itype = "char","short","int","long" +all_utype = "uchar","ushort","uint","ulong" +all_int_type = all_itype + all_utype + +all_float_type = "float","double" +all_type = all_int_type + all_float_type + +# all vector/scalar types +for t in all_type: + exec "{0}n = [\"{0}n\", gen_vector_type([\"{0}\"])]".format(t) + exec "s{0} = [\"{0}\", gen_vector_type([\"{0}\"], [1])]".format(t) + +# Predefined type sets according to the Open CL spec. +math_gentype = ["math_gentype", gen_vector_type(all_float_type)] +math_gentypef = ["math_gentypef", gen_vector_type(["float"])] +math_gentyped = ["math_gentyped", gen_vector_type(["double"])] + +half_native_math_gentype = ["half_native_math_gentype", gen_vector_type(["float"])] + +integer_gentype = ["integer_gentype", gen_vector_type(all_int_type)] +integer_ugentype = ["integer_ugentype", gen_vector_type(all_utype)] +integer_sgentype = ["integer_sgentype", gen_vector_type(all_int_type, [1])] + +fast_integer_gentype = ["fast_integer_gentype", gen_vector_type(["uint", "int"])] + +common_gentype = ["common_gentype", gen_vector_type(all_float_type)] +common_gentypef = ["common_gentypef", gen_vector_type(["float"])] +common_gentyped = ["common_gentyped", gen_vector_type(["double"])] + +relational_gentype = ["relational_gentype", gen_vector_type(all_type)] +relational_igentype = ["relational_igentype", gen_vector_type(all_itype)] +relational_ugentype = ["relational_ugentype", gen_vector_type(all_utype)] + +misc_gentypem = ["misc_gentypem", gen_vector_type(all_type, [2, 4, 8, 16])] +misc_gentypen = ["misc_gentypen", gen_vector_type(all_type, [2, 4, 8, 16])] +misc_ugentypem = ["misc_ugentypem", gen_vector_type(all_utype, [2, 4, 8, 16])] +misc_ugentypen = ["misc_ugentypen", gen_vector_type(all_utype, [2, 4, 8, 16])] + +all_predefined_type = math_gentype, math_gentypef, math_gentyped, \ + half_native_math_gentype, integer_gentype,integer_sgentype,\ + integer_ugentype, charn, ucharn, shortn, ushortn, intn, \ + uintn, longn, ulongn, floatn, doublen, \ + fast_integer_gentype, common_gentype, common_gentypef, \ + common_gentyped, relational_gentype, relational_igentype, \ + relational_ugentype, schar, suchar, sshort, sint, suint, \ + slong, sulong, sfloat, sdouble, misc_gentypem, \ + misc_ugentypem, misc_gentypen, misc_ugentypen + +# type dictionary contains all the predefined type sets. +type_dict = {} + +for t in all_predefined_type: + type_dict.update({t[0]:t[1]}) + +def _prefix(prefix, dtype): + if dtype.count("gentype") != 0: + return prefix + '_' + dtype + return dtype + +memspaces = ["__local ", "__private ", "__global "] + +def stripMemSpace(t): + if t[0:2] == '__': + for memspace in memspaces : + if t[0:len(memspace)] == memspace: + return memspace, t[len(memspace):] + return '', t + +def check_type(types): + for t in types: + memspace, t = stripMemSpace(t) + if not t in type_dict: + print t + raise "found invalid type." + +def match_unsigned(dtype): + if dtype[0] == 'float': + return ["uint", dtype[1]] + if dtype[0] == 'double': + return ["ulong", dtype[1]] + if dtype[0][0] == 'u': + return dtype + return ['u' + dtype[0], dtype[1]] + +def match_signed(dtype): + if dtype[0] == 'float': + return ["int", dtype[1]] + if dtype[0] == 'double': + return ["long", dtype[1]] + if dtype[0][0] != 'u': + return dtype + return [dtype[0][1:], dtype[1]] + +def match_scalar(dtype): + return [dtype[0], 1] + +# The dstType is the expected type, srcType is +# the reference type. Sometimes, the dstType and +# srcType are different. We need to fix this issue +# and return correct dst type. +def fixup_type(dstType, srcType, n): + if dstType == srcType: + return dstType[n] + + if dstType != srcType: + # scalar dst type + if len(dstType) == 1: + return dstType[0] + # dst is not scalar bug src is scalar + if len(srcType) == 1: + return dstType[n] + if dstType == integer_sgentype[1] and srcType == integer_gentype[1]: + return match_scalar(srcType[n]) + + if dstType == integer_gentype[1] and \ + (srcType == integer_sgentype[1] or \ + srcType == integer_ugentype[1]): + return dstType[n] + + if dstType == integer_ugentype[1] and srcType == integer_gentype[1]: + return match_unsigned(srcType[n]) + + if dstType == relational_igentype[1] and srcType == relational_gentype[1]: + return match_signed(srcType[n]) + if dstType == relational_ugentype[1] and srcType == relational_gentype[1]: + return match_unsigned(srcType[n]) + + if dstType == relational_gentype[1] and \ + (srcType == relational_igentype[1] or \ + srcType == relational_ugentype[1]): + return dstType[n] + + if (len(dstType) == len(srcType)): + return dstType[n] + + print dstType, srcType + raise "type mispatch" + +class builtinProto(): + valueTypeStr = "" + functionName = "" + paramTypeStrs = [] + paramCount = 0 + outputStr = [] + prefix = "" + justproto = 0 + + def init(self, sectionHeader, sectionPrefix, justproto): + self.valueTypeStr = "" + self.functionName = "" + self.paramTypeStrs = [] + self.paramCount = 0 + self.justproto = justproto + if sectionHeader != "": + self.outputStr = [sectionHeader] + else: + self.outputStr = [] + if sectionPrefix != "": + self.prefix = sectionPrefix + self.indent = 0 + + def append(self, line, nextInit = ""): + self.outputStr.append(line); + return nextInit; + + def indentSpace(self): + ret = "" + for i in range(self.indent): + ret += ' ' + + return ret + + def init_from_line(self, t): + self.append('//{0}'.format(t)) + line = filter(None, re.split(',| |\(', t.rstrip(')\n'))) + self.paramCount = 0 + stripped = 0 + memSpace = '' + for i, text in enumerate(line): + idx = i - stripped + if idx == 0: + self.valueTypeStr = _prefix(self.prefix, line[i]) + continue + + if idx == 1: + self.functionName = line[i]; + continue + + if idx % 2 == 0: + if line[i][0] == '(': + tmpType = line[i][1:] + else: + tmpType = line[i] + if tmpType == '__local' or \ + tmpType == '__private' or \ + tmpType == '__global': + memSpace = tmpType + ' ' + stripped += 1 + continue + self.paramTypeStrs.append(memSpace + _prefix(self.prefix, tmpType)) + memSpace = '' + self.paramCount += 1 + + def gen_proto_str_1(self, vtypeSeq, ptypeSeqs, i): + for n in range(0, self.paramCount): + ptype = fixup_type(ptypeSeqs[n], vtypeSeq, i); + vtype = fixup_type(vtypeSeq, ptypeSeqs[n], i); + # XXX FIXME now skip all double vector, as we don't + # defined those scalar version's prototype. + if ptype[0].find('double') != -1 or \ + vtype[0].find('double') != -1: + return + + if (n == 0): + formatStr = 'OVERLOADABLE {0}{1} {2} ('.format(vtype[0], vtype[1], self.functionName) + else: + formatStr += ', ' + + if vtype[1] == 1: + return + + if isPointer(ptype): + formatStr += ptype[2] + pointerStr = '*' + else: + pointerStr = '' + + if ptype[1] != 1: + formatStr += '{0}{1} {2}param{3}'.format(ptype[0], ptype[1], pointerStr, n) + else: + formatStr += '{0} {1}param{2}'.format(ptype[0], pointerStr, n) + + formatStr += ')' + if self.justproto == "1": + formatStr += ';' + self.append(formatStr) + return formatStr + formatStr = self.append(formatStr, '{{return ({0}{1})('.format(vtype[0], vtype[1])) + self.indent = len(formatStr) + for j in range(0, vtype[1]): + if (j != 0): + formatStr += ',' + if (j + 1) % 2 == 0: + formatStr += ' ' + if j % 2 == 0: + formatStr = self.append(formatStr, self.indentSpace()) + + if self.prefix == 'relational' and self.functionName != 'bitselect' and self.functionName != 'select': + formatStr += '-' + formatStr += '{0}('.format(self.functionName) + for n in range(0, self.paramCount): + if n != 0: + formatStr += ', ' + + ptype = fixup_type(ptypeSeqs[n], vtypeSeq, i) + vtype = fixup_type(vtypeSeq, ptypeSeqs[n], i) + if vtype[1] != ptype[1]: + if ptype[1] != 1: + raise "parameter is not a scalar but has different width with result value." + if isPointer(ptype): + formatStr += '&' + formatStr += 'param{0}'.format(n) + continue + + if (isPointer(ptype)): + formatStr += '({0} {1} *)param{2} + {3:2d}'.format(ptype[2], ptype[0], n, j) + else: + if (self.functionName == 'select' and n == 2): + formatStr += '({0})(param{1}.s{2:x} & (({0})1 << (sizeof({0})*8 - 1)))'.format(ptype[0], n, j) + else: + formatStr += 'param{0}.s{1:x}'.format(n, j) + + formatStr += ')' + + formatStr += '); }\n' + self.append(formatStr) + + return formatStr + + def output(self): + for line in self.outputStr: + print line + + def output(self, outFile): + for line in self.outputStr: + outFile.write('{0}\n'.format(line)) + + def gen_proto_str(self): + check_type([self.valueTypeStr] + self.paramTypeStrs) + vtypeSeq = type_dict[self.valueTypeStr] + ptypeSeqs = [] + count = len(vtypeSeq); + for t in self.paramTypeStrs: + memspace,t = stripMemSpace(t) + ptypeSeqs.append(set_vector_memspace(type_dict[t], memspace)) + count = max(count, len(type_dict[t])) + + for i in range(count): + formatStr = self.gen_proto_str_1(vtypeSeq, ptypeSeqs, i) + + self.append("") + +# save the prototypes into ocl_vector.h +specFile = open(sys.argv[1], 'r') +headerFileName = sys.argv[2] +tempHeader = open(headerFileName, 'a') +isJustProto = sys.argv[3] + +tempHeader.write("//Begin from this part is autogenerated.\n") +tempHeader.write("//Don't modify it manually.\n") + +functionProto = builtinProto() +for line in specFile: + if line.isspace(): + continue + if line[0] == '#': + if line[1] == '#': + sectionHeader = "//{0} builtin functions".format(line[2:].rstrip()) + sectionPrefix=(line[2:].split())[0] + continue + functionProto.init(sectionHeader, sectionPrefix, isJustProto) + sectionHeader = "" + setionPrefix = "" + functionProto.init_from_line(line) + functionProto.gen_proto_str() + functionProto.output(tempHeader) + +tempHeader.close() diff --git a/backend/src/libocl/script/ocl_common.def b/backend/src/libocl/script/ocl_common.def new file mode 100644 index 0000000..fac5ef5 --- /dev/null +++ b/backend/src/libocl/script/ocl_common.def @@ -0,0 +1,22 @@ +##common +gentype clamp (gentype x, gentype minval, gentype maxval) +gentypef clamp (gentypef x, float minval, float maxval) +gentyped clamp (gentyped x, double minval, double maxval) +gentype degrees (gentype radians) +gentype max (gentype x, gentype y) +gentypef max (gentypef x, float y) +gentyped max (gentyped x, double y) +gentype min (gentype x, gentype y) +gentypef min (gentypef x, float y) +gentyped min (gentyped x, double y) +gentype mix (gentype x, gentype y, gentype a) +gentypef mix (gentypef x, gentypef y, float a) +gentyped mix (gentyped x, gentyped y, double a) +gentype radians (gentype degrees) +gentype step (gentype edge, gentype x) +gentypef step (float edge, gentypef x) +gentyped step (double edge, gentyped x) +gentype smoothstep (gentype edge0, gentype edge1, gentype x) +gentypef smoothstep (float edge0, float edge1, gentypef x) +gentyped smoothstep (double edge0, double edge1, gentyped x) +gentype sign (gentype x) -- 1.8.3.2 _______________________________________________ Beignet mailing list [email protected] http://lists.freedesktop.org/mailman/listinfo/beignet
