sc/source/core/opencl/formulagroupcl.cxx | 382 ++++++++++++++++++++++++++++--- 1 file changed, 356 insertions(+), 26 deletions(-)
New commits: commit 226e367ff93542d82975d6ccd34448f68acf8035 Author: Tor Lillqvist <[email protected]> Date: Mon Jan 12 13:03:02 2015 +0200 SAL_INFO the setting arguments to OpenCL kernels and enqueueing them Change-Id: Ia60194f9789324bc484bfa609c6eb92572b8554d diff --git a/sc/source/core/opencl/formulagroupcl.cxx b/sc/source/core/opencl/formulagroupcl.cxx index bf99b58..d407a79 100644 --- a/sc/source/core/opencl/formulagroupcl.cxx +++ b/sc/source/core/opencl/formulagroupcl.cxx @@ -156,6 +156,7 @@ size_t VectorRef::Marshal( cl_kernel k, int argno, int, cl_program ) pNanBuffer, 0, NULL, NULL); } + SAL_INFO("sc.opencl", "Kernel " << k << " arg " << argno << ": cl_mem: " << mpClmem); err = clSetKernelArg(k, argno, sizeof(cl_mem), (void*)&mpClmem); if (CL_SUCCESS != err) throw OpenCLError(err, __FILE__, __LINE__); @@ -214,6 +215,7 @@ public: } // Pass the scalar result back to the rest of the formula kernel + SAL_INFO("sc.opencl", "Kernel " << k << " arg " << argno << ": cl_uint: " << hashCode); cl_int err = clSetKernelArg(k, argno, sizeof(cl_uint), (void*)&hashCode); if (CL_SUCCESS != err) throw OpenCLError(err, __FILE__, __LINE__); @@ -263,6 +265,7 @@ public: { double tmp = GetDouble(); // Pass the scalar result back to the rest of the formula kernel + SAL_INFO("sc.opencl", "Kernel " << k << " arg " << argno << ": double: " << tmp); cl_int err = clSetKernelArg(k, argno, sizeof(double), (void*)&tmp); if (CL_SUCCESS != err) throw OpenCLError(err, __FILE__, __LINE__); @@ -303,6 +306,7 @@ public: { double tmp = 0.0; // Pass the scalar result back to the rest of the formula kernel + SAL_INFO("sc.opencl", "Kernel " << k << " arg " << argno << ": double: " << tmp); cl_int err = clSetKernelArg(k, argno, sizeof(double), (void*)&tmp); if (CL_SUCCESS != err) throw OpenCLError(err, __FILE__, __LINE__); @@ -662,6 +666,7 @@ threefry2x32 (threefry2x32_ctr_t in, threefry2x32_key_t k)\n\ { cl_int seed = comphelper::rng::uniform_int_distribution(0, SAL_MAX_INT32); // Pass the scalar result back to the rest of the formula kernel + SAL_INFO("sc.opencl", "Kernel " << k << " arg " << argno << ": cl_int: " << seed); cl_int err = clSetKernelArg(k, argno, sizeof(cl_int), (void*)&seed); if (CL_SUCCESS != err) throw OpenCLError(err, __FILE__, __LINE__); @@ -770,6 +775,7 @@ size_t DynamicKernelStringArgument::Marshal( cl_kernel k, int argno, int, cl_pro if (CL_SUCCESS != err) throw OpenCLError(err, __FILE__, __LINE__); + SAL_INFO("sc.opencl", "Kernel " << k << " arg " << argno << ": cl_mem: " << mpClmem); err = clSetKernelArg(k, argno, sizeof(cl_mem), (void*)&mpClmem); if (CL_SUCCESS != err) throw OpenCLError(err, __FILE__, __LINE__); @@ -1414,19 +1420,23 @@ public: // set kernel arg of reduction kernel // TODO(Wei Wei): use unique name for kernel cl_mem buf = Base::GetCLBuffer(); + SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << 0 << ": cl_mem: " << buf); err = clSetKernelArg(redKernel, 0, sizeof(cl_mem), (void*)&buf); if (CL_SUCCESS != err) throw OpenCLError(err, __FILE__, __LINE__); + SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << 1 << ": cl_mem: " << mpClmem2); err = clSetKernelArg(redKernel, 1, sizeof(cl_mem), (void*)&mpClmem2); if (CL_SUCCESS != err) throw OpenCLError(err, __FILE__, __LINE__); + SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << 2 << ": cl_int: " << nInput); err = clSetKernelArg(redKernel, 2, sizeof(cl_int), (void*)&nInput); if (CL_SUCCESS != err) throw OpenCLError(err, __FILE__, __LINE__); + SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << 3 << ": cl_int: " << nCurWindowSize); err = clSetKernelArg(redKernel, 3, sizeof(cl_int), (void*)&nCurWindowSize); if (CL_SUCCESS != err) throw OpenCLError(err, __FILE__, __LINE__); @@ -1434,6 +1444,7 @@ public: // set work group size and execute size_t global_work_size[] = { 256, (size_t)w }; size_t local_work_size[] = { 256, 1 }; + SAL_INFO("sc.opencl", "Enqueing kernel " << redKernel); err = clEnqueueNDRangeKernel(kEnv.mpkCmdQueue, redKernel, 2, NULL, global_work_size, local_work_size, 0, NULL, NULL); if (CL_SUCCESS != err) @@ -1465,19 +1476,23 @@ public: throw OpenCLError(err, __FILE__, __LINE__); // set kernel arg of reduction kernel buf = Base::GetCLBuffer(); + SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << 0 << ": cl_mem: " << buf); err = clSetKernelArg(redKernel, 0, sizeof(cl_mem), (void*)&buf); if (CL_SUCCESS != err) throw OpenCLError(err, __FILE__, __LINE__); + SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << 1 << ": cl_mem: " << mpClmem2); err = clSetKernelArg(redKernel, 1, sizeof(cl_mem), (void*)&mpClmem2); if (CL_SUCCESS != err) throw OpenCLError(err, __FILE__, __LINE__); + SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << 2 << ": cl_int: " << nInput); err = clSetKernelArg(redKernel, 2, sizeof(cl_int), (void*)&nInput); if (CL_SUCCESS != err) throw OpenCLError(err, __FILE__, __LINE__); + SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << 3 << ": cl_int: " << nCurWindowSize); err = clSetKernelArg(redKernel, 3, sizeof(cl_int), (void*)&nCurWindowSize); if (CL_SUCCESS != err) throw OpenCLError(err, __FILE__, __LINE__); @@ -1485,6 +1500,7 @@ public: // set work group size and execute size_t global_work_size1[] = { 256, (size_t)w }; size_t local_work_size1[] = { 256, 1 }; + SAL_INFO("sc.opencl", "Enqueing kernel " << redKernel); err = clEnqueueNDRangeKernel(kEnv.mpkCmdQueue, redKernel, 2, NULL, global_work_size1, local_work_size1, 0, NULL, NULL); if (CL_SUCCESS != err) @@ -1514,6 +1530,7 @@ public: throw OpenCLError(err, __FILE__, __LINE__); } // set kernel arg + SAL_INFO("sc.opencl", "Kernel " << k << " arg " << argno << ": cl_mem: " << mpClmem2); err = clSetKernelArg(k, argno, sizeof(cl_mem), (void*)&(mpClmem2)); if (CL_SUCCESS != err) throw OpenCLError(err, __FILE__, __LINE__); @@ -2139,12 +2156,14 @@ public: // set kernel arg of reduction kernel for (size_t j = 0; j < vclmem.size(); j++) { + SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << j << ": " << (vclmem[j] ? "cl_mem" : "double") << ": " << vclmem[j]); err = clSetKernelArg(redKernel, j, vclmem[j] ? sizeof(cl_mem) : sizeof(double), (void*)&vclmem[j]); if (CL_SUCCESS != err) throw OpenCLError(err, __FILE__, __LINE__); } + SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << vclmem.size() << ": cl_mem: " << pClmem2); err = clSetKernelArg(redKernel, vclmem.size(), sizeof(cl_mem), (void*)&pClmem2); if (CL_SUCCESS != err) throw OpenCLError(err, __FILE__, __LINE__); @@ -2152,6 +2171,7 @@ public: // set work group size and execute size_t global_work_size[] = { 256, (size_t)nVectorWidth }; size_t local_work_size[] = { 256, 1 }; + SAL_INFO("sc.opencl", "Enqueing kernel " << redKernel); err = clEnqueueNDRangeKernel(kEnv.mpkCmdQueue, redKernel, 2, NULL, global_work_size, local_work_size, 0, NULL, NULL); if (CL_SUCCESS != err) @@ -2161,6 +2181,7 @@ public: throw OpenCLError(err, __FILE__, __LINE__); // Pass pClmem2 to the "real" kernel + SAL_INFO("sc.opencl", "Kernel " << k << " arg " << argno << ": cl_mem: " << pClmem2); err = clSetKernelArg(k, argno, sizeof(cl_mem), (void*)&pClmem2); if (CL_SUCCESS != err) throw OpenCLError(err, __FILE__, __LINE__); @@ -2206,6 +2227,10 @@ public: // set kernel arg of reduction kernel for (size_t j = 0; j < vclmem.size(); j++) { + if (vclmem[j].mCLMem) + SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << j << ": cl_mem: " << vclmem[j].mCLMem); + else + SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << j << ": double: " << vclmem[j].mConst); err = clSetKernelArg(redKernel, j, vclmem[j].mCLMem ? sizeof(cl_mem) : sizeof(double), vclmem[j].mCLMem ? (void*)&vclmem[j].mCLMem : @@ -2213,20 +2238,24 @@ public: if (CL_SUCCESS != err) throw OpenCLError(err, __FILE__, __LINE__); } + SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << vclmem.size() << ": cl_mem: " << mpClmem2); err = clSetKernelArg(redKernel, vclmem.size(), sizeof(cl_mem), (void*)&mpClmem2); if (CL_SUCCESS != err) throw OpenCLError(err, __FILE__, __LINE__); + SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << (vclmem.size() + 1) << ": cl_int: " << nInput); err = clSetKernelArg(redKernel, vclmem.size() + 1, sizeof(cl_int), (void*)&nInput); if (CL_SUCCESS != err) throw OpenCLError(err, __FILE__, __LINE__); + SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << (vclmem.size() + 2) << ": cl_int: " << nCurWindowSize); err = clSetKernelArg(redKernel, vclmem.size() + 2, sizeof(cl_int), (void*)&nCurWindowSize); if (CL_SUCCESS != err) throw OpenCLError(err, __FILE__, __LINE__); // set work group size and execute size_t global_work_size[] = { 256, (size_t)nVectorWidth }; size_t local_work_size[] = { 256, 1 }; + SAL_INFO("sc.opencl", "Enqueing kernel " << redKernel); err = clEnqueueNDRangeKernel(kEnv.mpkCmdQueue, redKernel, 2, NULL, global_work_size, local_work_size, 0, NULL, NULL); if (CL_SUCCESS != err) @@ -2236,6 +2265,7 @@ public: throw OpenCLError(err, __FILE__, __LINE__); clReleaseKernel(redKernel); // Pass mpClmem2 to the "real" kernel + SAL_INFO("sc.opencl", "Kernel " << k << " arg " << argno << ": cl_mem: " << mpClmem2); err = clSetKernelArg(k, argno, sizeof(cl_mem), (void*)&mpClmem2); if (CL_SUCCESS != err) throw OpenCLError(err, __FILE__, __LINE__); @@ -3712,12 +3742,14 @@ void DynamicKernel::Launch( size_t nr ) nr * sizeof(double), NULL, &err); if (CL_SUCCESS != err) throw OpenCLError(err, __FILE__, __LINE__); + SAL_INFO("sc.opencl", "Kernel " << mpKernel << " arg " << 0 << ": cl_mem: " << mpResClmem); err = clSetKernelArg(mpKernel, 0, sizeof(cl_mem), (void*)&mpResClmem); if (CL_SUCCESS != err) throw OpenCLError(err, __FILE__, __LINE__); // The rest of buffers mSyms.Marshal(mpKernel, nr, mpProgram); size_t global_work_size[] = { nr }; + SAL_INFO("sc.opencl", "Enqueing kernel " << mpKernel); err = clEnqueueNDRangeKernel(kEnv.mpkCmdQueue, mpKernel, 1, NULL, global_work_size, NULL, 0, NULL, NULL); if (CL_SUCCESS != err) commit 4200d7300ec9355e0d648d7053484901b5dd2009 Author: Tor Lillqvist <[email protected]> Date: Mon Jan 12 12:41:46 2015 +0200 Rewrite the RAND() OpenCL implementation to actually be random Use a so-called counter-based random number generator. Code from Random123, http://www.deshawresearch.com/resources_random123.html. Change-Id: Id47f84ef18eada64dcf47762a61ec3856c71760e diff --git a/sc/source/core/opencl/formulagroupcl.cxx b/sc/source/core/opencl/formulagroupcl.cxx index f0cfd930..bf99b58 100644 --- a/sc/source/core/opencl/formulagroupcl.cxx +++ b/sc/source/core/opencl/formulagroupcl.cxx @@ -16,6 +16,7 @@ #include "tokenarray.hxx" #include "compiler.hxx" #include "interpre.hxx" +#include <comphelper/random.hxx> #include <formula/vectortoken.hxx> #include "scmatrix.hxx" @@ -326,31 +327,331 @@ public: } virtual void GenSlidingWindowDecl( std::stringstream& ss ) const SAL_OVERRIDE { - GenDecl(ss); + ss << "int " << mSymName; } virtual std::string GenSlidingWindowDeclRef( bool = false ) const SAL_OVERRIDE { - return mSymName + "_Random()"; + return mSymName + "_Random(" + mSymName + ")"; } virtual void GenSlidingWindowFunction( std::stringstream& ss ) SAL_OVERRIDE { - ss << "\ndouble " << mSymName; - ss << "_Random ()\n{\n"; - ss << " int i, gid0=get_global_id(0);;\n"; - ss << " double tmp = 0;\n"; - ss << " double M = 2147483647;\n"; - ss << " double Lamda = 32719;\n"; - ss << " double f;\n"; - ss << " f = gid0 + 1;\n"; - ss << " int k;\n"; - ss << " for(i = 1;i <= 100; ++i){\n"; - ss << " f = Lamda * f;\n"; - ss << " k = (int)(f * pow(M,-1.0));\n"; - ss << " f = f - M * k;\n"; - ss << " }\n"; - ss << " tmp = f * pow(M,-1.0);\n"; - ss << " return tmp;\n"; - ss << "}"; + // This string is from the pi_opencl_kernel.i file as + // generated when building the Random123 examples. Unused + // stuff has been removed, and the actual kernel is not the + // same as in the totally different use case of that example, + // of course. Only the code that calculates the counter-based + // random number and what it needs is left. + ss << "\ +/*\n\ +Copyright 2010-2011, D. E. Shaw Research.\n\ +All rights reserved.\n\ +\n\ +Redistribution and use in source and binary forms, with or without\n\ +modification, are permitted provided that the following conditions are\n\ +met:\n\ +\n\ +* Redistributions of source code must retain the above copyright\n\ + notice, this list of conditions, and the following disclaimer.\n\ +\n\ +* Redistributions in binary form must reproduce the above copyright\n\ + notice, this list of conditions, and the following disclaimer in the\n\ + documentation and/or other materials provided with the distribution.\n\ +\n\ +* Neither the name of D. E. Shaw Research nor the names of its\n\ + contributors may be used to endorse or promote products derived from\n\ + this software without specific prior written permission.\n\ +\n\ +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS\n\ +\"AS IS\" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT\n\ +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR\n\ +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT\n\ +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,\n\ +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT\n\ +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,\n\ +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY\n\ +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT\n\ +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE\n\ +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\n\ +*/\n\ +\n\ +typedef uint uint32_t;\n\ +struct r123array2x32\n\ +{\n\ + uint32_t v[2];\n\ +};\n\ +enum r123_enum_threefry32x2\n\ +{\n\ + R_32x2_0_0 = 13,\n\ + R_32x2_1_0 = 15,\n\ + R_32x2_2_0 = 26,\n\ + R_32x2_3_0 = 6,\n\ + R_32x2_4_0 = 17,\n\ + R_32x2_5_0 = 29,\n\ + R_32x2_6_0 = 16,\n\ + R_32x2_7_0 = 24\n\ +};\n\ +inline uint32_t RotL_32 (uint32_t x, unsigned int N)\n\ + __attribute__ ((always_inline));\n\ +inline uint32_t\n\ +RotL_32 (uint32_t x, unsigned int N)\n\ +{\n\ + return (x << (N & 31)) | (x >> ((32 - N) & 31));\n\ +}\n\ +\n\ +typedef struct r123array2x32 threefry2x32_ctr_t;\n\ +typedef struct r123array2x32 threefry2x32_key_t;\n\ +typedef struct r123array2x32 threefry2x32_ukey_t;\n\ +inline threefry2x32_key_t\n\ +threefry2x32keyinit (threefry2x32_ukey_t uk)\n\ +{\n\ + return uk;\n\ +}\n\ +\n\ +inline threefry2x32_ctr_t threefry2x32_R (unsigned int Nrounds,\n\ + threefry2x32_ctr_t in,\n\ + threefry2x32_key_t k)\n\ + __attribute__ ((always_inline));\n\ +inline threefry2x32_ctr_t\n\ +threefry2x32_R (unsigned int Nrounds, threefry2x32_ctr_t in,\n\ + threefry2x32_key_t k)\n\ +{\n\ + threefry2x32_ctr_t X;\n\ + uint32_t ks[2 + 1];\n\ + int i;\n\ + ks[2] = 0x1BD11BDA;\n\ + for (i = 0; i < 2; i++) {\n\ + ks[i] = k.v[i];\n\ + X.v[i] = in.v[i];\n\ + ks[2] ^= k.v[i];\n\ + }\n\ + X.v[0] += ks[0];\n\ + X.v[1] += ks[1];\n\ + if (Nrounds > 0) {\n\ + X.v[0] += X.v[1];\n\ + X.v[1] = RotL_32 (X.v[1], R_32x2_0_0);\n\ + X.v[1] ^= X.v[0];\n\ + }\n\ + if (Nrounds > 1) {\n\ + X.v[0] += X.v[1];\n\ + X.v[1] = RotL_32 (X.v[1], R_32x2_1_0);\n\ + X.v[1] ^= X.v[0];\n\ + }\n\ + if (Nrounds > 2) {\n\ + X.v[0] += X.v[1];\n\ + X.v[1] = RotL_32 (X.v[1], R_32x2_2_0);\n\ + X.v[1] ^= X.v[0];\n\ + }\n\ + if (Nrounds > 3) {\n\ + X.v[0] += X.v[1];\n\ + X.v[1] = RotL_32 (X.v[1], R_32x2_3_0);\n\ + X.v[1] ^= X.v[0];\n\ + }\n\ + if (Nrounds > 3) {\n\ + X.v[0] += ks[1];\n\ + X.v[1] += ks[2];\n\ + X.v[1] += 1;\n\ + }\n\ + if (Nrounds > 4) {\n\ + X.v[0] += X.v[1];\n\ + X.v[1] = RotL_32 (X.v[1], R_32x2_4_0);\n\ + X.v[1] ^= X.v[0];\n\ + }\n\ + if (Nrounds > 5) {\n\ + X.v[0] += X.v[1];\n\ + X.v[1] = RotL_32 (X.v[1], R_32x2_5_0);\n\ + X.v[1] ^= X.v[0];\n\ + }\n\ + if (Nrounds > 6) {\n\ + X.v[0] += X.v[1];\n\ + X.v[1] = RotL_32 (X.v[1], R_32x2_6_0);\n\ + X.v[1] ^= X.v[0];\n\ + }\n\ + if (Nrounds > 7) {\n\ + X.v[0] += X.v[1];\n\ + X.v[1] = RotL_32 (X.v[1], R_32x2_7_0);\n\ + X.v[1] ^= X.v[0];\n\ + }\n\ + if (Nrounds > 7) {\n\ + X.v[0] += ks[2];\n\ + X.v[1] += ks[0];\n\ + X.v[1] += 2;\n\ + }\n\ + if (Nrounds > 8) {\n\ + X.v[0] += X.v[1];\n\ + X.v[1] = RotL_32 (X.v[1], R_32x2_0_0);\n\ + X.v[1] ^= X.v[0];\n\ + }\n\ + if (Nrounds > 9) {\n\ + X.v[0] += X.v[1];\n\ + X.v[1] = RotL_32 (X.v[1], R_32x2_1_0);\n\ + X.v[1] ^= X.v[0];\n\ + }\n\ + if (Nrounds > 10) {\n\ + X.v[0] += X.v[1];\n\ + X.v[1] = RotL_32 (X.v[1], R_32x2_2_0);\n\ + X.v[1] ^= X.v[0];\n\ + }\n\ + if (Nrounds > 11) {\n\ + X.v[0] += X.v[1];\n\ + X.v[1] = RotL_32 (X.v[1], R_32x2_3_0);\n\ + X.v[1] ^= X.v[0];\n\ + }\n\ + if (Nrounds > 11) {\n\ + X.v[0] += ks[0];\n\ + X.v[1] += ks[1];\n\ + X.v[1] += 3;\n\ + }\n\ + if (Nrounds > 12) {\n\ + X.v[0] += X.v[1];\n\ + X.v[1] = RotL_32 (X.v[1], R_32x2_4_0);\n\ + X.v[1] ^= X.v[0];\n\ + }\n\ + if (Nrounds > 13) {\n\ + X.v[0] += X.v[1];\n\ + X.v[1] = RotL_32 (X.v[1], R_32x2_5_0);\n\ + X.v[1] ^= X.v[0];\n\ + }\n\ + if (Nrounds > 14) {\n\ + X.v[0] += X.v[1];\n\ + X.v[1] = RotL_32 (X.v[1], R_32x2_6_0);\n\ + X.v[1] ^= X.v[0];\n\ + }\n\ + if (Nrounds > 15) {\n\ + X.v[0] += X.v[1];\n\ + X.v[1] = RotL_32 (X.v[1], R_32x2_7_0);\n\ + X.v[1] ^= X.v[0];\n\ + }\n\ + if (Nrounds > 15) {\n\ + X.v[0] += ks[1];\n\ + X.v[1] += ks[2];\n\ + X.v[1] += 4;\n\ + }\n\ + if (Nrounds > 16) {\n\ + X.v[0] += X.v[1];\n\ + X.v[1] = RotL_32 (X.v[1], R_32x2_0_0);\n\ + X.v[1] ^= X.v[0];\n\ + }\n\ + if (Nrounds > 17) {\n\ + X.v[0] += X.v[1];\n\ + X.v[1] = RotL_32 (X.v[1], R_32x2_1_0);\n\ + X.v[1] ^= X.v[0];\n\ + }\n\ + if (Nrounds > 18) {\n\ + X.v[0] += X.v[1];\n\ + X.v[1] = RotL_32 (X.v[1], R_32x2_2_0);\n\ + X.v[1] ^= X.v[0];\n\ + }\n\ + if (Nrounds > 19) {\n\ + X.v[0] += X.v[1];\n\ + X.v[1] = RotL_32 (X.v[1], R_32x2_3_0);\n\ + X.v[1] ^= X.v[0];\n\ + }\n\ + if (Nrounds > 19) {\n\ + X.v[0] += ks[2];\n\ + X.v[1] += ks[0];\n\ + X.v[1] += 5;\n\ + }\n\ + if (Nrounds > 20) {\n\ + X.v[0] += X.v[1];\n\ + X.v[1] = RotL_32 (X.v[1], R_32x2_4_0);\n\ + X.v[1] ^= X.v[0];\n\ + }\n\ + if (Nrounds > 21) {\n\ + X.v[0] += X.v[1];\n\ + X.v[1] = RotL_32 (X.v[1], R_32x2_5_0);\n\ + X.v[1] ^= X.v[0];\n\ + }\n\ + if (Nrounds > 22) {\n\ + X.v[0] += X.v[1];\n\ + X.v[1] = RotL_32 (X.v[1], R_32x2_6_0);\n\ + X.v[1] ^= X.v[0];\n\ + }\n\ + if (Nrounds > 23) {\n\ + X.v[0] += X.v[1];\n\ + X.v[1] = RotL_32 (X.v[1], R_32x2_7_0);\n\ + X.v[1] ^= X.v[0];\n\ + }\n\ + if (Nrounds > 23) {\n\ + X.v[0] += ks[0];\n\ + X.v[1] += ks[1];\n\ + X.v[1] += 6;\n\ + }\n\ + if (Nrounds > 24) {\n\ + X.v[0] += X.v[1];\n\ + X.v[1] = RotL_32 (X.v[1], R_32x2_0_0);\n\ + X.v[1] ^= X.v[0];\n\ + }\n\ + if (Nrounds > 25) {\n\ + X.v[0] += X.v[1];\n\ + X.v[1] = RotL_32 (X.v[1], R_32x2_1_0);\n\ + X.v[1] ^= X.v[0];\n\ + }\n\ + if (Nrounds > 26) {\n\ + X.v[0] += X.v[1];\n\ + X.v[1] = RotL_32 (X.v[1], R_32x2_2_0);\n\ + X.v[1] ^= X.v[0];\n\ + }\n\ + if (Nrounds > 27) {\n\ + X.v[0] += X.v[1];\n\ + X.v[1] = RotL_32 (X.v[1], R_32x2_3_0);\n\ + X.v[1] ^= X.v[0];\n\ + }\n\ + if (Nrounds > 27) {\n\ + X.v[0] += ks[1];\n\ + X.v[1] += ks[2];\n\ + X.v[1] += 7;\n\ + }\n\ + if (Nrounds > 28) {\n\ + X.v[0] += X.v[1];\n\ + X.v[1] = RotL_32 (X.v[1], R_32x2_4_0);\n\ + X.v[1] ^= X.v[0];\n\ + }\n\ + if (Nrounds > 29) {\n\ + X.v[0] += X.v[1];\n\ + X.v[1] = RotL_32 (X.v[1], R_32x2_5_0);\n\ + X.v[1] ^= X.v[0];\n\ + }\n\ + if (Nrounds > 30) {\n\ + X.v[0] += X.v[1];\n\ + X.v[1] = RotL_32 (X.v[1], R_32x2_6_0);\n\ + X.v[1] ^= X.v[0];\n\ + }\n\ + if (Nrounds > 31) {\n\ + X.v[0] += X.v[1];\n\ + X.v[1] = RotL_32 (X.v[1], R_32x2_7_0);\n\ + X.v[1] ^= X.v[0];\n\ + }\n\ + if (Nrounds > 31) {\n\ + X.v[0] += ks[2];\n\ + X.v[1] += ks[0];\n\ + X.v[1] += 8;\n\ + }\n\ + return X;\n\ +}\n\ +\n\ +enum r123_enum_threefry2x32\n\ +{ threefry2x32_rounds = 20 };\n\ +inline threefry2x32_ctr_t threefry2x32 (threefry2x32_ctr_t in,\n\ + threefry2x32_key_t k)\n\ + __attribute__ ((always_inline));\n\ +inline threefry2x32_ctr_t\n\ +threefry2x32 (threefry2x32_ctr_t in, threefry2x32_key_t k)\n\ +{\n\ + return threefry2x32_R (threefry2x32_rounds, in, k);\n\ +}\n\ +\n\ +"; + ss << "double " << mSymName << "_Random (int seed)\n\ +{\n\ + unsigned tid = get_global_id(0);\n\ + threefry2x32_key_t k = { {tid, 0xdecafbad} };\n\ + threefry2x32_ctr_t c = { {seed, 0xf00dcafe} };\n\ + c = threefry2x32_R(threefry2x32_rounds, c, k);\n\ + const double factor = 1./(" << SAL_MAX_UINT32 << ".0 + 1.0);\n\ + const double halffactor = 0.5*factor;\n\ + return c.v[0] * factor + halffactor;\n\ +}\n\ +"; } virtual size_t GetWindowSize() const SAL_OVERRIDE { @@ -359,9 +660,9 @@ public: /// Create buffer and pass the buffer to a given kernel virtual size_t Marshal( cl_kernel k, int argno, int, cl_program ) SAL_OVERRIDE { - double tmp = 0.0; + cl_int seed = comphelper::rng::uniform_int_distribution(0, SAL_MAX_INT32); // Pass the scalar result back to the rest of the formula kernel - cl_int err = clSetKernelArg(k, argno, sizeof(double), (void*)&tmp); + cl_int err = clSetKernelArg(k, argno, sizeof(cl_int), (void*)&seed); if (CL_SUCCESS != err) throw OpenCLError(err, __FILE__, __LINE__); return 1; commit 41d40b3a0090f71cef74145da3af2118ab504e59 Author: Tor Lillqvist <[email protected]> Date: Fri Jan 9 10:42:28 2015 +0200 Surely any C++ programmer knows what a forward declaration is Change-Id: I12d230176ef1ea232ac9a401fbbebce6d8c058a7 diff --git a/sc/source/core/opencl/formulagroupcl.cxx b/sc/source/core/opencl/formulagroupcl.cxx index a327d1f..f0cfd930 100644 --- a/sc/source/core/opencl/formulagroupcl.cxx +++ b/sc/source/core/opencl/formulagroupcl.cxx @@ -546,11 +546,8 @@ protected: /// Handling a Double Vector that is used as a sliding window input /// to either a sliding window average or sum-of-products /// Generate a sequential loop for reductions -class OpSum; // Forward Declaration -class OpAverage; // Forward Declaration -class OpMin; // Forward Declaration -class OpMax; // Forward Declaration -class OpCount; // Forward Declaration +class OpAverage; +class OpCount; template<class Base> class DynamicKernelSlidingArgument : public Base _______________________________________________ Libreoffice-commits mailing list [email protected] http://lists.freedesktop.org/mailman/listinfo/libreoffice-commits
