Dnia 2012-05-20, nie o godzinie 15:37 -0400, Andreas Kloeckner pisze: [ cut ] > > > > I had to fix tests to run on Python3 (see patch). Almost all fail now > > because of Unicode hashing, but at least they run, so we can start > > fixing them. > > > > Andreas, can you apply those two patches? > > Done.
I have been playing a little bit with Python 3 and PyCUDA. See the attached patch fixing some of the problems with tests. After applying those, the best situation is with test_driver.py: 17 tests pass, only 4 fail. Other tests are not so good: only 2 or 3 tests pass, the rest fail. Most of the failures are in Python code calling the pack() function (trying to pack kernel arguments). The pack call (indirectly) calls get_pylong from src/wrapper/_pvt_struct_v3.cpp, which calls PyIndex_Check(v) and raises StructError with "required argument is not an integer" (line 110 of src/wrapper/_pvt_struct_v3.cpp). I do not understand this code fully yet - but it looks like simply copying _pvt_struct_v3 from PyOpenCL was not enough ;-) And another thing, unrelated to Python 3, but related to packaging. Debian recently started hardening packages, and the tool for checking returns some warnings for both PyCUDA and PyOpenCL: $ hardening-check *.so _cl.cpython-32mu.so: Position Independent Executable: yes Stack protected: yes Fortify Source functions: no, only unprotected functions found! Read-only relocations: yes Immediate binding: no, not found! _pvt_struct.cpython-32mu.so: Position Independent Executable: yes Stack protected: no, not found! Fortify Source functions: no, only unprotected functions found! Read-only relocations: yes Immediate binding: no, not found! Immediate binding is not important, but "Stack protected: no" and "Fortify Source functions: no" are something that is reported against packages. The long description is at http://wiki.debian.org/Hardening; the short summary by me is here. Both Fortify Source and Stack protector add protection to the stack to guard against overflows. 
They add compiler arguments: -D_FORTIFY_SOURCE=2 -fstack-protector --param ssp-buffer-size=4 so a sample compiler call looks like: For Python 2.6: gcc -pthread -fno-strict-aliasing -fwrapv -Wall -O3 -DNDEBUG -g -O2 -fstack-protector --param=ssp-buffer-size=4 -Wformat -Werror=format-security -D_FORTIFY_SOURCE=2 -fPIC -DPYGPU_PACKAGE=pyopencl -DPYGPU_PYOPENCL=1 -DPYOPENCL_USE_DEVICE_FISSION=1 -DHAVE_GL=1 -I/usr/lib/pymodules/python2.6/numpy/core/include -I/usr/include/python2.6 -c src/wrapper/wrap_cl.cpp -o build/temp.linux-x86_64-2.6/src/wrapper/wrap_cl.o For Python 2.7: gcc -pthread -fno-strict-aliasing -fwrapv -Wall -O3 -DNDEBUG -g -O2 -fstack-protector --param=ssp-buffer-size=4 -Wformat -Werror=format-security -D_FORTIFY_SOURCE=2 -fPIC -DPYGPU_PACKAGE=pyopencl -DPYGPU_PYOPENCL=1 -DPYOPENCL_USE_DEVICE_FISSION=1 -DHAVE_GL=1 -I/usr/lib/pymodules/python2.7/numpy/core/include -I/usr/include/python2.7 -c src/wrapper/wrap_cl_part_1.cpp -o build/temp.linux-x86_64-2.7/src/wrapper/wrap_cl_part_1.o For Python 3.2: gcc -pthread -fwrapv -Wall -O3 -DNDEBUG -g -O2 -fstack-protector --param=ssp-buffer-size=4 -Wformat -Werror=format-security -D_FORTIFY_SOURCE=2 -fPIC -DPYGPU_PACKAGE=pyopencl -DPYGPU_PYOPENCL=1 -DPYOPENCL_USE_DEVICE_FISSION=1 -DHAVE_GL=1 -I/usr/lib/python3/dist-packages/numpy/core/include -I/usr/lib/python3/dist-packages/numpy/core/include -I/usr/include/python3.2mu -c src/wrapper/wrap_cl_part_2.cpp -o build/temp.linux-x86_64-3.2/src/wrapper/wrap_cl_part_2.o If I understand correctly, PyCUDA/PyOpenCL use Python functions to operate on strings and should be immune to stack overflow/stack smashing, and hardening-check is giving a false positive here (on the debian-python mailing list I got a reply that "hardening-no-fortify-functions and hardening-no-stackprotector are prone to false-positives. There's a bug report in the BTS about this."). Is this correct, or should we add some protection to the stack? Regards. 
-- Tomasz Rybak GPG/PGP key ID: 2AD5 9860 Fingerprint A481 824E 7DD3 9C0E C40A 488E C654 FB33 2AD5 9860 http://member.acm.org/~tomaszrybak
diff --git a/pycuda/compiler.py b/pycuda/compiler.py
index 6d7674d..7ef78b5 100644
--- a/pycuda/compiler.py
+++ b/pycuda/compiler.py
@@ -76,13 +76,13 @@ def compile_plain(source, options, keep, nvcc, cache_dir):
if '#include' in source:
checksum.update(preprocess_source(source, options, nvcc))
else:
- checksum.update(source)
+ checksum.update(source.encode("utf-8"))
for option in options:
- checksum.update(option)
+ checksum.update(option.encode("utf-8"))
checksum.update(get_nvcc_version(nvcc))
from pycuda.characterize import platform_bits
- checksum.update(str(platform_bits()))
+ checksum.update(str(platform_bits()).encode("utf-8"))
cache_file = checksum.hexdigest()
cache_path = join(cache_dir, cache_file + ".cubin")
diff --git a/pycuda/driver.py b/pycuda/driver.py
index 50784c3..17d33c9 100644
--- a/pycuda/driver.py
+++ b/pycuda/driver.py
@@ -719,7 +719,11 @@ def matrix_to_texref(matrix, texref, order):
# {{{ device copies
def to_device(bf_obj):
- bf = buffer(bf_obj)
+ import sys
+ if sys.version_info >= (2, 7):
+ bf = memoryview(bf_obj).tobytes()
+ else:
+ bf = buffer(bf_obj)
result = mem_alloc(len(bf))
memcpy_htod(result, bf)
return result
diff --git a/test/test_cumath.py b/test/test_cumath.py
index dbbc201..4d76440 100644
--- a/test/test_cumath.py
+++ b/test/test_cumath.py
@@ -34,7 +34,7 @@ numpy_func_names = {
-def make_unary_function_test(name, (a, b)=(0, 1), threshold=0):
+def make_unary_function_test(name, a=0, b=1, threshold=0):
def test():
gpu_func = getattr(cumath, name)
cpu_func = getattr(np, numpy_func_names.get(name, name))
@@ -55,25 +55,25 @@ def make_unary_function_test(name, (a, b)=(0, 1), threshold=0):
if have_pycuda():
- test_ceil = make_unary_function_test("ceil", (-10, 10))
- test_floor = make_unary_function_test("ceil", (-10, 10))
- test_fabs = make_unary_function_test("fabs", (-10, 10))
- test_exp = make_unary_function_test("exp", (-3, 3), 1e-5)
- test_log = make_unary_function_test("log", (1e-5, 1), 5e-7)
- test_log10 = make_unary_function_test("log10", (1e-5, 1), 3e-7)
- test_sqrt = make_unary_function_test("sqrt", (1e-5, 1), 2e-7)
-
- test_sin = make_unary_function_test("sin", (-10, 10), 1e-7)
- test_cos = make_unary_function_test("cos", (-10, 10), 1e-7)
- test_asin = make_unary_function_test("asin", (-0.9, 0.9), 5e-7)
- test_acos = make_unary_function_test("acos", (-0.9, 0.9), 5e-7)
+ test_ceil = make_unary_function_test("ceil", -10, 10)
+ test_floor = make_unary_function_test("ceil", -10, 10)
+ test_fabs = make_unary_function_test("fabs", -10, 10)
+ test_exp = make_unary_function_test("exp", -3, 3, 1e-5)
+ test_log = make_unary_function_test("log", 1e-5, 1, 5e-7)
+ test_log10 = make_unary_function_test("log10", 1e-5, 1, 3e-7)
+ test_sqrt = make_unary_function_test("sqrt", 1e-5, 1, 2e-7)
+
+ test_sin = make_unary_function_test("sin", -10, 10, 1e-7)
+ test_cos = make_unary_function_test("cos", -10, 10, 1e-7)
+ test_asin = make_unary_function_test("asin", -0.9, 0.9, 5e-7)
+ test_acos = make_unary_function_test("acos", -0.9, 0.9, 5e-7)
test_tan = make_unary_function_test("tan",
- (-math.pi/2 + 0.1, math.pi/2 - 0.1), 1e-5)
- test_atan = make_unary_function_test("atan", (-10, 10), 2e-7)
+ -math.pi/2 + 0.1, math.pi/2 - 0.1, 1e-5)
+ test_atan = make_unary_function_test("atan", -10, 10, 2e-7)
- test_sinh = make_unary_function_test("sinh", (-3, 3), 1e-6)
- test_cosh = make_unary_function_test("cosh", (-3, 3), 1e-6)
- test_tanh = make_unary_function_test("tanh", (-3, 3), 2e-6)
+ test_sinh = make_unary_function_test("sinh", -3, 3, 1e-6)
+ test_cosh = make_unary_function_test("cosh", -3, 3, 1e-6)
+ test_tanh = make_unary_function_test("tanh", -3, 3, 2e-6)
@@ -151,7 +151,7 @@ if __name__ == "__main__":
import sys
if len(sys.argv) > 1:
- exec sys.argv[1]
+ exec (sys.argv[1])
else:
from py.test.cmdline import main
main([__file__])
diff --git a/test/test_driver.py b/test/test_driver.py
index 376d31b..0b96636 100644
--- a/test/test_driver.py
+++ b/test/test_driver.py
@@ -465,8 +465,8 @@ class TestDriver:
func.prepared_call((1, 1), (4,4,1), a_gpu, shared_size=20)
a_doubled = np.empty_like(a)
drv.memcpy_dtoh(a_doubled, a_gpu)
- print a
- print a_doubled
+ print (a)
+ print (a_doubled)
assert la.norm(a_doubled-2*a) == 0
# now with offsets
@@ -573,7 +573,7 @@ if __name__ == "__main__":
import sys
if len(sys.argv) > 1:
- exec sys.argv[1]
+ exec (sys.argv[1])
else:
from py.test.cmdline import main
main([__file__])
diff --git a/test/test_gpuarray.py b/test/test_gpuarray.py
index 7afa758..cdacb4d 100644
--- a/test/test_gpuarray.py
+++ b/test/test_gpuarray.py
@@ -601,8 +601,8 @@ class TestGPUArray:
max_a_b_gpu = gpuarray.maximum(a_gpu, b_gpu)
min_a_b_gpu = gpuarray.minimum(a_gpu, b_gpu)
- print max_a_b_gpu
- print np.maximum(a, b)
+ print (max_a_b_gpu)
+ print (np.maximum(a, b))
assert la.norm(max_a_b_gpu.get()- np.maximum(a, b)) == 0
assert la.norm(min_a_b_gpu.get()- np.minimum(a, b)) == 0
@@ -719,9 +719,9 @@ class TestGPUArray:
def test_stride_preservation(self):
A = np.random.rand(3,3)
AT = A.T
- print AT.flags.f_contiguous, AT.flags.c_contiguous
+ print (AT.flags.f_contiguous, AT.flags.c_contiguous)
AT_GPU = gpuarray.to_gpu(AT)
- print AT_GPU.flags.f_contiguous, AT_GPU.flags.c_contiguous
+ print (AT_GPU.flags.f_contiguous, AT_GPU.flags.c_contiguous)
assert np.allclose(AT_GPU.get(),AT)
@mark_cuda_test
@@ -838,7 +838,7 @@ if __name__ == "__main__":
import sys
if len(sys.argv) > 1:
- exec sys.argv[1]
+ exec (sys.argv[1])
else:
from py.test.cmdline import main
main([__file__])
signature.asc
Description: This is a digitally signed message part
_______________________________________________ PyCUDA mailing list [email protected] http://lists.tiker.net/listinfo/pycuda
