From 1a14766a13ab3a16bf875561d10ace6462ded613 Mon Sep 17 00:00:00 2001
From: Alexander Harvey Nitz <ahnitz@syr.edu>
Date: Fri, 19 Jul 2013 21:04:03 -0400
Subject: [PATCH 4/4] allow gpuarray take to be used with complex types

---
 pycuda/cuda/pycuda-helpers.hpp |   20 +++++++++++++++++++-
 pycuda/gpuarray.py             |   22 ++++++++++++++++++++--
 2 files changed, 39 insertions(+), 3 deletions(-)
diff --git a/pycuda/cuda/pycuda-helpers.hpp b/pycuda/cuda/pycuda-helpers.hpp
index 45c36ba..85531f8 100644
--- a/pycuda/cuda/pycuda-helpers.hpp
+++ b/pycuda/cuda/pycuda-helpers.hpp
@@ -1,3 +1,5 @@
+#include <pycuda-complex.hpp>
+
 #ifndef _AFJKDASLFSADHF_HEADER_SEEN_PYCUDA_HELPERS_HPP
 #define _AFJKDASLFSADHF_HEADER_SEEN_PYCUDA_HELPERS_HPP
 
@@ -7,6 +9,23 @@ extern "C++" {
 
   typedef float fp_tex_float;
   typedef int2 fp_tex_double;
+  typedef uint2 fp_tex_cfloat;
+  typedef int4 fp_tex_cdouble;
+
+   template <enum cudaTextureReadMode read_mode>
+  __device__ pycuda::complex<float> fp_tex1Dfetch(texture<fp_tex_cfloat, 1, read_mode> tex, int i)
+  {
+    fp_tex_cfloat v = tex1Dfetch(tex, i);
+    pycuda::complex<float> out;
+    return pycuda::complex<float>(__int_as_float(v.x), __int_as_float(v.y));
+  }
+
+  template <enum cudaTextureReadMode read_mode>
+  __device__ pycuda::complex<double> fp_tex1Dfetch(texture<fp_tex_cdouble, 1, read_mode> tex, int i)
+  {
+    fp_tex_cdouble v = tex1Dfetch(tex, i);
+    return pycuda::complex<double>(__hiloint2double(v.y, v.x), __hiloint2double(v.w, v.z));
+  }

   template <enum cudaTextureReadMode read_mode>
   __device__ double fp_tex1Dfetch(texture<fp_tex_double, 1, read_mode> tex, int i)
@@ -55,7 +74,6 @@ extern "C++" {
   PYCUDA_GENERATE_FP_TEX_FUNCS(unsigned short int)
   PYCUDA_GENERATE_FP_TEX_FUNCS(char)
   PYCUDA_GENERATE_FP_TEX_FUNCS(unsigned char)
-
 }
 
 #endif
diff --git a/pycuda/gpuarray.py b/pycuda/gpuarray.py
index 79f59ff..2163f21 100644
--- a/pycuda/gpuarray.py
+++ b/pycuda/gpuarray.py
@@ -521,7 +521,7 @@ class GPUArray(object):
                 allow_offset=allow_offset) / self.dtype.itemsize
 
     def bind_to_texref_ext(self, texref, channels=1, allow_double_hack=False,
-            allow_offset=False):
+            allow_complex_hack=False, allow_offset=False):
         if not self.flags.forc:
             raise RuntimeError("only contiguous arrays may "
                     "be used as arguments to this operation")
@@ -535,6 +535,24 @@ class GPUArray(object):
             channels = 2
             fmt = drv.array_format.SIGNED_INT32
             read_as_int = True
+        elif self.dtype == np.complex64 and allow_complex_hack:
+            if channels != 1:
+                raise ValueError(
+                        "'fake' complex64 textures can "
+                        "only have one channel")
+
+            channels = 2
+            fmt = drv.array_format.UNSIGNED_INT32
+            read_as_int = True
+        elif self.dtype == np.complex128 and allow_complex_hack:
+            if channels != 1:
+                raise ValueError(
+                        "'fake' complex128 textures can "
+                        "only have one channel")
+
+            channels = 4
+            fmt = drv.array_format.SIGNED_INT32
+            read_as_int = True
         else:
             fmt = drv.dtype_to_array_format(self.dtype)
             read_as_int = np.integer in self.dtype.type.__mro__
@@ -1007,7 +1025,7 @@ def take(a, indices, out=None, stream=None):
     assert len(indices.shape) == 1
 
     func, tex_src = elementwise.get_take_kernel(a.dtype, indices.dtype)
-    a.bind_to_texref_ext(tex_src[0], allow_double_hack=True)
+    a.bind_to_texref_ext(tex_src[0], allow_double_hack=True, allow_complex_hack=True)
 
     func.prepared_async_call(out._grid, out._block, stream,
             indices.gpudata, out.gpudata, indices.size)
-- 
1.7.10.rc4.209.g0677f