Hello world,
this patch computes array strides by dividing by an element size that is
known at compile time (such a division can usually be replaced by a simple
shift, because the intrinsic types have sizes that are powers of two) instead
of dividing by the size extracted from the array descriptor at run time.
This should save about 20 cycles for a single calculation.
I'll go through the rest of the library to identify other possibilities
for this.
Regression-tested, no new failures.
OK for the branch?
Thomas
2012-03-24 Thomas König <[email protected]>
* libgfortran.h (GFC_DESCRIPTOR_SIZE_TYPEKNOWN): New macro.
(GFC_DESCRIPTOR_STRIDE_TYPEKNOWN): New macro.
* m4/cshift0.m4: Use GFC_DESCRIPTOR_STRIDE_TYPEKNOWN.
* m4/in_pack.m4: Likewise.
* m4/pack.m4: Likewise.
* m4/spread.m4: Likewise.
* m4/transpose.m4: Likewise.
* m4/iforeach.m4: Likewise.
* m4/eoshift1.m4: Likewise.
* m4/eoshift3.m4: Likewise.
* m4/shape.m4: Likewise.
* m4/cshift1.m4: Likewise.
* m4/in_unpack.m4: Likewise.
* m4/matmull.m4: Likewise.
* m4/bessel.m4: Likewise.
* m4/unpack.m4: Likewise.
* m4/reshape.m4: Likewise.
* m4/ifunction_logical.m4: Likewise.
* m4/ifunction.m4: Likewise.
* m4/matmul.m4: Likewise.
* generated/all_l16.c: Regenerated.
* generated/all_l1.c: Regenerated.
* generated/all_l2.c: Regenerated.
* generated/all_l4.c: Regenerated.
* generated/all_l8.c: Regenerated.
* generated/any_l16.c: Regenerated.
* generated/any_l1.c: Regenerated.
* generated/any_l2.c: Regenerated.
* generated/any_l4.c: Regenerated.
* generated/any_l8.c: Regenerated.
* generated/bessel_r10.c: Regenerated.
* generated/bessel_r16.c: Regenerated.
* generated/bessel_r4.c: Regenerated.
* generated/bessel_r8.c: Regenerated.
* generated/count_16_l.c: Regenerated.
* generated/count_1_l.c: Regenerated.
* generated/count_2_l.c: Regenerated.
* generated/count_4_l.c: Regenerated.
* generated/count_8_l.c: Regenerated.
* generated/cshift0_c10.c: Regenerated.
* generated/cshift0_c16.c: Regenerated.
* generated/cshift0_c4.c: Regenerated.
* generated/cshift0_c8.c: Regenerated.
* generated/cshift0_i16.c: Regenerated.
* generated/cshift0_i1.c: Regenerated.
* generated/cshift0_i2.c: Regenerated.
* generated/cshift0_i4.c: Regenerated.
* generated/cshift0_i8.c: Regenerated.
* generated/cshift0_r10.c: Regenerated.
* generated/cshift0_r16.c: Regenerated.
* generated/cshift0_r4.c: Regenerated.
* generated/cshift0_r8.c: Regenerated.
* generated/cshift1_16.c: Regenerated.
* generated/cshift1_4.c: Regenerated.
* generated/cshift1_8.c: Regenerated.
* generated/eoshift1_16.c: Regenerated.
* generated/eoshift1_4.c: Regenerated.
* generated/eoshift1_8.c: Regenerated.
* generated/eoshift3_16.c: Regenerated.
* generated/eoshift3_4.c: Regenerated.
* generated/eoshift3_8.c: Regenerated.
* generated/iall_i16.c: Regenerated.
* generated/iall_i1.c: Regenerated.
* generated/iall_i2.c: Regenerated.
* generated/iall_i4.c: Regenerated.
* generated/iall_i8.c: Regenerated.
* generated/iany_i16.c: Regenerated.
* generated/iany_i1.c: Regenerated.
* generated/iany_i2.c: Regenerated.
* generated/iany_i4.c: Regenerated.
* generated/iany_i8.c: Regenerated.
* generated/in_pack_c10.c: Regenerated.
* generated/in_pack_c16.c: Regenerated.
* generated/in_pack_c4.c: Regenerated.
* generated/in_pack_c8.c: Regenerated.
* generated/in_pack_i16.c: Regenerated.
* generated/in_pack_i1.c: Regenerated.
* generated/in_pack_i2.c: Regenerated.
* generated/in_pack_i4.c: Regenerated.
* generated/in_pack_i8.c: Regenerated.
* generated/in_pack_r10.c: Regenerated.
* generated/in_pack_r16.c: Regenerated.
* generated/in_pack_r4.c: Regenerated.
* generated/in_pack_r8.c: Regenerated.
* generated/in_unpack_c10.c: Regenerated.
* generated/in_unpack_c16.c: Regenerated.
* generated/in_unpack_c4.c: Regenerated.
* generated/in_unpack_c8.c: Regenerated.
* generated/in_unpack_i16.c: Regenerated.
* generated/in_unpack_i1.c: Regenerated.
* generated/in_unpack_i2.c: Regenerated.
* generated/in_unpack_i4.c: Regenerated.
* generated/in_unpack_i8.c: Regenerated.
* generated/in_unpack_r10.c: Regenerated.
* generated/in_unpack_r16.c: Regenerated.
* generated/in_unpack_r4.c: Regenerated.
* generated/in_unpack_r8.c: Regenerated.
* generated/iparity_i16.c: Regenerated.
* generated/iparity_i1.c: Regenerated.
* generated/iparity_i2.c: Regenerated.
* generated/iparity_i4.c: Regenerated.
* generated/iparity_i8.c: Regenerated.
* generated/matmul_c10.c: Regenerated.
* generated/matmul_c16.c: Regenerated.
* generated/matmul_c4.c: Regenerated.
* generated/matmul_c8.c: Regenerated.
* generated/matmul_i16.c: Regenerated.
* generated/matmul_i1.c: Regenerated.
* generated/matmul_i2.c: Regenerated.
* generated/matmul_i4.c: Regenerated.
* generated/matmul_i8.c: Regenerated.
* generated/matmul_l16.c: Regenerated.
* generated/matmul_l4.c: Regenerated.
* generated/matmul_l8.c: Regenerated.
* generated/matmul_r10.c: Regenerated.
* generated/matmul_r16.c: Regenerated.
* generated/matmul_r4.c: Regenerated.
* generated/matmul_r8.c: Regenerated.
* generated/maxloc0_16_i16.c: Regenerated.
* generated/maxloc0_16_i1.c: Regenerated.
* generated/maxloc0_16_i2.c: Regenerated.
* generated/maxloc0_16_i4.c: Regenerated.
* generated/maxloc0_16_i8.c: Regenerated.
* generated/maxloc0_16_r10.c: Regenerated.
* generated/maxloc0_16_r16.c: Regenerated.
* generated/maxloc0_16_r4.c: Regenerated.
* generated/maxloc0_16_r8.c: Regenerated.
* generated/maxloc0_4_i16.c: Regenerated.
* generated/maxloc0_4_i1.c: Regenerated.
* generated/maxloc0_4_i2.c: Regenerated.
* generated/maxloc0_4_i4.c: Regenerated.
* generated/maxloc0_4_i8.c: Regenerated.
* generated/maxloc0_4_r10.c: Regenerated.
* generated/maxloc0_4_r16.c: Regenerated.
* generated/maxloc0_4_r4.c: Regenerated.
* generated/maxloc0_4_r8.c: Regenerated.
* generated/maxloc0_8_i16.c: Regenerated.
* generated/maxloc0_8_i1.c: Regenerated.
* generated/maxloc0_8_i2.c: Regenerated.
* generated/maxloc0_8_i4.c: Regenerated.
* generated/maxloc0_8_i8.c: Regenerated.
* generated/maxloc0_8_r10.c: Regenerated.
* generated/maxloc0_8_r16.c: Regenerated.
* generated/maxloc0_8_r4.c: Regenerated.
* generated/maxloc0_8_r8.c: Regenerated.
* generated/maxloc1_16_i16.c: Regenerated.
* generated/maxloc1_16_i1.c: Regenerated.
* generated/maxloc1_16_i2.c: Regenerated.
* generated/maxloc1_16_i4.c: Regenerated.
* generated/maxloc1_16_i8.c: Regenerated.
* generated/maxloc1_16_r10.c: Regenerated.
* generated/maxloc1_16_r16.c: Regenerated.
* generated/maxloc1_16_r4.c: Regenerated.
* generated/maxloc1_16_r8.c: Regenerated.
* generated/maxloc1_4_i16.c: Regenerated.
* generated/maxloc1_4_i1.c: Regenerated.
* generated/maxloc1_4_i2.c: Regenerated.
* generated/maxloc1_4_i4.c: Regenerated.
* generated/maxloc1_4_i8.c: Regenerated.
* generated/maxloc1_4_r10.c: Regenerated.
* generated/maxloc1_4_r16.c: Regenerated.
* generated/maxloc1_4_r4.c: Regenerated.
* generated/maxloc1_4_r8.c: Regenerated.
* generated/maxloc1_8_i16.c: Regenerated.
* generated/maxloc1_8_i1.c: Regenerated.
* generated/maxloc1_8_i2.c: Regenerated.
* generated/maxloc1_8_i4.c: Regenerated.
* generated/maxloc1_8_i8.c: Regenerated.
* generated/maxloc1_8_r10.c: Regenerated.
* generated/maxloc1_8_r16.c: Regenerated.
* generated/maxloc1_8_r4.c: Regenerated.
* generated/maxloc1_8_r8.c: Regenerated.
* generated/maxval_i16.c: Regenerated.
* generated/maxval_i1.c: Regenerated.
* generated/maxval_i2.c: Regenerated.
* generated/maxval_i4.c: Regenerated.
* generated/maxval_i8.c: Regenerated.
* generated/maxval_r10.c: Regenerated.
* generated/maxval_r16.c: Regenerated.
* generated/maxval_r4.c: Regenerated.
* generated/maxval_r8.c: Regenerated.
* generated/minloc0_16_i16.c: Regenerated.
* generated/minloc0_16_i1.c: Regenerated.
* generated/minloc0_16_i2.c: Regenerated.
* generated/minloc0_16_i4.c: Regenerated.
* generated/minloc0_16_i8.c: Regenerated.
* generated/minloc0_16_r10.c: Regenerated.
* generated/minloc0_16_r16.c: Regenerated.
* generated/minloc0_16_r4.c: Regenerated.
* generated/minloc0_16_r8.c: Regenerated.
* generated/minloc0_4_i16.c: Regenerated.
* generated/minloc0_4_i1.c: Regenerated.
* generated/minloc0_4_i2.c: Regenerated.
* generated/minloc0_4_i4.c: Regenerated.
* generated/minloc0_4_i8.c: Regenerated.
* generated/minloc0_4_r10.c: Regenerated.
* generated/minloc0_4_r16.c: Regenerated.
* generated/minloc0_4_r4.c: Regenerated.
* generated/minloc0_4_r8.c: Regenerated.
* generated/minloc0_8_i16.c: Regenerated.
* generated/minloc0_8_i1.c: Regenerated.
* generated/minloc0_8_i2.c: Regenerated.
* generated/minloc0_8_i4.c: Regenerated.
* generated/minloc0_8_i8.c: Regenerated.
* generated/minloc0_8_r10.c: Regenerated.
* generated/minloc0_8_r16.c: Regenerated.
* generated/minloc0_8_r4.c: Regenerated.
* generated/minloc0_8_r8.c: Regenerated.
* generated/minloc1_16_i16.c: Regenerated.
* generated/minloc1_16_i1.c: Regenerated.
* generated/minloc1_16_i2.c: Regenerated.
* generated/minloc1_16_i4.c: Regenerated.
* generated/minloc1_16_i8.c: Regenerated.
* generated/minloc1_16_r10.c: Regenerated.
* generated/minloc1_16_r16.c: Regenerated.
* generated/minloc1_16_r4.c: Regenerated.
* generated/minloc1_16_r8.c: Regenerated.
* generated/minloc1_4_i16.c: Regenerated.
* generated/minloc1_4_i1.c: Regenerated.
* generated/minloc1_4_i2.c: Regenerated.
* generated/minloc1_4_i4.c: Regenerated.
* generated/minloc1_4_i8.c: Regenerated.
* generated/minloc1_4_r10.c: Regenerated.
* generated/minloc1_4_r16.c: Regenerated.
* generated/minloc1_4_r4.c: Regenerated.
* generated/minloc1_4_r8.c: Regenerated.
* generated/minloc1_8_i16.c: Regenerated.
* generated/minloc1_8_i1.c: Regenerated.
* generated/minloc1_8_i2.c: Regenerated.
* generated/minloc1_8_i4.c: Regenerated.
* generated/minloc1_8_i8.c: Regenerated.
* generated/minloc1_8_r10.c: Regenerated.
* generated/minloc1_8_r16.c: Regenerated.
* generated/minloc1_8_r4.c: Regenerated.
* generated/minloc1_8_r8.c: Regenerated.
* generated/minval_i16.c: Regenerated.
* generated/minval_i1.c: Regenerated.
* generated/minval_i2.c: Regenerated.
* generated/minval_i4.c: Regenerated.
* generated/minval_i8.c: Regenerated.
* generated/minval_r10.c: Regenerated.
* generated/minval_r16.c: Regenerated.
* generated/minval_r4.c: Regenerated.
* generated/minval_r8.c: Regenerated.
* generated/norm2_r10.c: Regenerated.
* generated/norm2_r16.c: Regenerated.
* generated/norm2_r4.c: Regenerated.
* generated/norm2_r8.c: Regenerated.
* generated/pack_c10.c: Regenerated.
* generated/pack_c16.c: Regenerated.
* generated/pack_c4.c: Regenerated.
* generated/pack_c8.c: Regenerated.
* generated/pack_i16.c: Regenerated.
* generated/pack_i1.c: Regenerated.
* generated/pack_i2.c: Regenerated.
* generated/pack_i4.c: Regenerated.
* generated/pack_i8.c: Regenerated.
* generated/pack_r10.c: Regenerated.
* generated/pack_r16.c: Regenerated.
* generated/pack_r4.c: Regenerated.
* generated/pack_r8.c: Regenerated.
* generated/parity_l16.c: Regenerated.
* generated/parity_l1.c: Regenerated.
* generated/parity_l2.c: Regenerated.
* generated/parity_l4.c: Regenerated.
* generated/parity_l8.c: Regenerated.
* generated/product_c10.c: Regenerated.
* generated/product_c16.c: Regenerated.
* generated/product_c4.c: Regenerated.
* generated/product_c8.c: Regenerated.
* generated/product_i16.c: Regenerated.
* generated/product_i1.c: Regenerated.
* generated/product_i2.c: Regenerated.
* generated/product_i4.c: Regenerated.
* generated/product_i8.c: Regenerated.
* generated/product_r10.c: Regenerated.
* generated/product_r16.c: Regenerated.
* generated/product_r4.c: Regenerated.
* generated/product_r8.c: Regenerated.
* generated/reshape_c10.c: Regenerated.
* generated/reshape_c16.c: Regenerated.
* generated/reshape_c4.c: Regenerated.
* generated/reshape_c8.c: Regenerated.
* generated/reshape_i16.c: Regenerated.
* generated/reshape_i4.c: Regenerated.
* generated/reshape_i8.c: Regenerated.
* generated/reshape_r10.c: Regenerated.
* generated/reshape_r16.c: Regenerated.
* generated/reshape_r4.c: Regenerated.
* generated/reshape_r8.c: Regenerated.
* generated/shape_i16.c: Regenerated.
* generated/shape_i4.c: Regenerated.
* generated/shape_i8.c: Regenerated.
* generated/spread_c10.c: Regenerated.
* generated/spread_c16.c: Regenerated.
* generated/spread_c4.c: Regenerated.
* generated/spread_c8.c: Regenerated.
* generated/spread_i16.c: Regenerated.
* generated/spread_i1.c: Regenerated.
* generated/spread_i2.c: Regenerated.
* generated/spread_i4.c: Regenerated.
* generated/spread_i8.c: Regenerated.
* generated/spread_r10.c: Regenerated.
* generated/spread_r16.c: Regenerated.
* generated/spread_r4.c: Regenerated.
* generated/spread_r8.c: Regenerated.
* generated/sum_c10.c: Regenerated.
* generated/sum_c16.c: Regenerated.
* generated/sum_c4.c: Regenerated.
* generated/sum_c8.c: Regenerated.
* generated/sum_i16.c: Regenerated.
* generated/sum_i1.c: Regenerated.
* generated/sum_i2.c: Regenerated.
* generated/sum_i4.c: Regenerated.
* generated/sum_i8.c: Regenerated.
* generated/sum_r10.c: Regenerated.
* generated/sum_r16.c: Regenerated.
* generated/sum_r4.c: Regenerated.
* generated/sum_r8.c: Regenerated.
* generated/transpose_c10.c: Regenerated.
* generated/transpose_c16.c: Regenerated.
* generated/transpose_c4.c: Regenerated.
* generated/transpose_c8.c: Regenerated.
* generated/transpose_i16.c: Regenerated.
* generated/transpose_i4.c: Regenerated.
* generated/transpose_i8.c: Regenerated.
* generated/transpose_r10.c: Regenerated.
* generated/transpose_r16.c: Regenerated.
* generated/transpose_r4.c: Regenerated.
* generated/transpose_r8.c: Regenerated.
* generated/unpack_c10.c: Regenerated.
* generated/unpack_c16.c: Regenerated.
* generated/unpack_c4.c: Regenerated.
* generated/unpack_c8.c: Regenerated.
* generated/unpack_i16.c: Regenerated.
* generated/unpack_i1.c: Regenerated.
* generated/unpack_i2.c: Regenerated.
* generated/unpack_i4.c: Regenerated.
* generated/unpack_i8.c: Regenerated.
* generated/unpack_r10.c: Regenerated.
* generated/unpack_r16.c: Regenerated.
* generated/unpack_r4.c: Regenerated.
* generated/unpack_r8.c: Regenerated.
Index: libgfortran.h
===================================================================
--- libgfortran.h (Revision 185261)
+++ libgfortran.h (Arbeitskopie)
@@ -364,6 +364,11 @@
#define GFC_DESCRIPTOR_TYPE(desc) (((desc)->dtype & GFC_DTYPE_TYPE_MASK) \
>> GFC_DTYPE_TYPE_SHIFT)
#define GFC_DESCRIPTOR_SIZE(desc) ((desc)->dtype >> GFC_DTYPE_SIZE_SHIFT)
+
+/* Size in bytes of one array element, obtained with sizeof when the type of
+ the descriptor is known at compile-time. Do not use for string types. */
+
+#define GFC_DESCRIPTOR_SIZE_TYPEKNOWN(desc) (sizeof((desc)->base_addr[0]))
#define GFC_DESCRIPTOR_DATA(desc) ((desc)->base_addr)
#define GFC_DESCRIPTOR_DTYPE(desc) ((desc)->dtype)
@@ -391,6 +396,13 @@
#define GFC_DESCRIPTOR_STRIDE(desc,i) \
(GFC_DESCRIPTOR_SM(desc,i) / GFC_DESCRIPTOR_SIZE(desc))
+/* Stride in elements when the descriptor type is known at compile-time, to
+ avoid expensive divisions. The sizeof result is cast to index_type so that a
+ negative stride multiplier is divided as signed. No string types. */
+
+#define GFC_DESCRIPTOR_STRIDE_TYPEKNOWN(desc,i) \
+ (GFC_DESCRIPTOR_SM(desc,i) / (index_type) GFC_DESCRIPTOR_SIZE_TYPEKNOWN(desc))
+
/* Macros to get both the size and the type with a single masking operation */
#define GFC_DTYPE_SIZE_MASK \
Index: m4/cshift0.m4
===================================================================
--- m4/cshift0.m4 (Revision 185261)
+++ m4/cshift0.m4 (Arbeitskopie)
@@ -70,10 +70,10 @@
{
if (dim == which)
{
- roffset = GFC_DESCRIPTOR_STRIDE(ret,dim);
+ roffset = GFC_DESCRIPTOR_STRIDE_TYPEKNOWN(ret,dim);
if (roffset == 0)
roffset = 1;
- soffset = GFC_DESCRIPTOR_STRIDE(array,dim);
+ soffset = GFC_DESCRIPTOR_STRIDE_TYPEKNOWN(array,dim);
if (soffset == 0)
soffset = 1;
len = GFC_DESCRIPTOR_EXTENT(array,dim);
@@ -82,8 +82,8 @@
{
count[n] = 0;
extent[n] = GFC_DESCRIPTOR_EXTENT(array,dim);
- rstride[n] = GFC_DESCRIPTOR_STRIDE(ret,dim);
- sstride[n] = GFC_DESCRIPTOR_STRIDE(array,dim);
+ rstride[n] = GFC_DESCRIPTOR_STRIDE_TYPEKNOWN(ret,dim);
+ sstride[n] = GFC_DESCRIPTOR_STRIDE_TYPEKNOWN(array,dim);
n++;
}
}
Index: m4/in_pack.m4
===================================================================
--- m4/in_pack.m4 (Revision 185261)
+++ m4/in_pack.m4 (Arbeitskopie)
@@ -60,7 +60,7 @@
for (n = 0; n < dim; n++)
{
count[n] = 0;
- stride[n] = GFC_DESCRIPTOR_STRIDE(source,n);
+ stride[n] = GFC_DESCRIPTOR_STRIDE_TYPEKNOWN(source,n);
extent[n] = GFC_DESCRIPTOR_EXTENT(source,n);
if (extent[n] <= 0)
{
Index: m4/pack.m4
===================================================================
--- m4/pack.m4 (Revision 185261)
+++ m4/pack.m4 (Arbeitskopie)
@@ -127,7 +127,7 @@
extent[n] = GFC_DESCRIPTOR_EXTENT(array,n);
if (extent[n] <= 0)
zero_sized = 1;
- sstride[n] = GFC_DESCRIPTOR_STRIDE(array,n);
+ sstride[n] = GFC_DESCRIPTOR_STRIDE_TYPEKNOWN(array,n);
mstride[n] = GFC_DESCRIPTOR_SM(mask,n);
}
if (sstride[0] == 0)
@@ -188,7 +188,7 @@
}
}
- rstride0 = GFC_DESCRIPTOR_STRIDE(ret,0);
+ rstride0 = GFC_DESCRIPTOR_STRIDE_TYPEKNOWN(ret,0);
if (rstride0 == 0)
rstride0 = 1;
sstride0 = sstride[0];
@@ -241,7 +241,7 @@
nelem = ((rptr - ret->base_addr) / rstride0);
if (n > nelem)
{
- sstride0 = GFC_DESCRIPTOR_STRIDE(vector,0);
+ sstride0 = GFC_DESCRIPTOR_STRIDE_TYPEKNOWN(vector,0);
if (sstride0 == 0)
sstride0 = 1;
Index: m4/spread.m4
===================================================================
--- m4/spread.m4 (Revision 185261)
+++ m4/spread.m4 (Arbeitskopie)
@@ -91,7 +91,7 @@
{
count[dim] = 0;
extent[dim] = GFC_DESCRIPTOR_EXTENT(source,dim);
- sstride[dim] = GFC_DESCRIPTOR_STRIDE(source,dim);
+ sstride[dim] = GFC_DESCRIPTOR_STRIDE_TYPEKNOWN(source,dim);
rstride[dim] = rs;
ext = extent[dim];
@@ -126,7 +126,7 @@
ret_extent = GFC_DESCRIPTOR_EXTENT(ret,n);
if (n == along - 1)
{
- rdelta = GFC_DESCRIPTOR_STRIDE(ret,n);
+ rdelta = GFC_DESCRIPTOR_STRIDE_TYPEKNOWN(ret,n);
if (ret_extent != ncopies)
runtime_error("Incorrect extent in return value of SPREAD"
@@ -147,8 +147,8 @@
if (extent[dim] <= 0)
zero_sized = 1;
- sstride[dim] = GFC_DESCRIPTOR_STRIDE(source,dim);
- rstride[dim] = GFC_DESCRIPTOR_STRIDE(ret,n);
+ sstride[dim] = GFC_DESCRIPTOR_STRIDE_TYPEKNOWN(source,dim);
+ rstride[dim] = GFC_DESCRIPTOR_STRIDE_TYPEKNOWN(ret,n);
dim++;
}
}
@@ -159,7 +159,7 @@
{
if (n == along - 1)
{
- rdelta = GFC_DESCRIPTOR_STRIDE(ret,n);
+ rdelta = GFC_DESCRIPTOR_STRIDE_TYPEKNOWN(ret,n);
}
else
{
@@ -167,8 +167,8 @@
extent[dim] = GFC_DESCRIPTOR_EXTENT(source,dim);
if (extent[dim] <= 0)
zero_sized = 1;
- sstride[dim] = GFC_DESCRIPTOR_STRIDE(source,dim);
- rstride[dim] = GFC_DESCRIPTOR_STRIDE(ret,n);
+ sstride[dim] = GFC_DESCRIPTOR_STRIDE_TYPEKNOWN(source,dim);
+ rstride[dim] = GFC_DESCRIPTOR_STRIDE_TYPEKNOWN(ret,n);
dim++;
}
}
@@ -252,12 +252,12 @@
else
{
if (ncopies - 1 > (GFC_DESCRIPTOR_EXTENT(ret,0) - 1)
- / GFC_DESCRIPTOR_STRIDE(ret,0))
+ / GFC_DESCRIPTOR_STRIDE_TYPEKNOWN(ret,0))
runtime_error ("dim too large in spread()");
}
dest = ret->base_addr;
- stride = GFC_DESCRIPTOR_STRIDE(ret,0);
+ stride = GFC_DESCRIPTOR_STRIDE_TYPEKNOWN(ret,0);
for (n = 0; n < ncopies; n++)
{
Index: m4/transpose.m4
===================================================================
--- m4/transpose.m4 (Revision 185261)
+++ m4/transpose.m4 (Arbeitskopie)
@@ -87,13 +87,13 @@
}
- sxstride = GFC_DESCRIPTOR_STRIDE(source,0);
- systride = GFC_DESCRIPTOR_STRIDE(source,1);
+ sxstride = GFC_DESCRIPTOR_STRIDE_TYPEKNOWN(source,0);
+ systride = GFC_DESCRIPTOR_STRIDE_TYPEKNOWN(source,1);
xcount = GFC_DESCRIPTOR_EXTENT(source,0);
ycount = GFC_DESCRIPTOR_EXTENT(source,1);
- rxstride = GFC_DESCRIPTOR_STRIDE(ret,0);
- rystride = GFC_DESCRIPTOR_STRIDE(ret,1);
+ rxstride = GFC_DESCRIPTOR_STRIDE_TYPEKNOWN(ret,0);
+ rystride = GFC_DESCRIPTOR_STRIDE_TYPEKNOWN(ret,1);
rptr = ret->base_addr;
sptr = source->base_addr;
Index: m4/iforeach.m4
===================================================================
--- m4/iforeach.m4 (Revision 185261)
+++ m4/iforeach.m4 (Arbeitskopie)
@@ -39,11 +39,11 @@
"u_name");
}
- dstride = GFC_DESCRIPTOR_STRIDE(retarray,0);
+ dstride = GFC_DESCRIPTOR_STRIDE_TYPEKNOWN(retarray,0);
dest = retarray->base_addr;
for (n = 0; n < rank; n++)
{
- sstride[n] = GFC_DESCRIPTOR_STRIDE(array,n);
+ sstride[n] = GFC_DESCRIPTOR_STRIDE_TYPEKNOWN(array,n);
extent[n] = GFC_DESCRIPTOR_EXTENT(array,n);
count[n] = 0;
if (extent[n] <= 0)
@@ -160,11 +160,11 @@
else
runtime_error ("Funny sized logical array");
- dstride = GFC_DESCRIPTOR_STRIDE(retarray,0);
+ dstride = GFC_DESCRIPTOR_STRIDE_TYPEKNOWN(retarray,0);
dest = retarray->base_addr;
for (n = 0; n < rank; n++)
{
- sstride[n] = GFC_DESCRIPTOR_STRIDE(array,n);
+ sstride[n] = GFC_DESCRIPTOR_STRIDE_TYPEKNOWN(array,n);
mstride[n] = GFC_DESCRIPTOR_SM(mask,n);
extent[n] = GFC_DESCRIPTOR_EXTENT(array,n);
count[n] = 0;
@@ -272,7 +272,7 @@
"u_name");
}
- dstride = GFC_DESCRIPTOR_STRIDE(retarray,0);
+ dstride = GFC_DESCRIPTOR_STRIDE_TYPEKNOWN(retarray,0);
dest = retarray->base_addr;
for (n = 0; n<rank; n++)
dest[n * dstride] = $1 ;
Index: m4/eoshift1.m4
===================================================================
--- m4/eoshift1.m4 (Revision 185261)
+++ m4/eoshift1.m4 (Arbeitskopie)
@@ -145,7 +145,7 @@
rstride[n] = GFC_DESCRIPTOR_SM(ret,dim);
sstride[n] = GFC_DESCRIPTOR_SM(array,dim);
- hstride[n] = GFC_DESCRIPTOR_STRIDE(h,n);
+ hstride[n] = GFC_DESCRIPTOR_STRIDE_TYPEKNOWN(h,n);
n++;
}
}
Index: m4/eoshift3.m4
===================================================================
--- m4/eoshift3.m4 (Revision 185261)
+++ m4/eoshift3.m4 (Arbeitskopie)
@@ -149,7 +149,7 @@
rstride[n] = GFC_DESCRIPTOR_SM(ret,dim);
sstride[n] = GFC_DESCRIPTOR_SM(array,dim);
- hstride[n] = GFC_DESCRIPTOR_STRIDE(h,n);
+ hstride[n] = GFC_DESCRIPTOR_STRIDE_TYPEKNOWN(h,n);
if (bound)
bstride[n] = GFC_DESCRIPTOR_SM(bound,n);
else
Index: m4/shape.m4
===================================================================
--- m4/shape.m4 (Revision 185261)
+++ m4/shape.m4 (Arbeitskopie)
@@ -53,7 +53,7 @@
ret->base_addr = internal_malloc_size (sizeof ('rtype_name`) * rank);
}
- stride = GFC_DESCRIPTOR_STRIDE(ret,0);
+ stride = GFC_DESCRIPTOR_STRIDE_TYPEKNOWN(ret,0);
if (GFC_DESCRIPTOR_EXTENT(ret,0) < 1)
return;
Index: m4/cshift1.m4
===================================================================
--- m4/cshift1.m4 (Revision 185261)
+++ m4/cshift1.m4 (Arbeitskopie)
@@ -142,7 +142,7 @@
rstride[n] = GFC_DESCRIPTOR_SM(ret,dim);
sstride[n] = GFC_DESCRIPTOR_SM(array,dim);
- hstride[n] = GFC_DESCRIPTOR_STRIDE(h,n);
+ hstride[n] = GFC_DESCRIPTOR_STRIDE_TYPEKNOWN(h,n);
n++;
}
}
Index: m4/in_unpack.m4
===================================================================
--- m4/in_unpack.m4 (Revision 185261)
+++ m4/in_unpack.m4 (Arbeitskopie)
@@ -55,7 +55,7 @@
for (n = 0; n < dim; n++)
{
count[n] = 0;
- stride[n] = GFC_DESCRIPTOR_STRIDE(d,n);
+ stride[n] = GFC_DESCRIPTOR_STRIDE_TYPEKNOWN(d,n);
extent[n] = GFC_DESCRIPTOR_EXTENT(d,n);
if (extent[n] <= 0)
return;
Index: m4/matmull.m4
===================================================================
--- m4/matmull.m4 (Revision 185261)
+++ m4/matmull.m4 (Arbeitskopie)
@@ -165,13 +165,13 @@
`
if (GFC_DESCRIPTOR_RANK (retarray) == 1)
{
- rxstride = GFC_DESCRIPTOR_STRIDE(retarray,0);
+ rxstride = GFC_DESCRIPTOR_STRIDE_TYPEKNOWN(retarray,0);
rystride = rxstride;
}
else
{
- rxstride = GFC_DESCRIPTOR_STRIDE(retarray,0);
- rystride = GFC_DESCRIPTOR_STRIDE(retarray,1);
+ rxstride = GFC_DESCRIPTOR_STRIDE_TYPEKNOWN(retarray,0);
+ rystride = GFC_DESCRIPTOR_STRIDE_TYPEKNOWN(retarray,1);
}
/* If we have rank 1 parameters, zero the absent stride, and set the size to
Index: m4/bessel.m4
===================================================================
--- m4/bessel.m4 (Revision 185261)
+++ m4/bessel.m4 (Arbeitskopie)
@@ -50,7 +50,7 @@
'rtype_name` last1, last2, x2rev;
- stride = GFC_DESCRIPTOR_STRIDE(ret,0);
+ stride = GFC_DESCRIPTOR_STRIDE_TYPEKNOWN(ret,0);
if (ret->base_addr == NULL)
{
@@ -69,7 +69,7 @@
"(%ld vs. %ld)", (long int) n2-n1,
(long int) GFC_DESCRIPTOR_EXTENT(ret,0));
- stride = GFC_DESCRIPTOR_STRIDE(ret,0);
+ stride = GFC_DESCRIPTOR_STRIDE_TYPEKNOWN(ret,0);
if (unlikely (x == 0))
{
@@ -117,7 +117,7 @@
'rtype_name` last1, last2, x2rev;
- stride = GFC_DESCRIPTOR_STRIDE(ret,0);
+ stride = GFC_DESCRIPTOR_STRIDE_TYPEKNOWN(ret,0);
if (ret->base_addr == NULL)
{
@@ -136,7 +136,7 @@
"(%ld vs. %ld)", (long int) n2-n1,
(long int) GFC_DESCRIPTOR_EXTENT(ret,0));
- stride = GFC_DESCRIPTOR_STRIDE(ret,0);
+ stride = GFC_DESCRIPTOR_STRIDE_TYPEKNOWN(ret,0);
if (unlikely (x == 0))
{
Index: m4/unpack.m4
===================================================================
--- m4/unpack.m4 (Revision 185261)
+++ m4/unpack.m4 (Arbeitskopie)
@@ -96,7 +96,7 @@
rs * sizeof ('rtype_name`));
extent[n] = GFC_DESCRIPTOR_EXTENT(ret,n);
empty = empty || extent[n] <= 0;
- rstride[n] = GFC_DESCRIPTOR_STRIDE(ret,n);
+ rstride[n] = GFC_DESCRIPTOR_STRIDE_TYPEKNOWN(ret,n);
mstride[n] = GFC_DESCRIPTOR_SM(mask,n);
rs *= extent[n];
}
@@ -111,7 +111,7 @@
count[n] = 0;
extent[n] = GFC_DESCRIPTOR_EXTENT(ret,n);
empty = empty || extent[n] <= 0;
- rstride[n] = GFC_DESCRIPTOR_STRIDE(ret,n);
+ rstride[n] = GFC_DESCRIPTOR_STRIDE_TYPEKNOWN(ret,n);
mstride[n] = GFC_DESCRIPTOR_SM(mask,n);
}
if (rstride[0] == 0)
@@ -124,7 +124,7 @@
if (mstride[0] == 0)
mstride[0] = 1;
- vstride0 = GFC_DESCRIPTOR_STRIDE(vector,0);
+ vstride0 = GFC_DESCRIPTOR_STRIDE_TYPEKNOWN(vector,0);
if (vstride0 == 0)
vstride0 = 1;
rstride0 = rstride[0];
@@ -241,8 +241,8 @@
rs * sizeof ('rtype_name`));
extent[n] = GFC_DESCRIPTOR_EXTENT(ret,n);
empty = empty || extent[n] <= 0;
- rstride[n] = GFC_DESCRIPTOR_STRIDE(ret,n);
- fstride[n] = GFC_DESCRIPTOR_STRIDE(field,n);
+ rstride[n] = GFC_DESCRIPTOR_STRIDE_TYPEKNOWN(ret,n);
+ fstride[n] = GFC_DESCRIPTOR_STRIDE_TYPEKNOWN(field,n);
mstride[n] = GFC_DESCRIPTOR_SM(mask,n);
rs *= extent[n];
}
@@ -257,8 +257,8 @@
count[n] = 0;
extent[n] = GFC_DESCRIPTOR_EXTENT(ret,n);
empty = empty || extent[n] <= 0;
- rstride[n] = GFC_DESCRIPTOR_STRIDE(ret,n);
- fstride[n] = GFC_DESCRIPTOR_STRIDE(field,n);
+ rstride[n] = GFC_DESCRIPTOR_STRIDE_TYPEKNOWN(ret,n);
+ fstride[n] = GFC_DESCRIPTOR_STRIDE_TYPEKNOWN(field,n);
mstride[n] = GFC_DESCRIPTOR_SM(mask,n);
}
if (rstride[0] == 0)
@@ -273,7 +273,7 @@
if (mstride[0] == 0)
mstride[0] = 1;
- vstride0 = GFC_DESCRIPTOR_STRIDE(vector,0);
+ vstride0 = GFC_DESCRIPTOR_STRIDE_TYPEKNOWN(vector,0);
if (vstride0 == 0)
vstride0 = 1;
rstride0 = rstride[0];
Index: m4/reshape.m4
===================================================================
--- m4/reshape.m4 (Revision 185261)
+++ m4/reshape.m4 (Arbeitskopie)
@@ -91,7 +91,7 @@
for (n = 0; n < rdim; n++)
{
- shape_data[n] = shape->base_addr[n * GFC_DESCRIPTOR_STRIDE(shape,0)];
+ shape_data[n] = shape->base_addr[n * GFC_DESCRIPTOR_STRIDE_TYPEKNOWN(shape,0)];
if (shape_data[n] <= 0)
{
shape_data[n] = 0;
@@ -134,7 +134,7 @@
for (n = 0; n < pdim; n++)
{
pcount[n] = 0;
- pstride[n] = GFC_DESCRIPTOR_STRIDE(pad,n);
+ pstride[n] = GFC_DESCRIPTOR_STRIDE_TYPEKNOWN(pad,n);
pextent[n] = GFC_DESCRIPTOR_EXTENT(pad,n);
if (pextent[n] <= 0)
{
@@ -197,7 +197,7 @@
for (n = 0; n < rdim; n++)
{
- v = order->base_addr[n * GFC_DESCRIPTOR_STRIDE(order,0)] - 1;
+ v = order->base_addr[n * GFC_DESCRIPTOR_STRIDE_TYPEKNOWN(order,0)] - 1;
if (v < 0 || v >= rdim)
runtime_error("Value %ld out of range in ORDER argument"
@@ -216,12 +216,12 @@
for (n = 0; n < rdim; n++)
{
if (order)
- dim = order->base_addr[n * GFC_DESCRIPTOR_STRIDE(order,0)] - 1;
+ dim = order->base_addr[n * GFC_DESCRIPTOR_STRIDE_TYPEKNOWN(order,0)] - 1;
else
dim = n;
rcount[n] = 0;
- rstride[n] = GFC_DESCRIPTOR_STRIDE(ret,dim);
+ rstride[n] = GFC_DESCRIPTOR_STRIDE_TYPEKNOWN(ret,dim);
rextent[n] = GFC_DESCRIPTOR_EXTENT(ret,dim);
if (rextent[n] < 0)
rextent[n] = 0;
@@ -243,7 +243,7 @@
for (n = 0; n < sdim; n++)
{
scount[n] = 0;
- sstride[n] = GFC_DESCRIPTOR_STRIDE(source,n);
+ sstride[n] = GFC_DESCRIPTOR_STRIDE_TYPEKNOWN(source,n);
sextent[n] = GFC_DESCRIPTOR_EXTENT(source,n);
if (sextent[n] <= 0)
{
Index: m4/ifunction_logical.m4
===================================================================
--- m4/ifunction_logical.m4 (Revision 185261)
+++ m4/ifunction_logical.m4 (Arbeitskopie)
@@ -127,7 +127,7 @@
for (n = 0; n < rank; n++)
{
count[n] = 0;
- dstride[n] = GFC_DESCRIPTOR_STRIDE(retarray,n);
+ dstride[n] = GFC_DESCRIPTOR_STRIDE_TYPEKNOWN(retarray,n);
if (extent[n] <= 0)
return;
}
Index: m4/ifunction.m4
===================================================================
--- m4/ifunction.m4 (Revision 185261)
+++ m4/ifunction.m4 (Arbeitskopie)
@@ -48,11 +48,11 @@
len = GFC_DESCRIPTOR_EXTENT(array,dim);
if (len < 0)
len = 0;
- delta = GFC_DESCRIPTOR_STRIDE(array,dim);
+ delta = GFC_DESCRIPTOR_STRIDE_TYPEKNOWN(array,dim);
for (n = 0; n < dim; n++)
{
- sstride[n] = GFC_DESCRIPTOR_STRIDE(array,n);
+ sstride[n] = GFC_DESCRIPTOR_STRIDE_TYPEKNOWN(array,n);
extent[n] = GFC_DESCRIPTOR_EXTENT(array,n);
if (extent[n] < 0)
@@ -60,7 +60,7 @@
}
for (n = dim; n < rank; n++)
{
- sstride[n] = GFC_DESCRIPTOR_STRIDE(array, n + 1);
+ sstride[n] = GFC_DESCRIPTOR_STRIDE_TYPEKNOWN(array, n + 1);
extent[n] = GFC_DESCRIPTOR_EXTENT(array, n + 1);
if (extent[n] < 0)
@@ -113,7 +113,7 @@
for (n = 0; n < rank; n++)
{
count[n] = 0;
- dstride[n] = GFC_DESCRIPTOR_STRIDE(retarray,n);
+ dstride[n] = GFC_DESCRIPTOR_STRIDE_TYPEKNOWN(retarray,n);
if (extent[n] <= 0)
return;
}
@@ -222,12 +222,12 @@
else
runtime_error ("Funny sized logical array");
- delta = GFC_DESCRIPTOR_STRIDE(array,dim);
+ delta = GFC_DESCRIPTOR_STRIDE_TYPEKNOWN(array,dim);
mdelta = GFC_DESCRIPTOR_SM(mask,dim);
for (n = 0; n < dim; n++)
{
- sstride[n] = GFC_DESCRIPTOR_STRIDE(array,n);
+ sstride[n] = GFC_DESCRIPTOR_STRIDE_TYPEKNOWN(array,n);
mstride[n] = GFC_DESCRIPTOR_SM(mask,n);
extent[n] = GFC_DESCRIPTOR_EXTENT(array,n);
@@ -237,7 +237,7 @@
}
for (n = dim; n < rank; n++)
{
- sstride[n] = GFC_DESCRIPTOR_STRIDE(array,n + 1);
+ sstride[n] = GFC_DESCRIPTOR_STRIDE_TYPEKNOWN(array,n + 1);
mstride[n] = GFC_DESCRIPTOR_SM(mask, n + 1);
extent[n] = GFC_DESCRIPTOR_EXTENT(array, n + 1);
@@ -293,7 +293,7 @@
for (n = 0; n < rank; n++)
{
count[n] = 0;
- dstride[n] = GFC_DESCRIPTOR_STRIDE(retarray,n);
+ dstride[n] = GFC_DESCRIPTOR_STRIDE_TYPEKNOWN(retarray,n);
if (extent[n] <= 0)
return;
}
@@ -460,7 +460,7 @@
for (n = 0; n < rank; n++)
{
count[n] = 0;
- dstride[n] = GFC_DESCRIPTOR_STRIDE(retarray,n);
+ dstride[n] = GFC_DESCRIPTOR_STRIDE_TYPEKNOWN(retarray,n);
}
dest = retarray->base_addr;
Index: m4/matmul.m4
===================================================================
--- m4/matmul.m4 (Revision 185261)
+++ m4/matmul.m4 (Arbeitskopie)
@@ -178,19 +178,19 @@
/* One-dimensional result may be addressed in the code below
either as a row or a column matrix. We want both cases to
work. */
- rxstride = rystride = GFC_DESCRIPTOR_STRIDE(retarray,0);
+ rxstride = rystride = GFC_DESCRIPTOR_STRIDE_TYPEKNOWN(retarray,0);
}
else
{
- rxstride = GFC_DESCRIPTOR_STRIDE(retarray,0);
- rystride = GFC_DESCRIPTOR_STRIDE(retarray,1);
+ rxstride = GFC_DESCRIPTOR_STRIDE_TYPEKNOWN(retarray,0);
+ rystride = GFC_DESCRIPTOR_STRIDE_TYPEKNOWN(retarray,1);
}
if (GFC_DESCRIPTOR_RANK (a) == 1)
{
/* Treat it as a a row matrix A[1,count]. */
- axstride = GFC_DESCRIPTOR_STRIDE(a,0);
+ axstride = GFC_DESCRIPTOR_STRIDE_TYPEKNOWN(a,0);
aystride = 1;
xcount = 1;
@@ -198,8 +198,8 @@
}
else
{
- axstride = GFC_DESCRIPTOR_STRIDE(a,0);
- aystride = GFC_DESCRIPTOR_STRIDE(a,1);
+ axstride = GFC_DESCRIPTOR_STRIDE_TYPEKNOWN(a,0);
+ aystride = GFC_DESCRIPTOR_STRIDE_TYPEKNOWN(a,1);
count = GFC_DESCRIPTOR_EXTENT(a,1);
xcount = GFC_DESCRIPTOR_EXTENT(a,0);
@@ -214,7 +214,7 @@
if (GFC_DESCRIPTOR_RANK (b) == 1)
{
/* Treat it as a column matrix B[count,1] */
- bxstride = GFC_DESCRIPTOR_STRIDE(b,0);
+ bxstride = GFC_DESCRIPTOR_STRIDE_TYPEKNOWN(b,0);
/* bystride should never be used for 1-dimensional b.
in case it is we want it to cause a segfault, rather than
@@ -224,8 +224,8 @@
}
else
{
- bxstride = GFC_DESCRIPTOR_STRIDE(b,0);
- bystride = GFC_DESCRIPTOR_STRIDE(b,1);
+ bxstride = GFC_DESCRIPTOR_STRIDE_TYPEKNOWN(b,0);
+ bystride = GFC_DESCRIPTOR_STRIDE_TYPEKNOWN(b,1);
ycount = GFC_DESCRIPTOR_EXTENT(b,1);
}