Thanks Jakub & Segher,

On 2021/3/17 06:47, Segher Boessenkool wrote:
Hi!

On Tue, Mar 16, 2021 at 07:57:17PM +0100, Jakub Jelinek wrote:
On Thu, Mar 11, 2021 at 07:57:23AM +0800, Xionghu Luo via Gcc-patches wrote:
diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index ec068c58aa5..48eb91132a9 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -7000,11 +7000,15 @@ rs6000_expand_vector_set_var_p9 (rtx target, rtx val, 
rtx idx)
gcc_assert (VECTOR_MEM_VSX_P (mode) && !CONST_INT_P (idx)); - gcc_assert (GET_MODE (idx) == E_SImode);
-
    machine_mode inner_mode = GET_MODE (val);
- rtx tmp = gen_reg_rtx (GET_MODE (idx));
+  machine_mode idx_mode = GET_MODE (idx);
+  rtx tmp = gen_reg_rtx (DImode);
+  if (idx_mode != DImode)
+    tmp = convert_modes (DImode, idx_mode, idx, 0);
+  else
+    tmp = idx;
+
    int width = GET_MODE_SIZE (inner_mode);
gcc_assert (width >= 1 && width <= 8);
@@ -7012,9 +7016,7 @@ rs6000_expand_vector_set_var_p9 (rtx target, rtx val, rtx 
idx)
    int shift = exact_log2 (width);
    /* Generate the IDX for permute shift, width is the vector element size.
       idx = idx * width.  */
-  emit_insn (gen_ashlsi3 (tmp, idx, GEN_INT (shift)));
-
-  tmp = convert_modes (DImode, SImode, tmp, 1);
+  emit_insn (gen_ashldi3 (tmp, tmp, GEN_INT (shift)));

This looks broken.

Yup.

The gen_reg_rtx (DImode); call result is completely ignored,
so it wastes one pseudo, and I'm not convinced that idx nor tmp
is guaranteed to be usable as output of shift.
So, shouldn't it be instead:
   rtx tmp = gen_reg_rtx (DImode);
   if (idx_mode != DImode)
     idx = convert_modes (DImode, idx_mode, idx, 0);

...
   emit_insn (gen_ashldi3 (tmp, idx, GEN_INT (shift));
?

Yeah, something like that.

Just
   idx = convert_modes (DImode, idx_mode, idx, 1);
...
   rtx tmp = gen_reg_rtx (DImode);
   emit_insn (gen_ashldi3 (tmp, idx, GEN_INT (shift));
works just as well.

Also, dunno what is less and what is more expensive on
rs6000, whether convert_modes with unsigned = 0 or 1.

Unsigned is never more expensive.  Sometimes it is cheaper.  It depends
more on the situation what is best.  Just look at the generated code?


Change it to "idx = convert_modes (DImode, idx_mode, idx, 1);"

For the case with "-mvsx -Og -mcpu=power9":

vector char f2(vector char v, char k, char val)
{
 v[k] = val;
 return v;
}

gimple:
_1 = (int) k_2(D);
_7 = v;
_8 = .VEC_SET (_7, val_4(D), _1);
v = _8;
_6 = v;

the expanded RTL is:

   6: NOTE_INSN_BASIC_BLOCK 2
   2: r121:V16QI=%2:V16QI
   3: r122:DI=%5:DI
   4: r123:DI=%6:DI
   5: NOTE_INSN_FUNCTION_BEG
   8: r117:DI=sign_extend(r122:DI#0)
   9: r124:V16QI=r121:V16QI
  10: r126:QI=r123:DI#0
  11: r127:DI=zero_extend(r117:DI#0)
  12: r128:DI=r127:DI<<0
  13: r129:V16QI=unspec[r128:DI] 152
  14: r130:V16QI=unspec[r128:DI] 151
  15: r124:V16QI=unspec[r124:V16QI,r124:V16QI,r129:V16QI] 236
  16: r124:V16QI=unspec[r124:V16QI,r126:QI,0] 118
  17: r124:V16QI=unspec[r124:V16QI,r124:V16QI,r130:V16QI] 236
  18: r119:V16QI=r124:V16QI
  19: r121:V16QI=r119:V16QI
  20: r120:V16QI=r121:V16QI
  24: %2:V16QI=r120:V16QI
  25: use %2:V16QI

sign_extend of insn #8 is generated by "_1 = (int) k_2(D);", zero_extend
of insn #11 is the convert added by this patch.  Both will be optimized
out in final ASM.  Since k is index of vector element, so it could only
be unsigned value, which means zero_extend should be used here as well?

PS: If type of k is "unsigned int", no zero_extend in expander,
and if k is "int", a zero_extend is generated in expander.

Attached the updated patch.  Thanks.


Xionghu
From 06bff74a35ad7641dffc87d1eab9c5872851ae79 Mon Sep 17 00:00:00 2001
From: Xionghu Luo <luo...@linux.ibm.com>
Date: Tue, 16 Mar 2021 21:23:14 -0500
Subject: [PATCH] rs6000: Convert the vector set variable idx to DImode
 [PR98914]

vec_insert defines the element argument type to be signed int by ELFv2
ABI.  When expanding a vector with a variable rtx, convert the rtx type
to DImode to support both intrinsic usage and other callers from
rs6000_expand_vector_init produced by v[k] = val when k is long type.

gcc/ChangeLog:

2021-03-17  Xionghu Luo  <luo...@linux.ibm.com>

        PR target/98914
        * config/rs6000/rs6000.c (rs6000_expand_vector_set_var_p9):
        Convert idx to DImode.
        (rs6000_expand_vector_set_var_p8): Likewise.

gcc/testsuite/ChangeLog:

2021-03-17  Xionghu Luo  <luo...@linux.ibm.com>

        PR target/98914
        * gcc.target/powerpc/pr98914.c: New test.
---
 gcc/config/rs6000/rs6000.c                 | 39 ++++++++++------------
 gcc/testsuite/gcc.target/powerpc/pr98914.c | 11 ++++++
 2 files changed, 29 insertions(+), 21 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/powerpc/pr98914.c

diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index ec068c58aa5..bb7fcdca876 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -7000,21 +7000,22 @@ rs6000_expand_vector_set_var_p9 (rtx target, rtx val, 
rtx idx)
 
   gcc_assert (VECTOR_MEM_VSX_P (mode) && !CONST_INT_P (idx));
 
-  gcc_assert (GET_MODE (idx) == E_SImode);
-
   machine_mode inner_mode = GET_MODE (val);
 
-  rtx tmp = gen_reg_rtx (GET_MODE (idx));
   int width = GET_MODE_SIZE (inner_mode);
 
   gcc_assert (width >= 1 && width <= 8);
 
   int shift = exact_log2 (width);
+
+  machine_mode idx_mode = GET_MODE (idx);
+  if (idx_mode != DImode)
+    idx = convert_modes (DImode, idx_mode, idx, 1);
+
   /* Generate the IDX for permute shift, width is the vector element size.
      idx = idx * width.  */
-  emit_insn (gen_ashlsi3 (tmp, idx, GEN_INT (shift)));
-
-  tmp = convert_modes (DImode, SImode, tmp, 1);
+  rtx tmp = gen_reg_rtx (DImode);
+  emit_insn (gen_ashldi3 (tmp, idx, GEN_INT (shift)));
 
   /*  lvsr    v1,0,idx.  */
   rtx pcvr = gen_reg_rtx (V16QImode);
@@ -7047,28 +7048,25 @@ rs6000_expand_vector_set_var_p8 (rtx target, rtx val, 
rtx idx)
 
   gcc_assert (VECTOR_MEM_VSX_P (mode) && !CONST_INT_P (idx));
 
-  gcc_assert (GET_MODE (idx) == E_SImode);
-
   machine_mode inner_mode = GET_MODE (val);
   HOST_WIDE_INT mode_mask = GET_MODE_MASK (inner_mode);
 
-  rtx tmp = gen_reg_rtx (GET_MODE (idx));
   int width = GET_MODE_SIZE (inner_mode);
-
   gcc_assert (width >= 1 && width <= 4);
 
+  machine_mode idx_mode = GET_MODE (idx);
+  if (idx_mode != DImode)
+    idx = convert_modes (DImode, idx_mode, idx, 1);
+
+  /*  idx = idx * width.  */
+  rtx tmp = gen_reg_rtx (DImode);
+  emit_insn (gen_muldi3 (tmp, idx, GEN_INT (width)));
+
+  /*  For LE:  idx = idx + 8.  */
   if (!BYTES_BIG_ENDIAN)
-    {
-      /*  idx = idx * width.  */
-      emit_insn (gen_mulsi3 (tmp, idx, GEN_INT (width)));
-      /*  idx = idx + 8.  */
-      emit_insn (gen_addsi3 (tmp, tmp, GEN_INT (8)));
-    }
+    emit_insn (gen_adddi3 (tmp, tmp, GEN_INT (8)));
   else
-    {
-      emit_insn (gen_mulsi3 (tmp, idx, GEN_INT (width)));
-      emit_insn (gen_subsi3 (tmp, GEN_INT (24 - width), tmp));
-    }
+    emit_insn (gen_subdi3 (tmp, GEN_INT (24 - width), tmp));
 
   /*  lxv vs33, mask.
       DImode: 0xffffffffffffffff0000000000000000
@@ -7118,7 +7116,6 @@ rs6000_expand_vector_set_var_p8 (rtx target, rtx val, rtx 
idx)
   emit_insn (gen_rtx_SET (val_v16qi, sub_val));
 
   /*  lvsl    13,0,idx.  */
-  tmp = convert_modes (DImode, SImode, tmp, 1);
   rtx pcv = gen_reg_rtx (V16QImode);
   emit_insn (gen_altivec_lvsl_reg (pcv, tmp));
 
diff --git a/gcc/testsuite/gcc.target/powerpc/pr98914.c 
b/gcc/testsuite/gcc.target/powerpc/pr98914.c
new file mode 100644
index 00000000000..e4d78e3e6b3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr98914.c
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target powerpc_p8vector_ok } */
+/* { dg-options "-Og -mvsx" } */
+
+vector int
+foo (vector int v)
+{
+  for (long k = 0; k < 1; ++k)
+    v[k] = 0;
+  return v;
+}
-- 
2.25.1

Reply via email to