[PATCH V2] rs6000: Don't allow AltiVec address in movoo & movxo pattern [PR110411]

2023-07-19 Thread jeevitha via Gcc-patches
Hi All,

The following patch has been bootstrapped and regtested on powerpc64le-linux.

There are no instructions that do traditional AltiVec addresses (i.e.
with the low four bits of the address masked off) for OOmode and XOmode
objects. The solution is to modify the constraints used in the movoo and
movxo pattern to disallow these types of addresses, which assists LRA in
resolving this issue. Furthermore, the mode size 16 check has been
removed from vsx_quad_dform_memory_operand to allow OOmode; the check was
redundant because quad_address_p already handles sizes less than 16.

2023-07-19  Jeevitha Palanisamy  

gcc/
PR target/110411
* config/rs6000/mma.md (define_insn_and_split movoo): Disallow
AltiVec address in movoo and movxo pattern.
(define_insn_and_split movxo): Likewise.
* config/rs6000/predicates.md (vsx_quad_dform_memory_operand): Remove
redundant mode size check.

gcc/testsuite/
PR target/110411
* gcc.target/powerpc/pr110411-1.c: New testcase.
* gcc.target/powerpc/pr110411-2.c: New testcase.

diff --git a/gcc/config/rs6000/mma.md b/gcc/config/rs6000/mma.md
index d36dc13872b..575751d477e 100644
--- a/gcc/config/rs6000/mma.md
+++ b/gcc/config/rs6000/mma.md
@@ -293,8 +293,8 @@
 })
 
 (define_insn_and_split "*movoo"
-  [(set (match_operand:OO 0 "nonimmediate_operand" "=wa,m,wa")
-   (match_operand:OO 1 "input_operand" "m,wa,wa"))]
+  [(set (match_operand:OO 0 "nonimmediate_operand" "=wa,ZwO,wa")
+   (match_operand:OO 1 "input_operand" "ZwO,wa,wa"))]
   "TARGET_MMA
&& (gpc_reg_operand (operands[0], OOmode)
|| gpc_reg_operand (operands[1], OOmode))"
@@ -340,8 +340,8 @@
 })
 
 (define_insn_and_split "*movxo"
-  [(set (match_operand:XO 0 "nonimmediate_operand" "=d,m,d")
-   (match_operand:XO 1 "input_operand" "m,d,d"))]
+  [(set (match_operand:XO 0 "nonimmediate_operand" "=d,ZwO,d")
+   (match_operand:XO 1 "input_operand" "ZwO,d,d"))]
   "TARGET_MMA
&& (gpc_reg_operand (operands[0], XOmode)
|| gpc_reg_operand (operands[1], XOmode))"
diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md
index 3552d908e9d..925f69cd3fc 100644
--- a/gcc/config/rs6000/predicates.md
+++ b/gcc/config/rs6000/predicates.md
@@ -924,7 +924,7 @@
 (define_predicate "vsx_quad_dform_memory_operand"
   (match_code "mem")
 {
-  if (!TARGET_P9_VECTOR || GET_MODE_SIZE (mode) != 16)
+  if (!TARGET_P9_VECTOR)
 return false;
 
   return quad_address_p (XEXP (op, 0), mode, false);
diff --git a/gcc/testsuite/gcc.target/powerpc/pr110411-1.c 
b/gcc/testsuite/gcc.target/powerpc/pr110411-1.c
new file mode 100644
index 000..f42e9388d65
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr110411-1.c
@@ -0,0 +1,22 @@
+/* PR target/110411 */
+/* { dg-require-effective-target power10_ok } */
+/* { dg-options "-O2 -mdejagnu-cpu=power10 -mblock-ops-vector-pair" } */
+
+/* Verify we do not ICE on the following.  */
+
+#include <string.h>
+
+struct s {
+  long a;
+  long b;
+  long c;
+  long d: 1;
+};
+unsigned long ptr;
+
+void
+bug (struct s *dst)
+{
+  struct s *src = (struct s *)(ptr & ~0xFUL);
+  memcpy (dst, src, sizeof(struct s));
+}
diff --git a/gcc/testsuite/gcc.target/powerpc/pr110411-2.c 
b/gcc/testsuite/gcc.target/powerpc/pr110411-2.c
new file mode 100644
index 000..c2046fb9855
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr110411-2.c
@@ -0,0 +1,12 @@
+/* PR target/110411 */
+/* { dg-require-effective-target power10_ok } */
+/* { dg-options "-O2 -mdejagnu-cpu=power10" } */
+
+/* Verify we do not ICE on the following.  */
+
+void
+bug (__vector_quad *dst)
+{
+  dst = (__vector_quad *)((unsigned long)dst & ~0xFUL);
+  __builtin_mma_xxsetaccz (dst);
+}





[PATCH] rs6000: Fix issue in specifying PTImode as an attribute [PR106895]

2023-07-19 Thread jeevitha via Gcc-patches
Hi All,

The following patch has been bootstrapped and regtested on powerpc64le-linux.

Specifying PTImode via the mode attribute currently breaks. This patch
creates a tree node to handle PTImode types. The PTImode attribute helps in
generating even/odd register pairs for 128-bit values.

2023-07-20  Jeevitha Palanisamy  

gcc/
PR target/106895
* config/rs6000/rs6000.h (enum rs6000_builtin_type_index): Add fields
to hold PTImode type.
* config/rs6000/rs6000-builtin.cc (rs6000_init_builtins): Add node
for PTImode type.

gcc/testsuite/
PR target/106895
* gcc.target/powerpc/pr106895.c: New testcase.

diff --git a/gcc/config/rs6000/rs6000-builtin.cc 
b/gcc/config/rs6000/rs6000-builtin.cc
index a8f291c6a72..ca00c3b0d4c 100644
--- a/gcc/config/rs6000/rs6000-builtin.cc
+++ b/gcc/config/rs6000/rs6000-builtin.cc
@@ -756,6 +756,15 @@ rs6000_init_builtins (void)
   else
 ieee128_float_type_node = NULL_TREE;
 
+  /* PTImode to get even/odd register pairs.  */
+  intPTI_type_internal_node = make_node(INTEGER_TYPE);
+  TYPE_PRECISION (intPTI_type_internal_node) = GET_MODE_BITSIZE (PTImode);
+  layout_type (intPTI_type_internal_node);
+  SET_TYPE_MODE (intPTI_type_internal_node, PTImode);
+  t = build_qualified_type (intPTI_type_internal_node, TYPE_QUAL_CONST);
+  lang_hooks.types.register_builtin_type (intPTI_type_internal_node,
+ "__int128pti");
+
   /* Vector pair and vector quad support.  */
   vector_pair_type_node = make_node (OPAQUE_TYPE);
   SET_TYPE_MODE (vector_pair_type_node, OOmode);
diff --git a/gcc/config/rs6000/rs6000.h b/gcc/config/rs6000/rs6000.h
index 3503614efbd..0456bf56d17 100644
--- a/gcc/config/rs6000/rs6000.h
+++ b/gcc/config/rs6000/rs6000.h
@@ -2303,6 +2303,7 @@ enum rs6000_builtin_type_index
   RS6000_BTI_ptr_vector_quad,
   RS6000_BTI_ptr_long_long,
   RS6000_BTI_ptr_long_long_unsigned,
+  RS6000_BTI_PTI,
   RS6000_BTI_MAX
 };
 
@@ -2347,6 +2348,7 @@ enum rs6000_builtin_type_index
 #define uintDI_type_internal_node   
(rs6000_builtin_types[RS6000_BTI_UINTDI])
 #define intTI_type_internal_node
(rs6000_builtin_types[RS6000_BTI_INTTI])
 #define uintTI_type_internal_node   
(rs6000_builtin_types[RS6000_BTI_UINTTI])
+#define intPTI_type_internal_node   (rs6000_builtin_types[RS6000_BTI_PTI])
 #define float_type_internal_node
(rs6000_builtin_types[RS6000_BTI_float])
 #define double_type_internal_node   
(rs6000_builtin_types[RS6000_BTI_double])
 #define long_double_type_internal_node  
(rs6000_builtin_types[RS6000_BTI_long_double])
diff --git a/gcc/testsuite/gcc.target/powerpc/pr106895.c 
b/gcc/testsuite/gcc.target/powerpc/pr106895.c
new file mode 100644
index 000..04630fe1df5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr106895.c
@@ -0,0 +1,15 @@
+/* PR target/106895 */
+/* { dg-require-effective-target int128 } */
+/* { dg-options "-O2" } */
+
+/* Verify the following generates even/odd register pairs.  */
+
+typedef __int128 pti __attribute__((mode(PTI)));
+
+void
+set128 (pti val, pti *mem)
+{
+asm("stq %1,%0" : "=m"(*mem) : "r"(val));
+}
+
+/* { dg-final { scan-assembler "stq 10,0\\(5\\)" } } */




[PING ^1][PATCH] rs6000: Fix issue in specifying PTImode as an attribute [PR106895]

2023-08-04 Thread jeevitha via Gcc-patches
Ping!

please review.

Thanks & Regards
Jeevitha

On 20/07/23 10:05 am, jeevitha wrote:
> Hi All,
> 
> The following patch has been bootstrapped and regtested on powerpc64le-linux.
> 
> When the user specifies PTImode as an attribute, it breaks. Created
> a tree node to handle PTImode types. PTImode attribute helps in generating
> even/odd register pairs on 128 bits.
> 
> 2023-07-20  Jeevitha Palanisamy  
> 
> gcc/
>   PR target/110411
>   * config/rs6000/rs6000.h (enum rs6000_builtin_type_index): Add fields
>   to hold PTImode type.
>   * config/rs6000/rs6000-builtin.cc (rs6000_init_builtins): Add node
>   for PTImode type.
> 
> gcc/testsuite/
>   PR target/106895
>   * gcc.target/powerpc/pr106895.c: New testcase.
> 
> diff --git a/gcc/config/rs6000/rs6000-builtin.cc 
> b/gcc/config/rs6000/rs6000-builtin.cc
> index a8f291c6a72..ca00c3b0d4c 100644
> --- a/gcc/config/rs6000/rs6000-builtin.cc
> +++ b/gcc/config/rs6000/rs6000-builtin.cc
> @@ -756,6 +756,15 @@ rs6000_init_builtins (void)
>else
>  ieee128_float_type_node = NULL_TREE;
>  
> +  /* PTImode to get even/odd register pairs.  */
> +  intPTI_type_internal_node = make_node(INTEGER_TYPE);
> +  TYPE_PRECISION (intPTI_type_internal_node) = GET_MODE_BITSIZE (PTImode);
> +  layout_type (intPTI_type_internal_node);
> +  SET_TYPE_MODE (intPTI_type_internal_node, PTImode);
> +  t = build_qualified_type (intPTI_type_internal_node, TYPE_QUAL_CONST);
> +  lang_hooks.types.register_builtin_type (intPTI_type_internal_node,
> +   "__int128pti");
> +
>/* Vector pair and vector quad support.  */
>vector_pair_type_node = make_node (OPAQUE_TYPE);
>SET_TYPE_MODE (vector_pair_type_node, OOmode);
> diff --git a/gcc/config/rs6000/rs6000.h b/gcc/config/rs6000/rs6000.h
> index 3503614efbd..0456bf56d17 100644
> --- a/gcc/config/rs6000/rs6000.h
> +++ b/gcc/config/rs6000/rs6000.h
> @@ -2303,6 +2303,7 @@ enum rs6000_builtin_type_index
>RS6000_BTI_ptr_vector_quad,
>RS6000_BTI_ptr_long_long,
>RS6000_BTI_ptr_long_long_unsigned,
> +  RS6000_BTI_PTI,
>RS6000_BTI_MAX
>  };
>  
> @@ -2347,6 +2348,7 @@ enum rs6000_builtin_type_index
>  #define uintDI_type_internal_node 
> (rs6000_builtin_types[RS6000_BTI_UINTDI])
>  #define intTI_type_internal_node  
> (rs6000_builtin_types[RS6000_BTI_INTTI])
>  #define uintTI_type_internal_node 
> (rs6000_builtin_types[RS6000_BTI_UINTTI])
> +#define intPTI_type_internal_node (rs6000_builtin_types[RS6000_BTI_PTI])
>  #define float_type_internal_node  
> (rs6000_builtin_types[RS6000_BTI_float])
>  #define double_type_internal_node 
> (rs6000_builtin_types[RS6000_BTI_double])
>  #define long_double_type_internal_node
> (rs6000_builtin_types[RS6000_BTI_long_double])
> diff --git a/gcc/testsuite/gcc.target/powerpc/pr106895.c 
> b/gcc/testsuite/gcc.target/powerpc/pr106895.c
> new file mode 100644
> index 000..04630fe1df5
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/powerpc/pr106895.c
> @@ -0,0 +1,15 @@
> +/* PR target/106895 */
> +/* { dg-require-effective-target int128 } */
> +/* { dg-options "-O2" } */
> +
> +/* Verify the following generates even/odd register pairs.  */
> +
> +typedef __int128 pti __attribute__((mode(PTI)));
> +
> +void
> +set128 (pti val, pti *mem)
> +{
> +asm("stq %1,%0" : "=m"(*mem) : "r"(val));
> +}
> +
> +/* { dg-final { scan-assembler "stq 10,0\\(5\\)" } } */
> 
> 


[PING^1][PATCH V2] rs6000: Don't allow AltiVec address in movoo & movxo pattern [PR110411]

2023-08-04 Thread jeevitha via Gcc-patches
Ping!

please review.

Thanks & Regards
Jeevitha

On 19/07/23 10:16 pm, jeevitha wrote:
> Hi All,
> 
> The following patch has been bootstrapped and regtested on powerpc64le-linux.
> 
> There are no instructions that do traditional AltiVec addresses (i.e.
> with the low four bits of the address masked off) for OOmode and XOmode
> objects. The solution is to modify the constraints used in the movoo and
> movxo pattern to disallow these types of addresses, which assists LRA in
> resolving this issue. Furthermore, the mode size 16 check has been
> removed in vsx_quad_dform_memory_operand to allow OOmode and
> quad_address_p already handles less than size 16.
> 
> 2023-07-19  Jeevitha Palanisamy  
> 
> gcc/
>   PR target/110411
>   * config/rs6000/mma.md (define_insn_and_split movoo): Disallow
>   AltiVec address in movoo and movxo pattern.
>   (define_insn_and_split movxo): Likewise.
>   *config/rs6000/predicates.md (vsx_quad_dform_memory_operand):Remove
>   redundant mode size check.
> 
> gcc/testsuite/
>   PR target/110411
>   * gcc.target/powerpc/pr110411-1.c: New testcase.
>   * gcc.target/powerpc/pr110411-2.c: New testcase.
> 
> diff --git a/gcc/config/rs6000/mma.md b/gcc/config/rs6000/mma.md
> index d36dc13872b..575751d477e 100644
> --- a/gcc/config/rs6000/mma.md
> +++ b/gcc/config/rs6000/mma.md
> @@ -293,8 +293,8 @@
>  })
>  
>  (define_insn_and_split "*movoo"
> -  [(set (match_operand:OO 0 "nonimmediate_operand" "=wa,m,wa")
> - (match_operand:OO 1 "input_operand" "m,wa,wa"))]
> +  [(set (match_operand:OO 0 "nonimmediate_operand" "=wa,ZwO,wa")
> + (match_operand:OO 1 "input_operand" "ZwO,wa,wa"))]
>"TARGET_MMA
> && (gpc_reg_operand (operands[0], OOmode)
> || gpc_reg_operand (operands[1], OOmode))"
> @@ -340,8 +340,8 @@
>  })
>  
>  (define_insn_and_split "*movxo"
> -  [(set (match_operand:XO 0 "nonimmediate_operand" "=d,m,d")
> - (match_operand:XO 1 "input_operand" "m,d,d"))]
> +  [(set (match_operand:XO 0 "nonimmediate_operand" "=d,ZwO,d")
> + (match_operand:XO 1 "input_operand" "ZwO,d,d"))]
>"TARGET_MMA
> && (gpc_reg_operand (operands[0], XOmode)
> || gpc_reg_operand (operands[1], XOmode))"
> diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md
> index 3552d908e9d..925f69cd3fc 100644
> --- a/gcc/config/rs6000/predicates.md
> +++ b/gcc/config/rs6000/predicates.md
> @@ -924,7 +924,7 @@
>  (define_predicate "vsx_quad_dform_memory_operand"
>(match_code "mem")
>  {
> -  if (!TARGET_P9_VECTOR || GET_MODE_SIZE (mode) != 16)
> +  if (!TARGET_P9_VECTOR)
>  return false;
>  
>return quad_address_p (XEXP (op, 0), mode, false);
> diff --git a/gcc/testsuite/gcc.target/powerpc/pr110411-1.c 
> b/gcc/testsuite/gcc.target/powerpc/pr110411-1.c
> new file mode 100644
> index 000..f42e9388d65
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/powerpc/pr110411-1.c
> @@ -0,0 +1,22 @@
> +/* PR target/110411 */
> +/* { dg-require-effective-target power10_ok } */
> +/* { dg-options "-O2 -mdejagnu-cpu=power10 -mblock-ops-vector-pair" } */
> +
> +/* Verify we do not ICE on the following.  */
> +
> +#include 
> +
> +struct s {
> +  long a;
> +  long b;
> +  long c;
> +  long d: 1;
> +};
> +unsigned long ptr;
> +
> +void
> +bug (struct s *dst)
> +{
> +  struct s *src = (struct s *)(ptr & ~0xFUL);
> +  memcpy (dst, src, sizeof(struct s));
> +}
> diff --git a/gcc/testsuite/gcc.target/powerpc/pr110411-2.c 
> b/gcc/testsuite/gcc.target/powerpc/pr110411-2.c
> new file mode 100644
> index 000..c2046fb9855
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/powerpc/pr110411-2.c
> @@ -0,0 +1,12 @@
> +/* PR target/110411 */
> +/* { dg-require-effective-target power10_ok } */
> +/* { dg-options "-O2 -mdejagnu-cpu=power10" } */
> +
> +/* Verify we do not ICE on the following.  */
> +
> +void
> +bug (__vector_quad *dst)
> +{
> +  dst = (__vector_quad *)((unsigned long)dst & ~0xFUL);
> +  __builtin_mma_xxsetaccz (dst);
> +}
> 
> 
> 


[PATCH] MAINTAINERS: Add myself to write after approval

2023-05-31 Thread P Jeevitha via Gcc-patches


2023-05-30  Jeevitha Palanisamy  

ChangeLog:
* MAINTAINERS (Write After Approval): Add myself.

diff --git a/MAINTAINERS b/MAINTAINERS
index 2dc51154446..4a7c963914b 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -584,6 +584,7 @@ Patrick O'Neill 

 Braden Obrzut  
 Andrea Ornstein
 Maxim Ostapenko

+Jeevitha Palanisamy
 Patrick Palka  
 Srinath Parvathaneni   
 Devang Patel   
-- 
2.39.1



[PATCH] rs6000: Remove duplicate expression [PR106907]

2023-06-04 Thread P Jeevitha via Gcc-patches
PR106907 lists a few warnings reported by cppcheck. This patch addresses the
duplicate-expression warning: the same expression was used twice in a logical
AND (&&) operation, which yields the same result, so the duplicate is removed.

2023-06-05  Jeevitha Palanisamy  

gcc/
PR target/106907
* config/rs6000/rs6000.cc (vec_const_128bit_to_bytes): Remove
duplicate expression.


diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
index 42f49e4a56b..d197c3f3289 100644
--- a/gcc/config/rs6000/rs6000.cc
+++ b/gcc/config/rs6000/rs6000.cc
@@ -28784,7 +28784,6 @@ vec_const_128bit_to_bytes (rtx op,
 
   info->all_words_same
 = (info->words[0] == info->words[1]
-   && info->words[0] == info->words[1]
&& info->words[0] == info->words[2]
&& info->words[0] == info->words[3]);
 




Re: [PATCH] rs6000: Remove duplicate expression [PR106907]

2023-06-06 Thread P Jeevitha via Gcc-patches
Thanks for reviewing Segher. Will work on backports as well :).


Jeevitha


[PATCH] rs6000: Remove redundant initialization [PR106907]

2023-06-06 Thread P Jeevitha via Gcc-patches
PR106907 lists a few warnings reported by cppcheck. This patch addresses the
redundant-initialization warning: the initial value of 'new_addr' was
overwritten before it was read, so the unnecessary initialization of
'new_addr' has been removed.

2023-06-07  Jeevitha Palanisamy  

gcc/
PR target/106907
* config/rs6000/rs6000.cc (rs6000_expand_vector_extract): Remove
redundant initialization of new_addr.


diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
index 42f49e4a56b..d994e004bd3 100644
--- a/gcc/config/rs6000/rs6000.cc
+++ b/gcc/config/rs6000/rs6000.cc
@@ -7660,12 +7660,11 @@ rs6000_expand_vector_extract (rtx target, rtx vec, rtx 
elt)
 {
   unsigned int ele_size = GET_MODE_SIZE (inner_mode);
   rtx num_ele_m1 = GEN_INT (GET_MODE_NUNITS (mode) - 1);
-  rtx new_addr = gen_reg_rtx (Pmode);
 
   elt = gen_rtx_AND (Pmode, elt, num_ele_m1);
   if (ele_size > 1)
elt = gen_rtx_MULT (Pmode, elt, GEN_INT (ele_size));
-  new_addr = gen_rtx_PLUS (Pmode, XEXP (mem, 0), elt);
+  rtx new_addr = gen_rtx_PLUS (Pmode, XEXP (mem, 0), elt);
   new_addr = change_address (mem, inner_mode, new_addr);
   emit_move_insn (target, new_addr);
 }



[PATCH] Add parentheses to clarify precedence between operators [PR106907]

2023-06-06 Thread P Jeevitha via Gcc-patches
PR106907 lists a few warnings reported by cppcheck. In order to clarify the
order of precedence between operators, parentheses were added to explicitly
group operations based on the desired order of evaluation.

2023-06-07  Jeevitha Palanisamy  

gcc/
PR target/106907
* config/gcn/gcn.cc (gcn_hsa_declare_function_name): Add parentheses
to group the operations.


diff --git a/gcc/config/gcn/gcn.cc b/gcc/config/gcn/gcn.cc
index efb7211d54e..d2b5cb74204 100644
--- a/gcc/config/gcn/gcn.cc
+++ b/gcc/config/gcn/gcn.cc
@@ -6300,7 +6300,7 @@ gcn_hsa_declare_function_name (FILE *file, const char 
*name, tree)
   fprintf (file, "\t  .amdhsa_system_vgpr_workitem_id\t%i\n",
   (cfun->machine->args.requested & (1 << WORK_ITEM_ID_Z_ARG))
   ? 2
-  : cfun->machine->args.requested & (1 << WORK_ITEM_ID_Y_ARG)
+  : (cfun->machine->args.requested & (1 << WORK_ITEM_ID_Y_ARG))
   ? 1 : 0);
   fprintf (file,
   "\t  .amdhsa_next_free_vgpr\t%i\n"



[PATCH] rs6000: Change bitwise xor to inequality operator [PR106907]

2023-06-12 Thread P Jeevitha via Gcc-patches
PR106907 lists a few warnings reported by cppcheck. Here the warnings ask
for precedence clarification because boolean results are used in a
bitwise operation. Bitwise xor performed on bool operands is equivalent
to checking inequality, so the bitwise xor (^) is changed to the
inequality operator (!=). The comment indentation is also fixed.

2023-06-12  Jeevitha Palanisamy  

gcc/
PR target/106907
* config/rs6000/rs6000.cc (altivec_expand_vec_perm_const): Change
bitwise xor to inequality and fix comment indentation.


diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
index ea68ca6faef..ea7efda8dcd 100644
--- a/gcc/config/rs6000/rs6000.cc
+++ b/gcc/config/rs6000/rs6000.cc
@@ -23396,10 +23396,10 @@ altivec_expand_vec_perm_const (rtx target, rtx op0, 
rtx op1,
  && GET_MODE (XEXP (op0, 0)) != V8HImode)))
continue;
 
-  /* For little-endian, the two input operands must be swapped
- (or swapped back) to ensure proper right-to-left numbering
- from 0 to 2N-1.  */
- if (swapped ^ !BYTES_BIG_ENDIAN
+ /* For little-endian, the two input operands must be swapped
+(or swapped back) to ensure proper right-to-left numbering
+from 0 to 2N-1.  */
+ if (swapped != !BYTES_BIG_ENDIAN
  && icode != CODE_FOR_vsx_xxpermdi_v16qi)
std::swap (op0, op1);
  if (imode != V16QImode)





[PATCH V2] rs6000: Change GPR2 to volatile & non-fixed register for function that does not use TOC [PR110320]

2023-07-16 Thread P Jeevitha via Gcc-patches


Hi All,

The following patch has been bootstrapped and regtested on powerpc64le-linux.

Normally, GPR2 is the TOC pointer and is defined as a fixed and non-volatile
register. However, it can be used as a volatile register with PCREL addressing.
Therefore, r2 is now non-fixed in FIXED_REGISTERS and is set to fixed when the
code is not PCREL, and also when the user explicitly requests TOC or fixed.
If register r2 is fixed, it is made non-volatile. Changes in register
preservation roles can be accomplished with the help of available target hooks
(TARGET_CONDITIONAL_REGISTER_USAGE).

2023-07-12  Jeevitha Palanisamy  

gcc/
PR target/110320
* config/rs6000/rs6000.cc (rs6000_conditional_register_usage): Change
GPR2 to volatile and non-fixed register for PCREL.

gcc/testsuite/
PR target/110320
* gcc.target/powerpc/pr110320-1.c: New testcase.
* gcc.target/powerpc/pr110320-2.c: New testcase.
* gcc.target/powerpc/pr110320-3.c: New testcase.

diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
index 44b448d2ba6..9aa04ec5d57 100644
--- a/gcc/config/rs6000/rs6000.cc
+++ b/gcc/config/rs6000/rs6000.cc
@@ -10193,9 +10193,13 @@ rs6000_conditional_register_usage (void)
 for (i = 32; i < 64; i++)
   fixed_regs[i] = call_used_regs[i] = 1;
 
+  /* For non PC-relative code, GPR2 is unavailable for register allocation.  */
+  if (FIXED_R2 && !rs6000_pcrel_p ())
+fixed_regs[2] = 1;
+
   /* The TOC register is not killed across calls in a way that is
  visible to the compiler.  */
-  if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
+  if (fixed_regs[2] && (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2))
 call_used_regs[2] = 0;
 
   if (DEFAULT_ABI == ABI_V4 && flag_pic == 2)
diff --git a/gcc/config/rs6000/rs6000.h b/gcc/config/rs6000/rs6000.h
index 3503614efbd..2a24fbdf9fd 100644
--- a/gcc/config/rs6000/rs6000.h
+++ b/gcc/config/rs6000/rs6000.h
@@ -812,7 +812,7 @@ enum data_align { align_abi, align_opt, align_both };
 
 #define FIXED_REGISTERS  \
   {/* GPRs */ \
-   0, 1, FIXED_R2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, FIXED_R13, 0, 0, \
+   0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, FIXED_R13, 0, 0, \
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
/* FPRs */ \
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
diff --git a/gcc/testsuite/gcc.target/powerpc/pr110320-1.c 
b/gcc/testsuite/gcc.target/powerpc/pr110320-1.c
new file mode 100644
index 000..a4ad34d9303
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr110320-1.c
@@ -0,0 +1,22 @@
+/* PR target/110320 */
+/* { dg-require-effective-target powerpc_pcrel } */
+/* { dg-options "-O2 -mdejagnu-cpu=power10 -ffixed-r0 -ffixed-r11 -ffixed-r12" 
} */
+
+/* Ensure we use r2 as a normal volatile register for the code below.
+   The test case ensures all of the parameter registers r3 - r10 are used
+   and needed after we compute the expression "x + y" which requires a
+   temporary.  The -ffixed-r* options disallow using the other volatile
+   registers r0, r11 and r12.  That leaves RA to choose from r2 and the more
+   expensive non-volatile registers for the temporary to be assigned to, and
+   RA will always chooses the cheaper volatile r2 register.  */
+
+extern long bar (long, long, long, long, long, long, long, long *);
+
+long
+foo (long r3, long r4, long r5, long r6, long r7, long r8, long r9, long *r10)
+{
+  *r10 = r3 + r4;
+  return bar (r3, r4, r5, r6, r7, r8, r9, r10);
+}
+
+/* { dg-final { scan-assembler {\madd 2,3,4\M} } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/pr110320-2.c 
b/gcc/testsuite/gcc.target/powerpc/pr110320-2.c
new file mode 100644
index 000..9d6aefedd2e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr110320-2.c
@@ -0,0 +1,21 @@
+/* PR target/110320 */
+/* { dg-require-effective-target powerpc_pcrel } */
+/* { dg-options "-O2 -mdejagnu-cpu=power10 -mno-pcrel -ffixed-r0 -ffixed-r11 
-ffixed-r12" } */
+
+/* Ensure we don't use r2 as a normal volatile register for the code below.
+   The test case ensures all of the parameter registers r3 - r10 are used
+   and needed after we compute the expression "x + y" which requires a
+   temporary.  The -ffixed-r* options disallow using the other volatile
+   registers r0, r11 and r12.  That only leaves RA to choose from the more
+   expensive non-volatile registers for the temporary to be assigned to.  */
+
+extern long bar (long, long, long, long, long, long, long, long *);
+
+long
+foo (long r3, long r4, long r5, long r6, long r7, long r8, long r9, long *r10)
+{
+  *r10 = r3 + r4;
+  return bar (r3, r4, r5, r6, r7, r8, r9, r10);
+}
+
+/* { dg-final { scan-assembler-not {\madd 2,3,4\M} } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/pr110320-3.c 
b/gcc/testsuite/gcc.target/powerpc/pr110320-3.c
new file mode 100644
index 000..ea6c6188c8d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr11

[PATCH] rs6000: Change GPR2 to volatile & non-fixed register for function that does not use TOC [PR110320]

2023-06-23 Thread P Jeevitha via Gcc-patches
Hi All,

The following patch has been bootstrapped and regtested on powerpc64le-linux.

Normally, GPR2 is the TOC pointer and is defined as a fixed and non-volatile
register. However, it can be used as volatile for PCREL addressing. Therefore,
if the code is PCREL and the user is not explicitly requesting TOC addressing,
then the register r2 can be changed to volatile and non-fixed register. Changes
in register preservation roles can be accomplished with the help of available
target hooks (TARGET_CONDITIONAL_REGISTER_USAGE).

2023-06-23  Jeevitha Palanisamy  

gcc/
PR target/110320
* config/rs6000/rs6000.cc (rs6000_conditional_register_usage): Change
GPR2 to volatile and non-fixed register for pc-relative code.

gcc/testsuite/
PR target/110320
* gcc.target/powerpc/pr110320_1.c: New testcase.
* gcc.target/powerpc/pr110320_2.c: New testcase.

diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
index 546c353029b..9e978f85f9d 100644
--- a/gcc/config/rs6000/rs6000.cc
+++ b/gcc/config/rs6000/rs6000.cc
@@ -10169,6 +10169,35 @@ rs6000_conditional_register_usage (void)
   if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
 call_used_regs[2] = 0;
 
+  /* The TOC register is not needed for functions using the PC-relative ABI
+ extension, so make it available for register allocation as a volatile
+ register.  */
+  if (FIXED_R2 && rs6000_pcrel_p ())
+{
+  bool cli_fixedr2 = false;
+
+  /* Verify the user has not explicitly asked for GPR2 to be fixed.  */
+  if (common_deferred_options)
+   {
+ unsigned int idx;
+ cl_deferred_option *opt;
+ vec<cl_deferred_option> v;
+ v = *((vec<cl_deferred_option> *) common_deferred_options);
+ FOR_EACH_VEC_ELT (v, idx, opt)
+   if (opt->opt_index == OPT_ffixed_ && strcmp (opt->arg,"r2") == 0)
+ {
+   cli_fixedr2 = true;
+   break;
+ }
+   }
+
+  /* If GPR2 is not FIXED (eg, not a TOC register), then it is volatile.  
*/
+  if (!cli_fixedr2)
+   {
+ fixed_regs[2] = 0;
+ call_used_regs[2] = 1;
+   }
+}
   if (DEFAULT_ABI == ABI_V4 && flag_pic == 2)
 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
 
diff --git a/gcc/testsuite/gcc.target/powerpc/pr110320_1.c 
b/gcc/testsuite/gcc.target/powerpc/pr110320_1.c
new file mode 100644
index 000..42143fbf889
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr110320_1.c
@@ -0,0 +1,23 @@
+/* PR target/110320 */
+/* { dg-require-effective-target power10_ok } */
+/* { dg-require-effective-target powerpc_pcrel } */
+/* { dg-options "-O2 -mdejagnu-cpu=power10 -ffixed-r0 -ffixed-r11 -ffixed-r12" 
} */
+
+/* Ensure we use r2 as a normal volatile register for the code below.
+   The test case ensures all of the parameter registers r3 - r10 are used
+   and needed after we compute the expression "x + y" which requires a
+   temporary.  The -ffixed-r* options disallow using the other volatile
+   registers r0, r11 and r12.  That leaves RA to choose from r2 and the more
+   expensive non-volatile registers for the temporary to be assigned to, and
+   RA will always chooses the cheaper volatile r2 register.  */
+
+extern long bar (long, long, long, long, long, long, long, long *);
+
+long
+foo (long r3, long r4, long r5, long r6, long r7, long r8, long r9, long *r10)
+{
+  *r10 = r3 + r4;
+  return bar (r3, r4, r5, r6, r7, r8, r9, r10);
+}
+
+/* { dg-final { scan-assembler {\madd 2,3,4\M} } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/pr110320_2.c 
b/gcc/testsuite/gcc.target/powerpc/pr110320_2.c
new file mode 100644
index 000..9d0da5b9695
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr110320_2.c
@@ -0,0 +1,22 @@
+/* PR target/110320 */
+/* { dg-require-effective-target power10_ok } */
+/* { dg-require-effective-target powerpc_pcrel } */
+/* { dg-options "-O2 -mdejagnu-cpu=power10 -mno-pcrel -ffixed-r0 -ffixed-r11 
-ffixed-r12" } */
+
+/* Ensure we don't use r2 as a normal volatile register for the code below.
+   The test case ensures all of the parameter registers r3 - r10 are used
+   and needed after we compute the expression "x + y" which requires a
+   temporary.  The -ffixed-r* options disallow using the other volatile
+   registers r0, r11 and r12.  That only leaves RA to choose from the more
+   expensive non-volatile registers for the temporary to be assigned to.  */
+
+extern long bar (long, long, long, long, long, long, long, long *);
+
+long
+foo (long r3, long r4, long r5, long r6, long r7, long r8, long r9, long *r10)
+{
+  *r10 = r3 + r4;
+  return bar (r3, r4, r5, r6, r7, r8, r9, r10);
+}
+
+/* { dg-final { scan-assembler-not {\madd 2,3,4\M} } } */




[PATCH] rs6000: Don't ICE when generating vector pair load/store insns [PR110411]

2023-07-05 Thread P Jeevitha via Gcc-patches
Hi All,

The following patch has been bootstrapped and regtested on powerpc64le-linux.

While generating vector pair load and store instructions, the src address
was treated as an AltiVec-type address, and that type of address is invalid
for lxvp and stxvp insns. The solution is to avoid AltiVec-type addresses
for OOmode and XOmode.

2023-07-05  Jeevitha Palanisamy  

gcc/
PR target/110411
* config/rs6000/rs6000.cc (rs6000_legitimate_address_p): Avoid altivec
address for OOmode and XOmode.

gcc/testsuite/
PR target/110411
* gcc.target/powerpc/pr110411.c: New testcase.

diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
index 07c3a3d15ac..b914c65e5c9 100644
--- a/gcc/config/rs6000/rs6000.cc
+++ b/gcc/config/rs6000/rs6000.cc
@@ -9894,6 +9894,8 @@ rs6000_legitimate_address_p (machine_mode mode, rtx x, 
bool reg_ok_strict)
 
   /* Handle unaligned altivec lvx/stvx type addresses.  */
   if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode)
+  && mode !=  OOmode
+  && mode !=  XOmode
   && GET_CODE (x) == AND
   && CONST_INT_P (XEXP (x, 1))
   && INTVAL (XEXP (x, 1)) == -16)
diff --git a/gcc/testsuite/gcc.target/powerpc/pr110411.c 
b/gcc/testsuite/gcc.target/powerpc/pr110411.c
new file mode 100644
index 000..83ef0638fb2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr110411.c
@@ -0,0 +1,21 @@
+/* PR target/110411 */
+/* { dg-options "-O2 -mdejagnu-cpu=power10 -S -mblock-ops-vector-pair" } */
+
+/* Verify we do not ICE on the following.  */
+
+#include <string.h>
+
+struct s {
+  long a;
+  long b;
+  long c;
+  long d: 1;
+};
+unsigned long ptr;
+
+void
+foo (struct s *dst)
+{
+  struct s *src = (struct s *)(ptr & ~0xFUL);
+  memcpy (dst, src, sizeof(struct s));
+}



Re: [PATCH] rs6000: Change GPR2 to volatile & non-fixed register for function that does not use TOC [PR110320]

2023-07-11 Thread P Jeevitha via Gcc-patches



On 07/07/2023 12:11 am, Peter Bergner wrote:

> I believe the untested patch below should also work, without having to scan
> the (uncommonly used) options.  Jeevitha, can you bootstrap and regtest the
> patch below?

Yeah Peter, I bootstrapped and regtested the patch below on powerpc64le-linux;
there were no regressions.

> diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
> index d197c3f3289..7c356a73ac6 100644
> --- a/gcc/config/rs6000/rs6000.cc
> +++ b/gcc/config/rs6000/rs6000.cc
> @@ -10160,9 +10160,13 @@ rs6000_conditional_register_usage (void)
>  for (i = 32; i < 64; i++)
>fixed_regs[i] = call_used_regs[i] = 1;
> 
> +  /* For non PC-relative code, GPR2 is unavailable for register allocation.  
> */
> +  if (FIXED_R2 && !rs6000_pcrel_p ())
> +fixed_regs[2] = 1;
> +
>/* The TOC register is not killed across calls in a way that is
>   visible to the compiler.  */
> -  if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
> +  if (fixed_regs[2] && (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2))
>  call_used_regs[2] = 0;
> 
>if (DEFAULT_ABI == ABI_V4 && flag_pic == 2)
> diff --git a/gcc/config/rs6000/rs6000.h b/gcc/config/rs6000/rs6000.h
> index 3503614efbd..2a24fbdf9fd 100644
> --- a/gcc/config/rs6000/rs6000.h
> +++ b/gcc/config/rs6000/rs6000.h
> @@ -812,7 +812,7 @@ enum data_align { align_abi, align_opt, align_both };
> 
>  #define FIXED_REGISTERS  \
>{/* GPRs */ \
> -   0, 1, FIXED_R2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, FIXED_R13, 0, 0, \
> +   0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, FIXED_R13, 0, 0, \
> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
> /* FPRs */ \
> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
> 

> 
> 
>> Besides, IMHO we need a corresponding test case to cover this -ffixed-r2 
>> handling.
> 
> Good idea.  I think we can duplicate the pr110320_2.c test case, replacing the
> -mno-pcrel option with -ffixed-r2.  Jeevitha, can you give that a try?
 
Yeah, adding the new test cases along with the mentioned changes for the older 
ones below,

diff --git a/gcc/testsuite/gcc.target/powerpc/pr110320_1.c 
b/gcc/testsuite/gcc.target/powerpc/pr110320_1.c
new file mode 100644
index 000..a4ad34d9303
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr110320_1.c
@@ -0,0 +1,22 @@
+/* PR target/110320 */
+/* { dg-require-effective-target powerpc_pcrel } */
+/* { dg-options "-O2 -mdejagnu-cpu=power10 -ffixed-r0 -ffixed-r11 -ffixed-r12" } */
+
+/* Ensure we use r2 as a normal volatile register for the code below.
+   The test case ensures all of the parameter registers r3 - r10 are used
+   and needed after we compute the expression "x + y" which requires a
+   temporary.  The -ffixed-r* options disallow using the other volatile
+   registers r0, r11 and r12.  That leaves RA to choose from r2 and the more
+   expensive non-volatile registers for the temporary to be assigned to, and
+   RA will always choose the cheaper volatile r2 register.  */
+
+extern long bar (long, long, long, long, long, long, long, long *);
+
+long
+foo (long r3, long r4, long r5, long r6, long r7, long r8, long r9, long *r10)
+{
+  *r10 = r3 + r4;
+  return bar (r3, r4, r5, r6, r7, r8, r9, r10);
+}
+
+/* { dg-final { scan-assembler {\madd 2,3,4\M} } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/pr110320_2.c b/gcc/testsuite/gcc.target/powerpc/pr110320_2.c
new file mode 100644
index 000..9d6aefedd2e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr110320_2.c
@@ -0,0 +1,21 @@
+/* PR target/110320 */
+/* { dg-require-effective-target powerpc_pcrel } */
+/* { dg-options "-O2 -mdejagnu-cpu=power10 -mno-pcrel -ffixed-r0 -ffixed-r11 -ffixed-r12" } */
+
+/* Ensure we don't use r2 as a normal volatile register for the code below.
+   The test case ensures all of the parameter registers r3 - r10 are used
+   and needed after we compute the expression "x + y" which requires a
+   temporary.  The -ffixed-r* options disallow using the other volatile
+   registers r0, r11 and r12.  That only leaves RA to choose from the more
+   expensive non-volatile registers for the temporary to be assigned to.  */
+
+extern long bar (long, long, long, long, long, long, long, long *);
+
+long
+foo (long r3, long r4, long r5, long r6, long r7, long r8, long r9, long *r10)
+{
+  *r10 = r3 + r4;
+  return bar (r3, r4, r5, r6, r7, r8, r9, r10);
+}
+
+/* { dg-final { scan-assembler-not {\madd 2,3,4\M} } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/pr110320_3.c b/gcc/testsuite/gcc.target/powerpc/pr110320_3.c
new file mode 100644
index 000..ea6c6188c8d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr110320_3.c
@@ -0,0 +1,21 @@
+/* PR target/110320 */
+/* { dg-require-effective-target powerpc_pcrel } */
+/* { dg-options "-O2 -mdejagnu-cpu=power10 -ffixed-r2 -ffixed-r0 -ffixed-r11 -ffixed-r12" } */
+
+/* Ensure we don't use r2 as a normal volatile register f