[gcc r15-2910] Fix ICE in build_function_decl [PR116292]

2024-08-14 Thread Andre Vehreschild via Gcc-cvs
https://gcc.gnu.org/g:bb2324769c5a03e275de00416659e624c97f1442

commit r15-2910-gbb2324769c5a03e275de00416659e624c97f1442
Author: Andre Vehreschild 
Date:   Fri Aug 9 16:19:23 2024 +0200

Fix ICE in build_function_decl [PR116292]

Fix ICE by getting the vtype only when a derived or class type is
present.  Also take care of the _len component for unlimited
polymorphics.

gcc/fortran/ChangeLog:

PR fortran/116292

* trans-intrinsic.cc (conv_intrinsic_move_alloc): Get the vtab
only for derived types and classes and adjust _len for class
types.

gcc/testsuite/ChangeLog:

* gfortran.dg/move_alloc_19.f90: New test.

Diff:
---
 gcc/fortran/trans-intrinsic.cc  | 20 ++---
 gcc/testsuite/gfortran.dg/move_alloc_19.f90 | 34 +
 2 files changed, 51 insertions(+), 3 deletions(-)

diff --git a/gcc/fortran/trans-intrinsic.cc b/gcc/fortran/trans-intrinsic.cc
index 150cb9ff963b..84a378ef310c 100644
--- a/gcc/fortran/trans-intrinsic.cc
+++ b/gcc/fortran/trans-intrinsic.cc
@@ -12764,9 +12764,12 @@ conv_intrinsic_move_alloc (gfc_code *code)
  gfc_symbol *vtab;
  from_tree = from_se.expr;
 
- vtab = gfc_find_vtab (&from_expr->ts);
- gcc_assert (vtab);
- from_se.expr = gfc_get_symbol_decl (vtab);
+ if (to_expr->ts.type == BT_CLASS)
+   {
+ vtab = gfc_find_vtab (&from_expr->ts);
+ gcc_assert (vtab);
+ from_se.expr = gfc_get_symbol_decl (vtab);
+   }
}
   gfc_add_block_to_block (&block, &from_se.pre);
 
@@ -12811,6 +12814,15 @@ conv_intrinsic_move_alloc (gfc_code *code)
  gfc_class_set_vptr (&block, to_se.expr, from_se.expr);
  if (from_is_class)
gfc_reset_vptr (&block, from_expr);
+ if (UNLIMITED_POLY (to_expr))
+   {
+ tree to_len = gfc_class_len_get (to_se.class_container);
+ tmp = from_expr->ts.type == BT_CHARACTER && from_se.string_length
+ ? from_se.string_length
+ : size_zero_node;
+ gfc_add_modify_loc (input_location, &block, to_len,
+ fold_convert (TREE_TYPE (to_len), tmp));
+   }
}
 
   if (from_is_scalar)
@@ -12825,6 +12837,8 @@ conv_intrinsic_move_alloc (gfc_code *code)
  input_location, &block, from_se.string_length,
  build_int_cst (TREE_TYPE (from_se.string_length), 0));
}
+ if (UNLIMITED_POLY (from_expr))
+   gfc_reset_len (&block, from_expr);
 
  return gfc_finish_block (&block);
}
diff --git a/gcc/testsuite/gfortran.dg/move_alloc_19.f90 
b/gcc/testsuite/gfortran.dg/move_alloc_19.f90
new file mode 100644
index ..d23d9809ba11
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/move_alloc_19.f90
@@ -0,0 +1,34 @@
+!{ dg-do run }
+
+! Check PR 116292 is fixed.
+
+! Contributed by Harald Anlauf  
+!Sam James  
+
+program move_alloc_19
+  character, allocatable :: buffer, dummy, dummy2
+  class(*), allocatable :: poly
+
+  dummy = 'C'
+  dummy2 = 'A'
+  call s()
+  if (allocated (dummy)) stop 1
+  if (allocated (dummy2)) stop 2
+  if (.not. allocated (buffer)) stop 3
+  if (.not. allocated (poly)) stop 4
+  if (buffer /= 'C') stop 5
+  select type (poly)
+type is (character(*))
+  if (poly /= 'A') stop 6
+  if (len (poly) /= 1) stop 7
+class default
+  stop 8
+  end select
+  deallocate (poly, buffer)
+contains
+  subroutine s
+call move_alloc (dummy, buffer)
+call move_alloc (dummy2, poly)
+  end
+end
+


[gcc r15-2911] Prevent future proc_ptr parsing issues in associate [PR102973]

2024-08-14 Thread Andre Vehreschild via Gcc-cvs
https://gcc.gnu.org/g:54be14bfd6e2dee7cb4e1b3c20dc2677093ee818

commit r15-2911-g54be14bfd6e2dee7cb4e1b3c20dc2677093ee818
Author: Andre Vehreschild 
Date:   Tue Aug 13 15:06:56 2024 +0200

Prevent future proc_ptr parsing issues in associate [PR102973]

A global variable is set when proc_ptr parsing in an associate is
expected. In the case of an error, that flag was not reset, which is
fixed now.

gcc/fortran/ChangeLog:

PR fortran/102973

* match.cc (gfc_match_associate): Reset proc_ptr parsing flag on
error.

Diff:
---
 gcc/fortran/match.cc | 1 +
 1 file changed, 1 insertion(+)

diff --git a/gcc/fortran/match.cc b/gcc/fortran/match.cc
index 1851a8f94a54..e4b60bf5f685 100644
--- a/gcc/fortran/match.cc
+++ b/gcc/fortran/match.cc
@@ -1932,6 +1932,7 @@ gfc_match_associate (void)
  gfc_matching_procptr_assignment = 1;
  if (gfc_match (" %e", &newAssoc->target) != MATCH_YES)
{
+ gfc_matching_procptr_assignment = 0;
  gfc_error ("Invalid association target at %C");
  goto assocListError;
}


[gcc r15-2912] Restrict pr116202-run-1.c test to riscv_v target

2024-08-14 Thread Mark Wielaard via Gcc-cvs
https://gcc.gnu.org/g:42aba4786e42ac2317b4f1185a93bffb3de2ce50

commit r15-2912-g42aba4786e42ac2317b4f1185a93bffb3de2ce50
Author: Mark Wielaard 
Date:   Mon Aug 12 22:25:42 2024 +0200

Restrict pr116202-run-1.c test to riscv_v target

The testcase uses -march=rv64gcv and dg-do run, so should be
restricted to a riscv_v target.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/base/pr116202-run-1.c (dg-do run):
Add target riscv_v.

Diff:
---
 gcc/testsuite/gcc.target/riscv/rvv/base/pr116202-run-1.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr116202-run-1.c 
b/gcc/testsuite/gcc.target/riscv/rvv/base/pr116202-run-1.c
index 02814183dbb9..979989f8a857 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/base/pr116202-run-1.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr116202-run-1.c
@@ -1,4 +1,4 @@
-/* { dg-do run } */
+/* { dg-do run { target { riscv_v } } } */
 /* { dg-options "-O3 -march=rv64gcv_zvl256b -mabi=lp64d 
-fdump-rtl-expand-details" } */
 
 int b[24];


[gcc r15-2913] i386: Optimization for APX NDD is always zero-uppered for ADD

2024-08-14 Thread Kong Lingling via Gcc-cvs
https://gcc.gnu.org/g:a302cd6abbeccc93784b7ecc86f2e9106aa412ca

commit r15-2913-ga302cd6abbeccc93784b7ecc86f2e9106aa412ca
Author: Lingling Kong 
Date:   Wed Aug 14 16:35:29 2024 +0800

i386: Optimization for APX NDD is always zero-uppered for ADD

gcc/ChangeLog:

PR target/113729
* config/i386/i386.md (*addqi_1_zext): New
define.
(*addhi_1_zext): Ditto.

gcc/testsuite/ChangeLog:

* gcc.target/i386/pr113729.c: New test.

Diff:
---
 gcc/config/i386/i386.md  | 80 
 gcc/testsuite/gcc.target/i386/pr113729.c | 27 +++
 2 files changed, 107 insertions(+)

diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 1a6188f5161b..5f237275bdc8 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -6572,6 +6572,86 @@
(set_attr "has_nf" "1")
(set_attr "mode" "")])
 
+;; For APX instruction with an NDD, the destination GPR  will get the
+;; instruction’s result in bits [OSIZE-1:0] and, if OSIZE < 64b, have
+;; its upper bits [63:OSIZE] zeroed.
+
+(define_insn "*addqi_1_zext"
+  [(set (match_operand:SWI248x 0 "register_operand" "=r,r")
+   (zero_extend:SWI248x
+ (plus:QI (match_operand:QI 1 "nonimmediate_operand" "%rm,r")
+  (match_operand:QI 2 "general_operand" "rn,m"]
+  "TARGET_APX_NDD && 
+   && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
+{
+  switch (get_attr_type (insn))
+{
+case TYPE_INCDEC:
+  if (operands[2] == const1_rtx)
+   return "inc{b}\t{%1, %b0|%b0, %1}";
+  else
+   {
+ gcc_assert (operands[2] == constm1_rtx);
+ return "dec{b}\t{%1, %b0|%b0, %1}";
+   }
+
+default:
+  if (x86_maybe_negate_const_int (&operands[2], QImode))
+   return "sub{b}\t{%2, %1, %b0|%b0, %1, %2}";
+  return "add{b}\t{%2, %1, %b0|%b0, %1, %2}";
+}
+}
+  [(set (attr "type")
+ (cond [(match_operand:QI 2 "incdec_operand")
+ (const_string "incdec")
+  ]
+  (const_string "alu")))
+   (set (attr "length_immediate")
+  (if_then_else
+   (and (eq_attr "type" "alu") (match_operand 2 "const128_operand"))
+   (const_string "1")
+   (const_string "*")))
+   (set_attr "has_nf" "1")
+   (set_attr "mode" "QI")])
+
+(define_insn "*addhi_1_zext"
+  [(set (match_operand:SWI48x 0 "register_operand" "=r,r")
+   (zero_extend:SWI48x
+ (plus:HI (match_operand:HI 1 "nonimmediate_operand" "%rm,r")
+  (match_operand:HI 2 "general_operand" "rn,m"]
+  "TARGET_APX_NDD && 
+   && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
+{
+  switch (get_attr_type (insn))
+{
+case TYPE_INCDEC:
+  if (operands[2] == const1_rtx)
+   return "inc{w}\t{%1, %w0|%w0, %1}";
+  else
+   {
+ gcc_assert (operands[2] == constm1_rtx);
+ return "dec{w}\t{%1, %w0|%w0, %1}";
+   }
+
+default:
+  if (x86_maybe_negate_const_int (&operands[2], HImode))
+   return "sub{w}\t{%2, %1, %w0|%w0, %1, %2}";
+  return "add{w}\t{%2, %1, %w0|%w0, %1, %2}";
+}
+}
+  [(set (attr "type")
+ (cond [(match_operand:QI 2 "incdec_operand")
+ (const_string "incdec")
+  ]
+  (const_string "alu")))
+   (set (attr "length_immediate")
+  (if_then_else
+   (and (eq_attr "type" "alu") (match_operand 2 "const128_operand"))
+   (const_string "1")
+   (const_string "*")))
+   (set_attr "has_nf" "1")
+   (set_attr "mode" "HI")])
+
 ;; It may seem that nonimmediate operand is proper one for operand 1.
 ;; The addsi_1 pattern allows nonimmediate operand at that place and
 ;; we take care in ix86_binary_operator_ok to not allow two memory
diff --git a/gcc/testsuite/gcc.target/i386/pr113729.c 
b/gcc/testsuite/gcc.target/i386/pr113729.c
new file mode 100644
index ..34518a5cdc88
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr113729.c
@@ -0,0 +1,27 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-mapx-features=ndd -march=x86-64 -O2" } */
+/* { dg-final { scan-assembler-not "movz"} } */
+
+#include <stdint.h>
+
+#define F(TYPE1, TYPE2, OP_NAME, OP)\
+TYPE1   \
+__attribute__ ((noipa)) \
+f_##OP_NAME##_##TYPE2##_##TYPE1 (unsigned TYPE2 b)  \
+{   \
+  return (unsigned TYPE2) (200 OP b);   \
+}   \
+TYPE1   \
+__attribute__ ((noipa)) \
+f1_##OP_NAME##_##TYPE2##_##TYPE1\
+(unsigned TYPE2 a, unsigned TYPE2 b)\
+{   \
+  return (unsigned TYPE2) (a OP b); \
+}  
+
+/* addqi_1_zext */
+F (short, char, add, +)
+F (int, char, add, +)
+F (int64_t, char, add,

[gcc r15-2916] i386: Optimization for APX NDD is always zero-uppered for shift

2024-08-14 Thread Kong Lingling via Gcc-cvs
https://gcc.gnu.org/g:edb2712936368e803fd67aa520323054b2a5c5e7

commit r15-2916-gedb2712936368e803fd67aa520323054b2a5c5e7
Author: Lingling Kong 
Date:   Wed Aug 14 16:48:30 2024 +0800

i386: Optimization for APX NDD is always zero-uppered for shift

gcc/ChangeLog:

PR target/113729
* config/i386/i386.md (*ashlqi3_1_zext):
New define_insn.
(*ashlhi3_1_zext): Ditto.
(*qi3_1_zext): Ditto.
(*hi3_1_zext): Ditto.

gcc/testsuite/ChangeLog:

* gcc.target/i386/pr113729.c: Add testcase for shift and rotate.

Diff:
---
 gcc/config/i386/i386.md  | 66 
 gcc/testsuite/gcc.target/i386/pr113729.c | 62 ++
 2 files changed, 128 insertions(+)

diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 9f933e908d3d..d3ba2425f16e 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -15919,6 +15919,28 @@
   [(set_attr "type" "ishiftx")
(set_attr "mode" "SI")])
 
+(define_insn "*ashlqi3_1_zext"
+  [(set (match_operand:SWI248x 0 "register_operand" "=r")
+   (zero_extend:SWI248x
+ (ashift:QI (match_operand:QI 1 "nonimmediate_operand" "rm")
+(match_operand:QI 2 "nonmemory_operand" "cI"]
+  "TARGET_APX_NDD && "
+  "sal{b}\t{%2, %1, %b0|%b0, %1, %2}"
+  [(set_attr "type" "ishiftx")
+   (set_attr "has_nf" "1")
+   (set_attr "mode" "QI")])
+
+(define_insn "*ashlhi3_1_zext"
+  [(set (match_operand:SWI48x 0 "register_operand" "=r")
+   (zero_extend:SWI48x
+ (ashift:HI (match_operand:HI 1 "nonimmediate_operand" "rm")
+(match_operand:QI 2 "nonmemory_operand" "cI"]
+  "TARGET_APX_NDD && "
+  "sal{w}\t{%2, %1, %w0|%w0, %1, %2}"
+  [(set_attr "type" "ishiftx")
+   (set_attr "has_nf" "1")
+   (set_attr "mode" "HI")])
+
 (define_insn "*ashlsi3_1_zext"
   [(set (match_operand:DI 0 "register_operand" "=r,r,r,r")
(zero_extend:DI
@@ -17403,6 +17425,28 @@
   [(set_attr "type" "ishiftx")
(set_attr "mode" "SI")])
 
+(define_insn "*qi3_1_zext"
+  [(set (match_operand:SWI248x 0 "register_operand" "=r")
+   (zero_extend:SWI248x
+ (any_shiftrt:QI (match_operand:QI 1 "nonimmediate_operand" "rm")
+ (match_operand:QI 2 "nonmemory_operand" "cI"]
+  "TARGET_APX_NDD && "
+  "{b}\t{%2, %1, %b0|%b0, %1, %2}"
+  [(set_attr "type" "ishift")
+   (set_attr "has_nf" "1")
+   (set_attr "mode" "QI")])
+
+(define_insn "*hi3_1_zext"
+  [(set (match_operand:SWI48x 0 "register_operand" "=r")
+   (zero_extend:SWI48x
+ (any_shiftrt:HI (match_operand:HI 1 "nonimmediate_operand" "rm")
+ (match_operand:QI 2 "nonmemory_operand" "cI"]
+  "TARGET_APX_NDD && "
+  "{w}\t{%2, %1, %w0|%w0, %1, %2}"
+  [(set_attr "type" "ishift")
+   (set_attr "has_nf" "1")
+   (set_attr "mode" "HI")])
+
 (define_insn "*si3_1_zext"
   [(set (match_operand:DI 0 "register_operand" "=r,r,r,?k")
(zero_extend:DI
@@ -18179,6 +18223,28 @@
   [(set_attr "type" "rotatex")
(set_attr "mode" "SI")])
 
+(define_insn "*qi3_1_zext"
+  [(set (match_operand:SWI248x 0 "register_operand" "=r")
+   (zero_extend:SWI248x
+ (any_rotate:QI (match_operand:QI 1 "nonimmediate_operand" "rm")
+(match_operand:QI 2 "nonmemory_operand" "cI"]
+  "TARGET_APX_NDD && "
+  "{b}\t{%2, %1, %b0|%b0, %1, %2}"
+  [(set_attr "type" "rotate")
+   (set_attr "has_nf" "1")
+   (set_attr "mode" "QI")])
+
+(define_insn "*hi3_1_zext"
+  [(set (match_operand:SWI48x 0 "register_operand" "=r")
+   (zero_extend:SWI48x
+ (any_rotate:HI (match_operand:HI 1 "nonimmediate_operand" "rm")
+(match_operand:QI 2 "nonmemory_operand" "cI"]
+  "TARGET_APX_NDD && "
+  "{w}\t{%2, %1, %w0|%w0, %1, %2}"
+  [(set_attr "type" "rotate")
+   (set_attr "has_nf" "1")
+   (set_attr "mode" "HI")])
+
 (define_insn "*si3_1_zext"
   [(set (match_operand:DI 0 "register_operand" "=r,r,r")
(zero_extend:DI
diff --git a/gcc/testsuite/gcc.target/i386/pr113729.c 
b/gcc/testsuite/gcc.target/i386/pr113729.c
index d17e0999e6c0..53b6df78c26b 100644
--- a/gcc/testsuite/gcc.target/i386/pr113729.c
+++ b/gcc/testsuite/gcc.target/i386/pr113729.c
@@ -71,3 +71,65 @@ F (int, char, xor, ^)
 F (int64_t, char, xor, ^)
 F (int, short, xor, ^)
 F (int64_t, short, xor, ^)
+
+#define F2(TYPE1,TYPE2, OP_NAME, OP, IMM)  \
+TYPE1  \
+__attribute__ ((noipa))\
+f2_##OP_NAME##_##TYPE1##_##TYPE2 (TYPE2 a) \
+{  \
+  unsigned TYPE2 b = a OP IMM; \
+  return b;\
+}  
+
+/* ashlqi3_1_zext */
+F2 (short, char, shl, <<, 7)
+F2 (int, char, shl, <<, 6)
+F2 (int64_t, char, shl, <<, 7)
+F2 (int, short, shl, <<, 6)
+F2 (int64_t, short, shl, <<, 3)
+
+/* ashrqi3_1_zext */
+F2 (short, char, sar, 

[gcc r15-2914] i386: Optimization for APX NDD is always zero-uppered for sub/adc/sbb

2024-08-14 Thread Kong Lingling via Gcc-cvs
https://gcc.gnu.org/g:1b761744dc3ea6f3d66a9c48f16719ad1c92d5ad

commit r15-2914-g1b761744dc3ea6f3d66a9c48f16719ad1c92d5ad
Author: Lingling Kong 
Date:   Wed Aug 14 16:38:46 2024 +0800

i386: Optimization for APX NDD is always zero-uppered for sub/adc/sbb

gcc/ChangeLog:

PR target/113729
* config/i386/i386.md (*subqi_1_zext): New
define_insn.
(*subhi_1_zext): Ditto.
(*addqi3_carry_zext): Ditto.
(*addhi3_carry_zext): Ditto.
(*addqi3_carry_zext_0): Ditto.
(*addhi3_carry_zext_0): Ditto.
(*addqi3_carry_zext_0r): Ditto.
(*addhi3_carry_zext_0r): Ditto.
(*subqi3_carry_zext): Ditto.
(*subhi3_carry_zext): Ditto.
(*subqi3_carry_zext_0): Ditto.
(*subhi3_carry_zext_0): Ditto.
(*subqi3_carry_zext_0r): Ditto.
(*subhi3_carry_zext_0r): Ditto.

gcc/testsuite/ChangeLog:

* gcc.target/i386/pr113729.c: Add more tests.
* gcc.target/i386/pr113729-adc-sbb.c: New test.

Diff:
---
 gcc/config/i386/i386.md  | 244 ++-
 gcc/testsuite/gcc.target/i386/pr113729-adc-sbb.c |  63 ++
 gcc/testsuite/gcc.target/i386/pr113729.c |   6 +
 3 files changed, 305 insertions(+), 8 deletions(-)

diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 5f237275bdc8..e267b330b251 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -8043,6 +8043,34 @@
(set_attr "has_nf" "1")
(set_attr "mode" "")])
 
+(define_insn "*subqi_1_zext"
+  [(set (match_operand:SWI248x 0 "register_operand" "=r,r")
+   (zero_extend:SWI248x
+ (minus:QI (match_operand:QI 1 "nonimmediate_operand" "rm,r")
+   (match_operand:QI 2 "x86_64_general_operand" "rn,m"]
+  "TARGET_APX_NDD && 
+   && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
+  "@
+  sub{b}\t{%2, %1, %b0|%b0, %1, %2}
+  sub{b}\t{%2, %1, %b0|%b0, %1, %2}"
+  [(set_attr "type" "alu")
+   (set_attr "has_nf" "1")
+   (set_attr "mode" "QI")])
+
+(define_insn "*subhi_1_zext"
+  [(set (match_operand:SWI48x 0 "register_operand" "=r,r")
+   (zero_extend:SWI48x
+ (minus:HI (match_operand:HI 1 "nonimmediate_operand" "rm,r")
+   (match_operand:HI 2 "x86_64_general_operand" "rn,m"]
+  "TARGET_APX_NDD && 
+   && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
+  "@
+  sub{w}\t{%2, %1, %w0|%w0, %1, %2}
+  sub{w}\t{%2, %1, %w0|%w0, %1, %2}"
+  [(set_attr "type" "alu")
+   (set_attr "has_nf" "1")
+   (set_attr "mode" "HI")])
+
 (define_insn "*subsi_1_zext"
   [(set (match_operand:DI 0 "register_operand" "=r,r,r")
(zero_extend:DI
@@ -8762,6 +8790,44 @@
(set_attr "pent_pair" "pu")
(set_attr "mode" "")])
 
+(define_insn "*addqi3_carry_zext"
+  [(set (match_operand:SWI248x 0 "register_operand" "=r,r")
+   (zero_extend:SWI248x
+ (plus:QI
+   (plus:QI (match_operator:QI 3 "ix86_carry_flag_operator"
+ [(reg FLAGS_REG) (const_int 0)])
+(match_operand:QI 1 "nonimmediate_operand" "%rm,r"))
+   (match_operand:QI 2 "x86_64_general_operand" "rn,m"
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_APX_NDD
+   && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
+  "@
+  adc{b}\t{%2, %1, %b0|%b0, %1, %2}
+  adc{b}\t{%2, %1, %b0|%b0, %1, %2}"
+  [(set_attr "type" "alu")
+   (set_attr "use_carry" "1")
+   (set_attr "pent_pair" "pu")
+   (set_attr "mode" "QI")])
+
+(define_insn "*addhi3_carry_zext"
+  [(set (match_operand:SWI48x 0 "register_operand" "=r,r")
+   (zero_extend:SWI48x
+ (plus:HI
+   (plus:HI (match_operator:HI 3 "ix86_carry_flag_operator"
+ [(reg FLAGS_REG) (const_int 0)])
+(match_operand:HI 1 "nonimmediate_operand" "%rm,r"))
+   (match_operand:HI 2 "x86_64_general_operand" "rn,m"
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_APX_NDD
+   && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
+  "@
+  adc{w}\t{%2, %1, %w0|%w0, %1, %2}
+  adc{w}\t{%2, %1, %w0|%w0, %1, %2}"
+  [(set_attr "type" "alu")
+   (set_attr "use_carry" "1")
+   (set_attr "pent_pair" "pu")
+   (set_attr "mode" "HI")])
+
 (define_insn "*addsi3_carry_zext"
   [(set (match_operand:DI 0 "register_operand" "=r,r,r")
(zero_extend:DI
@@ -8783,6 +8849,34 @@
(set_attr "pent_pair" "pu")
(set_attr "mode" "SI")])
 
+(define_insn "*addqi3_carry_zext_0"
+  [(set (match_operand:SWI248x 0 "register_operand" "=r")
+   (zero_extend:SWI248x
+ (plus:QI (match_operator:QI 2 "ix86_carry_flag_operator"
+   [(reg FLAGS_REG) (const_int 0)])
+  (match_operand:QI 1 "nonimmediate_operand" "rm"
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_APX_NDD"
+  "adc{b}\t{$0, %1, %b0|%b0, %1, 0}"
+  [(set_attr "type" "alu")
+   (set_attr "use_carry" "1")
+   (set_attr "pent_pair" "pu")
+   (set_attr "mode" "QI")])
+

[gcc r15-2915] i386: Optimization for APX NDD is always zero-uppered for logic

2024-08-14 Thread Kong Lingling via Gcc-cvs
https://gcc.gnu.org/g:d08a5f211135374b3ad700780c46a198cd320328

commit r15-2915-gd08a5f211135374b3ad700780c46a198cd320328
Author: Lingling Kong 
Date:   Wed Aug 14 16:42:29 2024 +0800

i386: Optimization for APX NDD is always zero-uppered for logic

gcc/ChangeLog:

PR target/113729
* config/i386/i386.md (*andqi_1_zext): New
define_insn.
(*andhi_1_zext): Ditto.
(*qi_1_zext): Ditto.
(*hi_1_zext): Ditto.
(*negqi_1_zext): Ditto.
(*neghi_1_zext): Ditto.
(*one_cmplqi2_1_zext): Ditto.
(*one_cmplhi2_1_zext): Ditto.

gcc/testsuite/ChangeLog:

* gcc.target/i386/pr113729.c: Add more tests.

Diff:
---
 gcc/config/i386/i386.md  | 94 
 gcc/testsuite/gcc.target/i386/pr113729.c | 40 ++
 2 files changed, 134 insertions(+)

diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index e267b330b251..9f933e908d3d 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -12478,6 +12478,34 @@
 operands[2] = gen_lowpart (SImode, operands[2]);
 })
 
+(define_insn "*andqi_1_zext"
+  [(set (match_operand:SWI248x 0 "register_operand" "=r,r")
+   (zero_extend:SWI248x
+ (and:QI (match_operand:QI 1 "nonimmediate_operand" "%rm,r")
+ (match_operand:QI 2 "x86_64_general_operand" "rn,m"]
+  "TARGET_APX_NDD && 
+   && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
+  "@
+  and{b}\t{%2, %1, %b0|%b0, %1, %2}
+  and{b}\t{%2, %1, %b0|%b0, %1, %2}"
+  [(set_attr "type" "alu")
+   (set_attr "has_nf" "1")
+   (set_attr "mode" "QI")])
+
+(define_insn "*andhi_1_zext"
+  [(set (match_operand:SWI48x 0 "register_operand" "=r,r")
+   (zero_extend:SWI48x
+ (and:HI (match_operand:HI 1 "nonimmediate_operand" "%rm,r")
+ (match_operand:HI 2 "x86_64_general_operand" "rn,m"]
+  "TARGET_APX_NDD && 
+   && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
+  "@
+  and{w}\t{%2, %1, %w0|%w0, %1, %2}
+  and{w}\t{%2, %1, %w0|%w0, %1, %2}"
+  [(set_attr "type" "alu")
+   (set_attr "has_nf" "1")
+   (set_attr "mode" "HI")])
+
 ;; See comment for addsi_1_zext why we do use nonimmediate_operand
 (define_insn "*andsi_1_zext"
   [(set (match_operand:DI 0 "register_operand" "=r,r,r,r")
@@ -13539,6 +13567,34 @@
   operands[5] = gen_reg_rtx (mode);
 })
 
+(define_insn "*qi_1_zext"
+  [(set (match_operand:SWI248x 0 "register_operand" "=r,r")
+   (zero_extend:SWI248x
+(any_or:QI (match_operand:QI 1 "nonimmediate_operand" "%rm,r")
+   (match_operand:QI 2 "x86_64_general_operand" "rn,m"]
+  "TARGET_APX_NDD && 
+   && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
+  "@
+  {b}\t{%2, %1, %b0|%b0, %1, %2}
+  {b}\t{%2, %1, %b0|%b0, %1, %2}"
+  [(set_attr "type" "alu")
+   (set_attr "has_nf" "1")
+   (set_attr "mode" "QI")])
+
+(define_insn "*hi_1_zext"
+  [(set (match_operand:SWI48x 0 "register_operand" "=r,r")
+   (zero_extend:SWI48x
+(any_or:HI (match_operand:HI 1 "nonimmediate_operand" "%rm,r")
+   (match_operand:HI 2 "x86_64_general_operand" "rn,m"]
+  "TARGET_APX_NDD && 
+   && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
+  "@
+  {w}\t{%2, %1, %w0|%w0, %1, %2}
+  {w}\t{%2, %1, %w0|%w0, %1, %2}"
+  [(set_attr "type" "alu")
+   (set_attr "has_nf" "1")
+   (set_attr "mode" "HI")])
+
 ;; See comment for addsi_1_zext why we do use nonimmediate_operand
 (define_insn "*si_1_zext"
   [(set (match_operand:DI 0 "register_operand" "=r,r,r,r")
@@ -14157,6 +14213,26 @@
(set_attr "has_nf" "1")
(set_attr "mode" "")])
 
+(define_insn "*negqi_1_zext"
+  [(set (match_operand:SWI248x 0 "register_operand" "=r")
+   (zero_extend:SWI248x
+ (neg:QI (match_operand:QI 1 "nonimmediate_operand" "rm"]
+  "TARGET_APX_NDD && "
+  "neg{b}\t{%b1, %b0|%b0, %b1}"
+  [(set_attr "type" "negnot")
+   (set_attr "has_nf" "1")
+   (set_attr "mode" "QI")])
+
+(define_insn "*neghi_1_zext"
+  [(set (match_operand:SWI48x 0 "register_operand" "=r")
+   (zero_extend:SWI48x
+ (neg:HI (match_operand:HI 1 "nonimmediate_operand" "rm"]
+  "TARGET_APX_NDD && "
+  "neg{w}\t{%w1, %w0|%w0, %w1}"
+  [(set_attr "type" "negnot")
+   (set_attr "has_nf" "1")
+   (set_attr "mode" "HI")])
+
 (define_insn "*negsi_1_zext"
   [(set (match_operand:DI 0 "register_operand" "=r,r")
(zero_extend:DI
@@ -14779,6 +14855,24 @@
(set_attr "type" "negnot,negnot,msklog")
(set_attr "mode" "")])
 
+(define_insn "*one_cmplqi2_1_zext"
+  [(set (match_operand:SWI248x 0 "register_operand" "=r")
+   (zero_extend:SWI248x
+ (not:QI (match_operand:QI 1 "nonimmediate_operand" "rm"]
+  "TARGET_APX_NDD"
+  "not{b}\t{%1, %b0|%b0, %1}"
+  [(set_attr "type" "negnot")
+   (set_attr "mode" "QI")])
+
+(define_insn "*one_cmplhi2_1_zext"
+  [(set (match_operand:SWI48x 0 "register_operand" "=r")
+   (zero_extend:SWI48x
+ (not:HI

[gcc r15-2917] Fortran: fix minor frontend GMP leaks

2024-08-14 Thread Harald Anlauf via Gcc-cvs
https://gcc.gnu.org/g:a82c4dfe52dac332e0a6f1522459d2f8548419bc

commit r15-2917-ga82c4dfe52dac332e0a6f1522459d2f8548419bc
Author: Harald Anlauf 
Date:   Tue Aug 13 21:17:45 2024 +0200

Fortran: fix minor frontend GMP leaks

gcc/fortran/ChangeLog:

* simplify.cc (gfc_simplify_sizeof): Clear used gmp variable.
* target-memory.cc (gfc_target_expr_size): Likewise.

Diff:
---
 gcc/fortran/simplify.cc  | 10 +++---
 gcc/fortran/target-memory.cc |  2 ++
 2 files changed, 9 insertions(+), 3 deletions(-)

diff --git a/gcc/fortran/simplify.cc b/gcc/fortran/simplify.cc
index 8ddd491de113..953d59efd70e 100644
--- a/gcc/fortran/simplify.cc
+++ b/gcc/fortran/simplify.cc
@@ -7778,9 +7778,13 @@ gfc_simplify_sizeof (gfc_expr *x)
  || x->ts.u.cl->length->expr_type != EXPR_CONSTANT))
 return NULL;
 
-  if (x->rank && x->expr_type != EXPR_ARRAY
-  && !gfc_array_size (x, &array_size))
-return NULL;
+  if (x->rank && x->expr_type != EXPR_ARRAY)
+{
+  if (!gfc_array_size (x, &array_size))
+   return NULL;
+
+  mpz_clear (array_size);
+}
 
   result = gfc_get_constant_expr (BT_INTEGER, gfc_index_integer_kind,
  &x->where);
diff --git a/gcc/fortran/target-memory.cc b/gcc/fortran/target-memory.cc
index a02db7a06e49..0a289f32d373 100644
--- a/gcc/fortran/target-memory.cc
+++ b/gcc/fortran/target-memory.cc
@@ -158,6 +158,8 @@ gfc_target_expr_size (gfc_expr *e, size_t *size)
asz = mpz_get_ui (tmp);
   else
return false;
+
+  mpz_clear (tmp);
 }
   else
 asz = 1;


[gcc(refs/users/meissner/heads/work175-bugs)] Revert changes

2024-08-14 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:7e9545480f33141bedcbc3e5bf86fdb3db0d2a03

commit 7e9545480f33141bedcbc3e5bf86fdb3db0d2a03
Author: Michael Meissner 
Date:   Wed Aug 14 13:12:32 2024 -0400

Revert changes

Diff:
---
 gcc/ChangeLog.bugs | 31 --
 gcc/config/rs6000/vsx.md   | 18 -
 gcc/testsuite/gcc.target/powerpc/pr99293.c | 22 -
 3 files changed, 29 insertions(+), 42 deletions(-)

diff --git a/gcc/ChangeLog.bugs b/gcc/ChangeLog.bugs
index 5e3009425c27..4a1989456fc6 100644
--- a/gcc/ChangeLog.bugs
+++ b/gcc/ChangeLog.bugs
@@ -2,6 +2,33 @@
 
 Optimize splat of a V2DF/V2DI extract with constant element
 
+We had optimizations for splat of a vector extract for the other vector
+types, but we missed having one for V2DI and V2DF.  This patch adds a
+combiner insn to do this optimization.
+
+In looking at the source, we had similar optimizations for V4SI and V4SF
+extract and splats, but we missed doing V2DI/V2DF.
+
+Without the patch for the code:
+
+   vector long long splat_dup_l_0 (vector long long v)
+   {
+ return __builtin_vec_splats (__builtin_vec_extract (v, 0));
+   }
+
+the compiler generates (on a little endian power9):
+
+   splat_dup_l_0:
+   mfvsrld 9,34
+   mtvsrdd 34,9,9
+   blr
+
+Now it generates:
+
+   splat_dup_l_0:
+   xxpermdi 34,34,34,3
+   blr
+
 2024-08-14  Michael Meissner  
 
 gcc/
@@ -33,7 +60,7 @@ With this patch, GCC now realizes that the vector shift 
instructions will look
 at the bottom 6 bits for the shift count, and it can use either a VSPLTISW or
 XXSPLTIB instruction to load the shift count.
 
-[gcc]
+gcc/
 2024-08-12  Michael Meissner  
 
PR target/89213
@@ -48,7 +75,7 @@ XXSPLTIB instruction to load the shift count.
* config/rs6000/predicates.md (vector_shift_constant): New
predicate.
 
-[gcc/testsuite]
+gcc/testsuite/
 2024-08-12  Michael Meissner  
 
PR target/89213
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index 6e8cd041f3b8..7892477fa922 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -4797,24 +4797,6 @@
   "lxvdsx %x0,%y1"
   [(set_attr "type" "vecload")])
 
-;; Optimize SPLAT of an extract from a V2DF/V2DI vector with a constant element
-(define_insn "*vsx_splat_extract_"
-  [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
-   (vec_duplicate:VSX_D
-(vec_select:
- (match_operand:VSX_D 1 "vsx_register_operand" "wa")
- (parallel [(match_operand 2 "const_0_to_1_operand" "n")]]
-  "VECTOR_MEM_VSX_P (mode)"
-{
-  int which_word = INTVAL (operands[2]);
-  if (!BYTES_BIG_ENDIAN)
-which_word = 1 - which_word;
-
-  operands[3] = GEN_INT (which_word ? 3 : 0);
-  return "xxpermdi %x0,%x1,%x1,%3";
-}
-  [(set_attr "type" "vecperm")])
-
 ;; V4SI splat support
 (define_insn "vsx_splat_v4si"
   [(set (match_operand:V4SI 0 "vsx_register_operand" "=wa,wa")
diff --git a/gcc/testsuite/gcc.target/powerpc/pr99293.c 
b/gcc/testsuite/gcc.target/powerpc/pr99293.c
deleted file mode 100644
index 20adc1f27f65..
--- a/gcc/testsuite/gcc.target/powerpc/pr99293.c
+++ /dev/null
@@ -1,22 +0,0 @@
-/* { dg-do compile { target powerpc*-*-* } } */
-/* { dg-require-effective-target powerpc_vsx_ok } */
-/* { dg-options "-O2 -mvsx" } */
-
-/* Test for PR 99263, which wants to do:
-   __builtin_vec_splats (__builtin_vec_extract (v, n))
-
-   where v is a V2DF or V2DI vector and n is either 0 or 1.  Previously the
-   compiler would do a direct move to the GPR registers to select the item and 
a
-   direct move from the GPR registers to do the splat.  */
-
-vector long long splat_dup_l_0 (vector long long v)
-{
-  return __builtin_vec_splats (__builtin_vec_extract (v, 0));
-}
-
-vector long long splat_dup_l_1 (vector long long v)
-{
-  return __builtin_vec_splats (__builtin_vec_extract (v, 1));
-}
-
-/* { dg-final { scan-assembler-times "xxpermdi" 2 } } */


[gcc(refs/users/meissner/heads/work175-bugs)] Optimize splat of a V2DF/V2DI extract with constant element

2024-08-14 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:8ef3f882631e4472edf6972ff34ccea5af594095

commit 8ef3f882631e4472edf6972ff34ccea5af594095
Author: Michael Meissner 
Date:   Wed Aug 14 13:16:28 2024 -0400

Optimize splat of a V2DF/V2DI extract with constant element

We had optimizations for splat of a vector extract for the other vector
types, but we missed having one for V2DI and V2DF.  This patch adds a
combiner insn to do this optimization.

In looking at the source, we had similar optimizations for V4SI and V4SF
extract and splats, but we missed doing V2DI/V2DF.

Without the patch for the code:

vector long long splat_dup_l_0 (vector long long v)
{
  return __builtin_vec_splats (__builtin_vec_extract (v, 0));
}

the compiler generates (on a little endian power9):

splat_dup_l_0:
mfvsrld 9,34
mtvsrdd 34,9,9
blr

Now it generates:

splat_dup_l_0:
xxpermdi 34,34,34,3
blr

2024-08-14  Michael Meissner  

gcc/

* config/rs6000/vsx.md (vsx_splat_extract_): New insn.

gcc/testsuite/

* gcc.target/powerpc/builtins-1.c: Adjust insn counts.
* gcc.target/powerpc/pr99293.c: New test.

Diff:
---
 gcc/config/rs6000/vsx.md  | 18 ++
 gcc/testsuite/gcc.target/powerpc/builtins-1.c |  2 +-
 gcc/testsuite/gcc.target/powerpc/pr99293.c| 22 ++
 3 files changed, 41 insertions(+), 1 deletion(-)

diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index 7892477fa922..6e8cd041f3b8 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -4797,6 +4797,24 @@
   "lxvdsx %x0,%y1"
   [(set_attr "type" "vecload")])
 
+;; Optimize SPLAT of an extract from a V2DF/V2DI vector with a constant element
+(define_insn "*vsx_splat_extract_"
+  [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
+   (vec_duplicate:VSX_D
+(vec_select:
+ (match_operand:VSX_D 1 "vsx_register_operand" "wa")
+ (parallel [(match_operand 2 "const_0_to_1_operand" "n")]]
+  "VECTOR_MEM_VSX_P (mode)"
+{
+  int which_word = INTVAL (operands[2]);
+  if (!BYTES_BIG_ENDIAN)
+which_word = 1 - which_word;
+
+  operands[3] = GEN_INT (which_word ? 3 : 0);
+  return "xxpermdi %x0,%x1,%x1,%3";
+}
+  [(set_attr "type" "vecperm")])
+
 ;; V4SI splat support
 (define_insn "vsx_splat_v4si"
   [(set (match_operand:V4SI 0 "vsx_register_operand" "=wa,wa")
diff --git a/gcc/testsuite/gcc.target/powerpc/builtins-1.c 
b/gcc/testsuite/gcc.target/powerpc/builtins-1.c
index 8410a5fd4319..1f81f492ad61 100644
--- a/gcc/testsuite/gcc.target/powerpc/builtins-1.c
+++ b/gcc/testsuite/gcc.target/powerpc/builtins-1.c
@@ -1035,4 +1035,4 @@ foo156 (vector unsigned short usa)
 /* { dg-final { scan-assembler-times {\mvmrglb\M} 3 } } */
 /* { dg-final { scan-assembler-times {\mvmrgew\M} 4 } } */
 /* { dg-final { scan-assembler-times {\mvsplth|xxsplth\M} 4 } } */
-/* { dg-final { scan-assembler-times {\mxxpermdi\M} 44 } } */
+/* { dg-final { scan-assembler-times {\mxxpermdi\M} 34 } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/pr99293.c 
b/gcc/testsuite/gcc.target/powerpc/pr99293.c
new file mode 100644
index ..20adc1f27f65
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr99293.c
@@ -0,0 +1,22 @@
+/* { dg-do compile { target powerpc*-*-* } } */
+/* { dg-require-effective-target powerpc_vsx_ok } */
+/* { dg-options "-O2 -mvsx" } */
+
+/* Test for PR 99293, which wants to do:
+   __builtin_vec_splats (__builtin_vec_extract (v, n))
+
+   where v is a V2DF or V2DI vector and n is either 0 or 1.  Previously the
+   compiler would do a direct move to the GPR registers to select the item and a
+   direct move from the GPR registers to do the splat.  */
+
+vector long long splat_dup_l_0 (vector long long v)
+{
+  return __builtin_vec_splats (__builtin_vec_extract (v, 0));
+}
+
+vector long long splat_dup_l_1 (vector long long v)
+{
+  return __builtin_vec_splats (__builtin_vec_extract (v, 1));
+}
+
+/* { dg-final { scan-assembler-times "xxpermdi" 2 } } */


[gcc(refs/users/meissner/heads/work175-bugs)] Update ChangeLog.*

2024-08-14 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:4f279acae2495f372379906e1a25eb639df5959f

commit 4f279acae2495f372379906e1a25eb639df5959f
Author: Michael Meissner 
Date:   Wed Aug 14 13:18:45 2024 -0400

Update ChangeLog.*

Diff:
---
 gcc/ChangeLog.bugs | 1 +
 1 file changed, 1 insertion(+)

diff --git a/gcc/ChangeLog.bugs b/gcc/ChangeLog.bugs
index 4a1989456fc6..e0632d4ab848 100644
--- a/gcc/ChangeLog.bugs
+++ b/gcc/ChangeLog.bugs
@@ -37,6 +37,7 @@ gcc/
 
 gcc/testsuite/
 
+   * gcc.target/powerpc/builtins-1.c: Adjust insn count.
* gcc.target/powerpc/pr99293.c: New test.


[gcc(refs/users/meissner/heads/work175-bugs)] Revert changes

2024-08-14 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:b6a06e4a909946cecbe42081cf18d50b419c9c5c

commit b6a06e4a909946cecbe42081cf18d50b419c9c5c
Author: Michael Meissner 
Date:   Wed Aug 14 14:02:33 2024 -0400

Revert changes

Diff:
---
 gcc/config/rs6000/vsx.md  | 18 --
 gcc/testsuite/gcc.target/powerpc/builtins-1.c |  2 +-
 gcc/testsuite/gcc.target/powerpc/pr99293.c| 22 --
 3 files changed, 1 insertion(+), 41 deletions(-)

diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index 6e8cd041f3b8..7892477fa922 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -4797,24 +4797,6 @@
   "lxvdsx %x0,%y1"
   [(set_attr "type" "vecload")])
 
-;; Optimize SPLAT of an extract from a V2DF/V2DI vector with a constant element
-(define_insn "*vsx_splat_extract_<mode>"
-  [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
-   (vec_duplicate:VSX_D
-(vec_select:<VEC_base>
- (match_operand:VSX_D 1 "vsx_register_operand" "wa")
- (parallel [(match_operand 2 "const_0_to_1_operand" "n")]))))]
-  "VECTOR_MEM_VSX_P (<MODE>mode)"
-{
-  int which_word = INTVAL (operands[2]);
-  if (!BYTES_BIG_ENDIAN)
-which_word = 1 - which_word;
-
-  operands[3] = GEN_INT (which_word ? 3 : 0);
-  return "xxpermdi %x0,%x1,%x1,%3";
-}
-  [(set_attr "type" "vecperm")])
-
 ;; V4SI splat support
 (define_insn "vsx_splat_v4si"
   [(set (match_operand:V4SI 0 "vsx_register_operand" "=wa,wa")
diff --git a/gcc/testsuite/gcc.target/powerpc/builtins-1.c 
b/gcc/testsuite/gcc.target/powerpc/builtins-1.c
index 1f81f492ad61..8410a5fd4319 100644
--- a/gcc/testsuite/gcc.target/powerpc/builtins-1.c
+++ b/gcc/testsuite/gcc.target/powerpc/builtins-1.c
@@ -1035,4 +1035,4 @@ foo156 (vector unsigned short usa)
 /* { dg-final { scan-assembler-times {\mvmrglb\M} 3 } } */
 /* { dg-final { scan-assembler-times {\mvmrgew\M} 4 } } */
 /* { dg-final { scan-assembler-times {\mvsplth|xxsplth\M} 4 } } */
-/* { dg-final { scan-assembler-times {\mxxpermdi\M} 34 } } */
+/* { dg-final { scan-assembler-times {\mxxpermdi\M} 44 } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/pr99293.c 
b/gcc/testsuite/gcc.target/powerpc/pr99293.c
deleted file mode 100644
index 20adc1f27f65..
--- a/gcc/testsuite/gcc.target/powerpc/pr99293.c
+++ /dev/null
@@ -1,22 +0,0 @@
-/* { dg-do compile { target powerpc*-*-* } } */
-/* { dg-require-effective-target powerpc_vsx_ok } */
-/* { dg-options "-O2 -mvsx" } */
-
-/* Test for PR 99293, which wants to do:
-   __builtin_vec_splats (__builtin_vec_extract (v, n))
-
-   where v is a V2DF or V2DI vector and n is either 0 or 1.  Previously the
-   compiler would do a direct move to the GPR registers to select the item and a
-   direct move from the GPR registers to do the splat.  */
-
-vector long long splat_dup_l_0 (vector long long v)
-{
-  return __builtin_vec_splats (__builtin_vec_extract (v, 0));
-}
-
-vector long long splat_dup_l_1 (vector long long v)
-{
-  return __builtin_vec_splats (__builtin_vec_extract (v, 1));
-}
-
-/* { dg-final { scan-assembler-times "xxpermdi" 2 } } */


[gcc(refs/users/meissner/heads/work175-bugs)] Optimize splat of a V2DF/V2DI extract with constant element

2024-08-14 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:4d8b53a9e96d9ea7dcd2299480c3275459a6ff72

commit 4d8b53a9e96d9ea7dcd2299480c3275459a6ff72
Author: Michael Meissner 
Date:   Wed Aug 14 14:03:26 2024 -0400

Optimize splat of a V2DF/V2DI extract with constant element

We had optimizations for splat of a vector extract for the other vector
types, but we missed having one for V2DI and V2DF.  This patch adds a
combiner insn to do this optimization.

In looking at the source, we had similar optimizations for V4SI and V4SF
extract and splats, but we missed doing V2DI/V2DF.

Without the patch for the code:

vector long long splat_dup_l_0 (vector long long v)
{
  return __builtin_vec_splats (__builtin_vec_extract (v, 0));
}

the compiler generates (on a little endian power9):

splat_dup_l_0:
mfvsrld 9,34
mtvsrdd 34,9,9
blr

Now it generates:

splat_dup_l_0:
xxpermdi 34,34,34,3
blr

2024-08-14  Michael Meissner  

gcc/

* config/rs6000/vsx.md (vsx_splat_extract_<mode>): New insn.

gcc/testsuite/

* gcc.target/powerpc/builtins-1.c: Adjust insn count.
* gcc.target/powerpc/pr99293.c: New test.

Diff:
---
 gcc/config/rs6000/vsx.md  | 18 ++
 gcc/testsuite/gcc.target/powerpc/builtins-1.c |  2 +-
 gcc/testsuite/gcc.target/powerpc/pr99293.c| 22 ++
 3 files changed, 41 insertions(+), 1 deletion(-)

diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index 7892477fa922..6e8cd041f3b8 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -4797,6 +4797,24 @@
   "lxvdsx %x0,%y1"
   [(set_attr "type" "vecload")])
 
+;; Optimize SPLAT of an extract from a V2DF/V2DI vector with a constant element
+(define_insn "*vsx_splat_extract_<mode>"
+  [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
+   (vec_duplicate:VSX_D
+(vec_select:<VEC_base>
+ (match_operand:VSX_D 1 "vsx_register_operand" "wa")
+ (parallel [(match_operand 2 "const_0_to_1_operand" "n")]))))]
+  "VECTOR_MEM_VSX_P (<MODE>mode)"
+{
+  int which_word = INTVAL (operands[2]);
+  if (!BYTES_BIG_ENDIAN)
+which_word = 1 - which_word;
+
+  operands[3] = GEN_INT (which_word ? 3 : 0);
+  return "xxpermdi %x0,%x1,%x1,%3";
+}
+  [(set_attr "type" "vecperm")])
+
 ;; V4SI splat support
 (define_insn "vsx_splat_v4si"
   [(set (match_operand:V4SI 0 "vsx_register_operand" "=wa,wa")
diff --git a/gcc/testsuite/gcc.target/powerpc/builtins-1.c 
b/gcc/testsuite/gcc.target/powerpc/builtins-1.c
index 8410a5fd4319..4e7e5384675f 100644
--- a/gcc/testsuite/gcc.target/powerpc/builtins-1.c
+++ b/gcc/testsuite/gcc.target/powerpc/builtins-1.c
@@ -1035,4 +1035,4 @@ foo156 (vector unsigned short usa)
 /* { dg-final { scan-assembler-times {\mvmrglb\M} 3 } } */
 /* { dg-final { scan-assembler-times {\mvmrgew\M} 4 } } */
 /* { dg-final { scan-assembler-times {\mvsplth|xxsplth\M} 4 } } */
-/* { dg-final { scan-assembler-times {\mxxpermdi\M} 44 } } */
+/* { dg-final { scan-assembler-times {\mxxpermdi\M} 42 } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/pr99293.c 
b/gcc/testsuite/gcc.target/powerpc/pr99293.c
new file mode 100644
index ..20adc1f27f65
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr99293.c
@@ -0,0 +1,22 @@
+/* { dg-do compile { target powerpc*-*-* } } */
+/* { dg-require-effective-target powerpc_vsx_ok } */
+/* { dg-options "-O2 -mvsx" } */
+
+/* Test for PR 99293, which wants to do:
+   __builtin_vec_splats (__builtin_vec_extract (v, n))
+
+   where v is a V2DF or V2DI vector and n is either 0 or 1.  Previously the
+   compiler would do a direct move to the GPR registers to select the item and a
+   direct move from the GPR registers to do the splat.  */
+
+vector long long splat_dup_l_0 (vector long long v)
+{
+  return __builtin_vec_splats (__builtin_vec_extract (v, 0));
+}
+
+vector long long splat_dup_l_1 (vector long long v)
+{
+  return __builtin_vec_splats (__builtin_vec_extract (v, 1));
+}
+
+/* { dg-final { scan-assembler-times "xxpermdi" 2 } } */


[gcc r15-2918] s390: Fix high-level builtins vec_gfmsum{,_accum}_128

2024-08-14 Thread Stefan Schulze Frielinghaus via Gcc-cvs
https://gcc.gnu.org/g:e8a7142a697c5d2673adea33ba23af82a89c9559

commit r15-2918-ge8a7142a697c5d2673adea33ba23af82a89c9559
Author: Stefan Schulze Frielinghaus 
Date:   Wed Aug 14 20:01:36 2024 +0200

s390: Fix high-level builtins vec_gfmsum{,_accum}_128

Starting with r14-9449-g9f2b16ce1efef0 builtins were streamlined with
those in LLVM.  In particular s390_vgfm{,a}g have been changed from
UV16QI to UINT128 in order to match those in LLVM.  However, these
low-level builtins are directly used by the high-level builtins
vec_gfmsum{,_accum}_128 which expect UV16QI instead.  Therefore,
introduce new low-level builtins s390_vgfm{,a}g_128 and make use of
them, respectively.

gcc/ChangeLog:

* config/s390/s390-builtin-types.def (BT_FN_UV16QI_UV2DI_UV2DI):
New.
(BT_FN_UV16QI_UV2DI_UV2DI_UV16QI): New.
* config/s390/s390-builtins.def (s390_vgfmg_128): New.
(s390_vgfmag_128): New.
* config/s390/vecintrin.h (vec_gfmsum_128): Use s390_vgfmg_128.
(vec_gfmsum_accum_128): Use s390_vgfmag_128.

Diff:
---
 gcc/config/s390/s390-builtin-types.def | 2 ++
 gcc/config/s390/s390-builtins.def  | 2 ++
 gcc/config/s390/vecintrin.h| 4 ++--
 3 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/gcc/config/s390/s390-builtin-types.def 
b/gcc/config/s390/s390-builtin-types.def
index d70eaade8eab..e6f5631ed7ae 100644
--- a/gcc/config/s390/s390-builtin-types.def
+++ b/gcc/config/s390/s390-builtin-types.def
@@ -221,6 +221,7 @@ DEF_FN_TYPE_2 (BT_FN_UV16QI_UCHAR_UCHAR, BT_UV16QI, 
BT_UCHAR, BT_UCHAR)
 DEF_FN_TYPE_2 (BT_FN_UV16QI_UV16QI_INTPTR, BT_UV16QI, BT_UV16QI, BT_INTPTR)
 DEF_FN_TYPE_2 (BT_FN_UV16QI_UV16QI_UCHAR, BT_UV16QI, BT_UV16QI, BT_UCHAR)
 DEF_FN_TYPE_2 (BT_FN_UV16QI_UV16QI_UV16QI, BT_UV16QI, BT_UV16QI, BT_UV16QI)
+DEF_FN_TYPE_2 (BT_FN_UV16QI_UV2DI_UV2DI, BT_UV16QI, BT_UV2DI, BT_UV2DI)
 DEF_FN_TYPE_2 (BT_FN_UV16QI_UV8HI_UV8HI, BT_UV16QI, BT_UV8HI, BT_UV8HI)
 DEF_FN_TYPE_2 (BT_FN_UV2DI_UCHAR_UCHAR, BT_UV2DI, BT_UCHAR, BT_UCHAR)
 DEF_FN_TYPE_2 (BT_FN_UV2DI_ULONGLONG_INT, BT_UV2DI, BT_ULONGLONG, BT_INT)
@@ -299,6 +300,7 @@ DEF_FN_TYPE_3 (BT_FN_UV16QI_UV16QI_UCHAR_INT, BT_UV16QI, 
BT_UV16QI, BT_UCHAR, BT
 DEF_FN_TYPE_3 (BT_FN_UV16QI_UV16QI_UV16QI_INT, BT_UV16QI, BT_UV16QI, 
BT_UV16QI, BT_INT)
 DEF_FN_TYPE_3 (BT_FN_UV16QI_UV16QI_UV16QI_INTPTR, BT_UV16QI, BT_UV16QI, 
BT_UV16QI, BT_INTPTR)
 DEF_FN_TYPE_3 (BT_FN_UV16QI_UV16QI_UV16QI_UV16QI, BT_UV16QI, BT_UV16QI, 
BT_UV16QI, BT_UV16QI)
+DEF_FN_TYPE_3 (BT_FN_UV16QI_UV2DI_UV2DI_UV16QI, BT_UV16QI, BT_UV2DI, BT_UV2DI, 
BT_UV16QI)
 DEF_FN_TYPE_3 (BT_FN_UV16QI_UV8HI_UV8HI_INTPTR, BT_UV16QI, BT_UV8HI, BT_UV8HI, 
BT_INTPTR)
 DEF_FN_TYPE_3 (BT_FN_UV2DI_UV2DI_ULONGLONG_INT, BT_UV2DI, BT_UV2DI, 
BT_ULONGLONG, BT_INT)
 DEF_FN_TYPE_3 (BT_FN_UV2DI_UV2DI_UV2DI_INT, BT_UV2DI, BT_UV2DI, BT_UV2DI, 
BT_INT)
diff --git a/gcc/config/s390/s390-builtins.def 
b/gcc/config/s390/s390-builtins.def
index 3a63213e5719..7f6190fa8103 100644
--- a/gcc/config/s390/s390-builtins.def
+++ b/gcc/config/s390/s390-builtins.def
@@ -1666,6 +1666,7 @@ B_DEF  (s390_vgfmb, vec_gfmsumv16qi,  
  0,
 B_DEF  (s390_vgfmh, vec_gfmsumv8hi, 0, 
 B_VX,   0,  BT_FN_UV4SI_UV8HI_UV8HI)
 B_DEF  (s390_vgfmf, vec_gfmsumv4si, 0, 
 B_VX,   0,  BT_FN_UV2DI_UV4SI_UV4SI)
 B_DEF  (s390_vgfmg, vec_gfmsum_128, 0, 
 B_VX,   0,  BT_FN_UINT128_UV2DI_UV2DI)
+B_DEF  (s390_vgfmg_128, vec_gfmsum_128, 0, 
 B_VX,   0,  BT_FN_UV16QI_UV2DI_UV2DI)
 
 OB_DEF (s390_vec_gfmsum_accum,  
s390_vec_gfmsum_accum_u8,s390_vec_gfmsum_accum_u32,B_VX,
BT_FN_OV4SI_OV4SI_OV4SI_OV4SI)
 OB_DEF_VAR (s390_vec_gfmsum_accum_u8,   s390_vgfmab,0, 
 0,  BT_OV_UV8HI_UV16QI_UV16QI_UV8HI)
@@ -1676,6 +1677,7 @@ B_DEF  (s390_vgfmab,
vec_gfmsum_accumv16qi,0,
 B_DEF  (s390_vgfmah,vec_gfmsum_accumv8hi,0,
 B_VX,   0,  BT_FN_UV4SI_UV8HI_UV8HI_UV4SI)
 B_DEF  (s390_vgfmaf,vec_gfmsum_accumv4si,0,
 B_VX,   0,  BT_FN_UV2DI_UV4SI_UV4SI_UV2DI)
 B_DEF  (s390_vgfmag,vec_gfmsum_accum_128,0,
 B_VX,   0,  BT_FN_UINT128_UV2DI_UV2DI_UINT128)
+B_DEF  (s390_vgfmag_128,vec_gfmsum_accum_128,0,
 B_VX,   0,  BT_FN_UV16QI_UV2DI_UV2DI_UV16QI)
 
 OB_DEF (s390_vec_abs,   s390_vec_abs_s8,s390_vec_abs_dbl,  
 B_VX,   BT_FN_OV4SI_OV4SI)
 OB_DEF_VAR (s390_vec_abs_s8,s390_vlpb,  0, 
 0,  BT_OV_

[gcc r15-2919] s390: Remove vector intrinsics

2024-08-14 Thread Stefan Schulze Frielinghaus via Gcc-cvs
https://gcc.gnu.org/g:a247088adaf122116919235f4a40189506139495

commit r15-2919-ga247088adaf122116919235f4a40189506139495
Author: Stefan Schulze Frielinghaus 
Date:   Wed Aug 14 20:01:43 2024 +0200

s390: Remove vector intrinsics

The following intrinsics are not implemented.  Thus, remove them.

gcc/ChangeLog:

* config/s390/vecintrin.h (vec_vstbrh): Remove.
(vec_vstbrf): Remove.
(vec_vstbrg): Remove.
(vec_vstbrq): Remove.
(vec_vstbrf_flt): Remove.
(vec_vstbrg_dbl): Remove.
(vec_vsterb): Remove.
(vec_vsterh): Remove.
(vec_vsterf): Remove.
(vec_vsterg): Remove.
(vec_vsterf_flt): Remove.
(vec_vsterg_dbl): Remove.

Diff:
---
 gcc/config/s390/vecintrin.h | 14 --
 1 file changed, 14 deletions(-)

diff --git a/gcc/config/s390/vecintrin.h b/gcc/config/s390/vecintrin.h
index daeed91ef972..de29f913637d 100644
--- a/gcc/config/s390/vecintrin.h
+++ b/gcc/config/s390/vecintrin.h
@@ -160,20 +160,6 @@ __lcbb(const void *ptr, int bndry)
   cc != 3 ? 1 : 0; \
 })
 
-#define vec_vstbrh vec_vlbrh
-#define vec_vstbrf vec_vlbrf
-#define vec_vstbrg vec_vlbrg
-#define vec_vstbrq vec_vlbrq
-#define vec_vstbrf_flt vec_vlbrf_flt
-#define vec_vstbrg_dbl vec_vlbrg_dbl
-
-#define vec_vsterb vec_vlerb
-#define vec_vsterh vec_vlerh
-#define vec_vsterf vec_vlerh
-#define vec_vsterg vec_vlerh
-#define vec_vsterf_flt vec_vlerf_flt
-#define vec_vsterg_dbl vec_vlerg_dbl
-
 #define vec_extend_to_fp32_hi __builtin_s390_vclfnhs
 #define vec_extend_to_fp32_lo __builtin_s390_vclfnls
 #define vec_round_from_fp32 __builtin_s390_vcrnfs


[gcc r14-10582] s390: Fix high-level builtins vec_gfmsum{, _accum}_128

2024-08-14 Thread Stefan Schulze Frielinghaus via Gcc-cvs
https://gcc.gnu.org/g:e903ada5e8881acec734eb3f89c3644bbd8da7e9

commit r14-10582-ge903ada5e8881acec734eb3f89c3644bbd8da7e9
Author: Stefan Schulze Frielinghaus 
Date:   Wed Aug 14 20:12:35 2024 +0200

s390: Fix high-level builtins vec_gfmsum{,_accum}_128

Starting with r14-9449-g9f2b16ce1efef0 builtins were streamlined with
those in LLVM.  In particular s390_vgfm{,a}g have been changed from
UV16QI to UINT128 in order to match those in LLVM.  However, these
low-level builtins are directly used by the high-level builtins
vec_gfmsum{,_accum}_128 which expect UV16QI instead.  Therefore,
introduce new low-level builtins s390_vgfm{,a}g_128 and make use of
them, respectively.

gcc/ChangeLog:

* config/s390/s390-builtin-types.def (BT_FN_UV16QI_UV2DI_UV2DI):
New.
(BT_FN_UV16QI_UV2DI_UV2DI_UV16QI): New.
* config/s390/s390-builtins.def (s390_vgfmg_128): New.
(s390_vgfmag_128): New.
* config/s390/vecintrin.h (vec_gfmsum_128): Use s390_vgfmg_128.
(vec_gfmsum_accum_128): Use s390_vgfmag_128.

(cherry picked from commit e8a7142a697c5d2673adea33ba23af82a89c9559)

Diff:
---
 gcc/config/s390/s390-builtin-types.def | 2 ++
 gcc/config/s390/s390-builtins.def  | 2 ++
 gcc/config/s390/vecintrin.h| 4 ++--
 3 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/gcc/config/s390/s390-builtin-types.def 
b/gcc/config/s390/s390-builtin-types.def
index d70eaade8eab..e6f5631ed7ae 100644
--- a/gcc/config/s390/s390-builtin-types.def
+++ b/gcc/config/s390/s390-builtin-types.def
@@ -221,6 +221,7 @@ DEF_FN_TYPE_2 (BT_FN_UV16QI_UCHAR_UCHAR, BT_UV16QI, 
BT_UCHAR, BT_UCHAR)
 DEF_FN_TYPE_2 (BT_FN_UV16QI_UV16QI_INTPTR, BT_UV16QI, BT_UV16QI, BT_INTPTR)
 DEF_FN_TYPE_2 (BT_FN_UV16QI_UV16QI_UCHAR, BT_UV16QI, BT_UV16QI, BT_UCHAR)
 DEF_FN_TYPE_2 (BT_FN_UV16QI_UV16QI_UV16QI, BT_UV16QI, BT_UV16QI, BT_UV16QI)
+DEF_FN_TYPE_2 (BT_FN_UV16QI_UV2DI_UV2DI, BT_UV16QI, BT_UV2DI, BT_UV2DI)
 DEF_FN_TYPE_2 (BT_FN_UV16QI_UV8HI_UV8HI, BT_UV16QI, BT_UV8HI, BT_UV8HI)
 DEF_FN_TYPE_2 (BT_FN_UV2DI_UCHAR_UCHAR, BT_UV2DI, BT_UCHAR, BT_UCHAR)
 DEF_FN_TYPE_2 (BT_FN_UV2DI_ULONGLONG_INT, BT_UV2DI, BT_ULONGLONG, BT_INT)
@@ -299,6 +300,7 @@ DEF_FN_TYPE_3 (BT_FN_UV16QI_UV16QI_UCHAR_INT, BT_UV16QI, 
BT_UV16QI, BT_UCHAR, BT
 DEF_FN_TYPE_3 (BT_FN_UV16QI_UV16QI_UV16QI_INT, BT_UV16QI, BT_UV16QI, 
BT_UV16QI, BT_INT)
 DEF_FN_TYPE_3 (BT_FN_UV16QI_UV16QI_UV16QI_INTPTR, BT_UV16QI, BT_UV16QI, 
BT_UV16QI, BT_INTPTR)
 DEF_FN_TYPE_3 (BT_FN_UV16QI_UV16QI_UV16QI_UV16QI, BT_UV16QI, BT_UV16QI, 
BT_UV16QI, BT_UV16QI)
+DEF_FN_TYPE_3 (BT_FN_UV16QI_UV2DI_UV2DI_UV16QI, BT_UV16QI, BT_UV2DI, BT_UV2DI, 
BT_UV16QI)
 DEF_FN_TYPE_3 (BT_FN_UV16QI_UV8HI_UV8HI_INTPTR, BT_UV16QI, BT_UV8HI, BT_UV8HI, 
BT_INTPTR)
 DEF_FN_TYPE_3 (BT_FN_UV2DI_UV2DI_ULONGLONG_INT, BT_UV2DI, BT_UV2DI, 
BT_ULONGLONG, BT_INT)
 DEF_FN_TYPE_3 (BT_FN_UV2DI_UV2DI_UV2DI_INT, BT_UV2DI, BT_UV2DI, BT_UV2DI, 
BT_INT)
diff --git a/gcc/config/s390/s390-builtins.def 
b/gcc/config/s390/s390-builtins.def
index 3a63213e5719..7f6190fa8103 100644
--- a/gcc/config/s390/s390-builtins.def
+++ b/gcc/config/s390/s390-builtins.def
@@ -1666,6 +1666,7 @@ B_DEF  (s390_vgfmb, vec_gfmsumv16qi,  
  0,
 B_DEF  (s390_vgfmh, vec_gfmsumv8hi, 0, 
 B_VX,   0,  BT_FN_UV4SI_UV8HI_UV8HI)
 B_DEF  (s390_vgfmf, vec_gfmsumv4si, 0, 
 B_VX,   0,  BT_FN_UV2DI_UV4SI_UV4SI)
 B_DEF  (s390_vgfmg, vec_gfmsum_128, 0, 
 B_VX,   0,  BT_FN_UINT128_UV2DI_UV2DI)
+B_DEF  (s390_vgfmg_128, vec_gfmsum_128, 0, 
 B_VX,   0,  BT_FN_UV16QI_UV2DI_UV2DI)
 
 OB_DEF (s390_vec_gfmsum_accum,  
s390_vec_gfmsum_accum_u8,s390_vec_gfmsum_accum_u32,B_VX,
BT_FN_OV4SI_OV4SI_OV4SI_OV4SI)
 OB_DEF_VAR (s390_vec_gfmsum_accum_u8,   s390_vgfmab,0, 
 0,  BT_OV_UV8HI_UV16QI_UV16QI_UV8HI)
@@ -1676,6 +1677,7 @@ B_DEF  (s390_vgfmab,
vec_gfmsum_accumv16qi,0,
 B_DEF  (s390_vgfmah,vec_gfmsum_accumv8hi,0,
 B_VX,   0,  BT_FN_UV4SI_UV8HI_UV8HI_UV4SI)
 B_DEF  (s390_vgfmaf,vec_gfmsum_accumv4si,0,
 B_VX,   0,  BT_FN_UV2DI_UV4SI_UV4SI_UV2DI)
 B_DEF  (s390_vgfmag,vec_gfmsum_accum_128,0,
 B_VX,   0,  BT_FN_UINT128_UV2DI_UV2DI_UINT128)
+B_DEF  (s390_vgfmag_128,vec_gfmsum_accum_128,0,
 B_VX,   0,  BT_FN_UV16QI_UV2DI_UV2DI_UV16QI)
 
 OB_DEF (s390_vec_abs,   s390_vec_abs_s8,s390_vec_abs_dbl,  
 B_VX,   BT_FN_OV4SI_OV4SI)
 OB_DEF_VAR (s390_vec_abs_s8,

[gcc r15-2920] c++: ICE with NSDMIs and fn arguments [PR116015]

2024-08-14 Thread Marek Polacek via Gcc-cvs
https://gcc.gnu.org/g:d91b6c93f98cac71f5588d73191d08ad788e600c

commit r15-2920-gd91b6c93f98cac71f5588d73191d08ad788e600c
Author: Marek Polacek 
Date:   Fri Aug 9 16:14:18 2024 -0400

c++: ICE with NSDMIs and fn arguments [PR116015]

The problem in this PR is that we ended up with

  {.rows=(&<PLACEHOLDER_EXPR struct Widget>)->n,
   .outer_stride=(&<PLACEHOLDER_EXPR struct MatrixLayout>)->rows}

that is, two PLACEHOLDER_EXPRs for different types on the same level
in one { }.  That should not happen; we may, for instance, neglect to
replace a PLACEHOLDER_EXPR due to CONSTRUCTOR_PLACEHOLDER_BOUNDARY on
the constructor.

The same problem happened in PR100252, which I fixed by introducing
replace_placeholders_for_class_temp_r.  That didn't work here, though,
because r_p_for_c_t_r only works for non-eliding TARGET_EXPRs: replacing
a PLACEHOLDER_EXPR with a temporary that is going to be elided will
result in a crash in gimplify_var_or_parm_decl when it encounters such
a loose decl.

But leaving the PLACEHOLDER_EXPRs in is also bad because then we end
up with this PR.

TARGET_EXPRs for function arguments are elided in gimplify_arg.  The
argument will get a real temporary only in get_formal_tmp_var.  One
idea was to use the temporary that is going to be elided anyway, and
then replace_decl it with the real object once we get it.  But that
didn't work out: one problem is that we elide the TARGET_EXPR for an
argument before we create the real temporary for the argument, and
when we get it, the context that this was a TARGET_EXPR for an argument
has been lost.  We're also in the middle end territory now, even though
this is a C++-specific problem.

A solution is to simply stop eliding TARGET_EXPRs whose initializer is
a CONSTRUCTOR.  Such copies can't be (at the moment) elided anyway.  But
not eliding all TARGET_EXPRs would be a pessimization.

PR c++/116015

gcc/cp/ChangeLog:

* call.cc (convert_for_arg_passing): Don't set_target_expr_eliding
when the TARGET_EXPR initializer is a CONSTRUCTOR.

gcc/ChangeLog:

* gimplify.cc (gimplify_arg): Do not strip a TARGET_EXPR whose
initializer is a CONSTRUCTOR.

gcc/testsuite/ChangeLog:

* g++.dg/cpp1y/nsdmi-aggr23.C: New test.

Diff:
---
 gcc/cp/call.cc|  7 +--
 gcc/gimplify.cc   | 17 -
 gcc/testsuite/g++.dg/cpp1y/nsdmi-aggr23.C | 26 ++
 3 files changed, 47 insertions(+), 3 deletions(-)

diff --git a/gcc/cp/call.cc b/gcc/cp/call.cc
index 94015db4e650..0fe679aae9fe 100644
--- a/gcc/cp/call.cc
+++ b/gcc/cp/call.cc
@@ -9468,8 +9468,11 @@ convert_for_arg_passing (tree type, tree val, 
tsubst_flags_t complain)
   if (complain & tf_warning)
 warn_for_address_of_packed_member (type, val);
 
-  /* gimplify_arg elides TARGET_EXPRs that initialize a function argument.  */
-  if (SIMPLE_TARGET_EXPR_P (val))
+  /* gimplify_arg elides TARGET_EXPRs that initialize a function argument,
+ unless the initializer is a CONSTRUCTOR.  In that case, we fail to
+ elide the copy anyway.  See that function for more information.  */
+  if (SIMPLE_TARGET_EXPR_P (val)
+  && TREE_CODE (TARGET_EXPR_INITIAL (val)) != CONSTRUCTOR)
 set_target_expr_eliding (val);
 
   return val;
diff --git a/gcc/gimplify.cc b/gcc/gimplify.cc
index 71cc6c38d807..26a216e151d6 100644
--- a/gcc/gimplify.cc
+++ b/gcc/gimplify.cc
@@ -3760,7 +3760,22 @@ gimplify_arg (tree *arg_p, gimple_seq *pre_p, location_t 
call_location,
{
  tree init = TARGET_EXPR_INITIAL (*arg_p);
  if (init
- && !VOID_TYPE_P (TREE_TYPE (init)))
+ && !VOID_TYPE_P (TREE_TYPE (init))
+ /* Currently, due to c++/116015, it is not desirable to
+strip a TARGET_EXPR whose initializer is a {}.  The
+problem is that if we do elide it, we also have to
+replace all the occurrences of the slot temporary in the
+initializer with the temporary created for the argument.
+But we do not have that temporary yet so the replacement
+would be quite awkward and it might be needed to resort
+back to a PLACEHOLDER_EXPR.  Note that stripping the
+TARGET_EXPR wouldn't help anyway, as gimplify_expr would
+just allocate a temporary to store the CONSTRUCTOR into.
+(FIXME PR116375.)
+
+See convert_for_arg_passing for the C++ code that marks
+the TARGET_EXPR as eliding or not.  */
+ && TREE_CODE (init) != CONSTRUCTOR)
*arg_p = init;
}
 }
diff --git a/gcc/testsuite/g++.dg/cpp1y/nsdmi-aggr23.C 
b/gcc/testsuite/g++.dg/cpp1y/nsdmi-aggr23.C
new file mode 100644
index ..2f5b8ca97bf7
--- /dev/null
+++ b/gcc/t

[gcc] Created branch 'mikael/heads/inline_minmaxloc_without_dim_v10' in namespace 'refs/users'

2024-08-14 Thread Mikael Morin via Gcc-cvs
The branch 'mikael/heads/inline_minmaxloc_without_dim_v10' was created in 
namespace 'refs/users' pointing to:

 72f52d749140... fortran: Add -finline-intrinsics flag for MINLOC/MAXLOC [PR


[gcc(refs/users/mikael/heads/inline_minmaxloc_without_dim_v10)] fortran: Add tests covering inline MINLOC/MAXLOC without DIM [PR90608]

2024-08-14 Thread Mikael Morin via Gcc-cvs
https://gcc.gnu.org/g:7bc5ceaf8579ccd27fa54ffbeaceedaabba09911

commit 7bc5ceaf8579ccd27fa54ffbeaceedaabba09911
Author: Mikael Morin 
Date:   Wed Jul 31 10:09:25 2024 +0200

fortran: Add tests covering inline MINLOC/MAXLOC without DIM [PR90608]

Tested on x86_64-pc-linux-gnu.
OK for master?

-- >8 --

Add the tests covering the various cases for which we are about to implement
inline expansion of MINLOC and MAXLOC.  Those are cases where the DIM
argument is not present.

PR fortran/90608

gcc/testsuite/ChangeLog:

* gfortran.dg/ieee/maxloc_nan_1.f90: New test.
* gfortran.dg/ieee/minloc_nan_1.f90: New test.
* gfortran.dg/maxloc_7.f90: New test.
* gfortran.dg/maxloc_with_mask_1.f90: New test.
* gfortran.dg/minloc_8.f90: New test.
* gfortran.dg/minloc_with_mask_1.f90: New test.

Diff:
---
 gcc/testsuite/gfortran.dg/ieee/maxloc_nan_1.f90  |  44 +++
 gcc/testsuite/gfortran.dg/ieee/minloc_nan_1.f90  |  44 +++
 gcc/testsuite/gfortran.dg/maxloc_7.f90   | 208 +
 gcc/testsuite/gfortran.dg/maxloc_with_mask_1.f90 | 373 +++
 gcc/testsuite/gfortran.dg/minloc_8.f90   | 208 +
 gcc/testsuite/gfortran.dg/minloc_with_mask_1.f90 | 372 ++
 6 files changed, 1249 insertions(+)

diff --git a/gcc/testsuite/gfortran.dg/ieee/maxloc_nan_1.f90 
b/gcc/testsuite/gfortran.dg/ieee/maxloc_nan_1.f90
new file mode 100644
index ..329b54e8e1f8
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/ieee/maxloc_nan_1.f90
@@ -0,0 +1,44 @@
+! { dg-do run }
+!
+! PR fortran/90608
+! Check the correct behaviour of the inline MAXLOC implementation,
+! when ARRAY is filled with NANs.
+
+program p
+  implicit none
+  call check_without_mask
+  call check_with_mask
+contains
+  subroutine check_without_mask()
+use, intrinsic :: ieee_arithmetic
+real, allocatable :: a(:,:,:)
+real :: nan
+integer, allocatable :: m(:)
+if (.not. ieee_support_nan(nan)) return
+nan = ieee_value(nan, ieee_quiet_nan)
+allocate(a(3,3,3), source = nan)
+m = maxloc(a)
+if (size(m, dim=1) /= 3) stop 32
+if (any(m /= (/ 1, 1, 1 /))) stop 35
+  end subroutine
+  subroutine check_with_mask()
+use, intrinsic :: ieee_arithmetic
+real, allocatable :: a(:,:,:)
+logical, allocatable :: m(:,:,:)
+real :: nan
+integer, allocatable :: r(:)
+if (.not. ieee_support_nan(nan)) return
+nan = ieee_value(nan, ieee_quiet_nan)
+allocate(a(3,3,3), source = nan)
+allocate(m(3,3,3))
+m(:,:,:) = reshape((/ .false., .false., .true. , .true. , .false., &
+  .true. , .false., .false., .false., .true. , &
+  .true. , .false., .true. , .true. , .true. , &
+  .false., .false., .true. , .true. , .false., &
+  .false., .true. , .false., .false., .true. , &
+  .true. , .true. /), shape(m))
+r = maxloc(a, mask = m)
+if (size(r, dim = 1) /= 3) stop 62
+if (any(r /= (/ 3, 1, 1 /))) stop 65
+  end subroutine
+end program p
diff --git a/gcc/testsuite/gfortran.dg/ieee/minloc_nan_1.f90 
b/gcc/testsuite/gfortran.dg/ieee/minloc_nan_1.f90
new file mode 100644
index ..71471dc6bbfb
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/ieee/minloc_nan_1.f90
@@ -0,0 +1,44 @@
+! { dg-do run }
+!
+! PR fortran/90608
+! Check the correct behaviour of the inline MINLOC implementation,
+! when ARRAY is filled with NANs.
+
+program p
+  implicit none
+  call check_without_mask
+  call check_with_mask
+contains
+  subroutine check_without_mask()
+use, intrinsic :: ieee_arithmetic
+real, allocatable :: a(:,:,:)
+real :: nan
+integer, allocatable :: m(:)
+if (.not. ieee_support_nan(nan)) return
+nan = ieee_value(nan, ieee_quiet_nan)
+allocate(a(3,3,3), source = nan)
+m = minloc(a)
+if (size(m, dim=1) /= 3) stop 32
+if (any(m /= (/ 1, 1, 1 /))) stop 35
+  end subroutine
+  subroutine check_with_mask()
+use, intrinsic :: ieee_arithmetic
+real, allocatable :: a(:,:,:)
+logical, allocatable :: m(:,:,:)
+real :: nan
+integer, allocatable :: r(:)
+if (.not. ieee_support_nan(nan)) return
+nan = ieee_value(nan, ieee_quiet_nan)
+allocate(a(3,3,3), source = nan)
+allocate(m(3,3,3))
+m(:,:,:) = reshape((/ .false., .false., .true. , .true. , .false., &
+  .true. , .false., .false., .false., .true. , &
+  .true. , .false., .true. , .true. , .true. , &
+  .false., .false., .true. , .true. , .false., &
+  .false., .true. , .false., .false., .true. , &
+  .true. , .true. /), shape(m))
+r = minloc(a, mask = m)
+if (size(r, dim = 1) /= 3) stop 62
+if (any(r /= (/ 3, 1, 1 /))) stop 65
+  end subroutine
+end program p
diff --git a/gcc/testsuite/gfortran.dg/maxloc_7.f90 
b/gcc/testsuite/gfortran.dg/maxloc_7.f90
new file mode 100644
ind

[gcc(refs/users/mikael/heads/inline_minmaxloc_without_dim_v10)] fortran: Disable frontend passes for inlinable MINLOC/MAXLOC [PR90608]

2024-08-14 Thread Mikael Morin via Gcc-cvs
https://gcc.gnu.org/g:79b5b827782158ceb23456ffd6710455d69c30e6

commit 79b5b827782158ceb23456ffd6710455d69c30e6
Author: Mikael Morin 
Date:   Wed Jul 31 10:09:39 2024 +0200

fortran: Disable frontend passes for inlinable MINLOC/MAXLOC [PR90608]

Regression-tested on x86_64-pc-linux-gnu.
OK for master?

-- >8 --

Disable rewriting of MINLOC/MAXLOC expressions for which inline code
generation is supported.  Update the gfc_inline_intrinsic_function_p
predicate (already existing) for that, with the current state of
MINLOC/MAXLOC inlining support, that is only the cases of a scalar
result and non-CHARACTER argument for now.

This change has no effect currently, as the MINLOC/MAXLOC front-end passes
only change expressions of rank 1, but the inlining control predicate
gfc_inline_intrinsic_function_p returns false for those.  However, later
changes will extend MINLOC/MAXLOC inline expansion support to array
expressions and update the inlining control predicate, and this will become
effective.

PR fortran/90608

gcc/fortran/ChangeLog:

* frontend-passes.cc (optimize_minmaxloc): Skip if we can generate
inline code for the unmodified expression.
* trans-intrinsic.cc (gfc_inline_intrinsic_function_p): Add
MINLOC and MAXLOC cases.

Diff:
---
 gcc/fortran/frontend-passes.cc |  3 ++-
 gcc/fortran/trans-intrinsic.cc | 23 +++
 2 files changed, 25 insertions(+), 1 deletion(-)

diff --git a/gcc/fortran/frontend-passes.cc b/gcc/fortran/frontend-passes.cc
index 3c06018fdbbf..8e4c6310ba8d 100644
--- a/gcc/fortran/frontend-passes.cc
+++ b/gcc/fortran/frontend-passes.cc
@@ -2277,7 +2277,8 @@ optimize_minmaxloc (gfc_expr **e)
   || fn->value.function.actual == NULL
   || fn->value.function.actual->expr == NULL
   || fn->value.function.actual->expr->ts.type == BT_CHARACTER
-  || fn->value.function.actual->expr->rank != 1)
+  || fn->value.function.actual->expr->rank != 1
+  || gfc_inline_intrinsic_function_p (fn))
 return;
 
   *e = gfc_get_array_expr (fn->ts.type, fn->ts.kind, &fn->where);
diff --git a/gcc/fortran/trans-intrinsic.cc b/gcc/fortran/trans-intrinsic.cc
index 84a378ef310c..2c8512060ccd 100644
--- a/gcc/fortran/trans-intrinsic.cc
+++ b/gcc/fortran/trans-intrinsic.cc
@@ -11652,6 +11652,29 @@ gfc_inline_intrinsic_function_p (gfc_expr *expr)
 case GFC_ISYM_TRANSPOSE:
   return true;
 
+case GFC_ISYM_MINLOC:
+case GFC_ISYM_MAXLOC:
+  {
+   /* Disable inline expansion if code size matters.  */
+   if (optimize_size)
+ return false;
+
+   gfc_actual_arglist *array_arg = expr->value.function.actual;
+   gfc_actual_arglist *dim_arg = array_arg->next;
+
+   gfc_expr *array = array_arg->expr;
+   gfc_expr *dim = dim_arg->expr;
+
+   if (!(array->ts.type == BT_INTEGER
+ || array->ts.type == BT_REAL))
+ return false;
+
+   if (array->rank == 1 && dim != nullptr)
+ return true;
+
+   return false;
+  }
+
 default:
   return false;
 }


[gcc(refs/users/mikael/heads/inline_minmaxloc_without_dim_v10)] fortran: Inline MINLOC/MAXLOC with no DIM and ARRAY of rank 1 [PR90608]

2024-08-14 Thread Mikael Morin via Gcc-cvs
https://gcc.gnu.org/g:611fd47ae647c6ca59e9c27d9af890dae07e2351

commit 611fd47ae647c6ca59e9c27d9af890dae07e2351
Author: Mikael Morin 
Date:   Wed Jul 31 10:09:53 2024 +0200

fortran: Inline MINLOC/MAXLOC with no DIM and ARRAY of rank 1 [PR90608]

Regression-tested on x86_64-pc-linux-gnu.
OK for master?

-- >8 --

Enable inline code generation for the MINLOC and MAXLOC intrinsics, if the
DIM argument is not present and ARRAY has rank 1.  This case is similar to
the case where the result is scalar (DIM present and rank 1 ARRAY), which
already supports inline expansion of the intrinsic.  Both cases return
the same value, with the difference that the result is an array of size 1 if
DIM is absent, whereas it's a scalar if DIM is present.  So all there is
to do for the new case to work is hook the inline expansion with the
scalarizer.

PR fortran/90608

gcc/fortran/ChangeLog:

* trans-array.cc (gfc_conv_ss_startstride): Set the scalarization
rank based on the MINLOC/MAXLOC rank if needed.  Call the inline
code generation and setup the scalarizer array descriptor info
in the MINLOC and MAXLOC cases.
* trans-intrinsic.cc (gfc_conv_intrinsic_minmaxloc): Return the
result array element if the scalarizer is setup and we are inside
the loops.  Restrict library function call dispatch to the case
where inline expansion is not supported.  Declare an array result
if the expression isn't scalar.  Initialize the array result single
element and return the result variable if the expression isn't
scalar.
(walk_inline_intrinsic_minmaxloc): New function.
(walk_inline_intrinsic_function): Add MINLOC and MAXLOC cases,
dispatching to walk_inline_intrinsic_minmaxloc.
(gfc_add_intrinsic_ss_code): Add MINLOC and MAXLOC cases.
(gfc_inline_intrinsic_function_p): Return true if ARRAY has rank 1,
regardless of DIM.

Diff:
---
 gcc/fortran/trans-array.cc |  25 ++
 gcc/fortran/trans-intrinsic.cc | 198 +++--
 2 files changed, 155 insertions(+), 68 deletions(-)

diff --git a/gcc/fortran/trans-array.cc b/gcc/fortran/trans-array.cc
index 9fb0b2b398d2..46e2152d0f0b 100644
--- a/gcc/fortran/trans-array.cc
+++ b/gcc/fortran/trans-array.cc
@@ -4851,6 +4851,8 @@ gfc_conv_ss_startstride (gfc_loopinfo * loop)
case GFC_ISYM_UBOUND:
case GFC_ISYM_LCOBOUND:
case GFC_ISYM_UCOBOUND:
+   case GFC_ISYM_MAXLOC:
+   case GFC_ISYM_MINLOC:
case GFC_ISYM_SHAPE:
case GFC_ISYM_THIS_IMAGE:
  loop->dimen = ss->dimen;
@@ -4900,6 +4902,29 @@ done:
case GFC_SS_INTRINSIC:
  switch (expr->value.function.isym->id)
{
+   case GFC_ISYM_MINLOC:
+   case GFC_ISYM_MAXLOC:
+ {
+   gfc_se se;
+   gfc_init_se (&se, nullptr);
+   se.loop = loop;
+   se.ss = ss;
+   gfc_conv_intrinsic_function (&se, expr);
+   gfc_add_block_to_block (&outer_loop->pre, &se.pre);
+   gfc_add_block_to_block (&outer_loop->post, &se.post);
+
+   info->descriptor = se.expr;
+
+   info->data = gfc_conv_array_data (info->descriptor);
+   info->data = gfc_evaluate_now (info->data, &outer_loop->pre);
+
+   info->offset = gfc_index_zero_node;
+   info->start[0] = gfc_index_zero_node;
+   info->end[0] = gfc_index_zero_node;
+   info->stride[0] = gfc_index_one_node;
+   continue;
+ }
+
/* Fall through to supply start and stride.  */
case GFC_ISYM_LBOUND:
case GFC_ISYM_UBOUND:
diff --git a/gcc/fortran/trans-intrinsic.cc b/gcc/fortran/trans-intrinsic.cc
index 2c8512060ccd..2fd637a12b05 100644
--- a/gcc/fortran/trans-intrinsic.cc
+++ b/gcc/fortran/trans-intrinsic.cc
@@ -5273,66 +5273,69 @@ strip_kind_from_actual (gfc_actual_arglist * actual)
we need to handle.  For performance reasons we sometimes create two
loops instead of one, where the second one is much simpler.
Examples for minloc intrinsic:
-   1) Result is an array, a call is generated
-   2) Array mask is used and NaNs need to be supported:
-  limit = Infinity;
-  pos = 0;
-  S = from;
-  while (S <= to) {
-   if (mask[S]) {
- if (pos == 0) pos = S + (1 - from);
- if (a[S] <= limit) { limit = a[S]; pos = S + (1 - from); goto lab1; }
-   }
-   S++;
-  }
-  goto lab2;
-  lab1:;
-  while (S <= to) {
-   if (mask[S]) if (a[S] < limit) { limit = a[S]; pos = S + (1 - from); }
-   S++;
-  }
-  lab2:;
-   3) NaNs need to be supported, but it is known at compile time or cheaply
- 

[gcc(refs/users/mikael/heads/inline_minmaxloc_without_dim_v10)] fortran: Remove MINLOC/MAXLOC frontend optimization

2024-08-14 Thread Mikael Morin via Gcc-cvs
https://gcc.gnu.org/g:9fe562ded527f53feaab9b03d821f8fc54544aba

commit 9fe562ded527f53feaab9b03d821f8fc54544aba
Author: Mikael Morin 
Date:   Fri Aug 9 21:21:01 2024 +0200

fortran: Remove MINLOC/MAXLOC frontend optimization

Remove the frontend pass rewriting calls of MINLOC/MAXLOC without DIM to
calls with one-valued DIM enclosed in an array constructor.  This
transformation was circumventing the limitation of inline MINLOC/MAXLOC code
generation to scalar cases only, allowing inline code to be generated if
ARRAY had rank 1 and DIM was absent.  As MINLOC/MAXLOC has gained support for
inline code generation in that case, the limitation is no longer effective,
and the transformation no longer necessary.

gcc/fortran/ChangeLog:

* frontend-passes.cc (optimize_minmaxloc): Remove.
(optimize_expr): Remove dispatch to optimize_minmaxloc.

Diff:
---
 gcc/fortran/frontend-passes.cc | 57 --
 1 file changed, 57 deletions(-)

diff --git a/gcc/fortran/frontend-passes.cc b/gcc/fortran/frontend-passes.cc
index 8e4c6310ba8d..31d553e9844d 100644
--- a/gcc/fortran/frontend-passes.cc
+++ b/gcc/fortran/frontend-passes.cc
@@ -36,7 +36,6 @@ static bool optimize_op (gfc_expr *);
 static bool optimize_comparison (gfc_expr *, gfc_intrinsic_op);
 static bool optimize_trim (gfc_expr *);
 static bool optimize_lexical_comparison (gfc_expr *);
-static void optimize_minmaxloc (gfc_expr **);
 static bool is_empty_string (gfc_expr *e);
 static void doloop_warn (gfc_namespace *);
 static int do_intent (gfc_expr **);
@@ -356,17 +355,6 @@ optimize_expr (gfc_expr **e, int *walk_subtrees 
ATTRIBUTE_UNUSED,
   if ((*e)->expr_type == EXPR_OP && optimize_op (*e))
 gfc_simplify_expr (*e, 0);
 
-  if ((*e)->expr_type == EXPR_FUNCTION && (*e)->value.function.isym)
-switch ((*e)->value.function.isym->id)
-  {
-  case GFC_ISYM_MINLOC:
-  case GFC_ISYM_MAXLOC:
-   optimize_minmaxloc (e);
-   break;
-  default:
-   break;
-  }
-
   if (function_expr)
 count_arglist --;
 
@@ -2262,51 +2250,6 @@ optimize_trim (gfc_expr *e)
   return true;
 }
 
-/* Optimize minloc(b), where b is rank 1 array, into
-   (/ minloc(b, dim=1) /), and similarly for maxloc,
-   as the latter forms are expanded inline.  */
-
-static void
-optimize_minmaxloc (gfc_expr **e)
-{
-  gfc_expr *fn = *e;
-  gfc_actual_arglist *a;
-  char *name, *p;
-
-  if (fn->rank != 1
-  || fn->value.function.actual == NULL
-  || fn->value.function.actual->expr == NULL
-  || fn->value.function.actual->expr->ts.type == BT_CHARACTER
-  || fn->value.function.actual->expr->rank != 1
-  || gfc_inline_intrinsic_function_p (fn))
-return;
-
-  *e = gfc_get_array_expr (fn->ts.type, fn->ts.kind, &fn->where);
-  (*e)->shape = fn->shape;
-  fn->rank = 0;
-  fn->shape = NULL;
-  gfc_constructor_append_expr (&(*e)->value.constructor, fn, &fn->where);
-
-  name = XALLOCAVEC (char, strlen (fn->value.function.name) + 1);
-  strcpy (name, fn->value.function.name);
-  p = strstr (name, "loc0");
-  p[3] = '1';
-  fn->value.function.name = gfc_get_string ("%s", name);
-  if (fn->value.function.actual->next)
-{
-  a = fn->value.function.actual->next;
-  gcc_assert (a->expr == NULL);
-}
-  else
-{
-  a = gfc_get_actual_arglist ();
-  fn->value.function.actual->next = a;
-}
-  a->expr = gfc_get_constant_expr (BT_INTEGER, gfc_default_integer_kind,
-  &fn->where);
-  mpz_set_ui (a->expr->value.integer, 1);
-}
-
 /* Data package to hand down for DO loop checks in a contained
procedure.  */
 typedef struct contained_info


[gcc(refs/users/mikael/heads/inline_minmaxloc_without_dim_v10)] fortran: Outline array bound check generation code

2024-08-14 Thread Mikael Morin via Gcc-cvs
https://gcc.gnu.org/g:6a40b28b1aa9d6ebd8623a8f9d169222ff04760a

commit 6a40b28b1aa9d6ebd8623a8f9d169222ff04760a
Author: Mikael Morin 
Date:   Wed Jul 31 10:10:06 2024 +0200

fortran: Outline array bound check generation code

The next patch will need reindenting of the array bound check generation
code.  This outlines it to its own function beforehand, reducing the churn
in the next patch.

Regression-tested on x86_64-pc-linux-gnu.
OK for master?

-- >8 --

gcc/fortran/ChangeLog:

* trans-array.cc (gfc_conv_ss_startstride): Move array bound check
generation code...
(add_check_section_in_array_bounds): ... here as a new function.

Diff:
---
 gcc/fortran/trans-array.cc | 297 ++---
 1 file changed, 143 insertions(+), 154 deletions(-)

diff --git a/gcc/fortran/trans-array.cc b/gcc/fortran/trans-array.cc
index 46e2152d0f0b..e578b676fcce 100644
--- a/gcc/fortran/trans-array.cc
+++ b/gcc/fortran/trans-array.cc
@@ -4816,6 +4816,146 @@ gfc_conv_section_startstride (stmtblock_t * block, 
gfc_ss * ss, int dim)
 }
 
 
+/* Generate in INNER the bounds checking code along the dimension DIM for
+   the array associated with SS_INFO.  */
+
+static void
+add_check_section_in_array_bounds (stmtblock_t *inner, gfc_ss_info *ss_info,
+  int dim)
+{
+  gfc_expr *expr = ss_info->expr;
+  locus *expr_loc = &expr->where;
+  const char *expr_name = expr->symtree->name;
+
+  gfc_array_info *info = &ss_info->data.array;
+
+  bool check_upper;
+  if (dim == info->ref->u.ar.dimen - 1
+  && info->ref->u.ar.as->type == AS_ASSUMED_SIZE)
+check_upper = false;
+  else
+check_upper = true;
+
+  /* Zero stride is not allowed.  */
+  tree tmp = fold_build2_loc (input_location, EQ_EXPR, logical_type_node,
+ info->stride[dim], gfc_index_zero_node);
+  char * msg = xasprintf ("Zero stride is not allowed, for dimension %d "
+ "of array '%s'", dim + 1, expr_name);
+  gfc_trans_runtime_check (true, false, tmp, inner, expr_loc, msg);
+  free (msg);
+
+  tree desc = info->descriptor;
+
+  /* This is the run-time equivalent of resolve.cc's
+ check_dimension.  The logical is more readable there
+ than it is here, with all the trees.  */
+  tree lbound = gfc_conv_array_lbound (desc, dim);
+  tree end = info->end[dim];
+  tree ubound = check_upper ? gfc_conv_array_ubound (desc, dim) : NULL_TREE;
+
+  /* non_zerosized is true when the selected range is not
+ empty.  */
+  tree stride_pos = fold_build2_loc (input_location, GT_EXPR, 
logical_type_node,
+info->stride[dim], gfc_index_zero_node);
+  tmp = fold_build2_loc (input_location, LE_EXPR, logical_type_node,
+info->start[dim], end);
+  stride_pos = fold_build2_loc (input_location, TRUTH_AND_EXPR,
+   logical_type_node, stride_pos, tmp);
+
+  tree stride_neg = fold_build2_loc (input_location, LT_EXPR, 
logical_type_node,
+info->stride[dim], gfc_index_zero_node);
+  tmp = fold_build2_loc (input_location, GE_EXPR, logical_type_node,
+info->start[dim], end);
+  stride_neg = fold_build2_loc (input_location, TRUTH_AND_EXPR,
+   logical_type_node, stride_neg, tmp);
+  tree non_zerosized = fold_build2_loc (input_location, TRUTH_OR_EXPR,
+   logical_type_node, stride_pos,
+   stride_neg);
+
+  /* Check the start of the range against the lower and upper
+ bounds of the array, if the range is not empty.
+ If upper bound is present, include both bounds in the
+ error message.  */
+  if (check_upper)
+{
+  tmp = fold_build2_loc (input_location, LT_EXPR, logical_type_node,
+info->start[dim], lbound);
+  tmp = fold_build2_loc (input_location, TRUTH_AND_EXPR, logical_type_node,
+non_zerosized, tmp);
+  tree tmp2 = fold_build2_loc (input_location, GT_EXPR, logical_type_node,
+  info->start[dim], ubound);
+  tmp2 = fold_build2_loc (input_location, TRUTH_AND_EXPR, 
logical_type_node,
+ non_zerosized, tmp2);
+  msg = xasprintf ("Index '%%ld' of dimension %d of array '%s' outside of "
+  "expected range (%%ld:%%ld)", dim + 1, expr_name);
+  gfc_trans_runtime_check (true, false, tmp, inner, expr_loc, msg,
+ fold_convert (long_integer_type_node, info->start[dim]),
+ fold_convert (long_integer_type_node, lbound),
+ fold_convert (long_integer_type_node, ubound));
+  gfc_trans_runtime_check (true, false, tmp2, inner, expr_loc, msg,
+ fold_convert (long_integer_type_node, info->start[dim]),
+ fold_convert (long_integer_type_node, lbound),
+

[gcc(refs/users/mikael/heads/inline_minmaxloc_without_dim_v10)] fortran: Inline integral MINLOC/MAXLOC with no DIM and no MASK [PR90608]

2024-08-14 Thread Mikael Morin via Gcc-cvs
https://gcc.gnu.org/g:e8d1a84c9a2dac5f9f4c2b7a9685375e3d8aedbc

commit e8d1a84c9a2dac5f9f4c2b7a9685375e3d8aedbc
Author: Mikael Morin 
Date:   Wed Jul 31 10:10:19 2024 +0200

fortran: Inline integral MINLOC/MAXLOC with no DIM and no MASK [PR90608]

Regression-tested on x86_64-pc-linux-gnu.
OK for master?

-- >8 --

Enable generation of inline code for the MINLOC and MAXLOC intrinsics,
if the ARRAY argument is of integral type and of any rank (only the rank 1
case was previously inlined), and neither DIM nor MASK arguments are
present.

This needs a few adjustments in gfc_conv_intrinsic_minmaxloc,
mainly to replace the single variables POS and OFFSET, with collections
of variables, one variable per dimension each.

The restriction to integral ARRAY and absent MASK limits the scope of
the change to the cases where we generate single loop inline code.  The
code generation for the second loop is only accessible with ARRAY of rank
1, so it can continue using a single variable.  A later change will extend
inlining to the double loop cases.

There is some bounds checking code that was previously handled by the
library, and that needed some changes in the scalarizer to avoid regressing.
The bounds check code generation was already supported by the scalarizer,
but it was only applying to array reference sections, checking both
for array bound violation and for shape conformability between all the
involved arrays.  With this change, for MINLOC or MAXLOC, enable the
conformability check between all the scalarized arrays, and disable the
array bound violation check.

PR fortran/90608

gcc/fortran/ChangeLog:

* trans-array.cc (gfc_conv_ss_startstride): Set the MINLOC/MAXLOC
result upper bound using the rank of the ARRAY argument.  Adjust
the error message for intrinsic result arrays.  Only check array
bounds for array references.  Move bound check decision code...
(bounds_check_needed): ... here as a new predicate.  Allow bound
check for MINLOC/MAXLOC intrinsic results.
* trans-intrinsic.cc (gfc_conv_intrinsic_minmaxloc): Change the
result array upper bound to the rank of ARRAY.  Update the NONEMPTY
variable to depend on the non-empty extent of every dimension.  Use
one variable per dimension instead of a single variable for the
position and the offset.  Update their declaration, initialization,
and update to affect the variable of each dimension.  Use the first
variable only in areas only accessed with rank 1 ARRAY argument.
Set every element of the result using its corresponding variable.
(gfc_inline_intrinsic_function_p): Return true for integral ARRAY
and absent DIM and MASK.

gcc/testsuite/ChangeLog:

* gfortran.dg/maxloc_bounds_4.f90: Additionally accept the error
message emitted by the scalarizer.

Diff:
---
 gcc/fortran/trans-array.cc|  68 +---
 gcc/fortran/trans-intrinsic.cc| 150 +++---
 gcc/testsuite/gfortran.dg/maxloc_bounds_4.f90 |   4 +-
 3 files changed, 165 insertions(+), 57 deletions(-)

diff --git a/gcc/fortran/trans-array.cc b/gcc/fortran/trans-array.cc
index e578b676fcce..1190bfa6c024 100644
--- a/gcc/fortran/trans-array.cc
+++ b/gcc/fortran/trans-array.cc
@@ -4956,6 +4956,35 @@ add_check_section_in_array_bounds (stmtblock_t *inner, 
gfc_ss_info *ss_info,
 }
 
 
+/* Tells whether we need to generate bounds checking code for the array
+   associated with SS.  */
+
+bool
+bounds_check_needed (gfc_ss *ss)
+{
+  /* Catch allocatable lhs in f2003.  */
+  if (flag_realloc_lhs && ss->no_bounds_check)
+return false;
+
+  gfc_ss_info *ss_info = ss->info;
+  if (ss_info->type == GFC_SS_SECTION)
+return true;
+
+  if (!(ss_info->type == GFC_SS_INTRINSIC
+   && ss_info->expr
+   && ss_info->expr->expr_type == EXPR_FUNCTION))
+return false;
+
+  gfc_intrinsic_sym *isym = ss_info->expr->value.function.isym;
+  if (!(isym
+   && (isym->id == GFC_ISYM_MAXLOC
+   || isym->id == GFC_ISYM_MINLOC)))
+return false;
+
+  return gfc_inline_intrinsic_function_p (ss_info->expr);
+}
+
+
 /* Calculates the range start and stride for a SS chain.  Also gets the
descriptor and data pointer.  The range of vector subscripts is the size
of the vector.  Array bounds are also checked.  */
@@ -5057,10 +5086,17 @@ done:
info->data = gfc_conv_array_data (info->descriptor);
info->data = gfc_evaluate_now (info->data, &outer_loop->pre);
 
-   info->offset = gfc_index_zero_node;
+   gfc_expr *array = expr->value.function.actual->expr;
+   tree rank = build_int_cst (gfc_array_index_type, array->rank);
+

[gcc(refs/users/mikael/heads/inline_minmaxloc_without_dim_v10)] fortran: Inline integral MINLOC/MAXLOC with no DIM and scalar MASK [PR90608]

2024-08-14 Thread Mikael Morin via Gcc-cvs
https://gcc.gnu.org/g:f53fd9d642f8b8372bf57b98e8da59c5470aabbe

commit f53fd9d642f8b8372bf57b98e8da59c5470aabbe
Author: Mikael Morin 
Date:   Wed Jul 31 10:10:33 2024 +0200

fortran: Inline integral MINLOC/MAXLOC with no DIM and scalar MASK [PR90608]

Regression-tested on x86_64-pc-linux-gnu.
OK for master?

-- >8 --

Enable the generation of inline code for MINLOC/MAXLOC when argument ARRAY
is of integral type, DIM is not present, and MASK is present and is scalar
(only absent MASK or rank 1 ARRAY were inlined before).

Scalar masks are implemented with a wrapping condition around the code one
would generate if MASK wasn't present, so they are easy to support once
inline code without MASK is working.

PR fortran/90608

gcc/fortran/ChangeLog:

* trans-intrinsic.cc (gfc_conv_intrinsic_minmaxloc): Generate
variable initialization for each dimension in the else branch of
the toplevel condition.
(gfc_inline_intrinsic_function_p): Return TRUE for scalar MASK.

gcc/testsuite/ChangeLog:

* gfortran.dg/maxloc_bounds_7.f90: Additionally accept the error
message reported by the scalarizer.

Diff:
---
 gcc/fortran/trans-intrinsic.cc| 13 -
 gcc/testsuite/gfortran.dg/maxloc_bounds_7.f90 |  4 ++--
 2 files changed, 10 insertions(+), 7 deletions(-)

diff --git a/gcc/fortran/trans-intrinsic.cc b/gcc/fortran/trans-intrinsic.cc
index 30a03f983f59..15a61991aa82 100644
--- a/gcc/fortran/trans-intrinsic.cc
+++ b/gcc/fortran/trans-intrinsic.cc
@@ -5888,7 +5888,6 @@ gfc_conv_intrinsic_minmaxloc (gfc_se * se, gfc_expr * 
expr, enum tree_code op)
   /* For a scalar mask, enclose the loop in an if statement.  */
   if (maskexpr && maskss == NULL)
 {
-  gcc_assert (loop.dimen == 1);
   tree ifmask;
 
   gfc_init_se (&maskse, NULL);
@@ -5903,7 +5902,8 @@ gfc_conv_intrinsic_minmaxloc (gfc_se * se, gfc_expr * 
expr, enum tree_code op)
 the pos variable the same way as above.  */
 
   gfc_init_block (&elseblock);
-  gfc_add_modify (&elseblock, pos[0], gfc_index_zero_node);
+  for (int i = 0; i < loop.dimen; i++)
+   gfc_add_modify (&elseblock, pos[i], gfc_index_zero_node);
   elsetmp = gfc_finish_block (&elseblock);
   ifmask = conv_mask_condition (&maskse, maskexpr, optional_mask);
   tmp = build3_v (COND_EXPR, ifmask, tmp, elsetmp);
@@ -11797,9 +11797,12 @@ gfc_inline_intrinsic_function_p (gfc_expr *expr)
if (array->rank == 1)
  return true;
 
-   if (array->ts.type == BT_INTEGER
-   && dim == nullptr
-   && mask == nullptr)
+   if (array->ts.type != BT_INTEGER
+   || dim != nullptr)
+ return false;
+
+   if (mask == nullptr
+   || mask->rank == 0)
  return true;
 
return false;
diff --git a/gcc/testsuite/gfortran.dg/maxloc_bounds_7.f90 
b/gcc/testsuite/gfortran.dg/maxloc_bounds_7.f90
index 206a29b149da..3aa9d3dcebee 100644
--- a/gcc/testsuite/gfortran.dg/maxloc_bounds_7.f90
+++ b/gcc/testsuite/gfortran.dg/maxloc_bounds_7.f90
@@ -1,6 +1,6 @@
 ! { dg-do run }
 ! { dg-options "-fbounds-check" }
-! { dg-shouldfail "Incorrect extent in return value of MAXLOC intrinsic: is 3, 
should be 2" }
+! { dg-shouldfail "Incorrect extent in return value of MAXLOC intrinsic: is 3, 
should be 2|Array bound mismatch for dimension 1 of array 'res' .3/2." }
 module tst
 contains
   subroutine foo(res)
@@ -18,4 +18,4 @@ program main
   integer :: res(3)
   call foo(res)
 end program main
-! { dg-output "Fortran runtime error: Incorrect extent in return value of 
MAXLOC intrinsic: is 3, should be 2" }
+! { dg-output "Fortran runtime error: Incorrect extent in return value of 
MAXLOC intrinsic: is 3, should be 2|Array bound mismatch for dimension 1 of 
array 'res' .3/2." }


[gcc(refs/users/mikael/heads/inline_minmaxloc_without_dim_v10)] fortran: Inline non-character MINLOC/MAXLOC with no DIM [PR90608]

2024-08-14 Thread Mikael Morin via Gcc-cvs
https://gcc.gnu.org/g:c6dbb3d1de2348d2400434b23990417b245b774f

commit c6dbb3d1de2348d2400434b23990417b245b774f
Author: Mikael Morin 
Date:   Wed Jul 31 10:10:49 2024 +0200

fortran: Inline non-character MINLOC/MAXLOC with no DIM [PR90608]

Regression-tested on x86_64-pc-linux-gnu.
OK for master?

-- >8 --

Enable generation of inline MINLOC/MAXLOC code in the case where DIM
is not present, and either ARRAY is of floating point type or MASK is an
array.  Those cases are the remaining bits to fully support inlining of
non-CHARACTER MINLOC/MAXLOC without DIM.  They are treated together because
they generate similar code, the NANs for REAL types being handled a bit like
a second level of masking.  These are the cases for which we generate two
sets of loops.

This change affects the code generating the second loop, which was previously
accessible only in the cases where ARRAY has rank 1.  The single variable
initialization and update are changed to apply to multiple variables, one
per dimension.

The code generated is as follows (if ARRAY has rank 2):

for (idx11 in lower1..upper1)
  {
for (idx12 in lower2..upper2)
  {
...
if (...)
  {
...
goto second_loop;
  }
  }
  }
second_loop:
for (idx21 in lower1..upper1)
  {
for (idx22 in lower2..upper2)
  {
...
  }
  }

This code leads to processing the first elements redundantly, both in the
first set of loops and in the second one.  The loop over idx22 could
start from idx12 the first time it is run, but as it has to start from
lower2 for the rest of the runs, this change uses the same bounds for both
sets of loops for simplicity.  In the rank 1 case, this makes the generated
code worse compared to the inline code that was generated before.  A later
change will introduce conditionals to avoid the duplicate processing and
restore the generated code in that case.

PR fortran/90608

gcc/fortran/ChangeLog:

* trans-intrinsic.cc (gfc_conv_intrinsic_minmaxloc): Initialize
and update all the variables.  Put the label and goto in the
outermost scalarizer loop.  Don't start the second loop where the
first stopped.
(gfc_inline_intrinsic_function_p): Also return TRUE for array MASK
or for any REAL type.

gcc/testsuite/ChangeLog:

* gfortran.dg/maxloc_bounds_5.f90: Additionally accept error
messages reported by the scalarizer.
* gfortran.dg/maxloc_bounds_6.f90: Ditto.

Diff:
---
 gcc/fortran/trans-intrinsic.cc| 127 +-
 gcc/testsuite/gfortran.dg/maxloc_bounds_5.f90 |   4 +-
 gcc/testsuite/gfortran.dg/maxloc_bounds_6.f90 |   4 +-
 3 files changed, 87 insertions(+), 48 deletions(-)

diff --git a/gcc/fortran/trans-intrinsic.cc b/gcc/fortran/trans-intrinsic.cc
index 15a61991aa82..7cd3382573f5 100644
--- a/gcc/fortran/trans-intrinsic.cc
+++ b/gcc/fortran/trans-intrinsic.cc
@@ -5332,12 +5332,55 @@ strip_kind_from_actual (gfc_actual_arglist * actual)
   if (a[S] < limit) { limit = a[S]; pos = S + (1 - from); }
   S++;
 }
-   B: ARRAY has rank 1, and DIM is absent.  Use the same code as the scalar
-  case and wrap the result in an array.
-   C: ARRAY has rank > 1, NANs are not supported, and DIM and MASK are absent.
-  Generate code similar to the single loop scalar case, but using one
-  variable per dimension, for example if ARRAY has rank 2:
-  4) NAN's aren't supported, no MASK:
+   B: Array result, non-CHARACTER type, DIM absent
+  Generate similar code as in the scalar case, using a collection of
+  variables (one per dimension) instead of a single variable as result.
+  Picking only cases 1) and 4) with ARRAY of rank 2, the generated code
+  becomes:
+  1) Array mask is used and NaNs need to be supported:
+limit = Infinity;
+pos0 = 0;
+pos1 = 0;
+S1 = from1;
+while (S1 <= to1) {
+  S0 = from0;
+  while (s0 <= to0 {
+if (mask[S1][S0]) {
+  if (pos0 == 0) {
+pos0 = S0 + (1 - from0);
+pos1 = S1 + (1 - from1);
+  }
+  if (a[S1][S0] <= limit) {
+limit = a[S1][S0];
+pos0 = S0 + (1 - from0);
+pos1 = S1 + (1 - from1);
+goto lab1;
+  }
+}
+S0++;
+  }
+  S1++;
+}
+goto lab2;
+lab1:;
+S1 = from1;
+while (S1 <= to1) {
+   

[gcc(refs/users/mikael/heads/inline_minmaxloc_without_dim_v10)] fortran: Continue MINLOC/MAXLOC second loop where the first stopped [PR90608]

2024-08-14 Thread Mikael Morin via Gcc-cvs
https://gcc.gnu.org/g:7719509dcfe5669144f36f0ebbc9cead3101541b

commit 7719509dcfe5669144f36f0ebbc9cead3101541b
Author: Mikael Morin 
Date:   Wed Jul 31 10:11:02 2024 +0200

fortran: Continue MINLOC/MAXLOC second loop where the first stopped [PR90608]

Regression-tested on x86_64-pc-linux-gnu.
OK for master?

-- >8 --

Continue the second set of loops where the first one stopped in the
generated inline MINLOC/MAXLOC code in the cases where the generated code
contains two sets of loops.  This fixes a regression that was introduced
when enabling the generation of inline MINLOC/MAXLOC code with ARRAY of rank
greater than 1, no DIM argument, and either non-scalar MASK or floating-
point ARRAY.

In the cases where two sets of loops are generated as inline MINLOC/MAXLOC
code, we previously generated code such as (for rank 2 ARRAY, so with two
levels of nesting):

for (idx11 in lower1..upper1)
  {
for (idx12 in lower2..upper2)
  {
...
if (...)
  {
...
goto second_loop;
  }
  }
  }
second_loop:
for (idx21 in lower1..upper1)
  {
for (idx22 in lower2..upper2)
  {
...
  }
  }

which means we process the first elements twice, once in the first set
of loops and once in the second one.  This change avoids this duplicate
processing by using a conditional as lower bound for the second set of
loops, generating code like:

second_loop_entry = false;
for (idx11 in lower1..upper1)
  {
for (idx12 in lower2..upper2)
  {
...
if (...)
  {
...
second_loop_entry = true;
goto second_loop;
  }
  }
  }
second_loop:
for (idx21 in (second_loop_entry ? idx11 : lower1)..upper1)
  {
for (idx22 in (second_loop_entry ? idx12 : lower2)..upper2)
  {
...
second_loop_entry = false;
  }
  }

It was expected that the compiler optimizations would be able to remove the
state variable second_loop_entry.  It is the case if ARRAY has rank 1 (so
without loop nesting), the variable is removed and the loop bounds become
unconditional, which restores previously generated code, fully fixing the
regression.  For larger rank, unfortunately, the state variable and
conditional loop bounds remain, but those cases were previously using
library calls, so it's not a regression.

PR fortran/90608

gcc/fortran/ChangeLog:

* trans-intrinsic.cc (gfc_conv_intrinsic_minmaxloc): Generate a set
of index variables.  Set them using the loop indexes before leaving
the first set of loops.  Generate a new loop entry predicate.
Initialize it.  Set it before leaving the first set of loops.  Clear
it in the body of the second set of loops.  For the second set of
loops, update each loop lower bound to use the corresponding index
variable if the predicate variable is set.

Diff:
---
 gcc/fortran/trans-intrinsic.cc | 33 +++--
 1 file changed, 31 insertions(+), 2 deletions(-)

diff --git a/gcc/fortran/trans-intrinsic.cc b/gcc/fortran/trans-intrinsic.cc
index 7cd3382573f5..f0cbaa305f99 100644
--- a/gcc/fortran/trans-intrinsic.cc
+++ b/gcc/fortran/trans-intrinsic.cc
@@ -5342,6 +5342,7 @@ strip_kind_from_actual (gfc_actual_arglist * actual)
 pos0 = 0;
 pos1 = 0;
 S1 = from1;
+second_loop_entry = false;
 while (S1 <= to1) {
   S0 = from0;
   while (s0 <= to0 {
@@ -5354,6 +5355,7 @@ strip_kind_from_actual (gfc_actual_arglist * actual)
 limit = a[S1][S0];
 pos0 = S0 + (1 - from0);
 pos1 = S1 + (1 - from1);
+second_loop_entry = true;
 goto lab1;
   }
 }
@@ -5363,9 +5365,9 @@ strip_kind_from_actual (gfc_actual_arglist * actual)
 }
 goto lab2;
 lab1:;
-S1 = from1;
+S1 = second_loop_entry ? S1 : from1;
 while (S1 <= to1) {
-  S0 = from0;
+  S0 = second_loop_entry ? S0 : from0;
   while (S0 <= to0) {
 if (mask[S1][S0])
   if (a[S1][S0] < limit) {
@@ -5373,6 +5375,7 @@ strip_kind_from_actual (gfc_actual_arglist * actual)
 pos0 = S + (1 - from0);
 pos1 

[gcc(refs/users/mikael/heads/inline_minmaxloc_without_dim_v10)] fortran: Add -finline-intrinsics flag for MINLOC/MAXLOC [PR90608]

2024-08-14 Thread Mikael Morin via Gcc-cvs
https://gcc.gnu.org/g:72f52d74914023bc559c8c6b57898324b964f03f

commit 72f52d74914023bc559c8c6b57898324b964f03f
Author: Mikael Morin 
Date:   Sat Aug 10 14:17:21 2024 +0200

fortran: Add -finline-intrinsics flag for MINLOC/MAXLOC [PR90608]

Introduce the -finline-intrinsics flag to control from the command line
whether to generate either inline code or calls to the functions from the
library, for the MINLOC and MAXLOC intrinsics.

The flag allows specifying inlining either independently for each intrinsic
(either MINLOC or MAXLOC), or all together.  For each intrinsic, a default
value is set if none was set.  The default value depends on the
optimization: inlining is avoided if not optimizing or optimizing for size;
otherwise inlining is preferred.

There is no direct support for this behaviour provided by the .opt options
framework.  It is obtained by defining three different variants of the flag
(finline-intrinsics, fno-inline-intrinsics, finline-intrinsics=) all using
the same underlying option variable.  Each enum value (corresponding to an
intrinsic function) uses two identical bits, and the variable is initialized
with alternating bits, so that we can tell whether the value was left at its
initial setting by checking whether the two bits have different values.

PR fortran/90608

gcc/ChangeLog:

* flag-types.h (enum gfc_inlinable_intrinsics): New type.

gcc/fortran/ChangeLog:

* lang.opt (finline-intrinsics, finline-intrinsics=,
fno-inline-intrinsics): New flags.
* options.cc (gfc_post_options): If the option variable controlling
the inlining of MAXLOC (respectively MINLOC) has not been set, set
it or clear it depending on the optimization option variables.
* trans-intrinsic.cc (gfc_inline_intrinsic_function_p): Return false
if inlining for the intrinsic is disabled according to the option
variable.

gcc/testsuite/ChangeLog:

* gfortran.dg/minmaxloc_18.f90: New test.
* gfortran.dg/minmaxloc_18a.f90: New test.
* gfortran.dg/minmaxloc_18b.f90: New test.
* gfortran.dg/minmaxloc_18c.f90: New test.
* gfortran.dg/minmaxloc_18d.f90: New test.

Diff:
---
 gcc/flag-types.h|  19 +
 gcc/fortran/lang.opt|  27 +
 gcc/fortran/options.cc  |  19 +-
 gcc/fortran/trans-intrinsic.cc  |  13 +-
 gcc/testsuite/gfortran.dg/minmaxloc_18.f90  | 772 
 gcc/testsuite/gfortran.dg/minmaxloc_18a.f90 |  10 +
 gcc/testsuite/gfortran.dg/minmaxloc_18b.f90 |  10 +
 gcc/testsuite/gfortran.dg/minmaxloc_18c.f90 |  10 +
 gcc/testsuite/gfortran.dg/minmaxloc_18d.f90 |  10 +
 9 files changed, 885 insertions(+), 5 deletions(-)

diff --git a/gcc/flag-types.h b/gcc/flag-types.h
index 1e497f0bb913..77e314fa062c 100644
--- a/gcc/flag-types.h
+++ b/gcc/flag-types.h
@@ -451,6 +451,25 @@ enum gfc_convert
 };
 
 
+/* gfortran -finline-intrinsics= values;
+   We use two identical bits for each value, and initialize with alternating
+   bits, so that we can check whether a value has been set by checking whether
+   the two bits have identical value.  */
+#define GFC_INLINABLE_INTRINSICS_COUNT 2
+enum gfc_inlinable_intrinsics
+{
+  GFC_FLAG_INLINE_INTRINSIC_NONE = 0,
+  GFC_FLAG_INLINE_INTRINSIC_MAXLOC = 3 << 0,
+  GFC_FLAG_INLINE_INTRINSIC_MINLOC = 3 << 2,
+  GFC_FLAG_INLINE_INTRINSIC_ALL = (1 << (2 * GFC_INLINABLE_INTRINSICS_COUNT)) 
- 1,
+  GFC_FLAG_INLINE_INTRINSIC_NONE_UNSET = 0x & 
GFC_FLAG_INLINE_INTRINSIC_NONE,
+  GFC_FLAG_INLINE_INTRINSIC_MAXLOC_UNSET = 0x & 
GFC_FLAG_INLINE_INTRINSIC_MAXLOC,
+  GFC_FLAG_INLINE_INTRINSIC_MINLOC_UNSET = 0x & 
GFC_FLAG_INLINE_INTRINSIC_MINLOC,
+  GFC_FLAG_INLINE_INTRINSIC_ALL_UNSET = 0x & 
GFC_FLAG_INLINE_INTRINSIC_ALL
+};
+#undef GFC_INLINABLE_INTRINSICS_COUNT
+
+
 /* Inline String Operations functions.  */
 enum ilsop_fn
 {
diff --git a/gcc/fortran/lang.opt b/gcc/fortran/lang.opt
index 5cf7b4922540..0ef8597f1a10 100644
--- a/gcc/fortran/lang.opt
+++ b/gcc/fortran/lang.opt
@@ -676,6 +676,33 @@ finline-arg-packing
 Fortran  Var(flag_inline_arg_packing) Init(-1)
 -finline-arg-packing   Perform argument packing inline.
 
+finline-intrinsics
+Fortran RejectNegative Enum(gfc_inlinable_intrinsics) 
Var(flag_inline_intrinsics, GFC_FLAG_INLINE_INTRINSIC_ALL) Undocumented
+
+fno-inline-intrinsics
+Fortran RejectNegative Enum(gfc_inlinable_intrinsics) 
Var(flag_inline_intrinsics, GFC_FLAG_INLINE_INTRINSIC_NONE) Undocumented
+
+finline-intrinsics=
+Fortran Joined Var(flag_inline_intrinsics) Enum(gfc_inlinable_intrinsics) 
Init(GFC_FLAG_INLINE_INTRINSIC_ALL_UNSET) EnumSet LangUrlSuffix_Fortran(toto)
+Enable or disable generation of inline code instead of calls to the library.
+
+Enum
+Name(gfc_inlinable_int

[gcc/aoliva/heads/testbase] (709 commits) Daily bump.

2024-08-14 Thread Alexandre Oliva via Gcc-cvs
The branch 'aoliva/heads/testbase' was updated to point to:

 4d2e8fcdaf32... Daily bump.

It previously pointed to:

 ad642d2c9506... [5/n][PR rtl-optimization/115877] Fix handling of input/out

Diff:

Summary of changes (added commits):
---

  4d2e8fc... Daily bump. (*)
  d91b6c9... c++: ICE with NSDMIs and fn arguments [PR116015] (*)
  a247088... s390: Remove vector intrinsics (*)
  e8a7142... s390: Fix high-level builtins vec_gfmsum{,_accum}_128 (*)
  a82c4df... Fortran: fix minor frontend GMP leaks (*)
  edb2712... i386: Optimization for APX NDD is always zero-uppered for s (*)
  d08a5f2... i386: Optimization for APX NDD is always zero-uppered for l (*)
  1b76174... i386: Optimization for APX NDD is always zero-uppered for s (*)
  a302cd6... i386: Optimization for APX NDD is always zero-uppered for A (*)
  42aba47... Restrict pr116202-run-1.c test to riscv_v target (*)
  54be14b... Prevent future proc_ptr parsing issues in associate [PR1029 (*)
  bb23247... Fix ICE in build_function_decl [PR116292] (*)
  ca7936f... genoutput: Accelerate the place_operands function. (*)
  e4f9a87... Revert "[rtl-optimization/116244] Don't create bogus regs i (*)
  10972e6... testsuite: Fix fam-in-union-alone-in-struct-2.c with unsign (*)
  c3c83d2... Move ix86_align_loops into a separate pass and insert the p (*)
  9045ab7... Daily bump. (*)
  b13e346... testsuite: Fix struct size check [PR116155] (*)
  cc00a73... ifcvt: Fix force_operand ICE in noce_convert_multiple_sets  (*)
  9988d7e... Fortran: reject array constructor value of abstract type [P (*)
  ccd7068... RISC-V: Fix non-obvious comment typos (*)
  5618b02... Internal-fn: Handle vector bool type for type strict match  (*)
  49d5e21... LRA: Don't emit move for substituted CONSTATNT_P operand [P (*)
  bee532c... Regenerate avr.opt.urls (*)
  3f1e15e... Daily bump. (*)
  0451bc5... rs6000: ROP - Do not disable shrink-wrapping for leaf funct (*)
  ef90a13... RISC-V: Fix missing abi arg in test (*)
  e9738e7... [rtl-optimization/116244] Don't create bogus regs in alter_ (*)
  edc47d3... borrowck: Fix debug prints on 32-bits architectures (*)
  12028d7... borrowck: Avoid overloading issues on 32bit architectures (*)
  b219cbe... ifcvt: Handle multiple rewired regs and refactor noce_conve (*)
  72c9b5f... ifcvt: Allow more operations in multiple set if conversion (*)
  28b3812... ifcvt: handle sequences that clobber flags in noce_convert_ (*)
  68da681... AVR: target/85624 - Fix non-matching alignment in clrmem* i (*)
  24df2ab... 16-bit testsuite fixes - excessive code size (*)
  46bd63d... This fixes problems with tests that exceed a data type or t (*)
  40b9a7b... Avoid cfg corruption when using sjlj exceptions where loops (*)
  9ab8681... Use splay-tree-utils.h in tree-ssa-sccvn [PR30920] (*)
  fcc766c... aarch64: Emit ADD X, Y, Y instead of SHL X, Y, #1 for Advan (*)
  8d8db21... Fortran: Fix coarray in associate not linking [PR85510] (*)
  4bcb480... Initial support for AVX10.2 (*)
  7a970bd... PR target/116275: Handle STV of *extenddi2_doubleword_highp (*)
  7bf4cd4... LoongArch: Provide ashr lshr and ashl RTL pattern for vecto (*)
  0498f8b... LoongArch: Drop vcond{,u} expanders. (*)
  75e852b... LoongArch: Use iorn and andn standard pattern names. (*)
  9f3b5c2... PR modula2/116181 fix ODR warnings for C/m2 interface libra (*)
  f09be22... Daily bump. (*)
  2b23a44... Fortran: silence Wmaybe-uninitialized warnings for LTO buil (*)
  149a23e... AVR: -mlra is not documeted in TEXI. (*)
  29a3236... AVR: Add function avr.cc::ra_in_progress(). (*)
  19c9ba0... Daily bump. (*)
  8035619... i386: testsuite: Adapt fentryname3.c for r14-811 change [PR (*)
  331f7d8... i386: testsuite: Add -no-pie for pr113689-1.c [PR70150] (*)
  85a6073... Fix reference to the dom walker function in the documentati (*)
  16ce781... gm2: add missing debug output guard (*)
  9d5c500... testsuite: Fix up sse3-addsubps.c (*)
  09a87ea... AVR: ad target/113934 - Add option -mlra to enable LRA. (*)
  8cc67b5... c++: inherited CTAD fixes [PR116276] (*)
  70da0ca... c++: DECL_UNINSTANTIATED_TEMPLATE_FRIEND_P tweaks (*)
  cf7feae... c++: clean up cp_identifier_kind checks (*)
  6b4b27a... Daily bump. (*)
  d4e1290... [RISC-V][PR target/116283] Fix split code for recent Zbs im (*)
  4734c1b... Revert "lra: emit caller-save register spills before call i (*)
  9e4da94... Adjust rangers recomputation depth based on the number of B (*)
  5ce3874... Limit equivalency processing in rangers cache. (*)
  d0bc1cb... btf: Protect BTF_KIND_INFO against invalid kind (*)
  786ebbd... c++: Don't accept multiple enum definitions within template (*)
  180ede3... RISC-V: Enable stack clash in alloca (*)
  2862d99... RISC-V: Add support to vector stack-clash protection (*)
  b82d173... RISC-V: Stack-clash protection implemention (*)
  5694fcf... RISC-V: Move riscv_v_adjust_scalable_frame (*)
  0e604d0... RISC-V: Small stack tie changes (*)
  f91f720... c-family: regenerate c.op

[gcc(refs/users/aoliva/heads/testme)] optimize initialization of small padded objects

2024-08-14 Thread Alexandre Oliva via Gcc-cvs
https://gcc.gnu.org/g:8152f1f549179b377634b7ec360e6907fdd528c1

commit 8152f1f549179b377634b7ec360e6907fdd528c1
Author: Alexandre Oliva 
Date:   Wed Aug 14 21:59:28 2024 -0300

optimize initialization of small padded objects

Diff:
---
 gcc/expr.cc| 20 +---
 gcc/expr.h |  3 ++-
 gcc/fold-const.cc  | 33 +
 gcc/gimple-fold.cc | 50 ++
 gcc/gimplify.cc| 14 +-
 5 files changed, 111 insertions(+), 9 deletions(-)

diff --git a/gcc/expr.cc b/gcc/expr.cc
index 2089c2b86a98..a701c67b3485 100644
--- a/gcc/expr.cc
+++ b/gcc/expr.cc
@@ -7096,7 +7096,7 @@ count_type_elements (const_tree type, bool for_ctor_p)
 static bool
 categorize_ctor_elements_1 (const_tree ctor, HOST_WIDE_INT *p_nz_elts,
HOST_WIDE_INT *p_unique_nz_elts,
-   HOST_WIDE_INT *p_init_elts, bool *p_complete)
+   HOST_WIDE_INT *p_init_elts, int *p_complete)
 {
   unsigned HOST_WIDE_INT idx;
   HOST_WIDE_INT nz_elts, unique_nz_elts, init_elts, num_fields;
@@ -7218,7 +7218,10 @@ categorize_ctor_elements_1 (const_tree ctor, 
HOST_WIDE_INT *p_nz_elts,
 
   if (*p_complete && !complete_ctor_at_level_p (TREE_TYPE (ctor),
num_fields, elt_type))
-*p_complete = false;
+*p_complete = 0;
+  else if (*p_complete > 0
+  && type_has_padding_at_level_p (TREE_TYPE (ctor)))
+*p_complete = -1;
 
   *p_nz_elts += nz_elts;
   *p_unique_nz_elts += unique_nz_elts;
@@ -7239,7 +7242,10 @@ categorize_ctor_elements_1 (const_tree ctor, 
HOST_WIDE_INT *p_nz_elts,
  and place it in *P_ELT_COUNT.
* whether the constructor is complete -- in the sense that every
  meaningful byte is explicitly given a value --
- and place it in *P_COMPLETE.
+ and place it in *P_COMPLETE:
+ -  0 if any field is missing
+ -  1 if all fields are initialized, and there's no padding
+ - -1 if all fields are initialized, but there's padding
 
Return whether or not CTOR is a valid static constant initializer, the same
as "initializer_constant_valid_p (CTOR, TREE_TYPE (CTOR)) != 0".  */
@@ -7247,12 +7253,12 @@ categorize_ctor_elements_1 (const_tree ctor, 
HOST_WIDE_INT *p_nz_elts,
 bool
 categorize_ctor_elements (const_tree ctor, HOST_WIDE_INT *p_nz_elts,
  HOST_WIDE_INT *p_unique_nz_elts,
- HOST_WIDE_INT *p_init_elts, bool *p_complete)
+ HOST_WIDE_INT *p_init_elts, int *p_complete)
 {
   *p_nz_elts = 0;
   *p_unique_nz_elts = 0;
   *p_init_elts = 0;
-  *p_complete = true;
+  *p_complete = 1;
 
   return categorize_ctor_elements_1 (ctor, p_nz_elts, p_unique_nz_elts,
 p_init_elts, p_complete);
@@ -7313,7 +7319,7 @@ mostly_zeros_p (const_tree exp)
   if (TREE_CODE (exp) == CONSTRUCTOR)
 {
   HOST_WIDE_INT nz_elts, unz_elts, init_elts;
-  bool complete_p;
+  int complete_p;
 
   categorize_ctor_elements (exp, &nz_elts, &unz_elts, &init_elts,
&complete_p);
@@ -7331,7 +7337,7 @@ all_zeros_p (const_tree exp)
   if (TREE_CODE (exp) == CONSTRUCTOR)
 {
   HOST_WIDE_INT nz_elts, unz_elts, init_elts;
-  bool complete_p;
+  int complete_p;
 
   categorize_ctor_elements (exp, &nz_elts, &unz_elts, &init_elts,
&complete_p);
diff --git a/gcc/expr.h b/gcc/expr.h
index 533ae0af3871..04782b15f192 100644
--- a/gcc/expr.h
+++ b/gcc/expr.h
@@ -361,7 +361,8 @@ extern unsigned HOST_WIDE_INT highest_pow2_factor 
(const_tree);
 
 extern bool categorize_ctor_elements (const_tree, HOST_WIDE_INT *,
  HOST_WIDE_INT *, HOST_WIDE_INT *,
- bool *);
+ int *);
+extern bool type_has_padding_at_level_p (tree);
 extern bool immediate_const_ctor_p (const_tree, unsigned int words = 1);
 extern void store_constructor (tree, rtx, int, poly_int64, bool);
 extern HOST_WIDE_INT int_expr_size (const_tree exp);
diff --git a/gcc/fold-const.cc b/gcc/fold-const.cc
index 8908e7381e72..5e7fd6460c5d 100644
--- a/gcc/fold-const.cc
+++ b/gcc/fold-const.cc
@@ -8193,6 +8193,36 @@ native_encode_string (const_tree expr, unsigned char 
*ptr, int len, int off)
   return len;
 }
 
+/* subroutine of native_encode_expr.  Encode the CONSTRUCTOR
+   specified by EXPR into the buffer PTR of length LEN bytes.
+   Return the number of bytes placed in the buffer, or zero
+   upon failure.  */
+
+static int
+native_encode_constructor (const_tree expr, unsigned char *ptr, int len, int 
off)
+{
+  /* We are only concerned with zero-initialization constructors here.  */
+  if (CONSTRUCTOR_NELTS (expr))
+return 0;
+
+  /* Wide-char strings are encoded in target byte-order so native
+ encoding them is trivial.  */
+  if (BITS_PER_UNIT != CHAR_BIT
+  

[gcc/aoliva/heads/testme] (710 commits) optimize initialization of small padded objects

2024-08-14 Thread Alexandre Oliva via Gcc-cvs
The branch 'aoliva/heads/testme' was updated to point to:

 8152f1f54917... optimize initialization of small padded objects

It previously pointed to:

 9d90ad447ba1... [libstdc++] [testsuite] avoid async.cc loss of precision [P

Diff:

!!! WARNING: THE FOLLOWING COMMITS ARE NO LONGER ACCESSIBLE (LOST):
---

  9d90ad4... [libstdc++] [testsuite] avoid async.cc loss of precision [P


Summary of changes (added commits):
---

  8152f1f... optimize initialization of small padded objects
  4d2e8fc... Daily bump. (*)
  d91b6c9... c++: ICE with NSDMIs and fn arguments [PR116015] (*)
  a247088... s390: Remove vector intrinsics (*)
  e8a7142... s390: Fix high-level builtins vec_gfmsum{,_accum}_128 (*)
  a82c4df... Fortran: fix minor frontend GMP leaks (*)
  edb2712... i386: Optimization for APX NDD is always zero-uppered for s (*)
  d08a5f2... i386: Optimization for APX NDD is always zero-uppered for l (*)
  1b76174... i386: Optimization for APX NDD is always zero-uppered for s (*)
  a302cd6... i386: Optimization for APX NDD is always zero-uppered for A (*)
  42aba47... Restrict pr116202-run-1.c test to riscv_v target (*)
  54be14b... Prevent future proc_ptr parsing issues in associate [PR1029 (*)
  bb23247... Fix ICE in build_function_decl [PR116292] (*)
  ca7936f... genoutput: Accelerate the place_operands function. (*)
  e4f9a87... Revert "[rtl-optimization/116244] Don't create bogus regs i (*)
  10972e6... testsuite: Fix fam-in-union-alone-in-struct-2.c with unsign (*)
  c3c83d2... Move ix86_align_loops into a separate pass and insert the p (*)
  9045ab7... Daily bump. (*)
  b13e346... testsuite: Fix struct size check [PR116155] (*)
  cc00a73... ifcvt: Fix force_operand ICE in noce_convert_multiple_sets  (*)
  9988d7e... Fortran: reject array constructor value of abstract type [P (*)
  ccd7068... RISC-V: Fix non-obvious comment typos (*)
  5618b02... Internal-fn: Handle vector bool type for type strict match  (*)
  49d5e21... LRA: Don't emit move for substituted CONSTATNT_P operand [P (*)
  bee532c... Regenerate avr.opt.urls (*)
  3f1e15e... Daily bump. (*)
  0451bc5... rs6000: ROP - Do not disable shrink-wrapping for leaf funct (*)
  ef90a13... RISC-V: Fix missing abi arg in test (*)
  e9738e7... [rtl-optimization/116244] Don't create bogus regs in alter_ (*)
  edc47d3... borrowck: Fix debug prints on 32-bits architectures (*)
  12028d7... borrowck: Avoid overloading issues on 32bit architectures (*)
  b219cbe... ifcvt: Handle multiple rewired regs and refactor noce_conve (*)
  72c9b5f... ifcvt: Allow more operations in multiple set if conversion (*)
  28b3812... ifcvt: handle sequences that clobber flags in noce_convert_ (*)
  68da681... AVR: target/85624 - Fix non-matching alignment in clrmem* i (*)
  24df2ab... 16-bit testsuite fixes - excessive code size (*)
  46bd63d... This fixes problems with tests that exceed a data type or t (*)
  40b9a7b... Avoid cfg corruption when using sjlj exceptions where loops (*)
  9ab8681... Use splay-tree-utils.h in tree-ssa-sccvn [PR30920] (*)
  fcc766c... aarch64: Emit ADD X, Y, Y instead of SHL X, Y, #1 for Advan (*)
  8d8db21... Fortran: Fix coarray in associate not linking [PR85510] (*)
  4bcb480... Initial support for AVX10.2 (*)
  7a970bd... PR target/116275: Handle STV of *extenddi2_doubleword_highp (*)
  7bf4cd4... LoongArch: Provide ashr lshr and ashl RTL pattern for vecto (*)
  0498f8b... LoongArch: Drop vcond{,u} expanders. (*)
  75e852b... LoongArch: Use iorn and andn standard pattern names. (*)
  9f3b5c2... PR modula2/116181 fix ODR warnings for C/m2 interface libra (*)
  f09be22... Daily bump. (*)
  2b23a44... Fortran: silence Wmaybe-uninitialized warnings for LTO buil (*)
  149a23e... AVR: -mlra is not documeted in TEXI. (*)
  29a3236... AVR: Add function avr.cc::ra_in_progress(). (*)
  19c9ba0... Daily bump. (*)
  8035619... i386: testsuite: Adapt fentryname3.c for r14-811 change [PR (*)
  331f7d8... i386: testsuite: Add -no-pie for pr113689-1.c [PR70150] (*)
  85a6073... Fix reference to the dom walker function in the documentati (*)
  16ce781... gm2: add missing debug output guard (*)
  9d5c500... testsuite: Fix up sse3-addsubps.c (*)
  09a87ea... AVR: ad target/113934 - Add option -mlra to enable LRA. (*)
  8cc67b5... c++: inherited CTAD fixes [PR116276] (*)
  70da0ca... c++: DECL_UNINSTANTIATED_TEMPLATE_FRIEND_P tweaks (*)
  cf7feae... c++: clean up cp_identifier_kind checks (*)
  6b4b27a... Daily bump. (*)
  d4e1290... [RISC-V][PR target/116283] Fix split code for recent Zbs im (*)
  4734c1b... Revert "lra: emit caller-save register spills before call i (*)
  9e4da94... Adjust rangers recomputation depth based on the number of B (*)
  5ce3874... Limit equivalency processing in rangers cache. (*)
  d0bc1cb... btf: Protect BTF_KIND_INFO against invalid kind (*)
  786ebbd... c++: Don't accept multiple enum definitions within template (*)
  180ede3... RISC-V: Enab

[gcc] Created branch 'ix86/heads/avx10.2' in namespace 'refs/vendors'

2024-08-14 Thread Haochen Jiang via Gcc-cvs
The branch 'ix86/heads/avx10.2' was created in namespace 'refs/vendors' 
pointing to:

 4d2e8fcdaf32... Daily bump.


[gcc(refs/vendors/ix86/heads/avx10.2)] AVX10.2 ymm rounding: Support vadd{s, d, h} and vcmp{s, d, h} intrins

2024-08-14 Thread Haochen Jiang via Gcc-cvs
https://gcc.gnu.org/g:7eed0d3f707f4bade3edb3268d8d5b2c9b8ef8d8

commit 7eed0d3f707f4bade3edb3268d8d5b2c9b8ef8d8
Author: Hu, Lin1 
Date:   Thu Aug 15 09:38:15 2024 +0800

AVX10.2 ymm rounding: Support vadd{s,d,h} and vcmp{s,d,h} intrins

gcc/ChangeLog:

* config.gcc: Add avx10_2roundingintrin.h.
* config/i386/i386-builtin-types.def: Add new DEF_FUNCTION_TYPE.
* config/i386/i386-builtin.def (BDESC): Add new builtins.
* config/i386/i386-expand.cc (ix86_expand_round_builtin): Handle
V4DF_FTYPE_V4DF_V4DF_V4DF_UQI_INT, 
V8SF_FTYPE_V8SF_V8SF_V8SF_UQI_INT,
V16HF_FTYPE_V16HF_V16HF_V16HF_UHI_INT, 
UQI_FTYPE_V4DF_V4DF_INT_UQI_INT,
UHI_FTYPE_V16HF_V16HF_INT_UHI_INT, UQI_FTYPE_V8SF_V8SF_INT_UQI_INT.
* config/i386/immintrin.h: Include avx10_2roundingintrin.h.
* config/i386/sse.md: Change subst_attr name due to renaming.
* config/i386/subst.md:
(<round_mode512bit_condition>): Add condition check for avx10.2
rounding control 256bit intrins and renamed to ...
(<round_mode_condition>): ...this.
(round_saeonly_mode512bit_condition): Add condition check for
avx10.2 rounding control 256 bit intrins and renamed to ...
(round_saeonly_mode_condition): ...this.
* config/i386/avx10_2roundingintrin.h: New file.

gcc/testsuite/ChangeLog:

* gcc.target/i386/avx-1.c: Add -mavx10.2 and new builtin test.
* gcc.target/i386/avx-2.c: Ditto.
* gcc.target/i386/sse-13.c: Add new tests.
* gcc.target/i386/sse-23.c: Ditto.
* gcc.target/i386/sse-14.c: Ditto.
* gcc.target/i386/sse-22.c: Ditto.
* gcc.target/i386/avx10_2-rounding-1.c: New test.

Diff:
---
 gcc/config.gcc |   2 +-
 gcc/config/i386/avx10_2roundingintrin.h| 337 +
 gcc/config/i386/i386-builtin-types.def |   8 +
 gcc/config/i386/i386-builtin.def   |   8 +
 gcc/config/i386/i386-expand.cc |   6 +
 gcc/config/i386/immintrin.h|   2 +
 gcc/config/i386/sse.md | 100 +++---
 gcc/config/i386/subst.md   |  32 +-
 gcc/testsuite/gcc.target/i386/avx-1.c  |  10 +-
 gcc/testsuite/gcc.target/i386/avx-2.c  |   2 +-
 gcc/testsuite/gcc.target/i386/avx10_2-rounding-1.c |  64 
 gcc/testsuite/gcc.target/i386/sse-13.c |   8 +
 gcc/testsuite/gcc.target/i386/sse-14.c |  17 ++
 gcc/testsuite/gcc.target/i386/sse-22.c |  17 ++
 gcc/testsuite/gcc.target/i386/sse-23.c |   8 +
 15 files changed, 558 insertions(+), 63 deletions(-)

diff --git a/gcc/config.gcc b/gcc/config.gcc
index a36dd1bcbc66..2c0f4518638d 100644
--- a/gcc/config.gcc
+++ b/gcc/config.gcc
@@ -452,7 +452,7 @@ i[34567]86-*-* | x86_64-*-*)
   cmpccxaddintrin.h amxfp16intrin.h prfchiintrin.h
   raointintrin.h amxcomplexintrin.h avxvnniint16intrin.h
   sm3intrin.h sha512intrin.h sm4intrin.h
-  usermsrintrin.h"
+  usermsrintrin.h avx10_2roundingintrin.h"
;;
 ia64-*-*)
extra_headers=ia64intrin.h
diff --git a/gcc/config/i386/avx10_2roundingintrin.h 
b/gcc/config/i386/avx10_2roundingintrin.h
new file mode 100644
index 000000000000..5698ed05c1d6
--- /dev/null
+++ b/gcc/config/i386/avx10_2roundingintrin.h
@@ -0,0 +1,337 @@
+/* Copyright (C) 2024 Free Software Foundation, Inc.
+
+   This file is part of GCC.
+
+   GCC is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3, or (at your option)
+   any later version.
+
+   GCC is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   Under Section 7 of GPL version 3, you are granted additional
+   permissions described in the GCC Runtime Library Exception, version
+   3.1, as published by the Free Software Foundation.
+
+   You should have received a copy of the GNU General Public License and
+   a copy of the GCC Runtime Library Exception along with this program;
+   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#ifndef _IMMINTRIN_H_INCLUDED
+#error "Never use <avx10_2roundingintrin.h> directly; include <immintrin.h> instead."
+#endif
+
+#ifndef _AVX10_2ROUNDINGINTRIN_H_INCLUDED
+#define _AVX10_2ROUNDINGINTRIN_H_INCLUDED
+
+#ifndef __AVX10_2_256__
+#pragma GCC push_options
+#pragma GCC target("avx10.2-256")
+#define __DISABLE_AVX10_2_256__
+#endif /* __AVX10_2_256__ */
+
+#ifdef  __OPTIMIZE__
+extern __inline __m256d
+__attribute__ ((__gnu_inline__,

[gcc(refs/vendors/ix86/heads/avx10.2)] AVX10.2 ymm rounding: Support vcvtpd2{, u}{dq, qq} intrins

2024-08-14 Thread Haochen Jiang via Gcc-cvs
https://gcc.gnu.org/g:3c755fad847af4d38d06ed7c22fe6bfdf227b718

commit 3c755fad847af4d38d06ed7c22fe6bfdf227b718
Author: Hu, Lin1 
Date:   Thu Aug 15 09:38:17 2024 +0800

AVX10.2 ymm rounding: Support vcvtpd2{,u}{dq,qq} intrins

gcc/ChangeLog:

* config/i386/avx10_2roundingintrin.h: Add new intrins.
* config/i386/i386-builtin-types.def: Add new DEF_FUNCTION_TYPE.
* config/i386/i386-builtin.def (BDESC): Add new builtins.
* config/i386/i386-expand.cc (ix86_expand_round_builtin): Handle
V4DI_FTYPE_V4DF_V4DI_UQI_INT, V4SI_FTYPE_V4DF_V4SI_UQI_INT.
* config/i386/sse.md:
(avx_cvtpd2dq256): Change name to
avx_cvtpd2dq256 and extend pattern to
generate 256bit insns.
(fixuns_notrunc2):
Add round_mode_condition.
* config/i386/subst.md (round_pd2udqsuff): New iterator.

gcc/testsuite/ChangeLog:

* gcc.target/i386/avx-1.c: Add new builtin test.
* gcc.target/i386/sse-13.c: Ditto.
* gcc.target/i386/sse-23.c: Ditto.
* gcc.target/i386/sse-14.c: Add new macro test.
* gcc.target/i386/sse-22.c: Ditto.
* gcc.target/i386/avx10_2-rounding-1.c: Add test.

Diff:
---
 gcc/config/i386/avx10_2roundingintrin.h| 218 +
 gcc/config/i386/i386-builtin-types.def |   2 +
 gcc/config/i386/i386-builtin.def   |   4 +
 gcc/config/i386/i386-expand.cc |   2 +
 gcc/config/i386/sse.md |  13 +-
 gcc/config/i386/subst.md   |   1 +
 gcc/testsuite/gcc.target/i386/avx-1.c  |   4 +
 gcc/testsuite/gcc.target/i386/avx10_2-rounding-1.c |  33 
 gcc/testsuite/gcc.target/i386/sse-13.c |   4 +
 gcc/testsuite/gcc.target/i386/sse-14.c |  12 ++
 gcc/testsuite/gcc.target/i386/sse-22.c |  12 ++
 gcc/testsuite/gcc.target/i386/sse-23.c |   4 +
 12 files changed, 303 insertions(+), 6 deletions(-)

diff --git a/gcc/config/i386/avx10_2roundingintrin.h 
b/gcc/config/i386/avx10_2roundingintrin.h
index 09285c1ffcdd..3e5e9f3ba0ec 100644
--- a/gcc/config/i386/avx10_2roundingintrin.h
+++ b/gcc/config/i386/avx10_2roundingintrin.h
@@ -348,6 +348,144 @@ _mm256_maskz_cvt_roundpd_ps (__mmask8 __U, __m256d __A, 
const int __R)
 (__mmask8) __U,
 __R);
 }
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvt_roundpd_epi32 (__m256d __A, const int __R)
+{
+  return
+(__m128i) __builtin_ia32_cvtpd2dq256_mask_round ((__v4df) __A,
+(__v4si)
+_mm_undefined_si128 (),
+(__mmask8) -1,
+__R);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvt_roundpd_epi32 (__m128i __W, __mmask8 __U, __m256d __A,
+  const int __R)
+{
+  return (__m128i) __builtin_ia32_cvtpd2dq256_mask_round ((__v4df) __A,
+ (__v4si) __W,
+ (__mmask8) __U,
+ __R);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_cvt_roundpd_epi32 (__mmask8 __U, __m256d __A, const int __R)
+{
+  return (__m128i) __builtin_ia32_cvtpd2dq256_mask_round ((__v4df) __A,
+ (__v4si)
+ _mm_setzero_si128 (),
+ (__mmask8) __U,
+ __R);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvt_roundpd_epi64 (__m256d __A, const int __R)
+{
+  return
+(__m256i) __builtin_ia32_cvtpd2qq256_mask_round ((__v4df) __A,
+(__v4di)
+_mm256_setzero_si256 (),
+(__mmask8) -1,
+__R);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvt_roundpd_epi64 (__m256i __W, __mmask8 __U, __m256d __A,
+  const int __R)
+{
+  return (__m256i) __builtin_ia32_cvtpd2qq256_mask_round ((__v4df) __A,
+ (__v4di) __W,
+ 

[gcc(refs/vendors/ix86/heads/avx10.2)] AVX10.2 ymm rounding: Support vcvtdq2p{s, h} and vcvtpd2p{s, h} intrins

2024-08-14 Thread Haochen Jiang via Gcc-cvs
https://gcc.gnu.org/g:33181163a09cb315cc8d19464cb1feca063c959c

commit 33181163a09cb315cc8d19464cb1feca063c959c
Author: Hu, Lin1 
Date:   Thu Aug 15 09:38:16 2024 +0800

AVX10.2 ymm rounding: Support vcvtdq2p{s,h} and vcvtpd2p{s,h} intrins

gcc/ChangeLog:

* config/i386/avx10_2roundingintrin.h: Add new intrins.
* config/i386/i386-builtin-types.def: Add new DEF_FUNCTION_TYPE.
* config/i386/i386-builtin.def (BDESC): Add new builtins.
* config/i386/i386-expand.cc (ix86_expand_round_builtin): Handle
V8SF_FTYPE_V8SI_V8SF_UQI_INT, V4SF_FTYPE_V4DF_V4SF_UQI_INT,
V8HF_FTYPE_V8SI_V8HF_UQI_INT, V8HF_FTYPE_V4DF_V8HF_UQI_INT.
* config/i386/sse.md:

(avx512fp16_vcvt2ph_):
Add condition check.
(avx512fp16_vcvtpd2ph_v4df_mask_round): New expand.
(*avx512fp16_vcvt2ph__mask): Change name to
avx512fp16_vcvt2ph__mask_1
and extend pattern to generate 256bit insns.
(avx_cvtpd2ps256): Change name to
avx_cvtpd2ps256 and extend pattern to
generate 256bit insns.
* config/i386/subst.md (round_applied): New condition.
(round_suff): New iterator.
(round_mode_condition): Add V32HI check for 512bit.
(round_saeonly_mode_condition): Ditto.

gcc/testsuite/ChangeLog:

* gcc.target/i386/avx-1.c: Add new builtin test.
* gcc.target/i386/sse-13.c: Ditto.
* gcc.target/i386/sse-23.c: Ditto.
* gcc.target/i386/sse-14.c: Add new macro test.
* gcc.target/i386/sse-22.c: Ditto.
* gcc.target/i386/avx10_2-rounding-1.c: Add test.

Diff:
---
 gcc/config/i386/avx10_2roundingintrin.h| 210 +
 gcc/config/i386/i386-builtin-types.def |   4 +
 gcc/config/i386/i386-builtin.def   |   4 +
 gcc/config/i386/i386-expand.cc |   4 +
 gcc/config/i386/sse.md |  32 +++-
 gcc/config/i386/subst.md   |   4 +
 gcc/testsuite/gcc.target/i386/avx-1.c  |   4 +
 gcc/testsuite/gcc.target/i386/avx10_2-rounding-1.c |  44 -
 gcc/testsuite/gcc.target/i386/sse-13.c |   4 +
 gcc/testsuite/gcc.target/i386/sse-14.c |  12 ++
 gcc/testsuite/gcc.target/i386/sse-22.c |  12 ++
 gcc/testsuite/gcc.target/i386/sse-23.c |   4 +
 12 files changed, 322 insertions(+), 16 deletions(-)

diff --git a/gcc/config/i386/avx10_2roundingintrin.h 
b/gcc/config/i386/avx10_2roundingintrin.h
index 5698ed05c1d6..09285c1ffcdd 100644
--- a/gcc/config/i386/avx10_2roundingintrin.h
+++ b/gcc/config/i386/avx10_2roundingintrin.h
@@ -216,6 +216,138 @@ _mm256_mask_cmp_round_ps_mask (__mmask8 __U, __m256 __A, 
__m256 __B,
(__mmask8) __U,
__R);
 }
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvt_roundepi32_ph (__m256i __A, const int __R)
+{
+  return (__m128h) __builtin_ia32_vcvtdq2ph256_mask_round ((__v8si) __A,
+  (__v8hf)
+  _mm_setzero_ph (),
+  (__mmask8) -1,
+  __R);
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvt_roundepi32_ph (__m128h __W, __mmask8 __U, __m256i __A,
+  const int __R)
+{
+  return (__m128h) __builtin_ia32_vcvtdq2ph256_mask_round ((__v8si) __A,
+  (__v8hf) __W,
+  (__mmask8) __U,
+  __R);
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_cvt_roundepi32_ph (__mmask8 __U, __m256i __A, const int __R)
+{
+  return (__m128h) __builtin_ia32_vcvtdq2ph256_mask_round ((__v8si) __A,
+  (__v8hf)
+  _mm_setzero_ph (),
+  (__mmask8) __U,
+  __R);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvt_roundepi32_ps (__m256i __A, const int __R)
+{
+  return (__m256) __builtin_ia32_cvtdq2ps256_mask_round ((__v8si) __A,
+(__v8sf)
+_mm256_undefined_ps (),
+  

[gcc(refs/vendors/ix86/heads/avx10.2)] AVX10.2 ymm rounding: Support vcvtph2p{s, d, sx} and vcvtph2{, u}{dq, qq} intrins

2024-08-14 Thread Haochen Jiang via Gcc-cvs
https://gcc.gnu.org/g:6fc313aad25ad678e1d32f290edadc5ac2481c7d

commit 6fc313aad25ad678e1d32f290edadc5ac2481c7d
Author: Hu, Lin1 
Date:   Thu Aug 15 09:38:18 2024 +0800

AVX10.2 ymm rounding: Support vcvtph2p{s,d,sx} and vcvtph2{,u}{dq,qq} 
intrins

gcc/ChangeLog:

* config/i386/avx10_2roundingintrin.h: New intrins.
* config/i386/i386-builtin-types.def: Add new DEF_FUNCTION_TYPE.
* config/i386/i386-builtin.def (BDESC): Add new builtins.
* config/i386/i386-expand.cc (ix86_expand_round_builtin): Handle
V8SF_FTYPE_V8HF_V8SF_UQI_INT, V8SI_FTYPE_V8HF_V8SI_UQI_INT,
V4DF_FTYPE_V8HF_V4DF_UQI_INT, V4DI_FTYPE_V8HF_V4DI_UQI_INT.
* config/i386/sse.md:
(avx512fp16_float_extend_ph2):
Add condition check.
(avx512fp16_vcvtph2_
 ):
Ditto.
(avx512fp16_float_extend_ph2): Extend round 
saeonly.
(vcvtph2ps256): Ditto.
* config/i386/subst.md
(round_saeonly_applied): New condition.

gcc/testsuite/ChangeLog:

* gcc.target/i386/avx-1.c: Add new builtin test.
* gcc.target/i386/sse-13.c: Ditto.
* gcc.target/i386/sse-14.c: Ditto.
* gcc.target/i386/sse-22.c: Add new macro test.
* gcc.target/i386/sse-23.c: Ditto.
* gcc.target/i386/avx10_2-rounding-1.c: Add test.

Diff:
---
 gcc/config/i386/avx10_2roundingintrin.h| 384 +
 gcc/config/i386/i386-builtin-types.def |   4 +
 gcc/config/i386/i386-builtin.def   |   7 +
 gcc/config/i386/i386-expand.cc |   4 +
 gcc/config/i386/sse.md |  19 +-
 gcc/config/i386/subst.md   |   1 +
 gcc/testsuite/gcc.target/i386/avx-1.c  |   7 +
 gcc/testsuite/gcc.target/i386/avx10_2-rounding-1.c |  57 +++
 gcc/testsuite/gcc.target/i386/sse-13.c |   7 +
 gcc/testsuite/gcc.target/i386/sse-14.c |  20 ++
 gcc/testsuite/gcc.target/i386/sse-22.c |  21 ++
 gcc/testsuite/gcc.target/i386/sse-23.c |   7 +
 12 files changed, 529 insertions(+), 9 deletions(-)

diff --git a/gcc/config/i386/avx10_2roundingintrin.h 
b/gcc/config/i386/avx10_2roundingintrin.h
index 3e5e9f3ba0ec..29966f5e1bf8 100644
--- a/gcc/config/i386/avx10_2roundingintrin.h
+++ b/gcc/config/i386/avx10_2roundingintrin.h
@@ -486,6 +486,246 @@ _mm256_maskz_cvt_roundpd_epu64 (__mmask8 __U, __m256d 
__A, const int __R)
  (__mmask8) __U,
  __R);
 }
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvt_roundph_epi32 (__m128h __A, const int __R)
+{
+  return
+(__m256i) __builtin_ia32_vcvtph2dq256_mask_round ((__v8hf) __A,
+ (__v8si)
+ _mm256_setzero_si256 (),
+ (__mmask8) -1,
+ __R);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvt_roundph_epi32 (__m256i __W, __mmask8 __U, __m128h __A,
+  const int __R)
+{
+  return (__m256i) __builtin_ia32_vcvtph2dq256_mask_round ((__v8hf) __A,
+  (__v8si) __W,
+  (__mmask8) __U,
+  __R);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_cvt_roundph_epi32 (__mmask8 __U, __m128h __A, const int __R)
+{
+  return
+(__m256i) __builtin_ia32_vcvtph2dq256_mask_round ((__v8hf) __A,
+ (__v8si)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U,
+ __R);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvt_roundph_pd (__m128h __A, const int __R)
+{
+  return (__m256d) __builtin_ia32_vcvtph2pd256_mask_round ((__v8hf) __A,
+  (__v4df)
+  _mm256_setzero_pd (),
+  (__mmask8) -1,
+  __R);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvt_roundph_pd (__m256d __W, __mmask8 __U, __m128h __A,
+   const int __R)
+{
+  return (__m256d)

[gcc(refs/vendors/ix86/heads/avx10.2)] AVX10.2 ymm rounding: Support vcvtph2{, u}w and vcvtps2p{d, hx} intrins

2024-08-14 Thread Haochen Jiang via Gcc-cvs
https://gcc.gnu.org/g:a0e52c6dc21d8d2713e63e70e8bdbdc5dc536185

commit a0e52c6dc21d8d2713e63e70e8bdbdc5dc536185
Author: Hu, Lin1 
Date:   Thu Aug 15 09:38:18 2024 +0800

AVX10.2 ymm rounding: Support vcvtph2{,u}w and vcvtps2p{d,hx} intrins

gcc/ChangeLog:

* config/i386/avx10_2roundingintrin.h: New intrins.
* config/i386/i386-builtin-types.def: Add new DEF_FUNCTION_TYPE.
* config/i386/i386-builtin.def (BDESC): Add new builtins.
* config/i386/i386-expand.cc (ix86_expand_round_builtin): Handle
V16HI_FTYPE_V16HF_V16HI_UHI_INT, V4DF_FTYPE_V4SF_V4DF_UQI_INT
V8HF_FTYPE_V8SF_V8HF_UQI_INT.
* config/i386/sse.md
(avx512fp16_vcvt2ph_):
Add round condition check.
* config/i386/subst.md (round_mode_condition): Add V16HI check for
256bit.

gcc/testsuite/ChangeLog:

* gcc.target/i386/avx-1.c: Add new builtin test.
* gcc.target/i386/sse-13.c: Ditto.
* gcc.target/i386/sse-14.c: Ditto.
* gcc.target/i386/sse-22.c: Add new macro test.
* gcc.target/i386/sse-23.c: Ditto.
* gcc.target/i386/avx10_2-rounding-1.c: Add test.

Diff:
---
 gcc/config/i386/avx10_2roundingintrin.h| 220 +
 gcc/config/i386/i386-builtin-types.def |   3 +
 gcc/config/i386/i386-builtin.def   |   4 +
 gcc/config/i386/i386-expand.cc |   3 +
 gcc/config/i386/sse.md |   2 +-
 gcc/config/i386/subst.md   |   1 +
 gcc/testsuite/gcc.target/i386/avx-1.c  |   4 +
 gcc/testsuite/gcc.target/i386/avx10_2-rounding-1.c |  36 
 gcc/testsuite/gcc.target/i386/sse-13.c |   4 +
 gcc/testsuite/gcc.target/i386/sse-14.c |  12 ++
 gcc/testsuite/gcc.target/i386/sse-22.c |  12 ++
 gcc/testsuite/gcc.target/i386/sse-23.c |   4 +
 12 files changed, 304 insertions(+), 1 deletion(-)

diff --git a/gcc/config/i386/avx10_2roundingintrin.h 
b/gcc/config/i386/avx10_2roundingintrin.h
index 29966f5e1bf8..bc3f92a7d1ae 100644
--- a/gcc/config/i386/avx10_2roundingintrin.h
+++ b/gcc/config/i386/avx10_2roundingintrin.h
@@ -726,6 +726,143 @@ _mm256_maskz_cvt_roundph_epu64 (__mmask8 __U, __m128h __A, const int __R)
   (__mmask8) __U,
   __R);
 }
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvt_roundph_epu16 (__m256h __A, const int __R)
+{
+  return
+(__m256i) __builtin_ia32_vcvtph2uw256_mask_round ((__v16hf) __A,
+ (__v16hi)
+ _mm256_undefined_si256 (),
+ (__mmask16) -1,
+ __R);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvt_roundph_epu16 (__m256i __W, __mmask16 __U, __m256h __A,
+  const int __R)
+{
+  return (__m256i) __builtin_ia32_vcvtph2uw256_mask_round ((__v16hf) __A,
+  (__v16hi) __W,
+  (__mmask16) __U,
+  __R);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_cvt_roundph_epu16 (__mmask16 __U, __m256h __A, const int __R)
+{
+  return
+(__m256i) __builtin_ia32_vcvtph2uw256_mask_round ((__v16hf) __A,
+ (__v16hi)
+ _mm256_setzero_si256 (),
+ (__mmask16) __U,
+ __R);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvt_roundph_epi16 (__m256h __A, const int __R)
+{
+  return
+(__m256i) __builtin_ia32_vcvtph2w256_mask_round ((__v16hf) __A,
+(__v16hi)
+_mm256_undefined_si256 (),
+(__mmask16) -1,
+__R);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvt_roundph_epi16 (__m256i __W, __mmask16 __U, __m256h __A,
+  const int __R)
+{
+  return (__m256i) __builtin_ia32_vcvtph2w256_mask_round ((__v16hf) __A,
+ (__v16hi) __W,
+   

[gcc(refs/vendors/ix86/heads/avx10.2)] AVX10.2 ymm rounding: Support vcvtps2{, u}{dq, qq} intrins

2024-08-14 Thread Haochen Jiang via Gcc-cvs
https://gcc.gnu.org/g:4d8dc583f64af45f4f0fd4c0cce5c40283b9f1cc

commit 4d8dc583f64af45f4f0fd4c0cce5c40283b9f1cc
Author: Hu, Lin1 
Date:   Thu Aug 15 09:38:19 2024 +0800

AVX10.2 ymm rounding: Support vcvtps2{,u}{dq,qq} intrins

gcc/ChangeLog:

* config/i386/avx10_2roundingintrin.h: New intrins.
* config/i386/i386-builtin-types.def: Add new DEF_FUNCTION_TYPE.
* config/i386/i386-builtin.def (BDESC): Add new builtins.
* config/i386/i386-expand.cc (ix86_expand_round_builtin): Handle
V8SI_FTYPE_V8SF_V8SI_UQI_INT, V4DI_FTYPE_V4SF_V4DI_UQI_INT.
* config/i386/sse.md
(_fix_notrunc):
Extend to round.

(_fixuns_notrunc):
Add round condition check.
* config/i386/subst.md (round_constraint4): New.

gcc/testsuite/ChangeLog:

* gcc.target/i386/avx-1.c: Add new builtin test.
* gcc.target/i386/sse-13.c: Ditto.
* gcc.target/i386/sse-14.c: Ditto.
* gcc.target/i386/sse-22.c: Add new macro test.
* gcc.target/i386/sse-23.c: Ditto.
* gcc.target/i386/avx10_2-rounding-1.c: Add test.

Diff:
---
 gcc/config/i386/avx10_2roundingintrin.h| 226 +
 gcc/config/i386/i386-builtin-types.def |   2 +
 gcc/config/i386/i386-builtin.def   |   4 +
 gcc/config/i386/i386-expand.cc |   2 +
 gcc/config/i386/sse.md |  10 +-
 gcc/config/i386/subst.md   |   1 +
 gcc/testsuite/gcc.target/i386/avx-1.c  |   4 +
 gcc/testsuite/gcc.target/i386/avx10_2-rounding-1.c |  32 +++
 gcc/testsuite/gcc.target/i386/sse-13.c |   4 +
 gcc/testsuite/gcc.target/i386/sse-14.c |  12 ++
 gcc/testsuite/gcc.target/i386/sse-22.c |  12 ++
 gcc/testsuite/gcc.target/i386/sse-23.c |   4 +
 12 files changed, 308 insertions(+), 5 deletions(-)

diff --git a/gcc/config/i386/avx10_2roundingintrin.h b/gcc/config/i386/avx10_2roundingintrin.h
index bc3f92a7d1ae..fca10a6b586f 100644
--- a/gcc/config/i386/avx10_2roundingintrin.h
+++ b/gcc/config/i386/avx10_2roundingintrin.h
@@ -863,6 +863,146 @@ _mm256_maskz_cvtx_roundps_ph (__mmask8 __U, __m256 __A, const int __R)
(__mmask8) __U,
__R);
 }
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvt_roundps_epi32 (__m256 __A, const int __R)
+{
+  return
+(__m256i) __builtin_ia32_vcvtps2dq256_mask_round ((__v8sf) __A,
+ (__v8si)
+ _mm256_undefined_si256 (),
+ (__mmask8) -1,
+ __R);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvt_roundps_epi32 (__m256i __W, __mmask8 __U, __m256 __A,
+  const int __R)
+{
+  return (__m256i) __builtin_ia32_vcvtps2dq256_mask_round ((__v8sf) __A,
+  (__v8si) __W,
+  (__mmask8) __U,
+  __R);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_cvt_roundps_epi32 (__mmask8 __U, __m256 __A, const int __R)
+{
+  return
+(__m256i) __builtin_ia32_vcvtps2dq256_mask_round ((__v8sf) __A,
+ (__v8si)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U,
+ __R);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvt_roundps_epi64 (__m128 __A, const int __R)
+{
+  return
+(__m256i) __builtin_ia32_cvtps2qq256_mask_round ((__v4sf) __A,
+(__v4di)
+_mm256_setzero_si256 (),
+(__mmask8) -1,
+__R);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvt_roundps_epi64 (__m256i __W, __mmask8 __U, __m128 __A,
+  const int __R)
+{
+  return (__m256i) __builtin_ia32_cvtps2qq256_mask_round ((__v4sf) __A,
+ (__v4di) __W,
+ (__mmask8) __U,
+   

[gcc(refs/vendors/ix86/heads/avx10.2)] AVX10.2 ymm rounding: Support vcvtqq2p{s, d, h} and vcvttpd2{, u}{dq, qq} intrins

2024-08-14 Thread Haochen Jiang via Gcc-cvs
https://gcc.gnu.org/g:76f880db8637df5fd682cb5e77854c4014b4c59f

commit 76f880db8637df5fd682cb5e77854c4014b4c59f
Author: Hu, Lin1 
Date:   Thu Aug 15 09:38:20 2024 +0800

AVX10.2 ymm rounding: Support vcvtqq2p{s,d,h} and vcvttpd2{,u}{dq,qq} intrins

gcc/ChangeLog:

* config/i386/avx10_2roundingintrin.h: New intrins.
* config/i386/i386-builtin-types.def: Add new DEF_FUNCTION_TYPE.
* config/i386/i386-builtin.def (BDESC): Add new builtins.
* config/i386/i386-expand.cc (ix86_expand_round_builtin): Handle
V4DF_FTYPE_V4DI_V4DF_UQI_INT, V4SF_FTYPE_V4DI_V4SF_UQI_INT,
V8HF_FTYPE_V4DI_V8HF_UQI_INT.
* config/i386/sse.md:
(avx512fp16_vcvtqq2ph_v4di_mask_round): New expand.
(*avx512fp16_vcvt2ph__mask):
Extend round control and add "_1" suffix.

(float2):
Add condition check.

(float2):
Ditto.

(float2):
Limit suffix output.
(unspec_fix_truncv4dfv4si2): Extend round control.
(unspec_fixuns_truncv4dfv4si2): Ditto.
* config/i386/subst.md (round_qq2pssuff): New iterator.
(round_saeonly_suff): Ditto.

gcc/testsuite/ChangeLog:

* gcc.target/i386/avx-1.c: Add new builtin test.
* gcc.target/i386/sse-13.c: Ditto.
* gcc.target/i386/sse-14.c: Ditto.
* gcc.target/i386/sse-22.c: Add new macro test.
* gcc.target/i386/sse-23.c: Ditto.
* gcc.target/i386/avx10_2-rounding-2.c: New test.

Diff:
---
 gcc/config/i386/avx10_2roundingintrin.h| 390 +
 gcc/config/i386/i386-builtin-types.def |   3 +
 gcc/config/i386/i386-builtin.def   |   7 +
 gcc/config/i386/i386-expand.cc |   3 +
 gcc/config/i386/sse.md |  43 ++-
 gcc/config/i386/subst.md   |   2 +
 gcc/testsuite/gcc.target/i386/avx-1.c  |   7 +
 gcc/testsuite/gcc.target/i386/avx10_2-rounding-2.c |  72 
 gcc/testsuite/gcc.target/i386/sse-13.c |   7 +
 gcc/testsuite/gcc.target/i386/sse-14.c |  21 ++
 gcc/testsuite/gcc.target/i386/sse-22.c |  21 ++
 gcc/testsuite/gcc.target/i386/sse-23.c |   7 +
 12 files changed, 569 insertions(+), 14 deletions(-)

diff --git a/gcc/config/i386/avx10_2roundingintrin.h b/gcc/config/i386/avx10_2roundingintrin.h
index fca10a6b586f..25efd9d7b96b 100644
--- a/gcc/config/i386/avx10_2roundingintrin.h
+++ b/gcc/config/i386/avx10_2roundingintrin.h
@@ -1003,6 +1003,244 @@ _mm256_maskz_cvt_roundps_epu64 (__mmask8 __U, __m128 __A, const int __R)
  (__mmask8) __U,
  __R);
 }
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvt_roundepi64_pd (__m256i __A, const int __R)
+{
+  return (__m256d) __builtin_ia32_cvtqq2pd256_mask_round ((__v4di) __A,
+ (__v4df)
+ _mm256_setzero_pd (),
+ (__mmask8) -1,
+ __R);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvt_roundepi64_pd (__m256d __W, __mmask8 __U, __m256i __A,
+  const int __R)
+{
+  return (__m256d) __builtin_ia32_cvtqq2pd256_mask_round ((__v4di) __A,
+ (__v4df) __W,
+ (__mmask8) __U,
+ __R);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_cvt_roundepi64_pd (__mmask8 __U, __m256i __A, const int __R)
+{
+  return (__m256d) __builtin_ia32_cvtqq2pd256_mask_round ((__v4di) __A,
+ (__v4df)
+ _mm256_setzero_pd (),
+ (__mmask8) __U,
+ __R);
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvt_roundepi64_ph (__m256i __A, const int __R)
+{
+  return (__m128h) __builtin_ia32_vcvtqq2ph256_mask_round ((__v4di) __A,
+  (__v8hf)
+  _mm_setzero_ph (),
+  (__mmask8) -1,
+  __R);
+}
+
+extern __inline __m128h
+__attribute__ 

[gcc(refs/vendors/ix86/heads/avx10.2)] AVX10.2 ymm rounding: Support vcvttph2{, u}{dq, qq, w} intrins

2024-08-14 Thread Haochen Jiang via Gcc-cvs
https://gcc.gnu.org/g:33714da7770ba84eb3afcedef5ea1406e7d218bf

commit 33714da7770ba84eb3afcedef5ea1406e7d218bf
Author: Hu, Lin1 
Date:   Thu Aug 15 09:38:20 2024 +0800

AVX10.2 ymm rounding: Support vcvttph2{,u}{dq,qq,w} intrins

gcc/ChangeLog:

* config/i386/avx10_2roundingintrin.h: New intrins.
* config/i386/i386-builtin-types.def: Add new DEF_FUNCTION_TYPE.
* config/i386/i386-builtin.def (BDESC): Add new builtins.
* config/i386/sse.md 
(avx512fp16_fix_trunc2):
Extend round control for 256bit.
(unspec_avx512fp16_fix_trunc2):
Ditto.

(avx512fp16_fix_trunc2):
Add condition check.
* config/i386/subst.md
(round_saeonly_mode_condition): Add V16HI check for 256bit.

gcc/testsuite/ChangeLog:

* gcc.target/i386/avx-1.c: Add new builtin test.
* gcc.target/i386/sse-13.c: Ditto.
* gcc.target/i386/sse-14.c: Ditto.
* gcc.target/i386/sse-22.c: Add new macro test.
* gcc.target/i386/sse-23.c: Ditto.
* gcc.target/i386/avx10_2-rounding-2.c: Add test.

Diff:
---
 gcc/config/i386/avx10_2roundingintrin.h| 335 +
 gcc/config/i386/i386-builtin.def   |   6 +
 gcc/config/i386/sse.md |  10 +-
 gcc/config/i386/subst.md   |   1 +
 gcc/testsuite/gcc.target/i386/avx-1.c  |   6 +
 gcc/testsuite/gcc.target/i386/avx10_2-rounding-2.c |  46 +++
 gcc/testsuite/gcc.target/i386/sse-13.c |   6 +
 gcc/testsuite/gcc.target/i386/sse-14.c |  18 ++
 gcc/testsuite/gcc.target/i386/sse-22.c |  18 ++
 gcc/testsuite/gcc.target/i386/sse-23.c |   6 +
 10 files changed, 447 insertions(+), 5 deletions(-)

diff --git a/gcc/config/i386/avx10_2roundingintrin.h b/gcc/config/i386/avx10_2roundingintrin.h
index 25efd9d7b96b..45a04e5a7a87 100644
--- a/gcc/config/i386/avx10_2roundingintrin.h
+++ b/gcc/config/i386/avx10_2roundingintrin.h
@@ -1241,6 +1241,216 @@ _mm256_maskz_cvtt_roundpd_epu64 (__mmask8 __U, __m256d __A, const int __R)
   (__mmask8) __U,
   __R);
 }
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvtt_roundph_epi32 (__m128h __A, const int __R)
+{
+  return
+(__m256i) __builtin_ia32_vcvttph2dq256_mask_round ((__v8hf) __A,
+  (__v8si)
+  _mm256_setzero_si256 (),
+  (__mmask8) -1,
+  __R);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvtt_roundph_epi32 (__m256i __W, __mmask8 __U, __m128h __A,
+   const int __R)
+{
+  return (__m256i) __builtin_ia32_vcvttph2dq256_mask_round ((__v8hf) __A,
+   (__v8si) __W,
+   (__mmask8) __U,
+   __R);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_cvtt_roundph_epi32 (__mmask8 __U, __m128h __A, const int __R)
+{
+  return
+(__m256i) __builtin_ia32_vcvttph2dq256_mask_round ((__v8hf) __A,
+  (__v8si)
+  _mm256_setzero_si256 (),
+  (__mmask8) __U,
+  __R);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvtt_roundph_epi64 (__m128h __A, const int __R)
+{
+  return
+(__m256i) __builtin_ia32_vcvttph2qq256_mask_round ((__v8hf) __A,
+  (__v4di)
+  _mm256_setzero_si256 (),
+  (__mmask8) -1,
+  __R);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvtt_roundph_epi64 (__m256i __W, __mmask8 __U, __m128h __A,
+   const int __R)
+{
+  return (__m256i) __builtin_ia32_vcvttph2qq256_mask_round ((__v8hf) __A,
+   (__v4di) __W,
+   (__mmask8) __U,
+   __R);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__,

[gcc(refs/vendors/ix86/heads/avx10.2)] AVX10.2 ymm rounding: Support vcvttps2{, u}{dq, qq} and vcvtu{dq, qq}2p{s, d, h} intrins

2024-08-14 Thread Haochen Jiang via Gcc-cvs
https://gcc.gnu.org/g:2434026e4bcc5bde764e97a964cdea6940e73413

commit 2434026e4bcc5bde764e97a964cdea6940e73413
Author: Hu, Lin1 
Date:   Thu Aug 15 09:38:21 2024 +0800

AVX10.2 ymm rounding: Support vcvttps2{,u}{dq,qq} and vcvtu{dq,qq}2p{s,d,h} intrins

gcc/ChangeLog:

* config/i386/avx10_2roundingintrin.h: New intrins.
* config/i386/i386-builtin-types.def: Add new DEF_FUNCTION_TYPE.
* config/i386/i386-builtin.def (BDESC): Add new builtins.
* config/i386/sse.md
(unspec_fix_truncv8sfv8si2): Extend rounding control.
(fixuns_trunc2):
Ditto.

(floatuns2):
Add condition check.

(fix_trunc2):
Remove round_saeonly_name.

gcc/testsuite/ChangeLog:

* gcc.target/i386/avx-1.c: Add new builtin test.
* gcc.target/i386/sse-13.c: Ditto.
* gcc.target/i386/sse-14.c: Ditto.
* gcc.target/i386/sse-22.c: Add new macro test.
* gcc.target/i386/sse-23.c: Ditto.
* gcc.target/i386/avx10_2-rounding-2.c: Add test.

Diff:
---
 gcc/config/i386/avx10_2roundingintrin.h| 492 +
 gcc/config/i386/i386-builtin.def   |   9 +
 gcc/config/i386/sse.md |  27 +-
 gcc/testsuite/gcc.target/i386/avx-1.c  |   9 +
 gcc/testsuite/gcc.target/i386/avx10_2-rounding-2.c |  75 
 gcc/testsuite/gcc.target/i386/sse-13.c |   9 +
 gcc/testsuite/gcc.target/i386/sse-14.c |  26 ++
 gcc/testsuite/gcc.target/i386/sse-22.c |  27 ++
 gcc/testsuite/gcc.target/i386/sse-23.c |   9 +
 9 files changed, 670 insertions(+), 13 deletions(-)

diff --git a/gcc/config/i386/avx10_2roundingintrin.h b/gcc/config/i386/avx10_2roundingintrin.h
index 45a04e5a7a87..384facb424c0 100644
--- a/gcc/config/i386/avx10_2roundingintrin.h
+++ b/gcc/config/i386/avx10_2roundingintrin.h
@@ -1451,6 +1451,312 @@ _mm256_maskz_cvtt_roundph_epi16 (__mmask16 __U, __m256h __A, const int __R)
  (__mmask16) __U,
  __R);
 }
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvtt_roundps_epi32 (__m256 __A, const int __R)
+{
+  return
+(__m256i) __builtin_ia32_cvttps2dq256_mask_round ((__v8sf) __A,
+ (__v8si)
+ _mm256_undefined_si256 (),
+ (__mmask8) -1,
+ __R);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvtt_roundps_epi32 (__m256i __W, __mmask8 __U, __m256 __A,
+   const int __R)
+{
+  return (__m256i) __builtin_ia32_cvttps2dq256_mask_round ((__v8sf) __A,
+  (__v8si) __W,
+  (__mmask8) __U,
+  __R);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_cvtt_roundps_epi32 (__mmask8 __U, __m256 __A, const int __R)
+{
+  return
+(__m256i) __builtin_ia32_cvttps2dq256_mask_round ((__v8sf) __A,
+ (__v8si)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U,
+ __R);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvtt_roundps_epi64 (__m128 __A, const int __R)
+{
+  return
+(__m256i) __builtin_ia32_cvttps2qq256_mask_round ((__v4sf) __A,
+ (__v4di)
+ _mm256_setzero_si256 (),
+ (__mmask8) -1,
+ __R);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvtt_roundps_epi64 (__m256i __W, __mmask8 __U, __m128 __A,
+   const int __R)
+{
+  return (__m256i) __builtin_ia32_cvttps2qq256_mask_round ((__v4sf) __A,
+  (__v4di) __W,
+  (__mmask8) __U,
+  __R);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_cvtt_roundps_epi64 (__mmask8 __U, __m128 __A, const int __R)
+{
+  return
+(__m256i) __builtin_ia

[gcc(refs/vendors/ix86/heads/avx10.2)] AVX10.2 ymm rounding: Support vcvt{, u}w2ph and vdivp{s, d, h} intrins

2024-08-14 Thread Haochen Jiang via Gcc-cvs
https://gcc.gnu.org/g:d2cf76a13b5a513489c14e15faf0e722ab94d663

commit d2cf76a13b5a513489c14e15faf0e722ab94d663
Author: Hu, Lin1 
Date:   Thu Aug 15 09:38:22 2024 +0800

AVX10.2 ymm rounding: Support vcvt{,u}w2ph and vdivp{s,d,h} intrins

gcc/ChangeLog:

* config/i386/avx10_2roundingintrin.h: New intrins.
* config/i386/i386-builtin-types.def: Add new DEF_FUNCTION_TYPE.
* config/i386/i386-builtin.def (BDESC): Add new builtins.
* config/i386/i386-expand.cc (ix86_expand_round_builtin): Handle
V16HF_FTYPE_V16HI_V16HF_UHI_INT.

gcc/testsuite/ChangeLog:

* gcc.target/i386/avx-1.c: Add new builtin test.
* gcc.target/i386/sse-13.c: Ditto.
* gcc.target/i386/sse-14.c: Ditto.
* gcc.target/i386/sse-22.c: Add new macro test.
* gcc.target/i386/sse-23.c: Ditto.
* gcc.target/i386/avx10_2-rounding-3.c: New test.

Diff:
---
 gcc/config/i386/avx10_2roundingintrin.h| 286 +
 gcc/config/i386/i386-builtin-types.def |   1 +
 gcc/config/i386/i386-builtin.def   |   5 +
 gcc/config/i386/i386-expand.cc |   1 +
 gcc/testsuite/gcc.target/i386/avx-1.c  |   5 +
 gcc/testsuite/gcc.target/i386/avx10_2-rounding-3.c |  58 +
 gcc/testsuite/gcc.target/i386/sse-13.c |   5 +
 gcc/testsuite/gcc.target/i386/sse-14.c |  15 ++
 gcc/testsuite/gcc.target/i386/sse-22.c |  15 ++
 gcc/testsuite/gcc.target/i386/sse-23.c |   5 +
 10 files changed, 396 insertions(+)

diff --git a/gcc/config/i386/avx10_2roundingintrin.h b/gcc/config/i386/avx10_2roundingintrin.h
index 384facb424c0..15ea46b59835 100644
--- a/gcc/config/i386/avx10_2roundingintrin.h
+++ b/gcc/config/i386/avx10_2roundingintrin.h
@@ -1757,6 +1757,183 @@ _mm256_maskz_cvt_roundepu64_ps (__mmask8 __U, __m256i __A, const int __R)
  (__mmask8) __U,
  __R);
 }
+
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvt_roundepu16_ph (__m256i __A, const int __R)
+{
+  return (__m256h) __builtin_ia32_vcvtuw2ph256_mask_round ((__v16hi) __A,
+  (__v16hf)
+  _mm256_setzero_ph (),
+  (__mmask16) -1,
+  __R);
+}
+
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvt_roundepu16_ph (__m256h __W, __mmask16 __U, __m256i __A,
+  const int __R)
+{
+  return (__m256h) __builtin_ia32_vcvtuw2ph256_mask_round ((__v16hi) __A,
+  (__v16hf) __W,
+  (__mmask16) __U,
+  __R);
+}
+
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_cvt_roundepu16_ph (__mmask16 __U, __m256i __A, const int __R)
+{
+  return (__m256h) __builtin_ia32_vcvtuw2ph256_mask_round ((__v16hi) __A,
+  (__v16hf)
+  _mm256_setzero_ph (),
+  (__mmask16) __U,
+  __R);
+}
+
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvt_roundepi16_ph (__m256i __A, const int __R)
+{
+  return (__m256h) __builtin_ia32_vcvtw2ph256_mask_round ((__v16hi) __A,
+ (__v16hf)
+ _mm256_setzero_ph (),
+ (__mmask16) -1,
+ __R);
+}
+
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvt_roundepi16_ph (__m256h __W, __mmask16 __U, __m256i __A,
+  const int __R)
+{
+  return (__m256h) __builtin_ia32_vcvtw2ph256_mask_round ((__v16hi) __A,
+ (__v16hf) __W,
+ (__mmask16) __U,
+ __R);
+}
+
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_cvt_roundepi16_ph (__mmask16 __U, __m256i __A, const int __R)
+{
+  return (__m256h) __builtin_ia32_vcvtw2ph256_mask_round ((__v16hi) __A,
+  

[gcc(refs/vendors/ix86/heads/avx10.2)] AVX10.2 ymm rounding: Support vfmadd{132, 231, 213}p{s, d, h} intrins

2024-08-14 Thread Haochen Jiang via Gcc-cvs
https://gcc.gnu.org/g:8194aafd95d05e5a49f730f5cf7c09b562fb7f95

commit 8194aafd95d05e5a49f730f5cf7c09b562fb7f95
Author: Hu, Lin1 
Date:   Thu Aug 15 09:38:23 2024 +0800

AVX10.2 ymm rounding: Support vfmadd{132,231,213}p{s,d,h} intrins

gcc/ChangeLog:

* config/i386/avx10_2roundingintrin.h: New intrins.
* config/i386/i386-builtin.def (BDESC): Add new builtins.
* config/i386/sse.md:
(_fmadd__mask3): Add condition check.

gcc/testsuite/ChangeLog:

* gcc.target/i386/avx-1.c: Add new builtin test.
* gcc.target/i386/sse-13.c: Ditto.
* gcc.target/i386/sse-14.c: Ditto.
* gcc.target/i386/sse-22.c: Add new macro test.
* gcc.target/i386/sse-23.c: Ditto.
* gcc.target/i386/avx10_2-rounding-3.c: Add test.

Diff:
---
 gcc/config/i386/avx10_2roundingintrin.h| 176 +
 gcc/config/i386/i386-builtin.def   |   9 ++
 gcc/config/i386/sse.md |   2 +-
 gcc/testsuite/gcc.target/i386/avx-1.c  |   9 ++
 gcc/testsuite/gcc.target/i386/avx10_2-rounding-3.c |  31 
 gcc/testsuite/gcc.target/i386/sse-13.c |   9 ++
 gcc/testsuite/gcc.target/i386/sse-14.c |  12 ++
 gcc/testsuite/gcc.target/i386/sse-22.c |  12 ++
 gcc/testsuite/gcc.target/i386/sse-23.c |   9 ++
 9 files changed, 268 insertions(+), 1 deletion(-)

diff --git a/gcc/config/i386/avx10_2roundingintrin.h b/gcc/config/i386/avx10_2roundingintrin.h
index d5ea6bc57da9..9015095144e4 100644
--- a/gcc/config/i386/avx10_2roundingintrin.h
+++ b/gcc/config/i386/avx10_2roundingintrin.h
@@ -2092,6 +2092,146 @@ _mm256_maskz_fixupimm_round_ps (__mmask8 __U, __m256 __A, __m256 __B,
(__mmask8) __U,
__R);
 }
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_fmadd_round_pd (__m256d __A, __m256d __B, __m256d __D, const int __R)
+{
+  return (__m256d) __builtin_ia32_vfmaddpd256_mask_round ((__v4df) __A,
+ (__v4df) __B,
+ (__v4df) __D,
+ (__mmask8) -1,
+ __R);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_fmadd_round_pd (__m256d __A, __mmask8 __U, __m256d __B,
+   __m256d __D, const int __R)
+{
+  return (__m256d) __builtin_ia32_vfmaddpd256_mask_round ((__v4df) __A,
+ (__v4df) __B,
+ (__v4df) __D,
+ (__mmask8) __U, __R);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask3_fmadd_round_pd (__m256d __A, __m256d __B, __m256d __D,
+__mmask8 __U, const int __R)
+{
+  return (__m256d) __builtin_ia32_vfmaddpd256_mask3_round ((__v4df) __A,
+  (__v4df) __B,
+  (__v4df) __D,
+  (__mmask8) __U,
+  __R);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_fmadd_round_pd (__mmask8 __U, __m256d __A, __m256d __B,
+__m256d __D, const int __R)
+{
+  return (__m256d) __builtin_ia32_vfmaddpd256_maskz_round ((__v4df) __A,
+  (__v4df) __B,
+  (__v4df) __D,
+  (__mmask8) __U,
+  __R);
+}
+
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_fmadd_round_ph (__m256h __A, __m256h __B, __m256h __D, const int __R)
+{
+  return (__m256h) __builtin_ia32_vfmaddph256_mask_round ((__v16hf) __A,
+ (__v16hf) __B,
+ (__v16hf) __D,
+ (__mmask16) -1,
+ __R);
+}
+
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_fmadd_round_ph (__m256h __A, __mmask16 __U, __m256h __B,
+   __m256h __D, const int __R)
+{
+  return (__m256h) __builtin_ia32_vfmaddph256_ma

[gcc(refs/vendors/ix86/heads/avx10.2)] AVX10.2 ymm rounding: Support vfmaddcph and vfmaddsub{132, 231, 213}p{s, d, h} intrins

2024-08-14 Thread Haochen Jiang via Gcc-cvs
https://gcc.gnu.org/g:3e30845566e8334c2e654f31bcd21ef4ac5f96cc

commit 3e30845566e8334c2e654f31bcd21ef4ac5f96cc
Author: Hu, Lin1 
Date:   Thu Aug 15 09:38:23 2024 +0800

AVX10.2 ymm rounding: Support vfmaddcph and vfmaddsub{132,231,213}p{s,d,h} intrins

gcc/ChangeLog:

* config/i386/avx10_2roundingintrin.h: New intrins.
* config/i386/i386-builtin-types.def: Add new DEF_FUNCTION_TYPE.
* config/i386/i386-builtin.def (BDESC): Add new builtins.
* config/i386/sse.md:
(_fmaddsub__mask): Add condition check.
(_fmaddsub__mask3): Ditto.

gcc/testsuite/ChangeLog:

* gcc.target/i386/avx-1.c: Add new builtin test.
* gcc.target/i386/sse-13.c: Ditto.
* gcc.target/i386/sse-14.c: Ditto.
* gcc.target/i386/sse-22.c: Add new macro test.
* gcc.target/i386/sse-23.c: Ditto.
* gcc.target/i386/avx10_2-rounding-3.c: Add test.

Diff:
---
 gcc/config/i386/avx10_2roundingintrin.h| 238 +
 gcc/config/i386/i386-builtin.def   |  13 ++
 gcc/config/i386/sse.md |   4 +-
 gcc/testsuite/gcc.target/i386/avx-1.c  |  13 ++
 gcc/testsuite/gcc.target/i386/avx10_2-rounding-3.c |  43 
 gcc/testsuite/gcc.target/i386/sse-13.c |  13 ++
 gcc/testsuite/gcc.target/i386/sse-14.c |  16 ++
 gcc/testsuite/gcc.target/i386/sse-22.c |  15 ++
 gcc/testsuite/gcc.target/i386/sse-23.c |  13 ++
 9 files changed, 366 insertions(+), 2 deletions(-)

diff --git a/gcc/config/i386/avx10_2roundingintrin.h b/gcc/config/i386/avx10_2roundingintrin.h
index 9015095144e4..95e42410a109 100644
--- a/gcc/config/i386/avx10_2roundingintrin.h
+++ b/gcc/config/i386/avx10_2roundingintrin.h
@@ -2232,6 +2232,193 @@ _mm256_maskz_fmadd_round_ps (__mmask8 __U, __m256 __A, __m256 __B,
  (__mmask8) __U,
  __R);
 }
+
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_fmadd_round_pch (__m256h __A, __m256h __B, __m256h __D, const int __R)
+{
+  return (__m256h) __builtin_ia32_vfmaddcph256_round ((__v16hf) __A,
+ (__v16hf) __B,
+ (__v16hf) __D,
+ __R);
+}
+
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_fmadd_round_pch (__m256h __A, __mmask16 __U, __m256h __B,
+__m256h __D, const int __R)
+{
+  return (__m256h) __builtin_ia32_vfmaddcph256_mask_round ((__v16hf) __A,
+  (__v16hf) __B,
+  (__v16hf) __D,
+  __U,
+  __R);
+}
+
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask3_fmadd_round_pch (__m256h __A, __m256h __B, __m256h __D,
+ __mmask16 __U, const int __R)
+{
+  return (__m256h) __builtin_ia32_vfmaddcph256_mask3_round ((__v16hf) __A,
+   (__v16hf) __B,
+   (__v16hf) __D,
+   __U,
+   __R);
+}
+
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_fmadd_round_pch (__mmask16 __U, __m256h __A, __m256h __B,
+ __m256h __D, const int __R)
+{
+  return (__m256h) __builtin_ia32_vfmaddcph256_maskz_round ((__v16hf) __A,
+   (__v16hf) __B,
+   (__v16hf) __D,
+   __U,
+   __R);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_fmaddsub_round_pd (__m256d __A, __m256d __B, __m256d __D, const int __R)
+{
+  return (__m256d) __builtin_ia32_vfmaddsubpd256_mask_round ((__v4df) __A,
+(__v4df) __B,
+(__v4df) __D,
+(__mmask8) -1,
+__R);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_fmaddsub_round_pd (__m256d __A, __mmask8 __U, __

[gcc(refs/vendors/ix86/heads/avx10.2)] AVX10.2 ymm rounding: Support vfc{madd, mul}cph, vfixupimmp{s, d} intrins

2024-08-14 Thread Haochen Jiang via Gcc-cvs
https://gcc.gnu.org/g:f39193494511bdfac24accf5da95388ac32bb272

commit f39193494511bdfac24accf5da95388ac32bb272
Author: Hu, Lin1 
Date:   Thu Aug 15 09:38:22 2024 +0800

AVX10.2 ymm rounding: Support vfc{madd,mul}cph, vfixupimmp{s,d} intrins

gcc/ChangeLog:

* config/i386/avx10_2roundingintrin.h: New intrins.
* config/i386/i386-builtin-types.def: Add new DEF_FUNCTION_TYPE.
* config/i386/i386-builtin.def (BDESC): Add new builtins.
* config/i386/i386-expand.cc (ix86_expand_round_builtin): Handle
V16HF_FTYPE_V16HF_V16HF_INT, V16HF_FTYPE_V16HF_V16HF_V16HF_INT,
V16HF_FTYPE_V16HF_V16HF_V16HF_UQI_INT,
V4DF_FTYPE_V4DF_V4DF_V4DI_INT_UQI_INT,
V8SF_FTYPE_V8SF_V8SF_V8SI_INT_UQI_INT.
* config/i386/sse.md:
(_fixupimm):
Add condition check.
(_fixupimm_mask): Ditto.

gcc/testsuite/ChangeLog:

* gcc.target/i386/avx-1.c: Add new builtin test.
* gcc.target/i386/sse-13.c: Ditto.
* gcc.target/i386/sse-14.c: Ditto.
* gcc.target/i386/sse-22.c: Add new macro test.
* gcc.target/i386/sse-23.c: Ditto.
* gcc.target/i386/avx10_2-rounding-3.c: Add test.

Diff:
---
 gcc/config/i386/avx10_2roundingintrin.h| 247 +
 gcc/config/i386/i386-builtin-types.def |   5 +
 gcc/config/i386/i386-builtin.def   |  10 +
 gcc/config/i386/i386-expand.cc |   5 +
 gcc/config/i386/sse.md |   4 +-
 gcc/testsuite/gcc.target/i386/avx-1.c  |  10 +
 gcc/testsuite/gcc.target/i386/avx10_2-rounding-3.c |  49 
 gcc/testsuite/gcc.target/i386/sse-13.c |  10 +
 gcc/testsuite/gcc.target/i386/sse-14.c |  13 ++
 gcc/testsuite/gcc.target/i386/sse-22.c |  13 ++
 gcc/testsuite/gcc.target/i386/sse-23.c |  10 +
 11 files changed, 374 insertions(+), 2 deletions(-)

diff --git a/gcc/config/i386/avx10_2roundingintrin.h b/gcc/config/i386/avx10_2roundingintrin.h
index 15ea46b59835..d5ea6bc57da9 100644
--- a/gcc/config/i386/avx10_2roundingintrin.h
+++ b/gcc/config/i386/avx10_2roundingintrin.h
@@ -1934,6 +1934,164 @@ _mm256_maskz_div_round_ps (__mmask8 __U, __m256 __A, __m256 __B,
  (__mmask8) __U,
  __R);
 }
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_fcmadd_round_pch (__m256h __A, __m256h __B, __m256h __D, const int __R)
+{
+  return (__m256h) __builtin_ia32_vfcmaddcph256_round ((__v16hf) __A,
+  (__v16hf) __B,
+  (__v16hf) __D,
+  __R);
+}
+
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_fcmadd_round_pch (__m256h __A, __mmask8 __U, __m256h __B,
+ __m256h __D, const int __R)
+{
+  return (__m256h) __builtin_ia32_vfcmaddcph256_mask_round ((__v16hf) __A,
+   (__v16hf) __B,
+   (__v16hf) __D,
+   __U,
+   __R);
+}
+
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask3_fcmadd_round_pch (__m256h __A, __m256h __B, __m256h __D,
+  __mmask8 __U, const int __R)
+{
+  return (__m256h) __builtin_ia32_vfcmaddcph256_mask3_round ((__v16hf) __A,
+(__v16hf) __B,
+(__v16hf) __D,
+__U,
+__R);
+}
+
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_fcmadd_round_pch (__mmask8 __U, __m256h __A, __m256h __B,
+  __m256h __D, const int __R)
+{
+  return (__m256h) __builtin_ia32_vfcmaddcph256_maskz_round ((__v16hf) __A,
+(__v16hf) __B,
+(__v16hf) __D,
+__U,
+__R);
+}
+
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_fcmul_round_pch (__m256h __A, __m256h __B, const int __R)
+{
+  return
+(__m256h) __builtin_ia32_vfcmulcph256_round ((__v16hf) __A,
+  

[gcc(refs/vendors/ix86/heads/avx10.2)] AVX10.2 ymm rounding: Support vfm{sub, subadd}{132, 231, 213}p{s, d, h} intrins

2024-08-14 Thread Haochen Jiang via Gcc-cvs
https://gcc.gnu.org/g:4d822f755a34ea5e5a3c891881f4ba0226151448

commit 4d822f755a34ea5e5a3c891881f4ba0226151448
Author: Hu, Lin1 
Date:   Thu Aug 15 09:38:24 2024 +0800

AVX10.2 ymm rounding: Support vfm{sub,subadd}{132,231,213}p{s,d,h} intrins

gcc/ChangeLog:

* config/i386/avx10_2roundingintrin.h: New intrins.
* config/i386/i386-builtin-types.def: Add new DEF_FUNCTION_TYPE.
* config/i386/i386-builtin.def (BDESC): Add new builtins.
* config/i386/sse.md:
(_fmsub__mask): Add condition check.

gcc/testsuite/ChangeLog:

* gcc.target/i386/avx-1.c: Add new builtin test.
* gcc.target/i386/sse-13.c: Ditto.
* gcc.target/i386/sse-14.c: Ditto.
* gcc.target/i386/sse-22.c: Add new macro test.
* gcc.target/i386/sse-23.c: Ditto.
* gcc.target/i386/avx10_2-rounding-3.c: Add test.

Diff:
---
 gcc/config/i386/avx10_2roundingintrin.h| 350 +
 gcc/config/i386/i386-builtin.def   |  18 ++
 gcc/config/i386/sse.md |   2 +-
 gcc/testsuite/gcc.target/i386/avx-1.c  |  18 ++
 gcc/testsuite/gcc.target/i386/avx10_2-rounding-3.c |  62 
 gcc/testsuite/gcc.target/i386/sse-13.c |  18 ++
 gcc/testsuite/gcc.target/i386/sse-14.c |  24 ++
 gcc/testsuite/gcc.target/i386/sse-22.c |  24 ++
 gcc/testsuite/gcc.target/i386/sse-23.c |  18 ++
 9 files changed, 533 insertions(+), 1 deletion(-)

diff --git a/gcc/config/i386/avx10_2roundingintrin.h b/gcc/config/i386/avx10_2roundingintrin.h
index 95e42410a109..346a32c1a8a5 100644
--- a/gcc/config/i386/avx10_2roundingintrin.h
+++ b/gcc/config/i386/avx10_2roundingintrin.h
@@ -2419,6 +2419,284 @@ _mm256_maskz_fmaddsub_round_ps (__mmask8 __U, __m256 __A, __m256 __B,
 (__mmask8) __U,
 __R);
 }
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_fmsub_round_pd (__m256d __A, __m256d __B, __m256d __D, const int __R)
+{
+  return (__m256d) __builtin_ia32_vfmsubpd256_mask_round ((__v4df) __A,
+ (__v4df) __B,
+ (__v4df) __D,
+ (__mmask8) -1, __R);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_fmsub_round_pd (__m256d __A, __mmask8 __U, __m256d __B,
+   __m256d __D, const int __R)
+{
+  return (__m256d) __builtin_ia32_vfmsubpd256_mask_round ((__v4df) __A,
+ (__v4df) __B,
+ (__v4df) __D,
+ (__mmask8) __U, __R);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask3_fmsub_round_pd (__m256d __A, __m256d __B, __m256d __D,
+__mmask8 __U, const int __R)
+{
+  return (__m256d) __builtin_ia32_vfmsubpd256_mask3_round ((__v4df) __A,
+  (__v4df) __B,
+  (__v4df) __D,
+  (__mmask8) __U, __R);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_fmsub_round_pd (__mmask8 __U, __m256d __A, __m256d __B,
+__m256d __D, const int __R)
+{
+  return (__m256d) __builtin_ia32_vfmsubpd256_maskz_round ((__v4df) __A,
+  (__v4df) __B,
+  (__v4df) __D,
+  (__mmask8) __U, __R);
+}
+
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_fmsub_round_ph (__m256h __A, __m256h __B, __m256h __D, const int __R)
+{
+  return (__m256h)
+__builtin_ia32_vfmsubph256_mask_round ((__v16hf) __A,
+  (__v16hf) __B,
+  (__v16hf) __D,
+  (__mmask16) -1, __R);
+}
+
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_fmsub_round_ph (__m256h __A, __mmask16 __U, __m256h __B,
+   __m256h __D, const int __R)
+{
+  return (__m256h)
+__builtin_ia32_vfmsubph256_mask_round ((__v16hf) __A,
+  (__v16hf) __B,
+  (__v16hf) __D,
+

[gcc(refs/vendors/ix86/heads/avx10.2)] AVX10.2 ymm rounding: Support vfmulcph and vfnmadd{132, 231, 213}p{s, d, h} intrins

2024-08-14 Thread Haochen Jiang via Gcc-cvs
https://gcc.gnu.org/g:81f1bcf7c470d1797bfdddc17f3a9b7be50fa040

commit 81f1bcf7c470d1797bfdddc17f3a9b7be50fa040
Author: Hu, Lin1 
Date:   Thu Aug 15 09:38:24 2024 +0800

AVX10.2 ymm rounding: Support vfmulcph and vfnmadd{132,231,213}p{s,d,h} intrins

gcc/ChangeLog:

* config/i386/avx10_2roundingintrin.h: New intrins.
* config/i386/i386-builtin.def (BDESC): Add new builtins.

gcc/testsuite/ChangeLog:

* gcc.target/i386/avx-1.c: Add new builtin test.
* gcc.target/i386/sse-13.c: Ditto.
* gcc.target/i386/sse-14.c: Ditto.
* gcc.target/i386/sse-22.c: Add new macro test.
* gcc.target/i386/sse-23.c: Ditto.
* gcc.target/i386/avx10_2-rounding-3.c: Add test.

Diff:
---
 gcc/config/i386/avx10_2roundingintrin.h| 241 +
 gcc/config/i386/i386-builtin.def   |  11 +
 gcc/testsuite/gcc.target/i386/avx-1.c  |  11 +
 gcc/testsuite/gcc.target/i386/avx10_2-rounding-3.c |  50 +
 gcc/testsuite/gcc.target/i386/sse-13.c |  11 +
 gcc/testsuite/gcc.target/i386/sse-14.c |  14 ++
 gcc/testsuite/gcc.target/i386/sse-22.c |  14 ++
 gcc/testsuite/gcc.target/i386/sse-23.c |  11 +
 8 files changed, 363 insertions(+)

diff --git a/gcc/config/i386/avx10_2roundingintrin.h b/gcc/config/i386/avx10_2roundingintrin.h
index 346a32c1a8a5..3f833bffa54d 100644
--- a/gcc/config/i386/avx10_2roundingintrin.h
+++ b/gcc/config/i386/avx10_2roundingintrin.h
@@ -2697,6 +2697,185 @@ _mm256_maskz_fmsubadd_round_ps (__mmask8 __U, __m256 __A, __m256 __B,
 (__mmask8) __U,
 __R);
 }
+
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_fmul_round_pch (__m256h __B, __m256h __D, const int __R)
+{
+  return (__m256h) __builtin_ia32_vfmulcph256_round ((__v16hf) __B,
+(__v16hf) __D,
+__R);
+}
+
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_fmul_round_pch (__m256h __A, __mmask8 __U, __m256h __B,
+   __m256h __D, const int __R)
+{
+  return (__m256h) __builtin_ia32_vfmulcph256_mask_round ((__v16hf) __B,
+ (__v16hf) __D,
+ (__v16hf) __A,
+ (__mmask16) __U,
+ __R);
+}
+
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_fmul_round_pch (__mmask8 __U, __m256h __B, __m256h __D,
+const int __R)
+{
+  return (__m256h) __builtin_ia32_vfmulcph256_mask_round ((__v16hf) __B,
+ (__v16hf) __D,
+ (__v16hf)
+ _mm256_setzero_ph (),
+ (__mmask16) __U,
+ __R);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_fnmadd_round_pd (__m256d __A, __m256d __B, __m256d __D, const int __R)
+{
+  return (__m256d) __builtin_ia32_vfnmaddpd256_mask_round ((__v4df) __A,
+  (__v4df) __B,
+  (__v4df) __D,
+  (__mmask8) -1,
+  __R);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_fnmadd_round_pd (__m256d __A, __mmask8 __U, __m256d __B,
+__m256d __D, const int __R)
+{
+  return (__m256d) __builtin_ia32_vfnmaddpd256_mask_round ((__v4df) __A,
+  (__v4df) __B,
+  (__v4df) __D,
+  (__mmask8) __U,
+  __R);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask3_fnmadd_round_pd (__m256d __A, __m256d __B, __m256d __D,
+ __mmask8 __U, const int __R)
+{
+  return (__m256d) __builtin_ia32_vfnmaddpd256_mask3_round ((__v4df) __A,
+   (__v4df) __B,
+   (__v4df) __D,
+  

[gcc(refs/vendors/ix86/heads/avx10.2)] AVX10.2 ymm rounding: Support vfnmsub{132, 231, 213}p{s, d, h} intrins

2024-08-14 Thread Haochen Jiang via Gcc-cvs
https://gcc.gnu.org/g:93c5a73e0ef33fa4ef46a3fe87225bd584830dc3

commit 93c5a73e0ef33fa4ef46a3fe87225bd584830dc3
Author: Hu, Lin1 
Date:   Thu Aug 15 09:38:25 2024 +0800

AVX10.2 ymm rounding: Support vfnmsub{132,231,213}p{s,d,h} intrins

gcc/ChangeLog:

* config/i386/avx10_2roundingintrin.h: New intrins.
* config/i386/i386-builtin.def (BDESC): Add new builtins.
* config/i386/sse.md:
(_fnmsub__mask3): Add condition check.

gcc/testsuite/ChangeLog:

* gcc.target/i386/avx-1.c: Add new builtin test.
* gcc.target/i386/sse-13.c: Ditto.
* gcc.target/i386/sse-14.c: Ditto.
* gcc.target/i386/sse-22.c: Add new macro test.
* gcc.target/i386/sse-23.c: Ditto.
* gcc.target/i386/avx10_2-rounding-3.c: Add test.

Diff:
---
 gcc/config/i386/avx10_2roundingintrin.h| 181 +
 gcc/config/i386/i386-builtin.def   |   9 +
 gcc/config/i386/sse.md |   2 +-
 gcc/testsuite/gcc.target/i386/avx-1.c  |   9 +
 gcc/testsuite/gcc.target/i386/avx10_2-rounding-3.c |  31 
 gcc/testsuite/gcc.target/i386/sse-13.c |   9 +
 gcc/testsuite/gcc.target/i386/sse-14.c |  12 ++
 gcc/testsuite/gcc.target/i386/sse-22.c |  12 ++
 gcc/testsuite/gcc.target/i386/sse-23.c |   9 +
 9 files changed, 273 insertions(+), 1 deletion(-)

diff --git a/gcc/config/i386/avx10_2roundingintrin.h b/gcc/config/i386/avx10_2roundingintrin.h
index 3f833bffa54d..afc1220fea42 100644
--- a/gcc/config/i386/avx10_2roundingintrin.h
+++ b/gcc/config/i386/avx10_2roundingintrin.h
@@ -2876,6 +2876,151 @@ _mm256_maskz_fnmadd_round_ps (__mmask8 __U, __m256 __A, __m256 __B,
   (__mmask8) __U,
   __R);
 }
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_fnmsub_round_pd (__m256d __A, __m256d __B, __m256d __D, const int __R)
+{
+  return (__m256d) __builtin_ia32_vfnmsubpd256_mask_round ((__v4df) __A,
+  (__v4df) __B,
+  (__v4df) __D,
+  (__mmask8) -1,
+  __R);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_fnmsub_round_pd (__m256d __A, __mmask8 __U, __m256d __B,
+__m256d __D, const int __R)
+{
+  return (__m256d) __builtin_ia32_vfnmsubpd256_mask_round ((__v4df) __A,
+  (__v4df) __B,
+  (__v4df) __D,
+  (__mmask8) __U,
+  __R);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask3_fnmsub_round_pd (__m256d __A, __m256d __B, __m256d __D,
+ __mmask8 __U, const int __R)
+{
+  return (__m256d) __builtin_ia32_vfnmsubpd256_mask3_round ((__v4df) __A,
+   (__v4df) __B,
+   (__v4df) __D,
+   (__mmask8) __U,
+   __R);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_fnmsub_round_pd (__mmask8 __U, __m256d __A, __m256d __B,
+ __m256d __D, const int __R)
+{
+  return (__m256d) __builtin_ia32_vfnmsubpd256_maskz_round ((__v4df) __A,
+   (__v4df) __B,
+   (__v4df) __D,
+   (__mmask8) __U,
+   __R);
+}
+
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_fnmsub_round_ph (__m256h __A, __m256h __B, __m256h __D, const int __R)
+{
+  return (__m256h)
+__builtin_ia32_vfnmsubph256_mask_round ((__v16hf) __A,
+   (__v16hf) __B,
+   (__v16hf) __D,
+   (__mmask16) -1,
+   __R);
+}
+
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_fnmsub_round_ph (__m256h __A, __mmask16 __U, __m256h __B,
+__m256h __D, const int __R)
+{
+  return (__m256

[gcc(refs/vendors/ix86/heads/avx10.2)] AVX10.2 ymm rounding: Support vgetexpp{s, d, h} and vgetmantp{s, d, h} intrins

2024-08-14 Thread Haochen Jiang via Gcc-cvs
https://gcc.gnu.org/g:5da970fcb303434bc27d35a7ba9e6f5d4ee00e2d

commit 5da970fcb303434bc27d35a7ba9e6f5d4ee00e2d
Author: Hu, Lin1 
Date:   Thu Aug 15 09:38:26 2024 +0800

AVX10.2 ymm rounding: Support vgetexpp{s,d,h} and vgetmantp{s,d,h} intrins

gcc/ChangeLog:

* config/i386/avx10_2roundingintrin.h: New intrins.
* config/i386/i386-builtin-types.def: Add new DEF_FUNCTION_TYPE.
* config/i386/i386-builtin.def (BDESC): Add new builtins.
* config/i386/i386-expand.cc (ix86_expand_round_builtin): Handle
V8SF_FTYPE_V8SF_V8SF_UQI_INT, V4DF_FTYPE_V4DF_V4DF_UQI_INT,
V16HF_FTYPE_V16HF_V16HF_UHI_INT, V16HF_FTYPE_V16HF_INT_V16HF_UHI_INT,
V4DF_FTYPE_V4DF_INT_V4DF_UQI_INT, V8SF_FTYPE_V8SF_INT_V8SF_UQI_INT.
* config/i386/sse.md:
(_getexp):
Add condition check.
(_getmant):
Ditto.

gcc/testsuite/ChangeLog:

* gcc.target/i386/avx-1.c: Add new builtin test.
* gcc.target/i386/sse-13.c: Ditto.
* gcc.target/i386/sse-14.c: Ditto.
* gcc.target/i386/sse-22.c: Add new macro test.
* gcc.target/i386/sse-23.c: Ditto.
* gcc.target/i386/avx10_2-rounding-3.c: Add test.

Diff:
---
 gcc/config/i386/avx10_2roundingintrin.h| 341 +
 gcc/config/i386/i386-builtin-types.def |   6 +
 gcc/config/i386/i386-builtin.def   |   6 +
 gcc/config/i386/i386-expand.cc |   6 +
 gcc/config/i386/sse.md |   4 +-
 gcc/testsuite/gcc.target/i386/avx-1.c  |   6 +
 gcc/testsuite/gcc.target/i386/avx10_2-rounding-3.c |  59 
 gcc/testsuite/gcc.target/i386/sse-13.c |   6 +
 gcc/testsuite/gcc.target/i386/sse-14.c |  18 ++
 gcc/testsuite/gcc.target/i386/sse-22.c |  18 ++
 gcc/testsuite/gcc.target/i386/sse-23.c |   6 +
 11 files changed, 474 insertions(+), 2 deletions(-)

diff --git a/gcc/config/i386/avx10_2roundingintrin.h b/gcc/config/i386/avx10_2roundingintrin.h
index afc1220fea42..07729a6cc04f 100644
--- a/gcc/config/i386/avx10_2roundingintrin.h
+++ b/gcc/config/i386/avx10_2roundingintrin.h
@@ -3021,6 +3021,217 @@ _mm256_maskz_fnmsub_round_ps (__mmask8 __U, __m256 __A, __m256 __B,
   (__mmask8) __U,
   __R);
 }
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_getexp_round_pd (__m256d __A, const int __R)
+{
+  return
+(__m256d) __builtin_ia32_getexppd256_mask_round ((__v4df) __A,
+(__v4df)
+_mm256_undefined_pd (),
+(__mmask8) -1,
+__R);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_getexp_round_pd (__m256d __W, __mmask8 __U, __m256d __A,
+const int __R)
+{
+  return (__m256d) __builtin_ia32_getexppd256_mask_round ((__v4df) __A,
+ (__v4df) __W,
+ (__mmask8) __U,
+ __R);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_getexp_round_pd (__mmask8 __U, __m256d __A, const int __R)
+{
+  return (__m256d) __builtin_ia32_getexppd256_mask_round ((__v4df) __A,
+ (__v4df)
+ _mm256_setzero_pd (),
+ (__mmask8) __U,
+ __R);
+}
+
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_getexp_round_ph (__m256h __A, const int __R)
+{
+  return (__m256h) __builtin_ia32_getexpph256_mask_round ((__v16hf) __A,
+ (__v16hf)
+ _mm256_setzero_ph (),
+ (__mmask16) -1,
+ __R);
+}
+
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_getexp_round_ph (__m256h __W, __mmask16 __U, __m256h __A,
+const int __R)
+{
+  return (__m256h) __builtin_ia32_getexpph256_mask_round ((__v16hf) __A,
+ (__v16hf) __W,
+ (__mmask16) __U,
+

[gcc(refs/vendors/ix86/heads/avx10.2)] AVX10.2 ymm rounding: Support vmulp{s, d, h} and vrangep{s, d} intrins

2024-08-14 Thread Haochen Jiang via Gcc-cvs
https://gcc.gnu.org/g:ef07d9e1195a7ba200b4c26194bcc17948cb97f1

commit ef07d9e1195a7ba200b4c26194bcc17948cb97f1
Author: Hu, Lin1 
Date:   Thu Aug 15 09:38:27 2024 +0800

AVX10.2 ymm rounding: Support vmulp{s,d,h} and vrangep{s,d} intrins

gcc/ChangeLog:

* config/i386/avx10_2roundingintrin.h: New intrins.
* config/i386/i386-builtin-types.def: Add new DEF_FUNCTION_TYPE.
* config/i386/i386-builtin.def (BDESC): Add new builtins.
* config/i386/i386-expand.cc (ix86_expand_round_builtin):
Handle V8SF_FTYPE_V8SF_V8SF_INT_V8SF_UQI_INT,
V4DF_FTYPE_V4DF_V4DF_INT_V4DF_UQI_INT.

gcc/testsuite/ChangeLog:

* gcc.target/i386/avx-1.c: Add new builtin test.
* gcc.target/i386/sse-13.c: Ditto.
* gcc.target/i386/sse-14.c: Ditto.
* gcc.target/i386/sse-22.c: Add new macro test.
* gcc.target/i386/sse-23.c: Ditto.
* gcc.target/i386/avx10_2-rounding-3.c: Add test.

Diff:
---
 gcc/config/i386/avx10_2roundingintrin.h| 313 +
 gcc/config/i386/i386-builtin-types.def |   2 +
 gcc/config/i386/i386-builtin.def   |   5 +
 gcc/config/i386/i386-expand.cc |   2 +
 gcc/testsuite/gcc.target/i386/avx-1.c  |   5 +
 gcc/testsuite/gcc.target/i386/avx10_2-rounding-3.c |  43 +++
 gcc/testsuite/gcc.target/i386/sse-13.c |   5 +
 gcc/testsuite/gcc.target/i386/sse-14.c |  15 +
 gcc/testsuite/gcc.target/i386/sse-22.c |  15 +
 gcc/testsuite/gcc.target/i386/sse-23.c |   5 +
 10 files changed, 410 insertions(+)

diff --git a/gcc/config/i386/avx10_2roundingintrin.h b/gcc/config/i386/avx10_2roundingintrin.h
index a5712f5230aa..ac0914415c94 100644
--- a/gcc/config/i386/avx10_2roundingintrin.h
+++ b/gcc/config/i386/avx10_2roundingintrin.h
@@ -3454,6 +3454,198 @@ _mm256_maskz_min_round_ps (__mmask8 __U, __m256 __A, __m256 __B,
  (__mmask8) __U,
  __R);
 }
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mul_round_pd (__m256d __A, __m256d __B, const int __R)
+{
+  return (__m256d) __builtin_ia32_mulpd256_mask_round ((__v4df) __A,
+  (__v4df) __B,
+  (__v4df)
+  _mm256_undefined_pd (),
+  (__mmask8) -1,
+  __R);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_mul_round_pd (__m256d __W, __mmask8 __U, __m256d __A,
+ __m256d __B, const int __R)
+{
+  return (__m256d) __builtin_ia32_mulpd256_mask_round ((__v4df) __A,
+  (__v4df) __B,
+  (__v4df) __W,
+  (__mmask8) __U,
+  __R);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_mul_round_pd (__mmask8 __U, __m256d __A, __m256d __B,
+  const int __R)
+{
+  return (__m256d) __builtin_ia32_mulpd256_mask_round ((__v4df) __A,
+  (__v4df) __B,
+  (__v4df)
+  _mm256_setzero_pd (),
+  (__mmask8) __U,
+  __R);
+}
+
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mul_round_ph (__m256h __A, __m256h __B, const int __R)
+{
+  return (__m256h) __builtin_ia32_mulph256_mask_round ((__v16hf) __A,
+  (__v16hf) __B,
+  (__v16hf)
+  _mm256_undefined_ph (),
+  (__mmask16) -1,
+  __R);
+}
+
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_mul_round_ph (__m256h __W, __mmask16 __U, __m256h __A,
+ __m256h __B, const int __R)
+{
+  return (__m256h) __builtin_ia32_mulph256_mask_round ((__v16hf) __A,
+  (__v16hf) __B,
+  (__v16hf) __W,
+  (__mmask16) __U,
+ 

[gcc(refs/vendors/ix86/heads/avx10.2)] AVX10.2 ymm rounding: Support v{max, min}p{s, d, h} intrins

2024-08-14 Thread Haochen Jiang via Gcc-cvs
https://gcc.gnu.org/g:ce16fa99857c057ad95ad7cef8ce6f5ffbe9ef48

commit ce16fa99857c057ad95ad7cef8ce6f5ffbe9ef48
Author: Hu, Lin1 
Date:   Thu Aug 15 09:38:26 2024 +0800

AVX10.2 ymm rounding: Support v{max,min}p{s,d,h} intrins

gcc/ChangeLog:

* config/i386/avx10_2roundingintrin.h: New intrins.
* config/i386/i386-builtin.def (BDESC): Add new builtins.

gcc/testsuite/ChangeLog:

* gcc.target/i386/avx-1.c: Add new builtin test.
* gcc.target/i386/sse-13.c: Ditto.
* gcc.target/i386/sse-14.c: Ditto.
* gcc.target/i386/sse-22.c: Add new macro test.
* gcc.target/i386/sse-23.c: Ditto.
* gcc.target/i386/avx10_2-rounding-3.c: Add test.

Diff:
---
 gcc/config/i386/avx10_2roundingintrin.h| 360 +
 gcc/config/i386/i386-builtin.def   |   6 +
 gcc/testsuite/gcc.target/i386/avx-1.c  |   6 +
 gcc/testsuite/gcc.target/i386/avx10_2-rounding-3.c |  50 +++
 gcc/testsuite/gcc.target/i386/sse-13.c |   6 +
 gcc/testsuite/gcc.target/i386/sse-14.c |  18 ++
 gcc/testsuite/gcc.target/i386/sse-22.c |  18 ++
 gcc/testsuite/gcc.target/i386/sse-23.c |   6 +
 8 files changed, 470 insertions(+)

diff --git a/gcc/config/i386/avx10_2roundingintrin.h b/gcc/config/i386/avx10_2roundingintrin.h
index 07729a6cc04f..a5712f5230aa 100644
--- a/gcc/config/i386/avx10_2roundingintrin.h
+++ b/gcc/config/i386/avx10_2roundingintrin.h
@@ -3232,6 +3232,228 @@ _mm256_maskz_getmant_round_ps (__mmask8 __U, __m256 __A,
  _mm256_setzero_ps (),
  __U, __R);
 }
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_max_round_pd (__m256d __A, __m256d __B, const int __R)
+{
+  return (__m256d) __builtin_ia32_maxpd256_mask_round ((__v4df) __A,
+  (__v4df) __B,
+  (__v4df)
+  _mm256_undefined_pd (),
+  (__mmask8) -1,
+  __R);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_max_round_pd (__m256d __W, __mmask8 __U, __m256d __A,
+ __m256d __B, const int __R)
+{
+  return (__m256d) __builtin_ia32_maxpd256_mask_round ((__v4df) __A,
+  (__v4df) __B,
+  (__v4df) __W,
+  (__mmask8) __U,
+  __R);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_max_round_pd (__mmask8 __U, __m256d __A, __m256d __B,
+  const int __R)
+{
+  return (__m256d) __builtin_ia32_maxpd256_mask_round ((__v4df) __A,
+  (__v4df) __B,
+  (__v4df)
+  _mm256_setzero_pd (),
+  (__mmask8) __U,
+  __R);
+}
+
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_max_round_ph (__m256h __A, __m256h __B, const int __R)
+{
+  return (__m256h) __builtin_ia32_maxph256_mask_round ((__v16hf) __A,
+  (__v16hf) __B,
+  (__v16hf)
+  _mm256_undefined_ph (),
+  (__mmask16) -1,
+  __R);
+}
+
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_max_round_ph (__m256h __W, __mmask16 __U, __m256h __A,
+ __m256h __B, const int __R)
+{
+  return (__m256h) __builtin_ia32_maxph256_mask_round ((__v16hf) __A,
+  (__v16hf) __B,
+  (__v16hf) __W,
+  (__mmask16) __U,
+  __R);
+}
+
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_max_round_ph (__mmask16 __U, __m256h __A, __m256h __B,
+  const int __R)
+{
+  return (__m256h) __builtin_ia32_maxph256_mask_round ((__v16hf) __A,
+

[gcc(refs/vendors/ix86/heads/avx10.2)] AVX10.2 ymm rounding: Support vscalefp{s, d, h} intrins

2024-08-14 Thread Haochen Jiang via Gcc-cvs
https://gcc.gnu.org/g:144b9ca72a05d9f7235bf939821a3ff425a251c8

commit 144b9ca72a05d9f7235bf939821a3ff425a251c8
Author: Hu, Lin1 
Date:   Thu Aug 15 09:38:28 2024 +0800

AVX10.2 ymm rounding: Support vscalefp{s,d,h} intrins

gcc/ChangeLog:

* config/i386/avx10_2roundingintrin.h: New intrins.
* config/i386/i386-builtin-types.def: Add new DEF_FUNCTION_TYPE.
* config/i386/sse.md:
(_scalef): Add condition check.

gcc/testsuite/ChangeLog:

* gcc.target/i386/avx-1.c: Add new builtin test.
* gcc.target/i386/sse-13.c: Ditto.
* gcc.target/i386/sse-14.c: Ditto.
* gcc.target/i386/sse-22.c: Add new macro test.
* gcc.target/i386/sse-23.c: Ditto.
* gcc.target/i386/avx10_2-rounding-3.c: Add test.

Diff:
---
 gcc/config/i386/avx10_2roundingintrin.h| 182 +
 gcc/config/i386/i386-builtin.def   |   3 +
 gcc/config/i386/sse.md |   2 +-
 gcc/testsuite/gcc.target/i386/avx-1.c  |   3 +
 gcc/testsuite/gcc.target/i386/avx10_2-rounding-3.c |  25 +++
 gcc/testsuite/gcc.target/i386/sse-13.c |   3 +
 gcc/testsuite/gcc.target/i386/sse-14.c |   9 +
 gcc/testsuite/gcc.target/i386/sse-22.c |   9 +
 gcc/testsuite/gcc.target/i386/sse-23.c |   3 +
 9 files changed, 238 insertions(+), 1 deletion(-)

diff --git a/gcc/config/i386/avx10_2roundingintrin.h b/gcc/config/i386/avx10_2roundingintrin.h
index d6b8e2695de3..f35f23378583 100644
--- a/gcc/config/i386/avx10_2roundingintrin.h
+++ b/gcc/config/i386/avx10_2roundingintrin.h
@@ -3873,6 +3873,119 @@ _mm256_maskz_roundscale_round_ps (__mmask8 __U, __m256 __A, const int __C,
   (__mmask8) __U,
   __R);
 }
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_scalef_round_pd (__m256d __A, __m256d __B, const int __R)
+{
+  return
+(__m256d) __builtin_ia32_scalefpd256_mask_round ((__v4df) __A,
+(__v4df) __B,
+(__v4df)
+_mm256_undefined_pd (),
+(__mmask8) -1,
+__R);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_scalef_round_pd (__m256d __W, __mmask8 __U, __m256d __A,
+__m256d __B, const int __R)
+{
+  return (__m256d) __builtin_ia32_scalefpd256_mask_round ((__v4df) __A,
+ (__v4df) __B,
+ (__v4df) __W,
+ (__mmask8) __U,
+ __R);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_scalef_round_pd (__mmask8 __U, __m256d __A, __m256d __B,
+ const int __R)
+{
+  return (__m256d) __builtin_ia32_scalefpd256_mask_round ((__v4df) __A,
+ (__v4df) __B,
+ (__v4df)
+ _mm256_setzero_pd (),
+ (__mmask8) __U,
+ __R);
+}
+
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_scalef_round_ph (__m256h __A, __m256h __B, const int __R)
+{
+  return
+(__m256h) __builtin_ia32_scalefph256_mask_round ((__v16hf) __A,
+(__v16hf) __B,
+(__v16hf)
+_mm256_undefined_ph (),
+(__mmask16) -1,
+__R);
+}
+
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_scalef_round_ph (__m256h __W, __mmask16 __U, __m256h __A,
+__m256h __B, const int __R)
+{
+  return (__m256h) __builtin_ia32_scalefph256_mask_round ((__v16hf) __A,
+ (__v16hf) __B,
+ (__v16hf) __W,
+ (__mmask16) __U,
+ __R);
+}
+
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, _

[gcc(refs/vendors/ix86/heads/avx10.2)] AVX10.2 ymm rounding: Support vreducep{s, d, h} and vrndscalep{s, d, h} intrins

2024-08-14 Thread Haochen Jiang via Gcc-cvs
https://gcc.gnu.org/g:2d78e6dbb4620da8e4c2e87ef14134f627559467

commit 2d78e6dbb4620da8e4c2e87ef14134f627559467
Author: Hu, Lin1 
Date:   Thu Aug 15 09:38:27 2024 +0800

AVX10.2 ymm rounding: Support vreducep{s,d,h} and vrndscalep{s,d,h} intrins

gcc/ChangeLog:

* config/i386/avx10_2roundingintrin.h: New intrins.
* config/i386/i386-builtin.def (BDESC): Add new builtins.
* config/i386/sse.md:
(reducep):
Add condition check.
(_rndscale): Ditto.

gcc/testsuite/ChangeLog:

* gcc.target/i386/avx-1.c: Add new builtin test.
* gcc.target/i386/sse-13.c: Ditto.
* gcc.target/i386/sse-14.c: Ditto.
* gcc.target/i386/sse-22.c: Add new macro test.
* gcc.target/i386/sse-23.c: Ditto.
* gcc.target/i386/avx10_2-rounding-3.c: Add test.

Diff:
---
 gcc/config/i386/avx10_2roundingintrin.h| 367 +
 gcc/config/i386/i386-builtin.def   |   6 +
 gcc/config/i386/sse.md |   4 +-
 gcc/testsuite/gcc.target/i386/avx-1.c  |   6 +
 gcc/testsuite/gcc.target/i386/avx10_2-rounding-3.c |  50 +++
 gcc/testsuite/gcc.target/i386/sse-13.c |   6 +
 gcc/testsuite/gcc.target/i386/sse-14.c |  18 +
 gcc/testsuite/gcc.target/i386/sse-22.c |  18 +
 gcc/testsuite/gcc.target/i386/sse-23.c |   6 +
 9 files changed, 479 insertions(+), 2 deletions(-)

diff --git a/gcc/config/i386/avx10_2roundingintrin.h b/gcc/config/i386/avx10_2roundingintrin.h
index ac0914415c94..d6b8e2695de3 100644
--- a/gcc/config/i386/avx10_2roundingintrin.h
+++ b/gcc/config/i386/avx10_2roundingintrin.h
@@ -3646,6 +3646,233 @@ _mm256_maskz_range_round_ps (__mmask8 __U, __m256 __A, __m256 __B,
(__mmask8) __U,
__R);
 }
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_reduce_round_pd (__m256d __A, const int __C, const int __R)
+{
+  return (__m256d) __builtin_ia32_reducepd256_mask_round ((__v4df) __A,
+ __C,
+ (__v4df)
+ _mm256_setzero_pd (),
+ (__mmask8) -1,
+ __R);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_reduce_round_pd (__m256d __W, __mmask8 __U, __m256d __A,
+const int __C, const int __R)
+{
+  return (__m256d) __builtin_ia32_reducepd256_mask_round ((__v4df) __A,
+ __C,
+ (__v4df) __W,
+ (__mmask8) __U,
+ __R);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_reduce_round_pd (__mmask8 __U, __m256d __A, const int __C,
+ const int __R)
+{
+  return (__m256d) __builtin_ia32_reducepd256_mask_round ((__v4df) __A,
+ __C,
+ (__v4df)
+ _mm256_setzero_pd (),
+ (__mmask8) __U,
+ __R);
+}
+
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_reduce_round_ph (__m256h __A, const int __C, const int __R)
+{
+  return (__m256h) __builtin_ia32_reduceph256_mask_round ((__v16hf) __A,
+ __C,
+ (__v16hf)
+ _mm256_setzero_ph (),
+ (__mmask16) -1,
+ __R);
+}
+
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_reduce_round_ph (__m256h __W, __mmask16 __U, __m256h __A,
+const int __C, const int __R)
+{
+  return (__m256h) __builtin_ia32_reduceph256_mask_round ((__v16hf) __A,
+ __C,
+ (__v16hf) __W,
+ (__mmask16) __U,
+ __R);
+}
+
+extern __inline __m256h
+__attri

[gcc(refs/vendors/ix86/heads/avx10.2)] AVX10.2 ymm rounding: Support vsqrtp{s, d, h} and vsubp{s, d, h} intrins

2024-08-14 Thread Haochen Jiang via Gcc-cvs
https://gcc.gnu.org/g:bab0adf0069e69115aa984aa750a03fb3d1d0cdb

commit bab0adf0069e69115aa984aa750a03fb3d1d0cdb
Author: Hu, Lin1 
Date:   Thu Aug 15 09:38:29 2024 +0800

AVX10.2 ymm rounding: Support vsqrtp{s,d,h} and vsubp{s,d,h} intrins

gcc/ChangeLog:

* config/i386/avx10_2roundingintrin.h: New intrins.
* config/i386/i386-builtin.def (BDESC): Add new builtins.

gcc/testsuite/ChangeLog:

* gcc.target/i386/avx-1.c: Add new builtin test.
* gcc.target/i386/sse-13.c: Ditto.
* gcc.target/i386/sse-14.c: Ditto.
* gcc.target/i386/sse-22.c: Add new macro test.
* gcc.target/i386/sse-23.c: Ditto.
* gcc.target/i386/avx10_2-rounding-3.c: Add test.

Diff:
---
 gcc/config/i386/avx10_2roundingintrin.h| 339 +
 gcc/config/i386/i386-builtin.def   |   6 +
 gcc/testsuite/gcc.target/i386/avx-1.c  |   6 +
 gcc/testsuite/gcc.target/i386/avx10_2-rounding-3.c |  50 +++
 gcc/testsuite/gcc.target/i386/sse-13.c |   7 +
 gcc/testsuite/gcc.target/i386/sse-14.c |  18 ++
 gcc/testsuite/gcc.target/i386/sse-22.c |  15 +
 gcc/testsuite/gcc.target/i386/sse-23.c |   6 +
 8 files changed, 447 insertions(+)

diff --git a/gcc/config/i386/avx10_2roundingintrin.h b/gcc/config/i386/avx10_2roundingintrin.h
index f35f23378583..c7146e37ec9a 100644
--- a/gcc/config/i386/avx10_2roundingintrin.h
+++ b/gcc/config/i386/avx10_2roundingintrin.h
@@ -3986,6 +3986,216 @@ _mm256_maskz_scalef_round_ps (__mmask8 __U, __m256 __A, __m256 __B,
 (__mmask8) __U,
 __R);
 }
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_sqrt_round_pd (__m256d __A, const int __R)
+{
+  return (__m256d) __builtin_ia32_sqrtpd256_mask_round ((__v4df) __A,
+   (__v4df)
+   _mm256_undefined_pd (),
+   (__mmask8) -1,
+   __R);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_sqrt_round_pd (__m256d __W, __mmask8 __U, __m256d __A,
+  const int __R)
+{
+  return (__m256d) __builtin_ia32_sqrtpd256_mask_round ((__v4df) __A,
+   (__v4df) __W,
+   (__mmask8) __U,
+   __R);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_sqrt_round_pd (__mmask8 __U, __m256d __A, const int __R)
+{
+  return (__m256d) __builtin_ia32_sqrtpd256_mask_round ((__v4df) __A,
+   (__v4df)
+   _mm256_setzero_pd (),
+   (__mmask8) __U,
+   __R);
+}
+
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_sqrt_round_ph (__m256h __A, const int __R)
+{
+  return (__m256h) __builtin_ia32_sqrtph256_mask_round ((__v16hf) __A,
+   (__v16hf)
+   _mm256_undefined_ph (),
+   (__mmask16) -1,
+   __R);
+}
+
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_sqrt_round_ph (__m256h __W, __mmask16 __U, __m256h __A,
+  const int __R)
+{
+  return (__m256h) __builtin_ia32_sqrtph256_mask_round ((__v16hf) __A,
+   (__v16hf) __W,
+   (__mmask16) __U,
+   __R);
+}
+
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_sqrt_round_ph (__mmask16 __U, __m256h __A, const int __R)
+{
+  return (__m256h) __builtin_ia32_sqrtph256_mask_round ((__v16hf) __A,
+   (__v16hf)
+   _mm256_setzero_ph (),
+   (__mmask16) __U,
+   __R);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_sqrt_round_ps (__m256 __A, const int __R)
+{
+  return (__m256) __builtin_ia32_sqrtps

[gcc r15-2922] Value Range: Add range op for builtin isinf

2024-08-14 Thread HaoChen Gui via Gcc-cvs
https://gcc.gnu.org/g:cd0930768ff936b681135bf8873e7f7b796a4ed9

commit r15-2922-gcd0930768ff936b681135bf8873e7f7b796a4ed9
Author: Haochen Gui 
Date:   Thu Aug 15 11:19:10 2024 +0800

Value Range: Add range op for builtin isinf

The builtin isinf is not folded at front end if the corresponding optab
exists.  So the range op for isinf is needed for value range analysis.
This patch adds range op for builtin isinf.

gcc/
PR target/114678
* gimple-range-op.cc (class cfn_isinf): New.
(op_cfn_isinf): New variables.
(gimple_range_op_handler::maybe_builtin_call): Handle
CASE_FLT_FN (BUILT_IN_ISINF).

gcc/testsuite/
PR target/114678
* gcc.dg/tree-ssa/range-isinf.c: New test.
* gcc.dg/tree-ssa/range-sincos.c: Remove xfail for s390.
* gcc.dg/tree-ssa/vrp-float-abs-1.c: Likewise.

Diff:
---
 gcc/gimple-range-op.cc  | 62 +
 gcc/testsuite/gcc.dg/tree-ssa/range-isinf.c | 44 ++
 gcc/testsuite/gcc.dg/tree-ssa/range-sincos.c|  2 +-
 gcc/testsuite/gcc.dg/tree-ssa/vrp-float-abs-1.c |  2 +-
 4 files changed, 108 insertions(+), 2 deletions(-)

diff --git a/gcc/gimple-range-op.cc b/gcc/gimple-range-op.cc
index a80b93cf063..24559951dd6 100644
--- a/gcc/gimple-range-op.cc
+++ b/gcc/gimple-range-op.cc
@@ -1153,6 +1153,63 @@ private:
   bool m_is_pos;
 } op_cfn_goacc_dim_size (false), op_cfn_goacc_dim_pos (true);
 
+// Implement range operator for CFN_BUILT_IN_ISINF
+class cfn_isinf : public range_operator
+{
+public:
+  using range_operator::fold_range;
+  using range_operator::op1_range;
+  virtual bool fold_range (irange &r, tree type, const frange &op1,
+  const irange &, relation_trio) const override
+  {
+if (op1.undefined_p ())
+  return false;
+
+if (op1.known_isinf ())
+  {
+   wide_int one = wi::one (TYPE_PRECISION (type));
+   r.set (type, one, one);
+   return true;
+  }
+
+if (op1.known_isnan ()
+   || (!real_isinf (&op1.lower_bound ())
+   && !real_isinf (&op1.upper_bound ())))
+  {
+   r.set_zero (type);
+   return true;
+  }
+
+r.set_varying (type);
+return true;
+  }
+  virtual bool op1_range (frange &r, tree type, const irange &lhs,
+ const frange &, relation_trio) const override
+  {
+if (lhs.undefined_p ())
+  return false;
+
+if (lhs.zero_p ())
+  {
+   nan_state nan (true);
+   r.set (type, real_min_representable (type),
+  real_max_representable (type), nan);
+   return true;
+  }
+
+if (!range_includes_zero_p (lhs))
+  {
+   // The range is [-INF,-INF][+INF,+INF], but it can't be represented.
+   // Set range to [-INF,+INF]
+   r.set_varying (type);
+   r.clear_nan ();
+   return true;
+  }
+
+r.set_varying (type);
+return true;
+  }
+} op_cfn_isinf;
 
 // Implement range operator for CFN_BUILT_IN_
 class cfn_parity : public range_operator
@@ -1246,6 +1303,11 @@ gimple_range_op_handler::maybe_builtin_call ()
   m_operator = &op_cfn_signbit;
   break;
 
+CASE_FLT_FN (BUILT_IN_ISINF):
+  m_op1 = gimple_call_arg (call, 0);
+  m_operator = &op_cfn_isinf;
+  break;
+
 CASE_CFN_COPYSIGN_ALL:
   m_op1 = gimple_call_arg (call, 0);
   m_op2 = gimple_call_arg (call, 1);
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/range-isinf.c 
b/gcc/testsuite/gcc.dg/tree-ssa/range-isinf.c
new file mode 100644
index 000..468f1bcf5c7
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/range-isinf.c
@@ -0,0 +1,44 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-evrp" } */
+
+#include <math.h>
+void link_error();
+
+void
+test1 (double x)
+{
+  if (x > __DBL_MAX__ && !__builtin_isinf (x))
+link_error ();
+  if (x < -__DBL_MAX__ && !__builtin_isinf (x))
+link_error ();
+}
+
+void
+test2 (float x)
+{
+  if (x > __FLT_MAX__ && !__builtin_isinf (x))
+link_error ();
+  if (x < -__FLT_MAX__ && !__builtin_isinf (x))
+link_error ();
+}
+
+void
+test3 (double x)
+{
+  if (!__builtin_isinf (x) && !__builtin_isnan (x) && x > __DBL_MAX__)
+link_error ();
+  if (!__builtin_isinf (x) && !__builtin_isnan (x) && x < -__DBL_MAX__)
+link_error ();
+}
+
+void
+test4 (float x)
+{
+  if (!__builtin_isinf (x) && !__builtin_isnan (x) && x > __FLT_MAX__)
+link_error ();
+  if (!__builtin_isinf (x) && !__builtin_isnan (x) && x < -__FLT_MAX__)
+link_error ();
+}
+
+/* { dg-final { scan-tree-dump-not "link_error" "evrp" } } */
+
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/range-sincos.c 
b/gcc/testsuite/gcc.dg/tree-ssa/range-sincos.c
index 35b38c3c914..337f9cda02f 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/range-sincos.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/range-sincos.c
@@ -40,4 +40,4 @@ stool (double x)
 link_error ();
 }
 
-// { dg-final { scan-tree-dump-not "link_error" 

[gcc r15-2923] Value Range: Add range op for builtin isfinite

2024-08-14 Thread HaoChen Gui via Gcc-cvs
https://gcc.gnu.org/g:b1d21582bafa1954db3a62f0972ae3a2e3bc0b48

commit r15-2923-gb1d21582bafa1954db3a62f0972ae3a2e3bc0b48
Author: Haochen Gui 
Date:   Thu Aug 15 11:20:27 2024 +0800

Value Range: Add range op for builtin isfinite

The former patch adds optab for builtin isfinite. Thus builtin isfinite
might not be folded at front end.  So the range op for isfinite is needed
for value range analysis.  This patch adds range op for builtin isfinite.

gcc/
* gimple-range-op.cc (class cfn_isfinite): New.
(op_cfn_isfinite): New variable.
(gimple_range_op_handler::maybe_builtin_call): Handle
CFN_BUILT_IN_ISFINITE.

gcc/testsuite/
* gcc.dg/tree-ssa/range-isfinite.c: New test.

Diff:
---
 gcc/gimple-range-op.cc | 61 ++
 gcc/testsuite/gcc.dg/tree-ssa/range-isfinite.c | 31 +
 2 files changed, 92 insertions(+)

diff --git a/gcc/gimple-range-op.cc b/gcc/gimple-range-op.cc
index 24559951dd6..7edfa8e315f 100644
--- a/gcc/gimple-range-op.cc
+++ b/gcc/gimple-range-op.cc
@@ -1211,6 +1211,62 @@ public:
   }
 } op_cfn_isinf;
 
+//Implement range operator for CFN_BUILT_IN_ISFINITE
+class cfn_isfinite : public range_operator
+{
+public:
+  using range_operator::fold_range;
+  using range_operator::op1_range;
+  virtual bool fold_range (irange &r, tree type, const frange &op1,
+  const irange &, relation_trio) const override
+  {
+if (op1.undefined_p ())
+  return false;
+
+if (op1.known_isfinite ())
+  {
+   wide_int one = wi::one (TYPE_PRECISION (type));
+   r.set (type, one, one);
+   return true;
+  }
+
+if (op1.known_isnan ()
+   || op1.known_isinf ())
+  {
+   r.set_zero (type);
+   return true;
+  }
+
+r.set_varying (type);
+return true;
+  }
+  virtual bool op1_range (frange &r, tree type, const irange &lhs,
+ const frange &, relation_trio) const override
+  {
+if (lhs.undefined_p ())
+  return false;
+
+if (lhs.zero_p ())
+  {
+   // The range is [-INF,-INF][+INF,+INF] NAN, but it can't be represented.
+   // Set range to varying
+   r.set_varying (type);
+   return true;
+  }
+
+if (!range_includes_zero_p (lhs))
+  {
+   nan_state nan (false);
+   r.set (type, real_min_representable (type),
+  real_max_representable (type), nan);
+   return true;
+  }
+
+r.set_varying (type);
+return true;
+  }
+} op_cfn_isfinite;
+
 // Implement range operator for CFN_BUILT_IN_
 class cfn_parity : public range_operator
 {
@@ -1308,6 +1364,11 @@ gimple_range_op_handler::maybe_builtin_call ()
   m_operator = &op_cfn_isinf;
   break;
 
+case CFN_BUILT_IN_ISFINITE:
+  m_op1 = gimple_call_arg (call, 0);
+  m_operator = &op_cfn_isfinite;
+  break;
+
 CASE_CFN_COPYSIGN_ALL:
   m_op1 = gimple_call_arg (call, 0);
   m_op2 = gimple_call_arg (call, 1);
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/range-isfinite.c 
b/gcc/testsuite/gcc.dg/tree-ssa/range-isfinite.c
new file mode 100644
index 000..f5dce0a0486
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/range-isfinite.c
@@ -0,0 +1,31 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-evrp" } */
+
+#include <math.h>
+void link_error();
+
+void test1 (double x)
+{
+  if (x < __DBL_MAX__ && x > -__DBL_MAX__ && !__builtin_isfinite (x))
+link_error ();
+}
+
+void test2 (float x)
+{
+  if (x < __FLT_MAX__ && x > -__FLT_MAX__ && !__builtin_isfinite (x))
+link_error ();
+}
+
+void test3 (double x)
+{
+  if (__builtin_isfinite (x) && __builtin_isinf (x))
+link_error ();
+}
+
+void test4 (float x)
+{
+  if (__builtin_isfinite (x) && __builtin_isinf (x))
+link_error ();
+}
+
+/* { dg-final { scan-tree-dump-not "link_error" "evrp" } } */


[gcc r15-2924] Value Range: Add range op for builtin isnormal

2024-08-14 Thread HaoChen Gui via Gcc-cvs
https://gcc.gnu.org/g:d2e90c7d65749a02a20aca717ac47d02ef0b5d81

commit r15-2924-gd2e90c7d65749a02a20aca717ac47d02ef0b5d81
Author: Haochen Gui 
Date:   Thu Aug 15 11:21:08 2024 +0800

Value Range: Add range op for builtin isnormal

The former patch adds optab for builtin isnormal. Thus builtin isnormal
might not be folded at front end.  So the range op for isnormal is needed
for value range analysis.  This patch adds range op for builtin isnormal.

gcc/
* gimple-range-op.cc (class cfn_isnormal): New.
(op_cfn_isnormal): New variable.
(gimple_range_op_handler::maybe_builtin_call): Handle
CFN_BUILT_IN_ISNORMAL.
* value-range.h (class frange): Declare known_isnormal and
known_isdenormal_or_zero.
(frange::known_isnormal): Define.
(frange::known_isdenormal_or_zero): Define.

gcc/testsuite/
* gcc.dg/tree-ssa/range-isnormal.c: New test.

Diff:
---
 gcc/gimple-range-op.cc | 60 ++
 gcc/testsuite/gcc.dg/tree-ssa/range-isnormal.c | 37 
 gcc/value-range.h  | 29 +
 3 files changed, 126 insertions(+)

diff --git a/gcc/gimple-range-op.cc b/gcc/gimple-range-op.cc
index 7edfa8e315f..d1c527191f4 100644
--- a/gcc/gimple-range-op.cc
+++ b/gcc/gimple-range-op.cc
@@ -1267,6 +1267,61 @@ public:
   }
 } op_cfn_isfinite;
 
+//Implement range operator for CFN_BUILT_IN_ISNORMAL
+class cfn_isnormal :  public range_operator
+{
+public:
+  using range_operator::fold_range;
+  using range_operator::op1_range;
+  virtual bool fold_range (irange &r, tree type, const frange &op1,
+  const irange &, relation_trio) const override
+  {
+if (op1.undefined_p ())
+  return false;
+
+if (op1.known_isnormal ())
+  {
+   wide_int one = wi::one (TYPE_PRECISION (type));
+   r.set (type, one, one);
+   return true;
+  }
+
+if (op1.known_isnan ()
+   || op1.known_isinf ()
+   || op1.known_isdenormal_or_zero ())
+  {
+   r.set_zero (type);
+   return true;
+  }
+
+r.set_varying (type);
+return true;
+  }
+  virtual bool op1_range (frange &r, tree type, const irange &lhs,
+ const frange &, relation_trio) const override
+  {
+if (lhs.undefined_p ())
+  return false;
+
+if (lhs.zero_p ())
+  {
+   r.set_varying (type);
+   return true;
+  }
+
+if (!range_includes_zero_p (lhs))
+  {
+   nan_state nan (false);
+   r.set (type, real_min_representable (type),
+  real_max_representable (type), nan);
+   return true;
+  }
+
+r.set_varying (type);
+return true;
+  }
+} op_cfn_isnormal;
+
 // Implement range operator for CFN_BUILT_IN_
 class cfn_parity : public range_operator
 {
@@ -1369,6 +1424,11 @@ gimple_range_op_handler::maybe_builtin_call ()
   m_operator = &op_cfn_isfinite;
   break;
 
+case CFN_BUILT_IN_ISNORMAL:
+  m_op1 = gimple_call_arg (call, 0);
+  m_operator = &op_cfn_isnormal;
+  break;
+
 CASE_CFN_COPYSIGN_ALL:
   m_op1 = gimple_call_arg (call, 0);
   m_op2 = gimple_call_arg (call, 1);
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/range-isnormal.c 
b/gcc/testsuite/gcc.dg/tree-ssa/range-isnormal.c
new file mode 100644
index 000..c4df4d839b0
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/range-isnormal.c
@@ -0,0 +1,37 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-evrp" } */
+
+#include <math.h>
+void link_error();
+
+void test1 (double x)
+{
+  if (x < __DBL_MAX__ && x > __DBL_MIN__ && !__builtin_isnormal (x))
+link_error ();
+
+  if (x < -__DBL_MIN__ && x > -__DBL_MAX__ && !__builtin_isnormal (x))
+link_error ();
+}
+
+void test2 (float x)
+{
+  if (x < __FLT_MAX__ && x > __FLT_MIN__ && !__builtin_isnormal (x))
+link_error ();
+
+  if (x < -__FLT_MIN__ && x > - __FLT_MAX__ && !__builtin_isnormal (x))
+link_error ();
+}
+
+void test3 (double x)
+{
+  if (__builtin_isnormal (x) && __builtin_isinf (x))
+link_error ();
+}
+
+void test4 (float x)
+{
+  if (__builtin_isnormal (x) && __builtin_isinf (x))
+link_error ();
+}
+
+/* { dg-final { scan-tree-dump-not "link_error" "evrp" } } */
diff --git a/gcc/value-range.h b/gcc/value-range.h
index 03af758d152..ff63d4fc5ce 100644
--- a/gcc/value-range.h
+++ b/gcc/value-range.h
@@ -588,6 +588,8 @@ public:
   bool maybe_isinf () const;
   bool signbit_p (bool &signbit) const;
   bool nan_signbit_p (bool &signbit) const;
+  bool known_isnormal () const;
+  bool known_isdenormal_or_zero () const;
 
 protected:
   virtual bool contains_p (tree cst) const override;
@@ -1648,6 +1650,33 @@ frange::known_isfinite () const
   return (!maybe_isnan () && !real_isinf (&m_min) && !real_isinf (&m_max));
 }
 
+// Return TRUE if range is known to be normal.
+
+inline bool
+frange::known_isnormal () const
+{
+  if (!known_isfinite

[gcc r15-2925] rs6000: Implement optab_isinf for SFDF and IEEE128

2024-08-14 Thread HaoChen Gui via Gcc-cvs
https://gcc.gnu.org/g:53945be1efb502f235d84ff67ceafe4a764b6e1c

commit r15-2925-g53945be1efb502f235d84ff67ceafe4a764b6e1c
Author: Haochen Gui 
Date:   Thu Aug 15 13:38:22 2024 +0800

rs6000: Implement optab_isinf for SFDF and IEEE128

gcc/
PR target/97786
* config/rs6000/rs6000.md (constant VSX_TEST_DATA_CLASS_NAN,
VSX_TEST_DATA_CLASS_POS_INF, VSX_TEST_DATA_CLASS_NEG_INF,
VSX_TEST_DATA_CLASS_POS_ZERO, VSX_TEST_DATA_CLASS_NEG_ZERO,
VSX_TEST_DATA_CLASS_POS_DENORMAL, VSX_TEST_DATA_CLASS_NEG_DENORMAL):
Define.
(mode_attr sdq, vsx_altivec, wa_v, x): Define.
(mode_iterator IEEE_FP): Define.
* config/rs6000/vsx.md (isinf2): New expand.
(expand xststdcqp_, xststdcp): Combine into...
(expand xststdc_): ...this.
(insn *xststdcqp_, *xststdcp): Combine into...
(insn *xststdc_): ...this.
* config/rs6000/rs6000-builtin.cc (rs6000_expand_builtin): Rename
CODE_FOR_xststdcqp_kf as CODE_FOR_xststdc_kf,
CODE_FOR_xststdcqp_tf as CODE_FOR_xststdc_tf.
* config/rs6000/rs6000-builtins.def: Rename xststdcdp as xststdc_df,
xststdcsp as xststdc_sf, xststdcqp_kf as xststdc_kf.

gcc/testsuite/
PR target/97786
* gcc.target/powerpc/pr97786-1.c: New test.
* gcc.target/powerpc/pr97786-2.c: New test.

Diff:
---
 gcc/config/rs6000/rs6000-builtin.cc  |  4 +-
 gcc/config/rs6000/rs6000-builtins.def|  6 +--
 gcc/config/rs6000/rs6000.md  | 35 +
 gcc/config/rs6000/vsx.md | 58 +---
 gcc/testsuite/gcc.target/powerpc/pr97786-1.c | 22 +++
 gcc/testsuite/gcc.target/powerpc/pr97786-2.c | 17 
 6 files changed, 97 insertions(+), 45 deletions(-)

diff --git a/gcc/config/rs6000/rs6000-builtin.cc 
b/gcc/config/rs6000/rs6000-builtin.cc
index 099cbc82245..9bdbae1ecf9 100644
--- a/gcc/config/rs6000/rs6000-builtin.cc
+++ b/gcc/config/rs6000/rs6000-builtin.cc
@@ -3254,8 +3254,8 @@ rs6000_expand_builtin (tree exp, rtx target, rtx /* 
subtarget */,
   case CODE_FOR_xsiexpqpf_kf:
icode = CODE_FOR_xsiexpqpf_tf;
break;
-  case CODE_FOR_xststdcqp_kf:
-   icode = CODE_FOR_xststdcqp_tf;
+  case CODE_FOR_xststdc_kf:
+   icode = CODE_FOR_xststdc_tf;
break;
   case CODE_FOR_xscmpexpqp_eq_kf:
icode = CODE_FOR_xscmpexpqp_eq_tf;
diff --git a/gcc/config/rs6000/rs6000-builtins.def 
b/gcc/config/rs6000/rs6000-builtins.def
index 5b513a7ef2b..0e9dc05dbcf 100644
--- a/gcc/config/rs6000/rs6000-builtins.def
+++ b/gcc/config/rs6000/rs6000-builtins.def
@@ -2554,11 +2554,11 @@
 
   const signed int \
   __builtin_vsx_scalar_test_data_class_dp (double, const int<7>);
-VSTDCDP xststdcdp {}
+VSTDCDP xststdc_df {}
 
   const signed int \
   __builtin_vsx_scalar_test_data_class_sp (float, const int<7>);
-VSTDCSP xststdcsp {}
+VSTDCSP xststdc_sf {}
 
   const signed int __builtin_vsx_scalar_test_neg_dp (double);
 VSTDCNDP xststdcnegdp {}
@@ -2727,7 +2727,7 @@
 
   const signed int __builtin_vsx_scalar_test_data_class_qp (_Float128, \
 const int<7>);
-VSTDCQP xststdcqp_kf {}
+VSTDCQP xststdc_kf {}
 
   const signed int __builtin_vsx_scalar_test_neg_qp (_Float128);
 VSTDCNQP xststdcnegqp_kf {}
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index d352a1431ad..267affa5057 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -53,6 +53,20 @@
(FRAME_POINTER_REGNUM   110)
   ])
 
+;;
+;; Test data class mask bits
+;;
+
+(define_constants
+  [(VSX_TEST_DATA_CLASS_NAN0x40)
+   (VSX_TEST_DATA_CLASS_POS_INF0x20)
+   (VSX_TEST_DATA_CLASS_NEG_INF0x10)
+   (VSX_TEST_DATA_CLASS_POS_ZERO   0x8)
+   (VSX_TEST_DATA_CLASS_NEG_ZERO   0x4)
+   (VSX_TEST_DATA_CLASS_POS_DENORMAL   0x2)
+   (VSX_TEST_DATA_CLASS_NEG_DENORMAL   0x1)
+  ])
+
 ;;
 ;; UNSPEC usage
 ;;
@@ -605,6 +619,10 @@
 (define_mode_attr sd [(SF   "s") (DF   "d")
  (V4SF "s") (V2DF "d")])
 
+; A generic s/d/q attribute, for sp/dp/qp for example.
+(define_mode_attr sdq [(SF "s") (DF "d")
+  (TF "q") (KF "q")])
+
 ; "s" or nothing, for fmuls/fmul for example.
 (define_mode_attr s [(SF "s") (DF "")])
 
@@ -616,6 +634,23 @@
 (define_mode_iterator IEEE128 [(KF "FLOAT128_IEEE_P (KFmode)")
   (TF "FLOAT128_IEEE_P (TFmode)")])
 
+; Iterator for IEEE floating point
+(define_mode_iterator IEEE_FP [SFDF IEEE128])
+
+; "vsx/altivec_register_operand", for IEEE_FP predicates
+(define_mode_attr fp_register_op [(SF "vsx_register_operand")
+ (DF "vsx_register_operand")
+ (TF "altivec_register_operand")
+   

[gcc r15-2926] rs6000: Implement optab_isfinite for SFDF and IEEE128

2024-08-14 Thread HaoChen Gui via Gcc-cvs
https://gcc.gnu.org/g:44eb45c2ef7192eb6a811fd46fcb2c7fbeb6f865

commit r15-2926-g44eb45c2ef7192eb6a811fd46fcb2c7fbeb6f865
Author: Haochen Gui 
Date:   Thu Aug 15 13:41:25 2024 +0800

rs6000: Implement optab_isfinite for SFDF and IEEE128

gcc/
PR target/97786
* config/rs6000/vsx.md (isfinite2): New expand.

gcc/testsuite/
PR target/97786
* gcc.target/powerpc/pr97786-4.c: New test.
* gcc.target/powerpc/pr97786-5.c: New test.

Diff:
---
 gcc/config/rs6000/vsx.md | 15 +++
 gcc/testsuite/gcc.target/powerpc/pr97786-4.c | 17 +
 gcc/testsuite/gcc.target/powerpc/pr97786-5.c | 12 
 3 files changed, 44 insertions(+)

diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index 3a6afd13c16..0f18fd5cffe 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -5370,6 +5370,21 @@
   DONE;
 })
 
+(define_expand "isfinite<mode>2"
+  [(use (match_operand:SI 0 "gpc_reg_operand"))
+   (use (match_operand:IEEE_FP 1 "<fp_register_op>"))]
+  "TARGET_P9_VECTOR
+   && (!FLOAT128_IEEE_P (<MODE>mode) || TARGET_FLOAT128_HW)"
+{
+  rtx tmp = gen_reg_rtx (SImode);
+  /* It is neither infinite nor NAN.  */
+  int mask = VSX_TEST_DATA_CLASS_POS_INF | VSX_TEST_DATA_CLASS_NEG_INF
+| VSX_TEST_DATA_CLASS_NAN;
+  emit_insn (gen_xststdc_<mode> (tmp, operands[1], GEN_INT (mask)));
+  emit_insn (gen_xorsi3 (operands[0], tmp, const1_rtx));
+  DONE;
+})
+
 ;; The VSX Scalar Test Negative Quad-Precision
 (define_expand "xststdcnegqp_"
   [(set (match_dup 2)
diff --git a/gcc/testsuite/gcc.target/powerpc/pr97786-4.c 
b/gcc/testsuite/gcc.target/powerpc/pr97786-4.c
new file mode 100644
index 000..9cdde78257d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr97786-4.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mdejagnu-cpu=power9" } */
+/* { dg-require-effective-target powerpc_vsx } */
+
+int test1 (double x)
+{
+  return __builtin_isfinite (x);
+}
+
+int test2 (float x)
+{
+  return __builtin_isfinite (x);
+}
+
+/* { dg-final { scan-assembler-not {\mfcmp} } } */
+/* { dg-final { scan-assembler-times {\mxststdcsp\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mxststdcdp\M} 1 } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/pr97786-5.c 
b/gcc/testsuite/gcc.target/powerpc/pr97786-5.c
new file mode 100644
index 000..0ef8b86f6cb
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr97786-5.c
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target ppc_float128_hw } */
+/* { dg-options "-O2 -mdejagnu-cpu=power9 -mabi=ieeelongdouble -Wno-psabi" } */
+/* { dg-require-effective-target powerpc_vsx } */
+
+int test1 (long double x)
+{
+  return __builtin_isfinite (x);
+}
+
+/* { dg-final { scan-assembler-not {\mxscmpuqp\M} } } */
+/* { dg-final { scan-assembler {\mxststdcqp\M} } } */


[gcc r15-2928] rs6000: Add TARGET_FLOAT128_HW guard for quad-precision insns

2024-08-14 Thread HaoChen Gui via Gcc-cvs
https://gcc.gnu.org/g:bf891fcabca7a59ce71e85c8f2eea2bfabbffe59

commit r15-2928-gbf891fcabca7a59ce71e85c8f2eea2bfabbffe59
Author: Haochen Gui 
Date:   Thu Aug 15 13:45:35 2024 +0800

rs6000: Add TARGET_FLOAT128_HW guard for quad-precision insns

gcc/
* config/rs6000/rs6000.md (floatti2, floatunsti2,
fix_truncti2): Add guard TARGET_FLOAT128_HW.
* config/rs6000/vsx.md (xsxexpqp__,
xsxsigqp__, xsiexpqpf_,
xsiexpqp__, xscmpexpqp__,
*xscmpexpqp, xststdcnegqp_): Replace guard TARGET_P9_VECTOR
with TARGET_FLOAT128_HW.
(xststdc_, *xststdc_, isinf2): Add guard
TARGET_FLOAT128_HW for the IEEE128 modes.

gcc/testsuite/
* gcc.target/powerpc/float128-cmp2-runnable.c: Replace
ppc_float128_sw with ppc_float128_hw and remove p9vector_hw.

Diff:
---
 gcc/config/rs6000/rs6000.md|  6 +++---
 gcc/config/rs6000/vsx.md   | 23 --
 .../gcc.target/powerpc/float128-cmp2-runnable.c|  3 +--
 3 files changed, 17 insertions(+), 15 deletions(-)

diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index 267affa5057..8eda2f7bb0d 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -6928,7 +6928,7 @@
 (define_insn "floatti2"
   [(set (match_operand:IEEE128 0 "vsx_register_operand" "=v")
(float:IEEE128 (match_operand:TI 1 "vsx_register_operand" "v")))]
-  "TARGET_POWER10"
+  "TARGET_POWER10 && TARGET_FLOAT128_HW"
 {
   return  "xscvsqqp %0,%1";
 }
@@ -6937,7 +6937,7 @@
 (define_insn "floatunsti2"
   [(set (match_operand:IEEE128 0 "vsx_register_operand" "=v")
(unsigned_float:IEEE128 (match_operand:TI 1 "vsx_register_operand" 
"v")))]
-  "TARGET_POWER10"
+  "TARGET_POWER10 && TARGET_FLOAT128_HW"
 {
   return  "xscvuqqp %0,%1";
 }
@@ -6946,7 +6946,7 @@
 (define_insn "fix_truncti2"
   [(set (match_operand:TI 0 "vsx_register_operand" "=v")
(fix:TI (match_operand:IEEE128 1 "vsx_register_operand" "v")))]
-  "TARGET_POWER10"
+  "TARGET_POWER10 && TARGET_FLOAT128_HW"
 {
   return  "xscvqpsqz %0,%1";
 }
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index 705e70bd6e9..27069d070e1 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -5196,7 +5196,7 @@
(unspec:V2DI_DI
  [(match_operand:IEEE128 1 "altivec_register_operand" "v")]
 UNSPEC_VSX_SXEXPDP))]
-  "TARGET_P9_VECTOR"
+  "TARGET_FLOAT128_HW"
   "xsxexpqp %0,%1"
   [(set_attr "type" "vecmove")])
 
@@ -5215,7 +5215,7 @@
(unspec:VEC_TI [(match_operand:IEEE128 1
"altivec_register_operand" "v")]
 UNSPEC_VSX_SXSIG))]
-  "TARGET_P9_VECTOR"
+  "TARGET_FLOAT128_HW"
   "xsxsigqp %0,%1"
   [(set_attr "type" "vecmove")])
 
@@ -5235,7 +5235,7 @@
 [(match_operand:IEEE128 1 "altivec_register_operand" "v")
  (match_operand:DI 2 "altivec_register_operand" "v")]
 UNSPEC_VSX_SIEXPQP))]
-  "TARGET_P9_VECTOR"
+  "TARGET_FLOAT128_HW"
   "xsiexpqp %0,%1,%2"
   [(set_attr "type" "vecmove")])
 
@@ -5247,7 +5247,7 @@
 (match_operand:V2DI_DI 2
  "altivec_register_operand" "v")]
 UNSPEC_VSX_SIEXPQP))]
-  "TARGET_P9_VECTOR"
+  "TARGET_FLOAT128_HW"
   "xsiexpqp %0,%1,%2"
   [(set_attr "type" "vecmove")])
 
@@ -5317,7 +5317,7 @@
(set (match_operand:SI 0 "register_operand" "=r")
(CMP_TEST:SI (match_dup 3)
 (const_int 0)))]
-  "TARGET_P9_VECTOR"
+  "TARGET_FLOAT128_HW"
 {
   if ( == UNORDERED && !HONOR_NANS (mode))
 {
@@ -5335,7 +5335,7 @@
  (match_operand:IEEE128 2 "altivec_register_operand" 
"v")]
  UNSPEC_VSX_SCMPEXPQP)
 (match_operand:SI 3 "zero_constant" "j")))]
-  "TARGET_P9_VECTOR"
+  "TARGET_FLOAT128_HW"
   "xscmpexpqp %0,%1,%2"
   [(set_attr "type" "fpcompare")])
 
@@ -5354,7 +5354,8 @@
(set (match_operand:SI 0 "register_operand" "=r")
(eq:SI (match_dup 3)
   (const_int 0)))]
-  "TARGET_P9_VECTOR"
+  "TARGET_P9_VECTOR
+   && (!FLOAT128_IEEE_P (mode) || TARGET_FLOAT128_HW)"
 {
   operands[3] = gen_reg_rtx (CCFPmode);
   operands[4] = CONST0_RTX (SImode);
@@ -5363,7 +5364,8 @@
 (define_expand "isinf2"
   [(use (match_operand:SI 0 "gpc_reg_operand"))
(use (match_operand:IEEE_FP 1 ""))]
-  "TARGET_HARD_FLOAT && TARGET_P9_VECTOR"
+  "TARGET_P9_VECTOR
+   && (!FLOAT128_IEEE_P (mode) || TARGET_FLOAT128_HW)"
 {
   int mask = VSX_TEST_DATA_CLASS_POS_INF | VSX_TEST_DATA_CLASS_NEG_INF;
   emit_insn (gen_xststdc_ (operands[0], operands[1], GEN_INT (mask)));
@@ -5415,7 +5417,7 @@
(set (match_operand:SI 0 "register_operand" "=r")
(lt:SI (match_dup 2)
   (const_int 0)))]
-  "TARGET_P9_VECTOR"
+  "TARGET_FLOAT128_HW"
 {
   operands[2] = gen_reg_rtx (CCFPmode);
 })
@@ -5446,7 +5448,8 @@
   (match_operand:SI 2 "u7bit_cint_operand" "n")]
 

[gcc r15-2927] rs6000: Implement optab_isnormal for SFDF and IEEE128

2024-08-14 Thread HaoChen Gui via Gcc-cvs
https://gcc.gnu.org/g:08108d57246210de7d5a00b1967dab7102d356bc

commit r15-2927-g08108d57246210de7d5a00b1967dab7102d356bc
Author: Haochen Gui 
Date:   Thu Aug 15 13:43:28 2024 +0800

rs6000: Implement optab_isnormal for SFDF and IEEE128

gcc/
PR target/97786
* config/rs6000/vsx.md (isnormal2): New expand.

gcc/testsuite/
PR target/97786
* gcc.target/powerpc/pr97786-7.c: New test.
* gcc.target/powerpc/pr97786-8.c: New test.

Diff:
---
 gcc/config/rs6000/vsx.md | 18 ++
 gcc/testsuite/gcc.target/powerpc/pr97786-7.c | 17 +
 gcc/testsuite/gcc.target/powerpc/pr97786-8.c | 12 
 3 files changed, 47 insertions(+)

diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index 0f18fd5cffe..705e70bd6e9 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -5385,6 +5385,24 @@
   DONE;
 })
 
+(define_expand "isnormal<mode>2"
+  [(use (match_operand:SI 0 "gpc_reg_operand"))
+   (use (match_operand:IEEE_FP 1 "<fp_register_op>"))]
+  "TARGET_P9_VECTOR
+   && (!FLOAT128_IEEE_P (<MODE>mode) || TARGET_FLOAT128_HW)"
+{
+  rtx tmp = gen_reg_rtx (SImode);
+  /* It is neither NAN, infinite, zero, nor denormal.  */
+  int mask = VSX_TEST_DATA_CLASS_NAN
+| VSX_TEST_DATA_CLASS_POS_INF | VSX_TEST_DATA_CLASS_NEG_INF
+| VSX_TEST_DATA_CLASS_POS_ZERO | VSX_TEST_DATA_CLASS_NEG_ZERO
+| VSX_TEST_DATA_CLASS_POS_DENORMAL
+| VSX_TEST_DATA_CLASS_NEG_DENORMAL;
+  emit_insn (gen_xststdc_<mode> (tmp, operands[1], GEN_INT (mask)));
+  emit_insn (gen_xorsi3 (operands[0], tmp, const1_rtx));
+  DONE;
+})
+
 ;; The VSX Scalar Test Negative Quad-Precision
 (define_expand "xststdcnegqp_"
   [(set (match_dup 2)
diff --git a/gcc/testsuite/gcc.target/powerpc/pr97786-7.c 
b/gcc/testsuite/gcc.target/powerpc/pr97786-7.c
new file mode 100644
index 000..eb01eed39d3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr97786-7.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mdejagnu-cpu=power9" } */
+/* { dg-require-effective-target powerpc_vsx } */
+
+int test1 (double x)
+{
+  return __builtin_isnormal (x);
+}
+
+int test2 (float x)
+{
+  return __builtin_isnormal (x);
+}
+
+/* { dg-final { scan-assembler-not {\mfcmp} } } */
+/* { dg-final { scan-assembler-times {\mxststdcsp\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mxststdcdp\M} 1 } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/pr97786-8.c 
b/gcc/testsuite/gcc.target/powerpc/pr97786-8.c
new file mode 100644
index 000..eba90d3b1b7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr97786-8.c
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target ppc_float128_hw } */
+/* { dg-options "-O2 -mdejagnu-cpu=power9 -mabi=ieeelongdouble -Wno-psabi" } */
+/* { dg-require-effective-target powerpc_vsx } */
+
+int test1 (long double x)
+{
+  return __builtin_isnormal (x);
+}
+
+/* { dg-final { scan-assembler-not {\mxscmpuqp\M} } } */
+/* { dg-final { scan-assembler {\mxststdcqp\M} } } */


[gcc r14-10584] RISC-V: Make full-vec-move1.c test robust for optimization

2024-08-14 Thread Kito Cheng via Gcc-cvs
https://gcc.gnu.org/g:8c98f0601f7f0d8051eda47370939198f4e01fc4

commit r14-10584-g8c98f0601f7f0d8051eda47370939198f4e01fc4
Author: Pan Li 
Date:   Thu May 9 10:56:46 2024 +0800

RISC-V: Make full-vec-move1.c test robust for optimization

While investigating the support of early break autovec, we noticed
that the test full-vec-move1.c is optimized to 'return 0;' in the main
function body.  Because the value of type V is a compile-time
constant, the second loop is treated as assert (true).

Thus, the ccp4 pass eliminates these statements and just returns 0.

typedef int16_t V __attribute__((vector_size (128)));

int main ()
{
  V v;
  for (int i = 0; i < sizeof (v) / sizeof (v[0]); i++)
(v)[i] = i;

  V res = v;
  for (int i = 0; i < sizeof (v) / sizeof (v[0]); i++)
assert (res[i] == i); // will be optimized to assert (true)
}

This patch introduces an extern function that uses res[i], which
prevents the ccp4 optimization from removing the loop.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/autovec/vls-vlmax/full-vec-move1.c:
Introduce extern func use to get rid of ccp4 optimization.

Signed-off-by: Pan Li 
(cherry picked from commit b1520d2260c5e0cfcd7a4354fab70f66e2912ff2)

Diff:
---
 .../gcc.target/riscv/rvv/autovec/vls-vlmax/full-vec-move1.c | 6 --
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git 
a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/full-vec-move1.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/full-vec-move1.c
index d73bad4af6f..fae2ae91572 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/full-vec-move1.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/full-vec-move1.c
@@ -2,11 +2,12 @@
 /* { dg-additional-options "-std=c99 -O3 -march=rv64gcv_zvl128b -mabi=lp64d 
-fno-vect-cost-model -mrvv-vector-bits=zvl" } */
 
 #include <stdint-gcc.h>
-#include <assert.h>
 
 /* This would cause us to emit a vl1r.v for VNx4HImode even when
the hardware vector size vl > 64.  */
 
+extern int16_t test_element (int16_t);
+
 typedef int16_t V __attribute__((vector_size (128)));
 
 int main ()
@@ -14,9 +15,10 @@ int main ()
   V v;
   for (int i = 0; i < sizeof (v) / sizeof (v[0]); i++)
 (v)[i] = i;
+
   V res = v;
   for (int i = 0; i < sizeof (v) / sizeof (v[0]); i++)
-assert (res[i] == i);
+test_element (res[i]);
 }
 
 /* { dg-final { scan-assembler-not {vl[1248]r.v} } }  */


[gcc r14-10585] [PATCH] RISC-V: Fix unresolved mcpu-[67].c tests

2024-08-14 Thread Kito Cheng via Gcc-cvs
https://gcc.gnu.org/g:d9bd361d893d3b62797f2962bca1b8d56521f3c3

commit r14-10585-gd9bd361d893d3b62797f2962bca1b8d56521f3c3
Author: Craig Blackmore 
Date:   Sat Jun 22 22:07:06 2024 -0600

[PATCH] RISC-V: Fix unresolved mcpu-[67].c tests

These tests check the sched2 dump, so skip them for optimization levels
that do not enable sched2.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/mcpu-6.c: Skip for -O0, -O1, -Og.
* gcc.target/riscv/mcpu-7.c: Likewise.

(cherry picked from commit 77f3b3419d476e90a2b82dff2204466aba3b9c2c)

Diff:
---
 gcc/testsuite/gcc.target/riscv/mcpu-6.c | 1 +
 gcc/testsuite/gcc.target/riscv/mcpu-7.c | 1 +
 2 files changed, 2 insertions(+)

diff --git a/gcc/testsuite/gcc.target/riscv/mcpu-6.c 
b/gcc/testsuite/gcc.target/riscv/mcpu-6.c
index 96faa01653e..0126011939f 100644
--- a/gcc/testsuite/gcc.target/riscv/mcpu-6.c
+++ b/gcc/testsuite/gcc.target/riscv/mcpu-6.c
@@ -1,4 +1,5 @@
 /* { dg-do compile } */
+/* { dg-skip-if "" { *-*-* } { "-O0" "-O1" "-Og" } } */
 /* Verify -mtune has higher priority than -mcpu for pipeline model .  */
 /* { dg-options "-mcpu=sifive-u74 -mtune=rocket -fdump-rtl-sched2-details 
-march=rv32i -mabi=ilp32" } */
 /* { dg-final { scan-rtl-dump "simple_return\[ \]+:alu" "sched2" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/mcpu-7.c 
b/gcc/testsuite/gcc.target/riscv/mcpu-7.c
index 6832323e529..656436343bd 100644
--- a/gcc/testsuite/gcc.target/riscv/mcpu-7.c
+++ b/gcc/testsuite/gcc.target/riscv/mcpu-7.c
@@ -1,4 +1,5 @@
 /* { dg-do compile } */
+/* { dg-skip-if "" { *-*-* } { "-O0" "-O1" "-Og" } } */
 /* Verify -mtune has higher priority than -mcpu for pipeline model .  */
 /* { dg-options "-mcpu=sifive-s21 -mtune=sifive-u74 -fdump-rtl-sched2-details 
-march=rv32i -mabi=ilp32" } */
 /* { dg-final { scan-rtl-dump "simple_return\[ \]+:sifive_7_B" "sched2" } } */