Re: [PATCH 0/8] i386: Opmitize code with AVX10.2 new instructions

2024-09-01 Thread Hongtao Liu
On Mon, Aug 26, 2024 at 2:43 PM Haochen Jiang  wrote:
>
> Hi all,
>
> I have just commited AVX10.2 new instructions patches into trunk hours
> ago. The next and final part for AVX10.2 upstream is to optimize code
> with AVX10.2 new instructions.
>
> In this patch series, it will contain the following optimizations:
>
>   - VNNI instruction auto vectorize (PATCH 1).
>   - Codegen optimization with new scalar comparison instructions to
> eliminate redundant code (PATCH 2-3).
>   - BF16 instruction auto vectorize (PATCH 4-8).
>
> This will finish the upstream for AVX10.2 series.
>
> Afterwards, we may add V2BF/V4BF in another thread just like what we
> have done for V2HF/V4HF when AVX512FP16 upstreamed.
>
> Bootstrapped on x86-64-pc-linux-gnu. Ok for trunk?
Ok for all 8 patches.
>
> Thx,
> Haochen
>
>


-- 
BR,
Hongtao


[PATCH v1 1/2] Match: Add int type fits check for form 1 of .SAT_SUB imm operand

2024-09-01 Thread pan2 . li
From: Pan Li 

This patch would like to add strict check for imm operand of .SAT_SUB
matching.  We have no type checking for imm operand in previous, which
may result in unexpected IL to be catched by .SAT_SUB pattern.

We leverage the int_fits_type_p here to make sure the imm operand is
a int type fits the result type of the .SAT_SUB.  For example:

Fits uint8_t:
uint8_t a;
uint8_t sum = .SAT_SUB (12, a);
uint8_t sum = .SAT_SUB (12u, a);
uint8_t sum = .SAT_SUB (126u, a);
uint8_t sum = .SAT_SUB (128u, a);
uint8_t sum = .SAT_SUB (228, a);
uint8_t sum = .SAT_SUB (223u, a);

Not fits uint8_t:
uint8_t a;
uint8_t sum = .SAT_SUB (-1, a);
uint8_t sum = .SAT_SUB (256u, a);
uint8_t sum = .SAT_SUB (257, a);

The below test suite are passed for this patch:
* The rv64gcv fully regression test.
* The x86 bootstrap test.
* The x86 fully regression test.

gcc/ChangeLog:

* match.pd: Add int_fits_type_p check for .SAT_SUB imm operand.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/sat_arith.h: Add test helper macros.
* gcc.target/riscv/sat_u_add_imm_type_check-53.c: New test.
* gcc.target/riscv/sat_u_add_imm_type_check-54.c: New test.
* gcc.target/riscv/sat_u_add_imm_type_check-55.c: New test.
* gcc.target/riscv/sat_u_add_imm_type_check-56.c: New test.

Signed-off-by: Pan Li 
---
 gcc/match.pd  |  2 +-
 gcc/testsuite/gcc.target/riscv/sat_arith.h| 14 ++
 .../riscv/sat_u_add_imm_type_check-53.c   | 18 +
 .../riscv/sat_u_add_imm_type_check-54.c   | 27 +++
 .../riscv/sat_u_add_imm_type_check-55.c   | 18 +
 .../riscv/sat_u_add_imm_type_check-56.c   | 27 +++
 6 files changed, 105 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_add_imm_type_check-53.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_add_imm_type_check-54.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_add_imm_type_check-55.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_add_imm_type_check-56.c

diff --git a/gcc/match.pd b/gcc/match.pd
index be211535a49..45e0cc4a54f 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -3269,7 +3269,7 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
 (match (unsigned_integer_sat_sub @0 @1)
  (cond^ (le @1 INTEGER_CST@2) (minus INTEGER_CST@0 @1) integer_zerop)
  (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
- && types_match (type, @1))
+ && types_match (type, @1) && int_fits_type_p (@0, type))
  (with
   {
unsigned precision = TYPE_PRECISION (type);
diff --git a/gcc/testsuite/gcc.target/riscv/sat_arith.h 
b/gcc/testsuite/gcc.target/riscv/sat_arith.h
index a899979904b..75f48b4b760 100644
--- a/gcc/testsuite/gcc.target/riscv/sat_arith.h
+++ b/gcc/testsuite/gcc.target/riscv/sat_arith.h
@@ -267,6 +267,20 @@ sat_u_sub_imm##IMM##_##T##_fmt_4 (T x)  \
 #define RUN_SAT_U_SUB_IMM_FMT_4(T, x, IMM, expect) \
   if (sat_u_sub_imm##IMM##_##T##_fmt_4(x) != expect) __builtin_abort ()
 
+#define DEF_SAT_U_SUB_IMM_TYPE_CHECK_FMT_1(INDEX, T, IMM) \
+T __attribute__((noinline))   \
+sat_u_sub_imm_type_check##_##INDEX##_##T##_fmt_1 (T y)\
+{ \
+  return IMM >= y ? IMM - y : 0;  \
+}
+
+#define DEF_SAT_U_SUB_IMM_TYPE_CHECK_FMT_2(INDEX, T, IMM) \
+T __attribute__((noinline))   \
+sat_u_sub_imm_type_check##_##INDEX##_##T##_fmt_2 (T y)\
+{ \
+  return IMM > y ? IMM - y : 0;   \
+}
+
 
/**/
 /* Saturation Truncate (unsigned and signed)  
*/
 
/**/
diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_add_imm_type_check-53.c 
b/gcc/testsuite/gcc.target/riscv/sat_u_add_imm_type_check-53.c
new file mode 100644
index 000..c959eeb0d86
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/sat_u_add_imm_type_check-53.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl-expand-details" } */
+
+#include "sat_arith.h"
+
+DEF_SAT_U_SUB_IMM_TYPE_CHECK_FMT_1 (0, uint8_t, -43)
+DEF_SAT_U_SUB_IMM_TYPE_CHECK_FMT_1 (1, uint8_t, 269)
+DEF_SAT_U_SUB_IMM_TYPE_CHECK_FMT_1 (2, uint8_t, 369u)
+
+DEF_SAT_U_SUB_IMM_TYPE_CHECK_FMT_1 (3, uint16_t, -4)
+DEF_SAT_U_SUB_IMM_TYPE_CHECK_FMT_1 (4, uint16_t, 65579)
+DEF_SAT_U_SUB_IMM_TYPE_CHECK_FMT_1 (5, uint16_t, 65679u)
+
+DEF_SAT_U_SUB_IMM_TYPE_CHECK_FMT_1 (6, uint32_t, -62)
+DEF_SAT_U_SUB_IMM_TYPE_CHECK_FMT_1 (7, uint32_t, 4294967342ll)
+DEF_SAT_U_SUB_IMM_TYPE_CHECK_FMT_1 (8, uint32_t, 4394967342ull)
+
+/* { dg-final { scan-rtl-dump-not ".SAT_ADD " "expand" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_add_imm_type_check-54.c 
b/gcc/testsuite/gc

[PATCH v1 2/2] Match: Add int type fits check for form 2 of .SAT_SUB imm operand

2024-09-01 Thread pan2 . li
From: Pan Li 

This patch would like to add strict check for imm operand of .SAT_SUB
matching.  We have no type checking for imm operand in previous, which
may result in unexpected IL to be catched by .SAT_SUB pattern.

We leverage the int_fits_type_p here to make sure the imm operand is
a int type fits the result type of the .SAT_SUB.  For example:

Fits uint8_t:
uint8_t a;
uint8_t sum = .SAT_SUB (a, 12);
uint8_t sum = .SAT_SUB (a, 12u);
uint8_t sum = .SAT_SUB (a, 126u);
uint8_t sum = .SAT_SUB (a, 128u);
uint8_t sum = .SAT_SUB (a, 228);
uint8_t sum = .SAT_SUB (a, 223u);

Not fits uint8_t:
uint8_t a;
uint8_t sum = .SAT_SUB (a, -1);
uint8_t sum = .SAT_SUB (a, 256u);
uint8_t sum = .SAT_SUB (a, 257);

The below test suite are passed for this patch:
* The rv64gcv fully regression test.
* The x86 bootstrap test.
* The x86 fully regression test.

gcc/ChangeLog:

* match.pd: Add int_fits_type_p check for .SAT_SUB imm operand.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/sat_arith.h: Add test helper macros.
* gcc.target/riscv/sat_u_add_imm_type_check-57.c: New test.
* gcc.target/riscv/sat_u_add_imm_type_check-58.c: New test.
* gcc.target/riscv/sat_u_add_imm_type_check-59.c: New test.
* gcc.target/riscv/sat_u_add_imm_type_check-60.c: New test.

Signed-off-by: Pan Li 
---
 gcc/match.pd  |  2 +-
 gcc/testsuite/gcc.target/riscv/sat_arith.h| 14 ++
 .../riscv/sat_u_add_imm_type_check-57.c   | 18 +
 .../riscv/sat_u_add_imm_type_check-58.c   | 27 +++
 .../riscv/sat_u_add_imm_type_check-59.c   | 18 +
 .../riscv/sat_u_add_imm_type_check-60.c   | 27 +++
 6 files changed, 105 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_add_imm_type_check-57.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_add_imm_type_check-58.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_add_imm_type_check-59.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_add_imm_type_check-60.c

diff --git a/gcc/match.pd b/gcc/match.pd
index 45e0cc4a54f..6c54f0502eb 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -3288,7 +3288,7 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
 (match (unsigned_integer_sat_sub @0 @1)
  (plus (max @0 INTEGER_CST@1) INTEGER_CST@2)
  (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
- && types_match (type, @1))
+ && types_match (type, @1) && int_fits_type_p (@1, type))
  (with
   {
unsigned precision = TYPE_PRECISION (type);
diff --git a/gcc/testsuite/gcc.target/riscv/sat_arith.h 
b/gcc/testsuite/gcc.target/riscv/sat_arith.h
index 75f48b4b760..4d11b6dcf3b 100644
--- a/gcc/testsuite/gcc.target/riscv/sat_arith.h
+++ b/gcc/testsuite/gcc.target/riscv/sat_arith.h
@@ -281,6 +281,20 @@ sat_u_sub_imm_type_check##_##INDEX##_##T##_fmt_2 (T y)\
   return IMM > y ? IMM - y : 0;   \
 }
 
+#define DEF_SAT_U_SUB_IMM_TYPE_CHECK_FMT_3(INDEX, T, IMM) \
+T __attribute__((noinline))   \
+sat_u_sub_imm_type_check##_##INDEX##_##T##_fmt_3 (T x)\
+{ \
+  return x >= IMM ? x - IMM : 0;  \
+}
+
+#define DEF_SAT_U_SUB_IMM_TYPE_CHECK_FMT_4(INDEX, T, IMM) \
+T __attribute__((noinline))   \
+sat_u_sub_imm_type_check##_##INDEX##_##T##_fmt_4 (T x)\
+{ \
+  return x > IMM ? x - IMM : 0;   \
+}
+
 
/**/
 /* Saturation Truncate (unsigned and signed)  
*/
 
/**/
diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_add_imm_type_check-57.c 
b/gcc/testsuite/gcc.target/riscv/sat_u_add_imm_type_check-57.c
new file mode 100644
index 000..1b193bcfb26
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/sat_u_add_imm_type_check-57.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl-expand-details" } */
+
+#include "sat_arith.h"
+
+DEF_SAT_U_SUB_IMM_TYPE_CHECK_FMT_3 (0, uint8_t, -43)
+DEF_SAT_U_SUB_IMM_TYPE_CHECK_FMT_3 (1, uint8_t, 269)
+DEF_SAT_U_SUB_IMM_TYPE_CHECK_FMT_3 (2, uint8_t, 369u)
+
+DEF_SAT_U_SUB_IMM_TYPE_CHECK_FMT_3 (3, uint16_t, -4)
+DEF_SAT_U_SUB_IMM_TYPE_CHECK_FMT_3 (4, uint16_t, 65579)
+DEF_SAT_U_SUB_IMM_TYPE_CHECK_FMT_3 (5, uint16_t, 65679u)
+
+DEF_SAT_U_SUB_IMM_TYPE_CHECK_FMT_3 (6, uint32_t, -62l)
+DEF_SAT_U_SUB_IMM_TYPE_CHECK_FMT_3 (7, uint32_t, 6294967342ll)
+DEF_SAT_U_SUB_IMM_TYPE_CHECK_FMT_3 (8, uint32_t, 4394967342ull)
+
+/* { dg-final { scan-rtl-dump-not ".SAT_ADD " "expand" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_add_imm_type_check-58.c 
b/gcc/testsuite/gcc.target/riscv/sat_u_add_imm_type_check-58.c
new file mode 100644
ind

Re: VLA is a misnomer (rebuttal to n3187)

2024-09-01 Thread Alejandro Colomar
Hi Martin,

On Sun, Sep 01, 2024 at 11:51:10AM GMT, Martin Uecker wrote:
> 
> Alex,
> 
> I am all for making things more consistent, but there is also a cost
> to changing stuff too much.   length is the established 
> term in most programming languages and I would recommend to stick
> to it.
> 
> Note that it is not true that the standard consistently refers to 
> 
> char a[3][n]
> 
> as a VLA. It does so in the description in sizeof but not in the
> type compatibility rules, at least as understood by most compilers. 

The wording in 6.2.7 (Compatible type and composite type) is unclear,
IMO.  Also, it is not the definition of the term "variable length
array", but rather a use of it to describe the composite type.

I'd say the wording is incorrect and should be fixed, since it's a
misuse of the term "variable length array".

The term VLA is actually defined in 6.7.7.3 (Array declarators), and it
clearly defines what is and is not a VLA.


N3301::6.7.7.3p4 (with formatting blanks inserted by me):

[...]
If the size is an integer constant expression
and the element type has a known constant size,
the array type is not a variable length array type;
otherwise,
the array type is a _variable length array_ type.
(Variable length arrays with automatic storage duration are
 a conditional feature that implementations may support;
 see 6.10.10.4.)

(The underscores represent the italics in the document.)

> This is an inconsistency we *should* fix, but I do not think that
> changing away from "length" is a good ida.
> 
> Note that "number of elements" is inherently an ambiguous term for
> multi-dimensional arrays,

I presume you consider the ambiguity in the following way:

int a[3][4];

has 12 int "elements".  While colloquially that could make sense, it has
no precedents in ISO C or in technical language used in C projects
(other than maybe in a few rare projects).

In C, there's no such thing as a multi-dimensional array.  While there
are some mentions to "multi-dimensional array" in the standard, they're
used to clarify the syntax with arrays of arrays (especially to clarify
that C is different from for example Fortran).  However, the standard
also makes clear that technically they're just arrays of elements that
happen to have array type.

It is in


where it says

EXAMPLE The following snippet has an array object defined by the 
declaration:

int x[3][5];

Here x is a 3 × 5 array of objects of type int;
more precisely,
x is an array of three element objects,
each of which is an array of five objects of type int.
In the expression x[i],
which is equivalent to (*((x)+(i))),
x is first converted to
a pointer to the initial array of five objects of type int.
Then i is adjusted according to the type of x,
which conceptually entails
multiplying i by the size of the object to which the pointer points,
namely an array of five int objects.
The results are added and indirection is applied to yield
an array of five objects of type int.
When used in the expression x[i][j],
that array is in turn converted to
a pointer to the first of the objects of type int,
so x[i][j] yields an int.

The "flat" number of elements is something that C does not have (it's
impossible to calculate it in a generic way), which is a consequence of
multi-dimensional arrays not being a thing in C.

Consider for example the following array:

struct s { int a[10][10]; };

struct s  a[10][10];

The number of elements is 10.  The number of ints is 1, and the
number of s structures is 100, but I don't think any of those should be
a fundamental property described by a standard term, nor any operator
(nor operator-like macro) should get any of those two.  I'd like to see
code that needs such a thing before considering it.

If some specific code needs the number of structures, it can get it with
sizeof(a) / sizeof(struct s), or if some specific code needs the number
of integers, it can get it with sizeof(a) / sizeof(int).  But that's
still not justification to have a term for it (and currently there's
none).

> and I am not sure how you want to avoid
> this without making the wording more complex (e.g. "number of elements
> of the outermost array).

The standard already uses "number of elements of an array" all around
the standard, and IMO it's quite clear and simple about it.

It is also consistent with decay of arrays, which decay to a pointer to
their first element (which can be itself an array; it is understood that
the ultimate fundamental object is not an element of the (outermost)
array).




Re: [RFC] gimple ssa: VRP to mark switch default as unreachable instead of removing it [PR112687]

2024-09-01 Thread Filip Kastl
On Sun 2024-09-01 09:25:07, Jeff Law wrote:
> 
> 
> On 9/1/24 9:19 AM, Filip Kastl wrote:
> > (I'm Cc-ing Diego since he originally contributed the VRP pass and Jeff 
> > because
> > I've seen him in git blame on many lines of vr-values.cc around the place I
> > would like to make modifications)
> > 
> > Hello,
> > 
> > In this RFC I'd like to propose the following: When the value range 
> > statement
> > simplification machinery encounters a switch with a default case that never
> > executes, it should mark the default case with __builtin_unreachable 
> > instead of
> > removing the default case.  I think that should be the "canonical"
> > representation of switches with this property.
> > 
> > In terms of changes made to GCC code, this would be a small patch.
> > 
> > I'd like to hear other people's opinions on this.  Perhaps there are
> > implications of this change or some other issues with the idea which I don't
> > see.
> The right thing to do is to eliminate the unreachable code.   We don't
> replace the body of an if (0)  with a __builtin_unreachable.  We remove it.
> Switch statements should be no different here.
> 
> If that inhibits later analysis or optimization then that's an indicator
> that we need to improve our frameworks, not that we should keep dead code
> lying around in the IL.
> 
> Jeff
> 
> 

I see.  This is not the way forward.  While reading your reply it occured to me
that it could be possible to use value range info in switch conversion to get
the switches in question optimized without relying on __bultin_unreachable.
I'll try that.

Thanks,
Filip


RE: [nvptx] Fix code-gen for alias attribute

2024-09-01 Thread Prathamesh Kulkarni
> -Original Message-
> From: Prathamesh Kulkarni 
> Sent: Monday, August 26, 2024 4:21 PM
> To: Thomas Schwinge ; gcc-patches@gcc.gnu.org
> Subject: [nvptx] Fix code-gen for alias attribute
> 
> External email: Use caution opening links or attachments
> 
> 
> Hi,
> For the following test (adapted from pr96390.c):
> 
> __attribute__((noipa)) int foo () { return 42; } int bar ()
> __attribute__((alias ("foo"))); int baz () __attribute__((alias
> ("bar")));
> 
> int main ()
> {
>   int n;
>   #pragma omp target map(from:n)
> n = baz ();
>   return n;
> }
> 
> Compiling with -fopenmp -foffload=nvptx-none -foffload=-malias -
> foffload=-mptx=6.3 results in:
> 
> ptxas fatal   : Internal error: alias to unknown symbol
> nvptx-as: ptxas returned 255 exit status nvptx mkoffload: fatal error:
> ../../install/bin/aarch64-unknown-linux-gnu-accel-nvptx-none-gcc
> returned 1 exit status compilation terminated.
> lto-wrapper: fatal error: /home/prathameshk/gnu-toolchain/gcc/grcogcc-
> 38/install/libexec/gcc/aarch64-unknown-linux-gnu/15.0.0//accel/nvptx-
> none/mkoffload returned 1 exit status compilation terminated.
> 
> This happens because ptx code-gen shows:
> 
> // BEGIN GLOBAL FUNCTION DEF: foo
> .visible .func (.param.u32 %value_out) foo {
> .reg.u32 %value;
> mov.u32 %value, 42;
> st.param.u32[%value_out], %value;
> ret;
> }
> .visible .func (.param.u32 %value_out) bar; .alias bar,foo; .visible
> .func (.param.u32 %value_out) baz; .alias baz,bar;
> 
> .alias baz, bar is invalid since PTX requires aliasee to be a defined
> function:
> https://sw-docs-dgx-station.nvidia.com/cuda-latest/parallel-thread-
> execution/latest-internal/#kernel-and-function-directives-alias
> 
> The patch uses cgraph_node::get(name)->ultimate_alias_target ()
> instead of the provided value in nvptx_asm_output_def_from_decls.
> For the above case, it now generates the following ptx:
> 
> .alias baz,foo;
> instead of:
> .alias baz,bar;
> 
> which fixes the issue.
> 
> Does the patch look in the right direction ?
> 
> Signed-off-by: Prathamesh Kulkarni 
Hi,
ping: https://gcc.gnu.org/pipermail/gcc-patches/2024-August/661457.html

Thanks,
Prathamesh
> 
> Thanks,
> Prathamesh



Re: [PATCH] MATCH: add abs support for half float

2024-09-01 Thread Andrew Pinski
On Sun, Sep 1, 2024 at 4:27 PM Kugan Vivekanandarajah
 wrote:
>
> Hi Andrew.
>
> > On 28 Aug 2024, at 2:23 pm, Andrew Pinski  wrote:
> >
> > External email: Use caution opening links or attachments
> >
> >
> > On Tue, Aug 27, 2024 at 8:54 PM Kugan Vivekanandarajah
> >  wrote:
> >>
> >> Hi Richard,
> >>
> >> Thanks for the reply.
> >>
> >>> On 27 Aug 2024, at 7:05 pm, Richard Biener  
> >>> wrote:
> >>>
> >>> External email: Use caution opening links or attachments
> >>>
> >>>
> >>> On Tue, Aug 27, 2024 at 8:23 AM Kugan Vivekanandarajah
> >>>  wrote:
> 
>  Hi Richard,
> 
> > On 22 Aug 2024, at 10:34 pm, Richard Biener 
> >  wrote:
> >
> > External email: Use caution opening links or attachments
> >
> >
> > On Wed, Aug 21, 2024 at 12:08 PM Kugan Vivekanandarajah
> >  wrote:
> >>
> >> Hi Richard,
> >>
> >>> On 20 Aug 2024, at 6:09 pm, Richard Biener 
> >>>  wrote:
> >>>
> >>> External email: Use caution opening links or attachments
> >>>
> >>>
> >>> On Fri, Aug 9, 2024 at 2:39 AM Kugan Vivekanandarajah
> >>>  wrote:
> 
>  Thanks for the comments.
> 
> > On 2 Aug 2024, at 8:36 pm, Richard Biener 
> >  wrote:
> >
> > External email: Use caution opening links or attachments
> >
> >
> > On Fri, Aug 2, 2024 at 11:20 AM Kugan Vivekanandarajah
> >  wrote:
> >>
> >>
> >>
> >>> On 1 Aug 2024, at 10:46 pm, Richard Biener 
> >>>  wrote:
> >>>
> >>> External email: Use caution opening links or attachments
> >>>
> >>>
> >>> On Thu, Aug 1, 2024 at 5:31 AM Kugan Vivekanandarajah
> >>>  wrote:
> 
> 
>  On Mon, Jul 29, 2024 at 10:11 AM Andrew Pinski 
>   wrote:
> >
> > On Mon, Jul 29, 2024 at 12:57 AM Kugan Vivekanandarajah
> >  wrote:
> >>
> >> On Thu, Jul 25, 2024 at 10:19 PM Richard Biener
> >>  wrote:
> >>>
> >>> On Thu, Jul 25, 2024 at 4:42 AM Kugan Vivekanandarajah
> >>>  wrote:
> 
>  On Tue, Jul 23, 2024 at 11:56 PM Richard Biener
>   wrote:
> >
> > On Tue, Jul 23, 2024 at 10:27 AM Kugan Vivekanandarajah
> >  wrote:
> >>
> >> On Tue, Jul 23, 2024 at 10:35 AM Andrew Pinski 
> >>  wrote:
> >>>
> >>> On Mon, Jul 22, 2024 at 5:26 PM Kugan Vivekanandarajah
> >>>  wrote:
> 
>  Revised based on the comment and moved it into existing 
>  patterns as.
> 
>  gcc/ChangeLog:
> 
>  * match.pd: Extend A CMP 0 ? A : -A into (type)A CMP 0 ? 
>  A : -A.
>  Extend A CMP 0 ? A : -A into (type) A CMP 0 ? A : -A.
> 
>  gcc/testsuite/ChangeLog:
> 
>  * gcc.dg/tree-ssa/absfloat16.c: New test.
> >>>
> >>> The testcase needs to make sure it runs only for targets 
> >>> that support
> >>> float16 so like:
> >>>
> >>> /* { dg-require-effective-target float16 } */
> >>> /* { dg-add-options float16 } */
> >> Added in the attached version.
> >
> > + /* (type)A >=/> 0 ? A : -Asame as abs (A) */
> > (for cmp (ge gt)
> > (simplify
> > -   (cnd (cmp @0 zerop) @1 (negate @1))
> > -(if (!HONOR_SIGNED_ZEROS (TREE_TYPE(@0))
> > -&& !TYPE_UNSIGNED (TREE_TYPE(@0))
> > -&& bitwise_equal_p (@0, @1))
> > +   (cnd (cmp (convert?@0 @1) zerop) @2 (negate @2))
> > +(if (!HONOR_SIGNED_ZEROS (TREE_TYPE (@1))
> > +&& !TYPE_UNSIGNED (TREE_TYPE (@1))
> > +&& ((VECTOR_TYPE_P (type)
> > + && tree_nop_conversion_p (TREE_TYPE (@0), 
> > TREE_TYPE (@1)))
> > +   || (!VECTOR_TYPE_P (type)
> > +   && (TYPE_PRECISION (TREE_TYPE (@1))
> > +   <= TYPE_PRECISION (TREE_TYPE (@0)
> > +&& bitwise_equal_p (@1, @2))
> >
> > I wonder about the bitwise_equal_p which tests @1 against 
> > @2 now
> > with the convert still applied to @1 - that looks odd.  You 
> > are allowing
> > sign-changing conversions but doesn't that change 

Re: [PATCH v1] RISC-V: Support form 1 of integer scalar .SAT_ADD

2024-09-01 Thread Jeff Law




On 9/1/24 8:50 PM, Li, Pan2 wrote:

Thanks Jeff for comments.


OK.  Presumably the code you're getting here is more efficient than
whatever standard expansion would provide?  If so, should we be looking
at moving some of this stuff into generic expanders?  I don't really see
anything all that target specific here.


Mostly for that we can eliminate the branch for .SAT_ADD in scalar. Given we
don't have one SAT_ADD like insn like RVV vsadd.vv/vx/vi.
But I would expect that may be beneficial on other targets as well. 
It's not conceptually a lot different than what we do basic arithmetic 
with overflow, which has generic expansion which can be overridden by 
target specific expanders.  See expand_addsub_overflow.


Again, I think this is OK, but I'm thinking we probably want something 
more generic in the longer term.


The other question that I think Robin initially raised to me privately 
is whether or not the sequences we're generating are well suited for 
zicond or not.  If not, we might want to consider adjustments to either 
generate zicond if-then-else constructs during initial code generation 
or bias initial code generator towards sequences that ifcvt & combine 
can turn into zicond.  But again not strictly necessary for this patch 
to go forward, more a potential avenue for further improvements.





Pan

-Original Message-
From: Jeff Law 
Sent: Sunday, September 1, 2024 11:35 PM
To: Li, Pan2 ; gcc-patches@gcc.gnu.org
Cc: juzhe.zh...@rivai.ai; kito.ch...@gmail.com; rdapp@gmail.com
Subject: Re: [PATCH v1] RISC-V: Support form 1 of integer scalar .SAT_ADD



On 8/29/24 12:25 AM, pan2...@intel.com wrote:

From: Pan Li 

This patch would like to support the scalar signed ssadd pattern
for the RISC-V backend.  Aka

Form 1:
#define DEF_SAT_S_ADD_FMT_1(T, UT, MIN, MAX) \
T __attribute__((noinline))  \
sat_s_add_##T##_fmt_1 (T x, T y) \
{\
  T sum = (UT)x + (UT)y; \
  return (x ^ y) < 0 \
? sum\
: (sum ^ x) >= 0 \
  ? sum  \
  : x < 0 ? MIN : MAX;   \
}

DEF_SAT_S_ADD_FMT_1(int64_t, uint64_t, INT64_MIN, INT64_MAX)

Before this patch:
10   │ sat_s_add_int64_t_fmt_1:
11   │ mv   a5,a0
12   │ add  a0,a0,a1
13   │ xor  a1,a5,a1
14   │ not  a1,a1
15   │ xor  a4,a5,a0
16   │ and  a1,a1,a4
17   │ blt  a1,zero,.L5
18   │ ret
19   │ .L5:
20   │ srai a5,a5,63
21   │ li   a0,-1
22   │ srli a0,a0,1
23   │ xor  a0,a5,a0
24   │ ret

After this patch:
10   │ sat_s_add_int64_t_fmt_1:
11   │ add  a2,a0,a1
12   │ xor  a1,a0,a1
13   │ xor  a5,a0,a2
14   │ srli a5,a5,63
15   │ srli a1,a1,63
16   │ xori a1,a1,1
17   │ and  a5,a5,a1
18   │ srai a4,a0,63
19   │ li   a3,-1
20   │ srli a3,a3,1
21   │ xor  a3,a3,a4
22   │ neg  a4,a5
23   │ and  a3,a3,a4
24   │ addi a5,a5,-1
25   │ and  a0,a2,a5
26   │ or   a0,a0,a3
27   │ ret

The below test suites are passed for this patch:
1. The rv64gcv fully regression test.

gcc/ChangeLog:

* config/riscv/riscv-protos.h (riscv_expand_ssadd): Add new func
decl for expanding ssadd.
* config/riscv/riscv.cc (riscv_gen_sign_max_cst): Add new func
impl to gen the max int rtx.
(riscv_expand_ssadd): Add new func impl to expand the ssadd.
* config/riscv/riscv.md (ssadd3): Add new pattern for
signed integer .SAT_ADD.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/sat_arith.h: Add test helper macros.
* gcc.target/riscv/sat_arith_data.h: Add test data.
* gcc.target/riscv/sat_s_add-1.c: New test.
* gcc.target/riscv/sat_s_add-2.c: New test.
* gcc.target/riscv/sat_s_add-3.c: New test.
* gcc.target/riscv/sat_s_add-4.c: New test.
* gcc.target/riscv/sat_s_add-run-1.c: New test.
* gcc.target/riscv/sat_s_add-run-2.c: New test.
* gcc.target/riscv/sat_s_add-run-3.c: New test.
* gcc.target/riscv/sat_s_add-run-4.c: New test.
* gcc.target/riscv/scalar_sat_binary_run_xxx.h: New test.

OK.  Presumably the code you're getting here is more efficient than
whatever standard expansion would provide?  If so, should we be looking
at moving some of this stuff into generic expanders?  I don't really see
anything all that target specific here.

jeff





Re: [RFC] gimple ssa: VRP to mark switch default as unreachable instead of removing it [PR112687]

2024-09-01 Thread Jeff Law




On 9/1/24 9:19 AM, Filip Kastl wrote:

(I'm Cc-ing Diego since he originally contributed the VRP pass and Jeff because
I've seen him in git blame on many lines of vr-values.cc around the place I
would like to make modifications)

Hello,

In this RFC I'd like to propose the following: When the value range statement
simplification machinery encounters a switch with a default case that never
executes, it should mark the default case with __builtin_unreachable instead of
removing the default case.  I think that should be the "canonical"
representation of switches with this property.

In terms of changes made to GCC code, this would be a small patch.

I'd like to hear other people's opinions on this.  Perhaps there are
implications of this change or some other issues with the idea which I don't
see.
The right thing to do is to eliminate the unreachable code.   We don't 
replace the body of an if (0)  with a __builtin_unreachable.  We remove 
it.  Switch statements should be no different here.


If that inhibits later analysis or optimization then that's an indicator 
that we need to improve our frameworks, not that we should keep dead 
code lying around in the IL.


Jeff




PING: [PATCH] ipa: Don't disable function parameter analysis for fat LTO streaming

2024-09-01 Thread H.J. Lu
On Tue, Aug 27, 2024 at 1:11 PM H.J. Lu  wrote:
>
> Update analyze_parms not to disable function parameter analysis for
> -ffat-lto-objects.  Tested on x86-64, there are no differences in zstd
> with "-O2 -flto=auto" -g "vs -O2 -flto=auto -g -ffat-lto-objects".
>
> PR ipa/116410
> * ipa-modref.cc (analyze_parms): Always analyze function parameter
> for LTO streaming.
>
> Signed-off-by: H.J. Lu 
> ---
>  gcc/ipa-modref.cc | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
>
> diff --git a/gcc/ipa-modref.cc b/gcc/ipa-modref.cc
> index 59cfe91f987..9275030c254 100644
> --- a/gcc/ipa-modref.cc
> +++ b/gcc/ipa-modref.cc
> @@ -2975,7 +2975,7 @@ analyze_parms (modref_summary *summary, 
> modref_summary_lto *summary_lto,
> summary->arg_flags.safe_grow_cleared (count, true);
>   summary->arg_flags[parm_index] = EAF_UNUSED;
> }
> - else if (summary_lto)
> + if (summary_lto)
> {
>   if (parm_index >= summary_lto->arg_flags.length ())
> summary_lto->arg_flags.safe_grow_cleared (count, true);
> @@ -3034,7 +3034,7 @@ analyze_parms (modref_summary *summary, 
> modref_summary_lto *summary_lto,
> summary->arg_flags.safe_grow_cleared (count, true);
>   summary->arg_flags[parm_index] = flags;
> }
> - else if (summary_lto)
> + if (summary_lto)
> {
>   if (parm_index >= summary_lto->arg_flags.length ())
> summary_lto->arg_flags.safe_grow_cleared (count, true);
> --
> 2.46.0
>

These are oversights in

https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=85ebbabd85e03bdc3afc190aeb29250606d18322
https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=3350e59f2985469b2472e4d9a6d387337da4519b

to have

  if (summary)
  ...
  else if (summary_lto)
   This disables LTO optimization for  -ffat-lto-objects.

Is this patch OK for master and backports?

Thanks.

H.J.

-- 
H.J.


Re: [PATCH v1] RISC-V: Support form 1 of integer scalar .SAT_ADD

2024-09-01 Thread Jeff Law




On 8/29/24 12:25 AM, pan2...@intel.com wrote:

From: Pan Li 

This patch would like to support the scalar signed ssadd pattern
for the RISC-V backend.  Aka

Form 1:
   #define DEF_SAT_S_ADD_FMT_1(T, UT, MIN, MAX) \
   T __attribute__((noinline))  \
   sat_s_add_##T##_fmt_1 (T x, T y) \
   {\
 T sum = (UT)x + (UT)y; \
 return (x ^ y) < 0 \
   ? sum\
   : (sum ^ x) >= 0 \
 ? sum  \
 : x < 0 ? MIN : MAX;   \
   }

DEF_SAT_S_ADD_FMT_1(int64_t, uint64_t, INT64_MIN, INT64_MAX)

Before this patch:
   10   │ sat_s_add_int64_t_fmt_1:
   11   │ mv   a5,a0
   12   │ add  a0,a0,a1
   13   │ xor  a1,a5,a1
   14   │ not  a1,a1
   15   │ xor  a4,a5,a0
   16   │ and  a1,a1,a4
   17   │ blt  a1,zero,.L5
   18   │ ret
   19   │ .L5:
   20   │ srai a5,a5,63
   21   │ li   a0,-1
   22   │ srli a0,a0,1
   23   │ xor  a0,a5,a0
   24   │ ret

After this patch:
   10   │ sat_s_add_int64_t_fmt_1:
   11   │ add  a2,a0,a1
   12   │ xor  a1,a0,a1
   13   │ xor  a5,a0,a2
   14   │ srli a5,a5,63
   15   │ srli a1,a1,63
   16   │ xori a1,a1,1
   17   │ and  a5,a5,a1
   18   │ srai a4,a0,63
   19   │ li   a3,-1
   20   │ srli a3,a3,1
   21   │ xor  a3,a3,a4
   22   │ neg  a4,a5
   23   │ and  a3,a3,a4
   24   │ addi a5,a5,-1
   25   │ and  a0,a2,a5
   26   │ or   a0,a0,a3
   27   │ ret

The below test suites are passed for this patch:
1. The rv64gcv fully regression test.

gcc/ChangeLog:

* config/riscv/riscv-protos.h (riscv_expand_ssadd): Add new func
decl for expanding ssadd.
* config/riscv/riscv.cc (riscv_gen_sign_max_cst): Add new func
impl to gen the max int rtx.
(riscv_expand_ssadd): Add new func impl to expand the ssadd.
* config/riscv/riscv.md (ssadd3): Add new pattern for
signed integer .SAT_ADD.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/sat_arith.h: Add test helper macros.
* gcc.target/riscv/sat_arith_data.h: Add test data.
* gcc.target/riscv/sat_s_add-1.c: New test.
* gcc.target/riscv/sat_s_add-2.c: New test.
* gcc.target/riscv/sat_s_add-3.c: New test.
* gcc.target/riscv/sat_s_add-4.c: New test.
* gcc.target/riscv/sat_s_add-run-1.c: New test.
* gcc.target/riscv/sat_s_add-run-2.c: New test.
* gcc.target/riscv/sat_s_add-run-3.c: New test.
* gcc.target/riscv/sat_s_add-run-4.c: New test.
* gcc.target/riscv/scalar_sat_binary_run_xxx.h: New test.
OK.  Presumably the code you're getting here is more efficient than 
whatever standard expansion would provide?  If so, should we be looking 
at moving some of this stuff into generic expanders?  I don't really see 
anything all that target specific here.


jeff



Re: [PATCH v1 1/2] RISC-V: Add testcases for form 3 of unsigned vector .SAT_ADD IMM

2024-09-01 Thread Jeff Law




On 8/29/24 9:04 PM, pan2...@intel.com wrote:

From: Pan Li 

This patch would like to add test cases for the unsigned vector .SAT_ADD
when one of the operand is IMM.

Form 3:
   #define DEF_VEC_SAT_U_ADD_IMM_FMT_3(T, IMM)  \
   T __attribute__((noinline))  \
   vec_sat_u_add_imm##IMM##_##T##_fmt_3 (T *out, T *in, unsigned limit) \
   {\
 unsigned i;\
 T ret; \
 for (i = 0; i < limit; i++)\
   {\
 out[i] = __builtin_add_overflow (in[i], IMM, &ret) ? -1 : ret; \
   }\
   }

DEF_VEC_SAT_U_ADD_IMM_FMT_3(uint64_t, 123)

The below test are passed for this patch.
* The rv64gcv fully regression test.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-10.c: New test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-11.c: New test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-12.c: New test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-9.c: New test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-run-10.c: New 
test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-run-11.c: New 
test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-run-12.c: New 
test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-run-9.c: New 
test.

Both patches in this series are OK.
jeff



VLA is a misnomer (rebuttal to n3187)

2024-09-01 Thread Alejandro Colomar
Hi Jens, Martin,

On Wed, Aug 14, 2024 at 05:44:57PM GMT, Jens Gustedt wrote:
> Am 14. August 2024 16:47:32 MESZ schrieb Alejandro Colomar :
> > > > I was thinking of renaming the proposal to elementsof(), to avoid
> > > > confusion between length of an array and length of a string.  Would you
> > > > mind checking if elementsof() is ok?
> > > 
> > > No, not for me. I really want as to go consistently to talk about
> > > array length for this. Consistent terminology is important.
> > 
> > I understand your desire for consistency.  I think your paper is a net
> > improvement over the status quo (which is a mix of length, size, and
> > number of elements).  After your proposal, there will be only length and
> > number of elements.  That's great.
> > 
> > However, strlen(3) came first, and we must respect it.
> 
> Sure,  string length, a dynamic feature, and array length are two features.
> 
> But we also have VLA and not VNEA in the standard, So we should respect this 
> ;-)

I hadn't thought about it until yesterday after Martin insisted in
preferring lengthof over nelementsof or a contraction of it, and worried
about nelementsof possibly causing ambiguity with multi-dimensional
arrays.  But:

VLA is a misnomer.
~~

First, let's assume length refers to the number of elements, as we all
agree that length should not refer to the size in bytes of an array,
since we already have the term "size" for it, which is consistent with
sizeof.

int vla[3][n];

The array from above is a so-called variable length array, according to
the standard.  But it does not have a variable length, according to the
presumed meaning of length.  It does indeed have a variable size.  The
element of vla is itself an array, which is the one that really has a
variable length (or number of elements, as is the more technical term).

So, if n3187 develops, and really pretends to uniquely and unambiguously
use a term for the number of elements and another one for the size of an
array, it should also rename "variable length array" into "variable size
array".

It is indeed due to this problematic misuse of the colloquial term
length that "lenght" and not "number of elements" is misleading in
multi-dimensional arrays.  The standard is very strict in using NoE for
the first dimension of an array (so its true dimension), and not for
the dimensions of arrays that are elements of it.

And now you could say that this is only a problem of multi-dimensional
arrays.  It's not.  They're just the composition of arrays with elements
of type array.  The same problem arises with single dimensional arrays
in complex situations (although, admittedly, this is non-standard):

$ cat vla.c 
int
main(void)
{
int n = 5;

struct s {
int  v[n];
};

struct s  a[3];

return sizeof(a);
}
$ gcc -Wall -Wextra -Wpedantic vla.c 
vla.c: In function ‘main’:
vla.c:7:22: warning: a member of a structure or union cannot have a 
variably modified type [-Wpedantic]
7 | int  v[n];
  |  ^
$ ./a.out; echo $?
60

a is a VLA even if it is a single-dimension array of known constant
number of elements.  Huh?  :)

Terminology
~~~

Once we've determined that "length" in VLA does refer to the size and
not the number of elements, it's hard to justify a reformation of
terminology that would base on length meaning number of elements.

Indeed, either basing justifications of the origins of length on
strlen(3) or on VLA, we must conclude that "variable length array" must
be renamed to "variable size array".  I'm preparing a paper for that.

If eventually that paper would be accepted, I'd prepare a second paper
that would reform every use of size and length with arrays so that size
always refers to the size in bytes, length is completely removed, and
number of elements stands as the only term to refer to the number of
elements.


Have a lovely day!
Alex

> > Since you haven't proposed eliminating "number of elements" from the
> > standard, and it would still be used alongside length, I think
> > elementsof() would be consistent with your view (consistent with "number
> > of elements").
> 
> didn't we ? Then this is actually a good idea to do so, thanks for the idea !

-- 



signature.asc
Description: PGP signature


RE: [PATCH v1] RISC-V: Support form 1 of integer scalar .SAT_ADD

2024-09-01 Thread Li, Pan2
Thanks Jeff.

> But I would expect that may be beneficial on other targets as well.
I think x86 have the similar insn for saturation, for example as paddsw in 
below link.
https://www.felixcloutier.com/x86/paddsb:paddsw

And the backend of x86 implemented some of them already I bet, like usadd, 
ussub.

> The other question that I think Robin initially raised to me privately 
> is whether or not the sequences we're generating are well suited for 
> zicond or not.  

Got it, cmov like insn is well designed for such case(s). We can consider the 
best
practice to leverage zicond ext in further improvements.

Pan

-Original Message-
From: Jeff Law  
Sent: Monday, September 2, 2024 11:32 AM
To: Li, Pan2 ; gcc-patches@gcc.gnu.org
Cc: juzhe.zh...@rivai.ai; kito.ch...@gmail.com; rdapp@gmail.com
Subject: Re: [PATCH v1] RISC-V: Support form 1 of integer scalar .SAT_ADD



On 9/1/24 8:50 PM, Li, Pan2 wrote:
> Thanks Jeff for comments.
> 
>> OK.  Presumably the code you're getting here is more efficient than
>> whatever standard expansion would provide?  If so, should we be looking
>> at moving some of this stuff into generic expanders?  I don't really see
>> anything all that target specific here.
> 
> Mostly for that we can eliminate the branch for .SAT_ADD in scalar. Given we
> don't have one SAT_ADD like insn like RVV vsadd.vv/vx/vi.
But I would expect that may be beneficial on other targets as well. 
It's not conceptually a lot different than what we do basic arithmetic 
with overflow, which has generic expansion which can be overridden by 
target specific expanders.  See expand_addsub_overflow.

Again, I think this is OK, but I'm thinking we probably want something 
more generic in the longer term.

The other question that I think Robin initially raised to me privately 
is whether or not the sequences we're generating are well suited for 
zicond or not.  If not, we might want to consider adjustments to either 
generate zicond if-then-else constructs during initial code generation 
or bias initial code generator towards sequences that ifcvt & combine 
can turn into zicond.  But again not strictly necessary for this patch 
to go forward, more a potential avenue for further improvements.


> 
> Pan
> 
> -Original Message-
> From: Jeff Law 
> Sent: Sunday, September 1, 2024 11:35 PM
> To: Li, Pan2 ; gcc-patches@gcc.gnu.org
> Cc: juzhe.zh...@rivai.ai; kito.ch...@gmail.com; rdapp@gmail.com
> Subject: Re: [PATCH v1] RISC-V: Support form 1 of integer scalar .SAT_ADD
> 
> 
> 
> On 8/29/24 12:25 AM, pan2...@intel.com wrote:
>> From: Pan Li 
>>
>> This patch would like to support the scalar signed ssadd pattern
>> for the RISC-V backend.  Aka
>>
>> Form 1:
>> #define DEF_SAT_S_ADD_FMT_1(T, UT, MIN, MAX) \
>> T __attribute__((noinline))  \
>> sat_s_add_##T##_fmt_1 (T x, T y) \
>> {\
>>   T sum = (UT)x + (UT)y; \
>>   return (x ^ y) < 0 \
>> ? sum\
>> : (sum ^ x) >= 0 \
>>   ? sum  \
>>   : x < 0 ? MIN : MAX;   \
>> }
>>
>> DEF_SAT_S_ADD_FMT_1(int64_t, uint64_t, INT64_MIN, INT64_MAX)
>>
>> Before this patch:
>> 10   │ sat_s_add_int64_t_fmt_1:
>> 11   │ mv   a5,a0
>> 12   │ add  a0,a0,a1
>> 13   │ xor  a1,a5,a1
>> 14   │ not  a1,a1
>> 15   │ xor  a4,a5,a0
>> 16   │ and  a1,a1,a4
>> 17   │ blt  a1,zero,.L5
>> 18   │ ret
>> 19   │ .L5:
>> 20   │ srai a5,a5,63
>> 21   │ li   a0,-1
>> 22   │ srli a0,a0,1
>> 23   │ xor  a0,a5,a0
>> 24   │ ret
>>
>> After this patch:
>> 10   │ sat_s_add_int64_t_fmt_1:
>> 11   │ add  a2,a0,a1
>> 12   │ xor  a1,a0,a1
>> 13   │ xor  a5,a0,a2
>> 14   │ srli a5,a5,63
>> 15   │ srli a1,a1,63
>> 16   │ xori a1,a1,1
>> 17   │ and  a5,a5,a1
>> 18   │ srai a4,a0,63
>> 19   │ li   a3,-1
>> 20   │ srli a3,a3,1
>> 21   │ xor  a3,a3,a4
>> 22   │ neg  a4,a5
>> 23   │ and  a3,a3,a4
>> 24   │ addi a5,a5,-1
>> 25   │ and  a0,a2,a5
>> 26   │ or   a0,a0,a3
>> 27   │ ret
>>
>> The below test suites are passed for this patch:
>> 1. The rv64gcv fully regression test.
>>
>> gcc/ChangeLog:
>>
>>  * config/riscv/riscv-protos.h (riscv_expand_ssadd): Add new func
>>  decl for expanding ssadd.
>>  * config/riscv/riscv.cc (riscv_gen_sign_max_cst): Add new func
>>  impl to gen the max int rtx.
>>  (riscv_expand_ssadd): Add new func impl to expand the ssadd.
>>  * config/riscv/riscv.md (ssadd3): Add new pattern for
>>  signed integer .SAT_ADD.
>>
>> gcc/testsuite/ChangeLog:
>>
>>  * gcc.target/riscv/sat_arith.h: Add test helper macros.
>>  * gcc

Re: VLA is a misnomer (rebuttal to n3187)

2024-09-01 Thread Martin Uecker


Alex,

I am all for making things more consistent, but there is also a cost
to changing stuff too much.   length is the established 
term in most programming languages and I would recommend to stick
to it.

Note that it is not true that the standard consistently refers to 

char a[3][n]

as a VLA. It does so in the description in sizeof but not in the
type compatibility rules, at least as understood by most compilers. 
This is an inconsistency we *should* fix, but I do not think that
changing away from "length" is a good ida.

Note that "number of elements" is inherently an ambiguous term for
multi-dimensional arrays, and I am not sure how you want to avoid
this without making the wording more complex (e.g. "number of elements
of the outermost array).

So I would recommend not to go this way. You would need a really
good argument to convince me to vote for this, and I haven't seen
any such argument.

Martin



Am Sonntag, dem 01.09.2024 um 11:10 +0200 schrieb Alejandro Colomar:
> Hi Jens, Martin,
> 
> On Wed, Aug 14, 2024 at 05:44:57PM GMT, Jens Gustedt wrote:
> > Am 14. August 2024 16:47:32 MESZ schrieb Alejandro Colomar 
> > :
> > > > > I was thinking of renaming the proposal to elementsof(), to avoid
> > > > > confusion between length of an array and length of a string.  Would 
> > > > > you
> > > > > mind checking if elementsof() is ok?
> > > > 
> > > > No, not for me. I really want as to go consistently to talk about
> > > > array length for this. Consistent terminology is important.
> > > 
> > > I understand your desire for consistency.  I think your paper is a net
> > > improvement over the status quo (which is a mix of length, size, and
> > > number of elements).  After your proposal, there will be only length and
> > > number of elements.  That's great.
> > > 
> > > However, strlen(3) came first, and we must respect it.
> > 
> > Sure,  string length, a dynamic feature, and array length are two features.
> > 
> > But we also have VLA and not VNEA in the standard, So we should respect 
> > this ;-)
> 
> I hadn't thought about it until yesterday after Martin insisted in
> preferring lengthof over nelementsof or a contraction of it, and worried
> about nelementsof possibly causing ambiguity with multi-dimensional
> arrays.  But:
> 
> VLA is a misnomer.
> ~~
> 
> First, let's assume length refers to the number of elements, as we all
> agree that length should not refer to the size in bytes of an array,
> since we already have the term "size" for it, which is consistent with
> sizeof.
> 
>   int vla[3][n];
> 
> The array from above is a so-called variable length array, according to
> the standard.  But it does not have a variable length, according to the
> presumed meaning of length.  It does indeed have a variable size.  The
> element of vla is itself an array, which is the one that really has a
> variable length (or number of elements, as is the more technical term).
> 
> So, if n3187 develops, and really pretends to uniquely and unambiguously
> use a term for the number of elements and another one for the size of an
> array, it should also rename "variable length array" into "variable size
> array".
> 
> It is indeed due to this problematic misuse of the colloquial term
> length that "lenght" and not "number of elements" is misleading in
> multi-dimensional arrays.  The standard is very strict in using NoE for
> the first dimension of an array (so its true dimension), and not for
> the dimensions of arrays that are elements of it.
> 
> And now you could say that this is only a problem of multi-dimensional
> arrays.  It's not.  They're just the composition of arrays with elements
> of type array.  The same problem arises with single dimensional arrays
> in complex situations (although, admittedly, this is non-standard):
> 
>   $ cat vla.c 
>   int
>   main(void)
>   {
>   int n = 5;
> 
>   struct s {
>   int  v[n];
>   };
> 
>   struct s  a[3];
> 
>   return sizeof(a);
>   }
>   $ gcc -Wall -Wextra -Wpedantic vla.c 
>   vla.c: In function ‘main’:
>   vla.c:7:22: warning: a member of a structure or union cannot have a 
> variably modified type [-Wpedantic]
>   7 | int  v[n];
> |  ^
>   $ ./a.out; echo $?
>   60
> 
> a is a VLA even if it is a single-dimension array of known constant
> number of elements.  Huh?  :)
> 
> Terminology
> ~~~
> 
> Once we've determined that "length" in VLA does refer to the size and
> not the number of elements, it's hard to justify a reformation of
> terminology that would base on length meaning number of elements.
> 
> Indeed, either basing justifications of the origins of length on
> strlen(3) or on VLA, we must conclude that "variable length array" must
> be renamed to "variable size array".  I'm preparing a paper for that.
> 
> If eventually that paper would be accept

[RFC] gimple ssa: VRP to mark switch default as unreachable instead of removing it [PR112687]

2024-09-01 Thread Filip Kastl
(I'm Cc-ing Diego since he originally contributed the VRP pass and Jeff because
I've seen him in git blame on many lines of vr-values.cc around the place I
would like to make modifications)

Hello,

In this RFC I'd like to propose the following: When the value range statement
simplification machinery encounters a switch with a default case that never
executes, it should mark the default case with __builtin_unreachable instead of
removing the default case.  I think that should be the "canonical"
representation of switches with this property.

In terms of changes made to GCC code, this would be a small patch.

I'd like to hear other people's opinions on this.  Perhaps there are
implications of this change or some other issues with the idea which I don't
see.


Initial example
===

Consider this C code

switch (v & 3) {
case 0: return 0;
case 1: return 1;
case 2: return 2;
case 3: return 3;
default: __builtin_unreachable();
}

Currently, when early vrp is processing this code
(vr-values.cc:simplify_switch_using_ranges), it sees that the index expression
can only have values [0,3].  Since the numbered cases cover this range
entirely, vrp decides to transform the switch so that it only has these 4
numbered cases and removes the default case.  Because GIMPLE switches have to
have a default, vrp converts case 0 to the default case.  The switch then looks
like this

switch (v & 3) {
default: return 0;
case 1: return 1;
case 2: return 2;
case 3: return 3;
}

However, I think it would be better if the default case with
__builtin_unreachable remained.  I think it conveys more information to the
passes running after vrp.


What would this solve
=

Notice that the initial example switch doesn't do anything.  It is just an
identity mapping.  The example is actually taken from PR112687.  The PR shows
that currently GCC (at least in some cases) fails to optimize similar identity
switches.  Currently the initial example looks like this at the end of the
GIMPLE pipeline:

int unopt (int v)
{
  int _1;
  int _2;
  unsigned int _6;
  unsigned int _7;
 
  
  _1 = v_3(D) & 3;
  _6 = (unsigned int) _1;
  _7 = _6 + 4294967295;
  if (_7 <= 2)
goto ;
  else
goto ;
 
  
 
  
  # _2 = PHI <_1(2), 0(3)>
  return _2;
 
}

While it could look like this:

int unopt (int v)
{
  int _1;
 
  
  _1 = v_2(D) & 3;
  return _1;
 
}

Here is another example of similar code that currently doesn't get optimized...

int unopt(int v)
{
switch (v & 3) {
case 0: return 0;
case 1: return 2;
case 2: return 4;
case 3: return 6;
default: return -1;
}
}

...and could get optimized into something like this if VRP marked the default
case as unreachable.

int opt(int v)
{
return (v & 3) * 2;
}

I have originally thought about solving this problem in the switch conversion
pass.  However, I think that would require basically detecting that the "take
away the default case" transformation happened and then reversing it.  That
seems pretty messy to me.  Also, as I already mentioned, I think that the
canonical representation of this kind of switches should be some representation
that marks the default case as unreachable (with __builtin_unreachable or
somehow else).


How I'd do this
===

I'm attaching a work-in-progress patch to this RFC.

I'm aware that in the final version it would be good to also modify the code
surrounding the blob that I insert in the patch.  For example,
cleanup_edges_and_switches() contains code dealing with the situation when the
default case gets removed, which is a situation that never happens with my
patch applied.

I'll appreciate comments on if I should place the blob of code creating the
__builtin_unreachable somewhere else in the file.  I'll also appreciate any
other comments on the patch.


Cheers,
Filip Kastl


P.S. While writing this I realized that in this case...

int unopt(int v)
{
switch (v & 3) {
default: return 0;
case 1: return 1;
case 2: return 2;
case 3: return 3;
}
}

...GCC fails to optimize the switch even with my patch applied.  This is
because here VRP sees a default case that actually gets executed sometimes and
leaves the default case be.  We could make the following change to remedy this:
Whenever VRP encounters a default that only corresponds to one possible value
of the index expression of the switch, it could create a numbered case
corresponding to this value and mark the default case as unreachable.


-- 8< --


diff --git a/gcc/tree-vrp.cc b/gcc/tree-vrp.cc
index cf273a3fc62..b297e782b09 100644
--- a/gcc/vr-values.cc
+++ b/gcc/vr-values.cc
@@ -1368,6 +1368,32 @@ simplify_using_ranges::simplify_switch_using_ranges 
(gswitch *stmt)
   else
 return false;
 
+  if (!take_default)
+{
+  basic_block default_bb = gimple_switch_defau

Re: [PATCH v2 1/2] RISC-V: Add testcases for unsigned scalar quad and oct .SAT_TRUNC form 2

2024-09-01 Thread Jeff Law




On 8/29/24 2:12 AM, pan2...@intel.com wrote:

From: Pan Li 

This patch would like to add test cases for the unsigned scalar quad and
oct .SAT_TRUNC form 2.  Aka:

Form 2:
   #define DEF_SAT_U_TRUC_FMT_2(NT, WT) \
   NT __attribute__((noinline)) \
   sat_u_truc_##WT##_to_##NT##_fmt_2 (WT x) \
   {\
 WT max = (WT)(NT)-1;   \
 return x > max ? (NT) max : (NT)x; \
   }

QUAD:
DEF_SAT_U_TRUC_FMT_2 (uint16_t, uint64_t)
DEF_SAT_U_TRUC_FMT_2 (uint8_t, uint32_t)

OCT:
DEF_SAT_U_TRUC_FMT_2 (uint8_t, uint64_t)

The below test is passed for this patch.
* The rv64gcv regression test.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/sat_u_trunc-10.c: New test.
* gcc.target/riscv/sat_u_trunc-11.c: New test.
* gcc.target/riscv/sat_u_trunc-12.c: New test.
* gcc.target/riscv/sat_u_trunc-run-10.c: New test.
* gcc.target/riscv/sat_u_trunc-run-11.c: New test.
* gcc.target/riscv/sat_u_trunc-run-12.c: New test.

Both of the patches in this series are OK.
jeff



RE: [PATCH v1] RISC-V: Support form 1 of integer scalar .SAT_ADD

2024-09-01 Thread Li, Pan2
Thanks Jeff for comments.

> OK.  Presumably the code you're getting here is more efficient than 
> whatever standard expansion would provide?  If so, should we be looking 
> at moving some of this stuff into generic expanders?  I don't really see 
> anything all that target specific here.

Mostly for that we can eliminate the branch for .SAT_ADD in scalar. Given we
don't have one SAT_ADD like insn like RVV vsadd.vv/vx/vi.

Pan

-Original Message-
From: Jeff Law  
Sent: Sunday, September 1, 2024 11:35 PM
To: Li, Pan2 ; gcc-patches@gcc.gnu.org
Cc: juzhe.zh...@rivai.ai; kito.ch...@gmail.com; rdapp@gmail.com
Subject: Re: [PATCH v1] RISC-V: Support form 1 of integer scalar .SAT_ADD



On 8/29/24 12:25 AM, pan2...@intel.com wrote:
> From: Pan Li 
> 
> This patch would like to support the scalar signed ssadd pattern
> for the RISC-V backend.  Aka
> 
> Form 1:
>#define DEF_SAT_S_ADD_FMT_1(T, UT, MIN, MAX) \
>T __attribute__((noinline))  \
>sat_s_add_##T##_fmt_1 (T x, T y) \
>{\
>  T sum = (UT)x + (UT)y; \
>  return (x ^ y) < 0 \
>? sum\
>: (sum ^ x) >= 0 \
>  ? sum  \
>  : x < 0 ? MIN : MAX;   \
>}
> 
> DEF_SAT_S_ADD_FMT_1(int64_t, uint64_t, INT64_MIN, INT64_MAX)
> 
> Before this patch:
>10   │ sat_s_add_int64_t_fmt_1:
>11   │ mv   a5,a0
>12   │ add  a0,a0,a1
>13   │ xor  a1,a5,a1
>14   │ not  a1,a1
>15   │ xor  a4,a5,a0
>16   │ and  a1,a1,a4
>17   │ blt  a1,zero,.L5
>18   │ ret
>19   │ .L5:
>20   │ srai a5,a5,63
>21   │ li   a0,-1
>22   │ srli a0,a0,1
>23   │ xor  a0,a5,a0
>24   │ ret
> 
> After this patch:
>10   │ sat_s_add_int64_t_fmt_1:
>11   │ add  a2,a0,a1
>12   │ xor  a1,a0,a1
>13   │ xor  a5,a0,a2
>14   │ srli a5,a5,63
>15   │ srli a1,a1,63
>16   │ xori a1,a1,1
>17   │ and  a5,a5,a1
>18   │ srai a4,a0,63
>19   │ li   a3,-1
>20   │ srli a3,a3,1
>21   │ xor  a3,a3,a4
>22   │ neg  a4,a5
>23   │ and  a3,a3,a4
>24   │ addi a5,a5,-1
>25   │ and  a0,a2,a5
>26   │ or   a0,a0,a3
>27   │ ret
> 
> The below test suites are passed for this patch:
> 1. The rv64gcv fully regression test.
> 
> gcc/ChangeLog:
> 
>   * config/riscv/riscv-protos.h (riscv_expand_ssadd): Add new func
>   decl for expanding ssadd.
>   * config/riscv/riscv.cc (riscv_gen_sign_max_cst): Add new func
>   impl to gen the max int rtx.
>   (riscv_expand_ssadd): Add new func impl to expand the ssadd.
>   * config/riscv/riscv.md (ssadd3): Add new pattern for
>   signed integer .SAT_ADD.
> 
> gcc/testsuite/ChangeLog:
> 
>   * gcc.target/riscv/sat_arith.h: Add test helper macros.
>   * gcc.target/riscv/sat_arith_data.h: Add test data.
>   * gcc.target/riscv/sat_s_add-1.c: New test.
>   * gcc.target/riscv/sat_s_add-2.c: New test.
>   * gcc.target/riscv/sat_s_add-3.c: New test.
>   * gcc.target/riscv/sat_s_add-4.c: New test.
>   * gcc.target/riscv/sat_s_add-run-1.c: New test.
>   * gcc.target/riscv/sat_s_add-run-2.c: New test.
>   * gcc.target/riscv/sat_s_add-run-3.c: New test.
>   * gcc.target/riscv/sat_s_add-run-4.c: New test.
>   * gcc.target/riscv/scalar_sat_binary_run_xxx.h: New test.
OK.  Presumably the code you're getting here is more efficient than 
whatever standard expansion would provide?  If so, should we be looking 
at moving some of this stuff into generic expanders?  I don't really see 
anything all that target specific here.

jeff



Re: [PATCH] MATCH: add abs support for half float

2024-09-01 Thread Kugan Vivekanandarajah
Hi Andrew.

> On 28 Aug 2024, at 2:23 pm, Andrew Pinski  wrote:
> 
> External email: Use caution opening links or attachments
> 
> 
> On Tue, Aug 27, 2024 at 8:54 PM Kugan Vivekanandarajah
>  wrote:
>> 
>> Hi Richard,
>> 
>> Thanks for the reply.
>> 
>>> On 27 Aug 2024, at 7:05 pm, Richard Biener  
>>> wrote:
>>> 
>>> External email: Use caution opening links or attachments
>>> 
>>> 
>>> On Tue, Aug 27, 2024 at 8:23 AM Kugan Vivekanandarajah
>>>  wrote:
 
 Hi Richard,
 
> On 22 Aug 2024, at 10:34 pm, Richard Biener  
> wrote:
> 
> External email: Use caution opening links or attachments
> 
> 
> On Wed, Aug 21, 2024 at 12:08 PM Kugan Vivekanandarajah
>  wrote:
>> 
>> Hi Richard,
>> 
>>> On 20 Aug 2024, at 6:09 pm, Richard Biener  
>>> wrote:
>>> 
>>> External email: Use caution opening links or attachments
>>> 
>>> 
>>> On Fri, Aug 9, 2024 at 2:39 AM Kugan Vivekanandarajah
>>>  wrote:
 
 Thanks for the comments.
 
> On 2 Aug 2024, at 8:36 pm, Richard Biener 
>  wrote:
> 
> External email: Use caution opening links or attachments
> 
> 
> On Fri, Aug 2, 2024 at 11:20 AM Kugan Vivekanandarajah
>  wrote:
>> 
>> 
>> 
>>> On 1 Aug 2024, at 10:46 pm, Richard Biener 
>>>  wrote:
>>> 
>>> External email: Use caution opening links or attachments
>>> 
>>> 
>>> On Thu, Aug 1, 2024 at 5:31 AM Kugan Vivekanandarajah
>>>  wrote:
 
 
 On Mon, Jul 29, 2024 at 10:11 AM Andrew Pinski  
 wrote:
> 
> On Mon, Jul 29, 2024 at 12:57 AM Kugan Vivekanandarajah
>  wrote:
>> 
>> On Thu, Jul 25, 2024 at 10:19 PM Richard Biener
>>  wrote:
>>> 
>>> On Thu, Jul 25, 2024 at 4:42 AM Kugan Vivekanandarajah
>>>  wrote:
 
 On Tue, Jul 23, 2024 at 11:56 PM Richard Biener
  wrote:
> 
> On Tue, Jul 23, 2024 at 10:27 AM Kugan Vivekanandarajah
>  wrote:
>> 
>> On Tue, Jul 23, 2024 at 10:35 AM Andrew Pinski 
>>  wrote:
>>> 
>>> On Mon, Jul 22, 2024 at 5:26 PM Kugan Vivekanandarajah
>>>  wrote:
 
 Revised based on the comment and moved it into existing 
 patterns as.
 
 gcc/ChangeLog:
 
 * match.pd: Extend A CMP 0 ? A : -A into (type)A CMP 0 ? A 
 : -A.
 Extend A CMP 0 ? A : -A into (type) A CMP 0 ? A : -A.
 
 gcc/testsuite/ChangeLog:
 
 * gcc.dg/tree-ssa/absfloat16.c: New test.
>>> 
>>> The testcase needs to make sure it runs only for targets 
>>> that support
>>> float16 so like:
>>> 
>>> /* { dg-require-effective-target float16 } */
>>> /* { dg-add-options float16 } */
>> Added in the attached version.
> 
> + /* (type)A >=/> 0 ? A : -Asame as abs (A) */
> (for cmp (ge gt)
> (simplify
> -   (cnd (cmp @0 zerop) @1 (negate @1))
> -(if (!HONOR_SIGNED_ZEROS (TREE_TYPE(@0))
> -&& !TYPE_UNSIGNED (TREE_TYPE(@0))
> -&& bitwise_equal_p (@0, @1))
> +   (cnd (cmp (convert?@0 @1) zerop) @2 (negate @2))
> +(if (!HONOR_SIGNED_ZEROS (TREE_TYPE (@1))
> +&& !TYPE_UNSIGNED (TREE_TYPE (@1))
> +&& ((VECTOR_TYPE_P (type)
> + && tree_nop_conversion_p (TREE_TYPE (@0), 
> TREE_TYPE (@1)))
> +   || (!VECTOR_TYPE_P (type)
> +   && (TYPE_PRECISION (TREE_TYPE (@1))
> +   <= TYPE_PRECISION (TREE_TYPE (@0)
> +&& bitwise_equal_p (@1, @2))
> 
> I wonder about the bitwise_equal_p which tests @1 against @2 
> now
> with the convert still applied to @1 - that looks odd.  You 
> are allowing
> sign-changing conversions but doesn't that change ge/gt 
> behavior?
> Also why are sign/zero-extensions not OK for vector types?
 Thanks for the review.
 My main motivation here is for _Float16  as below.
 
 _Float16 absfloat16 (_Float16 x)

Re: [PATCH v1] RISC-V: Refactor gen zero_extend rtx for SAT_* when expand SImode in RV64

2024-09-01 Thread Jeff Law




On 8/30/24 1:23 AM, pan2...@intel.com wrote:

From: Pan Li 

In previous, we have some specially handling for both the .SAT_ADD and
.SAT_SUB for unsigned int.  There are similar to take care of SImode
in RV64 for zero extend.  Thus refactor these two helper function
into one for possible code duplication.

The below test suite are passed for this patch.
* The rv64gcv fully regression test.

gcc/ChangeLog:

* config/riscv/riscv.cc (riscv_gen_zero_extend_rtx): Merge
the zero_extend handing from func riscv_gen_unsigned_xmode_reg.
(riscv_gen_unsigned_xmode_reg): Remove.
(riscv_expand_ussub): Leverage riscv_gen_zero_extend_rtx
instead of riscv_gen_unsigned_xmode_reg.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/sat_u_sub-11.c: Adjust asm check.
* gcc.target/riscv/sat_u_sub-15.c: Ditto.
* gcc.target/riscv/sat_u_sub-19.c: Ditto.
* gcc.target/riscv/sat_u_sub-23.c: Ditto.
* gcc.target/riscv/sat_u_sub-27.c: Ditto.
* gcc.target/riscv/sat_u_sub-3.c: Ditto.
* gcc.target/riscv/sat_u_sub-31.c: Ditto.
* gcc.target/riscv/sat_u_sub-35.c: Ditto.
* gcc.target/riscv/sat_u_sub-39.c: Ditto.
* gcc.target/riscv/sat_u_sub-43.c: Ditto.
* gcc.target/riscv/sat_u_sub-47.c: Ditto.
* gcc.target/riscv/sat_u_sub-7.c: Ditto.
* gcc.target/riscv/sat_u_sub_imm-11.c: Ditto.
* gcc.target/riscv/sat_u_sub_imm-11_1.c: Ditto.
* gcc.target/riscv/sat_u_sub_imm-11_2.c: Ditto.
* gcc.target/riscv/sat_u_sub_imm-15.c: Ditto.
* gcc.target/riscv/sat_u_sub_imm-15_1.c: Ditto.
* gcc.target/riscv/sat_u_sub_imm-15_2.c: Ditto.
* gcc.target/riscv/sat_u_sub_imm-3.c: Ditto.
* gcc.target/riscv/sat_u_sub_imm-3_1.c: Ditto.
* gcc.target/riscv/sat_u_sub_imm-3_2.c: Ditto.
* gcc.target/riscv/sat_u_sub_imm-7.c: Ditto.
* gcc.target/riscv/sat_u_sub_imm-7_1.c: Ditto.
* gcc.target/riscv/sat_u_sub_imm-7_2.c: Ditto.

OK
jeff



Re: [PATCH] c++: Fix overeager Woverloaded-virtual with conversion operators [PR109918]

2024-09-01 Thread Simon Martin
Hi Jason,

On 26 Aug 2024, at 19:23, Jason Merrill wrote:

> On 8/25/24 12:37 PM, Simon Martin wrote:
>> On 24 Aug 2024, at 23:59, Simon Martin wrote:
>>> On 24 Aug 2024, at 15:13, Jason Merrill wrote:
>>>
 On 8/23/24 12:44 PM, Simon Martin wrote:
> We currently emit an incorrect -Woverloaded-virtual warning upon 
> the
>>
> following
> test case
>
> === cut here ===
> struct A {
> virtual operator int() { return 42; }
> virtual operator char() = 0;
> };
> struct B : public A {
> operator char() { return 'A'; }
> };
> === cut here ===
>
> The problem is that warn_hidden relies on get_basefndecls to find
>>
> the
> methods
> in A possibly hidden B's operator char(), and gets both the
> conversion operator
> to int and to char. It eventually wrongly concludes that the
> conversion to int
> is hidden.
>
> This patch fixes this by filtering out conversion operators to
> different types
> from the list returned by get_basefndecls.

 Hmm, same_signature_p already tries to handle comparing conversion
 operators, why isn't that working?

>>> It does indeed.
>>>
>>> However, `ovl_range (fns)` does not only contain `char 
>>> B::operator()` -
>>> for which `any_override` gets true - but also `conv_op_marker` - for

>>> which `any_override` gets false, causing `seen_non_override` to get 
>>> to
>>> true. Because of that, we run the last loop, that will emit a 
>>> warning
>>> for all `base_fndecls` (except `char B::operator()` that has been
>>> removed).
>>>
>>> We could test `fndecl` and `base_fndecls[k]` against 
>>> `conv_op_marker` in
>>> the loop, but we’d still need to inspect the “converting to” 
>>> type
>>> in the last loop (for when `warn_overloaded_virtual` is 2). This 
>>> would
>>> make the code much more complex than the current patch.
>
> Makes sense.
>
>>> It would however probably be better if `get_basefndecls` only 
>>> returned
>>> the right conversion operator, not all of them. I’ll draft another
>>> version of the patch that does that and submit it in this thread.
>>>
>> I have explored my suggestion further and it actually ends up more
>> complicated than the initial patch.
>
> Yeah, you'd need to do lookup again for each member of fns.
>
>> Please find attached a new revision to fix the reported issue, as 
>> well
>> as new ones I discovered while testing with -Woverloaded-virtual=2.

>>
>> It’s pretty close to the initial patch, but (1) adds a missing
>> “continue;” (2) fixes a location problem when
>> -Woverloaded-virtual==2 (3) adds more test cases. The commit log is 
>> also
>> more comprehensive, and should describe well the various problems and

>>
>> why the patch is correct.
>
>> +if (IDENTIFIER_CONV_OP_P (name)
>> +&& !same_type_p (DECL_CONV_FN_TYPE (fndecl),
>> + DECL_CONV_FN_TYPE (base_fndecls[k])))
>> +  {
>> +base_fndecls[k] = NULL_TREE;
>> +continue;
>> +  }
>
> So this removes base_fndecls[k] if it doesn't return the same type as 
> fndecl.  But what if there's another conversion op in fns that does 

> return the same type as base_fndecls[k]?
>
> If I add an operator int() to both base and derived in 
> Woverloaded-virt7.C, the warning disappears.
>
That was an issue indeed. I’ve reworked the patch, and came up with 
the attached latest version. It explicitly keeps track both of 
overloaded and of hidden base methods (and the “hiding method” for 
the latter), and uses those instead of juggling with bools and nullified 
base_decls.

On top of fixing the issue the PR reports, it fixes a few that I came 

across while investigating:
- wrongly emitting the warning if the base method is not virtual (the 

lines added to Woverloaded-virt1.C would cause a warning without the 
patch)
- wrongly emitting the warning when the derived class method is a 
template, which is wrong since template members don’t override virtual 
base methods (see the change in pr61945.C)
- an invalid early return - see Woverloaded-virt9.C,
- reporting the first overload instead of the one that actually hides - 
see the dg-note in Woverloaded-virt8.C that’d fail without the patch 
because we’d report the int overload)

Successfully tested on x86_64-pc-linux-gnu; OK for trunk?

>>else if (TREE_CODE (t) == OVERLOAD)
>> +t = OVL_FIRST (t) != conv_op_marker ? OVL_FIRST (t) : OVL_CHAIN 
>> (t);
>
> Usually OVL_CHAIN will be another OVERLOAD, you want OVL_FIRST 
> (OVL_CHAIN (t)) in that case.
Thanks. Even though not strictly needed anymore by the updated patch, 

I’m still including the (fixed) change in the patch.

Simon
From 2dd66681842725649abdaef0eec3df553264fb80 Mon Sep 17 00:00:00 2001
From: Simon Martin 
Date: Sun, 25 Aug 2024 15:17:47 +0200
Subject: [PATCH] c++: Fix overeager Woverloaded-virtual with conversion 
operators [

Re: [RFC] gimple ssa: VRP to mark switch default as unreachable instead of removing it [PR112687]

2024-09-01 Thread Jeff Law




On 9/1/24 9:47 AM, Filip Kastl wrote:

On Sun 2024-09-01 09:25:07, Jeff Law wrote:



On 9/1/24 9:19 AM, Filip Kastl wrote:

(I'm Cc-ing Diego since he originally contributed the VRP pass and Jeff because
I've seen him in git blame on many lines of vr-values.cc around the place I
would like to make modifications)

Hello,

In this RFC I'd like to propose the following: When the value range statement
simplification machinery encounters a switch with a default case that never
executes, it should mark the default case with __builtin_unreachable instead of
removing the default case.  I think that should be the "canonical"
representation of switches with this property.

In terms of changes made to GCC code, this would be a small patch.

I'd like to hear other people's opinions on this.  Perhaps there are
implications of this change or some other issues with the idea which I don't
see.

The right thing to do is to eliminate the unreachable code.   We don't
replace the body of an if (0)  with a __builtin_unreachable.  We remove it.
Switch statements should be no different here.

If that inhibits later analysis or optimization then that's an indicator
that we need to improve our frameworks, not that we should keep dead code
lying around in the IL.

Jeff




I see.  This is not the way forward.  While reading your reply it occured to me
that it could be possible to use value range info in switch conversion to get
the switches in question optimized without relying on __bultin_unreachable.
I'll try that.
I think one of the problems we've got is that when we drop unreachable 
paths it makes it much harder to reconstruct certain range information. 
We've seen this on a few tests since the new Ranger framework went in.


I think that argues we  need to handle this earlier, need to find a way 
to either carry range information further, or recreate the more precise 
range information later  Your initial idea falls into that last bucket. 
There may be other ways to do that.  For example, we could actually 
annotate the switch node with some additional data from the early 
analysis/optimization -- without introducing the builtin_unreachable.


jeff


[PATCH] c++, coroutines: Instrument missing return_void UB.

2024-09-01 Thread Iain Sandoe
This came up in discussion of an earlier patch.

I'm in two minds as to whether it's a good idea or not - the underlying
issue being that libubsan does not yet (AFAICT) have the concept of a
coroutine, so that the diagnostics are not very specific and might appear
strange (i.e. "execution reached the end of a value-returning function
without returning a value" which is a bit of an odd diagnostic for
a missing return_void ()).

OTOH one might argue that some diagnostic is better than silent UB .. but
I do not have cycles to address improving this in upstream libsanitizer ...

The logic used here is intended to match cp_maybe_instrument_return ()
although it's not 100% clear that that is doing exactly what the comment
says - since it does not distinguish between -fno-sanitize=return and
the case that the user simply did not specify it.  So that
-fsanitize=unreachable is ignored for both fno-sanitize=return and the
unset case.

tested on x86_64-darwin and powerpc64le-linux, 
apply to trunk? Opinions?
thanks
Iain

--- 8< ---

[dcl.fct.def.coroutine] / 6 Note 1:
"If return_void is found, flowing off the end of a coroutine is equivalent
to a co_return with no operand. Otherwise, flowing off the end of a
coroutine results in undefined behavior."

Here we implement this as a check for sanitized returns and call the ubsan
instrumentation; if that is not enabled we mark this as unreachable (which
might trap depending on the target settings).

gcc/cp/ChangeLog:

* coroutines.cc
(cp_coroutine_transform::wrap_original_function_body): Instrument
the case where control flows off the end of a coroutine and the
user promise has no return_void entry.

Signed-off-by: Iain Sandoe 
---
 gcc/cp/coroutines.cc | 18 ++
 1 file changed, 18 insertions(+)

diff --git a/gcc/cp/coroutines.cc b/gcc/cp/coroutines.cc
index e709d02b5f7..b67f4e3ef88 100644
--- a/gcc/cp/coroutines.cc
+++ b/gcc/cp/coroutines.cc
@@ -33,6 +33,9 @@ along with GCC; see the file COPYING3.  If not see
 #include "gcc-rich-location.h"
 #include "hash-map.h"
 #include "coroutines.h"
+#include "c-family/c-ubsan.h"
+#include "attribs.h" /* lookup_attribute */
+#include "asan.h" /* sanitize_flags_p */
 
 static bool coro_promise_type_found_p (tree, location_t);
 
@@ -4335,8 +4338,17 @@ cp_coroutine_transform::wrap_original_function_body ()
   finish_expr_stmt (initial_await);
   /* Append the original function body.  */
   add_stmt (coroutine_body);
+  /* Flowing off the end of a coroutine is equivalent to calling
+promise.return_void () or is UB if the promise does not contain
+that.  Do not add an unreachable unless the user has asked for
+checking of such cases.  */
   if (return_void)
add_stmt (return_void);
+  else if (sanitize_flags_p (SANITIZE_RETURN, orig_fn_decl))
+   add_stmt (ubsan_instrument_return (fn_start));
+  else if (flag_unreachable_traps
+  && !sanitize_flags_p (SANITIZE_UNREACHABLE, orig_fn_decl))
+   add_stmt (build_builtin_unreachable (fn_start));
   TRY_STMTS (tcb) = pop_stmt_list (TRY_STMTS (tcb));
   TRY_HANDLERS (tcb) = push_stmt_list ();
   /* Mimic what the parser does for the catch.  */
@@ -4393,8 +4405,14 @@ cp_coroutine_transform::wrap_original_function_body ()
   finish_expr_stmt (initial_await);
   /* Append the original function body.  */
   add_stmt (coroutine_body);
+  /* See comment above.  */
   if (return_void)
add_stmt (return_void);
+  else if (sanitize_flags_p (SANITIZE_RETURN, orig_fn_decl))
+   add_stmt (ubsan_instrument_return (fn_start));
+  else if (flag_unreachable_traps
+  && !sanitize_flags_p (SANITIZE_UNREACHABLE, orig_fn_decl))
+   add_stmt (build_builtin_unreachable (fn_start));
 }
 
   /* co_return branches to the final_suspend label, so declare that now.  */
-- 
2.39.2 (Apple Git-143)



[committed][PR rtl-optimization/116544] Fix test for promoted subregs

2024-09-01 Thread Jeff Law

This is a small bug in the ext-dce code's handling of promoted subregs.

Essentially when we see a promoted subreg we need to make additional bit 
groups live as various parts of the RTL path know that an extension of a 
suitably promoted subreg can be trivially eliminated.


When I added support for dealing with this quirk I failed to account for 
the larger modes properly and it ignored the case when the size of the 
inner object was > 32 bits.  Opps.


This does _not_ fix the outstanding x86 issue.  That's caused by 
something completely different and more concerning ;(


Bootstrapped and regression tested on x86.  Obviously fixes the testcase 
on riscv as well.


Pushing to the trunk.

jeffcommit 0562976d62e095f3a00c799288dee4e5b20114e2
Author: Jeff Law 
Date:   Sun Sep 1 22:16:04 2024 -0600

[committed][PR rtl-optimization/116544] Fix test for promoted subregs

This is a small bug in the ext-dce code's handling of promoted subregs.

Essentially when we see a promoted subreg we need to make additional bit 
groups
live as various parts of the RTL path know that an extension of a suitably
promoted subreg can be trivially eliminated.

When I added support for dealing with this quirk I failed to account for the
larger modes properly and it ignored the case when the size of the inner 
object
was > 32 bits.  Opps.

This does _not_ fix the outstanding x86 issue.  That's caused by something
completely different and more concerning ;(

Bootstrapped and regression tested on x86.  Obviously fixes the testcase on
riscv as well.

Pushing to the trunk.

PR rtl-optimization/116544
gcc/
* ext-dce.cc (ext_dce_process_uses): Fix thinko in promoted subreg
handling.

gcc/testsuite/
* gcc.dg/torture/pr116544.c: New test.

diff --git a/gcc/ext-dce.cc b/gcc/ext-dce.cc
index 4a2503f1831..2f3514ae797 100644
--- a/gcc/ext-dce.cc
+++ b/gcc/ext-dce.cc
@@ -846,7 +846,7 @@ ext_dce_process_uses (rtx_insn *insn, rtx obj,
bitmap_set_bit (livenow, rn + 1);
  if (size > 16)
bitmap_set_bit (livenow, rn + 2);
- if (size == 32)
+ if (size >= 32)
bitmap_set_bit (livenow, rn + 3);
  iter.skip_subrtxes ();
}
diff --git a/gcc/testsuite/gcc.dg/torture/pr116544.c 
b/gcc/testsuite/gcc.dg/torture/pr116544.c
new file mode 100644
index 000..15f52fecb3b
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/torture/pr116544.c
@@ -0,0 +1,22 @@
+/* { dg-options "-fno-strict-aliasing -fwrapv" }
+/* { dg-do run { target longlong64 } } */
+
+extern void abort (void);
+long long a;
+signed char b[60];
+signed char c;
+long long d[60];
+int e[30];
+long long *f = d;
+static void g(long long *j, long k) { *j = k; }
+int main() {
+  d[5] = 0x1;
+  for (int h = 2; h < 7; h += 3)
+for (int i = 0; i < (c || b[h]) + 10; i += 11)
+  e[2] = f[h];
+  g(&a, e[2]);
+  if (a != 0)
+abort ();
+  return 0;
+}
+


Re: [PATCH] RISC-V: Optimize the cost of the DFmode register move for RV32.

2024-09-01 Thread Jeff Law




On 8/27/24 3:17 AM, Xianmiao Qu wrote:

Currently, in RV32, even with the D extension enabled, the cost of DFmode
register moves is still set to 'COSTS_N_INSNS (2)'. This results in the
'lower-subreg' pass splitting DFmode register moves into two SImode SUBREG
register moves, leading to the generation of many redundant instructions.

As an example, consider the following test case:
   double foo (int t, double a, double b)
   {
 if (t > 0)
   return a;
 else
   return b;
   }

When compiling with -march=rv32imafdc -mabi=ilp32d, the following code is 
generated:
   .cfi_startproc
   addisp,sp,-32
   .cfi_def_cfa_offset 32
   fsd fa0,8(sp)
   fsd fa1,16(sp)
   lw  a4,8(sp)
   lw  a5,12(sp)
   lw  a2,16(sp)
   lw  a3,20(sp)
   bgt a0,zero,.L1
   mv  a4,a2
   mv  a5,a3
   .L1:
   sw  a4,24(sp)
   sw  a5,28(sp)
   fld fa0,24(sp)
   addisp,sp,32
   .cfi_def_cfa_offset 0
   jr  ra
   .cfi_endproc

After adjust the DFmode register move's cost to 'COSTS_N_INSNS (1)', the
generated code is as follows, with a significant reduction in the number
of instructions.
   .cfi_startproc
   ble a0,zero,.L5
   ret
   .L5:
   fmv.d   fa0,fa1
   ret
   .cfi_endproc

gcc/
* config/riscv/riscv.cc (riscv_rtx_costs): Optimize the cost of the
DFmode register move for RV32.

gcc/testsuite/
* gcc.target/riscv/rv32-movdf-cost.c: New test.

I pushed this to the trunk.
jeff