[PATCH] RISC-V: optimization on checking certain bits set ((x & mask) == val)

2024-12-06 Thread Oliver Kozul
The patch optimizes code generation for comparisons of the form
X & C1 == C2 by converting them to (X | ~C1) == (C2 | ~C1).
C1 is a constant that requires li and addi to be loaded,
while ~C1 requires a single lui instruction.

2024-12-06  Oliver Kozul  

  PR target/114087

gcc/ChangeLog:

  * config/riscv/riscv.md (*lui_constraint_and_to_or): New 
pattern.

gcc/testsuite/ChangeLog:

  * gcc.target/riscv/pr114087-1.c: New test.



CONFIDENTIALITY: The contents of this e-mail are confidential and intended only 
for the above addressee(s). If you are not the intended recipient, or the 
person responsible for delivering it to the intended recipient, copying or 
delivering it to anyone else or using it in any unauthorized manner is 
prohibited and may be unlawful. If you receive this e-mail by mistake, please 
notify the sender and the systems administrator at straym...@rt-rk.com 
immediately.
---
 gcc/config/riscv/riscv.md   | 21 +
 gcc/testsuite/gcc.target/riscv/pr114087-1.c | 10 ++
 2 files changed, 31 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/riscv/pr114087-1.c

diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md
index 3a4cd1d93a0..add31bbf51c 100644
--- a/gcc/config/riscv/riscv.md
+++ b/gcc/config/riscv/riscv.md
@@ -858,6 +858,27 @@
   [(set_attr "type" "arith")
(set_attr "mode" "SI")])
 
+(define_insn_and_split "*lui_constraint_and_to_or"
+  [(set (match_operand:ANYI 0 "register_operand" "=r")
+(plus:ANYI (and:ANYI (match_operand:ANYI 1 "register_operand" "r")
+(match_operand 2 "const_int_operand"))
+(match_operand 3 "const_int_operand")))
+(clobber (match_scratch:X 4 "=&r"))]
+  "LUI_OPERAND (INTVAL (operands[2]) + 1)
+  && (INTVAL (operands[2]) & (-INTVAL (operands[3])))
+  == (-INTVAL (operands[3]))"
+  "#"
+  "&& reload_completed"
+  [(set (match_dup 4) (match_dup 5))
+   (set (match_dup 0) (ior:X (match_dup 1) (match_dup 4)))
+   (set (match_dup 4) (match_dup 6))
+   (set (match_dup 0) (minus:X (match_dup 0) (match_dup 4)))]
+  {
+operands[5] = GEN_INT (~INTVAL (operands[2]));
+operands[6] = GEN_INT ((~INTVAL (operands[2])) | (-INTVAL (operands[3])));
+  }
+  [(set_attr "type" "arith")])
+
 ;;
 ;;  
 ;;
diff --git a/gcc/testsuite/gcc.target/riscv/pr114087-1.c 
b/gcc/testsuite/gcc.target/riscv/pr114087-1.c
new file mode 100644
index 000..5e40b5f7b5b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/pr114087-1.c
@@ -0,0 +1,10 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target rv64 } */
+/* { dg-skip-if "" { *-*-* } { "-O0" "-O1" "-Og" } } */
+/* { dg-options "-march=rv64gc -mabi=lp64d" } */
+
+int pred1a(int x) {
+  return ((x & 0x5FFF) == 0x14501DEF);
+}
+
+/* { dg-final { scan-assembler  {or\s*[a-x0-9]+,\s*[a-x0-9]+,\s*[a-x0-9]+} } } 
*/
-- 
2.43.0


[PATCH] RISC-V: optimization by converting LUI operands with SMALL_AFTER_COMMON_TRAILING_SHIFT

2024-12-11 Thread Oliver Kozul
The patch optimizes code generation for comparisons of the form
X & C1 == C2. When the bitwise AND mask could fit in a 12-bit immediate
operand of RISC-V "andi" instruction with help of right shifting.
For example, C1 = 0x5550 and C2 = 0x1450.
These numbers can be stored using 12 bits, which is advantageous in RISC-V,
since instructions such as ANDI exist. By shifting all used values
by 20 bits to the right, we can make use of the “and immediate” instruction,
thus improving performance.

2024-12-11  Oliver Kozul  

  PR target/114087

gcc/ChangeLog:

  * config/riscv/riscv.md (*lui_constraint_lshiftrt): New 
pattern.

gcc/testsuite/ChangeLog:

  * gcc.target/riscv/pr114087-2.c: New test.



CONFIDENTIALITY: The contents of this e-mail are confidential and intended only 
for the above addressee(s). If you are not the intended recipient, or the 
person responsible for delivering it to the intended recipient, copying or 
delivering it to anyone else or using it in any unauthorized manner is 
prohibited and may be unlawful. If you receive this e-mail by mistake, please 
notify the sender and the systems administrator at straym...@rt-rk.com 
immediately.
---
 gcc/config/riscv/riscv.md   | 25 +
 gcc/testsuite/gcc.target/riscv/pr114087-2.c | 11 +
 2 files changed, 36 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/riscv/pr114087-2.c

diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md
index 3a4cd1d93a0..310cbd5617e 100644
--- a/gcc/config/riscv/riscv.md
+++ b/gcc/config/riscv/riscv.md
@@ -858,6 +858,31 @@
   [(set_attr "type" "arith")
(set_attr "mode" "SI")])
 
+(define_insn_and_split "*lui_constraint_lshiftrt"
+  [(set (match_operand:ANYI 0 "register_operand" "=r")
+  (plus:ANYI (and:ANYI (match_operand:ANYI 1 "register_operand" "r")
+(match_operand 2 "const_int_operand"))
+(match_operand 3 "const_int_operand")))]
+  "!SMALL_OPERAND (INTVAL (operands[2]))
+  && !SMALL_OPERAND (INTVAL (operands[3]))
+  && SMALL_AFTER_COMMON_TRAILING_SHIFT (INTVAL (operands[2]),
+  INTVAL (operands[3]))"
+  "#"
+  "&& reload_completed"
+  [(set (match_dup 0) (lshiftrt:X (match_dup 1) (match_dup 4)))
+   (set (match_dup 0) (and:X (match_dup 0) (match_dup 5)))
+   (set (match_dup 0) (plus:X (match_dup 0) (match_dup 6)))]
+  {
+HOST_WIDE_INT mask = INTVAL (operands[2]);
+HOST_WIDE_INT val = INTVAL (operands[3]);
+int trailing_shift = COMMON_TRAILING_ZEROS (mask, val);
+
+operands[4] = GEN_INT (trailing_shift);
+operands[5] = GEN_INT (mask >> trailing_shift);
+operands[6] = GEN_INT (val >> trailing_shift);
+  }
+[(set_attr "type" "arith")])
+
 ;;
 ;;  
 ;;
diff --git a/gcc/testsuite/gcc.target/riscv/pr114087-2.c 
b/gcc/testsuite/gcc.target/riscv/pr114087-2.c
new file mode 100644
index 000..1e158e8b722
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/pr114087-2.c
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target rv64 } */
+/* { dg-skip-if "" { *-*-* } { "-O0" "-O1" "-Og" } } */
+/* { dg-options "-march=rv64gc -mabi=lp64d" } */
+
+int pred2a(int x) {
+  return ((x & 0x5550) == 0x1450);
+}
+
+/* { dg-final { scan-assembler  {srli\s*[a-x0-9]+,\s*[a-x0-9]+,\s*20}} } */
+/* { dg-final { scan-assembler  {andi\s*[a-x0-9]+,\s*[a-x0-9]+,\s*1365}} } */
\ No newline at end of file
-- 
2.43.0



Re: [PATCH] RISC-V: optimization on checking certain bits set ((x & mask) == val)

2024-12-15 Thread Oliver Kozul
Thank you again for your helpful comments.

I will implement the formatting changes you mentioned.

I agree that a description like:

;; Transform (X & C1) + C2 into (X | ~C1) - (-C2 | ~C1)
;; Where C1 is not a LUI operand, but ~C1 is a LUI operand

is more fitting since we are catching a plus expression.

Your proposed condition also makes more sense than the current one.
LUI_OPERAND (INTVAL (operands[2]) + 1) made sense when starting out,
but I never got around to changing to LUI_OPERAND (~INTVAL (operands[2]).

I did not know about the costs of constant synthesis.
I thought that the values of those constants are known at compile time, and 
therefore their creation would not have an impact on performance.
I will definitely take a look at riscv_const_insns and evaluate the synthesis 
complexity.

Best regards,
Oliver





From: Jeff Law 
Sent: Sunday, December 15, 2024 1:19 AM
To: Oliver Kozul ; gcc-patches@gcc.gnu.org 

Cc: Dusan Stojkovic ; Mile Davidovic 
; l...@gcc.gnu.org 
Subject: Re: [PATCH] RISC-V: optimization on checking certain bits set ((x & 
mask) == val)



On 12/6/24 6:12 AM, Oliver Kozul wrote:
> The patch optimizes code generation for comparisons of the form
> X & C1 == C2 by converting them to (X | ~C1) == (C2 | ~C1).
> C1 is a constant that requires li and addi to be loaded,
> while ~C1 requires a single lui instruction.
>
> 2024-12-06  Oliver Kozul  
>
>   PR target/114087
>
> gcc/ChangeLog:
>
>   * config/riscv/riscv.md (*lui_constraint_and_to_or):
> New pattern.
>
> gcc/testsuite/ChangeLog:
>
>   * gcc.target/riscv/pr114087-1.c: New test.
A few comments:



>
> +(define_insn_and_split "*lui_constraint_and_to_or"
> +  [(set (match_operand:ANYI 0 "register_operand" "=r")
> +(plus:ANYI (and:ANYI (match_operand:ANYI 1 "register_operand" "r")
> +(match_operand 2 "const_int_operand"))
> +(match_operand 3 "const_int_operand")))
> +(clobber (match_scratch:X 4 "=&r"))]
Similar formatting comment as I had on the other patch.  Easily fixed.
Just for giggles I asked chatgpt to format it and it came up with:

> (define_insn_and_split "*lui_constraint_and_to_or"
>   [(set (match_operand:ANYI 0 "register_operand" "=r")
> (plus:ANYI (and:ANYI (match_operand:ANYI 1 "register_operand" "r")
>  (match_operand 2 "const_int_operand"))
>(match_operand 3 "const_int_operand")))
>(clobber (match_scratch:X 4 "=&r"))]
>   "LUI_OPERAND (INTVAL (operands[2]) + 1)
>&& (INTVAL (operands[2]) & (-INTVAL (operands[3]))) == (-INTVAL 
> (operands[3]))"
>   "#"
>   "&& reload_completed"
>   [(set (match_dup 4) (match_dup 5))
>(set (match_dup 0) (ior:X (match_dup 1) (match_dup 4)))
>(set (match_dup 4) (match_dup 6))
>(set (match_dup 0) (minus:X (match_dup 0) (match_dup 4)))]
>   {
> operands[5] = GEN_INT (~INTVAL (operands[2]));
> operands[6] = GEN_INT ((~INTVAL (operands[2])) | (-INTVAL (operands[3])));
>   }
>   [(set_attr "type" "arith")])

Which looks basically correct.




More importantly we probably want a comment on what this pattern is
actually doing.  Perhaps something like:

;; Transform (X & C1) + C2 into (X | ~C1) - (-C2 | ~C1)
;; Where C1 is not a LUI operand, but ~C1 is a LUI operand

Did I get that correct?



> +  "LUI_OPERAND (INTVAL (operands[2]) + 1)
So shouldn't this have been LUI_OPERAND (~INTVAL (operands[2]))?  I can
kind-of see why you used +1, but given the C fragment in the pattern
uses ~operands[2], let's use the same here for the test if we can.




> +  && (INTVAL (operands[2]) & (-INTVAL (operands[3])))
> +  == (-INTVAL (operands[3]))"
Does it make sense to verify that operands[3] is a constant that
requires synthesis and that ~INTVAL (operands[2]) | -INTVAL
(operands[3]) is no more expensive to synthesize than operands[3]?

ie, saving the addi on the first constant isn't a win if the second
constant gets more complex to synthesize.  The API isn't great, but you
can use riscv_const_insns to find out how many instructions are
necessary to synthesize a constant.


Overall it looks good.  Just needs a bit of polishing.

jeff
CONFIDENTIALITY: The contents of this e-mail are confidential and intended only 
for the above addressee(s). If you are not the intended recipient, or the 
person responsible for delivering it to the intended recipient, copying or 
delivering it to anyone else or using it in any unauthorized manner is 
prohibited and may be unlawful. If you receive this e-mail by mistake, please 
notify the sender and the systems administrator at straym...@rt-rk.com 
immediately.


[PATCH] RISC-V: optimization on checking certain bits set ((x & mask) == val)

2024-12-16 Thread Oliver Kozul
The patch optimizes code generation for comparisons of the form
X & C1 == C2 by converting them to (X | ~C1) == (C2 | ~C1).
C1 is a constant that requires li and addi to be loaded,
while ~C1 requires a single lui instruction.
As the values of C1 and C2 are not visible within
the equality expression, a plus pattern is matched instead.

2024-12-16  Oliver Kozul  

  PR target/114087

gcc/ChangeLog:

  * config/riscv/riscv.md (*lui_constraint_and_to_or): New 
pattern

gcc/testsuite/ChangeLog:

  * gcc.target/riscv/pr114087-1.c: New test.



CONFIDENTIALITY: The contents of this e-mail are confidential and intended only 
for the above addressee(s). If you are not the intended recipient, or the 
person responsible for delivering it to the intended recipient, copying or 
delivering it to anyone else or using it in any unauthorized manner is 
prohibited and may be unlawful. If you receive this e-mail by mistake, please 
notify the sender and the systems administrator at straym...@rt-rk.com 
immediately.
---
 gcc/config/riscv/riscv.md   | 28 +
 gcc/testsuite/gcc.target/riscv/pr114087-1.c | 16 
 2 files changed, 44 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/riscv/pr114087-1.c

diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md
index 3a4cd1d93a0..75b0947e597 100644
--- a/gcc/config/riscv/riscv.md
+++ b/gcc/config/riscv/riscv.md
@@ -858,6 +858,34 @@
   [(set_attr "type" "arith")
(set_attr "mode" "SI")])
 
+;; Transform (X & C1) + C2 into (X | ~C1) - (-C2 | ~C1)
+;; Where C1 is not a LUI operand, but ~C1 is a LUI operand
+
+(define_insn_and_split "*lui_constraint_and_to_or"
+   [(set (match_operand:ANYI 0 "register_operand" "=r")
+   (plus:ANYI (and:ANYI (match_operand:ANYI 1 "register_operand" "r")
+(match_operand 2 "const_int_operand"))
+(match_operand 3 "const_int_operand")))
+   (clobber (match_scratch:X 4 "=&r"))]
+  "LUI_OPERAND (~INTVAL (operands[2]))
+   && ((INTVAL (operands[2]) & (-INTVAL (operands[3])))
+   == (-INTVAL (operands[3])))
+   && riscv_const_insns (operands[3], false)
+   && (riscv_const_insns
+   (GEN_INT (~INTVAL (operands[2]) | -INTVAL (operands[3])), false)
+   <= riscv_const_insns (operands[3], false))"
+  "#"
+  "&& reload_completed"
+  [(set (match_dup 4) (match_dup 5))
+   (set (match_dup 0) (ior:X (match_dup 1) (match_dup 4)))
+   (set (match_dup 4) (match_dup 6))
+   (set (match_dup 0) (minus:X (match_dup 0) (match_dup 4)))]
+  {
+operands[5] = GEN_INT (~INTVAL (operands[2]));
+operands[6] = GEN_INT ((~INTVAL (operands[2])) | (-INTVAL (operands[3])));
+  }
+  [(set_attr "type" "arith")])
+
 ;;
 ;;  
 ;;
diff --git a/gcc/testsuite/gcc.target/riscv/pr114087-1.c 
b/gcc/testsuite/gcc.target/riscv/pr114087-1.c
new file mode 100644
index 000..9df02db6d5d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/pr114087-1.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target rv64 } */
+/* { dg-skip-if "" { *-*-* } { "-O0" "-O1" "-Og" } } */
+/* { dg-options "-march=rv64gc -mabi=lp64d" } */
+
+#include 
+#include 
+
+static uint32_t mask1 = 0x5FFF;
+static uint32_t val1  = 0x14501DEF;
+
+bool pred1a(uint32_t x) {
+  return ((x & mask1) == val1);
+}
+
+/* { dg-final { scan-assembler  {or\s*[a-x0-9]+,\s*[a-x0-9]+,\s*[a-x0-9]+} } } 
*/
-- 
2.43.0



[PATCH] RISC-V: optimization by converting to LUI operands with LUI_AFTER_COMMON_LEADING_SHIFT

2024-12-13 Thread Oliver Kozul
The patch optimizes code generation for comparisons of the form
X & C1 == C2. When the bitwise AND mask is stored in the lower 20 bits
it can be left shifted so it behaves as a LUI operand instead,
saving an addi instruction while loading.

2024-12-13  Oliver Kozul  

  PR target/114087

gcc/ChangeLog:

  * config/riscv/riscv.h (COMMON_LEADING_ZEROS): New macro.
  (LUI_AFTER_COMMON_LEADING_SHIFT): New macro.
  * config/riscv/riscv.md (*lui_constraint_ashift): New pattern.

gcc/testsuite/ChangeLog:

  * gcc.target/riscv/pr114087-3.c: New test.



CONFIDENTIALITY: The contents of this e-mail are confidential and intended only 
for the above addressee(s). If you are not the intended recipient, or the 
person responsible for delivering it to the intended recipient, copying or 
delivering it to anyone else or using it in any unauthorized manner is 
prohibited and may be unlawful. If you receive this e-mail by mistake, please 
notify the sender and the systems administrator at straym...@rt-rk.com 
immediately.
---
 gcc/config/riscv/riscv.h| 18 +++
 gcc/config/riscv/riscv.md   | 35 +
 gcc/testsuite/gcc.target/riscv/pr114087-3.c | 10 ++
 3 files changed, 63 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/riscv/pr114087-3.c

diff --git a/gcc/config/riscv/riscv.h b/gcc/config/riscv/riscv.h
index 09de74667a9..92850a52251 100644
--- a/gcc/config/riscv/riscv.h
+++ b/gcc/config/riscv/riscv.h
@@ -677,6 +677,24 @@ enum reg_class
   (SMALL_OPERAND ((VAL1) >> COMMON_TRAILING_ZEROS (VAL1, VAL2))
\
&& SMALL_OPERAND ((VAL2) >> COMMON_TRAILING_ZEROS (VAL1, VAL2)))
 
+/* Returns the smaller (common) number of leading zeros for VAL1 and VAL2.  */
+#define COMMON_LEADING_ZEROS(VAL1, VAL2)   \
+  (clz_hwi (VAL1) < clz_hwi (VAL2) \
+   ? clz_hwi (VAL1)\
+   : clz_hwi (VAL2))
+
+/* Returns true if both VAL1 and VAL2 are LUI_OPERANDs after shifting by
+   the common number of leading zeros (-1 to account for sign).  */
+#define LUI_AFTER_COMMON_LEADING_SHIFT(VAL1, VAL2) \
+  ((LUI_OPERAND \
+  ((VAL1) << (COMMON_LEADING_ZEROS (VAL1, VAL2) - 1))  \
+   && LUI_OPERAND \
+   ((VAL2) << (COMMON_LEADING_ZEROS (VAL1, VAL2) - 1))) \
+   || ((LUI_OPERAND \
+   ((VAL1) << (COMMON_LEADING_ZEROS (VAL1, VAL2) - 33)) \
+   && LUI_OPERAND \
+   ((VAL2) << (COMMON_LEADING_ZEROS (VAL1, VAL2) - 33)
+
 /* Stack layout; function entry, exit and calling.  */
 
 #define STACK_GROWS_DOWNWARD 1
diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md
index 3a4cd1d93a0..a44caa6908d 100644
--- a/gcc/config/riscv/riscv.md
+++ b/gcc/config/riscv/riscv.md
@@ -858,6 +858,41 @@
   [(set_attr "type" "arith")
(set_attr "mode" "SI")])
 
+(define_insn_and_split "*lui_constraint_ashift"
+  [(set (match_operand:ANYI 0 "register_operand" "=r")
+(plus:ANYI (and:ANYI (match_operand:ANYI 1 "register_operand" "r")
+(match_operand 2 "const_int_operand"))
+(match_operand 3 "const_int_operand")))
+(clobber (match_scratch:X 4 "=&r"))]
+  "!LUI_OPERAND (INTVAL (operands[2]))
+  && !LUI_OPERAND (-INTVAL (operands[3]))
+  && !SMALL_OPERAND (INTVAL (operands[2]))
+  && !SMALL_OPERAND (-INTVAL (operands[3]))
+  && LUI_AFTER_COMMON_LEADING_SHIFT (INTVAL (operands[2]),
+  -INTVAL (operands[3]))"
+  "#"
+  "&& reload_completed"
+  [(set (match_dup 0) (ashift:X (match_dup 1) (match_dup 5)))
+   (set (match_dup 4) (match_dup 6))
+   (set (match_dup 0) (and:X (match_dup 0) (match_dup 4)))
+   (set (match_dup 4) (match_dup 7))
+   (set (match_dup 0) (minus:X (match_dup 0) (match_dup 4)))]
+  {
+ HOST_WIDE_INT mask = INTVAL (operands[2]);
+HOST_WIDE_INT val = -INTVAL (operands[3]);
+int leading_shift = COMMON_LEADING_ZEROS (mask, val) - 1;
+
+if (TARGET_64BIT && leading_shift > 32)
+{
+  leading_shift -= 32;
+}
+
+operands[5] = GEN_INT (leading_shift);
+operands[6] = GEN_INT (mask << leading_shift);
+operands[7] = GEN_INT (val << leading_shift);
+  }
+[(set_attr "type" "arith")])
+
 ;;
 ;;  
 ;;
diff --git a/gcc/testsuite/gcc.target/riscv/pr114087-3.c 
b/gcc/testsuite/gcc.target/riscv/pr114087-3.c
new file mode 100644
index 000..d93fb354c25
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/pr114087-3.c
@@ -0,0 +1,10 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target rv64 } */
+/* { dg-skip-if "" { *-*-* } { "-O0" "-O1" "-Og" } } */
+/* { dg-options "-march=rv64gc -mabi=lp64d" } */
+
+int pred3a(int x) {
+  return ((x & 0x0005) == 0x00045014);
+}
+
+/* { dg-final { scan-assembler  {slli\s*[a-x0-9]+,\s*[a-x0-9]+,\s*12}} } */
\ No newline at end of file
-- 
2.43.0