Hi Dave,

I actually think using the plus_xor_ior_operator predicate is useful. It means that if combine, inlining or some other RTL simplification generates one of these variants, the form will still be recognized by the backend. It's more typing, but the compiler produces better code.
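For example (an untested sketch, just to illustrate the point; these functions aren't from any testcase), the same rotate can reach RTL spelled as an xor or a plus of the two shifts, since the shifted bit ranges don't overlap:

/* Rotate left by 4 written with '^' and '+' instead of '|'; all three are
   equivalent here because the two shifted bit ranges don't overlap.  */
unsigned long long rotl4_xor(unsigned long long x)
{
  return (x<<4) ^ (x>>60);
}

unsigned long long rotl4_plus(unsigned long long x)
{
  return (x<<4) + (x>>60);
}

Note that the two constant shift counts sum to 64, which is exactly the condition the new constant-count shrpd patterns test.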
Here's what I have so far, but please feel free to modify anything. I'll leave the rest to you.
With this patch:
unsigned long long rotl4(unsigned long long x)
{
  return (x<<4) | (x>>60);
}

unsigned long long rotr4(unsigned long long x)
{
  return (x<<60) | (x>>4);
}
which previously generated:
rotl4:
        depd,z %r26,59,60,%r28
        extrd,u %r26,3,4,%r26
        bve (%r2)
        or %r26,%r28,%r28
rotr4:
        extrd,u %r26,59,60,%r28
        depd,z %r26,3,4,%r26
        bve (%r2)
        or %r26,%r28,%r28
now produces:
rotl4:
        bve (%r2)
        shrpd %r26,%r26,60,%r28
rotr4:
        bve (%r2)
        shrpd %r26,%r26,4,%r28
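Variable rotate counts should be handled as well by the new rotrdi3/rotldi3 patterns; here's an untested sketch (the function names are just for illustration) of the usual idiom that I'd expect the middle end to recognize as a DImode rotate:

/* Rotate by a variable count; the (64 - n) & 63 form avoids an undefined
   shift by 64 when n is 0.  */
unsigned long long rotld(unsigned long long x, unsigned int n)
{
  n &= 63;
  return (x << n) | (x >> ((64 - n) & 63));
}

unsigned long long rotrd(unsigned long long x, unsigned int n)
{
  n &= 63;
  return (x >> n) | (x << ((64 - n) & 63));
}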
I'm guessing this is very similar to what you were thinking (or what I
described previously).
Many thanks again for trying out these patches/suggestions for me.
Best regards,
Roger
--
-----Original Message-----
From: John David Anglin <[email protected]>
Sent: 22 August 2020 23:09
To: Roger Sayle <[email protected]>; 'GCC Patches'
<[email protected]>
Cc: 'Jeff Law' <[email protected]>
Subject: Re: [PATCH] hppa: Improve expansion of ashldi3 when !TARGET_64BIT
On 2020-08-22 12:01 p.m., Roger Sayle wrote:
> I suspect that the issue with the 64-bit patterns is that the second
> variant of pa.md's define_insn "shrpdi4" is unlikely ever to match as
> (minus:DI (const_int 64) x) is never "canonical" when x is itself a
> CONST_INT. Splitting this define_insn into two (or three see below)
> separate forms; the first as it currently is and the second (as you
> suggest) with
> "TARGET_64BIT
> && INTVAL (operands[3]) + INTVAL (operands[4]) == 64"
> should do the trick.
I will go ahead and add the basic patterns. It seems it would be best if I
avoid using the "plus_xor_ior_operator". It also seems the 32-bit patterns
should avoid it.
>
> My first impression was that the DImode shrpd instructions would be
> most useful for implementing TI mode shifts, but that TI mode isn't
> supported by hppa64. But then I noticed that the more immediate
> benefit would be in supporting rotrdi3 and rotldi3 on TARGET_64BIT
> that currently don't have expanders nor insns defined. Here GCC
> currently generates three instructions where a single shrpd would be
> optimal.
It turns out we now need to support TI mode and __int128 for libgomp. The
hppa64-hpux target won't bootstrap without it. I had just added a change to
support TI mode, but it's untested.
Regards,
Dave
--
John David Anglin [email protected]
patchh3.log
diff --git a/gcc/config/pa/pa.md b/gcc/config/pa/pa.md
index 6350c68..5f04c02 100644
--- a/gcc/config/pa/pa.md
+++ b/gcc/config/pa/pa.md
@@ -6604,32 +6604,82 @@
(set_attr "length" "4")])
; Shift right pair word 0 to 31 bits.
-(define_insn "shrpsi4"
- [(set (match_operand:SI 0 "register_operand" "=r,r")
- (ior:SI (ashift:SI (match_operand:SI 1 "register_operand" "r,r")
- (minus:SI (const_int 32)
- (match_operand:SI 3 "shift5_operand" "q,n")))
- (lshiftrt:SI (match_operand:SI 2 "register_operand" "r,r")
- (match_dup 3))))]
+(define_insn "*shrpsi4_1"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (match_operator:SI 4 "plus_xor_ior_operator"
+ [(ashift:SI (match_operand:SI 1 "register_operand" "r")
+ (minus:SI (const_int 32)
+ (match_operand:SI 3 "register_operand" "q")))
+ (lshiftrt:SI (match_operand:SI 2 "register_operand" "r")
+ (match_dup 3))]))]
""
- "@
- {vshd %1,%2,%0|shrpw %1,%2,%%sar,%0}
- {shd|shrpw} %1,%2,%3,%0"
+ "{vshd %1,%2,%0|shrpw %1,%2,%%sar,%0}"
+ [(set_attr "type" "shift")
+ (set_attr "length" "4")])
+
+(define_insn "*shrpsi4_2"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (match_operator:SI 4 "plus_xor_ior_operator"
+ [(lshiftrt:SI (match_operand:SI 2 "register_operand" "r")
+ (match_operand:SI 3 "register_operand" "q"))
+ (ashift:SI (match_operand:SI 1 "register_operand" "r")
+ (minus:SI (const_int 32)
+ (match_dup 3)))]))]
+ ""
+ "{vshd %1,%2,%0|shrpw %1,%2,%%sar,%0}"
[(set_attr "type" "shift")
(set_attr "length" "4")])
; Shift right pair doubleword 0 to 63 bits.
-(define_insn "shrpdi4"
- [(set (match_operand:DI 0 "register_operand" "=r,r")
- (ior:DI (ashift:DI (match_operand:SI 1 "register_operand" "r,r")
- (minus:DI (const_int 64)
- (match_operand:DI 3 "shift6_operand" "q,n")))
- (lshiftrt:DI (match_operand:DI 2 "register_operand" "r,r")
- (match_dup 3))))]
+(define_insn "*shrpdi4_1"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (match_operator:DI 4 "plus_xor_ior_operator"
+ [(ashift:DI (match_operand:DI 1 "register_operand" "r")
+ (minus:DI (const_int 64)
+ (match_operand:DI 3 "register_operand" "q")))
+ (lshiftrt:DI (match_operand:DI 2 "register_operand" "r")
+ (match_dup 3))]))]
"TARGET_64BIT"
- "@
- shrpd %1,%2,%%sar,%0
- shrpd %1,%2,%3,%0"
+ "shrpd %1,%2,%%sar,%0"
+ [(set_attr "type" "shift")
+ (set_attr "length" "4")])
+
+(define_insn "*shrpdi4_2"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (match_operator:DI 4 "plus_xor_ior_operator"
+ [(lshiftrt:DI (match_operand:DI 2 "register_operand" "r")
+ (match_operand:DI 3 "register_operand" "q"))
+ (ashift:DI (match_operand:DI 1 "register_operand" "r")
+ (minus:DI (const_int 64)
+ (match_dup 3)))]))]
+ "TARGET_64BIT"
+ "shrpd %1,%2,%%sar,%0"
+ [(set_attr "type" "shift")
+ (set_attr "length" "4")])
+
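+; With constant shift counts, (minus:DI (const_int 64) (const_int N)) is
+; never canonical; it is folded to a single CONST_INT, so these variants
+; match both counts explicitly and require that they sum to 64.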
+(define_insn "*shrpdi4_3"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (match_operator:DI 5 "plus_xor_ior_operator"
+ [(ashift:DI (match_operand:DI 1 "register_operand" "r")
+ (match_operand:DI 3 "const_int_operand" "n"))
+ (lshiftrt:DI (match_operand:DI 2 "register_operand" "r")
+ (match_operand:DI 4 "const_int_operand" "n"))]))]
+ "TARGET_64BIT
+ && INTVAL (operands[3]) + INTVAL (operands[4]) == 64"
+ "shrpd %1,%2,%4,%0"
+ [(set_attr "type" "shift")
+ (set_attr "length" "4")])
+
+(define_insn "*shrpdi4_4"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (match_operator:DI 5 "plus_xor_ior_operator"
+ [(lshiftrt:DI (match_operand:DI 2 "register_operand" "r")
+ (match_operand:DI 4 "const_int_operand" "n"))
+ (ashift:DI (match_operand:DI 1 "register_operand" "r")
+ (match_operand:DI 3 "const_int_operand" "n"))]))]
+ "TARGET_64BIT
+ && INTVAL (operands[3]) + INTVAL (operands[4]) == 64"
+ "shrpd %1,%2,%4,%0"
[(set_attr "type" "shift")
(set_attr "length" "4")])
@@ -6668,7 +6718,7 @@
/* Else expand normally. */
}")
-(define_insn ""
+(define_insn "*rotlsi3_internal"
[(set (match_operand:SI 0 "register_operand" "=r")
(rotate:SI (match_operand:SI 1 "register_operand" "r")
(match_operand:SI 2 "const_int_operand" "n")))]
@@ -6681,6 +6731,54 @@
[(set_attr "type" "shift")
(set_attr "length" "4")])
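+; Rotate right doubleword, implemented as a shrpd with the same register
+; supplied for both inputs.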
+(define_insn "rotrdi3"
+ [(set (match_operand:DI 0 "register_operand" "=r,r")
+ (rotatert:DI (match_operand:DI 1 "register_operand" "r,r")
+ (match_operand:DI 2 "shift6_operand" "q,n")))]
+ "TARGET_64BIT"
+ "*
+{
+ if (GET_CODE (operands[2]) == CONST_INT)
+ {
+ operands[2] = GEN_INT (INTVAL (operands[2]) & 63);
+ return \"shrpd %1,%1,%2,%0\";
+ }
+ else
+ return \"shrpd %1,%1,%%sar,%0\";
+}"
+ [(set_attr "type" "shift")
+ (set_attr "length" "4")])
+
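+; Rotate left is expanded as a rotate right by (64 - count).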
+(define_expand "rotldi3"
+ [(set (match_operand:DI 0 "register_operand" "")
+ (rotate:DI (match_operand:DI 1 "register_operand" "")
+ (match_operand:DI 2 "arith32_operand" "")))]
+ "TARGET_64BIT"
+ "
+{
+ if (GET_CODE (operands[2]) != CONST_INT)
+ {
+ rtx temp = gen_reg_rtx (DImode);
+ emit_insn (gen_subdi3 (temp, GEN_INT (64), operands[2]));
+ emit_insn (gen_rotrdi3 (operands[0], operands[1], temp));
+ DONE;
+ }
+ /* Else expand normally. */
+}")
+
+(define_insn "*rotldi3_internal"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (rotate:DI (match_operand:DI 1 "register_operand" "r")
+ (match_operand:DI 2 "const_int_operand" "n")))]
+ "TARGET_64BIT"
+ "*
+{
+ operands[2] = GEN_INT ((64 - INTVAL (operands[2])) & 63);
+ return \"shrpd %1,%1,%2,%0\";
+}"
+ [(set_attr "type" "shift")
+ (set_attr "length" "4")])
+
(define_insn ""
[(set (match_operand:SI 0 "register_operand" "=r")
(match_operator:SI 5 "plus_xor_ior_operator"
