On Tue, Sep 5, 2017 at 12:28 PM, Alexander Monakov <[email protected]> wrote:
> On Mon, 4 Sep 2017, Uros Bizjak wrote:
>> introduced a couple of regressions on x86 (-m32, 32bit) testsuite:
>>
>> New failures:
>> FAIL: gcc.target/i386/pr71245-1.c scan-assembler-not (fistp|fild)
>> FAIL: gcc.target/i386/pr71245-2.c scan-assembler-not movlps
>
> Sorry. I suggest that the tests be XFAIL'ed, the peepholes introduced in the
> fix for PR 71245 removed, and the PR reopened (it's a missed-optimization PR).
> I can do all of the above if you agree.
>
> I think RTL peepholes are a poor way of fixing the original problem, which
> actually exists on all targets with separate int/fp registers. For instance,
> trunk (without my patch) still gets a far simpler testcase wrong (-O2,
> 64-bit):
Please note that 32-bit x86 implements atomic DImode access with a
fild/fistp combination, so the peephole in question avoids a quite costly
instruction sequence.
For reference, the attached patch implements additional peephole2 patterns
that also handle sequences with memory blockages.
Uros.
diff --git a/gcc/config/i386/sync.md b/gcc/config/i386/sync.md
index 29b82f86d43a..eceaa73a6799 100644
--- a/gcc/config/i386/sync.md
+++ b/gcc/config/i386/sync.md
@@ -219,29 +219,71 @@
(set (match_operand:DI 2 "memory_operand")
(unspec:DI [(match_dup 0)]
UNSPEC_FIST_ATOMIC))
- (set (match_operand:DF 3 "fp_register_operand")
+ (set (match_operand:DF 3 "any_fp_register_operand")
(match_operand:DF 4 "memory_operand"))]
"!TARGET_64BIT
&& peep2_reg_dead_p (2, operands[0])
- && rtx_equal_p (operands[4], adjust_address_nv (operands[2], DFmode, 0))"
+ && rtx_equal_p (XEXP (operands[4], 0), XEXP (operands[2], 0))"
[(set (match_dup 3) (match_dup 5))]
"operands[5] = gen_lowpart (DFmode, operands[1]);")
(define_peephole2
+ [(set (match_operand:DF 0 "fp_register_operand")
+ (unspec:DF [(match_operand:DI 1 "memory_operand")]
+ UNSPEC_FILD_ATOMIC))
+ (set (match_operand:DI 2 "memory_operand")
+ (unspec:DI [(match_dup 0)]
+ UNSPEC_FIST_ATOMIC))
+ (set (mem:BLK (scratch:SI))
+ (unspec:BLK [(mem:BLK (scratch:SI))] UNSPEC_MEMORY_BLOCKAGE))
+ (set (match_operand:DF 3 "any_fp_register_operand")
+ (match_operand:DF 4 "memory_operand"))]
+ "!TARGET_64BIT
+ && peep2_reg_dead_p (2, operands[0])
+ && rtx_equal_p (XEXP (operands[4], 0), XEXP (operands[2], 0))"
+ [(const_int 0)]
+{
+ emit_move_insn (operands[3], gen_lowpart (DFmode, operands[1]));
+ emit_insn (gen_memory_blockage ());
+ DONE;
+})
+
+(define_peephole2
[(set (match_operand:DF 0 "sse_reg_operand")
(unspec:DF [(match_operand:DI 1 "memory_operand")]
UNSPEC_LDX_ATOMIC))
(set (match_operand:DI 2 "memory_operand")
(unspec:DI [(match_dup 0)]
UNSPEC_STX_ATOMIC))
- (set (match_operand:DF 3 "fp_register_operand")
+ (set (match_operand:DF 3 "any_fp_register_operand")
(match_operand:DF 4 "memory_operand"))]
"!TARGET_64BIT
&& peep2_reg_dead_p (2, operands[0])
- && rtx_equal_p (operands[4], adjust_address_nv (operands[2], DFmode, 0))"
+ && rtx_equal_p (XEXP (operands[4], 0), XEXP (operands[2], 0))"
[(set (match_dup 3) (match_dup 5))]
"operands[5] = gen_lowpart (DFmode, operands[1]);")
+(define_peephole2
+ [(set (match_operand:DF 0 "sse_reg_operand")
+ (unspec:DF [(match_operand:DI 1 "memory_operand")]
+ UNSPEC_LDX_ATOMIC))
+ (set (match_operand:DI 2 "memory_operand")
+ (unspec:DI [(match_dup 0)]
+ UNSPEC_STX_ATOMIC))
+ (set (mem:BLK (scratch:SI))
+ (unspec:BLK [(mem:BLK (scratch:SI))] UNSPEC_MEMORY_BLOCKAGE))
+ (set (match_operand:DF 3 "any_fp_register_operand")
+ (match_operand:DF 4 "memory_operand"))]
+ "!TARGET_64BIT
+ && peep2_reg_dead_p (2, operands[0])
+ && rtx_equal_p (XEXP (operands[4], 0), XEXP (operands[2], 0))"
+ [(const_int 0)]
+{
+ emit_move_insn (operands[3], gen_lowpart (DFmode, operands[1]));
+ emit_insn (gen_memory_blockage ());
+ DONE;
+})
+
(define_expand "atomic_store<mode>"
[(set (match_operand:ATOMIC 0 "memory_operand")
(unspec:ATOMIC [(match_operand:ATOMIC 1 "nonimmediate_operand")
@@ -331,7 +373,7 @@
(define_peephole2
[(set (match_operand:DF 0 "memory_operand")
- (match_operand:DF 1 "fp_register_operand"))
+ (match_operand:DF 1 "any_fp_register_operand"))
(set (match_operand:DF 2 "fp_register_operand")
(unspec:DF [(match_operand:DI 3 "memory_operand")]
UNSPEC_FILD_ATOMIC))
@@ -340,13 +382,34 @@
UNSPEC_FIST_ATOMIC))]
"!TARGET_64BIT
&& peep2_reg_dead_p (3, operands[2])
- && rtx_equal_p (operands[0], adjust_address_nv (operands[3], DFmode, 0))"
+ && rtx_equal_p (XEXP (operands[0], 0), XEXP (operands[3], 0))"
[(set (match_dup 5) (match_dup 1))]
"operands[5] = gen_lowpart (DFmode, operands[4]);")
(define_peephole2
[(set (match_operand:DF 0 "memory_operand")
- (match_operand:DF 1 "fp_register_operand"))
+ (match_operand:DF 1 "any_fp_register_operand"))
+ (set (mem:BLK (scratch:SI))
+ (unspec:BLK [(mem:BLK (scratch:SI))] UNSPEC_MEMORY_BLOCKAGE))
+ (set (match_operand:DF 2 "fp_register_operand")
+ (unspec:DF [(match_operand:DI 3 "memory_operand")]
+ UNSPEC_FILD_ATOMIC))
+ (set (match_operand:DI 4 "memory_operand")
+ (unspec:DI [(match_dup 2)]
+ UNSPEC_FIST_ATOMIC))]
+ "!TARGET_64BIT
+ && peep2_reg_dead_p (4, operands[2])
+ && rtx_equal_p (XEXP (operands[0], 0), XEXP (operands[3], 0))"
+ [(const_int 0)]
+{
+ emit_insn (gen_memory_blockage ());
+ emit_move_insn (gen_lowpart (DFmode, operands[4]), operands[1]);
+ DONE;
+})
+
+(define_peephole2
+ [(set (match_operand:DF 0 "memory_operand")
+ (match_operand:DF 1 "any_fp_register_operand"))
(set (match_operand:DF 2 "sse_reg_operand")
(unspec:DF [(match_operand:DI 3 "memory_operand")]
UNSPEC_LDX_ATOMIC))
@@ -355,10 +418,31 @@
UNSPEC_STX_ATOMIC))]
"!TARGET_64BIT
&& peep2_reg_dead_p (3, operands[2])
- && rtx_equal_p (operands[0], adjust_address_nv (operands[3], DFmode, 0))"
+ && rtx_equal_p (XEXP (operands[0], 0), XEXP (operands[3], 0))"
[(set (match_dup 5) (match_dup 1))]
"operands[5] = gen_lowpart (DFmode, operands[4]);")
+(define_peephole2
+ [(set (match_operand:DF 0 "memory_operand")
+ (match_operand:DF 1 "any_fp_register_operand"))
+ (set (mem:BLK (scratch:SI))
+ (unspec:BLK [(mem:BLK (scratch:SI))] UNSPEC_MEMORY_BLOCKAGE))
+ (set (match_operand:DF 2 "sse_reg_operand")
+ (unspec:DF [(match_operand:DI 3 "memory_operand")]
+ UNSPEC_LDX_ATOMIC))
+ (set (match_operand:DI 4 "memory_operand")
+ (unspec:DI [(match_dup 2)]
+ UNSPEC_STX_ATOMIC))]
+ "!TARGET_64BIT
+ && peep2_reg_dead_p (4, operands[2])
+ && rtx_equal_p (XEXP (operands[0], 0), XEXP (operands[3], 0))"
+ [(const_int 0)]
+{
+ emit_insn (gen_memory_blockage ());
+ emit_move_insn (gen_lowpart (DFmode, operands[4]), operands[1]);
+ DONE;
+})
+
;; ??? You'd think that we'd be able to perform this via FLOAT + FIX_TRUNC
;; operations. But the fix_trunc patterns want way more setup than we want
;; to provide. Note that the scratch is DFmode instead of XFmode in order