Hello!
The attached patch allows non-REX memory operands for x86_64 zero-extracts
involving high registers (a high-byte register such as %ah cannot be encoded
in an instruction that carries a REX prefix). This needs to be implemented
using a peephole2 pattern, since we can't distinguish non-REX memory operands
from normal memory operands.
2016-12-28 Uros Bizjak <[email protected]>
PR target/78904
* config/i386/constraints.md (Bn): New special memory constraint.
* config/i386/predicates.md (norex_memory_operand): New predicate.
* config/i386/i386.md (*extzvqi_mem_rex64): New insn pattern and
corresponding peephole2 pattern.
testsuite/ChangeLog:
2016-12-28 Uros Bizjak <[email protected]>
PR target/78904
* gcc.target/i386/pr78904-4.c: New test.
* gcc.target/i386/pr78904-5.c: Ditto.
Bootstrapped and regression tested on x86_64-linux-gnu {,-m32}.
Committed to mainline SVN.
Uros.
Index: config/i386/constraints.md
===================================================================
--- config/i386/constraints.md (revision 243954)
+++ config/i386/constraints.md (working copy)
@@ -169,6 +169,7 @@
;; g GOT memory operand.
;; m Vector memory operand
;; c Constant memory operand
+;; n Memory operand without REX prefix
;; s Sibcall memory operand, not valid for TARGET_X32
;; w Call memory operand, not valid for TARGET_X32
;; z Constant call address operand.
@@ -191,6 +192,10 @@
(and (match_operand 0 "memory_operand")
(match_test "constant_address_p (XEXP (op, 0))")))
+(define_special_memory_constraint "Bn"
+ "@internal Memory operand without REX prefix."
+ (match_operand 0 "norex_memory_operand"))
+
(define_constraint "Bs"
"@internal Sibcall memory operand."
(ior (and (not (match_test "TARGET_X32"))
Index: config/i386/i386.md
===================================================================
--- config/i386/i386.md (revision 243954)
+++ config/i386/i386.md (working copy)
@@ -2835,9 +2835,20 @@
[(set_attr "type" "imovx")
(set_attr "mode" "SI")])
+(define_insn "*extzvqi_mem_rex64"
+ [(set (match_operand:QI 0 "norex_memory_operand" "=Bn")
+ (subreg:QI
+ (zero_extract:SI (match_operand 1 "ext_register_operand" "Q")
+ (const_int 8)
+ (const_int 8)) 0))]
+ "TARGET_64BIT && reload_completed"
+ "mov{b}\t{%h1, %0|%0, %h1}"
+ [(set_attr "type" "imov")
+ (set_attr "mode" "QI")])
+
(define_insn "*extzvqi"
[(set (match_operand:QI 0 "nonimmediate_operand" "=QBc,?R,m")
- (subreg:QI
+ (subreg:QI
(zero_extract:SI (match_operand 1 "ext_register_operand" "Q,Q,Q")
(const_int 8)
(const_int 8)) 0))]
@@ -2863,6 +2874,21 @@
(const_string "SI")
(const_string "QI")))])
+(define_peephole2
+ [(set (match_operand:QI 0 "register_operand")
+ (subreg:QI
+ (zero_extract:SI (match_operand 1 "ext_register_operand")
+ (const_int 8)
+ (const_int 8)) 0))
+ (set (match_operand:QI 2 "norex_memory_operand") (match_dup 0))]
+ "TARGET_64BIT
+ && peep2_reg_dead_p (2, operands[0])"
+ [(set (match_dup 2)
+ (subreg:QI
+ (zero_extract:SI (match_dup 1)
+ (const_int 8)
+ (const_int 8)) 0))])
+
(define_expand "insv<mode>"
[(set (zero_extract:SWI248 (match_operand:SWI248 0 "register_operand")
(match_operand:SI 1 "const_int_operand")
Index: config/i386/predicates.md
===================================================================
--- config/i386/predicates.md (revision 243954)
+++ config/i386/predicates.md (working copy)
@@ -1037,6 +1037,10 @@
(ior (match_operand 0 "register_operand")
(match_operand 0 "const0_operand")))
+(define_predicate "norex_memory_operand"
+ (and (match_operand 0 "memory_operand")
+ (not (match_test "x86_extended_reg_mentioned_p (op)"))))
+
;; Return true for RTX codes that force SImode address.
(define_predicate "SImode_address_operand"
(match_code "subreg,zero_extend,and"))
Index: testsuite/gcc.target/i386/pr78904-4.c
===================================================================
--- testsuite/gcc.target/i386/pr78904-4.c (nonexistent)
+++ testsuite/gcc.target/i386/pr78904-4.c (working copy)
@@ -0,0 +1,21 @@
+/* PR target/78904 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -masm=att" } */
+
+typedef __SIZE_TYPE__ size_t;
+
+struct S1
+{
+ unsigned char pad1;
+ unsigned char val;
+ unsigned short pad2;
+};
+
+extern unsigned char t[256];
+
+void foo (struct S1 a, size_t i)
+{
+ t[i] = a.val;
+}
+
+/* { dg-final { scan-assembler "\[ \t\]movb\[\t \]*%.h," } } */
Index: testsuite/gcc.target/i386/pr78904-5.c
===================================================================
--- testsuite/gcc.target/i386/pr78904-5.c (nonexistent)
+++ testsuite/gcc.target/i386/pr78904-5.c (working copy)
@@ -0,0 +1,27 @@
+/* PR target/78904 */
+/* { dg-do assemble { target { ! ia32 } } } */
+/* { dg-options "-O2" } */
+
+typedef __SIZE_TYPE__ size_t;
+
+struct S1
+{
+ unsigned char pad1;
+ unsigned char val;
+ unsigned short pad2;
+};
+
+extern unsigned char t[256];
+
+void foo (struct S1 a, size_t i)
+{
+ t[i] = a.val;
+}
+
+void bar (struct S1 a, size_t i)
+{
+ register size_t _i __asm ("r10") = i;
+
+ asm volatile ("" : "+r" (_i));
+ t[_i] = a.val;
+}