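Hello! The attached patch allows non-REX memory operands for x86_64 zero-extracts involving high registers. High-byte registers (%ah, %bh, %ch, %dh) cannot be encoded in an instruction that carries a REX prefix, so the memory operand of such a move must not mention any extended register. This needs to be implemented using a peephole2 pattern, since we can't distinguish non-REX memory operands from normal memory operands.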
2016-12-28 Uros Bizjak <ubiz...@gmail.com> PR target/78904 * config/i386/constraints.md (Bn): New special memory constraint. * config/i386/predicates.md (norex_memory_operand): New predicate. * config/i386/i386.md (*extzvqi_mem_rex64): New insn pattern and corresponding peephole2 pattern. testsuite/ChangeLog: 2016-12-28 Uros Bizjak <ubiz...@gmail.com> PR target/78904 * gcc.target/i386/pr78904-4.c: New test. * gcc.target/i386/pr78904-5.c: Ditto. Bootstrapped and regression tested on x86_64-linux-gnu {,-m32}. Committed to mainline SVN. Uros.
Index: config/i386/constraints.md =================================================================== --- config/i386/constraints.md (revision 243954) +++ config/i386/constraints.md (working copy) @@ -169,6 +169,7 @@ ;; g GOT memory operand. ;; m Vector memory operand ;; c Constant memory operand +;; n Memory operand without REX prefix ;; s Sibcall memory operand, not valid for TARGET_X32 ;; w Call memory operand, not valid for TARGET_X32 ;; z Constant call address operand. @@ -191,6 +192,10 @@ (and (match_operand 0 "memory_operand") (match_test "constant_address_p (XEXP (op, 0))"))) +(define_special_memory_constraint "Bn" + "@internal Memory operand without REX prefix." + (match_operand 0 "norex_memory_operand")) + (define_constraint "Bs" "@internal Sibcall memory operand." (ior (and (not (match_test "TARGET_X32")) Index: config/i386/i386.md =================================================================== --- config/i386/i386.md (revision 243954) +++ config/i386/i386.md (working copy) @@ -2835,9 +2835,20 @@ [(set_attr "type" "imovx") (set_attr "mode" "SI")]) +(define_insn "*extzvqi_mem_rex64" + [(set (match_operand:QI 0 "norex_memory_operand" "=Bn") + (subreg:QI + (zero_extract:SI (match_operand 1 "ext_register_operand" "Q") + (const_int 8) + (const_int 8)) 0))] + "TARGET_64BIT && reload_completed" + "mov{b}\t{%h1, %0|%0, %h1}" + [(set_attr "type" "imov") + (set_attr "mode" "QI")]) + (define_insn "*extzvqi" [(set (match_operand:QI 0 "nonimmediate_operand" "=QBc,?R,m") - (subreg:QI + (subreg:QI (zero_extract:SI (match_operand 1 "ext_register_operand" "Q,Q,Q") (const_int 8) (const_int 8)) 0))] @@ -2863,6 +2874,21 @@ (const_string "SI") (const_string "QI")))]) +(define_peephole2 + [(set (match_operand:QI 0 "register_operand") + (subreg:QI + (zero_extract:SI (match_operand 1 "ext_register_operand") + (const_int 8) + (const_int 8)) 0)) + (set (match_operand:QI 2 "norex_memory_operand") (match_dup 0))] + "TARGET_64BIT + && peep2_reg_dead_p (2, operands[0])" + [(set 
(match_dup 2) + (subreg:QI + (zero_extract:SI (match_dup 1) + (const_int 8) + (const_int 8)) 0))]) + (define_expand "insv<mode>" [(set (zero_extract:SWI248 (match_operand:SWI248 0 "register_operand") (match_operand:SI 1 "const_int_operand") Index: config/i386/predicates.md =================================================================== --- config/i386/predicates.md (revision 243954) +++ config/i386/predicates.md (working copy) @@ -1037,6 +1037,10 @@ (ior (match_operand 0 "register_operand") (match_operand 0 "const0_operand"))) +(define_predicate "norex_memory_operand" + (and (match_operand 0 "memory_operand") + (not (match_test "x86_extended_reg_mentioned_p (op)")))) + ;; Return true for RTX codes that force SImode address. (define_predicate "SImode_address_operand" (match_code "subreg,zero_extend,and")) Index: testsuite/gcc.target/i386/pr78904-4.c =================================================================== --- testsuite/gcc.target/i386/pr78904-4.c (nonexistent) +++ testsuite/gcc.target/i386/pr78904-4.c (working copy) @@ -0,0 +1,21 @@ +/* PR target/78904 */ +/* { dg-do compile } */ +/* { dg-options "-O2 -masm=att" } */ + +typedef __SIZE_TYPE__ size_t; + +struct S1 +{ + unsigned char pad1; + unsigned char val; + unsigned short pad2; +}; + +extern unsigned char t[256]; + +void foo (struct S1 a, size_t i) +{ + t[i] = a.val; +} + +/* { dg-final { scan-assembler "\[ \t\]movb\[\t \]*%.h," } } */ Index: testsuite/gcc.target/i386/pr78904-5.c =================================================================== --- testsuite/gcc.target/i386/pr78904-5.c (nonexistent) +++ testsuite/gcc.target/i386/pr78904-5.c (working copy) @@ -0,0 +1,27 @@ +/* PR target/78904 */ +/* { dg-do assemble { target { ! 
ia32 } } } */ +/* { dg-options "-O2" } */ + +typedef __SIZE_TYPE__ size_t; + +struct S1 +{ + unsigned char pad1; + unsigned char val; + unsigned short pad2; +}; + +extern unsigned char t[256]; + +void foo (struct S1 a, size_t i) +{ + t[i] = a.val; +} + +void bar (struct S1 a, size_t i) +{ + register size_t _i __asm ("r10") = i; + + asm volatile ("" : "+r" (_i)); + t[_i] = a.val; +}