Hello! While we still don't allow memory operands for x86_64, we can improve generated code by allowing memory operands until reload. This way, combine is free to create complex patterns, as illustrated by the following test:
--cut here-- struct S1 { unsigned char pad1; unsigned char val; unsigned short pad2; }; extern struct S1 t; struct S1 test_add (struct S1 a, struct S1 b) { a.val += t.val; return a; } --cut here-- The unpatched x86_64 compiler generates (-O2): movl %edi, %eax movzbl %ah, %edx addb t+1(%rip), %dl movb %dl, %ah while the patched compiler generates: movzbl t+1(%rip), %edx movl %edi, %eax addb %dl, %ah Ideally, the memory operand would be merged into the operation, but high registers can't be used with a REX prefix, so we have to take care that there are no REX registers in the address. FWIW, define_memory_constraint can't be used to solve the above limitation, since reload converts the memory operand to a form involving BASE_REG_CLASS, which on x86_64 includes REX registers as well. 2016-12-27 Uros Bizjak <ubiz...@gmail.com> PR target/78904 * config/i386/i386.md (*cmpqi_ext_1, *extvqi, *extzvqi): Use nonimmediate_operand instead of nonimmediate_x64nomem_operand. (*cmpqi_ext_3, insv<mode>_1, addqi_ext_1, *testqi_ext_1, andqi_ext_1) (*<any_or:code>qi_ext_1, *xorqi_ext_1_cc): Use general_operand instead of general_x64nomem_operand. * config/i386/predicates.md (nonimmediate_x64nomem_operand): Remove. (general_x64nomem_operand): Ditto. testsuite/ChangeLog: 2016-12-27 Uros Bizjak <ubiz...@gmail.com> PR target/78904 * gcc.target/i386/pr78904-2.c: New test. The patch was bootstrapped and regression tested on x86_64-linux-gnu. Committed to mainline SVN. Uros.
Index: config/i386/i386.md =================================================================== --- config/i386/i386.md (revision 243929) +++ config/i386/i386.md (working copy) @@ -1295,7 +1295,7 @@ (define_insn "*cmpqi_ext_1" [(set (reg FLAGS_REG) (compare - (match_operand:QI 0 "nonimmediate_x64nomem_operand" "Q,m") + (match_operand:QI 0 "nonimmediate_operand" "Q,m") (subreg:QI (zero_extract:SI (match_operand 1 "ext_register_operand" "Q,Q") @@ -1340,7 +1340,7 @@ (match_operand 0 "ext_register_operand" "Q,Q") (const_int 8) (const_int 8)) 0) - (match_operand:QI 1 "general_x64nomem_operand" "Qn,m")))] + (match_operand:QI 1 "general_operand" "Qn,m")))] "ix86_match_ccmode (insn, CCmode)" "cmp{b}\t{%1, %h0|%h0, %1}" [(set_attr "isa" "*,nox64") @@ -2781,7 +2781,7 @@ (set_attr "mode" "SI")]) (define_insn "*extvqi" - [(set (match_operand:QI 0 "nonimmediate_x64nomem_operand" "=Q,?R,m") + [(set (match_operand:QI 0 "nonimmediate_operand" "=Q,?R,m") (sign_extract:QI (match_operand 1 "ext_register_operand" "Q,Q,Q") (const_int 8) (const_int 8)))] @@ -2836,7 +2836,7 @@ (set_attr "mode" "SI")]) (define_insn "*extzvqi" - [(set (match_operand:QI 0 "nonimmediate_x64nomem_operand" "=Q,?R,m") + [(set (match_operand:QI 0 "nonimmediate_operand" "=Q,?R,m") (subreg:QI (zero_extract:SI (match_operand 1 "ext_register_operand" "Q,Q,Q") (const_int 8) @@ -2897,7 +2897,7 @@ [(set (zero_extract:SWI248 (match_operand 0 "ext_register_operand" "+Q,Q") (const_int 8) (const_int 8)) - (match_operand:SWI248 1 "general_x64nomem_operand" "Qn,m"))] + (match_operand:SWI248 1 "general_operand" "Qn,m"))] "" { if (CONST_INT_P (operands[1])) @@ -6087,7 +6087,7 @@ (zero_extract:SI (match_operand 1 "ext_register_operand" "0,0") (const_int 8) (const_int 8)) 0) - (match_operand:QI 2 "general_x64nomem_operand" "Qn,m")) 0)) + (match_operand:QI 2 "general_operand" "Qn,m")) 0)) (clobber (reg:CC FLAGS_REG))] "" { @@ -7889,7 +7889,7 @@ (zero_extract:SI (match_operand 0 "ext_register_operand" "Q,Q") (const_int 8) 
(const_int 8)) 0) - (match_operand:QI 1 "general_x64nomem_operand" "Qn,m")) + (match_operand:QI 1 "general_operand" "Qn,m")) (const_int 0)))] "ix86_match_ccmode (insn, CCNOmode)" "test{b}\t{%1, %h0|%h0, %1}" @@ -8417,7 +8417,7 @@ (zero_extract:SI (match_operand 1 "ext_register_operand" "0,0") (const_int 8) (const_int 8)) 0) - (match_operand:QI 2 "general_x64nomem_operand" "Qn,m")) 0)) + (match_operand:QI 2 "general_operand" "Qn,m")) 0)) (clobber (reg:CC FLAGS_REG))] "" "and{b}\t{%2, %h0|%h0, %2}" @@ -8803,7 +8803,7 @@ (zero_extract:SI (match_operand 1 "ext_register_operand" "0,0") (const_int 8) (const_int 8)) 0) - (match_operand:QI 2 "general_x64nomem_operand" "Qn,m")) 0)) + (match_operand:QI 2 "general_operand" "Qn,m")) 0)) (clobber (reg:CC FLAGS_REG))] "!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)" "<logic>{b}\t{%2, %h0|%h0, %2}" @@ -8913,7 +8913,7 @@ (zero_extract:SI (match_operand 1 "ext_register_operand" "0,0") (const_int 8) (const_int 8)) 0) - (match_operand:QI 2 "general_x64nomem_operand" "Qn,m")) + (match_operand:QI 2 "general_operand" "Qn,m")) (const_int 0))) (set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q,Q") (const_int 8) Index: config/i386/predicates.md =================================================================== --- config/i386/predicates.md (revision 243929) +++ config/i386/predicates.md (working copy) @@ -100,18 +100,6 @@ && (REGNO (op) > LAST_VIRTUAL_REGISTER || QI_REGNO_P (REGNO (op)))); }) -;; Match nonimmediate operands, but exclude memory operands on 64bit targets. -(define_predicate "nonimmediate_x64nomem_operand" - (if_then_else (match_test "TARGET_64BIT") - (match_operand 0 "register_operand") - (match_operand 0 "nonimmediate_operand"))) - -;; Match general operands, but exclude memory operands on 64bit targets. 
-(define_predicate "general_x64nomem_operand" - (if_then_else (match_test "TARGET_64BIT") - (match_operand 0 "nonmemory_operand") - (match_operand 0 "general_operand"))) - ;; Match register operands, but include memory operands for TARGET_SSE_MATH. (define_predicate "register_ssemem_operand" (if_then_else Index: testsuite/gcc.target/i386/pr78904-2.c =================================================================== --- testsuite/gcc.target/i386/pr78904-2.c (nonexistent) +++ testsuite/gcc.target/i386/pr78904-2.c (working copy) @@ -0,0 +1,48 @@ +/* PR target/78904 */ +/* { dg-do compile } */ +/* { dg-options "-O2 -masm=att" } */ + +struct S1 +{ + unsigned char pad1; + unsigned char val; + unsigned short pad2; +}; + +extern struct S1 t; + +struct S1 test_and (struct S1 a, struct S1 b) +{ + a.val &= b.val; + + return a; +} + +/* { dg-final { scan-assembler "\[ \t\]andb\[^\n\r]*, %.h" } } */ + +struct S1 test_or (struct S1 a, struct S1 b) +{ + a.val |= b.val; + + return a; +} + +/* { dg-final { scan-assembler "\[ \t\]orb\[^\n\r]*, %.h" } } */ + +struct S1 test_xor (struct S1 a, struct S1 b) +{ + a.val ^= b.val; + + return a; +} + +/* { dg-final { scan-assembler "\[ \t\]xorb\[^\n\r]*, %.h" } } */ + +struct S1 test_add (struct S1 a, struct S1 b) +{ + a.val += t.val; + + return a; +} + +/* { dg-final { scan-assembler "\[ \t\]addb\[^\n\r]*, %.h" } } */