Hello! After the recent x86 EXTZ/EXTZV improvements, we can extend the BT splitters to generate BT instructions with immediate operands. The improvement can be seen with the attached testcases.
The benefit is obvious for BT with immediates 32 <= n <= 63:

   0:  48 b8 00 00 00 00 00    movabs $0x1000000000000000,%rax
   7:  00 00 10
   a:  48 85 c7                test   %rax,%rdi

vs.:

   0:  48 0f ba e7 3c          bt     $0x3c,%rdi

The benefit with operands 0 <= n <= 31 is also noticeable:

   0:  f7 c7 00 04 00 00       test   $0x400,%edi

vs.:

   0:  0f ba e7 0a             bt     $0xa,%edi

BT has *slightly* higher latency than TEST (0.33 vs. 0.25 cycles on a modern processor), so I have limited the conversion to -Os when the tested bit is in the low 32 bits.

In addition to the 1556 "BT %reg, %reg" insns already present in the cc1 executable, the patched compiler generated an additional 628 "BT #<imm>,%reg" instructions in cc1.

2015-07-07  Uros Bizjak  <ubiz...@gmail.com>

	* config/i386/i386.md (*jcc_bt<mode>): Only split before reload.
	Remove operand constraints.  Change operand 2 predicate to
	nonmemory operand.  Limit const_int values to mode bitsize.  Only
	allow const_int values less than 32 when optimizing for size.
	(*jcc_bt<mode>_1, *jcc_bt<mode>_mask): Only split before reload.
	Remove operand constraints.
	(*bt<mode>): Use SImode for const_int values less than 32.
	(regmode): Remove mode attribute.

testsuite/ChangeLog:

2015-07-07  Uros Bizjak  <ubiz...@gmail.com>

	* gcc.target/i386/bt-3.c: New test.
	* gcc.target/i386/bt-4.c: Ditto.

The patch was bootstrapped and regression tested on x86_64-linux-gnu {,-m32}.

I'll commit the patch to mainline as soon as the regression test ends.

Uros.
Index: config/i386/i386.md =================================================================== --- config/i386/i386.md (revision 225484) +++ config/i386/i386.md (working copy) @@ -10765,8 +10765,6 @@ DONE; }) -(define_mode_attr regmode [(SI "k") (DI "q")]) - (define_insn "*bt<mode>" [(set (reg:CCC FLAGS_REG) (compare:CCC @@ -10775,11 +10773,132 @@ (const_int 1) (match_operand:SI 1 "nonmemory_operand" "rN")) (const_int 0)))] - "TARGET_USE_BT || optimize_function_for_size_p (cfun)" - "bt{<imodesuffix>}\t{%<regmode>1, %0|%0, %<regmode>1}" + "" +{ + switch (get_attr_mode (insn)) + { + case MODE_SI: + return "bt{l}\t{%1, %k0|%k0, %1}"; + + case MODE_DI: + return "bt{q}\t{%q1, %0|%0, %q1}"; + + default: + gcc_unreachable (); + } +} [(set_attr "type" "alu1") (set_attr "prefix_0f" "1") - (set_attr "mode" "<MODE>")]) + (set (attr "mode") + (if_then_else + (and (match_test "CONST_INT_P (operands[1])") + (match_test "INTVAL (operands[1]) < 32")) + (const_string "SI") + (const_string "<MODE>")))]) + +(define_insn_and_split "*jcc_bt<mode>" + [(set (pc) + (if_then_else (match_operator 0 "bt_comparison_operator" + [(zero_extract:SWI48 + (match_operand:SWI48 1 "register_operand") + (const_int 1) + (match_operand:SI 2 "nonmemory_operand")) + (const_int 0)]) + (label_ref (match_operand 3)) + (pc))) + (clobber (reg:CC FLAGS_REG))] + "(TARGET_USE_BT || optimize_function_for_size_p (cfun)) + && (CONST_INT_P (operands[2]) + ? (INTVAL (operands[2]) < GET_MODE_BITSIZE (<MODE>mode) + && INTVAL (operands[2]) + >= (optimize_function_for_size_p (cfun) ? 
0 : 32)) + : register_operand (operands[2], SImode)) + && can_create_pseudo_p ()" + "#" + "&& 1" + [(set (reg:CCC FLAGS_REG) + (compare:CCC + (zero_extract:SWI48 + (match_dup 1) + (const_int 1) + (match_dup 2)) + (const_int 0))) + (set (pc) + (if_then_else (match_op_dup 0 [(reg:CCC FLAGS_REG) (const_int 0)]) + (label_ref (match_dup 3)) + (pc)))] +{ + operands[0] = shallow_copy_rtx (operands[0]); + PUT_CODE (operands[0], reverse_condition (GET_CODE (operands[0]))); +}) + +(define_insn_and_split "*jcc_bt<mode>_1" + [(set (pc) + (if_then_else (match_operator 0 "bt_comparison_operator" + [(zero_extract:SWI48 + (match_operand:SWI48 1 "register_operand") + (const_int 1) + (zero_extend:SI + (match_operand:QI 2 "register_operand"))) + (const_int 0)]) + (label_ref (match_operand 3)) + (pc))) + (clobber (reg:CC FLAGS_REG))] + "(TARGET_USE_BT || optimize_function_for_size_p (cfun)) + && can_create_pseudo_p ()" + "#" + "&& 1" + [(set (reg:CCC FLAGS_REG) + (compare:CCC + (zero_extract:SWI48 + (match_dup 1) + (const_int 1) + (match_dup 2)) + (const_int 0))) + (set (pc) + (if_then_else (match_op_dup 0 [(reg:CCC FLAGS_REG) (const_int 0)]) + (label_ref (match_dup 3)) + (pc)))] +{ + operands[2] = simplify_gen_subreg (SImode, operands[2], QImode, 0); + operands[0] = shallow_copy_rtx (operands[0]); + PUT_CODE (operands[0], reverse_condition (GET_CODE (operands[0]))); +}) + +;; Avoid useless masking of bit offset operand. 
+(define_insn_and_split "*jcc_bt<mode>_mask" + [(set (pc) + (if_then_else (match_operator 0 "bt_comparison_operator" + [(zero_extract:SWI48 + (match_operand:SWI48 1 "register_operand") + (const_int 1) + (and:SI + (match_operand:SI 2 "register_operand") + (match_operand 3 "const_int_operand")))]) + (label_ref (match_operand 4)) + (pc))) + (clobber (reg:CC FLAGS_REG))] + "(TARGET_USE_BT || optimize_function_for_size_p (cfun)) + && (INTVAL (operands[3]) & (GET_MODE_BITSIZE (<MODE>mode)-1)) + == GET_MODE_BITSIZE (<MODE>mode)-1 + && can_create_pseudo_p ()" + "#" + "&& 1" + [(set (reg:CCC FLAGS_REG) + (compare:CCC + (zero_extract:SWI48 + (match_dup 1) + (const_int 1) + (match_dup 2)) + (const_int 0))) + (set (pc) + (if_then_else (match_op_dup 0 [(reg:CCC FLAGS_REG) (const_int 0)]) + (label_ref (match_dup 4)) + (pc)))] +{ + operands[0] = shallow_copy_rtx (operands[0]); + PUT_CODE (operands[0], reverse_condition (GET_CODE (operands[0]))); +}) ;; Store-flag instructions. @@ -11036,102 +11155,6 @@ FAIL; }) -(define_insn_and_split "*jcc_bt<mode>" - [(set (pc) - (if_then_else (match_operator 0 "bt_comparison_operator" - [(zero_extract:SWI48 - (match_operand:SWI48 1 "register_operand" "r") - (const_int 1) - (match_operand:SI 2 "register_operand" "r")) - (const_int 0)]) - (label_ref (match_operand 3)) - (pc))) - (clobber (reg:CC FLAGS_REG))] - "TARGET_USE_BT || optimize_function_for_size_p (cfun)" - "#" - "&& 1" - [(set (reg:CCC FLAGS_REG) - (compare:CCC - (zero_extract:SWI48 - (match_dup 1) - (const_int 1) - (match_dup 2)) - (const_int 0))) - (set (pc) - (if_then_else (match_op_dup 0 [(reg:CCC FLAGS_REG) (const_int 0)]) - (label_ref (match_dup 3)) - (pc)))] -{ - operands[0] = shallow_copy_rtx (operands[0]); - PUT_CODE (operands[0], reverse_condition (GET_CODE (operands[0]))); -}) - -(define_insn_and_split "*jcc_bt<mode>_1" - [(set (pc) - (if_then_else (match_operator 0 "bt_comparison_operator" - [(zero_extract:SWI48 - (match_operand:SWI48 1 "register_operand" "r") - (const_int 
1) - (zero_extend:SI - (match_operand:QI 2 "register_operand" "r"))) - (const_int 0)]) - (label_ref (match_operand 3)) - (pc))) - (clobber (reg:CC FLAGS_REG))] - "TARGET_USE_BT || optimize_function_for_size_p (cfun)" - "#" - "&& 1" - [(set (reg:CCC FLAGS_REG) - (compare:CCC - (zero_extract:SWI48 - (match_dup 1) - (const_int 1) - (match_dup 2)) - (const_int 0))) - (set (pc) - (if_then_else (match_op_dup 0 [(reg:CCC FLAGS_REG) (const_int 0)]) - (label_ref (match_dup 3)) - (pc)))] -{ - operands[2] = simplify_gen_subreg (SImode, operands[2], QImode, 0); - operands[0] = shallow_copy_rtx (operands[0]); - PUT_CODE (operands[0], reverse_condition (GET_CODE (operands[0]))); -}) - -;; Avoid useless masking of bit offset operand. -(define_insn_and_split "*jcc_bt<mode>_mask" - [(set (pc) - (if_then_else (match_operator 0 "bt_comparison_operator" - [(zero_extract:SWI48 - (match_operand:SWI48 1 "register_operand" "r") - (const_int 1) - (and:SI - (match_operand:SI 2 "register_operand" "r") - (match_operand 3 "const_int_operand" "n")))]) - (label_ref (match_operand 4)) - (pc))) - (clobber (reg:CC FLAGS_REG))] - "(TARGET_USE_BT || optimize_function_for_size_p (cfun)) - && (INTVAL (operands[3]) & (GET_MODE_BITSIZE (<MODE>mode)-1)) - == GET_MODE_BITSIZE (<MODE>mode)-1" - "#" - "&& 1" - [(set (reg:CCC FLAGS_REG) - (compare:CCC - (zero_extract:SWI48 - (match_dup 1) - (const_int 1) - (match_dup 2)) - (const_int 0))) - (set (pc) - (if_then_else (match_op_dup 0 [(reg:CCC FLAGS_REG) (const_int 0)]) - (label_ref (match_dup 4)) - (pc)))] -{ - operands[0] = shallow_copy_rtx (operands[0]); - PUT_CODE (operands[0], reverse_condition (GET_CODE (operands[0]))); -}) - ;; Define combination compare-and-branch fp compare instructions to help ;; combine. 
Index: testsuite/gcc.target/i386/bt-3.c =================================================================== --- testsuite/gcc.target/i386/bt-3.c (revision 0) +++ testsuite/gcc.target/i386/bt-3.c (working copy) @@ -0,0 +1,14 @@ +/* { dg-do compile { target { ! { ia32 } } } } */ +/* { dg-options "-O2 -mtune=core2" } */ + +extern void foo (void); + +int test (long long x) +{ + if (x & ( 0x01ULL << 60 )) + foo (); + + return 0; +} + +/* { dg-final { scan-assembler "btq\[ \t\]" } } */ Index: testsuite/gcc.target/i386/bt-4.c =================================================================== --- testsuite/gcc.target/i386/bt-4.c (revision 0) +++ testsuite/gcc.target/i386/bt-4.c (working copy) @@ -0,0 +1,14 @@ +/* { dg-do compile } */ +/* { dg-options "-Os -mtune=core2" } */ + +extern void foo (void); + +int test (long x) +{ + if (x & ( 0x01UL << 10 )) + foo (); + + return 0; +} + +/* { dg-final { scan-assembler "btl\[ \t\]" } } */