Hello! It turned out that "rep; bsf" is not a 100% substitute for "tzcnt" since the former sets ZF for zero input, while the later sets CF. Attached patch fixes this oversight in ffs sequence.
2012-05-24 Uros Bizjak <ubiz...@gmail.com> PR target/53399 * config/i386/i386.md (ffs<mode>2): Generate CCCmode compare for TARGET_BMI. (ffssi2_no_cmove): Ditto. (*ffs<mode>_1): Remove insn pattern. (*tzcnt<mode>_1): New insn pattern. (*bsf<mode>1): Ditto. Tested on x86_64-pc-linux-gnu {,-m32}. Tested on BMI target by Kirill. Committed to mainline SVN. Uros.
Index: i386.md =================================================================== --- i386.md (revision 187694) +++ i386.md (working copy) @@ -12132,26 +12132,31 @@ (define_expand "ffs<mode>2" [(set (match_dup 2) (const_int -1)) - (parallel [(set (reg:CCZ FLAGS_REG) - (compare:CCZ - (match_operand:SWI48 1 "nonimmediate_operand") - (const_int 0))) + (parallel [(set (match_dup 3) (match_dup 4)) (set (match_operand:SWI48 0 "register_operand") - (ctz:SWI48 (match_dup 1)))]) + (ctz:SWI48 + (match_operand:SWI48 1 "nonimmediate_operand")))]) (set (match_dup 0) (if_then_else:SWI48 - (eq (reg:CCZ FLAGS_REG) (const_int 0)) + (eq (match_dup 3) (const_int 0)) (match_dup 2) (match_dup 0))) (parallel [(set (match_dup 0) (plus:SWI48 (match_dup 0) (const_int 1))) (clobber (reg:CC FLAGS_REG))])] "" { + enum machine_mode flags_mode; + if (<MODE>mode == SImode && !TARGET_CMOVE) { emit_insn (gen_ffssi2_no_cmove (operands[0], operands [1])); DONE; } + + flags_mode = TARGET_BMI ? CCCmode : CCZmode; + operands[2] = gen_reg_rtx (<MODE>mode); + operands[3] = gen_rtx_REG (flags_mode, FLAGS_REG); + operands[4] = gen_rtx_COMPARE (flags_mode, operands[1], const0_rtx); }) (define_insn_and_split "ffssi2_no_cmove" @@ -12162,11 +12167,10 @@ "!TARGET_CMOVE" "#" "&& reload_completed" - [(parallel [(set (reg:CCZ FLAGS_REG) - (compare:CCZ (match_dup 1) (const_int 0))) + [(parallel [(set (match_dup 4) (match_dup 5)) (set (match_dup 0) (ctz:SI (match_dup 1)))]) (set (strict_low_part (match_dup 3)) - (eq:QI (reg:CCZ FLAGS_REG) (const_int 0))) + (eq:QI (match_dup 4) (const_int 0))) (parallel [(set (match_dup 2) (neg:SI (match_dup 2))) (clobber (reg:CC FLAGS_REG))]) (parallel [(set (match_dup 0) (ior:SI (match_dup 0) (match_dup 2))) @@ -12174,37 +12178,38 @@ (parallel [(set (match_dup 0) (plus:SI (match_dup 0) (const_int 1))) (clobber (reg:CC FLAGS_REG))])] { + enum machine_mode flags_mode = TARGET_BMI ? CCCmode : CCZmode; + operands[3] = gen_lowpart (QImode, operands[2]); + operands[4] = gen_rtx_REG (flags_mode, FLAGS_REG); + operands[5] = gen_rtx_COMPARE (flags_mode, operands[1], const0_rtx); + ix86_expand_clear (operands[2]); }) -(define_insn "*ffs<mode>_1" +(define_insn "*tzcnt<mode>_1" + [(set (reg:CCC FLAGS_REG) + (compare:CCC (match_operand:SWI48 1 "nonimmediate_operand" "rm") + (const_int 0))) + (set (match_operand:SWI48 0 "register_operand" "=r") + (ctz:SWI48 (match_dup 1)))] + "TARGET_BMI" + "tzcnt{<imodesuffix>}\t{%1, %0|%0, %1}" + [(set_attr "type" "alu1") + (set_attr "prefix_0f" "1") + (set_attr "prefix_rep" "1") + (set_attr "mode" "<MODE>")]) + +(define_insn "*bsf<mode>_1" [(set (reg:CCZ FLAGS_REG) (compare:CCZ (match_operand:SWI48 1 "nonimmediate_operand" "rm") (const_int 0))) (set (match_operand:SWI48 0 "register_operand" "=r") (ctz:SWI48 (match_dup 1)))] "" -{ - if (TARGET_BMI) - return "tzcnt{<imodesuffix>}\t{%1, %0|%0, %1}"; - else if (optimize_function_for_size_p (cfun)) - ; - else if (TARGET_GENERIC) - /* tzcnt expands to rep;bsf and we can use it even if !TARGET_BMI. */ - return "rep; bsf{<imodesuffix>}\t{%1, %0|%0, %1}"; - - return "bsf{<imodesuffix>}\t{%1, %0|%0, %1}"; -} + "bsf{<imodesuffix>}\t{%1, %0|%0, %1}" [(set_attr "type" "alu1") (set_attr "prefix_0f" "1") - (set (attr "prefix_rep") - (if_then_else - (ior (match_test "TARGET_BMI") - (and (not (match_test "optimize_function_for_size_p (cfun)")) - (match_test "TARGET_GENERIC"))) - (const_string "1") - (const_string "0"))) (set_attr "mode" "<MODE>")]) (define_insn "ctz<mode>2"