------- Comment #1 from Joey dot ye at intel dot com 2008-09-18 16:01 ------- The root cause is that the instruction length of the fused jcc is set to 16, which prevents the block from being merged and copied. For some reason, Core2 runs poorly with an unmerged branch block under certain circumstances.
Following patch fixes it: Index: i386.md =================================================================== --- i386.md (revision 3923) +++ i386.md (working copy) @@ -421,6 +421,9 @@ ] (const_int 1))) +(define_attr "length_jcc_fuse" "" + (const_int 0)) + ;; The (bounding maximum) length of an instruction in bytes. ;; ??? fistp and frndint are in fact fldcw/{fistp,frndint}/fldcw sequences. ;; Later we may want to split them and compute proper length as for @@ -442,7 +445,8 @@ (plus (attr "prefix_rep") (plus (attr "prefix_data16") (plus (attr "length_immediate") - (attr "length_address"))))))) + (plus (attr "length_address") + (attr "length_jcc_fuse")))))))) ;; The `memory' attribute is `none' if no memory is referenced, `load' or ;; `store' if there is a simple memory reference therein, or `unknown' @@ -645,7 +649,7 @@ (include "k6.md") (include "athlon.md") (include "geode.md") -;;(include "core2.md") +(include "core2.md") ;; Operand and operator predicates and constraints @@ -14033,7 +14037,8 @@ return "test{<imodesuffix>}\t%2, %2\n\t" "%+j%E1\t%l0\t" ASM_COMMENT_START " fused"; } - [(set_attr "type" "multi") + [(set_attr "type" "icmp") + (set_attr "length_jcc_fuse" "2") (set_attr "mode" "<MODE>")]) (define_insn "*jcc_fused_2" @@ -14048,7 +14053,8 @@ return "test{<imodesuffix>}\t%2, %2\n\t" "%+j%e1\t%l0\t" ASM_COMMENT_START " fused"; } - [(set_attr "type" "multi") + [(set_attr "type" "icmp") + (set_attr "length_jcc_fuse" "2") (set_attr "mode" "<MODE>")]) (define_insn "*jcc_fused_3" @@ -14066,7 +14072,8 @@ return "cmp{<imodesuffix>}\t{%3, %2|%2, %3}\n\t" "%+j%E1\t%l0\t" ASM_COMMENT_START " fused"; } - [(set_attr "type" "multi") + [(set_attr "type" "icmp") + (set_attr "length_jcc_fuse" "2") (set_attr "mode" "<MODE>")]) (define_insn "*jcc_fused_4" @@ -14084,7 +14091,8 @@ return "cmp{<imodesuffix>}\t{%3, %2|%2, %3}\n\t" "%+j%e1\t%l0\t" ASM_COMMENT_START " fused"; } - [(set_attr "type" "multi") + [(set_attr "type" "icmp") + (set_attr "length_jcc_fuse" "2") (set_attr 
"mode" "<MODE>")]) ;; In general it is not safe to assume too much about CCmode registers, -- http://gcc.gnu.org/bugzilla/show_bug.cgi?id=37571