------- Comment #20 from jakub at gcc dot gnu dot org 2007-10-28 21:11 ------- Actually, we probably don't need to write to the rws_sum array at all when in safe_group_barrier_needed, and then we wouldn't need to copy it around (save and restore it) at all.
--- config/ia64/ia64.c~ 2007-10-28 22:00:24.000000000 +0100 +++ config/ia64/ia64.c 2007-10-28 22:04:26.000000000 +0100 @@ -5353,6 +5353,7 @@ static int rtx_needs_barrier (rtx, struc static void init_insn_group_barriers (void); static int group_barrier_needed (rtx); static int safe_group_barrier_needed (rtx); +static int in_safe_group_barrier; /* Update *RWS for REGNO, which is being written by the current instruction, with predicate PRED, and associated register flags in FLAGS. */ @@ -5407,7 +5408,8 @@ rws_access_regno (int regno, struct reg_ { case 0: /* The register has not been written yet. */ - rws_update (regno, flags, pred); + if (!in_safe_group_barrier) + rws_update (regno, flags, pred); break; case 1: @@ -5421,7 +5423,8 @@ rws_access_regno (int regno, struct reg_ ; else if ((rws_sum[regno].first_pred ^ 1) != pred) need_barrier = 1; - rws_update (regno, flags, pred); + if (!in_safe_group_barrier) + rws_update (regno, flags, pred); break; case 2: @@ -5433,8 +5436,11 @@ rws_access_regno (int regno, struct reg_ ; else need_barrier = 1; - rws_sum[regno].written_by_and = flags.is_and; - rws_sum[regno].written_by_or = flags.is_or; + if (!in_safe_group_barrier) + { + rws_sum[regno].written_by_and = flags.is_and; + rws_sum[regno].written_by_or = flags.is_or; + } break; default: @@ -6099,17 +6105,16 @@ int safe_group_barrier_needed_cnt[5]; static int safe_group_barrier_needed (rtx insn) { - struct reg_write_state rws_saved[NUM_REGS]; int saved_first_instruction; int t; - memcpy (rws_saved, rws_sum, NUM_REGS * sizeof *rws_saved); saved_first_instruction = first_instruction; + in_safe_group_barrier = 1; t = group_barrier_needed (insn); - memcpy (rws_sum, rws_saved, NUM_REGS * sizeof *rws_saved); first_instruction = saved_first_instruction; + in_safe_group_barrier = 0; return t; } together with the other patches gives (everything is x86_64-linux -> ia64-linux cross, would need to measure it on ia64-linux native) scheduling 2 : 5.20 (78%) usr 0.01 (50%) sys 5.20 (77%) 
wall 1970 kB (15%) ggc or ~ 45% speedup on this testcase. -- jakub at gcc dot gnu dot org changed: What |Removed |Added ---------------------------------------------------------------------------- CC| |wilson at gcc dot gnu dot | |org http://gcc.gnu.org/bugzilla/show_bug.cgi?id=33922