[PATCH] modulo-sched: skip loops with strange register defs [PR100225]

2021-04-28 Thread Roman Zhuykov via Gcc-patches
Hi all!

Situation from PR was already caught earlier locally.  So, I've just extracted 
appropriate part, it also slightly modifies loop checks related to 
non-single-set instructions.
Patch (attached) was successfully bootstrapped/regtested on aarch64-linux on 
all active branches (8-12) with modulo-sched enabled by default, usual 
amd64-linux reg-strap is also ok.

Pushing to trunk after 24h if no objections, backporting to 8-11 branches next 
week.

Roman

--
modulo-sched: skip loops with strange register defs [PR100225]

PR84878 fix adds an assertion which can fail, e.g. when stack pointer
is adjusted inside the loop.  We have to prevent it and search earlier
for any 'strange' instruction.  The solution is to skip the whole loop
if using 'note_stores' we found that one of hard registers is in
'df->regular_block_artificial_uses' set.

Also patch properly prohibit not single-set instruction in loop body.

gcc/ChangeLog:

PR rtl-optimization/100225
PR rtl-optimization/84878
* modulo-sched.c (sms_schedule): Use note_stores to skip loops
where we have an instruction which touches (writes) any hard
register from df->regular_block_artificial_uses set.
Allow not-single-set instruction only right before basic block
tail.

gcc/testsuite/ChangeLog:

PR rtl-optimization/100225
PR rtl-optimization/84878
* gcc.dg/pr100225.c: New test.

libgomp/ChangeLog:

* testsuite/libgomp.oacc-c-c++-common/atomic_capture-3.c: New test.

--
modulo-sched: skip loops with strange register defs [PR100225]

PR84878 fix adds an assertion which can fail, e.g. when stack pointer
is adjusted inside the loop.  We have to prevent it and search earlier
for any 'strange' instruction.  The solution is to skip the whole loop
if using 'note_stores' we found that one of hard registers is in
'df->regular_block_artificial_uses' set.

Also patch properly prohibit not single-set instruction in loop body.

gcc/ChangeLog:

	PR rtl-optimization/100225
	PR rtl-optimization/84878
	* modulo-sched.c (sms_schedule): Use note_stores to skip loops
	where we have an instruction which touches (writes) any hard
	register from df->regular_block_artificial_uses set.
	Allow not-single-set instruction only right before basic block
	tail.

gcc/testsuite/ChangeLog:

	PR rtl-optimization/100225
	PR rtl-optimization/84878
	* gcc.dg/pr100225.c: New test.

libgomp/ChangeLog:

	* testsuite/libgomp.oacc-c-c++-common/atomic_capture-3.c: New test.

diff --git a/gcc/modulo-sched.c b/gcc/modulo-sched.c
index 6ad960ec1f..e72e46db38 100644
--- a/gcc/modulo-sched.c
+++ b/gcc/modulo-sched.c
@@ -43,6 +43,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "tree-pass.h"
 #include "dbgcnt.h"
 #include "loop-unroll.h"
+#include "hard-reg-set.h"
 
 #ifdef INSN_SCHEDULING
 
@@ -1356,6 +1357,7 @@ sms_schedule (void)
   basic_block condition_bb = NULL;
   edge latch_edge;
   HOST_WIDE_INT trip_count, max_trip_count;
+  HARD_REG_SET prohibited_regs;
 
   loop_optimizer_init (LOOPS_HAVE_PREHEADERS
 		   | LOOPS_HAVE_RECORDED_EXITS);
@@ -1385,6 +1387,8 @@ sms_schedule (void)
  We use loop->num as index into this array.  */
   g_arr = XCNEWVEC (ddg_ptr, number_of_loops (cfun));
 
+  REG_SET_TO_HARD_REG_SET (prohibited_regs, &df->regular_block_artificial_uses);
+
   if (dump_file)
   {
 fprintf (dump_file, "\n\nSMS analysis phase\n");
@@ -1469,23 +1473,31 @@ sms_schedule (void)
   }
 
   /* Don't handle BBs with calls or barriers
-	 or !single_set with the exception of instructions that include
-	 count_reg---these instructions are part of the control part
-	 that do-loop recognizes.
+	 or !single_set with the exception of do-loop control part insns.
  ??? Should handle insns defining subregs.  */
- for (insn = head; insn != NEXT_INSN (tail); insn = NEXT_INSN (insn))
-  {
- rtx set;
-
-if (CALL_P (insn)
-|| BARRIER_P (insn)
-|| (NONDEBUG_INSN_P (insn) && !JUMP_P (insn)
-&& !single_set (insn) && GET_CODE (PATTERN (insn)) != USE
-&& !reg_mentioned_p (count_reg, insn))
-|| (INSN_P (insn) && (set = single_set (insn))
-&& GET_CODE (SET_DEST (set)) == SUBREG))
-break;
-  }
+  for (insn = head; insn != NEXT_INSN (tail); insn = NEXT_INSN (insn))
+	{
+	  if (INSN_P (insn))
+	{
+	  HARD_REG_SET regs;
+	  CLEAR_HARD_REG_SET (regs);
+	  note_stores (insn, record_hard_reg_sets, ®s);
+	  if (hard_reg_set_intersect_p (regs, prohibited_regs))
+		break;
+	}
+
+	  if (CALL_P (insn)
+	  || BARRIER_P (insn)
+	  || (INSN_P (insn) && single_set (insn)
+		  && GET_CODE (SET_DEST (single_set (insn))) == SUBREG)
+	  /* Not a single set.  */
+	  |

[committed] minor: fix indentation in ddg.c

2020-03-10 Thread Roman Zhuykov via Gcc-patches
This obvious patch fixes indentation in PR90001-related code.
Committed as r10-7106.

Roman

--
gcc/ChangeLog:
* ddg.c (create_ddg): Fix intendation.
(set_recurrence_length): Likewise.
(create_ddg_all_sccs): Likewise.

diff --git a/gcc/ddg.c b/gcc/ddg.c
index aae92adf89a..ca8cb74823d 100644
--- a/gcc/ddg.c
+++ b/gcc/ddg.c
@@ -633,7 +633,7 @@ create_ddg (basic_block bb, int closing_branch_deps)
   g->nodes[i].aux.count = -1;
   g->nodes[i].max_dist = XCNEWVEC (int, num_nodes);
   for (j = 0; j < num_nodes; j++)
- g->nodes[i].max_dist[j] = -1;
+   g->nodes[i].max_dist[j] = -1;
 
   g->nodes[i++].insn = insn;
   first_note = NULL;
@@ -838,7 +838,7 @@ set_recurrence_length (ddg_scc_ptr scc)
   int length = src->max_dist[dest->cuid];
 
   if (length < 0)
-continue;
+   continue;
 
   length += backarc->latency;
   result = MAX (result, (length / distance));
@@ -1069,8 +1069,8 @@ create_ddg_all_sccs (ddg_ptr g)
 
   n->max_dist[k] = 0;
   for (e = n->out; e; e = e->next_out)
-if (e->distance == 0 && g->nodes[e->dest->cuid].aux.count == 
n->aux.count)
-  n->max_dist[e->dest->cuid] = e->latency;
+   if (e->distance == 0 && g->nodes[e->dest->cuid].aux.count == 
n->aux.count)
+ n->max_dist[e->dest->cuid] = e->latency;
 }
 
   /* Run main Floid-Warshall loop.  We use only non-backarc edges
@@ -1079,19 +1079,19 @@ create_ddg_all_sccs (ddg_ptr g)
 {
   scc = g->nodes[k].aux.count;
   if (scc != -1)
-{
-  for (i = 0; i < num_nodes; i++)
-if (g->nodes[i].aux.count == scc)
-  for (j = 0; j < num_nodes; j++)
-if (g->nodes[j].aux.count == scc
-&& g->nodes[i].max_dist[k] >= 0
-&& g->nodes[k].max_dist[j] >= 0)
-  {
-way = g->nodes[i].max_dist[k] + g->nodes[k].max_dist[j];
-if (g->nodes[i].max_dist[j] < way)
-  g->nodes[i].max_dist[j] = way;
-  }
-}
+   {
+ for (i = 0; i < num_nodes; i++)
+   if (g->nodes[i].aux.count == scc)
+ for (j = 0; j < num_nodes; j++)
+   if (g->nodes[j].aux.count == scc
+   && g->nodes[i].max_dist[k] >= 0
+   && g->nodes[k].max_dist[j] >= 0)
+ {
+   way = g->nodes[i].max_dist[k] + g->nodes[k].max_dist[j];
+   if (g->nodes[i].max_dist[j] < way)
+ g->nodes[i].max_dist[j] = way;
+ }
+   }
 }
 
   /* Calculate recurrence_length using max_dist info.  */



[committed] loop-iv: make find_simple_exit static

2020-03-10 Thread Roman Zhuykov via Gcc-patches
This patch marks find_simple_exit function as static.
Committed as r10-7107 after successful bootstrap on x86_64 and powerpc64le.

Roman

--
Function 'find_simple_exit' is used only from loop-iv.c
In 2004-2006 it was also used in predict.c, but since r118694
(992c31e62304ed5d34247dbdef2db276d08fac05) it does not.

gcc/ChangeLog:
* loop-iv.c (find_simple_exit): Make it static.
* cfgloop.h: Remove the corresponding prototype.

diff --git a/gcc/cfgloop.h b/gcc/cfgloop.h
index 11378cadd41..1c49a8b8c2d 100644
--- a/gcc/cfgloop.h
+++ b/gcc/cfgloop.h
@@ -499,7 +499,6 @@ extern bool iv_analyze_expr (rtx_insn *, scalar_int_mode, 
rtx,
 class rtx_iv *);
 extern rtx get_iv_value (class rtx_iv *, rtx);
 extern bool biv_p (rtx_insn *, scalar_int_mode, rtx);
-extern void find_simple_exit (class loop *, class niter_desc *);
 extern void iv_analysis_done (void);
 
 extern class niter_desc *get_simple_loop_desc (class loop *loop);
diff --git a/gcc/loop-iv.c b/gcc/loop-iv.c
index 6a59954f60a..d7b3d90f047 100644
--- a/gcc/loop-iv.c
+++ b/gcc/loop-iv.c
@@ -2915,7 +2915,7 @@ check_simple_exit (class loop *loop, edge e, class 
niter_desc *desc)
 
 /* Finds a simple exit of LOOP and stores its description into DESC.  */
 
-void
+static void
 find_simple_exit (class loop *loop, class niter_desc *desc)
 {
   unsigned i;


Re: Fix modulo-scheduler -fcompare-debug issues

2020-03-20 Thread Roman Zhuykov via Gcc-patches
Hi all!

12.03.2020 6:17, Jeff Law wrote:
>> Current modulo-sched implementation is a bit faulty from -fcompile-debug
>> perspective.
>>
>> But right now I see that when I enable -fmodulo-sched by default, powerpc64le
>> bootstrap give comparison failure as of r10-7056.
>>
>> Comparing stages 2 and 3
>> Bootstrap comparison failure!
>> gcc/ggc-page.o differs
>>
>> That doesn't happen on released branches, so it is a kind of "regression"
>> (certainly, nobody runs bootstrap with -fmodulo-sched).
> Even though we don't have a BZ, I think a bootstrap failure, even one with
> modulo-scheduling enabled is severe enough that we should try to fix it.
> 
> OK for the trunk.
> 
> jeff

11.03.2020 11:14, Richard Biener wrote:
> 
> Yes from a RM perspective, no comments about the technical details of
> the patch.
> 
> Richard.
> 

Haven't committed that patch yet but made some additional investigation.

(0) Running tests with -fcompare-debug (without -fmodulo-sched) shows a lot of 
different failures, I've created few PRs already and they were addressed 
(Thanks to Jakub!).

There are three kinds of issues.

(0a) Technical ones, I've not reported them.  Many checking techniques are not 
ready for the compiler to run twice for one driver invocation. E.g.

FAIL: c-c++-common/diagnostic-format-json-2.c  -Wc++-compat  (test for excess 
errors)

fails because second cc1 run prints additional '[]' to the output.

All tests in gcc.dg/pch directory have a similar issue.

(0b) When only dumps differ, but not the code, e.g. PR94223 and PR94167.

(0c) When it is a real "mistake", like PR94166 or PR94211.  Or -w difference 
like in PR94189.


Now, speaking about SMS, enabling it by default catches three (again!) kinds of 
separate issues with debug instructions.  And two first are observable for many 
years, while the third one is a bit more tricky and I caught it only in 2019.  
My previous patch targeted (2) and (3) together, and now I have to split it.

(1) The first issue is that reordering doesn't touch debug instructions at all. 
 When we have found a proper schedule permute_partial_schedule function is 
called.  It constructs new order in reverse, first we move the instruction that 
should be prev_insn (jump) to its place, than the previous one, etc.
It doesn't consider moving debug instructions, so all of them are crowded 
before any non-debug instruction.  This certainly doesn't help much for debug 
information and on ppc64le causes

FAIL: gcc.dg/guality/loop-1.c   -O2  -DPREVENT_OPTIMIZATION  line 20 i == 1

and few other failures.  I never actually tried to fix those, maybe the best 
solution here is to drop all debug inside the loop when SMS succeeded.  But in 
this case we will also lose something - at least for now we have correct debug 
info when the loop was finished.


(2) The second issue is a "simple" -fcompare-debug failure, which actually can 
not lead to different code in .text (and can not cause bootstrap failure).  As 
I mentioned in previous mail it can be observed in pr42631.c example for ~10 
years.  My previous patch and small attached patch2.txt (can be applied only 
after patch3.txt) solves this.  Note instructions "stick" to nearest following 
instruction and they are moved all together.  But when one compares -g0 and -g, 
the note can instead stick to a debug instruction, and according to (1) it will 
eventually "move up" together with that debug instruction instead of "sinking" 
with the following non-debug instruction.  An obvious solution is to extend the 
"sticking" approach to debug instructions themselves.  I've used that solution 
("previous" patch) locally for ~3 years.  This change also affects the approach 
described in (1) but unfortunately doesn't fix any testcase and make something 
even worse:

FAIL: gcc.dg/guality/pr90716.c   -O1  -DPREVENT_OPTIMIZATION  line 23 j + 1 == 9

So, IMHO we should not apply this in stage4.

(3) And the last one we have to fix now if we bother about -fmodulo-sched 
ppc64le bootstrap.  Failing examples are "gcc -fcompare-debug -O2 
-fmodulo-sched --param sms-min-sc=1 gcc/testsuite/gcc.dg/torture/pr87197.c" on 
ppc64le and "gcc -fcompare-debug -O2 gcc.c-torture/execute/pr70127.c" on 
aarch64.  Contrary to scheduler itself, when building DDG graph we consider all 
debug instructions and their dependencies.  This may sometimes lead to a 
different result in sms_order_nodes function, as it depends on SCCs in DDG.  My 
previous ad-hoc solution was just to remove any "debug->non-debug" edges in 
DDG.  Now I reimplemented that by fully removing debug instructions nodes.  
I've tested patch3.txt a lot last week in different scenarios and will commit 
it in a week if no objection.


Roman




Author: Roman Zhuykov 
Date:   Thu Mar 12 19:36:22 2020 +0300

modulo-sched: fix compare-debug issue with flying note insns

This patch fixes -fcompare-debug failures in testsuite when it runs
with -fmodulo-sched enabled.

The approach is to move debug 

Re: Fix modulo-scheduler -fcompare-debug issues

2020-03-28 Thread Roman Zhuykov via Gcc-patches

Hi,

Christophe Lyon wrote 2020-03-27 19:53:

Hi,


On Fri, 20 Mar 2020 at 11:56, Roman Zhuykov via Gcc-patches
 wrote:


Hi all!

12.03.2020 6:17, Jeff Law wrote:
>> Current modulo-sched implementation is a bit faulty from -fcompile-debug
>> perspective.
>>
>> But right now I see that when I enable -fmodulo-sched by default, powerpc64le
>> bootstrap give comparison failure as of r10-7056.
>>
>> Comparing stages 2 and 3
>> Bootstrap comparison failure!
>> gcc/ggc-page.o differs
>>
>> That doesn't happen on released branches, so it is a kind of "regression"
>> (certainly, nobody runs bootstrap with -fmodulo-sched).
> Even though we don't have a BZ, I think a bootstrap failure, even one with
> modulo-scheduling enabled is severe enough that we should try to fix it.
>
> OK for the trunk.
>
> jeff

11.03.2020 11:14, Richard Biener wrote:
>
> Yes from a RM perspective, no comments about the technical details of
> the patch.
>
> Richard.
>

Haven't committed that patch yet but made some additional 
investigation.


(0) Running tests with -fcompare-debug (without -fmodulo-sched) shows 
a lot of different failures, I've created few PRs already and they 
were addressed (Thanks to Jakub!).


There are three kinds of issues.

(0a) Technical ones, I've not reported them.  Many checking techniques 
are not ready for the compiler to run twice for one driver invocation. 
E.g.


FAIL: c-c++-common/diagnostic-format-json-2.c  -Wc++-compat  (test for 
excess errors)


fails because second cc1 run prints additional '[]' to the output.

All tests in gcc.dg/pch directory have a similar issue.

(0b) When only dumps differ, but not the code, e.g. PR94223 and 
PR94167.


(0c) When it is a real "mistake", like PR94166 or PR94211.  Or -w 
difference like in PR94189.



Now, speaking about SMS, enabling it by default catches three (again!) 
kinds of separate issues with debug instructions.  And two first are 
observable for many years, while the third one is a bit more tricky 
and I caught it only in 2019.  My previous patch targeted (2) and (3) 
together, and now I have to split it.


(1) The first issue is that reordering doesn't touch debug 
instructions at all.  When we have found a proper schedule 
permute_partial_schedule function is called.  It constructs new order 
in reverse, first we move the instruction that should be prev_insn 
(jump) to its place, than the previous one, etc.
It doesn't consider moving debug instructions, so all of them are 
crowded before any non-debug instruction.  This certainly doesn't help 
much for debug information and on ppc64le causes


FAIL: gcc.dg/guality/loop-1.c   -O2  -DPREVENT_OPTIMIZATION  line 20 i 
== 1


and few other failures.  I never actually tried to fix those, maybe 
the best solution here is to drop all debug inside the loop when SMS 
succeeded.  But in this case we will also lose something - at least 
for now we have correct debug info when the loop was finished.



(2) The second issue is a "simple" -fcompare-debug failure, which 
actually can not lead to different code in .text (and can not cause 
bootstrap failure).  As I mentioned in previous mail it can be 
observed in pr42631.c example for ~10 years.  My previous patch and 
small attached patch2.txt (can be applied only after patch3.txt) 
solves this.  Note instructions "stick" to nearest following 
instruction and they are moved all together.  But when one compares 
-g0 and -g, the note can instead stick to a debug instruction, and 
according to (1) it will eventually "move up" together with that debug 
instruction instead of "sinking" with the following non-debug 
instruction.  An obvious solution is to extend the "sticking" approach 
to debug instructions themselves.  I've used that solution ("previous" 
patch) locally for ~3 years.  This change also affects the approach 
described in (1) but unfortunately doesn't fix any testcase and make 
something even worse:


FAIL: gcc.dg/guality/pr90716.c   -O1  -DPREVENT_OPTIMIZATION  line 23 
j + 1 == 9


So, IMHO we should not apply this in stage4.

(3) And the last one we have to fix now if we bother about 
-fmodulo-sched ppc64le bootstrap.  Failing examples are "gcc 
-fcompare-debug -O2 -fmodulo-sched --param sms-min-sc=1 
gcc/testsuite/gcc.dg/torture/pr87197.c" on ppc64le and "gcc 
-fcompare-debug -O2 gcc.c-torture/execute/pr70127.c" on aarch64.  
Contrary to scheduler itself, when building DDG graph we consider all 
debug instructions and their dependencies.  This may sometimes lead to 
a different result in sms_order_nodes function, as it depends on SCCs 
in DDG.  My previous ad-hoc solution was just to remove any 
"debug->non-debug" edges in DDG.  Now I reimplemented that by fully 
removin

[PATCH] modulo-sched: Carefully process loop counter initialization [PR97421]

2020-11-29 Thread Roman Zhuykov via Gcc-patches
Hi all!

Same patch attached with commit message and inlined below. It was successfully 
reg-strapped on aarch64-linux. Planning also to briefly check amd64 build 
before push.

Pushing in a few days if no objections. Any opinion about backports?


Roman

--
modulo-sched: Carefully process loop counter initialization [PR97421]

Do not allow direct adjustment of pre-header initialization instruction for
count register if is read in some instruction below in that basic block.

gcc/ChangeLog:

PR rtl-optimization/97421
* modulo-sched.c (generate_prolog_epilog): Remove forward
declaration, adjust last argument name and type.
(const_iteration_count): Add bool pointer parameter to return
whether count register is read in pre-header after its
initialization.
(sms_schedule): Fix count register initialization adjustment
procedure according to what const_iteration_count said.

gcc/testsuite/ChangeLog:

PR rtl-optimization/97421
* gcc.c-torture/execute/pr97421-1.c: New test.
* gcc.c-torture/execute/pr97421-2.c: New test.
* gcc.c-torture/execute/pr97421-3.c: New test.

diff --git a/gcc/modulo-sched.c b/gcc/modulo-sched.c
index 6f699a874e..4568674aa6 100644
--- a/gcc/modulo-sched.c
+++ b/gcc/modulo-sched.c
@@ -210,8 +210,6 @@ static int sms_order_nodes (ddg_ptr, int, int *, int *);
 static void set_node_sched_params (ddg_ptr);
 static partial_schedule_ptr sms_schedule_by_order (ddg_ptr, int, int, int *);
 static void permute_partial_schedule (partial_schedule_ptr, rtx_insn *);
-static void generate_prolog_epilog (partial_schedule_ptr, class loop *,
-rtx, rtx);
 static int calculate_stage_count (partial_schedule_ptr, int);
 static void calculate_must_precede_follow (ddg_node_ptr, int, int,
   int, int, sbitmap, sbitmap, sbitmap);
@@ -391,30 +389,40 @@ doloop_register_get (rtx_insn *head, rtx_insn *tail)
this constant.  Otherwise return 0.  */
 static rtx_insn *
 const_iteration_count (rtx count_reg, basic_block pre_header,
-  int64_t * count)
+  int64_t *count, bool* adjust_inplace)
 {
   rtx_insn *insn;
   rtx_insn *head, *tail;
 
+  *adjust_inplace = false;
+  bool read_after = false;
+
   if (! pre_header)
 return NULL;
 
   get_ebb_head_tail (pre_header, pre_header, &head, &tail);
 
   for (insn = tail; insn != PREV_INSN (head); insn = PREV_INSN (insn))
-if (NONDEBUG_INSN_P (insn) && single_set (insn) &&
-   rtx_equal_p (count_reg, SET_DEST (single_set (insn
+if (single_set (insn) && rtx_equal_p (count_reg,
+ SET_DEST (single_set (insn
   {
rtx pat = single_set (insn);
 
if (CONST_INT_P (SET_SRC (pat)))
  {
*count = INTVAL (SET_SRC (pat));
+   *adjust_inplace = !read_after;
return insn;
  }
 
return NULL;
   }
+else if (NONDEBUG_INSN_P (insn) && reg_mentioned_p (count_reg, insn))
+  {
+   read_after = true;
+   if (reg_set_p (count_reg, insn))
+  break;
+  }
 
   return NULL;
 }
@@ -1126,7 +1134,7 @@ duplicate_insns_of_cycles (partial_schedule_ptr ps, int 
from_stage,
 /* Generate the instructions (including reg_moves) for prolog & epilog.  */
 static void
 generate_prolog_epilog (partial_schedule_ptr ps, class loop *loop,
-rtx count_reg, rtx count_init)
+   rtx count_reg, bool adjust_init)
 {
   int i;
   int last_stage = PS_STAGE_COUNT (ps) - 1;
@@ -1135,12 +1143,12 @@ generate_prolog_epilog (partial_schedule_ptr ps, class 
loop *loop,
   /* Generate the prolog, inserting its insns on the loop-entry edge.  */
   start_sequence ();
 
-  if (!count_init)
+  if (adjust_init)
 {
   /* Generate instructions at the beginning of the prolog to
- adjust the loop count by STAGE_COUNT.  If loop count is constant
- (count_init), this constant is adjusted by STAGE_COUNT in
- generate_prolog_epilog function.  */
+adjust the loop count by STAGE_COUNT.  If loop count is constant
+and it not used anywhere in prologue, this constant is adjusted by
+STAGE_COUNT outside of generate_prolog_epilog function.  */
   rtx sub_reg = NULL_RTX;
 
   sub_reg = expand_simple_binop (GET_MODE (count_reg), MINUS, count_reg,
@@ -1528,7 +1536,8 @@ sms_schedule (void)
   rtx_insn *count_init;
   int mii, rec_mii, stage_count, min_cycle;
   int64_t loop_count = 0;
-  bool opt_sc_p;
+  bool opt_sc_p, adjust_inplace = false;
+  basic_block pre_header;
 
   if (! (g = g_arr[loop->num]))
 continue;
@@ -1569,19 +1578,13 @@ sms_schedule (void)
}
 
 
-  /* In case of th loop have doloop register it gets special
-handling.  */
-  count_init = NULL;
-  if ((count_reg = do