I encountered a problem with the loop-im pass.
I compiled the program dhry2reg, which belongs to unixbench
(https://github.com/kdlucas/byte-unixbench).
The GCC version used was:
gcc (GCC) 12.3.0
The commands executed were as follows:
make
./Run -c -i 1 dhry2reg
The results are shown below.
Dhrystone 2 using register variables 0.1 lps (10.0 s, 1
samples)
System Benchmarks Partial Index BASELINE RESULT INDEX
Dhrystone 2 using register variables 116700.0 0.1 0.0
========
System Benchmarks Index Score (Partial Only) 10.0
Obviously, the "INDEX" value (0.0) is abnormal.
I wrote a demo named dhry.c based on the dhry2reg logic.
// dhry.c
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <signal.h>
#include <unistd.h>
/* Iteration counter: zeroed and incremented by the loop in main(), and
   printed by report().
   NOTE(review): this is a plain int, not volatile sig_atomic_t, yet report()
   reads it from a signal handler -- confirm against C11 7.14.1.1 whether the
   compiler is required to keep the in-memory value up to date. */
int run_index;
/* Singly linked node; `pointer` is an alias for `record *`. */
typedef struct record {
struct record *next_rec; /* link to another record */
int i; /* never assigned individually in this demo */
} record, *pointer;
/* The two heap records allocated in main(). */
pointer global_pointer, next_global_pointer;
/* Installed by main() as the SIGALRM handler: prints the current value of
   run_index and terminates the process with status 0. */
void report() {
printf("report:%d\n", run_index);
exit(0);
}
/* Reproducer entry point: prints run_index, allocates and links two records,
   arms a one-second alarm whose handler is report(), then spins in a loop
   that has no exit condition. */
int main() {
printf("%d\n", run_index);
global_pointer = (pointer )malloc(sizeof(struct record));
next_global_pointer = (pointer )malloc(sizeof(struct record));
global_pointer->next_rec = next_global_pointer;
signal(SIGALRM, report);
/* get the clock running */
alarm(1);
/* Not referenced anywhere else in this function. */
char i[4];
// no exit
/* Each iteration copies the first record over the record it links to and
   increments the global counter.  There is no condition or break, so the
   program relies on report() calling exit() to end the process. */
for(run_index=0;;++run_index){
*global_pointer->next_rec = *global_pointer;
}
}
gcc -O3 -fdump-tree-all -fdump-tree-all-graph dhry.c -o dhry
./dhry
0
report:0
gcc -O3 -fdump-tree-all -fdump-tree-all-graph dhry.c -o dhry
-fno-tree-loop-im
./dhry
0
report:1367490190
The generated GIMPLE is shown below.
dhry.c.140t.laddress:
<bb 2> [local count: 10631108]:
run_index.1_1 = run_index;
printf ("%d\n", run_index.1_1);
_2 = malloc (16);
global_pointer = _2;
_3 = malloc (16);
next_global_pointer = _3;
MEM[(struct record *)_2].next_rec = _3;
signal (14, report);
alarm (1);
run_index = 0;
<bb 3> [local count: 1073741824]:
global_pointer.4_4 = global_pointer;
_5 = global_pointer.4_4->next_rec;
*_5 = *global_pointer.4_4;
run_index.6_6 = run_index;
_7 = run_index.6_6 + 1;
run_index = _7;
goto <bb 3>; [100.00%]
dhry.c.142t.lim2:
<bb 2> [local count: 10631108]:
run_index.1_1 = run_index;
printf ("%d\n", run_index.1_1);
_2 = malloc (16);
global_pointer = _2;
_3 = malloc (16);
next_global_pointer = _3;
MEM[(struct record *)_2].next_rec = _3;
signal (14, report);
alarm (1);
run_index = 0;
global_pointer.4_4 = global_pointer;
run_index_lsm.13_22 = run_index;
<bb 3> [local count: 1073741824]:
# run_index_lsm.13_21 = PHI <run_index_lsm.13_22(2),
run_index_lsm.13_23(4)>
_5 = global_pointer.4_4->next_rec;
*_5 = *global_pointer.4_4;
run_index.6_6 = run_index_lsm.13_21;
_7 = run_index.6_6 + 1;
run_index_lsm.13_23 = _7;
In the loop-im pass, store motion inserts run_index_lsm = run_index before the
loop and replaces all references to run_index with run_index_lsm. The
following
code then writes run_index_lsm back to run_index on the loop exits.
/* Materialize ordered store sequences on exits. */
FOR_EACH_VEC_ELT (exits, i, e)
{
edge append_cond_position = NULL;
edge last_cond_fallthru = NULL;
if (i < sms.length ())
{
gcc_assert (sms[i].first == e);
execute_sm_exit (loop, e, sms[i].second, aux_map, sm_ord,
append_cond_position, last_cond_fallthru);
sms[i].second.release ();
}
if (!unord_refs.is_empty ())
execute_sm_exit (loop, e, unord_refs, aux_map, sm_unord,
append_cond_position, last_cond_fallthru);
/* Commit edge inserts here to preserve the order of stores
when an exit exits multiple loops. */
gsi_commit_one_edge_insert (e, NULL);
}
However, run_index_lsm is never written back to run_index, because this loop
has no exit.
So the value of run_index in memory remains zero after store motion is applied.
Is an infinite loop undefined behavior, so that leaving run_index == 0 is
permitted?
If not, I think we should not apply store motion to loops with no exit.
On Sun, Apr 27, 2025 at 11:29, Xin Wang <[email protected]> wrote:
> The temporary variable will not be written back to memory, as an
> infinite loop has no exit, so we prohibit applying store motion
> to loops with no exits.
>
> Signed-off-by: Xin Wang <[email protected]>
>
> ---
> gcc/tree-ssa-loop-im.cc | 3 +++
> 1 file changed, 3 insertions(+)
>
> diff --git a/gcc/tree-ssa-loop-im.cc b/gcc/tree-ssa-loop-im.cc
> index 225964c6215..de0450f5192 100644
> --- a/gcc/tree-ssa-loop-im.cc
> +++ b/gcc/tree-ssa-loop-im.cc
> @@ -3355,6 +3355,9 @@ loop_suitable_for_sm (class loop *loop
> ATTRIBUTE_UNUSED,
> unsigned i;
> edge ex;
>
> + if (exits.is_empty())
> + return false;
> +
> FOR_EACH_VEC_ELT (exits, i, ex)
> if (ex->flags & (EDGE_ABNORMAL | EDGE_EH))
> return false;
> --
> 2.25.1
>
>