https://gcc.gnu.org/bugzilla/show_bug.cgi?id=92649
--- Comment #5 from Jiangning Liu <jiangning.liu at amperecomputing dot com> ---
Unrolling 1024 iterations would increase code size a lot, so we usually don't
do that; 1024 is only an example. Without knowing in advance that we could
eliminate most of the unrolled iterations, we don't really want to do loop
unrolling, I guess.
Yes. Assigning 5 to all of a's elements is only an example as well; it could be
any random value or any predefined number.
Let me give a more complicated case,
/* Reproducer: only a[0..LIVE_SIZE-1] is read when computing the returned
 * sum, yet the copy loop below stores to a[next..DATA_SIZE-1].  The stores
 * to a[LIVE_SIZE..DATA_SIZE-1] therefore never influence the result and
 * could in principle be eliminated by an array-range liveness analysis. */
extern int rand(void);
#define LIVE_SIZE 100 /* number of elements of a[] that reach the final sum */
#define DATA_SIZE 256 /* full size of a[] and of each row of b[][] */
/* Fills a[] and b[][] with pseudo-random values, repeatedly rewrites a[],
 * and returns the sum of the first LIVE_SIZE elements of a[]
 * (truncated to int; the sum fits easily since each element is < 16385). */
int f(void)
{
int a[DATA_SIZE], b[DATA_SIZE][DATA_SIZE];
int i,j;
long long s = 0;
int next;
/* Initialize a[] and b[][] with values in [0, 16383]. */
for (i=0; i<DATA_SIZE; i++) {
a[i] = rand() % 16384;
for (j=0; j<DATA_SIZE; j++)
b[i][j] = rand() % 16384;
}
for (i=0; i<DATA_SIZE; i++) {
/* Make every odd element among a[0..LIVE_SIZE-1] even, counting how
 * many were changed in `next`. */
next = 0;
for (j=0; j<LIVE_SIZE; j++) {
if (a[j] % 2) {
a[j]++;
next++;
}
}
/* Overwrite a[next..DATA_SIZE-1] from row i of b; only the writes with
 * j < LIVE_SIZE can affect the sum computed below. */
for (j=next; j<DATA_SIZE; j++)
a[j] = b[i][j];
}
/* Only the first LIVE_SIZE elements contribute to the result. */
for (i=0; i<LIVE_SIZE; i++)
s += a[i];
return s;
}
I expect this small program can be optimized to be,
/* Expected optimized form of f(): identical to the reproducer above except
 * that the copy loop's upper bound is LIVE_SIZE instead of DATA_SIZE, since
 * stores to a[LIVE_SIZE..DATA_SIZE-1] never reach the returned sum.
 * (Fix: the explanatory comment on the copy loop had been hard-wrapped by
 * the bug tracker, leaving a stray `LIVE_SIZE` token on its own line that
 * made the snippet uncompilable; it is rejoined onto one line here.) */
extern int rand(void);
#define LIVE_SIZE 100
#define DATA_SIZE 256
int f(void)
{
int a[DATA_SIZE], b[DATA_SIZE][DATA_SIZE];
int i,j;
long long s = 0;
int next;
for (i=0; i<DATA_SIZE; i++) {
a[i] = rand() % 16384;
for (j=0; j<DATA_SIZE; j++)
b[i][j] = rand() % 16384;
}
for (i=0; i<DATA_SIZE; i++) {
next = 0;
for (j=0; j<LIVE_SIZE; j++) {
if (a[j] % 2) {
a[j]++;
next++;
}
}
for (j=next; j<LIVE_SIZE; j++) /* Replace DATA_SIZE with LIVE_SIZE */
a[j] = b[i][j];
}
for (i=0; i<LIVE_SIZE; i++)
s += a[i];
return s;
}
An array range liveness analysis can determine that only the first LIVE_SIZE
array elements affect the function result, so all remaining stores can be
eliminated.
Shall we "invent" a new pass to handle this optimization?