Jan Hubicka wrote:
Also, for the simple function
double foo1(double x) { return x; }
we return 4 as a cost, because we have
double tmp = x; return tmp;
and count the move cost (MODIFY_EXPR) twice. We could fix this by not walking (i.e. ignoring) RETURN_EXPR.
That would work, yes. I was also thinking about ignoring MODIFY_EXPR for var = var as those likely gets propagated later.
This looks like a good idea. In fact going even further and ignoring all assigns to DECL_IGNORED_P allows us to have the same size estimates for all functions down the inlining chain for
int foo(int x) { return x; }
int foo1(int x) { return foo(x); }
...and for the equivalent with int foo(void) { return 0; }and all related functions. Which is what we want. Of course ignoring all stores to artificial variables may have other bad side-effects.
This results in a tramp3d-v3 performance increase from 1m56s to 27s (leafify brings us down to 23.5s).
Note that we still assign reasonable cost to memory stores:
inline void foo(double *x)
{
*x = 1.0;
}has a cost of 2, and
double y;
void bar(void)
{
foo(&y);
}too, if foo is inlined. Nice. Patch attached (with some unrelated stuff that just is cleanup) for you to play.
Any thoughts on this radical approach? A testcase that could be pessimized by this? Of course default inlining limits would need to be adjusted if we do this.
Richard.
Index: cgraphunit.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/cgraphunit.c,v
retrieving revision 1.93
diff -c -3 -p -r1.93 cgraphunit.c
*** cgraphunit.c 21 Feb 2005 14:39:46 -0000 1.93
--- cgraphunit.c 24 Feb 2005 19:04:18 -0000
*************** Software Foundation, 59 Temple Place - S
*** 190,197 ****
#include "function.h"
#include "tree-gimple.h"
- #define INSNS_PER_CALL 10
-
static void cgraph_expand_all_functions (void);
static void cgraph_mark_functions_to_output (void);
static void cgraph_expand_function (struct cgraph_node *);
--- 190,195 ----
Index: tree-inline.h
===================================================================
RCS file: /cvs/gcc/gcc/gcc/tree-inline.h,v
retrieving revision 1.14
diff -c -3 -p -r1.14 tree-inline.h
*** tree-inline.h 8 Nov 2004 22:40:09 -0000 1.14
--- tree-inline.h 24 Feb 2005 19:04:18 -0000
*************** bool tree_inlinable_function_p (tree);
*** 29,34 ****
--- 29,35 ----
tree copy_tree_r (tree *, int *, void *);
void clone_body (tree, tree, void *);
tree save_body (tree, tree *, tree *);
+ int estimate_move_cost (tree type);
int estimate_num_insns (tree expr);
/* 0 if we should not perform inlining.
*************** int estimate_num_insns (tree expr);
*** 38,41 ****
--- 39,47 ----
extern int flag_inline_trees;
+ /* Instructions per call. Used in estimate_num_insns and in the
+ inliner to account for removed calls. */
+
+ #define INSNS_PER_CALL 10
+
#endif /* GCC_TREE_INLINE_H */
Index: tree-inline.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/tree-inline.c,v
retrieving revision 1.170
diff -c -3 -p -r1.170 tree-inline.c
*** tree-inline.c 27 Jan 2005 14:36:17 -0000 1.170
--- tree-inline.c 24 Feb 2005 19:04:19 -0000
*************** inlinable_function_p (tree fn)
*** 1165,1170 ****
--- 1165,1189 ----
return inlinable;
}
+ /* Estimate the number of instructions needed for a move of
+ the specified type. */
+
+ int
+ estimate_move_cost (tree type)
+ {
+ HOST_WIDE_INT size;
+
+ if (VOID_TYPE_P (type))
+ return 0;
+
+ size = int_size_in_bytes (type);
+
+ if (size < 0 || size > MOVE_MAX_PIECES * MOVE_RATIO)
+ return INSNS_PER_CALL;
+ else
+ return ((size + MOVE_MAX_PIECES - 1) / MOVE_MAX_PIECES);
+ }
+
/* Used by estimate_num_insns. Estimate number of instructions seen
by given statement. */
*************** estimate_num_insns_1 (tree *tp, int *wal
*** 1245,1266 ****
/* Recognize assignments of large structures and constructors of
big arrays. */
- case INIT_EXPR:
case MODIFY_EXPR:
x = TREE_OPERAND (x, 0);
/* FALLTHRU */
- case TARGET_EXPR:
case CONSTRUCTOR:
! {
! HOST_WIDE_INT size;
!
! size = int_size_in_bytes (TREE_TYPE (x));
!
! if (size < 0 || size > MOVE_MAX_PIECES * MOVE_RATIO)
! *count += 10;
! else
! *count += ((size + MOVE_MAX_PIECES - 1) / MOVE_MAX_PIECES);
! }
break;
/* Assign cost of 1 to usual operations.
--- 1264,1278 ----
/* Recognize assignments of large structures and constructors of
big arrays. */
case MODIFY_EXPR:
+ if (DECL_P (TREE_OPERAND (x, 0)) && DECL_IGNORED_P (TREE_OPERAND (x,
0)))
+ break;
+ case INIT_EXPR:
+ case TARGET_EXPR:
x = TREE_OPERAND (x, 0);
/* FALLTHRU */
case CONSTRUCTOR:
! *count += estimate_move_cost (TREE_TYPE (x));
break;
/* Assign cost of 1 to usual operations.
*************** estimate_num_insns_1 (tree *tp, int *wal
*** 1363,1369 ****
default:
break;
}
! *count += 10;
break;
}
default:
--- 1375,1381 ----
default:
break;
}
! *count += INSNS_PER_CALL;
break;
}
default:
