Brevity is not my forte, so let me start with the questions. Can
somebody please point me to the pass and/or function where gcc
1.) decides whether or not to inline a function,
2.) decides whether or not to make a .constprop version of a function,
3.) a good pass (when all constant propagation is done) to search for fn
parameters and variables (marked with an attribute) that were not
constproped away, and finally
4.) what mechanism should I use for that search? (iterate through the
tree to find them and then see if they have rtl? I haven't worked in
this area yet.)
Thanks!
In case you're interested in what I'm working on (probably not :) I'm
ready to experiment with adding an attribute named "constprop" (or
similar name) to the C family that marks a function parameter or local
variable as something that should be constant-propagated away. When all
constant propagation is completed (presuming -O2 or better), an error
should be generated for any that remain. This is a continuation of work
I put down a few years ago to use C metaprogramming to create
pseudo-template functions for generic programming in C that will build
and run correctly on any reasonably compliant C compiler, but with gcc
will generate highly efficient code.
We can currently do this in C by checking a parameter or variable at a
specific point in a function with a macro such as this:
/*
 * gboing_check1 -- innermost worker of the check macros.
 *
 * Declares an extern function named <prefix>_<line> that carries
 * __attribute__((error(msg))) and calls it when `cond` is false.  If the
 * compiler cannot eliminate that call, the build fails with `msg`.
 */
#define gboing_check1(cond, msg, prefix, line)                          \
        do {                                                            \
                extern void prefix##_##line(void)                       \
                        __attribute__((error(msg)));                    \
                if (!(cond))                                            \
                        prefix##_##line();                              \
        } while (0)

/*
 * gboing_check0 -- extra expansion level so that an argument such as
 * __LINE__ is replaced by its value before gboing_check1 pastes it into
 * the function name.
 */
#define gboing_check0(cond, msg, prefix, line)                          \
        gboing_check1(cond, msg, prefix, line)

/*
 * gboing_check -- check `cond`, using the current source line to build
 * the name of the error function.
 */
#define gboing_check(cond, msg, prefix)                                 \
        gboing_check0(cond, msg, prefix, __LINE__)

/*
 * gboing_assert_const -- require that __builtin_constant_p(expr) holds;
 * otherwise fail with "Expression not constant: <expr>".
 */
#define gboing_assert_const(expr)                                       \
        gboing_check(__builtin_constant_p(expr),                        \
                     "Expression not constant: " #expr,                 \
                     not_a_constant_error)
I would like to be able to just declare a parameter or variable in a way
to have gcc automate this check at every use. This implies that the
final function should never exist in any form other than a .constprop,
so probably only target static functions for now (I haven't learned how
LTO works yet). Here is a real-world example of what I call a C
pseudo-template function:
/*
 * Copy n bytes from src to dest and return memcpy's result.
 *
 * Both n and align are checked with gboing_assert_const(); both pointers
 * are passed through __builtin_assume_aligned() with `align` before the
 * copy.
 */
static inline __attribute__((always_inline, flatten)) void *
object_copy(void *dest, const void *src, size_t n, size_t align)
{
        gboing_assert_const(n);
        gboing_assert_const(align);

        void *aligned_dest = __builtin_assume_aligned(dest, align);
        const void *aligned_src = __builtin_assume_aligned(src, align);

        return memcpy(aligned_dest, aligned_src, n);
}
This is roughly analogous to this C++ template function:
// Copy n bytes from src to dest and return memcpy's result.  The size and
// the alignment passed to __builtin_assume_aligned() are template
// parameters rather than function arguments.
template<size_t n, size_t align> static void *
object_copy(void *dest, const void *src)
{
        void *const aligned_dest = __builtin_assume_aligned(dest, align);
        const void *const aligned_src = __builtin_assume_aligned(src, align);

        return memcpy(aligned_dest, aligned_src, n);
}
With the currently available mechanisms, optimal performance cannot be
obtained for complex C pseudo-template functions without using
__attribute__((always_inline, flatten)), which has a lot of drawbacks
and complications. It would be better if instead, we can tell gcc which
parameters should be treated (in essence) as template parameters and let
gcc decide which functions should be inlined and which just need a
.constprop version and produce an error (or warning if desired) when
neither is possible. So the above example would look something like this:
/*
 * Copy n bytes from src to dest and return memcpy's result.
 *
 * n and align carry the proposed "constprop" attribute; both pointers are
 * passed through __builtin_assume_aligned() with `align` before the copy.
 */
static void *
my_object_copy(void *dest, const void *src,
               size_t __attribute__((constprop)) n,
               __attribute__((constprop)) size_t align)
{
        void *aligned_dest = __builtin_assume_aligned(dest, align);
        const void *aligned_src = __builtin_assume_aligned(src, align);

        return memcpy(aligned_dest, aligned_src, n);
}
I'm fairly comfortable with RTL at this point, but I'm still new to the
tree and I don't know much about SIMPLE, GIMPLE or GENERIC. I've added
the attribute and a new flag to tree_decl_common, so I'm just trying to
figure out the guts of this now. Below is my sloppy work so far. I
know that I have a whole lot more to do, like add a -f flag to enable it
(possibly emitting warnings instead of errors) and a lot of re-tuning of
what goes into the decision to make a .constprop version. Like poorly
used C++ templates, this will be an area that one can easily bloat their
code if they don't know what they're doing.
diff --git a/gcc/c-family/c-attribs.c b/gcc/c-family/c-attribs.c
index f2a88e147ba..b773d1a7dde 100644
--- a/gcc/c-family/c-attribs.c
+++ b/gcc/c-family/c-attribs.c
@@ -139,6 +139,7 @@ static tree handle_bnd_variable_size_attribute (tree *,
tree, tree, int, bool *)
static tree handle_bnd_legacy (tree *, tree, tree, int, bool *);
static tree handle_bnd_instrument (tree *, tree, tree, int, bool *);
static tree handle_fallthrough_attribute (tree *, tree, tree, int, bool *);
+static tree handle_constprop_attribute (tree *, tree , tree , int , bool *);
/* Table of machine-independent attributes common to all C-like languages.
@@ -345,6 +346,8 @@ const struct attribute_spec c_common_attribute_table[] =
handle_bnd_instrument, false },
{ "fallthrough", 0, 0, false, false, false,
handle_fallthrough_attribute, false },
+ { "constprop", 0, 0, false, false, false,
+ handle_constprop_attribute, false },
{ NULL, 0, 0, false, false, false, NULL, false }
};
@@ -3173,3 +3176,28 @@ handle_fallthrough_attribute (tree *, tree name, tree, int,
*no_add_attrs = true;
return NULL_TREE;
}
+
+/* Handle a "constprop" attribute; arguments as in
+   struct attribute_spec.handler.  The attribute marks a parameter or
+   variable that is expected to be constant-propagated away.  */
+
+static tree
+handle_constprop_attribute (tree *node, tree name, tree ARG_UNUSED (args),
+                            int ARG_UNUSED (flags), bool *no_add_attrs)
+{
+  tree decl = *node;
+
+  /* The flag is stored in tree_decl_common, so it may only be set on a
+     declaration.  Writing decl_common on a type node would clobber
+     unrelated type fields, so anything that is not a PARM_DECL or a
+     VAR_DECL (types included) is rejected with a warning.  */
+  if (TREE_CODE (decl) == PARM_DECL || VAR_P (decl))
+    decl->decl_common.constprop = 1;
+  else
+    {
+      warning (OPT_Wattributes, "%qE attribute ignored", name);
+      *no_add_attrs = true;
+    }
+
+  return NULL_TREE;
+}
diff --git a/gcc/tree-core.h b/gcc/tree-core.h
index a646ecb5c95..811d6bae48c 100644
--- a/gcc/tree-core.h
+++ b/gcc/tree-core.h
@@ -1619,7 +1619,9 @@ struct GTY(()) tree_decl_common {
/* DECL_ALIGN. It should have the same size as TYPE_ALIGN. */
unsigned int align : 6;
- /* 20 bits unused. */
+ unsigned int constprop : 1;
+
+ /* 19 bits unused. */
/* UID for points-to sets, stable over copying from inlining. */
unsigned int pt_uid;
Thanks!
Daniel