On 06/29/2017 01:17 PM, Jakub Jelinek wrote: > On Tue, Jun 20, 2017 at 03:06:56PM +0200, Martin Liška wrote: >> +/* Rewrite all usages of tree OP which is a PARM_DECL with a VAR_DECL >> + that is its DECL_VALUE_EXPR. */ >> + >> +static tree >> +rewrite_usage_of_param (tree *op, int *walk_subtrees, void *) >> +{ >> + if (TREE_CODE (*op) == PARM_DECL && DECL_VALUE_EXPR (*op) != NULL_TREE) > > DECL_VALUE_EXPR testing is costly (it is a hash table lookup). > Therefore you should test DECL_HAS_VALUE_EXPR_P (*op) after checking > == PARM_DECL. And DECL_HAS_VALUE_EXPR_P should imply a non-NULL > DECL_VALUE_EXPR. > That said, I wonder if we don't create DECL_VALUE_EXPR for PARM_DECLs in > other parts of the compiler, whether it wouldn't be safer to also test here > after == PARM_DECL and DECL_HAS_VALUE_EXPR_P check whether *op is in > addressable_params hash table.
Thanks for the explanation: DECL_HAS_VALUE_EXPR_P is a flag, while DECL_VALUE_EXPR is a hash table lookup. > >> + { >> + *op = DECL_VALUE_EXPR (*op); >> + *walk_subtrees = 0; >> + } >> + >> + return NULL; >> +} >> + >> +/* For a given function FUN, rewrite all addressable parameters so that >> + a new automatic variable is introduced. Right after function entry >> + a parameter is assigned to the variable. */ >> + >> +static void >> +sanitize_rewrite_addressable_params (function *fun) >> +{ >> + gimple *g; >> + gimple_seq stmts = NULL; >> + auto_vec<tree> addressable_params; > > You don't really use the addressable_params vector anywhere, right? > Except for: > >> + >> + for (tree arg = DECL_ARGUMENTS (current_function_decl); >> + arg; arg = DECL_CHAIN (arg)) >> + { >> + if (TREE_ADDRESSABLE (arg) && !TREE_ADDRESSABLE (TREE_TYPE (arg))) >> + { >> + TREE_ADDRESSABLE (arg) = 0; >> + /* The parameter is no longer addressable. */ >> + tree type = TREE_TYPE (arg); >> + addressable_params.safe_push (arg); > > pushing stuff into it and later > >> + if (addressable_params.is_empty ()) >> + return; > > If you only need that, a bool flag if any params have been changed is > enough. But see above whether it wouldn't be safer to use a hash table > to verify it. Plus, I think it would be desirable to clear > DECL_HAS_VALUE_EXPR_P and SET_DECL_VALUE_EXPR to NULL afterwards > if (target_for_debug_bind (arg)) - which can be done either with the vec > or with a hash table traversal, for that we don't care about the ordering. Good point, I decided to come up with a flag + vector of arguments where VALUE_EXPR should be set to NULL. > >> + >> + /* Create a new automatic variable. */ >> + tree var = build_decl (DECL_SOURCE_LOCATION (arg), >> + VAR_DECL, DECL_NAME (arg), type); >> + TREE_ADDRESSABLE (var) = 1; >> + DECL_ARTIFICIAL (var) = 1; >> + DECL_SEEN_IN_BIND_EXPR_P (var) = 0; > > This is 0 already from build_decl, IMHO no need to set it. Done. 
> >> + gimple_add_tmp_var (var); >> + >> + if (dump_file) >> + fprintf (dump_file, >> + "Rewriting parameter whose address is taken: %s\n", >> + IDENTIFIER_POINTER (DECL_NAME (arg))); >> + >> + SET_DECL_VALUE_EXPR (arg, var); > > But obviously you miss setting DECL_HAS_VALUE_EXPR_P here. Likewise. > >> + /* Assign value of parameter to newly created variable. */ >> + if ((TREE_CODE (type) == COMPLEX_TYPE >> + || TREE_CODE (type) == VECTOR_TYPE)) >> + { >> + /* We need to create a SSA name that will be used for the >> + assignment. */ > > Why don't you just set DECL_GIMPLE_REG_P (arg) = 1; for > COMPLEX_TYPE/VECTOR_TYPE? The arg is going to be only used to copy it into > the new var. And then just use get_or_create_ssa_default_def, > regardless of whether if is complex/vector or other. Doing so fails here: $ ./xgcc -B. /home/marxin/Programming/gcc/gcc/testsuite/g++.dg/asan/function-argument-1.C -fsanitize=address during RTL pass: expand /home/marxin/Programming/gcc/gcc/testsuite/g++.dg/asan/function-argument-1.C: In function ‘int foo(A)’: /home/marxin/Programming/gcc/gcc/testsuite/g++.dg/asan/function-argument-1.C:17:1: internal compiler error: in set_parm_rtl, at cfgexpand.c:1271 foo (A arg) ^~~ 0x938536 set_parm_rtl(tree_node*, rtx_def*) ../../gcc/cfgexpand.c:1271 0xab9813 assign_parms ../../gcc/function.c:3782 0xabc605 expand_function_start(tree_node*) ../../gcc/function.c:5221 0x943e01 execute ../../gcc/cfgexpand.c:6248 > >> + /* Replace all usages of PARM_DECLs with the newly >> + created variable VAR. 
*/ >> + basic_block bb; >> + FOR_EACH_BB_FN (bb, fun) >> + { >> + gimple_stmt_iterator gsi; >> + for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) >> + { >> + gimple *stmt = gsi_stmt (gsi); >> + gimple_stmt_iterator it = gsi_for_stmt (stmt); >> + walk_gimple_stmt (&it, NULL, rewrite_usage_of_param, NULL); >> + } >> + for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi)) >> + { >> + gphi *phi = dyn_cast<gphi *> (gsi_stmt (gsi)); >> + for (unsigned i = 0; i < gimple_phi_num_args (phi); ++i) >> + { >> + hash_set<tree> visited_nodes; >> + walk_tree (gimple_phi_arg_def_ptr (phi, i), >> + rewrite_usage_of_param, NULL, &visited_nodes); >> + } > > Doesn't walk_gimple_stmt on the PHI handle this? No, I see the following in asan bootstrap: ../../gcc/c/c-decl.c: In function ‘tree_node* grokfield(location_t, c_declarator*, c_declspecs*, tree, tree_node**)’: ../../gcc/c/c-decl.c:7525:1: error: address taken, but ADDRESSABLE bit not set grokfield (location_t loc, ^~~~~~~~~ PHI argument &width; for PHI node iftmp.2774_27 = PHI <&width(16), 0B(61), &width(54)> during GIMPLE pass: sanopt ../../gcc/c/c-decl.c:7525:1: internal compiler error: verify_ssa failed 0x115356db verify_ssa(bool, bool) ../../gcc/tree-ssa.c:1186 0x10f850ff execute_function_todo ../../gcc/passes.c:1996 0x10f8376b do_per_function ../../gcc/passes.c:1655 0x10f853a7 execute_todo ../../gcc/passes.c:2043 Sending a new version that I'm going to test. Martin > > Jakub >
From bd7580c259c0552490d63527782d00926c9c5473 Mon Sep 17 00:00:00 2001 From: marxin <mli...@suse.cz> Date: Wed, 14 Jun 2017 11:40:01 +0200 Subject: [PATCH] ASAN: handle addressable params (PR sanitize/81040). gcc/testsuite/ChangeLog: 2017-06-19 Martin Liska <mli...@suse.cz> PR sanitize/81040 * g++.dg/asan/function-argument-1.C: New test. * g++.dg/asan/function-argument-2.C: New test. * g++.dg/asan/function-argument-3.C: New test. gcc/ChangeLog: 2017-06-19 Martin Liska <mli...@suse.cz> PR sanitize/81040 * sanopt.c (rewrite_usage_of_param): New function. (sanitize_rewrite_addressable_params): Likewise. (pass_sanopt::execute): Call sanitize_rewrite_addressable_params. --- gcc/sanopt.c | 135 ++++++++++++++++++++++++ gcc/testsuite/g++.dg/asan/function-argument-1.C | 30 ++++++ gcc/testsuite/g++.dg/asan/function-argument-2.C | 24 +++++ gcc/testsuite/g++.dg/asan/function-argument-3.C | 27 +++++ 4 files changed, 216 insertions(+) create mode 100644 gcc/testsuite/g++.dg/asan/function-argument-1.C create mode 100644 gcc/testsuite/g++.dg/asan/function-argument-2.C create mode 100644 gcc/testsuite/g++.dg/asan/function-argument-3.C diff --git a/gcc/sanopt.c b/gcc/sanopt.c index 16bdba76042..acb09dddd5b 100644 --- a/gcc/sanopt.c +++ b/gcc/sanopt.c @@ -37,6 +37,12 @@ along with GCC; see the file COPYING3. If not see #include "gimple-ssa.h" #include "tree-phinodes.h" #include "ssa-iterators.h" +#include "gimplify.h" +#include "gimple-iterator.h" +#include "gimple-walk.h" +#include "cfghooks.h" +#include "tree-dfa.h" +#include "tree-ssa.h" /* This is used to carry information about basic blocks. It is attached to the AUX field of the standard CFG block. */ @@ -858,6 +864,132 @@ sanitize_asan_mark_poison (void) } } +/* Rewrite all usages of tree OP which is a PARM_DECL with a VAR_DECL + that is its DECL_VALUE_EXPR. 
*/ + +static tree +rewrite_usage_of_param (tree *op, int *walk_subtrees, void *) +{ + if (TREE_CODE (*op) == PARM_DECL && DECL_HAS_VALUE_EXPR_P (*op)) + { + *op = DECL_VALUE_EXPR (*op); + *walk_subtrees = 0; + } + + return NULL; +} + +/* For a given function FUN, rewrite all addressable parameters so that + a new automatic variable is introduced. Right after function entry + a parameter is assigned to the variable. */ + +static void +sanitize_rewrite_addressable_params (function *fun) +{ + gimple *g; + gimple_seq stmts = NULL; + bool has_any_addressable_param = false; + auto_vec<tree> clear_value_expr_list; + + for (tree arg = DECL_ARGUMENTS (current_function_decl); + arg; arg = DECL_CHAIN (arg)) + { + if (TREE_ADDRESSABLE (arg) && !TREE_ADDRESSABLE (TREE_TYPE (arg))) + { + TREE_ADDRESSABLE (arg) = 0; + /* The parameter is no longer addressable. */ + tree type = TREE_TYPE (arg); + has_any_addressable_param = true; + + /* Create a new automatic variable. */ + tree var = build_decl (DECL_SOURCE_LOCATION (arg), + VAR_DECL, DECL_NAME (arg), type); + TREE_ADDRESSABLE (var) = 1; + DECL_ARTIFICIAL (var) = 1; + + gimple_add_tmp_var (var); + + if (dump_file) + fprintf (dump_file, + "Rewriting parameter whose address is taken: %s\n", + IDENTIFIER_POINTER (DECL_NAME (arg))); + + DECL_HAS_VALUE_EXPR_P (arg) = 1; + SET_DECL_VALUE_EXPR (arg, var); + + /* Assign value of parameter to newly created variable. */ + if ((TREE_CODE (type) == COMPLEX_TYPE + || TREE_CODE (type) == VECTOR_TYPE)) + { + /* We need to create a SSA name that will be used for the + assignment. 
*/ + DECL_GIMPLE_REG_P (arg) = 1; + tree tmp = get_or_create_ssa_default_def (cfun, arg); + g = gimple_build_assign (var, tmp); + gimple_set_location (g, DECL_SOURCE_LOCATION (arg)); + gimple_seq_add_stmt (&stmts, g); + } + else + { + g = gimple_build_assign (var, arg); + gimple_set_location (g, DECL_SOURCE_LOCATION (arg)); + gimple_seq_add_stmt (&stmts, g); + } + + if (target_for_debug_bind (arg)) + { + g = gimple_build_debug_bind (arg, var, NULL); + gimple_seq_add_stmt (&stmts, g); + clear_value_expr_list.safe_push (arg); + } + } + } + + if (!has_any_addressable_param) + return; + + /* Replace all usages of PARM_DECLs with the newly + created variable VAR. */ + basic_block bb; + FOR_EACH_BB_FN (bb, fun) + { + gimple_stmt_iterator gsi; + for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) + { + gimple *stmt = gsi_stmt (gsi); + gimple_stmt_iterator it = gsi_for_stmt (stmt); + walk_gimple_stmt (&it, NULL, rewrite_usage_of_param, NULL); + } + for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi)) + { + gphi *phi = dyn_cast<gphi *> (gsi_stmt (gsi)); + for (unsigned i = 0; i < gimple_phi_num_args (phi); ++i) + { + hash_set<tree> visited_nodes; + walk_tree (gimple_phi_arg_def_ptr (phi, i), + rewrite_usage_of_param, NULL, &visited_nodes); + } + } + } + + /* Unset value expr for parameters for which we created debug bind + expressions. */ + unsigned i; + tree arg; + FOR_EACH_VEC_ELT (clear_value_expr_list, i, arg) + { + DECL_HAS_VALUE_EXPR_P (arg) = 0; + SET_DECL_VALUE_EXPR (arg, NULL_TREE); + } + + /* Insert default assignments at the beginning of a function. 
*/ + basic_block entry_bb = ENTRY_BLOCK_PTR_FOR_FN (fun); + entry_bb = split_edge (single_succ_edge (entry_bb)); + + gimple_stmt_iterator gsi = gsi_start_bb (entry_bb); + gsi_insert_seq_before (&gsi, stmts, GSI_NEW_STMT); +} + unsigned int pass_sanopt::execute (function *fun) { @@ -891,6 +1023,9 @@ pass_sanopt::execute (function *fun) sanitize_asan_mark_poison (); } + if (asan_sanitize_stack_p ()) + sanitize_rewrite_addressable_params (fun); + bool use_calls = ASAN_INSTRUMENTATION_WITH_CALL_THRESHOLD < INT_MAX && asan_num_accesses >= ASAN_INSTRUMENTATION_WITH_CALL_THRESHOLD; diff --git a/gcc/testsuite/g++.dg/asan/function-argument-1.C b/gcc/testsuite/g++.dg/asan/function-argument-1.C new file mode 100644 index 00000000000..148c4628316 --- /dev/null +++ b/gcc/testsuite/g++.dg/asan/function-argument-1.C @@ -0,0 +1,30 @@ +// { dg-do run } +// { dg-shouldfail "asan" } + +struct A +{ + int a[5]; +}; + +static __attribute__ ((noinline)) int +goo (A *a) +{ + int *ptr = &a->a[0]; + return *(volatile int *) (ptr - 1); +} + +__attribute__ ((noinline)) int +foo (A arg) +{ + return goo (&arg); +} + +int +main () +{ + return foo (A ()); +} + +// { dg-output "ERROR: AddressSanitizer: stack-buffer-underflow on address.*(\n|\r\n|\r)" } +// { dg-output "READ of size . 
at.*" } +// { dg-output ".*'arg' <== Memory access at offset \[0-9\]* underflows this variable.*" } diff --git a/gcc/testsuite/g++.dg/asan/function-argument-2.C b/gcc/testsuite/g++.dg/asan/function-argument-2.C new file mode 100644 index 00000000000..3a7c33bdaaa --- /dev/null +++ b/gcc/testsuite/g++.dg/asan/function-argument-2.C @@ -0,0 +1,24 @@ +// { dg-do run } +// { dg-shouldfail "asan" } + +static __attribute__ ((noinline)) int +goo (int *a) +{ + return *(volatile int *)a; +} + +__attribute__ ((noinline)) int +foo (char arg) +{ + return goo ((int *)&arg); +} + +int +main () +{ + return foo (12); +} + +// { dg-output "ERROR: AddressSanitizer: stack-buffer-overflow on address.*(\n|\r\n|\r)" } +// { dg-output "READ of size . at.*" } +// { dg-output ".*'arg' <== Memory access at offset \[0-9\]* partially overflows this variable.*" } diff --git a/gcc/testsuite/g++.dg/asan/function-argument-3.C b/gcc/testsuite/g++.dg/asan/function-argument-3.C new file mode 100644 index 00000000000..14617ba8425 --- /dev/null +++ b/gcc/testsuite/g++.dg/asan/function-argument-3.C @@ -0,0 +1,27 @@ +// { dg-do run } +// { dg-shouldfail "asan" } + +typedef int v4si __attribute__ ((vector_size (16))); + +static __attribute__ ((noinline)) int +goo (v4si *a) +{ + return (*(volatile v4si *) (a + 1))[2]; +} + +__attribute__ ((noinline)) int +foo (v4si arg) +{ + return goo (&arg); +} + +int +main () +{ + v4si v = {1,2,3,4}; + return foo (v); +} + +// { dg-output "ERROR: AddressSanitizer: stack-buffer-overflow on address.*(\n|\r\n|\r)" } +// { dg-output "READ of size . at.*" } +// { dg-output ".*'arg' <== Memory access at offset \[0-9\]* overflows this variable.*" } -- 2.13.1