On Tue, 12 Mar 2024, Jakub Jelinek wrote:
> Hi!
>
> ubsan, asan (both PR112709) and _BitInt lowering (PR113466) want to
> insert some instrumentation or adjustment statements before some statement.
> This unfortunately creates invalid IL if inserting before a returns_twice
> call, because we require that such calls are the first statement in a basic
> block and that we have an edge from the .ABNORMAL_DISPATCHER block to
> the block containing the returns_twice call (in addition to other edge(s)).
>
> The following patch adds helper functions for such insertions and uses it
> for now in ubsan (I'll post a follow up which uses it in asan and will
> work later on the _BitInt lowering PR).
>
> In particular, if the bb with returns_twice call at the start has just
> 2 edges, one EDGE_ABNORMAL from .ABNORMAL_DISPATCHER and another
> (non-EDGE_ABNORMAL/EDGE_EH) from some other bb, it just inserts the
> statement or sequence on that other edge.
> If the bb has more predecessor edges or the one not from
> .ABNORMAL_DISPATCHER is e.g. an EH edge (this latter case likely shouldn't
> happen, one would need labels or something like that), the patch splits the
> block with returns_twice call such that there is just one edge next to
> .ABNORMAL_DISPATCHER edge and adjusts PHIs as needed to make it happen.
> The functions also replace uses of PHIs from the returns_twice bb with
> the corresponding PHI arguments, because otherwise it would be invalid IL.
>
> E.g. in ubsan/pr112709-2.c (qux) we have before the ubsan pass
> <bb 10> :
> # .MEM_5(ab) = PHI <.MEM_4(9), .MEM_25(ab)(11)>
> # _7(ab) = PHI <_20(9), _8(ab)(11)>
> # .MEM_21(ab) = VDEF <.MEM_5(ab)>
> _22 = bar (*_7(ab));
> where bar is returns_twice call and bb 11 has .ABNORMAL_DISPATCHER call,
> this patch instruments it like:
> <bb 9> :
> # .MEM_4 = PHI <.MEM_17(ab)(4), .MEM_10(D)(5), .MEM_14(ab)(8)>
> # DEBUG BEGIN_STMT
> # VUSE <.MEM_4>
> _20 = p;
> # .MEM_27 = VDEF <.MEM_4>
> .UBSAN_NULL (_20, 0B, 0);
> # VUSE <.MEM_27>
> _2 = __builtin_dynamic_object_size (_20, 0);
> # .MEM_28 = VDEF <.MEM_27>
> .UBSAN_OBJECT_SIZE (_20, 1024, _2, 0);
>
> <bb 10> :
> # .MEM_5(ab) = PHI <.MEM_28(9), .MEM_25(ab)(11)>
> # _7(ab) = PHI <_20(9), _8(ab)(11)>
> # .MEM_21(ab) = VDEF <.MEM_5(ab)>
> _22 = bar (*_7(ab));
> The edge from .ABNORMAL_DISPATCHER is there just to represent the
> returning for 2nd and later times, the instrumentation can't be
> done at that point as there is no code executed during that point.
> The ubsan/pr112709-1.c testcase includes non-virtual PHIs to cover
> the handling of those as well.
>
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
>
> 2024-03-12 Jakub Jelinek <[email protected]>
>
> PR sanitizer/112709
> * gimple-iterator.h (gsi_insert_before_returns_twice_call,
> gsi_insert_seq_before_returns_twice_call): Declare.
> * gimple-iterator.cc: Include gimplify.h.
> (edge_before_returns_twice_call, gsi_insert_before_returns_twice_call,
> gsi_insert_seq_before_returns_twice_call): New functions.
> * ubsan.cc (ubsan_insert_before, ubsan_insert_seq_before): New
> functions.
> (instrument_mem_ref, instrument_pointer_overflow,
> instrument_nonnull_arg, instrument_nonnull_return): Use
> ubsan_insert_before instead of gsi_insert_before.
> (maybe_instrument_pointer_overflow): Use force_gimple_operand,
> gimple_seq_add_seq_without_update and ubsan_insert_seq_before
> instead of force_gimple_operand_gsi.
> (instrument_object_size): Likewise. Use ubsan_insert_before instead
> of gsi_insert_before.
>
> * gcc.dg/ubsan/pr112709-1.c: New test.
> * gcc.dg/ubsan/pr112709-2.c: New test.
>
> --- gcc/gimple-iterator.h.jj 2024-02-08 11:17:37.144661383 +0100
> +++ gcc/gimple-iterator.h 2024-03-11 16:16:24.670464737 +0100
> @@ -93,6 +93,10 @@ extern void gsi_insert_on_edge (edge, gi
> extern void gsi_insert_seq_on_edge (edge, gimple_seq);
> extern basic_block gsi_insert_on_edge_immediate (edge, gimple *);
> extern basic_block gsi_insert_seq_on_edge_immediate (edge, gimple_seq);
> +extern basic_block gsi_insert_before_returns_twice_call (basic_block,
> + gimple *);
> +extern basic_block gsi_insert_seq_before_returns_twice_call (basic_block,
> + gimple_seq);
> extern void gsi_commit_edge_inserts (void);
> extern void gsi_commit_one_edge_insert (edge, basic_block *);
> extern gphi_iterator gsi_start_phis (basic_block);
> --- gcc/gimple-iterator.cc.jj 2024-02-08 11:17:37.144661383 +0100
> +++ gcc/gimple-iterator.cc 2024-03-11 18:17:08.994804808 +0100
> @@ -32,6 +32,7 @@ along with GCC; see the file COPYING3.
> #include "tree-cfg.h"
> #include "tree-ssa.h"
> #include "value-prof.h"
> +#include "gimplify.h"
>
>
> /* Mark the statement STMT as modified, and update it. */
> @@ -944,3 +945,135 @@ gsi_start_phis (basic_block bb)
>
> return i;
> }
> +
> +/* Helper function for gsi_insert_before_returns_twice_call and
> + gsi_insert_seq_before_returns_twice_call. Find edge to
> + insert statements before returns_twice call at the start of BB,
> + if there isn't just one, split the bb and adjust PHIs to ensure
> + that. */
> +
> +static edge
> +edge_before_returns_twice_call (basic_block bb)
> +{
> + gimple_stmt_iterator gsi = gsi_start_nondebug_bb (bb);
> + gcc_checking_assert (is_gimple_call (gsi_stmt (gsi))
> + && (gimple_call_flags (gsi_stmt (gsi))
> + & ECF_RETURNS_TWICE) != 0);
> + edge_iterator ei;
> + edge e, ad_edge = NULL, other_edge = NULL;
> + bool split = false;
> + /* Look for a fallthru and possibly a single backedge. */
I can't see the backedge handling (I'm not sure it's actually required?)
> + FOR_EACH_EDGE (e, ei, bb->preds)
> + {
> + if ((e->flags & (EDGE_ABNORMAL | EDGE_EH)) == EDGE_ABNORMAL)
EDGE_EH is special because of the landing pad?
> + {
> + gimple_stmt_iterator gsi
> + = gsi_start_nondebug_after_labels_bb (e->src);
> + gimple *ad = gsi_stmt (gsi);
> + if (ad && gimple_call_internal_p (ad, IFN_ABNORMAL_DISPATCHER))
> + {
> + gcc_checking_assert (ad_edge == NULL);
> + ad_edge = e;
> + continue;
> + }
> + }
> + if (other_edge || e->flags & (EDGE_ABNORMAL | EDGE_EH))
> + split = true;
> + other_edge = e;
> + }
> + gcc_checking_assert (ad_edge);
> + if (other_edge == NULL)
> + split = true;
> + if (split)
> + {
> + other_edge = split_block_after_labels (bb);
> + e = make_edge (ad_edge->src, other_edge->dest, EDGE_ABNORMAL);
In theory there could be multiple abnormal edges. Did you try
to use make_forwarder_block instead of manually doing it? Or
does that fail because we end up redirecting the abnormal edges
which you avoid by re-making it new and scrapping the old one?
I fail to remember why we can't redirect abnormal edges - yes,
there's no flow to be redirected but the process of "redirection"
still should work.
> + for (gphi_iterator gsi = gsi_start_phis (other_edge->src);
> + !gsi_end_p (gsi); gsi_next (&gsi))
> + {
> + gphi *phi = gsi.phi ();
> + tree lhs = gimple_phi_result (phi);
> + tree new_lhs;
> + if (virtual_operand_p (lhs))
> + new_lhs = make_ssa_name (SSA_NAME_VAR (lhs));
> + else
> + new_lhs = make_ssa_name (TREE_TYPE (lhs));
> + gimple_phi_set_result (phi, new_lhs);
> + gphi *new_phi = create_phi_node (lhs, other_edge->dest);
> + add_phi_arg (new_phi, new_lhs, other_edge, UNKNOWN_LOCATION);
> + add_phi_arg (new_phi, gimple_phi_arg_def_from_edge (phi, ad_edge),
> + e, gimple_phi_arg_location_from_edge (phi, ad_edge));
> + }
> + remove_edge (ad_edge);
> + }
> + return other_edge;
> +}
> +
> +/* Insert G before BB which starts with an ECF_RETURNS_TWICE call.
> + If BB has a single predecessor edge except for edge from
> + .ABNORMAL_DISPATCHER, insert on that edge, otherwise split
> + BB before the call, adjust PHIs and insert into the new BB.
> + If G refers to any results of BB's PHIs, replace them afterwards
> + with corresponding PHI arg. */
> +
> +basic_block
> +gsi_insert_before_returns_twice_call (basic_block bb, gimple *g)
> +{
> + edge e = edge_before_returns_twice_call (bb);
> + basic_block new_bb = gsi_insert_on_edge_immediate (e, g);
> + use_operand_p use_p;
> + ssa_op_iter iter;
> + bool m = false;
> + FOR_EACH_SSA_USE_OPERAND (use_p, g, iter, SSA_OP_USE)
> + {
> + tree s = USE_FROM_PTR (use_p);
> + if (SSA_NAME_DEF_STMT (s)
> + && gimple_code (SSA_NAME_DEF_STMT (s)) == GIMPLE_PHI
> + && gimple_bb (SSA_NAME_DEF_STMT (s)) == e->dest)
> + {
> + tree r = gimple_phi_arg_def_from_edge (SSA_NAME_DEF_STMT (s), e);
> + SET_USE (use_p, unshare_expr (r));
> + m = true;
> + }
> + }
Ick. Doesn't the forwarder keep the PHI defs and new PHI defs
are created in e->dest instead, I don't see how we need this?
Btw, gsi_insert_before_returns_twice_call might be usable as
gsi_insert_after_labels ()? Or alternatively not expose
gsi_insert_before_returns_twice_call but instead make
split_block () behave "correctly" for abnormals (or add
split_block_before_returns_twice_call ()). But that's just bike-shedding.
Richard.
> + if (m)
> + update_stmt (g);
> + return new_bb;
> +}
> +
> +/* Similiarly for sequence SEQ. */
> +
> +basic_block
> +gsi_insert_seq_before_returns_twice_call (basic_block bb, gimple_seq seq)
> +{
> + if (gimple_seq_empty_p (seq))
> + return NULL;
> + gimple *f = gimple_seq_first_stmt (seq);
> + gimple *l = gimple_seq_last_stmt (seq);
> + edge e = edge_before_returns_twice_call (bb);
> + basic_block new_bb = gsi_insert_seq_on_edge_immediate (e, seq);
> + use_operand_p use_p;
> + ssa_op_iter iter;
> + for (gimple_stmt_iterator gsi = gsi_for_stmt (f); ; gsi_next (&gsi))
> + {
> + gimple *g = gsi_stmt (gsi);
> + bool m = false;
> + FOR_EACH_SSA_USE_OPERAND (use_p, g, iter, SSA_OP_USE)
> + {
> + tree s = USE_FROM_PTR (use_p);
> + if (SSA_NAME_DEF_STMT (s)
> + && gimple_code (SSA_NAME_DEF_STMT (s)) == GIMPLE_PHI
> + && gimple_bb (SSA_NAME_DEF_STMT (s)) == e->dest)
> + {
> + tree r = gimple_phi_arg_def_from_edge (SSA_NAME_DEF_STMT (s), e);
> + SET_USE (use_p, unshare_expr (r));
> + m = true;
> + }
> + }
> + if (m)
> + update_stmt (g);
> + if (g == l)
> + break;
> + }
> + return new_bb;
> +}
> --- gcc/ubsan.cc.jj 2024-01-03 11:51:32.312720350 +0100
> +++ gcc/ubsan.cc 2024-03-11 17:07:58.104243341 +0100
> @@ -1432,6 +1432,37 @@ ubsan_expand_vptr_ifn (gimple_stmt_itera
> return true;
> }
>
> +/* Insert G stmt before ITER. If ITER is a returns_twice call,
> + insert it on an appropriate edge instead. */
> +
> +static void
> +ubsan_insert_before (gimple_stmt_iterator *iter, gimple *g)
> +{
> + gimple *stmt = gsi_stmt (*iter);
> + if (stmt
> + && is_gimple_call (stmt)
> + && (gimple_call_flags (stmt) & ECF_RETURNS_TWICE) != 0)
> + gsi_insert_before_returns_twice_call (gsi_bb (*iter), g);
> + else
> + gsi_insert_before (iter, g, GSI_SAME_STMT);
> +}
> +
> +/* Similarly for sequence SEQ. */
> +
> +static void
> +ubsan_insert_seq_before (gimple_stmt_iterator *iter, gimple_seq seq)
> +{
> + if (gimple_seq_empty_p (seq))
> + return;
> + gimple *stmt = gsi_stmt (*iter);
> + if (stmt
> + && is_gimple_call (stmt)
> + && (gimple_call_flags (stmt) & ECF_RETURNS_TWICE) != 0)
> + gsi_insert_seq_before_returns_twice_call (gsi_bb (*iter), seq);
> + else
> + gsi_insert_seq_before (iter, seq, GSI_SAME_STMT);
> +}
> +
> /* Instrument a memory reference. BASE is the base of MEM, IS_LHS says
> whether the pointer is on the left hand side of the assignment. */
>
> @@ -1458,7 +1489,7 @@ instrument_mem_ref (tree mem, tree base,
> tree alignt = build_int_cst (pointer_sized_int_node, align);
> gcall *g = gimple_build_call_internal (IFN_UBSAN_NULL, 3, t, kind, alignt);
> gimple_set_location (g, gimple_location (gsi_stmt (*iter)));
> - gsi_insert_before (iter, g, GSI_SAME_STMT);
> + ubsan_insert_before (iter, g);
> }
>
> /* Perform the pointer instrumentation. */
> @@ -1485,7 +1516,7 @@ instrument_pointer_overflow (gimple_stmt
> return;
> gcall *g = gimple_build_call_internal (IFN_UBSAN_PTR, 2, ptr, off);
> gimple_set_location (g, gimple_location (gsi_stmt (*gsi)));
> - gsi_insert_before (gsi, g, GSI_SAME_STMT);
> + ubsan_insert_before (gsi, g);
> }
>
> /* Instrument pointer arithmetics if any. */
> @@ -1577,10 +1608,11 @@ maybe_instrument_pointer_overflow (gimpl
> else
> t = fold_convert (sizetype, moff);
> }
> - t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE, true,
> - GSI_SAME_STMT);
> - base_addr = force_gimple_operand_gsi (gsi, base_addr, true, NULL_TREE,
> true,
> - GSI_SAME_STMT);
> + gimple_seq seq, this_seq;
> + t = force_gimple_operand (t, &seq, true, NULL_TREE);
> + base_addr = force_gimple_operand (base_addr, &this_seq, true, NULL_TREE);
> + gimple_seq_add_seq_without_update (&seq, this_seq);
> + ubsan_insert_seq_before (gsi, seq);
> instrument_pointer_overflow (gsi, base_addr, t);
> }
>
> @@ -2035,7 +2067,7 @@ instrument_nonnull_arg (gimple_stmt_iter
> {
> g = gimple_build_assign (make_ssa_name (TREE_TYPE (arg)), arg);
> gimple_set_location (g, loc[0]);
> - gsi_insert_before (gsi, g, GSI_SAME_STMT);
> + ubsan_insert_before (gsi, g);
> arg = gimple_assign_lhs (g);
> }
>
> @@ -2068,7 +2100,7 @@ instrument_nonnull_arg (gimple_stmt_iter
> g = gimple_build_call (fn, 1, data);
> }
> gimple_set_location (g, loc[0]);
> - gsi_insert_before (gsi, g, GSI_SAME_STMT);
> + ubsan_insert_before (gsi, g);
> ubsan_create_edge (g);
> }
> *gsi = gsi_for_stmt (stmt);
> @@ -2124,7 +2156,7 @@ instrument_nonnull_return (gimple_stmt_i
> g = gimple_build_call (fn, 2, data, data2);
> }
> gimple_set_location (g, loc[0]);
> - gsi_insert_before (gsi, g, GSI_SAME_STMT);
> + ubsan_insert_before (gsi, g);
> ubsan_create_edge (g);
> *gsi = gsi_for_stmt (stmt);
> }
> @@ -2231,6 +2263,7 @@ instrument_object_size (gimple_stmt_iter
> tree sizet;
> tree base_addr = base;
> gimple *bos_stmt = NULL;
> + gimple_seq seq = NULL;
> if (decl_p)
> base_addr = build1 (ADDR_EXPR,
> build_pointer_type (TREE_TYPE (base)), base);
> @@ -2244,19 +2277,12 @@ instrument_object_size (gimple_stmt_iter
> sizet = builtin_decl_explicit (BUILT_IN_DYNAMIC_OBJECT_SIZE);
> sizet = build_call_expr_loc (loc, sizet, 2, base_addr,
> integer_zero_node);
> - sizet = force_gimple_operand_gsi (gsi, sizet, false, NULL_TREE, true,
> - GSI_SAME_STMT);
> + sizet = force_gimple_operand (sizet, &seq, false, NULL_TREE);
> /* If the call above didn't end up being an integer constant, go one
> statement back and get the __builtin_object_size stmt. Save it,
> we might need it later. */
> if (SSA_VAR_P (sizet))
> - {
> - gsi_prev (gsi);
> - bos_stmt = gsi_stmt (*gsi);
> -
> - /* Move on to where we were. */
> - gsi_next (gsi);
> - }
> + bos_stmt = gsi_stmt (gsi_last (seq));
> }
> else
> return;
> @@ -2298,21 +2324,24 @@ instrument_object_size (gimple_stmt_iter
> && !TREE_ADDRESSABLE (base))
> mark_addressable (base);
>
> + /* We have to emit the check. */
> + gimple_seq this_seq;
> + t = force_gimple_operand (t, &this_seq, true, NULL_TREE);
> + gimple_seq_add_seq_without_update (&seq, this_seq);
> + ptr = force_gimple_operand (ptr, &this_seq, true, NULL_TREE);
> + gimple_seq_add_seq_without_update (&seq, this_seq);
> + ubsan_insert_seq_before (gsi, seq);
> +
> if (bos_stmt
> && gimple_call_builtin_p (bos_stmt, BUILT_IN_DYNAMIC_OBJECT_SIZE))
> ubsan_create_edge (bos_stmt);
>
> - /* We have to emit the check. */
> - t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE, true,
> - GSI_SAME_STMT);
> - ptr = force_gimple_operand_gsi (gsi, ptr, true, NULL_TREE, true,
> - GSI_SAME_STMT);
> tree ckind = build_int_cst (unsigned_char_type_node,
> is_lhs ? UBSAN_STORE_OF : UBSAN_LOAD_OF);
> gimple *g = gimple_build_call_internal (IFN_UBSAN_OBJECT_SIZE, 4,
> ptr, t, sizet, ckind);
> gimple_set_location (g, loc);
> - gsi_insert_before (gsi, g, GSI_SAME_STMT);
> + ubsan_insert_before (gsi, g);
> }
>
> /* Instrument values passed to builtin functions. */
> --- gcc/testsuite/gcc.dg/ubsan/pr112709-1.c.jj 2024-03-11
> 16:55:09.038760951 +0100
> +++ gcc/testsuite/gcc.dg/ubsan/pr112709-1.c 2024-03-11 16:55:02.159854950
> +0100
> @@ -0,0 +1,52 @@
> +/* PR sanitizer/112709 */
> +/* { dg-do compile } */
> +/* { dg-options "-fsanitize=undefined -O2" } */
> +
> +struct S { char c[1024]; };
> +int foo (int);
> +
> +__attribute__((returns_twice, noipa)) struct S
> +bar (int x)
> +{
> + (void) x;
> + struct S s = {};
> + s.c[42] = 42;
> + return s;
> +}
> +
> +void
> +baz (struct S *p)
> +{
> + foo (1);
> + *p = bar (0);
> +}
> +
> +void
> +qux (int x, struct S *p)
> +{
> + if (x == 25)
> + x = foo (2);
> + else if (x == 42)
> + x = foo (foo (3));
> + *p = bar (x);
> +}
> +
> +void
> +corge (int x, struct S *p)
> +{
> + void *q[] = { &&l1, &&l2, &&l3, &&l3 };
> + if (x == 25)
> + {
> + l1:
> + x = foo (2);
> + }
> + else if (x == 42)
> + {
> + l2:
> + x = foo (foo (3));
> + }
> +l3:
> + *p = bar (x);
> + if (x < 4)
> + goto *q[x & 3];
> +}
> --- gcc/testsuite/gcc.dg/ubsan/pr112709-2.c.jj 2024-03-11
> 16:55:37.000378840 +0100
> +++ gcc/testsuite/gcc.dg/ubsan/pr112709-2.c 2024-03-11 17:13:37.517599492
> +0100
> @@ -0,0 +1,50 @@
> +/* PR sanitizer/112709 */
> +/* { dg-do compile } */
> +/* { dg-options "-fsanitize=undefined -O2" } */
> +
> +struct S { char c[1024]; } *p;
> +int foo (int);
> +
> +__attribute__((returns_twice, noipa)) int
> +bar (struct S x)
> +{
> + (void) x.c[0];
> + return 0;
> +}
> +
> +void
> +baz (int *y)
> +{
> + foo (1);
> + *y = bar (*p);
> +}
> +
> +void
> +qux (int x, int *y)
> +{
> + if (x == 25)
> + x = foo (2);
> + else if (x == 42)
> + x = foo (foo (3));
> + *y = bar (*p);
> +}
> +
> +void
> +corge (int x, int *y)
> +{
> + void *q[] = { &&l1, &&l2, &&l3, &&l3 };
> + if (x == 25)
> + {
> + l1:
> + x = foo (2);
> + }
> + else if (x == 42)
> + {
> + l2:
> + x = foo (foo (3));
> + }
> +l3:
> + *y = bar (*p);
> + if (x < 4)
> + goto *q[x & 3];
> +}
>
> Jakub
>
>
--
Richard Biener <[email protected]>
SUSE Software Solutions Germany GmbH,
Frankenstrasse 146, 90461 Nuernberg, Germany;
GF: Ivo Totev, Andrew McDonald, Werner Knoblich; (HRB 36809, AG Nuernberg)