On Tue, 12 Mar 2024, Jakub Jelinek wrote:

> Hi!
> 
> ubsan, asan (both PR112709) and _BitInt lowering (PR113466) want to
> insert some instrumentation or adjustment statements before some statement.
> This unfortunately creates invalid IL if inserting before a returns_twice
> call, because we require that such calls are the first statement in a basic
> block and that we have an edge from the .ABNORMAL_DISPATCHER block to
> the block containing the returns_twice call (in addition to other edge(s)).
> 
> The following patch adds helper functions for such insertions and uses it
> for now in ubsan (I'll post a follow up which uses it in asan and will
> work later on the _BitInt lowering PR).
> 
> In particular, if the bb with returns_twice call at the start has just
> 2 edges, one EDGE_ABNORMAL from .ABNORMAL_DISPATCHER and another
> (non-EDGE_ABNORMAL/EDGE_EH) from some other bb, it just inserts the
> statement or sequence on that other edge.
> If the bb has more predecessor edges or the one not from
> .ABNORMAL_DISPATCHER is e.g. an EH edge (this latter case likely shouldn't
> happen, one would need labels or something like that), the patch splits the
> block with returns_twice call such that there is just one edge next to
> .ABNORMAL_DISPATCHER edge and adjusts PHIs as needed to make it happen.
> The functions also replace uses of PHIs from the returns_twice bb with
> the corresponding PHI arguments, because otherwise it would be invalid IL.
> 
> E.g. in ubsan/pr112709-2.c (qux) we have before the ubsan pass
>   <bb 10> :
>   # .MEM_5(ab) = PHI <.MEM_4(9), .MEM_25(ab)(11)>
>   # _7(ab) = PHI <_20(9), _8(ab)(11)>
>   # .MEM_21(ab) = VDEF <.MEM_5(ab)>
>   _22 = bar (*_7(ab));
> where bar is returns_twice call and bb 11 has .ABNORMAL_DISPATCHER call,
> this patch instruments it like:
>   <bb 9> :
>   # .MEM_4 = PHI <.MEM_17(ab)(4), .MEM_10(D)(5), .MEM_14(ab)(8)>
>   # DEBUG BEGIN_STMT
>   # VUSE <.MEM_4>
>   _20 = p;
>   # .MEM_27 = VDEF <.MEM_4>
>   .UBSAN_NULL (_20, 0B, 0);
>   # VUSE <.MEM_27>
>   _2 = __builtin_dynamic_object_size (_20, 0);
>   # .MEM_28 = VDEF <.MEM_27>
>   .UBSAN_OBJECT_SIZE (_20, 1024, _2, 0);
>   
>   <bb 10> :
>   # .MEM_5(ab) = PHI <.MEM_28(9), .MEM_25(ab)(11)>
>   # _7(ab) = PHI <_20(9), _8(ab)(11)>
>   # .MEM_21(ab) = VDEF <.MEM_5(ab)>
>   _22 = bar (*_7(ab));
> The edge from .ABNORMAL_DISPATCHER is there just to represent the
> returning for 2nd and later times, the instrumentation can't be
> done at that point as there is no code executed during that point.
> The ubsan/pr112709-1.c testcase includes non-virtual PHIs to cover
> the handling of those as well.
> 
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
> 
> 2024-03-12  Jakub Jelinek  <ja...@redhat.com>
> 
>       PR sanitizer/112709
>       * gimple-iterator.h (gsi_insert_before_returns_twice_call,
>       gsi_insert_seq_before_returns_twice_call): Declare.
>       * gimple-iterator.cc: Include gimplify.h.
>       (edge_before_returns_twice_call, gsi_insert_before_returns_twice_call,
>       gsi_insert_seq_before_returns_twice_call): New functions.
>       * ubsan.cc (ubsan_insert_before, ubsan_insert_seq_before): New
>       functions.
>       (instrument_mem_ref, instrument_pointer_overflow,
>       instrument_nonnull_arg, instrument_nonnull_return): Use
>       ubsan_insert_before instead of gsi_insert_before.
>       (maybe_instrument_pointer_overflow): Use force_gimple_operand,
>       gimple_seq_add_seq_without_update and ubsan_insert_seq_before
>       instead of force_gimple_operand_gsi.
>       (instrument_object_size): Likewise.  Use ubsan_insert_before instead
>       of gsi_insert_before.
> 
>       * gcc.dg/ubsan/pr112709-1.c: New test.
>       * gcc.dg/ubsan/pr112709-2.c: New test.
> 
> --- gcc/gimple-iterator.h.jj  2024-02-08 11:17:37.144661383 +0100
> +++ gcc/gimple-iterator.h     2024-03-11 16:16:24.670464737 +0100
> @@ -93,6 +93,10 @@ extern void gsi_insert_on_edge (edge, gi
>  extern void gsi_insert_seq_on_edge (edge, gimple_seq);
>  extern basic_block gsi_insert_on_edge_immediate (edge, gimple *);
>  extern basic_block gsi_insert_seq_on_edge_immediate (edge, gimple_seq);
> +extern basic_block gsi_insert_before_returns_twice_call (basic_block,
> +                                                      gimple *);
> +extern basic_block gsi_insert_seq_before_returns_twice_call (basic_block,
> +                                                          gimple_seq);
>  extern void gsi_commit_edge_inserts (void);
>  extern void gsi_commit_one_edge_insert (edge, basic_block *);
>  extern gphi_iterator gsi_start_phis (basic_block);
> --- gcc/gimple-iterator.cc.jj 2024-02-08 11:17:37.144661383 +0100
> +++ gcc/gimple-iterator.cc    2024-03-11 18:17:08.994804808 +0100
> @@ -32,6 +32,7 @@ along with GCC; see the file COPYING3.
>  #include "tree-cfg.h"
>  #include "tree-ssa.h"
>  #include "value-prof.h"
> +#include "gimplify.h"
>  
>  
>  /* Mark the statement STMT as modified, and update it.  */
> @@ -944,3 +945,135 @@ gsi_start_phis (basic_block bb)
>  
>    return i;
>  }
> +
> +/* Helper function for gsi_insert_before_returns_twice_call and
> +   gsi_insert_seq_before_returns_twice_call.  Find edge to
> +   insert statements before returns_twice call at the start of BB,
> +   if there isn't just one, split the bb and adjust PHIs to ensure
> +   that.  */
> +
> +static edge
> +edge_before_returns_twice_call (basic_block bb)
> +{
> +  gimple_stmt_iterator gsi = gsi_start_nondebug_bb (bb);
> +  gcc_checking_assert (is_gimple_call (gsi_stmt (gsi))
> +                    && (gimple_call_flags (gsi_stmt (gsi))
> +                        & ECF_RETURNS_TWICE) != 0);
> +  edge_iterator ei;
> +  edge e, ad_edge = NULL, other_edge = NULL;
> +  bool split = false;
> +  /* Look for a fallthru and possibly a single backedge.  */

I can't see the backedge handling (I'm not sure it's actually required?)

> +  FOR_EACH_EDGE (e, ei, bb->preds)
> +    {
> +      if ((e->flags & (EDGE_ABNORMAL | EDGE_EH)) == EDGE_ABNORMAL)

EDGE_EH is special because of the landing pad?

> +     {
> +       gimple_stmt_iterator gsi
> +         = gsi_start_nondebug_after_labels_bb (e->src);
> +       gimple *ad = gsi_stmt (gsi);
> +       if (ad && gimple_call_internal_p (ad, IFN_ABNORMAL_DISPATCHER))
> +         {
> +           gcc_checking_assert (ad_edge == NULL);
> +           ad_edge = e;
> +           continue;
> +         }
> +     }
> +      if (other_edge || e->flags & (EDGE_ABNORMAL | EDGE_EH))
> +     split = true;
> +      other_edge = e;
> +    }
> +  gcc_checking_assert (ad_edge);
> +  if (other_edge == NULL)
> +    split = true;
> +  if (split)
> +    {
> +      other_edge = split_block_after_labels (bb);
> +      e = make_edge (ad_edge->src, other_edge->dest, EDGE_ABNORMAL);

In theory there could be multiple abnormal edges.  Did you try
to use make_forwarder_block instead of manually doing it?  Or
does that fail because we end up redirecting the abnormal edges
which you avoid by re-making it new and scrapping the old one?
I fail to remember why we can't redirect abnormal edges - yes,
there's no flow to be redirected but the process of "redirection"
still should work.

> +      for (gphi_iterator gsi = gsi_start_phis (other_edge->src);
> +        !gsi_end_p (gsi); gsi_next (&gsi))
> +     {
> +       gphi *phi = gsi.phi ();
> +       tree lhs = gimple_phi_result (phi);
> +       tree new_lhs;
> +       if (virtual_operand_p (lhs))
> +         new_lhs = make_ssa_name (SSA_NAME_VAR (lhs));
> +       else
> +         new_lhs = make_ssa_name (TREE_TYPE (lhs));
> +       gimple_phi_set_result (phi, new_lhs);
> +       gphi *new_phi = create_phi_node (lhs, other_edge->dest);
> +       add_phi_arg (new_phi, new_lhs, other_edge, UNKNOWN_LOCATION);
> +       add_phi_arg (new_phi, gimple_phi_arg_def_from_edge (phi, ad_edge),
> +                    e, gimple_phi_arg_location_from_edge (phi, ad_edge));
> +     }
> +      remove_edge (ad_edge);
> +    }
> +  return other_edge;
> +}
> +
> +/* Insert G before BB which starts with an ECF_RETURNS_TWICE call.
> +   If BB has a single predecessor edge except for edge from
> +   .ABNORMAL_DISPATCHER, insert on that edge, otherwise split
> +   BB before the call, adjust PHIs and insert into the new BB.
> +   If G refers to any results of BB's PHIs, replace them afterwards
> +   with corresponding PHI arg.  */
> +
> +basic_block
> +gsi_insert_before_returns_twice_call (basic_block bb, gimple *g)
> +{
> +  edge e = edge_before_returns_twice_call (bb);
> +  basic_block new_bb = gsi_insert_on_edge_immediate (e, g);
> +  use_operand_p use_p;
> +  ssa_op_iter iter;
> +  bool m = false;
> +  FOR_EACH_SSA_USE_OPERAND (use_p, g, iter, SSA_OP_USE)
> +    {
> +      tree s = USE_FROM_PTR (use_p);
> +      if (SSA_NAME_DEF_STMT (s)
> +       && gimple_code (SSA_NAME_DEF_STMT (s)) == GIMPLE_PHI
> +       && gimple_bb (SSA_NAME_DEF_STMT (s)) == e->dest)
> +     {
> +       tree r = gimple_phi_arg_def_from_edge (SSA_NAME_DEF_STMT (s), e);
> +       SET_USE (use_p, unshare_expr (r));
> +       m = true;
> +     }
> +    }

Ick.  Doesn't the forwarder keep the PHI defs and new PHI defs
are created in e->dest instead, I don't see how we need this?

Btw, gsi_insert_before_returns_twice_call might be usable as
gsi_insert_after_labels ()?  Or alternatively not expose
gsi_insert_before_returns_twice_call but instead make
split_block () behave "correctly" for abnormals (or add
split_block_before_returns_twice_call ()).  But that's just bike-shedding.

Richard.


> +  if (m)
> +    update_stmt (g);
> +  return new_bb;
> +}
> +
> +/* Similiarly for sequence SEQ.  */
> +
> +basic_block
> +gsi_insert_seq_before_returns_twice_call (basic_block bb, gimple_seq seq)
> +{
> +  if (gimple_seq_empty_p (seq))
> +    return NULL;
> +  gimple *f = gimple_seq_first_stmt (seq);
> +  gimple *l = gimple_seq_last_stmt (seq);
> +  edge e = edge_before_returns_twice_call (bb);
> +  basic_block new_bb = gsi_insert_seq_on_edge_immediate (e, seq);
> +  use_operand_p use_p;
> +  ssa_op_iter iter;
> +  for (gimple_stmt_iterator gsi = gsi_for_stmt (f); ; gsi_next (&gsi))
> +    {
> +      gimple *g = gsi_stmt (gsi);
> +      bool m = false;
> +      FOR_EACH_SSA_USE_OPERAND (use_p, g, iter, SSA_OP_USE)
> +     {
> +       tree s = USE_FROM_PTR (use_p);
> +       if (SSA_NAME_DEF_STMT (s)
> +           && gimple_code (SSA_NAME_DEF_STMT (s)) == GIMPLE_PHI
> +           && gimple_bb (SSA_NAME_DEF_STMT (s)) == e->dest)
> +         {
> +           tree r = gimple_phi_arg_def_from_edge (SSA_NAME_DEF_STMT (s), e);
> +           SET_USE (use_p, unshare_expr (r));
> +           m = true;
> +         }
> +     }
> +      if (m)
> +     update_stmt (g);
> +      if (g == l)
> +     break;
> +    }
> +  return new_bb;
> +}
> --- gcc/ubsan.cc.jj   2024-01-03 11:51:32.312720350 +0100
> +++ gcc/ubsan.cc      2024-03-11 17:07:58.104243341 +0100
> @@ -1432,6 +1432,37 @@ ubsan_expand_vptr_ifn (gimple_stmt_itera
>    return true;
>  }
>  
> +/* Insert G stmt before ITER.  If ITER is a returns_twice call,
> +   insert it on an appropriate edge instead.  */
> +
> +static void
> +ubsan_insert_before (gimple_stmt_iterator *iter, gimple *g)
> +{
> +  gimple *stmt = gsi_stmt (*iter);
> +  if (stmt
> +      && is_gimple_call (stmt)
> +      && (gimple_call_flags (stmt) & ECF_RETURNS_TWICE) != 0)
> +    gsi_insert_before_returns_twice_call (gsi_bb (*iter), g);
> +  else
> +    gsi_insert_before (iter, g, GSI_SAME_STMT);
> +}
> +
> +/* Similarly for sequence SEQ.  */
> +
> +static void
> +ubsan_insert_seq_before (gimple_stmt_iterator *iter, gimple_seq seq)
> +{
> +  if (gimple_seq_empty_p (seq))
> +    return;
> +  gimple *stmt = gsi_stmt (*iter);
> +  if (stmt
> +      && is_gimple_call (stmt)
> +      && (gimple_call_flags (stmt) & ECF_RETURNS_TWICE) != 0)
> +    gsi_insert_seq_before_returns_twice_call (gsi_bb (*iter), seq);
> +  else
> +    gsi_insert_seq_before (iter, seq, GSI_SAME_STMT);
> +}
> +
>  /* Instrument a memory reference.  BASE is the base of MEM, IS_LHS says
>     whether the pointer is on the left hand side of the assignment.  */
>  
> @@ -1458,7 +1489,7 @@ instrument_mem_ref (tree mem, tree base,
>    tree alignt = build_int_cst (pointer_sized_int_node, align);
>    gcall *g = gimple_build_call_internal (IFN_UBSAN_NULL, 3, t, kind, alignt);
>    gimple_set_location (g, gimple_location (gsi_stmt (*iter)));
> -  gsi_insert_before (iter, g, GSI_SAME_STMT);
> +  ubsan_insert_before (iter, g);
>  }
>  
>  /* Perform the pointer instrumentation.  */
> @@ -1485,7 +1516,7 @@ instrument_pointer_overflow (gimple_stmt
>      return;
>    gcall *g = gimple_build_call_internal (IFN_UBSAN_PTR, 2, ptr, off);
>    gimple_set_location (g, gimple_location (gsi_stmt (*gsi)));
> -  gsi_insert_before (gsi, g, GSI_SAME_STMT);
> +  ubsan_insert_before (gsi, g);
>  }
>  
>  /* Instrument pointer arithmetics if any.  */
> @@ -1577,10 +1608,11 @@ maybe_instrument_pointer_overflow (gimpl
>        else
>       t = fold_convert (sizetype, moff);
>      }
> -  t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE, true,
> -                             GSI_SAME_STMT);
> -  base_addr = force_gimple_operand_gsi (gsi, base_addr, true, NULL_TREE, 
> true,
> -                                     GSI_SAME_STMT);
> +  gimple_seq seq, this_seq;
> +  t = force_gimple_operand (t, &seq, true, NULL_TREE);
> +  base_addr = force_gimple_operand (base_addr, &this_seq, true, NULL_TREE);
> +  gimple_seq_add_seq_without_update (&seq, this_seq);
> +  ubsan_insert_seq_before (gsi, seq);
>    instrument_pointer_overflow (gsi, base_addr, t);
>  }
>  
> @@ -2035,7 +2067,7 @@ instrument_nonnull_arg (gimple_stmt_iter
>           {
>             g = gimple_build_assign (make_ssa_name (TREE_TYPE (arg)), arg);
>             gimple_set_location (g, loc[0]);
> -           gsi_insert_before (gsi, g, GSI_SAME_STMT);
> +           ubsan_insert_before (gsi, g);
>             arg = gimple_assign_lhs (g);
>           }
>  
> @@ -2068,7 +2100,7 @@ instrument_nonnull_arg (gimple_stmt_iter
>             g = gimple_build_call (fn, 1, data);
>           }
>         gimple_set_location (g, loc[0]);
> -       gsi_insert_before (gsi, g, GSI_SAME_STMT);
> +       ubsan_insert_before (gsi, g);
>         ubsan_create_edge (g);
>       }
>        *gsi = gsi_for_stmt (stmt);
> @@ -2124,7 +2156,7 @@ instrument_nonnull_return (gimple_stmt_i
>         g = gimple_build_call (fn, 2, data, data2);
>       }
>        gimple_set_location (g, loc[0]);
> -      gsi_insert_before (gsi, g, GSI_SAME_STMT);
> +      ubsan_insert_before (gsi, g);
>        ubsan_create_edge (g);
>        *gsi = gsi_for_stmt (stmt);
>      }
> @@ -2231,6 +2263,7 @@ instrument_object_size (gimple_stmt_iter
>    tree sizet;
>    tree base_addr = base;
>    gimple *bos_stmt = NULL;
> +  gimple_seq seq = NULL;
>    if (decl_p)
>      base_addr = build1 (ADDR_EXPR,
>                       build_pointer_type (TREE_TYPE (base)), base);
> @@ -2244,19 +2277,12 @@ instrument_object_size (gimple_stmt_iter
>        sizet = builtin_decl_explicit (BUILT_IN_DYNAMIC_OBJECT_SIZE);
>        sizet = build_call_expr_loc (loc, sizet, 2, base_addr,
>                                  integer_zero_node);
> -      sizet = force_gimple_operand_gsi (gsi, sizet, false, NULL_TREE, true,
> -                                     GSI_SAME_STMT);
> +      sizet = force_gimple_operand (sizet, &seq, false, NULL_TREE);
>        /* If the call above didn't end up being an integer constant, go one
>        statement back and get the __builtin_object_size stmt.  Save it,
>        we might need it later.  */
>        if (SSA_VAR_P (sizet))
> -     {
> -       gsi_prev (gsi);
> -       bos_stmt = gsi_stmt (*gsi);
> -
> -       /* Move on to where we were.  */
> -       gsi_next (gsi);
> -     }
> +     bos_stmt = gsi_stmt (gsi_last (seq));
>      }
>    else
>      return;
> @@ -2298,21 +2324,24 @@ instrument_object_size (gimple_stmt_iter
>        && !TREE_ADDRESSABLE (base))
>      mark_addressable (base);
>  
> +  /* We have to emit the check.  */
> +  gimple_seq this_seq;
> +  t = force_gimple_operand (t, &this_seq, true, NULL_TREE);
> +  gimple_seq_add_seq_without_update (&seq, this_seq);
> +  ptr = force_gimple_operand (ptr, &this_seq, true, NULL_TREE);
> +  gimple_seq_add_seq_without_update (&seq, this_seq);
> +  ubsan_insert_seq_before (gsi, seq);
> +
>    if (bos_stmt
>        && gimple_call_builtin_p (bos_stmt, BUILT_IN_DYNAMIC_OBJECT_SIZE))
>      ubsan_create_edge (bos_stmt);
>  
> -  /* We have to emit the check.  */
> -  t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE, true,
> -                             GSI_SAME_STMT);
> -  ptr = force_gimple_operand_gsi (gsi, ptr, true, NULL_TREE, true,
> -                               GSI_SAME_STMT);
>    tree ckind = build_int_cst (unsigned_char_type_node,
>                             is_lhs ? UBSAN_STORE_OF : UBSAN_LOAD_OF);
>    gimple *g = gimple_build_call_internal (IFN_UBSAN_OBJECT_SIZE, 4,
>                                        ptr, t, sizet, ckind);
>    gimple_set_location (g, loc);
> -  gsi_insert_before (gsi, g, GSI_SAME_STMT);
> +  ubsan_insert_before (gsi, g);
>  }
>  
>  /* Instrument values passed to builtin functions.  */
> --- gcc/testsuite/gcc.dg/ubsan/pr112709-1.c.jj        2024-03-11 
> 16:55:09.038760951 +0100
> +++ gcc/testsuite/gcc.dg/ubsan/pr112709-1.c   2024-03-11 16:55:02.159854950 
> +0100
> @@ -0,0 +1,52 @@
> +/* PR sanitizer/112709 */
> +/* { dg-do compile } */
> +/* { dg-options "-fsanitize=undefined -O2" } */
> +
> +struct S { char c[1024]; };
> +int foo (int);
> +
> +__attribute__((returns_twice, noipa)) struct S
> +bar (int x)
> +{
> +  (void) x;
> +  struct S s = {};
> +  s.c[42] = 42;
> +  return s;
> +}
> +
> +void
> +baz (struct S *p)
> +{
> +  foo (1);
> +  *p = bar (0);
> +}
> +
> +void
> +qux (int x, struct S *p)
> +{
> +  if (x == 25)
> +    x = foo (2);
> +  else if (x == 42)
> +    x = foo (foo (3));
> +  *p = bar (x);
> +}
> +
> +void
> +corge (int x, struct S *p)
> +{
> +  void *q[] = { &&l1, &&l2, &&l3, &&l3 };
> +  if (x == 25)
> +    {
> +    l1:
> +      x = foo (2);
> +    }
> +  else if (x == 42)
> +    {
> +    l2:
> +      x = foo (foo (3));
> +    }
> +l3:
> +  *p = bar (x);
> +  if (x < 4)
> +    goto *q[x & 3];
> +}
> --- gcc/testsuite/gcc.dg/ubsan/pr112709-2.c.jj        2024-03-11 
> 16:55:37.000378840 +0100
> +++ gcc/testsuite/gcc.dg/ubsan/pr112709-2.c   2024-03-11 17:13:37.517599492 
> +0100
> @@ -0,0 +1,50 @@
> +/* PR sanitizer/112709 */
> +/* { dg-do compile } */
> +/* { dg-options "-fsanitize=undefined -O2" } */
> +
> +struct S { char c[1024]; } *p;
> +int foo (int);
> +
> +__attribute__((returns_twice, noipa)) int
> +bar (struct S x)
> +{
> +  (void) x.c[0];
> +  return 0;
> +}
> +
> +void
> +baz (int *y)
> +{
> +  foo (1);
> +  *y = bar (*p);
> +}
> +
> +void
> +qux (int x, int *y)
> +{
> +  if (x == 25)
> +    x = foo (2);
> +  else if (x == 42)
> +    x = foo (foo (3));
> +  *y = bar (*p);
> +}
> +
> +void
> +corge (int x, int *y)
> +{
> +  void *q[] = { &&l1, &&l2, &&l3, &&l3 };
> +  if (x == 25)
> +    {
> +    l1:
> +      x = foo (2);
> +    }
> +  else if (x == 42)
> +    {
> +    l2:
> +      x = foo (foo (3));
> +    }
> +l3:
> +  *y = bar (*p);
> +  if (x < 4)
> +    goto *q[x & 3];
> +}
> 
>       Jakub
> 
> 

-- 
Richard Biener <rguent...@suse.de>
SUSE Software Solutions Germany GmbH,
Frankenstrasse 146, 90461 Nuernberg, Germany;
GF: Ivo Totev, Andrew McDonald, Werner Knoblich; (HRB 36809, AG Nuernberg)

Reply via email to