On Mon, Aug 18, 2025 at 4:12 PM Hongtao Liu <crazy...@gmail.com> wrote:
>
> On Mon, Aug 18, 2025 at 4:50 AM H.J. Lu <hjl.to...@gmail.com> wrote:
> >
> > We can't place a TLS call before a conditional jump in a basic block like
> >
> > (code_label 13 11 14 4 2 (nil) [1 uses])
> > (note 14 13 16 4 [bb 4] NOTE_INSN_BASIC_BLOCK)
> > (jump_insn 16 14 17 4 (set (pc)
> >         (if_then_else (le (reg:CCNO 17 flags)
> >                 (const_int 0 [0]))
> >             (label_ref 27)
> >             (pc))) "x.c":10:21 discrim 1 1462 {*jcc}
> >      (expr_list:REG_DEAD (reg:CCNO 17 flags)
> >         (int_list:REG_BR_PROB 628353713 (nil)))
> >  -> 27)
> >
> > since the TLS call will clobber flags register nor place a TLS call in a
> > basic block if any live caller-saved registers aren't dead at the end of
> > the basic block:
> >
> > ;; live  in      6 [bp] 7 [sp] 16 [argp] 17 [flags] 19 [frame] 104
> > ;; live  gen     0 [ax] 102 106 108 116 117 118 120
> > ;; live  kill    5 [di]
> >
> > Instead, we should place such call before all register setting basic
> > blocks which dominate the current basic block.
> >
> > Keep track the replaced GNU and GNU2 TLS instructions.  Use these info to
> > place the __tls_get_addr call and mark FLAGS register as dead.
> >
> > gcc/
> >
> >         PR target/121572
> >         * config/i386/i386-features.cc (replace_tls_call): Add a bitmap
> >         argument and put the updated TLS instruction in the bitmap.
> >         (ix86_get_dominator_for_reg): New.
> >         (ix86_place_single_tls_call): Add 2 bitmap arguments for updated
> >         GNU and GNU2 TLS instructions.  Add the live flag register to the
> >         bitmap.  Insert the __tls_get_addr call before INSN if it replaces
> >         a __tls_get_addr call.  Mark FLAGS register as dead if INSN
> >         replaced the GNU2 TLS instruction.  Clear the live register bitmap
> >         only for hard register.  If there is a conditional jump in the
> >         basic block or any live caller-saved registers aren't dead at the
> >         end of the basic block, get the basic block which dominates all
> >         basic blocks which set the live registers.
> >
> > gcc/testsuite/
> >
> >         PR target/121572
> >         * gcc.target/i386/pr121572-1a.c: New test.
> >         * gcc.target/i386/pr121572-1b.c: Likewise.
> >         * gcc.target/i386/pr121572-2a.c: Likewise.
> >         * gcc.target/i386/pr121572-2b.c: Likewise.
> >
> > Signed-off-by: H.J. Lu <hjl.to...@gmail.com>
> > ---
> >  gcc/config/i386/i386-features.cc            | 136 ++++++++++++++++----
> >  gcc/testsuite/gcc.target/i386/pr121572-1a.c |  41 ++++++
> >  gcc/testsuite/gcc.target/i386/pr121572-1b.c |  18 +++
> >  gcc/testsuite/gcc.target/i386/pr121572-2a.c |  39 ++++++
> >  gcc/testsuite/gcc.target/i386/pr121572-2b.c |   6 +
> >  5 files changed, 215 insertions(+), 25 deletions(-)
> >  create mode 100644 gcc/testsuite/gcc.target/i386/pr121572-1a.c
> >  create mode 100644 gcc/testsuite/gcc.target/i386/pr121572-1b.c
> >  create mode 100644 gcc/testsuite/gcc.target/i386/pr121572-2a.c
> >  create mode 100644 gcc/testsuite/gcc.target/i386/pr121572-2b.c
> >
> > diff --git a/gcc/config/i386/i386-features.cc 
> > b/gcc/config/i386/i386-features.cc
> > index f0bdc5c1880..903f2b0b478 100644
> > --- a/gcc/config/i386/i386-features.cc
> > +++ b/gcc/config/i386/i386-features.cc
> > @@ -3684,10 +3684,12 @@ ix86_broadcast_inner (rtx op, machine_mode mode,
> >    return op;
> >  }
> >
> > -/* Replace CALL instruction in TLS_CALL_INSNS with SET from SRC.  */
> > +/* Replace CALL instruction in TLS_CALL_INSNS with SET from SRC and
> > +   put the updated instruction in UPDATED_TLS_INSNS.  */
> >
> >  static void
> > -replace_tls_call (rtx src, auto_bitmap &tls_call_insns)
> > +replace_tls_call (rtx src, auto_bitmap &tls_call_insns,
> > +                 auto_bitmap &updated_tls_insns)
> >  {
> >    bitmap_iterator bi;
> >    unsigned int id;
> > @@ -3716,6 +3718,9 @@ replace_tls_call (rtx src, auto_bitmap 
> > &tls_call_insns)
> >        if (recog_memoized (set_insn) < 0)
> >         gcc_unreachable ();
> >
> > +      /* Put SET_INSN in UPDATED_TLS_INSNS.  */
> > +      bitmap_set_bit (updated_tls_insns, INSN_UID (set_insn));
> > +
> >        if (dump_file)
> >         {
> >           fprintf (dump_file, "\nReplace:\n\n");
> > @@ -3732,15 +3737,48 @@ replace_tls_call (rtx src, auto_bitmap 
> > &tls_call_insns)
> >      }
> >  }
> >
> > +/* Return the basic block which dominates all basic blocks which set
> > +   hard register REGNO used in basic block BB.  */
> > +
> > +static basic_block
> > +ix86_get_dominator_for_reg (unsigned int regno, basic_block bb)
> > +{
> > +  basic_block set_bb;
> > +  auto_bitmap set_bbs;
> > +
> > +  /* Get all BBs which set REGNO and dominate the current BB from all
> > +     DEFs of REGNO.  */
> > +  for (df_ref def = DF_REG_DEF_CHAIN (regno);
> > +       def;
> > +       def = DF_REF_NEXT_REG (def))
> > +    if (!DF_REF_IS_ARTIFICIAL (def)
> > +       && !DF_REF_FLAGS_IS_SET (def, DF_REF_MAY_CLOBBER)
> > +       && !DF_REF_FLAGS_IS_SET (def, DF_REF_MUST_CLOBBER))
> > +      {
> > +       set_bb = DF_REF_BB (def);
> > +       if (dominated_by_p (CDI_DOMINATORS, bb, set_bb))
> > +         bitmap_set_bit (set_bbs, set_bb->index);
> > +      }
> > +
> > +  bb = nearest_common_dominator_for_set (CDI_DOMINATORS, set_bbs);
> > +  return bb;
> > +}
> > +
> >  /* Generate a TLS call of KIND with VAL and copy the call result to DEST,
> >     at entry of the nearest dominator for basic block map BBS, which is in
> >     the fake loop that contains the whole function, so that there is only
> > -   a single TLS CALL of KIND with VAL in the whole function.  If
> > -   TLSDESC_SET isn't nullptr, insert it before the TLS call.  */
> > +   a single TLS CALL of KIND with VAL in the whole function.
> > +   UPDATED_GNU_TLS_INSNS contains instructions which replace the GNU TLS
> > +   instructions.  UPDATED_GNU2_TLS_INSNS contains instructions which
> > +   replace the GNU2 TLS instructions.  If TLSDESC_SET isn't nullptr,
> > +   insert it before the TLS call.  */
> >
> >  static void
> >  ix86_place_single_tls_call (rtx dest, rtx val, x86_cse_kind kind,
> > -                           bitmap bbs, rtx tlsdesc_set = nullptr)
> > +                           auto_bitmap &bbs,
> > +                           auto_bitmap &updated_gnu_tls_insns,
> > +                           auto_bitmap &updated_gnu2_tls_insns,
> > +                           rtx tlsdesc_set = nullptr)
> >  {
> >    basic_block bb = nearest_common_dominator_for_set (CDI_DOMINATORS, bbs);
> >    while (bb->loop_father->latch
> > @@ -3748,6 +3786,7 @@ ix86_place_single_tls_call (rtx dest, rtx val, 
> > x86_cse_kind kind,
> >      bb = get_immediate_dominator (CDI_DOMINATORS,
> >                                   bb->loop_father->header);
> >
> > +place_tls_call:
> >    rtx_insn *insn = BB_HEAD (bb);
> >    while (insn && !NONDEBUG_INSN_P (insn))
> >      {
> > @@ -3824,7 +3863,8 @@ ix86_place_single_tls_call (rtx dest, rtx val, 
> > x86_cse_kind kind,
> >    auto_bitmap live_caller_saved_regs;
> >    bitmap in = df_live ? DF_LIVE_IN (bb) : DF_LR_IN (bb);
> >
> > -  bool flags_live_p = bitmap_bit_p (in, FLAGS_REG);
> > +  if (bitmap_bit_p (in, FLAGS_REG))
> > +    bitmap_set_bit (live_caller_saved_regs, FLAGS_REG);
> >
> >    unsigned int i;
> >
> > @@ -3845,13 +3885,46 @@ ix86_place_single_tls_call (rtx dest, rtx val, 
> > x86_cse_kind kind,
> >           if (!NONDEBUG_INSN_P (insn))
> >             continue;
> >
> > +         if (JUMP_P (insn))
> > +           {
> > +             /* This must be a conditional jump.  */
> > +             rtx label = JUMP_LABEL (insn);
> > +             if (label == nullptr
> > +                 || ANY_RETURN_P (label)
> > +                 || !(LABEL_P (label) || SYMBOL_REF_P (label)))
> > +               gcc_unreachable ();
Whenever we can't find a place w/ flag_reg dead, we should go to
place_tls_call, why just jump_insn only?

> > +
> > +             /* Place the call before all FLAGS_REG setting BBs since
> > +                we can't place a call before nor after a conditional
> > +                jump.  */
> > +             bb = ix86_get_dominator_for_reg (FLAGS_REG, bb);
> > +             goto place_tls_call;
> Can we put those codes related to find the **BEFORE/AFTER** into a
> function and called it recursively(or iteratively to decide find the
> final BEFORE/AFTER)
> So we can avoid those GOTO statements.
>
> i.e
>
> switch (kind)
>   {
>     ...
>   }
>
>   ix86_place_single_tls_call_place (....)
>
>   if (before)
>   emit_insn_before (..);
>   else if (after)
>    emit_insn_after (...);
>   else
>     gcc_unreachable ();
>
> ....
>
> > +           }
> > +
> >           /* Check if FLAGS register is live.  */
> >           set = single_set (insn);
> >           if (set)
> >             {
> >               rtx dest = SET_DEST (set);
> > -             if (REG_P (dest) && REGNO (dest) == FLAGS_REG)
> > -               flags_live_p = true;
> > +             if (REG_P (dest))
> > +               {
> > +                 if (bitmap_bit_p (updated_gnu_tls_insns,
> > +                                   INSN_UID (insn)))
> > +                   {
> > +                   /* Insert the __tls_get_addr call before INSN which
> > +                      replaces a __tls_get_addr call.  */
> > +                     before = insn;
> > +                     goto insert_before;
> > +                   }
> > +                 if (bitmap_bit_p (updated_gnu2_tls_insns,
> > +                                   INSN_UID (insn)))
> > +                   /* Mark FLAGS register as dead since FLAGS register
> > +                      would be clobbered by the GNU2 TLS instruction.  */
> > +                   bitmap_clear_bit (live_caller_saved_regs,
> > +                                     FLAGS_REG);
> > +                 else if (REGNO (dest) == FLAGS_REG)
> > +                   bitmap_set_bit (live_caller_saved_regs, FLAGS_REG);
> > +               }
> >             }
> >
> >           rtx link;
> > @@ -3863,29 +3936,30 @@ ix86_place_single_tls_call (rtx dest, rtx val, 
> > x86_cse_kind kind,
> >                 for (i = REGNO (XEXP (link, 0));
> >                      i < END_REGNO (XEXP (link, 0));
> >                      i++)
> > -                 bitmap_clear_bit (live_caller_saved_regs, i);
> > -
> > -               /* Check if FLAGS register is dead.  */
> > -               if (REGNO (XEXP (link, 0)) == FLAGS_REG)
> > -                 flags_live_p = false;
> > +                 if (i < FIRST_PSEUDO_REGISTER)
> > +                   bitmap_clear_bit (live_caller_saved_regs, i);
> >
> >                 if (bitmap_empty_p (live_caller_saved_regs))
> >                   {
> > -                   /* All live caller-saved registers are dead after
> > -                      this instruction.  Since TLS instructions
> > -                      clobber FLAGS register, it must be dead where
> > -                      the TLS will be inserted after.  */
> > -                   if (flags_live_p)
> > -                     gcc_unreachable ();
> >                     after = insn;
> >                     goto insert_after;
> >                   }
> >               }
> >         }
> >
> > -      /* All live caller-saved registers should be dead at the end
> > -        of this basic block.  */
> > -      gcc_unreachable ();
> > +      /* If any live caller-saved registers aren't dead at the end
> > +        of this basic block, get the basic block which dominates all
> > +        basic blocks which set the remaining live registers.  */
> > +      auto_bitmap set_bbs;
> > +      bitmap_iterator bi;
> > +      unsigned int id;
> > +      EXECUTE_IF_SET_IN_BITMAP (live_caller_saved_regs, 0, id, bi)
> > +       {
> > +         basic_block set_bb = ix86_get_dominator_for_reg (id, bb);
> > +         bitmap_set_bit (set_bbs, set_bb->index);
> > +       }
> > +      bb = nearest_common_dominator_for_set (CDI_DOMINATORS, set_bbs);
> > +      goto place_tls_call;
> >      }
> >
> >    /* Emit the TLS CALL insn.  */
> > @@ -3895,7 +3969,10 @@ insert_after:
> >        tls_insn = emit_insn_after (tls, after);
> >      }
> >    else
> > -    tls_insn = emit_insn_before (tls, before);
> > +    {
> > +insert_before:
> > +      tls_insn = emit_insn_before (tls, before);
> > +    }
> >
> >    rtx_insn *tlsdesc_insn = nullptr;
> >    if (tlsdesc_set)
> > @@ -4213,6 +4290,8 @@ pass_x86_cse::x86_cse (void)
> >    basic_block bb;
> >    rtx_insn *insn;
> >    unsigned int i;
> > +  auto_bitmap updated_gnu_tls_insns;
> > +  auto_bitmap updated_gnu2_tls_insns;
> >
> >    df_set_flags (DF_DEFER_INSN_RESCAN);
> >
> > @@ -4333,7 +4412,10 @@ pass_x86_cse::x86_cse (void)
> >           case X86_CSE_TLS_LD_BASE:
> >           case X86_CSE_TLSDESC:
> >             broadcast_reg = gen_reg_rtx (load->mode);
> > -           replace_tls_call (broadcast_reg, load->insns);
> > +           replace_tls_call (broadcast_reg, load->insns,
> > +                             (load->kind == X86_CSE_TLSDESC
> > +                              ? updated_gnu2_tls_insns
> > +                              : updated_gnu_tls_insns));
> >             load->broadcast_reg = broadcast_reg;
> >             break;
> >
> > @@ -4399,6 +4481,8 @@ pass_x86_cse::x86_cse (void)
> >                                               load->val,
> >                                               load->kind,
> >                                               load->bbs,
> > +                                             updated_gnu_tls_insns,
> > +                                             updated_gnu2_tls_insns,
> >                                               PATTERN (load->def_insn));
> >                   break;
> >                 case X86_CSE_VEC_DUP:
> > @@ -4442,7 +4526,9 @@ pass_x86_cse::x86_cse (void)
> >                   ix86_place_single_tls_call (load->broadcast_reg,
> >                                               load->val,
> >                                               load->kind,
> > -                                             load->bbs);
> > +                                             load->bbs,
> > +                                             updated_gnu_tls_insns,
> > +                                             updated_gnu2_tls_insns);
> >                   break;
> >                 case X86_CSE_CONST0_VECTOR:
> >                 case X86_CSE_CONSTM1_VECTOR:
> > diff --git a/gcc/testsuite/gcc.target/i386/pr121572-1a.c 
> > b/gcc/testsuite/gcc.target/i386/pr121572-1a.c
> > new file mode 100644
> > index 00000000000..270d8ff5cb6
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/i386/pr121572-1a.c
> > @@ -0,0 +1,41 @@
> > +/* { dg-do compile { target *-*-linux* } } */
> > +/* { dg-options "-O0 -fpic -fplt -mtls-dialect=gnu" } */
> > +/* Keep labels and directives ('.cfi_startproc', '.cfi_endproc').  */
> > +/* { dg-final { check-function-bodies "**" "" "" { target { ! ia32 } } 
> > {^\t?\.} } } */
> > +
> > +/*
> > +**bug:
> > +**.LFB[0-9]+:
> > +**...
> > +**     leaq    tv_cache@tlsld\(%rip\), %rdi
> > +**     call    __tls_get_addr@PLT
> > +**     movl    \$-1, %edi
> > +**     mov[l|q]        %[e|r]ax, %[e|r]bx
> > +**     call    val@PLT
> > +**...
> > +*/
> > +
> > +extern __thread int tv_cache __attribute__ ((visibility ("hidden")));
> > +extern void use_cache (int);
> > +extern int val (int v);
> > +
> > +__attribute__ ((optimize (2)))
> > +void
> > +bug (void)
> > +{
> > +  int compared = val (-1);
> > +
> > +  if (compared == 0 || (compared > 0 && val (2) == 0))
> > +    {
> > +      __builtin_trap ();
> > +    }
> > +
> > +  if (compared < 0)
> > +    {
> > +      use_cache (tv_cache);
> > +      return;
> > +    }
> > +
> > +  use_cache (tv_cache);
> > +  __builtin_trap ();
> > +}
> > diff --git a/gcc/testsuite/gcc.target/i386/pr121572-1b.c 
> > b/gcc/testsuite/gcc.target/i386/pr121572-1b.c
> > new file mode 100644
> > index 00000000000..8a6089109f5
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/i386/pr121572-1b.c
> > @@ -0,0 +1,18 @@
> > +/* { dg-do compile { target *-*-linux* } } */
> > +/* { dg-options "-O0 -fpic -fplt -mtls-dialect=gnu2" } */
> > +/* Keep labels and directives ('.cfi_startproc', '.cfi_endproc').  */
> > +/* { dg-final { check-function-bodies "**" "" "" { target { ! ia32 } } 
> > {^\t?\.} } } */
> > +
> > +/*
> > +**bug:
> > +**.LFB[0-9]+:
> > +**...
> > +**     lea[l|q]        tv_cache@TLSDESC\(%rip\), %[e|r]ax
> > +**     movl    \$-1, %edi
> > +**     call    \*tv_cache@TLSCALL\(%[e|r]ax\)
> > +**     mov[l|q]        %[e|r]ax, %[e|r]bx
> > +**     call    val@PLT
> > +**...
> > +*/
> > +
> > +#include "pr121572-1a.c"
> > diff --git a/gcc/testsuite/gcc.target/i386/pr121572-2a.c 
> > b/gcc/testsuite/gcc.target/i386/pr121572-2a.c
> > new file mode 100644
> > index 00000000000..38b254657d3
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/i386/pr121572-2a.c
> > @@ -0,0 +1,39 @@
> > +/* { dg-do compile { target *-*-linux* } } */
> > +/* { dg-options "-O2 -fpic -fplt -mtls-dialect=gnu" } */
> > +
> > +typedef enum
> > +{
> > +  MPFR_RNDN
> > +} mpfr_rnd_t;
> > +typedef int mpfr_t[1];
> > +long __gmpfr_emin, mpfr_agm_expo_0;
> > +_Thread_local long __gmpfr_emax;
> > +int mpfr_agm_compare, mpfr_agm___trans_tmp_1;
> > +mpfr_t mpfr_agm_u;
> > +void mpfr_mul (int *, int, int, mpfr_rnd_t);
> > +int
> > +mpfr_agm (int op1)
> > +{
> > +  int op2 = 0;
> > +  if (__builtin_expect (mpfr_agm_compare == 0, 0))
> > +    return 0;
> > +  if (mpfr_agm_compare > 0)
> > +    {
> > +      int t = op1;
> > +      op2 = t;
> > +    }
> > +  mpfr_agm_expo_0 = __gmpfr_emax;
> > +  for (;;)
> > +    {
> > +    retry:
> > +      mpfr_mul (mpfr_agm_u, op1, op2, MPFR_RNDN);
> > +      if (0)
> > +        goto retry;
> > +      if (__builtin_expect (mpfr_agm___trans_tmp_1, 1))
> > +        break;
> > +    }
> > +  __gmpfr_emin = __gmpfr_emax;
> > +  return 0;
> > +}
> > +
> > +/* { dg-final { scan-assembler-times "call\[ \t\]__tls_get_addr@PLT" 1 { 
> > target { ! ia32 } } } } */
> > diff --git a/gcc/testsuite/gcc.target/i386/pr121572-2b.c 
> > b/gcc/testsuite/gcc.target/i386/pr121572-2b.c
> > new file mode 100644
> > index 00000000000..33d70024324
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/i386/pr121572-2b.c
> > @@ -0,0 +1,6 @@
> > +/* { dg-do compile { target *-*-linux* } } */
> > +/* { dg-options "-O2 -fpic -fplt -mtls-dialect=gnu2" } */
> > +
> > +#include "pr121572-2a.c"
> > +
> > +/* { dg-final { scan-assembler-times "call\[ 
> > \t\]\\*__gmpfr_emax@TLSCALL\\(%(?:r|e)ax\\)" 1 { target { ! ia32 } } } } */
> > --
> > 2.50.1
> >
>
>
> --
> BR,
> Hongtao



-- 
BR,
Hongtao

Reply via email to