On Mon, Aug 18, 2025 at 4:12 PM Hongtao Liu <crazy...@gmail.com> wrote: > > On Mon, Aug 18, 2025 at 4:50 AM H.J. Lu <hjl.to...@gmail.com> wrote: > > > > We can't place a TLS call before a conditional jump in a basic block like > > > > (code_label 13 11 14 4 2 (nil) [1 uses]) > > (note 14 13 16 4 [bb 4] NOTE_INSN_BASIC_BLOCK) > > (jump_insn 16 14 17 4 (set (pc) > > (if_then_else (le (reg:CCNO 17 flags) > > (const_int 0 [0])) > > (label_ref 27) > > (pc))) "x.c":10:21 discrim 1 1462 {*jcc} > > (expr_list:REG_DEAD (reg:CCNO 17 flags) > > (int_list:REG_BR_PROB 628353713 (nil))) > > -> 27) > > > > since the TLS call will clobber flags register nor place a TLS call in a > > basic block if any live caller-saved registers aren't dead at the end of > > the basic block: > > > > ;; live in 6 [bp] 7 [sp] 16 [argp] 17 [flags] 19 [frame] 104 > > ;; live gen 0 [ax] 102 106 108 116 117 118 120 > > ;; live kill 5 [di] > > > > Instead, we should place such call before all register setting basic > > blocks which dominate the current basic block. > > > > Keep track the replaced GNU and GNU2 TLS instructions. Use these info to > > place the __tls_get_addr call and mark FLAGS register as dead. > > > > gcc/ > > > > PR target/121572 > > * config/i386/i386-features.cc (replace_tls_call): Add a bitmap > > argument and put the updated TLS instruction in the bitmap. > > (ix86_get_dominator_for_reg): New. > > (ix86_place_single_tls_call): Add 2 bitmap arguments for updated > > GNU and GNU2 TLS instructions. Add the live flag register to the > > bitmap. Insert the __tls_get_addr call before INSN if it replaces > > a __tls_get_addr call. Mark FLAGS register as dead if INSN > > replaced the GNU2 TLS instruction. Clear the live register bitmap > > only for hard register. If there is a conditional jump in the > > basic block or any live caller-saved registers aren't dead at the > > end of the basic block, get the basic block which dominates all > > basic blocks which set the live registers. > > > > gcc/testsuite/ > > > > PR target/121572 > > * gcc.target/i386/pr121572-1a.c: New test. > > * gcc.target/i386/pr121572-1b.c: Likewise. > > * gcc.target/i386/pr121572-2a.c: Likewise. > > * gcc.target/i386/pr121572-2b.c: Likewise. > > > > Signed-off-by: H.J. Lu <hjl.to...@gmail.com> > > --- > > gcc/config/i386/i386-features.cc | 136 ++++++++++++++++---- > > gcc/testsuite/gcc.target/i386/pr121572-1a.c | 41 ++++++ > > gcc/testsuite/gcc.target/i386/pr121572-1b.c | 18 +++ > > gcc/testsuite/gcc.target/i386/pr121572-2a.c | 39 ++++++ > > gcc/testsuite/gcc.target/i386/pr121572-2b.c | 6 + > > 5 files changed, 215 insertions(+), 25 deletions(-) > > create mode 100644 gcc/testsuite/gcc.target/i386/pr121572-1a.c > > create mode 100644 gcc/testsuite/gcc.target/i386/pr121572-1b.c > > create mode 100644 gcc/testsuite/gcc.target/i386/pr121572-2a.c > > create mode 100644 gcc/testsuite/gcc.target/i386/pr121572-2b.c > > > > diff --git a/gcc/config/i386/i386-features.cc > > b/gcc/config/i386/i386-features.cc > > index f0bdc5c1880..903f2b0b478 100644 > > --- a/gcc/config/i386/i386-features.cc > > +++ b/gcc/config/i386/i386-features.cc > > @@ -3684,10 +3684,12 @@ ix86_broadcast_inner (rtx op, machine_mode mode, > > return op; > > } > > > > -/* Replace CALL instruction in TLS_CALL_INSNS with SET from SRC. */ > > +/* Replace CALL instruction in TLS_CALL_INSNS with SET from SRC and > > + put the updated instruction in UPDATED_TLS_INSNS. */ > > > > static void > > -replace_tls_call (rtx src, auto_bitmap &tls_call_insns) > > +replace_tls_call (rtx src, auto_bitmap &tls_call_insns, > > + auto_bitmap &updated_tls_insns) > > { > > bitmap_iterator bi; > > unsigned int id; > > @@ -3716,6 +3718,9 @@ replace_tls_call (rtx src, auto_bitmap > > &tls_call_insns) > > if (recog_memoized (set_insn) < 0) > > gcc_unreachable (); > > > > + /* Put SET_INSN in UPDATED_TLS_INSNS. */ > > + bitmap_set_bit (updated_tls_insns, INSN_UID (set_insn)); > > + > > if (dump_file) > > { > > fprintf (dump_file, "\nReplace:\n\n"); > > @@ -3732,15 +3737,48 @@ replace_tls_call (rtx src, auto_bitmap > > &tls_call_insns) > > } > > } > > > > +/* Return the basic block which dominates all basic blocks which set > > + hard register REGNO used in basic block BB. */ > > + > > +static basic_block > > +ix86_get_dominator_for_reg (unsigned int regno, basic_block bb) > > +{ > > + basic_block set_bb; > > + auto_bitmap set_bbs; > > + > > + /* Get all BBs which set REGNO and dominate the current BB from all > > + DEFs of REGNO. */ > > + for (df_ref def = DF_REG_DEF_CHAIN (regno); > > + def; > > + def = DF_REF_NEXT_REG (def)) > > + if (!DF_REF_IS_ARTIFICIAL (def) > > + && !DF_REF_FLAGS_IS_SET (def, DF_REF_MAY_CLOBBER) > > + && !DF_REF_FLAGS_IS_SET (def, DF_REF_MUST_CLOBBER)) > > + { > > + set_bb = DF_REF_BB (def); > > + if (dominated_by_p (CDI_DOMINATORS, bb, set_bb)) > > + bitmap_set_bit (set_bbs, set_bb->index); > > + } > > + > > + bb = nearest_common_dominator_for_set (CDI_DOMINATORS, set_bbs); > > + return bb; > > +} > > + > > /* Generate a TLS call of KIND with VAL and copy the call result to DEST, > > at entry of the nearest dominator for basic block map BBS, which is in > > the fake loop that contains the whole function, so that there is only > > - a single TLS CALL of KIND with VAL in the whole function. If > > - TLSDESC_SET isn't nullptr, insert it before the TLS call. */ > > + a single TLS CALL of KIND with VAL in the whole function. > > + UPDATED_GNU_TLS_INSNS contains instructions which replace the GNU TLS > > + instructions. UPDATED_GNU2_TLS_INSNS contains instructions which > > + replace the GNU2 TLS instructions. If TLSDESC_SET isn't nullptr, > > + insert it before the TLS call. */ > > > > static void > > ix86_place_single_tls_call (rtx dest, rtx val, x86_cse_kind kind, > > - bitmap bbs, rtx tlsdesc_set = nullptr) > > + auto_bitmap &bbs, > > + auto_bitmap &updated_gnu_tls_insns, > > + auto_bitmap &updated_gnu2_tls_insns, > > + rtx tlsdesc_set = nullptr) > > { > > basic_block bb = nearest_common_dominator_for_set (CDI_DOMINATORS, bbs); > > while (bb->loop_father->latch > > @@ -3748,6 +3786,7 @@ ix86_place_single_tls_call (rtx dest, rtx val, > > x86_cse_kind kind, > > bb = get_immediate_dominator (CDI_DOMINATORS, > > bb->loop_father->header); > > > > +place_tls_call: > > rtx_insn *insn = BB_HEAD (bb); > > while (insn && !NONDEBUG_INSN_P (insn)) > > { > > @@ -3824,7 +3863,8 @@ ix86_place_single_tls_call (rtx dest, rtx val, > > x86_cse_kind kind, > > auto_bitmap live_caller_saved_regs; > > bitmap in = df_live ? DF_LIVE_IN (bb) : DF_LR_IN (bb); > > > > - bool flags_live_p = bitmap_bit_p (in, FLAGS_REG); > > + if (bitmap_bit_p (in, FLAGS_REG)) > > + bitmap_set_bit (live_caller_saved_regs, FLAGS_REG); > > > > unsigned int i; > > > > @@ -3845,13 +3885,46 @@ ix86_place_single_tls_call (rtx dest, rtx val, > > x86_cse_kind kind, > > if (!NONDEBUG_INSN_P (insn)) > > continue; > > > > + if (JUMP_P (insn)) > > + { > > + /* This must be a conditional jump. */ > > + rtx label = JUMP_LABEL (insn); > > + if (label == nullptr > > + || ANY_RETURN_P (label) > > + || !(LABEL_P (label) || SYMBOL_REF_P (label))) > > + gcc_unreachable (); Whenever we can't find a place w/ flag_reg dead, we should go to place_tls_call, why just jump_insn only?
> > + > > + /* Place the call before all FLAGS_REG setting BBs since > > + we can't place a call before nor after a conditional > > + jump. */ > > + bb = ix86_get_dominator_for_reg (FLAGS_REG, bb); > > + goto place_tls_call; > Can we put those codes related to find the **BEFORE/AFTER** into a > function and called it recursively(or iteratively to decide find the > final BEFORE/AFTER) > So we can avoid those GOTO statements. > > i.e > > switch (kind) > { > ... > } > > ix86_place_single_tls_call_place (....) > > if (before) > emit_insn_before (..); > else if (after) > emit_insn_after (...); > else > gcc_unreachable (); > > .... > > > + } > > + > > /* Check if FLAGS register is live. */ > > set = single_set (insn); > > if (set) > > { > > rtx dest = SET_DEST (set); > > - if (REG_P (dest) && REGNO (dest) == FLAGS_REG) > > - flags_live_p = true; > > + if (REG_P (dest)) > > + { > > + if (bitmap_bit_p (updated_gnu_tls_insns, > > + INSN_UID (insn))) > > + { > > + /* Insert the __tls_get_addr call before INSN which > > + replaces a __tls_get_addr call. */ > > + before = insn; > > + goto insert_before; > > + } > > + if (bitmap_bit_p (updated_gnu2_tls_insns, > > + INSN_UID (insn))) > > + /* Mark FLAGS register as dead since FLAGS register > > + would be clobbered by the GNU2 TLS instruction. */ > > + bitmap_clear_bit (live_caller_saved_regs, > > + FLAGS_REG); > > + else if (REGNO (dest) == FLAGS_REG) > > + bitmap_set_bit (live_caller_saved_regs, FLAGS_REG); > > + } > > } > > > > rtx link; > > @@ -3863,29 +3936,30 @@ ix86_place_single_tls_call (rtx dest, rtx val, > > x86_cse_kind kind, > > for (i = REGNO (XEXP (link, 0)); > > i < END_REGNO (XEXP (link, 0)); > > i++) > > - bitmap_clear_bit (live_caller_saved_regs, i); > > - > > - /* Check if FLAGS register is dead. */ > > - if (REGNO (XEXP (link, 0)) == FLAGS_REG) > > - flags_live_p = false; > > + if (i < FIRST_PSEUDO_REGISTER) > > + bitmap_clear_bit (live_caller_saved_regs, i); > > > > if (bitmap_empty_p (live_caller_saved_regs)) > > { > > - /* All live caller-saved registers are dead after > > - this instruction. Since TLS instructions > > - clobber FLAGS register, it must be dead where > > - the TLS will be inserted after. */ > > - if (flags_live_p) > > - gcc_unreachable (); > > after = insn; > > goto insert_after; > > } > > } > > } > > > > - /* All live caller-saved registers should be dead at the end > > - of this basic block. */ > > - gcc_unreachable (); > > + /* If any live caller-saved registers aren't dead at the end > > + of this basic block, get the basic block which dominates all > > + basic blocks which set the remaining live registers. */ > > + auto_bitmap set_bbs; > > + bitmap_iterator bi; > > + unsigned int id; > > + EXECUTE_IF_SET_IN_BITMAP (live_caller_saved_regs, 0, id, bi) > > + { > > + basic_block set_bb = ix86_get_dominator_for_reg (id, bb); > > + bitmap_set_bit (set_bbs, set_bb->index); > > + } > > + bb = nearest_common_dominator_for_set (CDI_DOMINATORS, set_bbs); > > + goto place_tls_call; > > } > > > > /* Emit the TLS CALL insn. */ > > @@ -3895,7 +3969,10 @@ insert_after: > > tls_insn = emit_insn_after (tls, after); > > } > > else > > - tls_insn = emit_insn_before (tls, before); > > + { > > +insert_before: > > + tls_insn = emit_insn_before (tls, before); > > + } > > > > rtx_insn *tlsdesc_insn = nullptr; > > if (tlsdesc_set) > > @@ -4213,6 +4290,8 @@ pass_x86_cse::x86_cse (void) > > basic_block bb; > > rtx_insn *insn; > > unsigned int i; > > + auto_bitmap updated_gnu_tls_insns; > > + auto_bitmap updated_gnu2_tls_insns; > > > > df_set_flags (DF_DEFER_INSN_RESCAN); > > > > @@ -4333,7 +4412,10 @@ pass_x86_cse::x86_cse (void) > > case X86_CSE_TLS_LD_BASE: > > case X86_CSE_TLSDESC: > > broadcast_reg = gen_reg_rtx (load->mode); > > - replace_tls_call (broadcast_reg, load->insns); > > + replace_tls_call (broadcast_reg, load->insns, > > + (load->kind == X86_CSE_TLSDESC > > + ? updated_gnu2_tls_insns > > + : updated_gnu_tls_insns)); > > load->broadcast_reg = broadcast_reg; > > break; > > > > @@ -4399,6 +4481,8 @@ pass_x86_cse::x86_cse (void) > > load->val, > > load->kind, > > load->bbs, > > + updated_gnu_tls_insns, > > + updated_gnu2_tls_insns, > > PATTERN (load->def_insn)); > > break; > > case X86_CSE_VEC_DUP: > > @@ -4442,7 +4526,9 @@ pass_x86_cse::x86_cse (void) > > ix86_place_single_tls_call (load->broadcast_reg, > > load->val, > > load->kind, > > - load->bbs); > > + load->bbs, > > + updated_gnu_tls_insns, > > + updated_gnu2_tls_insns); > > break; > > case X86_CSE_CONST0_VECTOR: > > case X86_CSE_CONSTM1_VECTOR: > > diff --git a/gcc/testsuite/gcc.target/i386/pr121572-1a.c > > b/gcc/testsuite/gcc.target/i386/pr121572-1a.c > > new file mode 100644 > > index 00000000000..270d8ff5cb6 > > --- /dev/null > > +++ b/gcc/testsuite/gcc.target/i386/pr121572-1a.c > > @@ -0,0 +1,41 @@ > > +/* { dg-do compile { target *-*-linux* } } */ > > +/* { dg-options "-O0 -fpic -fplt -mtls-dialect=gnu" } */ > > +/* Keep labels and directives ('.cfi_startproc', '.cfi_endproc'). */ > > +/* { dg-final { check-function-bodies "**" "" "" { target { ! ia32 } } > > {^\t?\.} } } */ > > + > > +/* > > +**bug: > > +**.LFB[0-9]+: > > +**... > > +** leaq tv_cache@tlsld\(%rip\), %rdi > > +** call __tls_get_addr@PLT > > +** movl \$-1, %edi > > +** mov[l|q] %[e|r]ax, %[e|r]bx > > +** call val@PLT > > +**... > > +*/ > > + > > +extern __thread int tv_cache __attribute__ ((visibility ("hidden"))); > > +extern void use_cache (int); > > +extern int val (int v); > > + > > +__attribute__ ((optimize (2))) > > +void > > +bug (void) > > +{ > > + int compared = val (-1); > > + > > + if (compared == 0 || (compared > 0 && val (2) == 0)) > > + { > > + __builtin_trap (); > > + } > > + > > + if (compared < 0) > > + { > > + use_cache (tv_cache); > > + return; > > + } > > + > > + use_cache (tv_cache); > > + __builtin_trap (); > > +} > > diff --git a/gcc/testsuite/gcc.target/i386/pr121572-1b.c > > b/gcc/testsuite/gcc.target/i386/pr121572-1b.c > > new file mode 100644 > > index 00000000000..8a6089109f5 > > --- /dev/null > > +++ b/gcc/testsuite/gcc.target/i386/pr121572-1b.c > > @@ -0,0 +1,18 @@ > > +/* { dg-do compile { target *-*-linux* } } */ > > +/* { dg-options "-O0 -fpic -fplt -mtls-dialect=gnu2" } */ > > +/* Keep labels and directives ('.cfi_startproc', '.cfi_endproc'). */ > > +/* { dg-final { check-function-bodies "**" "" "" { target { ! ia32 } } > > {^\t?\.} } } */ > > + > > +/* > > +**bug: > > +**.LFB[0-9]+: > > +**... > > +** lea[l|q] tv_cache@TLSDESC\(%rip\), %[e|r]ax > > +** movl \$-1, %edi > > +** call \*tv_cache@TLSCALL\(%[e|r]ax\) > > +** mov[l|q] %[e|r]ax, %[e|r]bx > > +** call val@PLT > > +**... > > +*/ > > + > > +#include "pr121572-1a.c" > > diff --git a/gcc/testsuite/gcc.target/i386/pr121572-2a.c > > b/gcc/testsuite/gcc.target/i386/pr121572-2a.c > > new file mode 100644 > > index 00000000000..38b254657d3 > > --- /dev/null > > +++ b/gcc/testsuite/gcc.target/i386/pr121572-2a.c > > @@ -0,0 +1,39 @@ > > +/* { dg-do compile { target *-*-linux* } } */ > > +/* { dg-options "-O2 -fpic -fplt -mtls-dialect=gnu" } */ > > + > > +typedef enum > > +{ > > + MPFR_RNDN > > +} mpfr_rnd_t; > > +typedef int mpfr_t[1]; > > +long __gmpfr_emin, mpfr_agm_expo_0; > > +_Thread_local long __gmpfr_emax; > > +int mpfr_agm_compare, mpfr_agm___trans_tmp_1; > > +mpfr_t mpfr_agm_u; > > +void mpfr_mul (int *, int, int, mpfr_rnd_t); > > +int > > +mpfr_agm (int op1) > > +{ > > + int op2 = 0; > > + if (__builtin_expect (mpfr_agm_compare == 0, 0)) > > + return 0; > > + if (mpfr_agm_compare > 0) > > + { > > + int t = op1; > > + op2 = t; > > + } > > + mpfr_agm_expo_0 = __gmpfr_emax; > > + for (;;) > > + { > > + retry: > > + mpfr_mul (mpfr_agm_u, op1, op2, MPFR_RNDN); > > + if (0) > > + goto retry; > > + if (__builtin_expect (mpfr_agm___trans_tmp_1, 1)) > > + break; > > + } > > + __gmpfr_emin = __gmpfr_emax; > > + return 0; > > +} > > + > > +/* { dg-final { scan-assembler-times "call\[ \t\]__tls_get_addr@PLT" 1 { > > target { ! ia32 } } } } */ > > diff --git a/gcc/testsuite/gcc.target/i386/pr121572-2b.c > > b/gcc/testsuite/gcc.target/i386/pr121572-2b.c > > new file mode 100644 > > index 00000000000..33d70024324 > > --- /dev/null > > +++ b/gcc/testsuite/gcc.target/i386/pr121572-2b.c > > @@ -0,0 +1,6 @@ > > +/* { dg-do compile { target *-*-linux* } } */ > > +/* { dg-options "-O2 -fpic -fplt -mtls-dialect=gnu2" } */ > > + > > +#include "pr121572-2a.c" > > + > > +/* { dg-final { scan-assembler-times "call\[ > > \t\]\\*__gmpfr_emax@TLSCALL\\(%(?:r|e)ax\\)" 1 { target { ! ia32 } } } } */ > > -- > > 2.50.1 > > > > > -- > BR, > Hongtao -- BR, Hongtao