For TLS calls: 1. UNSPEC_TLS_GD:
(parallel [ (set (reg:DI 0 ax) (call:DI (mem:QI (symbol_ref:DI ("__tls_get_addr"))) (const_int 0 [0]))) (unspec:DI [(symbol_ref:DI ("e") [flags 0x50]) (reg/f:DI 7 sp)] UNSPEC_TLS_GD) (clobber (reg:DI 5 di))]) 2. UNSPEC_TLS_LD_BASE: (parallel [ (set (reg:DI 0 ax) (call:DI (mem:QI (symbol_ref:DI ("__tls_get_addr"))) (const_int 0 [0]))) (unspec:DI [(reg/f:DI 7 sp)] UNSPEC_TLS_LD_BASE)]) 3. UNSPEC_TLSDESC: (parallel [ (set (reg/f:DI 104) (plus:DI (unspec:DI [ (symbol_ref:DI ("_TLS_MODULE_BASE_") [flags 0x10]) (reg:DI 114) (reg/f:DI 7 sp)] UNSPEC_TLSDESC) (const:DI (unspec:DI [ (symbol_ref:DI ("e") [flags 0x1a]) ] UNSPEC_DTPOFF)))) (clobber (reg:CC 17 flags))]) (parallel [ (set (reg:DI 101) (unspec:DI [(symbol_ref:DI ("e") [flags 0x50]) (reg:DI 112) (reg/f:DI 7 sp)] UNSPEC_TLSDESC)) (clobber (reg:CC 17 flags))]) they return the same value for the same input value. But multiple calls with the same input value may be generated for simple programs like: void a(long *); int b(void); void c(void); static __thread long e; long d(void) { a(&e); if (b()) c(); return e; } When compiled with -O2 -fPIC -mtls-dialect=gnu2, the following code is generated: .type d, @function d: .LFB0: .cfi_startproc pushq %rbx .cfi_def_cfa_offset 16 .cfi_offset 3, -16 leaq e@TLSDESC(%rip), %rbx movq %rbx, %rax call *e@TLSCALL(%rax) addq %fs:0, %rax movq %rax, %rdi call a@PLT call b@PLT testl %eax, %eax jne .L8 movq %rbx, %rax call *e@TLSCALL(%rax) popq %rbx .cfi_remember_state .cfi_def_cfa_offset 8 movq %fs:(%rax), %rax ret .p2align 4,,10 .p2align 3 .L8: .cfi_restore_state call c@PLT movq %rbx, %rax call *e@TLSCALL(%rax) popq %rbx .cfi_def_cfa_offset 8 movq %fs:(%rax), %rax ret .cfi_endproc There are 3 "call *e@TLSCALL(%rax)". They all return the same value. 
Rename the remove_redundant_vector_load pass to the x86_cse pass and, for 64-bit, extend it to also remove redundant TLS calls, generating: d: .LFB0: .cfi_startproc pushq %rbx .cfi_def_cfa_offset 16 .cfi_offset 3, -16 leaq e@TLSDESC(%rip), %rax movq %fs:0, %rdi call *e@TLSCALL(%rax) addq %rax, %rdi movq %rax, %rbx call a@PLT call b@PLT testl %eax, %eax jne .L8 movq %fs:(%rbx), %rax popq %rbx .cfi_remember_state .cfi_def_cfa_offset 8 ret .p2align 4,,10 .p2align 3 .L8: .cfi_restore_state call c@PLT movq %fs:(%rbx), %rax popq %rbx .cfi_def_cfa_offset 8 ret .cfi_endproc with only one "call *e@TLSCALL(%rax)". This reduces the number of __tls_get_addr calls in libgcc.a by 72%: __tls_get_addr calls before after libgcc.a 868 243 gcc/ PR target/81501 * config/i386/i386-features.cc (x86_cse_kind): Add X86_CSE_TLS_GD, X86_CSE_TLS_LD_BASE and X86_CSE_TLSDESC. (redundant_load): Renamed to ... (redundant_pattern): This. (replace_tls_call): New. (ix86_place_single_tls_call): Likewise. (remove_redundant_vector_load): Renamed to ... (x86_cse): This. Extend to remove redundant TLS calls. (pass_remove_redundant_vector_load): Renamed to ... (pass_x86_cse): This. (make_pass_remove_redundant_vector_load): Renamed to ... (make_pass_x86_cse): This. * config/i386/i386-passes.def: Replace pass_remove_redundant_vector_load with pass_x86_cse. * config/i386/i386-protos.h (ix86_tls_get_addr): New. (make_pass_remove_redundant_vector_load): Renamed to ... (make_pass_x86_cse): This. * config/i386/i386.cc (ix86_tls_get_addr): Remove static. * config/i386/i386.h (machine_function): Add tls_descriptor_call_multiple_p. * config/i386/i386.md (@tls_global_dynamic_64_<mode>): Set tls_descriptor_call_multiple_p. (@tls_local_dynamic_base_64_<mode>): Likewise. (@tls_dynamic_gnu2_64_<mode>): Likewise. (*tls_dynamic_gnu2_lea_64_<mode>): Renamed to ... (tls_dynamic_gnu2_lea_64_<mode>): This. (*tls_dynamic_gnu2_call_64_<mode>): Renamed to ... (tls_dynamic_gnu2_call_64_<mode>): This. 
(*tls_dynamic_gnu2_combine_64_<mode>): Renamed to ... (tls_dynamic_gnu2_combine_64_<mode>): This. gcc/testsuite/ PR target/81501 * g++.target/i386/pr81501-1.C: New test. * gcc.target/i386/pr81501-1a.c: Likewise. * gcc.target/i386/pr81501-1b.c: Likewise. * gcc.target/i386/pr81501-2a.c: Likewise. * gcc.target/i386/pr81501-2b.c: Likewise. * gcc.target/i386/pr81501-3.c: Likewise. * gcc.target/i386/pr81501-4a.c: Likewise. * gcc.target/i386/pr81501-4b.c: Likewise. * gcc.target/i386/pr81501-5.c: Likewise. * gcc.target/i386/pr81501-6a.c: Likewise. * gcc.target/i386/pr81501-6b.c: Likewise. * gcc.target/i386/pr81501-7.c: Likewise. * gcc.target/i386/pr81501-8a.c: Likewise. * gcc.target/i386/pr81501-8b.c: Likewise. Signed-off-by: H.J. Lu <hjl.to...@gmail.com> --- gcc/config/i386/i386-features.cc | 679 +++++++++++++++++++-- gcc/config/i386/i386-passes.def | 2 +- gcc/config/i386/i386-protos.h | 4 +- gcc/config/i386/i386.cc | 2 +- gcc/config/i386/i386.h | 3 + gcc/config/i386/i386.md | 20 +- gcc/testsuite/g++.target/i386/pr81501-1.C | 16 + gcc/testsuite/gcc.target/i386/pr81501-1a.c | 17 + gcc/testsuite/gcc.target/i386/pr81501-1b.c | 6 + gcc/testsuite/gcc.target/i386/pr81501-2a.c | 17 + gcc/testsuite/gcc.target/i386/pr81501-2b.c | 6 + gcc/testsuite/gcc.target/i386/pr81501-3.c | 9 + gcc/testsuite/gcc.target/i386/pr81501-4a.c | 51 ++ gcc/testsuite/gcc.target/i386/pr81501-4b.c | 6 + gcc/testsuite/gcc.target/i386/pr81501-5.c | 13 + gcc/testsuite/gcc.target/i386/pr81501-6a.c | 67 ++ gcc/testsuite/gcc.target/i386/pr81501-6b.c | 7 + gcc/testsuite/gcc.target/i386/pr81501-7.c | 20 + gcc/testsuite/gcc.target/i386/pr81501-8a.c | 82 +++ gcc/testsuite/gcc.target/i386/pr81501-8b.c | 25 + 20 files changed, 978 insertions(+), 74 deletions(-) create mode 100644 gcc/testsuite/g++.target/i386/pr81501-1.C create mode 100644 gcc/testsuite/gcc.target/i386/pr81501-1a.c create mode 100644 gcc/testsuite/gcc.target/i386/pr81501-1b.c create mode 100644 gcc/testsuite/gcc.target/i386/pr81501-2a.c create mode 
100644 gcc/testsuite/gcc.target/i386/pr81501-2b.c create mode 100644 gcc/testsuite/gcc.target/i386/pr81501-3.c create mode 100644 gcc/testsuite/gcc.target/i386/pr81501-4a.c create mode 100644 gcc/testsuite/gcc.target/i386/pr81501-4b.c create mode 100644 gcc/testsuite/gcc.target/i386/pr81501-5.c create mode 100644 gcc/testsuite/gcc.target/i386/pr81501-6a.c create mode 100644 gcc/testsuite/gcc.target/i386/pr81501-6b.c create mode 100644 gcc/testsuite/gcc.target/i386/pr81501-7.c create mode 100644 gcc/testsuite/gcc.target/i386/pr81501-8a.c create mode 100644 gcc/testsuite/gcc.target/i386/pr81501-8b.c diff --git a/gcc/config/i386/i386-features.cc b/gcc/config/i386/i386-features.cc index 054f8d5ddc8..8e2c69ea313 100644 --- a/gcc/config/i386/i386-features.cc +++ b/gcc/config/i386/i386-features.cc @@ -3493,10 +3493,13 @@ enum x86_cse_kind { X86_CSE_CONST0_VECTOR, X86_CSE_CONSTM1_VECTOR, - X86_CSE_VEC_DUP + X86_CSE_VEC_DUP, + X86_CSE_TLS_GD, + X86_CSE_TLS_LD_BASE, + X86_CSE_TLSDESC }; -struct redundant_load +struct redundant_pattern { /* Bitmap of basic blocks with broadcast instructions. */ auto_bitmap bbs; @@ -3671,22 +3674,323 @@ ix86_broadcast_inner (rtx op, machine_mode mode, return op; } -/* At entry of the nearest common dominator for basic blocks with vector - CONST0_RTX and integer CONSTM1_RTX uses, generate a single widest - vector set instruction for all CONST0_RTX and integer CONSTM1_RTX - uses. +/* Replace CALL instruction in TLS_CALL_INSNS with SET from SRC. */ - NB: We want to generate only a single widest vector set to cover the - whole function. The LCM algorithm isn't appropriate here since it - may place a vector set inside the loop. */ +static void +replace_tls_call (rtx src, auto_bitmap &tls_call_insns) +{ + bitmap_iterator bi; + unsigned int id; + + EXECUTE_IF_SET_IN_BITMAP (tls_call_insns, 0, id, bi) + { + rtx_insn *insn = DF_INSN_UID_GET (id)->insn; + + /* If this isn't a CALL, only GNU2 TLS implicit CALL patterns are + allowed. 
*/ + if (!CALL_P (insn)) + switch (INSN_CODE (insn)) + { + case CODE_FOR_tls_dynamic_gnu2_call_64_si: + case CODE_FOR_tls_dynamic_gnu2_call_64_di: + case CODE_FOR_tls_dynamic_gnu2_combine_64_si: + case CODE_FOR_tls_dynamic_gnu2_combine_64_di: + break; + default: + gcc_unreachable (); + } + + rtx pat = PATTERN (insn); + if (GET_CODE (pat) != PARALLEL) + gcc_unreachable (); + + int j; + rtx op, dest = nullptr; + for (j = XVECLEN (pat, 0) - 1; j >= 0; j--) + { + op = XVECEXP (pat, 0, j); + if (GET_CODE (op) == SET) + { + dest = SET_DEST (op); + break; + } + } + + rtx set = gen_rtx_SET (dest, src); + rtx_insn *set_insn = emit_insn_after (set, insn); + if (recog_memoized (set_insn) < 0) + gcc_unreachable (); + + if (dump_file) + { + fprintf (dump_file, "\nReplace:\n\n"); + print_rtl_single (dump_file, insn); + fprintf (dump_file, "\nwith:\n\n"); + print_rtl_single (dump_file, set_insn); + fprintf (dump_file, "\n"); + } + + /* Delete the CALL insn. */ + delete_insn (insn); + + df_insn_rescan (set_insn); + } +} + +/* Generate a TLS call of KIND with VAL and copy the call result to DEST, + at entry of the nearest dominator for basic block map BBS, which is in + the fake loop that contains the whole function, so that there is only + a single TLS CALL of KIND with VAL in the whole function. If + TLSDESC_SET isn't nullptr, insert it before the TLS call. 
*/ + +static void +ix86_place_single_tls_call (rtx dest, rtx val, x86_cse_kind kind, + bitmap bbs, rtx tlsdesc_set = nullptr) +{ + basic_block bb = nearest_common_dominator_for_set (CDI_DOMINATORS, bbs); + while (bb->loop_father->latch + != EXIT_BLOCK_PTR_FOR_FN (cfun)) + bb = get_immediate_dominator (CDI_DOMINATORS, + bb->loop_father->header); + + rtx_insn *insn = BB_HEAD (bb); + while (insn && !NONDEBUG_INSN_P (insn)) + { + if (insn == BB_END (bb)) + { + insn = NULL; + break; + } + insn = NEXT_INSN (insn); + } + + rtx rax = nullptr, rdi; + rtx eqv = nullptr; + rtx caddr; + rtx set; + rtx clob; + rtx symbol; + rtx tls; + rtx_insn *tls_insn; + + switch (kind) + { + case X86_CSE_TLS_GD: + rax = gen_rtx_REG (Pmode, AX_REG); + rdi = gen_rtx_REG (Pmode, DI_REG); + caddr = ix86_tls_get_addr (); + + symbol = XVECEXP (val, 0, 0); + tls = gen_tls_global_dynamic_64 (Pmode, rax, symbol, caddr, rdi); + + if (GET_MODE (symbol) != Pmode) + symbol = gen_rtx_ZERO_EXTEND (Pmode, symbol); + eqv = symbol; + break; + + case X86_CSE_TLS_LD_BASE: + rax = gen_rtx_REG (Pmode, AX_REG); + rdi = gen_rtx_REG (Pmode, DI_REG); + caddr = ix86_tls_get_addr (); + + tls = gen_tls_local_dynamic_base_64 (Pmode, rax, caddr, rdi); + + /* Attach a unique REG_EQUAL to DEST, to allow the RTL optimizers + to share the LD_BASE result with other LD model accesses. */ + eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), + UNSPEC_TLS_LD_BASE); + + break; + + case X86_CSE_TLSDESC: + set = gen_rtx_SET (dest, val); + clob = gen_rtx_CLOBBER (VOIDmode, + gen_rtx_REG (CCmode, FLAGS_REG)); + tls = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clob)); + break; + + default: + gcc_unreachable (); + } + + rtx_insn *before = nullptr; + rtx_insn *after = nullptr; + if (insn == BB_HEAD (bb)) + before = insn; + else + after = insn ? PREV_INSN (insn) : BB_END (bb); + + /* TLS_GD and TLS_LD_BASE instructions are normal functions which + clobber caller-saved registers. 
TLSDESC instructions are special + functions which only clobber RAX. If any registers clobbered by + the TLS instruction are live in this basic block, we must insert + the TLS instruction after all live registers clobbered by the TLS + instruction are dead. */ + + auto_bitmap live_caller_saved_regs; + bitmap in = df_live ? DF_LIVE_IN (bb) : DF_LR_IN (bb); + + bool flags_live_p = bitmap_bit_p (in, FLAGS_REG); + + unsigned int i; + + /* Get all live caller-saved registers. */ + if (kind == X86_CSE_TLSDESC) + { + if (bitmap_bit_p (in, AX_REG)) + bitmap_set_bit (live_caller_saved_regs, AX_REG); + } + else + for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) + if (call_used_regs[i] + && !fixed_regs[i] + && bitmap_bit_p (in, i)) + bitmap_set_bit (live_caller_saved_regs, i); + + if (!bitmap_empty_p (live_caller_saved_regs)) + { + /* Search for REG_DEAD notes in this basic block. */ + FOR_BB_INSNS (bb, insn) + { + if (!NONDEBUG_INSN_P (insn)) + continue; + + /* Check if FLAGS register is live. */ + set = single_set (insn); + if (set) + { + rtx dest = SET_DEST (set); + if (REG_P (dest) && REGNO (dest) == FLAGS_REG) + flags_live_p = true; + } + + rtx link; + for (link = REG_NOTES (insn); link; link = XEXP (link, 1)) + if (REG_NOTE_KIND (link) == REG_DEAD + && REG_P (XEXP (link, 0))) + { + /* Mark the live caller-saved register as dead. */ + for (i = REGNO (XEXP (link, 0)); + i < END_REGNO (XEXP (link, 0)); + i++) + bitmap_clear_bit (live_caller_saved_regs, i); + + /* Check if FLAGS register is dead. */ + if (REGNO (XEXP (link, 0)) == FLAGS_REG) + flags_live_p = false; + + if (bitmap_empty_p (live_caller_saved_regs)) + { + /* All live caller-saved registers are dead after + this instruction. Since TLS instructions + clobber FLAGS register, it must be dead where + the TLS will be inserted after. */ + if (flags_live_p) + gcc_unreachable (); + after = insn; + goto insert_after; + } + } + } + + /* All live caller-saved registers should be dead at the end + of this basic block. 
*/ + gcc_unreachable (); + } + + /* Emit the TLS CALL insn. */ + if (after) + { +insert_after: + tls_insn = emit_insn_after (tls, after); + } + else + tls_insn = emit_insn_before (tls, before); + + rtx_insn *tlsdesc_insn = nullptr; + if (tlsdesc_set) + { + rtx dest = copy_rtx (SET_DEST (tlsdesc_set)); + rtx src = copy_rtx (SET_SRC (tlsdesc_set)); + tlsdesc_set = gen_rtx_SET (dest, src); + tlsdesc_insn = emit_insn_before (tlsdesc_set, tls_insn); + } + + if (kind != X86_CSE_TLSDESC) + { + RTL_CONST_CALL_P (tls_insn) = 1; + + /* Indicate that this function can't jump to non-local gotos. */ + make_reg_eh_region_note_nothrow_nononlocal (tls_insn); + } + + if (recog_memoized (tls_insn) < 0) + gcc_unreachable (); + + if (dump_file) + { + if (after) + { + fprintf (dump_file, "\nPlace:\n\n"); + if (tlsdesc_insn) + print_rtl_single (dump_file, tlsdesc_insn); + print_rtl_single (dump_file, tls_insn); + fprintf (dump_file, "\nafter:\n\n"); + print_rtl_single (dump_file, after); + fprintf (dump_file, "\n"); + } + else + { + fprintf (dump_file, "\nPlace:\n\n"); + if (tlsdesc_insn) + print_rtl_single (dump_file, tlsdesc_insn); + print_rtl_single (dump_file, tls_insn); + fprintf (dump_file, "\nbefore:\n\n"); + print_rtl_single (dump_file, insn); + fprintf (dump_file, "\n"); + } + } + + if (kind != X86_CSE_TLSDESC) + { + /* Copy RAX to DEST. */ + set = gen_rtx_SET (dest, rax); + rtx_insn *set_insn = emit_insn_after (set, tls_insn); + set_dst_reg_note (set_insn, REG_EQUAL, copy_rtx (eqv), dest); + if (dump_file) + { + fprintf (dump_file, "\nPlace:\n\n"); + print_rtl_single (dump_file, set_insn); + fprintf (dump_file, "\nafter:\n\n"); + print_rtl_single (dump_file, tls_insn); + fprintf (dump_file, "\n"); + } + } +} + +/* At entry of the nearest common dominator for basic blocks with + + 1. Vector CONST0_RTX patterns. + 2. Vector CONSTM1_RTX patterns. + 3. Vector broadcast patterns. + 4. UNSPEC_TLS_GD patterns. + 5. UNSPEC_TLS_LD_BASE patterns. + 6. UNSPEC_TLSDESC patterns. 
+ + generate a single pattern whose destination is used to replace the + source in all identical patterns. + + NB: We want to generate a pattern, which is executed only once, to + cover the whole function. The LCM algorithm isn't appropriate here + since it may place a pattern inside the loop. */ static unsigned int -remove_redundant_vector_load (void) +x86_cse (void) { timevar_push (TV_MACH_DEP); - auto_vec<redundant_load *> loads; - redundant_load *load; + auto_vec<redundant_pattern *> loads; + redundant_pattern *load; basic_block bb; rtx_insn *insn; unsigned int i; @@ -3702,46 +4006,250 @@ remove_redundant_vector_load (void) if (!NONDEBUG_INSN_P (insn)) continue; - rtx set = single_set (insn); + bool matched = false; + + /* Remove redundant pattens if there are more than 2 of + them. */ + unsigned int threshold = 2; + + rtx val, set, dest = nullptr, src; + rtx_insn *def_insn; + machine_mode mode = VOIDmode; + machine_mode scalar_mode = VOIDmode; + x86_cse_kind kind; + + if (TARGET_64BIT + && cfun->machine->tls_descriptor_call_multiple_p + && CALL_P (insn)) + { + /* Record the redundant TLS CALLs for 64-bit: + + (parallel [ + (set (reg:DI 0 ax) + (call:DI (mem:QI (symbol_ref:DI ("__tls_get_addr"))) + (const_int 0 [0]))) + (unspec:DI [(symbol_ref:DI ("foo") [flags 0x50]) + (reg/f:DI 7 sp)] UNSPEC_TLS_GD) + (clobber (reg:DI 5 di))]) + + + and + + (parallel [ + (set (reg:DI 0 ax) + (call:DI (mem:QI (symbol_ref:DI ("__tls_get_addr"))) + (const_int 0 [0]))) + (unspec:DI [(reg/f:DI 7 sp)] UNSPEC_TLS_LD_BASE)]) + + */ + + unspec unspec_val = (unspec) 0; + rtx pat = PATTERN (insn); + if (GET_CODE (pat) != PARALLEL) + continue; + + int j; + rtx op; + for (j = XVECLEN (pat, 0) - 1; j >= 0; j--) + { + op = XVECEXP (pat, 0, j); + switch (GET_CODE (op)) + { + case SET: + /* Only a single SET is allowed. 
*/ + if (dest) + gcc_unreachable (); + dest = SET_DEST (op); + scalar_mode = mode = GET_MODE (dest); + break; + case UNSPEC: + if (XINT (op, 1) == UNSPEC_TLS_LD_BASE + || XINT (op, 1) == UNSPEC_TLS_GD) + { + val = op; + unspec_val = (unspec) XINT (op, 1); + } + break; + default: + break; + } + } + + switch (unspec_val) + { + case 0: + continue; + + case UNSPEC_TLS_GD: + kind = X86_CSE_TLS_GD; + break; + + case UNSPEC_TLS_LD_BASE: + kind = X86_CSE_TLS_LD_BASE; + break; + + default: + gcc_unreachable (); + } + + def_insn = nullptr; + + goto check_redundant_pattern; + } + + set = single_set (insn); if (!set) continue; + src = SET_SRC (set); + + insn_code icode; + + if (TARGET_64BIT + && cfun->machine->tls_descriptor_call_multiple_p) + { + /* Record GNU2 TLS CALLs for 64-bit: + + (parallel [ + (set (reg/f:DI 104) + (plus:DI (unspec:DI [ + (symbol_ref:DI ("_TLS_MODULE_BASE_") [flags 0x10]) + (reg:DI 114) + (reg/f:DI 7 sp)] UNSPEC_TLSDESC) + (const:DI (unspec:DI [ + (symbol_ref:DI ("e") [flags 0x1a]) + ] UNSPEC_DTPOFF)))) + (clobber (reg:CC 17 flags))]) + + and + + (parallel [ + (set (reg:DI 101) + (unspec:DI [ + (symbol_ref:DI ("foo") [flags 0x50]) + (reg:DI 112) + (reg/f:DI 7 sp)] UNSPEC_TLSDESC)) + (clobber (reg:CC 17 flags))]) + + */ + + icode = (insn_code) INSN_CODE (insn); + if ((icode == CODE_FOR_tls_dynamic_gnu2_call_64_si) + || (icode == CODE_FOR_tls_dynamic_gnu2_call_64_di)) + val = src; + else if (GET_CODE (src) == PLUS + && ((icode + == CODE_FOR_tls_dynamic_gnu2_combine_64_si) + || (icode + == CODE_FOR_tls_dynamic_gnu2_combine_64_di))) + { + val = src; + src = XEXP (src, 0); + } + else + continue; + + kind = X86_CSE_TLSDESC; + gcc_assert (GET_CODE (src) == UNSPEC); + src = XVECEXP (src, 0, 1); + scalar_mode = mode = GET_MODE (src); + if (REG_P (src)) + { + /* All definitions of reg:DI 129 in + + (set (reg:DI 110) + (unspec:DI + [(symbol_ref:DI ("foo")) + (reg:DI 129) + (reg/f:DI 7 sp)] UNSPEC_TLSDESC)) + + should have the same source as in + + (set (reg:DI 
129) + (unspec:DI + [(symbol_ref:DI ("foo"))] UNSPEC_TLSDESC)) + + */ + + df_ref ref; + rtx_insn *set_insn = nullptr; + rtx tls_src = nullptr; + for (ref = DF_REG_DEF_CHAIN (REGNO (src)); + ref; + ref = DF_REF_NEXT_REG (ref)) + { + if (DF_REF_IS_ARTIFICIAL (ref)) + break; + + set_insn = DF_REF_INSN (ref); + icode = (insn_code) INSN_CODE (set_insn); + if ((icode + != CODE_FOR_tls_dynamic_gnu2_lea_64_si) + && (icode + != CODE_FOR_tls_dynamic_gnu2_lea_64_di)) + { + set_insn = nullptr; + break; + } + + rtx tls_set = PATTERN (set_insn); + if (!tls_src) + tls_src = SET_SRC (tls_set); + else if (!rtx_equal_p (tls_src, + SET_SRC (tls_set))) + { + set_insn = nullptr; + break; + } + } + + if (!set_insn) + continue; + + set = single_set (insn); + if (!set) + continue; + + def_insn = set_insn; + } + else if (GET_CODE (src) == UNSPEC + && XINT (src, 1) == UNSPEC_TLSDESC + && SYMBOL_REF_P (XVECEXP (src, 0, 0))) + def_insn = nullptr; + else + gcc_unreachable (); + + goto check_redundant_pattern; + } + /* Record single set vector instruction with CONST0_RTX and CONSTM1_RTX source. Record basic blocks with CONST0_RTX and CONSTM1_RTX. Count CONST0_RTX and CONSTM1_RTX. Record the maximum size of CONST0_RTX and CONSTM1_RTX. */ - rtx dest = SET_DEST (set); - machine_mode mode = GET_MODE (dest); + dest = SET_DEST (set); + mode = GET_MODE (dest); /* Skip non-vector instruction. */ if (!VECTOR_MODE_P (mode)) continue; - rtx src = SET_SRC (set); /* Skip non-vector load instruction. */ if (!REG_P (dest) && !SUBREG_P (dest)) continue; - rtx_insn *def_insn; - machine_mode scalar_mode; - x86_cse_kind kind; - rtx val = ix86_broadcast_inner (src, mode, &scalar_mode, - &kind, &def_insn); + val = ix86_broadcast_inner (src, mode, &scalar_mode, &kind, + &def_insn); if (!val) continue; - /* Remove redundant register loads if there are more than 2 - loads will be used. */ - unsigned int threshold = 2; - +check_redundant_pattern: /* Check if there is a matching redundant vector load. 
*/ - bool matched = false; FOR_EACH_VEC_ELT (loads, i, load) if (load->val && load->kind == kind && load->mode == scalar_mode && (load->bb == bb - || kind < X86_CSE_VEC_DUP + || kind != X86_CSE_VEC_DUP /* Non all 0s/1s vector load must be in the same basic block if it is in a recursive call. */ || !recursive_call_p) @@ -3751,7 +4259,8 @@ remove_redundant_vector_load (void) bitmap_set_bit (load->insns, INSN_UID (insn)); /* Record the maximum vector size. */ - if (load->size < GET_MODE_SIZE (mode)) + if (kind <= X86_CSE_VEC_DUP + && load->size < GET_MODE_SIZE (mode)) load->size = GET_MODE_SIZE (mode); /* Record the basic block. */ @@ -3765,7 +4274,7 @@ remove_redundant_vector_load (void) continue; /* We see this vector broadcast the first time. */ - load = new redundant_load; + load = new redundant_pattern; load->val = copy_rtx (val); load->mode = scalar_mode; @@ -3788,6 +4297,23 @@ remove_redundant_vector_load (void) FOR_EACH_VEC_ELT (loads, i, load) if (load->count >= load->threshold) { + switch (load->kind) + { + case X86_CSE_TLS_GD: + case X86_CSE_TLS_LD_BASE: + case X86_CSE_TLSDESC: + broadcast_reg = gen_reg_rtx (load->mode); + replace_tls_call (broadcast_reg, load->insns); + load->broadcast_reg = broadcast_reg; + replaced = true; + break; + default: + break; + } + + if (load->kind > X86_CSE_VEC_DUP) + continue; + machine_mode mode = ix86_get_vector_cse_mode (load->size, load->mode); broadcast_reg = gen_reg_rtx (mode); @@ -3843,41 +4369,64 @@ remove_redundant_vector_load (void) { if (load->def_insn) { - /* Insert a broadcast after the original scalar - definition. */ - rtx set = gen_rtx_SET (load->broadcast_reg, + rtx set; + if (load->kind == X86_CSE_TLSDESC) + ix86_place_single_tls_call (load->broadcast_reg, + load->val, + load->kind, + load->bbs, + PATTERN (load->def_insn)); + else + { + /* Insert a broadcast after the original scalar + definition. 
*/ + set = gen_rtx_SET (load->broadcast_reg, load->broadcast_source); - insn = emit_insn_after (set, load->def_insn); + insn = emit_insn_after (set, load->def_insn); - if (cfun->can_throw_non_call_exceptions) - { - /* Handle REG_EH_REGION note in DEF_INSN. */ - rtx note = find_reg_note (load->def_insn, - REG_EH_REGION, nullptr); - if (note) + if (cfun->can_throw_non_call_exceptions) { - control_flow_insns.safe_push (load->def_insn); - add_reg_note (insn, REG_EH_REGION, - XEXP (note, 0)); + /* Handle REG_EH_REGION note in DEF_INSN. */ + rtx note = find_reg_note (load->def_insn, + REG_EH_REGION, nullptr); + if (note) + { + control_flow_insns.safe_push (load->def_insn); + add_reg_note (insn, REG_EH_REGION, + XEXP (note, 0)); + } } - } - if (dump_file) - { - fprintf (dump_file, "\nAdd:\n\n"); - print_rtl_single (dump_file, insn); - fprintf (dump_file, "\nafter:\n\n"); - print_rtl_single (dump_file, load->def_insn); - fprintf (dump_file, "\n"); + if (dump_file) + { + fprintf (dump_file, "\nAdd:\n\n"); + print_rtl_single (dump_file, insn); + fprintf (dump_file, "\nafter:\n\n"); + print_rtl_single (dump_file, load->def_insn); + fprintf (dump_file, "\n"); + } } } else - ix86_place_single_vector_set (load->broadcast_reg, - load->broadcast_source, - load->bbs, - (load->kind == X86_CSE_VEC_DUP - ? load->val - : nullptr)); + switch (load->kind) + { + case X86_CSE_TLS_GD: + case X86_CSE_TLS_LD_BASE: + case X86_CSE_TLSDESC: + ix86_place_single_tls_call (load->broadcast_reg, + load->val, + load->kind, + load->bbs); + break; + default: + ix86_place_single_vector_set (load->broadcast_reg, + load->broadcast_source, + load->bbs, + (load->kind == X86_CSE_VEC_DUP + ? 
load->val + : nullptr)); + break; + } } loop_optimizer_finalize (); @@ -3909,10 +4458,10 @@ remove_redundant_vector_load (void) namespace { -const pass_data pass_data_remove_redundant_vector_load = +const pass_data pass_data_x86_cse = { RTL_PASS, /* type */ - "rrvl", /* name */ + "x86_cse", /* name */ OPTGROUP_NONE, /* optinfo_flags */ TV_MACH_DEP, /* tv_id */ 0, /* properties_required */ @@ -3922,11 +4471,11 @@ const pass_data pass_data_remove_redundant_vector_load = 0, /* todo_flags_finish */ }; -class pass_remove_redundant_vector_load : public rtl_opt_pass +class pass_x86_cse : public rtl_opt_pass { public: - pass_remove_redundant_vector_load (gcc::context *ctxt) - : rtl_opt_pass (pass_data_remove_redundant_vector_load, ctxt) + pass_x86_cse (gcc::context *ctxt) + : rtl_opt_pass (pass_data_x86_cse, ctxt) {} /* opt_pass methods: */ @@ -3939,16 +4488,16 @@ public: unsigned int execute (function *) final override { - return remove_redundant_vector_load (); + return x86_cse (); } -}; // class pass_remove_redundant_vector_load +}; // class pass_x86_cse } // anon namespace rtl_opt_pass * -make_pass_remove_redundant_vector_load (gcc::context *ctxt) +make_pass_x86_cse (gcc::context *ctxt) { - return new pass_remove_redundant_vector_load (ctxt); + return new pass_x86_cse (ctxt); } /* Convert legacy instructions that clobbers EFLAGS to APX_NF diff --git a/gcc/config/i386/i386-passes.def b/gcc/config/i386/i386-passes.def index 06f0288b067..553b46d1fdc 100644 --- a/gcc/config/i386/i386-passes.def +++ b/gcc/config/i386/i386-passes.def @@ -35,6 +35,6 @@ along with GCC; see the file COPYING3. If not see PR116174. 
*/ INSERT_PASS_BEFORE (pass_shorten_branches, 1, pass_align_tight_loops); - INSERT_PASS_AFTER (pass_late_combine, 1, pass_remove_redundant_vector_load); + INSERT_PASS_AFTER (pass_late_combine, 1, pass_x86_cse); INSERT_PASS_AFTER (pass_late_combine, 1, pass_remove_partial_avx_dependency); INSERT_PASS_AFTER (pass_rtl_ifcvt, 1, pass_apx_nf_convert); diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h index 69bc0ee570d..ee6b78b2c77 100644 --- a/gcc/config/i386/i386-protos.h +++ b/gcc/config/i386/i386-protos.h @@ -290,6 +290,7 @@ extern rtx ix86_tls_module_base (void); extern bool ix86_gpr_tls_address_pattern_p (rtx); extern bool ix86_tls_address_pattern_p (rtx); extern rtx ix86_rewrite_tls_address (rtx); +extern rtx ix86_tls_get_addr (void); extern void ix86_expand_vector_init (bool, rtx, rtx); extern void ix86_expand_vector_set (bool, rtx, rtx, int); @@ -430,8 +431,7 @@ extern rtl_opt_pass *make_pass_insert_endbr_and_patchable_area (gcc::context *); extern rtl_opt_pass *make_pass_remove_partial_avx_dependency (gcc::context *); -extern rtl_opt_pass *make_pass_remove_redundant_vector_load - (gcc::context *); +extern rtl_opt_pass *make_pass_x86_cse (gcc::context *); extern rtl_opt_pass *make_pass_apx_nf_convert (gcc::context *); extern rtl_opt_pass *make_pass_align_tight_loops (gcc::context *); diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc index eb5b2eb6a86..70802202100 100644 --- a/gcc/config/i386/i386.cc +++ b/gcc/config/i386/i386.cc @@ -12440,7 +12440,7 @@ ix86_tls_index (void) static GTY(()) rtx ix86_tls_symbol; -static rtx +rtx ix86_tls_get_addr (void) { if (!ix86_tls_symbol) diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h index 3f7ad68db3a..9d6d05a6911 100644 --- a/gcc/config/i386/i386.h +++ b/gcc/config/i386/i386.h @@ -2864,6 +2864,9 @@ struct GTY(()) machine_function { approximation. */ BOOL_BITFIELD tls_descriptor_call_expanded_p : 1; + /* True if TLS descriptor is called more than once. 
*/ + BOOL_BITFIELD tls_descriptor_call_multiple_p : 1; + /* If true, the current function has a STATIC_CHAIN is placed on the stack below the return address. */ BOOL_BITFIELD static_chain_on_stack : 1; diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index 21b9f5ccd7a..a26b134f66a 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -23270,7 +23270,11 @@ (define_expand "@tls_global_dynamic_64_<mode>" UNSPEC_TLS_GD) (clobber (match_operand:P 3 "register_operand"))])] "TARGET_64BIT" - "ix86_tls_descriptor_calls_expanded_in_cfun = true;") +{ + if (ix86_tls_descriptor_calls_expanded_in_cfun) + cfun->machine->tls_descriptor_call_multiple_p = true; + ix86_tls_descriptor_calls_expanded_in_cfun = true; +}) (define_insn "*tls_local_dynamic_base_32_gnu" [(set (match_operand:SI 0 "register_operand" "=a") @@ -23365,7 +23369,11 @@ (define_expand "@tls_local_dynamic_base_64_<mode>" (unspec:P [(reg:P SP_REG)] UNSPEC_TLS_LD_BASE) (clobber (match_operand:P 2 "register_operand"))])] "TARGET_64BIT" - "ix86_tls_descriptor_calls_expanded_in_cfun = true;") +{ + if (ix86_tls_descriptor_calls_expanded_in_cfun) + cfun->machine->tls_descriptor_call_multiple_p = true; + ix86_tls_descriptor_calls_expanded_in_cfun = true; +}) ;; Local dynamic of a single variable is a lose. Show combine how ;; to convert that back to global dynamic. @@ -23559,10 +23567,12 @@ (define_expand "@tls_dynamic_gnu2_64_<mode>" "TARGET_64BIT && TARGET_GNU2_TLS" { operands[2] = can_create_pseudo_p () ? 
gen_reg_rtx (ptr_mode) : operands[0]; + if (ix86_tls_descriptor_calls_expanded_in_cfun) + cfun->machine->tls_descriptor_call_multiple_p = true; ix86_tls_descriptor_calls_expanded_in_cfun = true; }) -(define_insn "*tls_dynamic_gnu2_lea_64_<mode>" +(define_insn "tls_dynamic_gnu2_lea_64_<mode>" [(set (match_operand:PTR 0 "register_operand" "=r") (unspec:PTR [(match_operand 1 "tls_symbolic_operand")] UNSPEC_TLSDESC))] @@ -23573,7 +23583,7 @@ (define_insn "*tls_dynamic_gnu2_lea_64_<mode>" (set_attr "length" "7") (set_attr "length_address" "4")]) -(define_insn "*tls_dynamic_gnu2_call_64_<mode>" +(define_insn "tls_dynamic_gnu2_call_64_<mode>" [(set (match_operand:PTR 0 "register_operand" "=a") (unspec:PTR [(match_operand 1 "tls_symbolic_operand") (match_operand:PTR 2 "register_operand" "0") @@ -23586,7 +23596,7 @@ (define_insn "*tls_dynamic_gnu2_call_64_<mode>" (set_attr "length" "2") (set_attr "length_address" "0")]) -(define_insn_and_split "*tls_dynamic_gnu2_combine_64_<mode>" +(define_insn_and_split "tls_dynamic_gnu2_combine_64_<mode>" [(set (match_operand:PTR 0 "register_operand" "=&a") (plus:PTR (unspec:PTR [(match_operand 2 "tls_modbase_operand") diff --git a/gcc/testsuite/g++.target/i386/pr81501-1.C b/gcc/testsuite/g++.target/i386/pr81501-1.C new file mode 100644 index 00000000000..b2e89f4a5f0 --- /dev/null +++ b/gcc/testsuite/g++.target/i386/pr81501-1.C @@ -0,0 +1,16 @@ +/* { dg-do compile { target *-*-linux* } } */ +/* { dg-options "-std=c++14 -mtls-dialect=gnu -O2 -fpic -fplt" } */ +/* { dg-final { scan-assembler-times "call\[ \t\]__tls_get_addr@PLT" 1 { target { ! 
ia32 } } } } */ + +struct foo +{ + foo(); + ~foo(); +}; + +foo * +test () +{ + static thread_local foo foo_tls; + return &foo_tls; +} diff --git a/gcc/testsuite/gcc.target/i386/pr81501-1a.c b/gcc/testsuite/gcc.target/i386/pr81501-1a.c new file mode 100644 index 00000000000..30b4642a9ee --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr81501-1a.c @@ -0,0 +1,17 @@ +/* { dg-do compile { target *-*-linux* } } */ +/* { dg-options "-O2 -fpic -fplt -mtls-dialect=gnu" } */ + +void a(long *); +int b(void); +void c(void); +static __thread long e; +long +d(void) +{ + a(&e); + if (b()) + c(); + return e; +} + +/* { dg-final { scan-assembler-times "call\[ \t\]__tls_get_addr@PLT" 1 { target { ! ia32 } } } } */ diff --git a/gcc/testsuite/gcc.target/i386/pr81501-1b.c b/gcc/testsuite/gcc.target/i386/pr81501-1b.c new file mode 100644 index 00000000000..de25f226990 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr81501-1b.c @@ -0,0 +1,6 @@ +/* { dg-do compile { target *-*-linux* } } */ +/* { dg-options "-O2 -fpic -fplt -mtls-dialect=gnu2" } */ + +#include "pr81501-1a.c" + +/* { dg-final { scan-assembler-times "call\[ \t\]\\*e@TLSCALL\\(%(?:r|e)ax\\)" 1 { target { ! ia32 } } } } */ diff --git a/gcc/testsuite/gcc.target/i386/pr81501-2a.c b/gcc/testsuite/gcc.target/i386/pr81501-2a.c new file mode 100644 index 00000000000..a06302a468f --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr81501-2a.c @@ -0,0 +1,17 @@ +/* { dg-do compile { target *-*-linux* } } */ +/* { dg-options "-O2 -fpic -fplt -mtls-dialect=gnu" } */ + +void a(long *); +int b(void); +void c(void); +extern __thread long e; +long +d(void) +{ + a(&e); + if (b()) + c(); + return e; +} + +/* { dg-final { scan-assembler-times "call\[ \t\]__tls_get_addr@PLT" 1 { target { ! 
ia32 } } } } */ diff --git a/gcc/testsuite/gcc.target/i386/pr81501-2b.c b/gcc/testsuite/gcc.target/i386/pr81501-2b.c new file mode 100644 index 00000000000..4afb7426c81 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr81501-2b.c @@ -0,0 +1,6 @@ +/* { dg-do compile { target *-*-linux* } } */ +/* { dg-options "-O2 -fpic -fplt -mtls-dialect=gnu2" } */ + +#include "pr81501-2a.c" + +/* { dg-final { scan-assembler-times "call\[ \t\]\\*e@TLSCALL\\(%(?:r|e)ax\\)" 1 { target { ! ia32 } } } } */ diff --git a/gcc/testsuite/gcc.target/i386/pr81501-3.c b/gcc/testsuite/gcc.target/i386/pr81501-3.c new file mode 100644 index 00000000000..d4220630900 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr81501-3.c @@ -0,0 +1,9 @@ +/* { dg-do compile { target *-*-linux* } } */ +/* { dg-options "-O2 -fpic -fplt -mtls-dialect=gnu2" } */ + +static __thread int local1; +int * +get_local1 (void) +{ + return &local1; +} diff --git a/gcc/testsuite/gcc.target/i386/pr81501-4a.c b/gcc/testsuite/gcc.target/i386/pr81501-4a.c new file mode 100644 index 00000000000..775c5fd3b68 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr81501-4a.c @@ -0,0 +1,51 @@ +/* { dg-do compile { target *-*-linux* } } */ +/* { dg-options "-O2 -fpic -fplt -mtls-dialect=gnu" } */ +/* Keep labels and directives ('.cfi_startproc', '.cfi_endproc'). */ +/* { dg-final { check-function-bodies "**" "" "" { target { ! ia32 } } {^\t?\.} } } */ + +/* +**in_dso: +**.LFB[0-9]+: +**... +** movl %edi, %.* +**... +** mov(l|q) %(e|r)si, %.* +**... +** call __tls_get_addr@PLT +**... +*/ + +__thread int foo; + +extern void bar1 (int *, int *); +extern void bar2 (int); +extern void bar3 (const char *); + +int +in_dso (int n, int *caller_foop) +{ + int *foop; + int result = 0; + + bar3 ("foo"); /* Make sure PLT is used before macros. 
*/ + asm ("" ::: "memory"); + + foop = &foo; + + if (caller_foop != (void *) 0 && foop != caller_foop) + { + bar1 (caller_foop, foop); + result = 1; + } + else if (*foop != n) + { + bar2 (n); + result = 1; + } + + *foop = 16; + + return result; +} + +/* { dg-final { scan-assembler-times "call\[ \t\]__tls_get_addr@PLT" 1 { target { ! ia32 } } } } */ diff --git a/gcc/testsuite/gcc.target/i386/pr81501-4b.c b/gcc/testsuite/gcc.target/i386/pr81501-4b.c new file mode 100644 index 00000000000..5d35712b70d --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr81501-4b.c @@ -0,0 +1,6 @@ +/* { dg-do compile { target *-*-linux* } } */ +/* { dg-options "-O2 -fpic -fplt -mtls-dialect=gnu2" } */ + +#include "pr81501-4a.c" + +/* { dg-final { scan-assembler-times "call\[ \t\]\\*foo@TLSCALL\\(%(?:r|e)ax\\)" 1 { target { ! ia32 } } } } */ diff --git a/gcc/testsuite/gcc.target/i386/pr81501-5.c b/gcc/testsuite/gcc.target/i386/pr81501-5.c new file mode 100644 index 00000000000..7f666e1c006 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr81501-5.c @@ -0,0 +1,13 @@ +/* { dg-do compile { target *-*-linux* } } */ +/* { dg-options "-O2 -fpic -fplt -mtls-dialect=gnu" } */ + +extern __thread int __bid_IDEC_glbflags; +extern long __bid64qq_div_bid_y_0_1; +extern void get_BID64(int *); +void +__bid64qq_div(void) +{ + if (__bid64qq_div_bid_y_0_1) + __bid_IDEC_glbflags |= 1; + get_BID64(&__bid_IDEC_glbflags); +} diff --git a/gcc/testsuite/gcc.target/i386/pr81501-6a.c b/gcc/testsuite/gcc.target/i386/pr81501-6a.c new file mode 100644 index 00000000000..2e4c04b43e2 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr81501-6a.c @@ -0,0 +1,67 @@ +/* { dg-do compile { target *-*-linux* } } */ +/* { dg-options "-O2 -fpic -fplt -mtls-dialect=gnu" } */ +/* Keep labels and directives ('.cfi_startproc', '.cfi_endproc'). */ +/* { dg-final { check-function-bodies "**" "" "" { target { ! ia32 } } {^\t?\.} } } */ + +/* +**in_dso: +**.LFB[0-9]+: +**... +** mov(l|q) %(e|r)dx, %.* +**...
+** movl %edi, %.* +**... +** mov(l|q) %(e|r)si, %.* +**... +** call __tls_get_addr@PLT +**... +*/ + +__thread int foo; +__thread int bar; + +extern void fun1 (int *, int *); +extern void fun2 (int); +extern void fun3 (const char *); + +int +in_dso (int n, int *caller_foop, int *caller_barp) +{ + int *foop; + int *barp; + int result = 0; + + fun3 ("foo"); /* Make sure PLT is used before macros. */ + asm ("" ::: "memory"); + + foop = &foo; + barp = &bar; + + if (caller_foop != (void *) 0 && foop != caller_foop) + { + fun1 (caller_foop, foop); + result = 1; + if (caller_barp != (void *) 0 && barp != caller_barp) + { + fun1 (caller_barp, barp); + result = 2; + } + else if (*barp != n) + { + fun2 (n); + result = 3; + } + } + else if (*foop != n) + { + fun2 (n); + result = 4; + } + + *barp = 16; + *foop = 16; + + return result; +} + +/* { dg-final { scan-assembler-times "call\[ \t\]__tls_get_addr@PLT" 2 { target { ! ia32 } } } } */ diff --git a/gcc/testsuite/gcc.target/i386/pr81501-6b.c b/gcc/testsuite/gcc.target/i386/pr81501-6b.c new file mode 100644 index 00000000000..703422dd186 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr81501-6b.c @@ -0,0 +1,7 @@ +/* { dg-do compile { target *-*-linux* } } */ +/* { dg-options "-O2 -fpic -fplt -mtls-dialect=gnu2" } */ + +#include "pr81501-6a.c" + +/* { dg-final { scan-assembler-times "call\[ \t\]\\*foo@TLSCALL\\(%(?:r|e)ax\\)" 1 { target { ! ia32 } } } } */ +/* { dg-final { scan-assembler-times "call\[ \t\]\\*bar@TLSCALL\\(%(?:r|e)ax\\)" 1 { target { ! 
ia32 } } } } */ diff --git a/gcc/testsuite/gcc.target/i386/pr81501-7.c b/gcc/testsuite/gcc.target/i386/pr81501-7.c new file mode 100644 index 00000000000..b2fe5d5eb85 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr81501-7.c @@ -0,0 +1,20 @@ +/* { dg-do compile { target *-*-linux* } } */ +/* { dg-options "-O2 -fpic -fplt -mtls-dialect=gnu" } */ + +extern int __bid_IDEC_glbround, __bid64qqq_fma_save_fpsf; +extern __thread int __bid_IDEC_glbflags; +typedef struct { + long w[2]; +} UINT128; +extern long __bid64qqq_fma_res_0_1; +extern void bid128_ext_fma(UINT128, UINT128); +void +__bid64qqq_fma(UINT128 y, UINT128 z) +{ + __bid_IDEC_glbflags = 0; + bid128_ext_fma(y, z); + if (__bid_IDEC_glbround || __bid64qqq_fma_res_0_1) + __bid_IDEC_glbflags |= __bid64qqq_fma_save_fpsf; +} + +/* { dg-final { scan-assembler-times "call\[ \t\]__tls_get_addr@PLT" 1 { target { ! ia32 } } } } */ diff --git a/gcc/testsuite/gcc.target/i386/pr81501-8a.c b/gcc/testsuite/gcc.target/i386/pr81501-8a.c new file mode 100644 index 00000000000..f98742315c9 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr81501-8a.c @@ -0,0 +1,82 @@ +/* { dg-do compile { target *-*-linux* } } */ +/* { dg-options "-O2 -fpic -fplt -mtls-dialect=gnu" } */ +/* Keep labels and directives ('.cfi_startproc', '.cfi_endproc'). */ +/* { dg-final { check-function-bodies "**" "" "" { target { ! ia32 } } {^\t?\.} } } */ + +/* +**in_dso: +**.LFB[0-9]+: +**... +** mov(l|q) %(e|r)dx, %.* +**... +** movl %edi, %.* +**... +** mov(l|q) %(e|r)si, %.* +**... +** testb %al, %al +**... +** call __tls_get_addr@PLT +**... +*/ + +#include <stdarg.h> + +__thread int foo; +__thread int bar; + +extern void fun1 (int *, int *); +extern void fun2 (int); +extern void fun3 (const char *); + +int +in_dso (int n, int *caller_foop, int *caller_barp, ...) +{ + int *foop; + int *barp; + int result; + va_list ap; + double d; + + va_start (ap, caller_barp); + + result = 0; + + fun3 ("foo"); /* Make sure PLT is used before macros. 
*/ + asm ("" ::: "memory"); + + foop = &foo; + barp = &bar; + + if (caller_foop != (void *) 0 && foop != caller_foop) + { + fun1 (caller_foop, foop); + result = 1; + if (caller_barp != (void *) 0 && barp != caller_barp) + { + fun1 (caller_barp, barp); + result = 2; + } + else if (*barp != n) + { + fun2 (n); + result = 3; + } + } + else if (*foop != n) + { + fun2 (n); + result = 4; + } + + *barp = 16; + *foop = 16; + + d = va_arg (ap, double); + if (d != 1234.0) + result = 10; + va_end (ap); + + return result; +} + +/* { dg-final { scan-assembler-times "call\[ \t\]__tls_get_addr@PLT" 2 { target { ! ia32 } } } } */ diff --git a/gcc/testsuite/gcc.target/i386/pr81501-8b.c b/gcc/testsuite/gcc.target/i386/pr81501-8b.c new file mode 100644 index 00000000000..0882507254e --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr81501-8b.c @@ -0,0 +1,25 @@ +/* { dg-do compile { target *-*-linux* } } */ +/* { dg-options "-O2 -fpic -fplt -mtls-dialect=gnu2" } */ +/* Keep labels and directives ('.cfi_startproc', '.cfi_endproc'). */ +/* { dg-final { check-function-bodies "**" "" "" { target { ! ia32 } } {^\t?\.} } } */ + +/* +**in_dso: +**.LFB[0-9]+: +**... +** testb %al, %al +**... +** lea(l|q) bar@TLSDESC\(%rip\), %(e|r)ax +**... +** call \*bar@TLSCALL\(%(e|r)ax\) +**... +** lea(l|q) foo@TLSDESC\(%rip\), %(e|r)ax +**... +** call \*foo@TLSCALL\(%(e|r)ax\) +**... +*/ + +#include "pr81501-8a.c" + +/* { dg-final { scan-assembler-times "call\[ \t\]\\*foo@TLSCALL\\(%(?:r|e)ax\\)" 1 { target { ! ia32 } } } } */ +/* { dg-final { scan-assembler-times "call\[ \t\]\\*bar@TLSCALL\\(%(?:r|e)ax\\)" 1 { target { ! ia32 } } } } */ -- 2.50.0