For TLS calls:

1. UNSPEC_TLS_GD:

  (parallel [
    (set (reg:DI 0 ax)
         (call:DI (mem:QI (symbol_ref:DI ("__tls_get_addr")))
                  (const_int 0 [0])))
    (unspec:DI [(symbol_ref:DI ("e") [flags 0x50])
                (reg/f:DI 7 sp)] UNSPEC_TLS_GD)
    (clobber (reg:DI 5 di))])

2. UNSPEC_TLS_LD_BASE:

  (parallel [
    (set (reg:DI 0 ax)
         (call:DI (mem:QI (symbol_ref:DI ("__tls_get_addr")))
                  (const_int 0 [0])))
    (unspec:DI [(reg/f:DI 7 sp)] UNSPEC_TLS_LD_BASE)])

3. UNSPEC_TLSDESC:

  (parallel [
     (set (reg/f:DI 104)
           (plus:DI (unspec:DI [
                       (symbol_ref:DI ("_TLS_MODULE_BASE_") [flags 0x10])
                       (reg:DI 114)
                       (reg/f:DI 7 sp)] UNSPEC_TLSDESC)
                    (const:DI (unspec:DI [
                                 (symbol_ref:DI ("e") [flags 0x1a])
                              ] UNSPEC_DTPOFF))))
     (clobber (reg:CC 17 flags))])

  (parallel [
    (set (reg:DI 101)
         (unspec:DI [(symbol_ref:DI ("e") [flags 0x50])
                     (reg:DI 112)
                     (reg/f:DI 7 sp)] UNSPEC_TLSDESC))
    (clobber (reg:CC 17 flags))])

These patterns return the same value for the same input value, but
multiple calls with the same input value may be generated for simple
programs like:

void a(long *);
int b(void);
void c(void);
static __thread long e;
long
d(void)
{
  a(&e);
  if (b())
    c();
  return e;
}

When compiled with -O2 -fPIC -mtls-dialect=gnu2, the following code is
generated:

        .type   d, @function
d:
.LFB0:
        .cfi_startproc
        pushq   %rbx
        .cfi_def_cfa_offset 16
        .cfi_offset 3, -16
        leaq    e@TLSDESC(%rip), %rbx
        movq    %rbx, %rax
        call    *e@TLSCALL(%rax)
        addq    %fs:0, %rax
        movq    %rax, %rdi
        call    a@PLT
        call    b@PLT
        testl   %eax, %eax
        jne     .L8
        movq    %rbx, %rax
        call    *e@TLSCALL(%rax)
        popq    %rbx
        .cfi_remember_state
        .cfi_def_cfa_offset 8
        movq    %fs:(%rax), %rax
        ret
        .p2align 4,,10
        .p2align 3
.L8:
        .cfi_restore_state
        call    c@PLT
        movq    %rbx, %rax
        call    *e@TLSCALL(%rax)
        popq    %rbx
        .cfi_def_cfa_offset 8
        movq    %fs:(%rax), %rax
        ret
        .cfi_endproc

There are 3 "call *e@TLSCALL(%rax)" instructions, all of which return the
same value.  Rename the remove_redundant_vector_load pass to the x86_cse
pass and, for 64-bit, extend it to also remove redundant TLS calls so
that the following code is generated:

d:
.LFB0:
        .cfi_startproc
        pushq   %rbx
        .cfi_def_cfa_offset 16
        .cfi_offset 3, -16
        leaq    e@TLSDESC(%rip), %rax
        movq    %fs:0, %rdi
        call    *e@TLSCALL(%rax)
        addq    %rax, %rdi
        movq    %rax, %rbx
        call    a@PLT
        call    b@PLT
        testl   %eax, %eax
        jne     .L8
        movq    %fs:(%rbx), %rax
        popq    %rbx
        .cfi_remember_state
        .cfi_def_cfa_offset 8
        ret
        .p2align 4,,10
        .p2align 3
.L8:
        .cfi_restore_state
        call    c@PLT
        movq    %fs:(%rbx), %rax
        popq    %rbx
        .cfi_def_cfa_offset 8
        ret
        .cfi_endproc

with only one "call *e@TLSCALL(%rax)".  This reduces the number of
__tls_get_addr calls in libgcc.a by 72%:

__tls_get_addr calls     before         after
libgcc.a                 868            243

gcc/

        PR target/81501
        * config/i386/i386-features.cc (x86_cse_kind): Add X86_CSE_TLS_GD,
        X86_CSE_TLS_LD_BASE and X86_CSE_TLSDESC.
        (redundant_load): Renamed to ...
        (redundant_pattern): This.
        (replace_tls_call): New.
        (ix86_place_single_tls_call): Likewise.
        (remove_redundant_vector_load): Renamed to ...
        (x86_cse): This.  Extend to remove redundant TLS calls.
        (pass_remove_redundant_vector_load): Renamed to ...
        (pass_x86_cse): This.
        (make_pass_remove_redundant_vector_load): Renamed to ...
        (make_pass_x86_cse): This.
        * config/i386/i386-passes.def: Replace
        pass_remove_redundant_vector_load with pass_x86_cse.
        * config/i386/i386-protos.h (ix86_tls_get_addr): New.
        (make_pass_remove_redundant_vector_load): Renamed to ...
        (make_pass_x86_cse): This.
        * config/i386/i386.cc (ix86_tls_get_addr): Remove static.
        * config/i386/i386.h (machine_function): Add
        tls_descriptor_call_multiple_p.
        * config/i386/i386.md (@tls_global_dynamic_64_<mode>): Set
        tls_descriptor_call_multiple_p.
        (@tls_local_dynamic_base_64_<mode>): Likewise.
        (@tls_dynamic_gnu2_64_<mode>): Likewise.
        (*tls_dynamic_gnu2_lea_64_<mode>): Renamed to ...
        (tls_dynamic_gnu2_lea_64_<mode>): This.
        (*tls_dynamic_gnu2_call_64_<mode>): Renamed to ...
        (tls_dynamic_gnu2_call_64_<mode>): This.
        (*tls_dynamic_gnu2_combine_64_<mode>): Renamed to ...
        (tls_dynamic_gnu2_combine_64_<mode>): This.

gcc/testsuite/

        PR target/81501
        * g++.target/i386/pr81501-1.C: New test.
        * gcc.target/i386/pr81501-1a.c: Likewise.
        * gcc.target/i386/pr81501-1b.c: Likewise.
        * gcc.target/i386/pr81501-2a.c: Likewise.
        * gcc.target/i386/pr81501-2b.c: Likewise.
        * gcc.target/i386/pr81501-3.c: Likewise.
        * gcc.target/i386/pr81501-4a.c: Likewise.
        * gcc.target/i386/pr81501-4b.c: Likewise.
        * gcc.target/i386/pr81501-5.c: Likewise.
        * gcc.target/i386/pr81501-6a.c: Likewise.
        * gcc.target/i386/pr81501-6b.c: Likewise.
        * gcc.target/i386/pr81501-7.c: Likewise.
        * gcc.target/i386/pr81501-8a.c: Likewise.
        * gcc.target/i386/pr81501-8b.c: Likewise.

Signed-off-by: H.J. Lu <hjl.to...@gmail.com>
---
 gcc/config/i386/i386-features.cc           | 679 +++++++++++++++++++--
 gcc/config/i386/i386-passes.def            |   2 +-
 gcc/config/i386/i386-protos.h              |   4 +-
 gcc/config/i386/i386.cc                    |   2 +-
 gcc/config/i386/i386.h                     |   3 +
 gcc/config/i386/i386.md                    |  20 +-
 gcc/testsuite/g++.target/i386/pr81501-1.C  |  16 +
 gcc/testsuite/gcc.target/i386/pr81501-1a.c |  17 +
 gcc/testsuite/gcc.target/i386/pr81501-1b.c |   6 +
 gcc/testsuite/gcc.target/i386/pr81501-2a.c |  17 +
 gcc/testsuite/gcc.target/i386/pr81501-2b.c |   6 +
 gcc/testsuite/gcc.target/i386/pr81501-3.c  |   9 +
 gcc/testsuite/gcc.target/i386/pr81501-4a.c |  51 ++
 gcc/testsuite/gcc.target/i386/pr81501-4b.c |   6 +
 gcc/testsuite/gcc.target/i386/pr81501-5.c  |  13 +
 gcc/testsuite/gcc.target/i386/pr81501-6a.c |  67 ++
 gcc/testsuite/gcc.target/i386/pr81501-6b.c |   7 +
 gcc/testsuite/gcc.target/i386/pr81501-7.c  |  20 +
 gcc/testsuite/gcc.target/i386/pr81501-8a.c |  82 +++
 gcc/testsuite/gcc.target/i386/pr81501-8b.c |  25 +
 20 files changed, 978 insertions(+), 74 deletions(-)
 create mode 100644 gcc/testsuite/g++.target/i386/pr81501-1.C
 create mode 100644 gcc/testsuite/gcc.target/i386/pr81501-1a.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr81501-1b.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr81501-2a.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr81501-2b.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr81501-3.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr81501-4a.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr81501-4b.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr81501-5.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr81501-6a.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr81501-6b.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr81501-7.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr81501-8a.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr81501-8b.c

diff --git a/gcc/config/i386/i386-features.cc b/gcc/config/i386/i386-features.cc
index 054f8d5ddc8..8e2c69ea313 100644
--- a/gcc/config/i386/i386-features.cc
+++ b/gcc/config/i386/i386-features.cc
@@ -3493,10 +3493,13 @@ enum x86_cse_kind
 {
   X86_CSE_CONST0_VECTOR,
   X86_CSE_CONSTM1_VECTOR,
-  X86_CSE_VEC_DUP
+  X86_CSE_VEC_DUP,
+  X86_CSE_TLS_GD,
+  X86_CSE_TLS_LD_BASE,
+  X86_CSE_TLSDESC
 };
 
-struct redundant_load
+struct redundant_pattern
 {
   /* Bitmap of basic blocks with broadcast instructions.  */
   auto_bitmap bbs;
@@ -3671,22 +3674,323 @@ ix86_broadcast_inner (rtx op, machine_mode mode,
   return op;
 }
 
-/* At entry of the nearest common dominator for basic blocks with vector
-   CONST0_RTX and integer CONSTM1_RTX uses, generate a single widest
-   vector set instruction for all CONST0_RTX and integer CONSTM1_RTX
-   uses.
+/* Replace CALL instruction in TLS_CALL_INSNS with SET from SRC.  */
 
-   NB: We want to generate only a single widest vector set to cover the
-   whole function.  The LCM algorithm isn't appropriate here since it
-   may place a vector set inside the loop.  */
+static void
+replace_tls_call (rtx src, auto_bitmap &tls_call_insns)
+{
+  bitmap_iterator bi;
+  unsigned int id;
+
+  EXECUTE_IF_SET_IN_BITMAP (tls_call_insns, 0, id, bi)
+    {
+      rtx_insn *insn = DF_INSN_UID_GET (id)->insn;
+
+      /* If this isn't a CALL, only GNU2 TLS implicit CALL patterns are
+        allowed.  */
+      if (!CALL_P (insn))
+       switch (INSN_CODE (insn))
+         {
+         case CODE_FOR_tls_dynamic_gnu2_call_64_si:
+         case CODE_FOR_tls_dynamic_gnu2_call_64_di:
+         case CODE_FOR_tls_dynamic_gnu2_combine_64_si:
+         case CODE_FOR_tls_dynamic_gnu2_combine_64_di:
+           break;
+         default:
+           gcc_unreachable ();
+         }
+
+      rtx pat = PATTERN (insn);
+      if (GET_CODE (pat) != PARALLEL)
+       gcc_unreachable ();
+
+      int j;
+      rtx op, dest = nullptr;
+      for (j = XVECLEN (pat, 0) - 1; j >= 0; j--)
+       {
+         op = XVECEXP (pat, 0, j);
+         if (GET_CODE (op) == SET)
+           {
+             dest = SET_DEST (op);
+             break;
+           }
+       }
+
+      rtx set = gen_rtx_SET (dest, src);
+      rtx_insn *set_insn = emit_insn_after (set, insn);
+      if (recog_memoized (set_insn) < 0)
+       gcc_unreachable ();
+
+      if (dump_file)
+       {
+         fprintf (dump_file, "\nReplace:\n\n");
+         print_rtl_single (dump_file, insn);
+         fprintf (dump_file, "\nwith:\n\n");
+         print_rtl_single (dump_file, set_insn);
+         fprintf (dump_file, "\n");
+       }
+
+      /* Delete the CALL insn.  */
+      delete_insn (insn);
+
+      df_insn_rescan (set_insn);
+    }
+}
+
+/* Generate a TLS call of KIND with VAL and copy the call result to DEST,
+   at entry of the nearest dominator for basic block map BBS, which is in
+   the fake loop that contains the whole function, so that there is only
+   a single TLS CALL of KIND with VAL in the whole function.  If
+   TLSDESC_SET isn't nullptr, insert it before the TLS call.  */
+
+static void
+ix86_place_single_tls_call (rtx dest, rtx val, x86_cse_kind kind,
+                           bitmap bbs, rtx tlsdesc_set = nullptr)
+{
+  basic_block bb = nearest_common_dominator_for_set (CDI_DOMINATORS, bbs);
+  while (bb->loop_father->latch
+        != EXIT_BLOCK_PTR_FOR_FN (cfun))
+    bb = get_immediate_dominator (CDI_DOMINATORS,
+                                 bb->loop_father->header);
+
+  rtx_insn *insn = BB_HEAD (bb);
+  while (insn && !NONDEBUG_INSN_P (insn))
+    {
+      if (insn == BB_END (bb))
+       {
+         insn = NULL;
+         break;
+       }
+      insn = NEXT_INSN (insn);
+    }
+
+  rtx rax = nullptr, rdi;
+  rtx eqv = nullptr;
+  rtx caddr;
+  rtx set;
+  rtx clob;
+  rtx symbol;
+  rtx tls;
+  rtx_insn *tls_insn;
+
+  switch (kind)
+    {
+    case X86_CSE_TLS_GD:
+      rax = gen_rtx_REG (Pmode, AX_REG);
+      rdi = gen_rtx_REG (Pmode, DI_REG);
+      caddr = ix86_tls_get_addr ();
+
+      symbol = XVECEXP (val, 0, 0);
+      tls = gen_tls_global_dynamic_64 (Pmode, rax, symbol, caddr, rdi);
+
+      if (GET_MODE (symbol) != Pmode)
+       symbol = gen_rtx_ZERO_EXTEND (Pmode, symbol);
+      eqv = symbol;
+      break;
+
+    case X86_CSE_TLS_LD_BASE:
+      rax = gen_rtx_REG (Pmode, AX_REG);
+      rdi = gen_rtx_REG (Pmode, DI_REG);
+      caddr = ix86_tls_get_addr ();
+
+      tls = gen_tls_local_dynamic_base_64 (Pmode, rax, caddr, rdi);
+
+      /* Attach a unique REG_EQUAL to DEST, to allow the RTL optimizers
+        to share the LD_BASE result with other LD model accesses.  */
+      eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
+                           UNSPEC_TLS_LD_BASE);
+
+      break;
+
+    case X86_CSE_TLSDESC:
+      set = gen_rtx_SET (dest, val);
+      clob = gen_rtx_CLOBBER (VOIDmode,
+                             gen_rtx_REG (CCmode, FLAGS_REG));
+      tls = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clob));
+      break;
+
+    default:
+      gcc_unreachable ();
+    }
+
+  rtx_insn *before = nullptr;
+  rtx_insn *after = nullptr;
+  if (insn == BB_HEAD (bb))
+    before = insn;
+  else
+    after = insn ? PREV_INSN (insn) : BB_END (bb);
+
+  /* TLS_GD and TLS_LD_BASE instructions are normal functions which
+     clobber caller-saved registers.  TLSDESC instructions are special
+     functions which only clobber RAX.  If any registers clobbered by
+     the TLS instruction are live in this basic block, we must insert
+     the TLS instruction after all live registers clobbered by the TLS
+     instruction are dead.  */
+
+  auto_bitmap live_caller_saved_regs;
+  bitmap in = df_live ? DF_LIVE_IN (bb) : DF_LR_IN (bb);
+
+  bool flags_live_p = bitmap_bit_p (in, FLAGS_REG);
+
+  unsigned int i;
+
+  /* Get all live caller-saved registers.  */
+  if (kind == X86_CSE_TLSDESC)
+    {
+      if (bitmap_bit_p (in, AX_REG))
+       bitmap_set_bit (live_caller_saved_regs, AX_REG);
+    }
+  else
+    for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
+      if (call_used_regs[i]
+         && !fixed_regs[i]
+         && bitmap_bit_p (in, i))
+       bitmap_set_bit (live_caller_saved_regs, i);
+
+  if (!bitmap_empty_p (live_caller_saved_regs))
+    {
+      /* Search for REG_DEAD notes in this basic block.  */
+      FOR_BB_INSNS (bb, insn)
+       {
+         if (!NONDEBUG_INSN_P (insn))
+           continue;
+
+         /* Check if FLAGS register is live.  */
+         set = single_set (insn);
+         if (set)
+           {
+             rtx dest = SET_DEST (set);
+             if (REG_P (dest) && REGNO (dest) == FLAGS_REG)
+               flags_live_p = true;
+           }
+
+         rtx link;
+         for (link = REG_NOTES (insn); link; link = XEXP (link, 1))
+           if (REG_NOTE_KIND (link) == REG_DEAD
+               && REG_P (XEXP (link, 0)))
+             {
+               /* Mark the live caller-saved register as dead.  */
+               for (i = REGNO (XEXP (link, 0));
+                    i < END_REGNO (XEXP (link, 0));
+                    i++)
+                 bitmap_clear_bit (live_caller_saved_regs, i);
+
+               /* Check if FLAGS register is dead.  */
+               if (REGNO (XEXP (link, 0)) == FLAGS_REG)
+                 flags_live_p = false;
+
+               if (bitmap_empty_p (live_caller_saved_regs))
+                 {
+                   /* All live caller-saved registers are dead after
+                      this instruction.  Since TLS instructions
+                      clobber the FLAGS register, FLAGS must also be
+                      dead at the point where the TLS call is inserted.  */
+                   if (flags_live_p)
+                     gcc_unreachable ();
+                   after = insn;
+                   goto insert_after;
+                 }
+             }
+       }
+
+      /* All live caller-saved registers should be dead at the end
+        of this basic block.  */
+      gcc_unreachable ();
+    }
+
+  /* Emit the TLS CALL insn.  */
+  if (after)
+    {
+insert_after:
+      tls_insn = emit_insn_after (tls, after);
+    }
+  else
+    tls_insn = emit_insn_before (tls, before);
+
+  rtx_insn *tlsdesc_insn = nullptr;
+  if (tlsdesc_set)
+    {
+      rtx dest = copy_rtx (SET_DEST (tlsdesc_set));
+      rtx src = copy_rtx (SET_SRC (tlsdesc_set));
+      tlsdesc_set = gen_rtx_SET (dest, src);
+      tlsdesc_insn = emit_insn_before (tlsdesc_set, tls_insn);
+    }
+
+  if (kind != X86_CSE_TLSDESC)
+    {
+      RTL_CONST_CALL_P (tls_insn) = 1;
+
+      /* Indicate that this function can't jump to non-local gotos.  */
+      make_reg_eh_region_note_nothrow_nononlocal (tls_insn);
+    }
+
+  if (recog_memoized (tls_insn) < 0)
+    gcc_unreachable ();
+
+  if (dump_file)
+    {
+      if (after)
+       {
+         fprintf (dump_file, "\nPlace:\n\n");
+         if (tlsdesc_insn)
+           print_rtl_single (dump_file, tlsdesc_insn);
+         print_rtl_single (dump_file, tls_insn);
+         fprintf (dump_file, "\nafter:\n\n");
+         print_rtl_single (dump_file, after);
+         fprintf (dump_file, "\n");
+       }
+      else
+       {
+         fprintf (dump_file, "\nPlace:\n\n");
+         if (tlsdesc_insn)
+           print_rtl_single (dump_file, tlsdesc_insn);
+         print_rtl_single (dump_file, tls_insn);
+         fprintf (dump_file, "\nbefore:\n\n");
+         print_rtl_single (dump_file, insn);
+         fprintf (dump_file, "\n");
+       }
+    }
+
+  if (kind != X86_CSE_TLSDESC)
+    {
+      /* Copy RAX to DEST.  */
+      set = gen_rtx_SET (dest, rax);
+      rtx_insn *set_insn = emit_insn_after (set, tls_insn);
+      set_dst_reg_note (set_insn, REG_EQUAL, copy_rtx (eqv), dest);
+      if (dump_file)
+       {
+         fprintf (dump_file, "\nPlace:\n\n");
+         print_rtl_single (dump_file, set_insn);
+         fprintf (dump_file, "\nafter:\n\n");
+         print_rtl_single (dump_file, tls_insn);
+         fprintf (dump_file, "\n");
+       }
+    }
+}
+
+/* At entry of the nearest common dominator for basic blocks with
+
+   1. Vector CONST0_RTX patterns.
+   2. Vector CONSTM1_RTX patterns.
+   3. Vector broadcast patterns.
+   4. UNSPEC_TLS_GD patterns.
+   5. UNSPEC_TLS_LD_BASE patterns.
+   6. UNSPEC_TLSDESC patterns.
+
+   generate a single pattern whose destination is used to replace the
+   source in all identical patterns.
+
+   NB: We want to generate a pattern, which is executed only once, to
+   cover the whole function.  The LCM algorithm isn't appropriate here
+   since it may place a pattern inside the loop.  */
 
 static unsigned int
-remove_redundant_vector_load (void)
+x86_cse (void)
 {
   timevar_push (TV_MACH_DEP);
 
-  auto_vec<redundant_load *> loads;
-  redundant_load *load;
+  auto_vec<redundant_pattern *> loads;
+  redundant_pattern *load;
   basic_block bb;
   rtx_insn *insn;
   unsigned int i;
@@ -3702,46 +4006,250 @@ remove_redundant_vector_load (void)
          if (!NONDEBUG_INSN_P (insn))
            continue;
 
-         rtx set = single_set (insn);
+         bool matched = false;
+
+         /* Remove redundant patterns if there are more than 2 of
+            them.  */
+         unsigned int threshold = 2;
+
+         rtx val, set, dest = nullptr, src;
+         rtx_insn *def_insn;
+         machine_mode mode = VOIDmode;
+         machine_mode scalar_mode = VOIDmode;
+         x86_cse_kind kind;
+
+         if (TARGET_64BIT
+             && cfun->machine->tls_descriptor_call_multiple_p
+             && CALL_P (insn))
+           {
+             /* Record the redundant TLS CALLs for 64-bit:
+
+                (parallel [
+                  (set (reg:DI 0 ax)
+                       (call:DI (mem:QI (symbol_ref:DI ("__tls_get_addr")))
+                       (const_int 0 [0])))
+                  (unspec:DI [(symbol_ref:DI ("foo") [flags 0x50])
+                              (reg/f:DI 7 sp)] UNSPEC_TLS_GD)
+                  (clobber (reg:DI 5 di))])
+
+
+                and
+
+                (parallel [
+                  (set (reg:DI 0 ax)
+                       (call:DI (mem:QI (symbol_ref:DI ("__tls_get_addr")))
+                       (const_int 0 [0])))
+                       (unspec:DI [(reg/f:DI 7 sp)] UNSPEC_TLS_LD_BASE)])
+
+              */
+
+             unspec unspec_val = (unspec) 0;
+             rtx pat = PATTERN (insn);
+             if (GET_CODE (pat) != PARALLEL)
+               continue;
+
+             int j;
+             rtx op;
+             for (j = XVECLEN (pat, 0) - 1; j >= 0; j--)
+               {
+                 op = XVECEXP (pat, 0, j);
+                 switch (GET_CODE (op))
+                   {
+                   case SET:
+                     /* Only a single SET is allowed.  */
+                     if (dest)
+                       gcc_unreachable ();
+                     dest = SET_DEST (op);
+                     scalar_mode = mode = GET_MODE (dest);
+                     break;
+                   case UNSPEC:
+                     if (XINT (op, 1) == UNSPEC_TLS_LD_BASE
+                         || XINT (op, 1) == UNSPEC_TLS_GD)
+                       {
+                         val = op;
+                         unspec_val = (unspec) XINT (op, 1);
+                       }
+                     break;
+                   default:
+                     break;
+                   }
+               }
+
+             switch (unspec_val)
+               {
+               case 0:
+                 continue;
+
+               case UNSPEC_TLS_GD:
+                 kind = X86_CSE_TLS_GD;
+                 break;
+
+               case UNSPEC_TLS_LD_BASE:
+                 kind = X86_CSE_TLS_LD_BASE;
+                 break;
+
+               default:
+                 gcc_unreachable ();
+               }
+
+             def_insn = nullptr;
+
+             goto check_redundant_pattern;
+           }
+
+         set = single_set (insn);
          if (!set)
            continue;
 
+         src = SET_SRC (set);
+
+         insn_code icode;
+
+         if (TARGET_64BIT
+             && cfun->machine->tls_descriptor_call_multiple_p)
+           {
+             /* Record GNU2 TLS CALLs for 64-bit:
+
+                (parallel [
+                  (set (reg/f:DI 104)
+                       (plus:DI (unspec:DI [
+                                  (symbol_ref:DI ("_TLS_MODULE_BASE_") [flags 0x10])
+                                  (reg:DI 114)
+                                  (reg/f:DI 7 sp)] UNSPEC_TLSDESC)
+                                (const:DI (unspec:DI [
+                                  (symbol_ref:DI ("e") [flags 0x1a])
+                                 ] UNSPEC_DTPOFF))))
+                  (clobber (reg:CC 17 flags))])
+
+                and
+
+                (parallel [
+                  (set (reg:DI 101)
+                       (unspec:DI [
+                         (symbol_ref:DI ("foo") [flags 0x50])
+                         (reg:DI 112)
+                         (reg/f:DI 7 sp)] UNSPEC_TLSDESC))
+                  (clobber (reg:CC 17 flags))])
+
+                */
+
+             icode = (insn_code) INSN_CODE (insn);
+             if ((icode == CODE_FOR_tls_dynamic_gnu2_call_64_si)
+                 || (icode == CODE_FOR_tls_dynamic_gnu2_call_64_di))
+               val = src;
+             else if (GET_CODE (src) == PLUS
+                      && ((icode
+                           == CODE_FOR_tls_dynamic_gnu2_combine_64_si)
+                          || (icode
+                              == CODE_FOR_tls_dynamic_gnu2_combine_64_di)))
+               {
+                 val = src;
+                 src = XEXP (src, 0);
+               }
+             else
+               continue;
+
+             kind = X86_CSE_TLSDESC;
+             gcc_assert (GET_CODE (src) == UNSPEC);
+             src = XVECEXP (src, 0, 1);
+             scalar_mode = mode = GET_MODE (src);
+             if (REG_P (src))
+               {
+                 /* All definitions of reg:DI 129 in
+
+                    (set (reg:DI 110)
+                         (unspec:DI
+                           [(symbol_ref:DI ("foo"))
+                            (reg:DI 129)
+                            (reg/f:DI 7 sp)] UNSPEC_TLSDESC))
+
+                    should have the same source as in
+
+                    (set (reg:DI 129)
+                         (unspec:DI
+                           [(symbol_ref:DI ("foo"))] UNSPEC_TLSDESC))
+
+                  */
+
+                 df_ref ref;
+                 rtx_insn *set_insn = nullptr;
+                 rtx tls_src = nullptr;
+                 for (ref = DF_REG_DEF_CHAIN (REGNO (src));
+                      ref;
+                      ref = DF_REF_NEXT_REG (ref))
+                   {
+                     if (DF_REF_IS_ARTIFICIAL (ref))
+                       break;
+
+                     set_insn = DF_REF_INSN (ref);
+                     icode = (insn_code) INSN_CODE (set_insn);
+                      if ((icode
+                           != CODE_FOR_tls_dynamic_gnu2_lea_64_si)
+                          && (icode
+                              != CODE_FOR_tls_dynamic_gnu2_lea_64_di))
+                        {
+                          set_insn = nullptr;
+                          break;
+                        }
+
+                      rtx tls_set = PATTERN (set_insn);
+                      if (!tls_src)
+                        tls_src = SET_SRC (tls_set);
+                      else if (!rtx_equal_p (tls_src,
+                                             SET_SRC (tls_set)))
+                        {
+                          set_insn = nullptr;
+                          break;
+                        }
+                   }
+
+                 if (!set_insn)
+                   continue;
+
+                 set = single_set (insn);
+                 if (!set)
+                   continue;
+
+                 def_insn = set_insn;
+               }
+             else if (GET_CODE (src) == UNSPEC
+                      && XINT (src, 1) == UNSPEC_TLSDESC
+                      && SYMBOL_REF_P (XVECEXP (src, 0, 0)))
+               def_insn = nullptr;
+             else
+               gcc_unreachable ();
+
+             goto check_redundant_pattern;
+           }
+
          /* Record single set vector instruction with CONST0_RTX and
             CONSTM1_RTX source.  Record basic blocks with CONST0_RTX and
             CONSTM1_RTX.  Count CONST0_RTX and CONSTM1_RTX.  Record the
             maximum size of CONST0_RTX and CONSTM1_RTX.  */
 
-         rtx dest = SET_DEST (set);
-         machine_mode mode = GET_MODE (dest);
+         dest = SET_DEST (set);
+         mode = GET_MODE (dest);
          /* Skip non-vector instruction.  */
          if (!VECTOR_MODE_P (mode))
            continue;
 
-         rtx src = SET_SRC (set);
          /* Skip non-vector load instruction.  */
          if (!REG_P (dest) && !SUBREG_P (dest))
            continue;
 
-         rtx_insn *def_insn;
-         machine_mode scalar_mode;
-         x86_cse_kind kind;
-         rtx val = ix86_broadcast_inner (src, mode, &scalar_mode,
-                                         &kind, &def_insn);
+         val = ix86_broadcast_inner (src, mode, &scalar_mode, &kind,
+                                     &def_insn);
          if (!val)
            continue;
 
-          /* Remove redundant register loads if there are more than 2
-             loads will be used.  */
-         unsigned int threshold = 2;
-
+check_redundant_pattern:
          /* Check if there is a matching redundant vector load.   */
-         bool matched = false;
          FOR_EACH_VEC_ELT (loads, i, load)
            if (load->val
                && load->kind == kind
                && load->mode == scalar_mode
                && (load->bb == bb
-                   || kind < X86_CSE_VEC_DUP
+                   || kind != X86_CSE_VEC_DUP
                    /* Non all 0s/1s vector load must be in the same
                       basic block if it is in a recursive call.  */
                    || !recursive_call_p)
@@ -3751,7 +4259,8 @@ remove_redundant_vector_load (void)
                bitmap_set_bit (load->insns, INSN_UID (insn));
 
                /* Record the maximum vector size.  */
-               if (load->size < GET_MODE_SIZE (mode))
+               if (kind <= X86_CSE_VEC_DUP
+                   && load->size < GET_MODE_SIZE (mode))
                  load->size = GET_MODE_SIZE (mode);
 
                /* Record the basic block.  */
@@ -3765,7 +4274,7 @@ remove_redundant_vector_load (void)
            continue;
 
          /* We see this vector broadcast the first time.  */
-         load = new redundant_load;
+         load = new redundant_pattern;
 
          load->val = copy_rtx (val);
          load->mode = scalar_mode;
@@ -3788,6 +4297,23 @@ remove_redundant_vector_load (void)
   FOR_EACH_VEC_ELT (loads, i, load)
     if (load->count >= load->threshold)
       {
+       switch (load->kind)
+         {
+         case X86_CSE_TLS_GD:
+         case X86_CSE_TLS_LD_BASE:
+         case X86_CSE_TLSDESC:
+           broadcast_reg = gen_reg_rtx (load->mode);
+           replace_tls_call (broadcast_reg, load->insns);
+           load->broadcast_reg = broadcast_reg;
+           replaced = true;
+           break;
+         default:
+           break;
+         }
+
+       if (load->kind > X86_CSE_VEC_DUP)
+         continue;
+
        machine_mode mode = ix86_get_vector_cse_mode (load->size,
                                                      load->mode);
        broadcast_reg = gen_reg_rtx (mode);
@@ -3843,41 +4369,64 @@ remove_redundant_vector_load (void)
          {
            if (load->def_insn)
              {
-               /* Insert a broadcast after the original scalar
-                  definition.  */
-               rtx set = gen_rtx_SET (load->broadcast_reg,
+               rtx set;
+               if (load->kind == X86_CSE_TLSDESC)
+                 ix86_place_single_tls_call (load->broadcast_reg,
+                                             load->val,
+                                             load->kind,
+                                             load->bbs,
+                                             PATTERN (load->def_insn));
+               else
+                 {
+                   /* Insert a broadcast after the original scalar
+                      definition.  */
+                   set = gen_rtx_SET (load->broadcast_reg,
                                       load->broadcast_source);
-               insn = emit_insn_after (set, load->def_insn);
+                   insn = emit_insn_after (set, load->def_insn);
 
-               if (cfun->can_throw_non_call_exceptions)
-                 {
-                   /* Handle REG_EH_REGION note in DEF_INSN.  */
-                   rtx note = find_reg_note (load->def_insn,
-                                             REG_EH_REGION, nullptr);
-                   if (note)
+                   if (cfun->can_throw_non_call_exceptions)
                      {
-                       control_flow_insns.safe_push (load->def_insn);
-                       add_reg_note (insn, REG_EH_REGION,
-                                     XEXP (note, 0));
+                       /* Handle REG_EH_REGION note in DEF_INSN.  */
+                       rtx note = find_reg_note (load->def_insn,
+                                                 REG_EH_REGION, nullptr);
+                       if (note)
+                         {
+                           control_flow_insns.safe_push (load->def_insn);
+                           add_reg_note (insn, REG_EH_REGION,
+                                         XEXP (note, 0));
+                         }
                      }
-                 }
 
-               if (dump_file)
-                 {
-                   fprintf (dump_file, "\nAdd:\n\n");
-                   print_rtl_single (dump_file, insn);
-                   fprintf (dump_file, "\nafter:\n\n");
-                   print_rtl_single (dump_file, load->def_insn);
-                   fprintf (dump_file, "\n");
+                   if (dump_file)
+                     {
+                       fprintf (dump_file, "\nAdd:\n\n");
+                       print_rtl_single (dump_file, insn);
+                       fprintf (dump_file, "\nafter:\n\n");
+                       print_rtl_single (dump_file, load->def_insn);
+                       fprintf (dump_file, "\n");
+                     }
                  }
              }
            else
-             ix86_place_single_vector_set (load->broadcast_reg,
-                                           load->broadcast_source,
-                                           load->bbs,
-                                           (load->kind == X86_CSE_VEC_DUP
-                                            ? load->val
-                                            : nullptr));
+             switch (load->kind)
+               {
+               case X86_CSE_TLS_GD:
+               case X86_CSE_TLS_LD_BASE:
+               case X86_CSE_TLSDESC:
+                 ix86_place_single_tls_call (load->broadcast_reg,
+                                             load->val,
+                                             load->kind,
+                                             load->bbs);
+                 break;
+               default:
+                 ix86_place_single_vector_set (load->broadcast_reg,
+                                               load->broadcast_source,
+                                               load->bbs,
+                                               (load->kind == X86_CSE_VEC_DUP
+                                                ? load->val
+                                                : nullptr));
+                 break;
+               }
          }
 
       loop_optimizer_finalize ();
@@ -3909,10 +4458,10 @@ remove_redundant_vector_load (void)
 
 namespace {
 
-const pass_data pass_data_remove_redundant_vector_load =
+const pass_data pass_data_x86_cse =
 {
   RTL_PASS, /* type */
-  "rrvl", /* name */
+  "x86_cse", /* name */
   OPTGROUP_NONE, /* optinfo_flags */
   TV_MACH_DEP, /* tv_id */
   0, /* properties_required */
@@ -3922,11 +4471,11 @@ const pass_data pass_data_remove_redundant_vector_load =
   0, /* todo_flags_finish */
 };
 
-class pass_remove_redundant_vector_load : public rtl_opt_pass
+class pass_x86_cse : public rtl_opt_pass
 {
 public:
-  pass_remove_redundant_vector_load (gcc::context *ctxt)
-    : rtl_opt_pass (pass_data_remove_redundant_vector_load, ctxt)
+  pass_x86_cse (gcc::context *ctxt)
+    : rtl_opt_pass (pass_data_x86_cse, ctxt)
   {}
 
   /* opt_pass methods: */
@@ -3939,16 +4488,16 @@ public:
 
   unsigned int execute (function *) final override
     {
-      return remove_redundant_vector_load ();
+      return x86_cse ();
     }
-}; // class pass_remove_redundant_vector_load
+}; // class pass_x86_cse
 
 } // anon namespace
 
 rtl_opt_pass *
-make_pass_remove_redundant_vector_load (gcc::context *ctxt)
+make_pass_x86_cse (gcc::context *ctxt)
 {
-  return new pass_remove_redundant_vector_load (ctxt);
+  return new pass_x86_cse (ctxt);
 }
 
 /* Convert legacy instructions that clobbers EFLAGS to APX_NF
diff --git a/gcc/config/i386/i386-passes.def b/gcc/config/i386/i386-passes.def
index 06f0288b067..553b46d1fdc 100644
--- a/gcc/config/i386/i386-passes.def
+++ b/gcc/config/i386/i386-passes.def
@@ -35,6 +35,6 @@ along with GCC; see the file COPYING3.  If not see
      PR116174.  */
   INSERT_PASS_BEFORE (pass_shorten_branches, 1, pass_align_tight_loops);
 
-  INSERT_PASS_AFTER (pass_late_combine, 1, pass_remove_redundant_vector_load);
+  INSERT_PASS_AFTER (pass_late_combine, 1, pass_x86_cse);
   INSERT_PASS_AFTER (pass_late_combine, 1, pass_remove_partial_avx_dependency);
   INSERT_PASS_AFTER (pass_rtl_ifcvt, 1, pass_apx_nf_convert);
diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h
index 69bc0ee570d..ee6b78b2c77 100644
--- a/gcc/config/i386/i386-protos.h
+++ b/gcc/config/i386/i386-protos.h
@@ -290,6 +290,7 @@ extern rtx ix86_tls_module_base (void);
 extern bool ix86_gpr_tls_address_pattern_p (rtx);
 extern bool ix86_tls_address_pattern_p (rtx);
 extern rtx ix86_rewrite_tls_address (rtx);
+extern rtx ix86_tls_get_addr (void);
 
 extern void ix86_expand_vector_init (bool, rtx, rtx);
 extern void ix86_expand_vector_set (bool, rtx, rtx, int);
@@ -430,8 +431,7 @@ extern rtl_opt_pass *make_pass_insert_endbr_and_patchable_area
   (gcc::context *);
 extern rtl_opt_pass *make_pass_remove_partial_avx_dependency
   (gcc::context *);
-extern rtl_opt_pass *make_pass_remove_redundant_vector_load
-  (gcc::context *);
+extern rtl_opt_pass *make_pass_x86_cse (gcc::context *);
 extern rtl_opt_pass *make_pass_apx_nf_convert (gcc::context *);
 extern rtl_opt_pass *make_pass_align_tight_loops (gcc::context *);
 
diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index eb5b2eb6a86..70802202100 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -12440,7 +12440,7 @@ ix86_tls_index (void)
 
 static GTY(()) rtx ix86_tls_symbol;
 
-static rtx
+rtx
 ix86_tls_get_addr (void)
 {
   if (!ix86_tls_symbol)
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index 3f7ad68db3a..9d6d05a6911 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -2864,6 +2864,9 @@ struct GTY(()) machine_function {
      approximation.  */
   BOOL_BITFIELD tls_descriptor_call_expanded_p : 1;
 
+  /* True if TLS descriptor is called more than once.  */
+  BOOL_BITFIELD tls_descriptor_call_multiple_p : 1;
+
   /* If true, the current function has a STATIC_CHAIN is placed on the
      stack below the return address.  */
   BOOL_BITFIELD static_chain_on_stack : 1;
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 21b9f5ccd7a..a26b134f66a 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -23270,7 +23270,11 @@ (define_expand "@tls_global_dynamic_64_<mode>"
               UNSPEC_TLS_GD)
      (clobber (match_operand:P 3 "register_operand"))])]
   "TARGET_64BIT"
-  "ix86_tls_descriptor_calls_expanded_in_cfun = true;")
+{
+  if (ix86_tls_descriptor_calls_expanded_in_cfun)
+    cfun->machine->tls_descriptor_call_multiple_p = true;
+  ix86_tls_descriptor_calls_expanded_in_cfun = true;
+})
 
 (define_insn "*tls_local_dynamic_base_32_gnu"
   [(set (match_operand:SI 0 "register_operand" "=a")
@@ -23365,7 +23369,11 @@ (define_expand "@tls_local_dynamic_base_64_<mode>"
       (unspec:P [(reg:P SP_REG)] UNSPEC_TLS_LD_BASE)
       (clobber (match_operand:P 2 "register_operand"))])]
   "TARGET_64BIT"
-  "ix86_tls_descriptor_calls_expanded_in_cfun = true;")
+{
+  if (ix86_tls_descriptor_calls_expanded_in_cfun)
+    cfun->machine->tls_descriptor_call_multiple_p = true;
+  ix86_tls_descriptor_calls_expanded_in_cfun = true;
+})
 
 ;; Local dynamic of a single variable is a lose.  Show combine how
 ;; to convert that back to global dynamic.
@@ -23559,10 +23567,12 @@ (define_expand "@tls_dynamic_gnu2_64_<mode>"
   "TARGET_64BIT && TARGET_GNU2_TLS"
 {
   operands[2] = can_create_pseudo_p () ? gen_reg_rtx (ptr_mode) : operands[0];
+  if (ix86_tls_descriptor_calls_expanded_in_cfun)
+    cfun->machine->tls_descriptor_call_multiple_p = true;
   ix86_tls_descriptor_calls_expanded_in_cfun = true;
 })
 
-(define_insn "*tls_dynamic_gnu2_lea_64_<mode>"
+(define_insn "tls_dynamic_gnu2_lea_64_<mode>"
   [(set (match_operand:PTR 0 "register_operand" "=r")
        (unspec:PTR [(match_operand 1 "tls_symbolic_operand")]
                    UNSPEC_TLSDESC))]
@@ -23573,7 +23583,7 @@ (define_insn "*tls_dynamic_gnu2_lea_64_<mode>"
    (set_attr "length" "7")
    (set_attr "length_address" "4")])
 
-(define_insn "*tls_dynamic_gnu2_call_64_<mode>"
+(define_insn "tls_dynamic_gnu2_call_64_<mode>"
   [(set (match_operand:PTR 0 "register_operand" "=a")
        (unspec:PTR [(match_operand 1 "tls_symbolic_operand")
                   (match_operand:PTR 2 "register_operand" "0")
@@ -23586,7 +23596,7 @@ (define_insn "*tls_dynamic_gnu2_call_64_<mode>"
    (set_attr "length" "2")
    (set_attr "length_address" "0")])
 
-(define_insn_and_split "*tls_dynamic_gnu2_combine_64_<mode>"
+(define_insn_and_split "tls_dynamic_gnu2_combine_64_<mode>"
   [(set (match_operand:PTR 0 "register_operand" "=&a")
        (plus:PTR
         (unspec:PTR [(match_operand 2 "tls_modbase_operand")
diff --git a/gcc/testsuite/g++.target/i386/pr81501-1.C b/gcc/testsuite/g++.target/i386/pr81501-1.C
new file mode 100644
index 00000000000..b2e89f4a5f0
--- /dev/null
+++ b/gcc/testsuite/g++.target/i386/pr81501-1.C
@@ -0,0 +1,16 @@
+/* { dg-do compile { target *-*-linux* } } */
+/* { dg-options "-std=c++14 -mtls-dialect=gnu -O2 -fpic -fplt" } */
+/* { dg-final { scan-assembler-times "call\[ \t\]__tls_get_addr@PLT" 1 { target { ! ia32 } } } } */
+
+struct foo
+{
+  foo();
+  ~foo();
+};
+
+foo *
+test ()
+{
+  static thread_local foo foo_tls;
+  return &foo_tls;
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr81501-1a.c b/gcc/testsuite/gcc.target/i386/pr81501-1a.c
new file mode 100644
index 00000000000..30b4642a9ee
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr81501-1a.c
@@ -0,0 +1,17 @@
+/* { dg-do compile { target *-*-linux* } } */
+/* { dg-options "-O2 -fpic -fplt -mtls-dialect=gnu" } */
+
+void a(long *);
+int b(void);
+void c(void);
+static __thread long e;
+long
+d(void)
+{
+  a(&e);
+  if (b())
+    c();
+  return e;
+}
+
+/* { dg-final { scan-assembler-times "call\[ \t\]__tls_get_addr@PLT" 1 { target { ! ia32 } } } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr81501-1b.c b/gcc/testsuite/gcc.target/i386/pr81501-1b.c
new file mode 100644
index 00000000000..de25f226990
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr81501-1b.c
@@ -0,0 +1,6 @@
+/* { dg-do compile { target *-*-linux* } } */
+/* { dg-options "-O2 -fpic -fplt -mtls-dialect=gnu2" } */
+
+#include "pr81501-1a.c"
+
+/* { dg-final { scan-assembler-times "call\[ \t\]\\*e@TLSCALL\\(%(?:r|e)ax\\)" 1 { target { ! ia32 } } } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr81501-2a.c b/gcc/testsuite/gcc.target/i386/pr81501-2a.c
new file mode 100644
index 00000000000..a06302a468f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr81501-2a.c
@@ -0,0 +1,17 @@
+/* { dg-do compile { target *-*-linux* } } */
+/* { dg-options "-O2 -fpic -fplt -mtls-dialect=gnu" } */
+
+void a(long *);
+int b(void);
+void c(void);
+extern __thread long e;
+long
+d(void)
+{
+  a(&e);
+  if (b())
+    c();
+  return e;
+}
+
+/* { dg-final { scan-assembler-times "call\[ \t\]__tls_get_addr@PLT" 1 { target { ! ia32 } } } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr81501-2b.c b/gcc/testsuite/gcc.target/i386/pr81501-2b.c
new file mode 100644
index 00000000000..4afb7426c81
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr81501-2b.c
@@ -0,0 +1,6 @@
+/* { dg-do compile { target *-*-linux* } } */
+/* { dg-options "-O2 -fpic -fplt -mtls-dialect=gnu2" } */
+
+#include "pr81501-2a.c"
+
+/* { dg-final { scan-assembler-times "call\[ \t\]\\*e@TLSCALL\\(%(?:r|e)ax\\)" 1 { target { ! ia32 } } } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr81501-3.c b/gcc/testsuite/gcc.target/i386/pr81501-3.c
new file mode 100644
index 00000000000..d4220630900
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr81501-3.c
@@ -0,0 +1,9 @@
+/* { dg-do compile { target *-*-linux* } } */
+/* { dg-options "-O2 -fpic -fplt -mtls-dialect=gnu2" } */
+
+static __thread int local1;
+int *
+get_local1 (void)
+{
+  return &local1;
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr81501-4a.c b/gcc/testsuite/gcc.target/i386/pr81501-4a.c
new file mode 100644
index 00000000000..775c5fd3b68
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr81501-4a.c
@@ -0,0 +1,51 @@
+/* { dg-do compile { target *-*-linux* } } */
+/* { dg-options "-O2 -fpic -fplt -mtls-dialect=gnu" } */
+/* Keep labels and directives ('.cfi_startproc', '.cfi_endproc').  */
+/* { dg-final { check-function-bodies "**" "" "" { target { ! ia32 } } {^\t?\.} } } */
+
+/*
+**in_dso:
+**.LFB[0-9]+:
+**...
+**     movl    %edi, %.*
+**...
+**     mov(l|q)        %(e|r)si, %.*
+**...
+**     call    __tls_get_addr@PLT
+**...
+*/
+
+__thread int foo;
+
+extern void bar1 (int *, int *);
+extern void bar2 (int);
+extern void bar3 (const char *);
+
+int
+in_dso (int n, int *caller_foop)
+{
+  int *foop;
+  int result = 0;
+
+  bar3 ("foo");                        /* Make sure PLT is used before macros.  */
+  asm ("" ::: "memory");
+
+  foop = &foo;
+
+  if (caller_foop != (void *) 0 && foop != caller_foop)
+    {
+      bar1 (caller_foop, foop);
+      result = 1;
+    }
+  else if (*foop != n)
+    {
+      bar2 (n);
+      result = 1;
+    }
+
+  *foop = 16;
+
+  return result;
+}
+
+/* { dg-final { scan-assembler-times "call\[ \t\]__tls_get_addr@PLT" 1 { target { ! ia32 } } } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr81501-4b.c b/gcc/testsuite/gcc.target/i386/pr81501-4b.c
new file mode 100644
index 00000000000..5d35712b70d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr81501-4b.c
@@ -0,0 +1,6 @@
+/* { dg-do compile { target *-*-linux* } } */
+/* { dg-options "-O2 -fpic -fplt -mtls-dialect=gnu2" } */
+
+#include "pr81501-4a.c"
+
+/* { dg-final { scan-assembler-times "call\[ \t\]\\*foo@TLSCALL\\(%(?:r|e)ax\\)" 1 { target { ! ia32 } } } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr81501-5.c b/gcc/testsuite/gcc.target/i386/pr81501-5.c
new file mode 100644
index 00000000000..7f666e1c006
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr81501-5.c
@@ -0,0 +1,13 @@
+/* { dg-do compile { target *-*-linux* } } */
+/* { dg-options "-O2 -fpic -fplt -mtls-dialect=gnu" } */
+
+extern __thread int __bid_IDEC_glbflags;
+extern long __bid64qq_div_bid_y_0_1;
+extern void get_BID64(int *);
+void
+__bid64qq_div(void)
+{
+  if (__bid64qq_div_bid_y_0_1)
+    __bid_IDEC_glbflags |= 1;
+  get_BID64(&__bid_IDEC_glbflags);
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr81501-6a.c b/gcc/testsuite/gcc.target/i386/pr81501-6a.c
new file mode 100644
index 00000000000..2e4c04b43e2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr81501-6a.c
@@ -0,0 +1,67 @@
+/* { dg-do compile { target *-*-linux* } } */
+/* { dg-options "-O2 -fpic -fplt -mtls-dialect=gnu" } */
+/* Keep labels and directives ('.cfi_startproc', '.cfi_endproc').  */
+/* { dg-final { check-function-bodies "**" "" "" { target { ! ia32 } } {^\t?\.} } } */
+
+/*
+**in_dso:
+**.LFB[0-9]+:
+**...
+**     mov(l|q)        %(e|r)dx, %.*
+**...
+**     movl    %edi, %.*
+**...
+**     mov(l|q)        %(e|r)si, %.*
+**...
+**     call    __tls_get_addr@PLT
+**...
+*/
+
+__thread int foo;
+__thread int bar;
+
+extern void fun1 (int *, int *);
+extern void fun2 (int);
+extern void fun3 (const char *);
+
+int
+in_dso (int n, int *caller_foop, int *caller_barp)
+{
+  int *foop;
+  int *barp;
+  int result = 0;
+
+  fun3 ("foo");                        /* Make sure PLT is used before macros.  */
+  asm ("" ::: "memory");
+
+  foop = &foo;
+  barp = &bar;
+
+  if (caller_foop != (void *) 0 && foop != caller_foop)
+    {
+      fun1 (caller_foop, foop);
+      result = 1;
+      if (caller_barp != (void *) 0 && barp != caller_barp)
+       {
+         fun1 (caller_barp, barp);
+         result = 2;
+       }
+      else if (*barp != n)
+       {
+         fun2 (n);
+         result = 3;
+       }
+    }
+  else if (*foop != n)
+    {
+      fun2 (n);
+      result = 4;
+    }
+
+  *barp = 16;
+  *foop = 16;
+
+  return result;
+}
+
+/* { dg-final { scan-assembler-times "call\[ \t\]__tls_get_addr@PLT" 2 { target { ! ia32 } } } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr81501-6b.c b/gcc/testsuite/gcc.target/i386/pr81501-6b.c
new file mode 100644
index 00000000000..703422dd186
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr81501-6b.c
@@ -0,0 +1,7 @@
+/* { dg-do compile { target *-*-linux* } } */
+/* { dg-options "-O2 -fpic -fplt -mtls-dialect=gnu2" } */
+
+#include "pr81501-6a.c"
+
+/* { dg-final { scan-assembler-times "call\[ \t\]\\*foo@TLSCALL\\(%(?:r|e)ax\\)" 1 { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler-times "call\[ \t\]\\*bar@TLSCALL\\(%(?:r|e)ax\\)" 1 { target { ! ia32 } } } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr81501-7.c b/gcc/testsuite/gcc.target/i386/pr81501-7.c
new file mode 100644
index 00000000000..b2fe5d5eb85
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr81501-7.c
@@ -0,0 +1,20 @@
+/* { dg-do compile { target *-*-linux* } } */
+/* { dg-options "-O2 -fpic -fplt -mtls-dialect=gnu" } */
+
+extern int __bid_IDEC_glbround, __bid64qqq_fma_save_fpsf;
+extern __thread int __bid_IDEC_glbflags;
+typedef struct {
+  long w[2];
+} UINT128;
+extern long __bid64qqq_fma_res_0_1;
+extern void bid128_ext_fma(UINT128, UINT128);
+void
+__bid64qqq_fma(UINT128 y, UINT128 z)
+{
+  __bid_IDEC_glbflags = 0;
+  bid128_ext_fma(y, z);
+  if (__bid_IDEC_glbround || __bid64qqq_fma_res_0_1)
+    __bid_IDEC_glbflags |= __bid64qqq_fma_save_fpsf;
+}
+
+/* { dg-final { scan-assembler-times "call\[ \t\]__tls_get_addr@PLT" 1 { target { ! ia32 } } } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr81501-8a.c b/gcc/testsuite/gcc.target/i386/pr81501-8a.c
new file mode 100644
index 00000000000..f98742315c9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr81501-8a.c
@@ -0,0 +1,82 @@
+/* { dg-do compile { target *-*-linux* } } */
+/* { dg-options "-O2 -fpic -fplt -mtls-dialect=gnu" } */
+/* Keep labels and directives ('.cfi_startproc', '.cfi_endproc').  */
+/* { dg-final { check-function-bodies "**" "" "" { target { ! ia32 } } {^\t?\.} } } */
+
+/*
+**in_dso:
+**.LFB[0-9]+:
+**...
+**     mov(l|q)        %(e|r)dx, %.*
+**...
+**     movl    %edi, %.*
+**...
+**     mov(l|q)        %(e|r)si, %.*
+**...
+**     testb   %al, %al
+**...
+**     call    __tls_get_addr@PLT
+**...
+*/
+
+#include <stdarg.h>
+
+__thread int foo;
+__thread int bar;
+
+extern void fun1 (int *, int *);
+extern void fun2 (int);
+extern void fun3 (const char *);
+
+int
+in_dso (int n, int *caller_foop, int *caller_barp, ...)
+{
+  int *foop;
+  int *barp;
+  int result;
+  va_list ap;
+  double d;
+
+  va_start (ap, caller_barp);
+
+  result = 0;
+
+  fun3 ("foo");                        /* Make sure PLT is used before macros.  */
+  asm ("" ::: "memory");
+
+  foop = &foo;
+  barp = &bar;
+
+  if (caller_foop != (void *) 0 && foop != caller_foop)
+    {
+      fun1 (caller_foop, foop);
+      result = 1;
+      if (caller_barp != (void *) 0 && barp != caller_barp)
+       {
+         fun1 (caller_barp, barp);
+         result = 2;
+       }
+      else if (*barp != n)
+       {
+         fun2 (n);
+         result = 3;
+       }
+    }
+  else if (*foop != n)
+    {
+      fun2 (n);
+      result = 4;
+    }
+
+  *barp = 16;
+  *foop = 16;
+
+  d = va_arg (ap, double);
+  if (d != 1234.0)
+    result = 10;
+  va_end (ap);
+
+  return result;
+}
+
+/* { dg-final { scan-assembler-times "call\[ \t\]__tls_get_addr@PLT" 2 { target { ! ia32 } } } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr81501-8b.c b/gcc/testsuite/gcc.target/i386/pr81501-8b.c
new file mode 100644
index 00000000000..0882507254e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr81501-8b.c
@@ -0,0 +1,25 @@
+/* { dg-do compile { target *-*-linux* } } */
+/* { dg-options "-O2 -fpic -fplt -mtls-dialect=gnu2" } */
+/* Keep labels and directives ('.cfi_startproc', '.cfi_endproc').  */
+/* { dg-final { check-function-bodies "**" "" "" { target { ! ia32 } } {^\t?\.} } } */
+
+/*
+**in_dso:
+**.LFB[0-9]+:
+**...
+**     testb   %al, %al
+**...
+**     lea(l|q)        bar@TLSDESC\(%rip\), %(e|r)ax
+**...
+**     call    \*bar@TLSCALL\(%(e|r)ax\)
+**...
+**     lea(l|q)        foo@TLSDESC\(%rip\), %(e|r)ax
+**...
+**     call    \*foo@TLSCALL\(%(e|r)ax\)
+**...
+*/
+
+#include "pr81501-8a.c"
+
+/* { dg-final { scan-assembler-times "call\[ \t\]\\*foo@TLSCALL\\(%(?:r|e)ax\\)" 1 { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler-times "call\[ \t\]\\*bar@TLSCALL\\(%(?:r|e)ax\\)" 1 { target { ! ia32 } } } } */
-- 
2.50.0

Reply via email to