On Mon, Aug 17, 2015 at 10:17:00AM -0700, H.J. Lu wrote:
> On Mon, Aug 17, 2015 at 10:08 AM, Alexander Monakov <amona...@ispras.ru> 
> wrote:
> >> >> Perhaps add a comment that GOT slots are 64-bit on x32?
> >> >>
> >> >
> >> > Good idea.  I will update my patch.
> >> >
> >>
> >> How about this?
> >>
> >>
> >> diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
> >> index bf8a21d..216dee6 100644
> >> --- a/gcc/config/i386/i386.c
> >> +++ b/gcc/config/i386/i386.c
> >> @@ -25690,6 +25690,10 @@ ix86_expand_call (rtx retval, rtx fnaddr, rtx 
> >> callarg1,
> >>   fnaddr);
> >>   }
> >>        fnaddr = gen_const_mem (Pmode, fnaddr);
> >> +      /* Pmode may not be the same as word_mode for x32, which
> >
> > I think 'Pmode is not the same as word_mode on x32' is more appropriate 
> > here.
> 
> "-maddress-mode=long -mx32" makes Pmode == word_mode.
> 
> >> + doesn't support indirect branch va 32-bit memory slot.
> >
> > Typo: s/va/via.
> >
> 
> Fixed.
> 
> Here is the updated patch.
> 

Hi Jeff,

Can you review this?

Thanks.


H.J.
---
It boils down to that -fno-plt should convert calling an external
function, foo, from

call foo@PLT

to

call *foo@GOT

to avoid one extra direct branch to the PLT.  The proper place for this
is in the backend, when expanding a function call.  The backend already
takes care of many details of calling an external function, like setting
up a PIC register.  Using the GOT slot instead of the PLT slot is just
one of those details.  For x86, it should be done in ix86_expand_call,
rather than in prepare_call_address while hoping for the best, which
doesn't always happen.  Also, the non-PIC case can only be handled in
the backend.

This patch reverts the -fno-plt handling in prepare_call_address and
handles it in ix86_expand_call instead.  Other backends may need similar
changes to support -fno-plt.  Alternatively, we can introduce a target
hook to indicate whether an external function should be called via a
register for -fno-plt, so that the i386 backend can disable it in
prepare_call_address.

sibcall_memory_operand is also updated to accept the GOT slot so that

call *foo@GOT(%reg)

can be generated by ix86_expand_call for 32-bit and 64-bit large model.

gcc/

        PR target/67215
        * calls.c (prepare_call_address): Don't handle -fno-plt here.
        * config/i386/i386.c (ix86_expand_call): Generate indirect call
        via GOT for -fno-plt.  Support indirect call via GOT for x32.
        * config/i386/predicates.md (sibcall_memory_operand): Allow
        GOT memory operand.

gcc/testsuite/

        PR target/67215
        * gcc.target/i386/pr67215-1.c: New test.
        * gcc.target/i386/pr67215-2.c: Likewise.
        * gcc.target/i386/pr67215-3.c: Likewise.
---
 gcc/calls.c                               | 12 ------
 gcc/config/i386/i386.c                    | 71 ++++++++++++++++++++++++-------
 gcc/config/i386/predicates.md             |  7 ++-
 gcc/testsuite/gcc.target/i386/pr67215-1.c | 20 +++++++++
 gcc/testsuite/gcc.target/i386/pr67215-2.c | 20 +++++++++
 gcc/testsuite/gcc.target/i386/pr67215-3.c | 12 ++++++
 6 files changed, 113 insertions(+), 29 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr67215-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr67215-2.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr67215-3.c

diff --git a/gcc/calls.c b/gcc/calls.c
index 5636725..7cce9be 100644
--- a/gcc/calls.c
+++ b/gcc/calls.c
@@ -203,18 +203,6 @@ prepare_call_address (tree fndecl_or_type, rtx funexp, rtx static_chain_value,
               && targetm.small_register_classes_for_mode_p (FUNCTION_MODE))
              ? force_not_mem (memory_address (FUNCTION_MODE, funexp))
              : memory_address (FUNCTION_MODE, funexp));
-  else if (flag_pic
-          && fndecl_or_type
-          && TREE_CODE (fndecl_or_type) == FUNCTION_DECL
-          && (!flag_plt
-              || lookup_attribute ("noplt", DECL_ATTRIBUTES (fndecl_or_type)))
-          && !targetm.binds_local_p (fndecl_or_type))
-    {
-      /* This is done only for PIC code.  There is no easy interface to force 
the
-        function address into GOT for non-PIC case.  non-PIC case needs to be
-        handled specially by the backend.  */
-      funexp = force_reg (Pmode, funexp);
-    }
   else if (! sibcallp)
     {
       if (!NO_FUNCTION_CSE && optimize && ! flag_no_function_cse)
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 05fa5e1..ac9a6c4 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -25649,21 +25649,54 @@ ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
       /* Static functions and indirect calls don't need the pic register.  
Also,
         check if PLT was explicitly avoided via no-plt or "noplt" attribute, 
making
         it an indirect call.  */
+      rtx addr = XEXP (fnaddr, 0);
       if (flag_pic
-         && (!TARGET_64BIT
-             || (ix86_cmodel == CM_LARGE_PIC
-                 && DEFAULT_ABI != MS_ABI))
-         && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
-         && !SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0))
-         && flag_plt
-         && (SYMBOL_REF_DECL ((XEXP (fnaddr, 0))) == NULL_TREE
-             || !lookup_attribute ("noplt",
-                    DECL_ATTRIBUTES (SYMBOL_REF_DECL (XEXP (fnaddr, 0))))))
+         && GET_CODE (addr) == SYMBOL_REF
+         && !SYMBOL_REF_LOCAL_P (addr))
        {
-         use_reg (&use, gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM));
-         if (ix86_use_pseudo_pic_reg ())
-           emit_move_insn (gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM),
-                           pic_offset_table_rtx);
+         if (flag_plt
+             && (SYMBOL_REF_DECL (addr) == NULL_TREE
+                 || !lookup_attribute ("noplt",
+                                       DECL_ATTRIBUTES (SYMBOL_REF_DECL 
(addr)))))
+           {
+             if (!TARGET_64BIT
+                 || (ix86_cmodel == CM_LARGE_PIC
+                     && DEFAULT_ABI != MS_ABI))
+               {
+                 use_reg (&use, gen_rtx_REG (Pmode,
+                                             REAL_PIC_OFFSET_TABLE_REGNUM));
+                 if (ix86_use_pseudo_pic_reg ())
+                   emit_move_insn (gen_rtx_REG (Pmode,
+                                                REAL_PIC_OFFSET_TABLE_REGNUM),
+                                   pic_offset_table_rtx);
+               }
+           }
+         else if (!TARGET_PECOFF && !TARGET_MACHO)
+           {
+             if (TARGET_64BIT)
+               {
+                 fnaddr = gen_rtx_UNSPEC (Pmode,
+                                          gen_rtvec (1, addr),
+                                          UNSPEC_GOTPCREL);
+                 fnaddr = gen_rtx_CONST (Pmode, fnaddr);
+               }
+             else
+               {
+                 fnaddr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr),
+                                          UNSPEC_GOT);
+                 fnaddr = gen_rtx_CONST (Pmode, fnaddr);
+                 fnaddr = gen_rtx_PLUS (Pmode, pic_offset_table_rtx,
+                                        fnaddr);
+               }
+             fnaddr = gen_const_mem (Pmode, fnaddr);
+             /* Pmode may not be the same as word_mode for x32, which
+                doesn't support indirect branch via 32-bit memory slot.
+                Since x32 GOT slot is 64 bit with zero upper 32 bits,
+                indirect branch via x32 GOT slot is OK.  */
+             if (GET_MODE (fnaddr) != word_mode)
+               fnaddr = gen_rtx_ZERO_EXTEND (word_mode, fnaddr);
+             fnaddr = gen_rtx_MEM (QImode, fnaddr);
+           }
        }
     }
 
@@ -25685,9 +25718,15 @@ ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
       && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
       && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
     fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
-  else if (sibcall
-          ? !sibcall_insn_operand (XEXP (fnaddr, 0), word_mode)
-          : !call_insn_operand (XEXP (fnaddr, 0), word_mode))
+  /* Since x32 GOT slot is 64 bit with zero upper 32 bits, indirect
+     branch via x32 GOT slot is OK.  */
+  else if (!(TARGET_X32
+            && MEM_P (fnaddr)
+            && GET_CODE (XEXP (fnaddr, 0)) == ZERO_EXTEND
+            && GOT_memory_operand (XEXP (XEXP (fnaddr, 0), 0), Pmode))
+          && (sibcall
+              ? !sibcall_insn_operand (XEXP (fnaddr, 0), word_mode)
+              : !call_insn_operand (XEXP (fnaddr, 0), word_mode)))
     {
       fnaddr = convert_to_mode (word_mode, XEXP (fnaddr, 0), 1);
       fnaddr = gen_rtx_MEM (QImode, copy_to_mode_reg (word_mode, fnaddr));
diff --git a/gcc/config/i386/predicates.md b/gcc/config/i386/predicates.md
index a9c8623..be2df76 100644
--- a/gcc/config/i386/predicates.md
+++ b/gcc/config/i386/predicates.md
@@ -593,7 +593,12 @@
 ;; Return true if OP is a memory operands that can be used in sibcalls.
 (define_predicate "sibcall_memory_operand"
   (and (match_operand 0 "memory_operand")
-       (match_test "CONSTANT_P (XEXP (op, 0))")))
+       (match_test "CONSTANT_P (XEXP (op, 0))
+                   || (GET_CODE (XEXP (op, 0)) == PLUS
+                       && REG_P (XEXP (XEXP (op, 0), 0))
+                       && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST
+                       && GET_CODE (XEXP (XEXP (XEXP (op, 0), 1), 0)) == UNSPEC
+                       && XINT (XEXP (XEXP (XEXP (op, 0), 1), 0), 1) == 
UNSPEC_GOT)")))
 
 ;; Test for a valid operand for a call instruction.
 ;; Allow constant call address operands in Pmode only.
diff --git a/gcc/testsuite/gcc.target/i386/pr67215-1.c b/gcc/testsuite/gcc.target/i386/pr67215-1.c
new file mode 100644
index 0000000..fd37f8e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr67215-1.c
@@ -0,0 +1,20 @@
+/* { dg-do compile { target *-*-linux* } } */
+/* { dg-options "-O2 -fpic -fno-plt" } */
+
+extern char* bar (int);
+extern char* arr[32];
+
+void
+foo (void)
+{
+  int i;
+
+  for (i = 0; i < 32; i++)
+    arr[i] = bar (128);
+}
+
+/* { dg-final { scan-assembler "call\[ \t\]*.bar@GOTPCREL" { target { ! ia32 } 
} } } */
+/* { dg-final { scan-assembler "call\[ \t\]*.bar@GOT\\(" { target ia32 } } } */
+/* { dg-final { scan-assembler-not "mov(l|q)\[ \t\]*.bar@GOTPCREL" { target { 
! ia32 } } } } */
+/* { dg-final { scan-assembler-not "movl\[ \t\]*.bar@GOT\\(" { target ia32 } } 
} */
+/* { dg-final { scan-assembler-not "call\[ \t\]*.bar@PLT" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr67215-2.c b/gcc/testsuite/gcc.target/i386/pr67215-2.c
new file mode 100644
index 0000000..ebf2919
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr67215-2.c
@@ -0,0 +1,20 @@
+/* { dg-do compile { target *-*-linux* } } */
+/* { dg-options "-O2 -fpic" } */
+
+extern char* bar (int) __attribute__ ((noplt));
+extern char* arr[32];
+
+void
+foo (void)
+{
+  int i;
+
+  for (i = 0; i < 32; i++)
+    arr[i] = bar (128);
+}
+
+/* { dg-final { scan-assembler "call\[ \t\]*.bar@GOTPCREL" { target { ! ia32 } 
} } } */
+/* { dg-final { scan-assembler "call\[ \t\]*.bar@GOT\\(" { target ia32 } } } */
+/* { dg-final { scan-assembler-not "mov(l|q)\[ \t\]*.bar@GOTPCREL" { target { 
! ia32 } } } } */
+/* { dg-final { scan-assembler-not "movl\[ \t\]*.bar@GOT\\(" { target ia32 } } 
} */
+/* { dg-final { scan-assembler-not "call\[ \t\]*.bar@PLT" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr67215-3.c b/gcc/testsuite/gcc.target/i386/pr67215-3.c
new file mode 100644
index 0000000..dbd9a2f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr67215-3.c
@@ -0,0 +1,12 @@
+/* { dg-do compile { target *-*-linux* } } */
+/* { dg-options "-O2 -fpic -fno-plt -fdump-rtl-expand" } */
+
+extern int bar (void);
+
+int
+foo (void)
+{
+  return bar ();
+}
+
+/* { dg-final { scan-rtl-dump "\\(call \\(mem:QI \\(mem/u/c:" "expand" } } */
-- 
2.4.3

Reply via email to