https://gcc.gnu.org/g:3861d362ec7e3c50742fc43833fe9d8674f4070e

commit r15-6733-g3861d362ec7e3c50742fc43833fe9d8674f4070e
Author: Thomas Schwinge <tschwi...@baylibre.com>
Date:   Sat Dec 7 00:17:49 2024 +0100

    nvptx: PTX 'alloca' for '-mptx=7.3'+, '-march=sm_52'+ [PR65181]
    
    ..., and use it for '-mno-soft-stack': PTX "native" stacks.
    
            PR target/65181
            gcc/
            * config/nvptx/nvptx.cc (nvptx_get_drap_rtx): Handle
            '!TARGET_SOFT_STACK'.
            * config/nvptx/nvptx.md (define_c_enum "unspec"): Add
            'UNSPEC_STACKSAVE', 'UNSPEC_STACKRESTORE'.
            (define_expand "allocate_stack", define_expand "save_stack_block")
            (define_expand "restore_stack_block"): Handle '!TARGET_SOFT_STACK',
            PTX 'alloca'.
            (define_insn "@nvptx_alloca_<mode>")
            (define_insn "@nvptx_stacksave_<mode>")
            (define_insn "@nvptx_stackrestore_<mode>"): New.
            * doc/invoke.texi (Nvidia PTX Options): Update '-msoft-stack',
            '-mno-soft-stack'.
            * doc/sourcebuild.texi (nvptx-specific attributes): Document
            'nvptx_runtime_alloca_ptx'.
            (Add Options): Document 'nvptx_alloca_ptx'.
            gcc/testsuite/
            * gcc.target/nvptx/alloca-1.c: Evolve into...
            * gcc.target/nvptx/alloca-1-O0.c: ... this, ...
            * gcc.target/nvptx/alloca-1-O1.c: ... this, and...
            * gcc.target/nvptx/alloca-1-sm_30.c: ... this.
            * gcc.target/nvptx/vla-1.c: Evolve into...
            * gcc.target/nvptx/vla-1-O0.c: ... this, ...
            * gcc.target/nvptx/vla-1-O1.c: ... this, and...
            * gcc.target/nvptx/vla-1-sm_30.c: ... this.
            * gcc.c-torture/execute/pr36321.c: Adjust.
            * gcc.target/nvptx/__builtin_alloca_0-1-O0.c: Likewise.
            * gcc.target/nvptx/__builtin_alloca_0-1-O1.c: Likewise.
            * gcc.target/nvptx/__builtin_stack_save___builtin_stack_restore-1.c:
            Likewise.
            * gcc.target/nvptx/softstack.c: Likewise.
            * gcc.target/nvptx/__builtin_stack_save___builtin_stack_restore-1-sm_30.c:
            New.
            * gcc.target/nvptx/alloca-2-O0.c: Likewise.
            * gcc.target/nvptx/alloca-3-O1.c: Likewise.
            * gcc.target/nvptx/alloca-4-O3.c: Likewise.
            * gcc.target/nvptx/alloca-5.c: Likewise.
            * lib/target-supports.exp (check_effective_target_alloca): Adjust.
            (check_nvptx_default_ptx_isa_target_architecture_at_least)
            (check_nvptx_runtime_ptx_isa_target_architecture_at_least)
            (check_effective_target_nvptx_runtime_alloca_ptx)
            (add_options_for_nvptx_alloca_ptx): New.
            libgomp/
            * fortran.c (omp_get_device_from_uid_): Adjust.
            * testsuite/libgomp.oacc-fortran/privatized-ref-2.f90: Likewise.

Diff:
---
 gcc/config/nvptx/nvptx.cc                          |   4 +-
 gcc/config/nvptx/nvptx.md                          |  92 +++++++++++++++---
 gcc/doc/invoke.texi                                |  13 ++-
 gcc/doc/sourcebuild.texi                           |   6 ++
 gcc/testsuite/gcc.c-torture/execute/pr36321.c      |   3 +
 .../gcc.target/nvptx/__builtin_alloca_0-1-O0.c     |   2 +
 .../gcc.target/nvptx/__builtin_alloca_0-1-O1.c     |   2 +
 ...in_stack_save___builtin_stack_restore-1-sm_30.c |  28 ++++++
 ..._builtin_stack_save___builtin_stack_restore-1.c |   8 +-
 gcc/testsuite/gcc.target/nvptx/alloca-1-O0.c       |  49 ++++++++++
 gcc/testsuite/gcc.target/nvptx/alloca-1-O1.c       |  33 +++++++
 .../nvptx/{alloca-1.c => alloca-1-sm_30.c}         |   1 +
 gcc/testsuite/gcc.target/nvptx/alloca-2-O0.c       |  12 +++
 gcc/testsuite/gcc.target/nvptx/alloca-3-O1.c       |  40 ++++++++
 gcc/testsuite/gcc.target/nvptx/alloca-4-O3.c       |  55 +++++++++++
 gcc/testsuite/gcc.target/nvptx/alloca-5.c          | 107 +++++++++++++++++++++
 gcc/testsuite/gcc.target/nvptx/softstack.c         |   2 +
 gcc/testsuite/gcc.target/nvptx/vla-1-O0.c          |  29 ++++++
 gcc/testsuite/gcc.target/nvptx/vla-1-O1.c          |  40 ++++++++
 .../gcc.target/nvptx/{vla-1.c => vla-1-sm_30.c}    |   1 +
 gcc/testsuite/lib/target-supports.exp              | 105 +++++++++++++++++++-
 libgomp/fortran.c                                  |   4 +-
 .../libgomp.oacc-fortran/privatized-ref-2.f90      |  10 --
 23 files changed, 611 insertions(+), 35 deletions(-)

diff --git a/gcc/config/nvptx/nvptx.cc b/gcc/config/nvptx/nvptx.cc
index 5860b3df6dd7..060f45318f45 100644
--- a/gcc/config/nvptx/nvptx.cc
+++ b/gcc/config/nvptx/nvptx.cc
@@ -245,7 +245,7 @@ default_ptx_version_option (void)
      warp convergence.  */
   res = MAX (res, PTX_VERSION_6_0);
 
-  /* For sm_52+, pick at least 7.3.  */
+  /* For sm_52+, pick at least 7.3, to enable PTX 'alloca'.  */
   if (ptx_isa_option >= PTX_ISA_SM52)
     res = MAX (res, PTX_VERSION_7_3);
 
@@ -1797,7 +1797,7 @@ nvptx_function_ok_for_sibcall (tree, tree)
 static rtx
 nvptx_get_drap_rtx (void)
 {
-  if (TARGET_SOFT_STACK && stack_realign_drap)
+  if (stack_realign_drap)
     return arg_pointer_rtx;
   return NULL_RTX;
 }
diff --git a/gcc/config/nvptx/nvptx.md b/gcc/config/nvptx/nvptx.md
index b300f2e596c5..a22a088fb3ac 100644
--- a/gcc/config/nvptx/nvptx.md
+++ b/gcc/config/nvptx/nvptx.md
@@ -35,8 +35,9 @@
    UNSPEC_FPINT_NEARBYINT
 
    UNSPEC_ALLOCA
-
    UNSPEC_SET_SOFTSTACK
+   UNSPEC_STACKSAVE
+   UNSPEC_STACKRESTORE
 
    UNSPEC_DIM_SIZE
 
@@ -1663,22 +1664,47 @@
    (match_operand 1 "nvptx_register_operand")]
   ""
 {
-  if (TARGET_SOFT_STACK)
+  if (!TARGET_SOFT_STACK
+      && TARGET_PTX_7_3
+      && TARGET_SM52)
+    emit_insn (gen_nvptx_alloca (Pmode, operands[0], operands[1]));
+  else if (!TARGET_SOFT_STACK)
+    {
+      sorry ("target cannot support alloca");
+      emit_insn (gen_nop ());
+    }
+  else if (TARGET_SOFT_STACK)
     {
       emit_move_insn (stack_pointer_rtx,
                      gen_rtx_MINUS (Pmode, stack_pointer_rtx, operands[1]));
       emit_insn (gen_set_softstack (Pmode, stack_pointer_rtx));
       emit_move_insn (operands[0], virtual_stack_dynamic_rtx);
-      DONE;
     }
-  /* The ptx documentation specifies an alloca intrinsic (for 32 bit
-     only)  but notes it is not implemented.  The assembler emits a
-     confused error message.  Issue a blunt one now instead.  */
-  sorry ("target cannot support alloca");
-  emit_insn (gen_nop ());
+  else
+    gcc_unreachable ();
   DONE;
 })
 
+(define_insn "@nvptx_alloca_<mode>"
+  [(set (match_operand:P 0 "nvptx_register_operand" "=R")
+        (unspec:P [(match_operand:P 1 "nvptx_nonmemory_operand" "Ri")]
+                 UNSPEC_ALLOCA))]
+  "TARGET_PTX_7_3
+   && TARGET_SM52"
+  {
+    /* Convert the address from '.local' state space to generic.  That way,
+       we don't have to use 'st.local', 'ld.local', and can easily pass the
+       address to other "generic functions".
+       TODO 'gcc.target/nvptx/alloca-5.c' */
+    output_asm_insn ("{", NULL);
+    output_asm_insn ("\\t.reg%t0\\t%0_local;", operands);
+    output_asm_insn ("\\talloca%u0\\t%0_local, %1;", operands);
+    output_asm_insn ("\\tcvta.local%u0\\t%0, %0_local;", operands);
+    output_asm_insn ("}", NULL);
+    return "";
+  }
+  [(set_attr "predicable" "no")])
+
 (define_insn "@set_softstack_<mode>"
   [(unspec [(match_operand:P 0 "nvptx_register_operand" "R")]
           UNSPEC_SET_SOFTSTACK)]
@@ -1692,30 +1718,64 @@
    (match_operand 1 "register_operand" "")]
   "!TARGET_SOFT_STACK"
 {
-  /* The concept of a '%stack' pointer doesn't apply like this for
-     PTX "native" stacks.  GCC however occasionally synthesizes
-     '__builtin_stack_save ()', '__builtin_stack_restore ()', and isn't able to
-     optimize them all away.  Just submit a dummy -- user code shouldn't be
-     able to observe this.  */
-  emit_move_insn (operands[0], GEN_INT (0xdeadbeef));
+  if (TARGET_PTX_7_3
+      && TARGET_SM52)
+    {
+      gcc_checking_assert (REG_P (operands[0]));
+      emit_insn (gen_nvptx_stacksave (Pmode, operands[0], operands[1]));
+    }
+  else
+    {
+      /* The concept of a '%stack' pointer doesn't apply like this.
+         GCC however occasionally synthesizes '__builtin_stack_save ()',
+        '__builtin_stack_restore ()', and isn't able to optimize them all
+        away.  Just submit a dummy -- user code shouldn't be able to observe
+        this.  */
+      emit_move_insn (operands[0], GEN_INT (0xdeadbeef));
+    }
   DONE;
 })
 
+(define_insn "@nvptx_stacksave_<mode>"
+  [(set (match_operand:P 0 "nvptx_register_operand" "=R")
+        (unspec:P [(match_operand:P 1 "register_operand" "R")]
+        UNSPEC_STACKSAVE))]
+  "TARGET_PTX_7_3
+   && TARGET_SM52"
+  "%.\\tstacksave%u0\\t%0;")
+
 (define_expand "restore_stack_block"
   [(match_operand 0 "register_operand" "")
    (match_operand 1 "register_operand" "")]
   ""
 {
-  if (!TARGET_SOFT_STACK)
+  if (!TARGET_SOFT_STACK
+      && TARGET_PTX_7_3
+      && TARGET_SM52)
+    {
+      operands[1] = force_reg (Pmode, operands[1]);
+      emit_insn (gen_nvptx_stackrestore (Pmode, operands[0], operands[1]));
+    }
+  else if (!TARGET_SOFT_STACK)
     ; /* See 'save_stack_block'.  */
-  else
+  else if (TARGET_SOFT_STACK)
     {
       emit_move_insn (operands[0], operands[1]);
       emit_insn (gen_set_softstack (Pmode, operands[0]));
     }
+  else
+    gcc_unreachable ();
   DONE;
 })
 
+(define_insn "@nvptx_stackrestore_<mode>"
+  [(set (match_operand:P 0 "nvptx_register_operand" "=R")
+        (unspec:P [(match_operand:P 1 "nvptx_register_operand" "R")]
+         UNSPEC_STACKRESTORE))]
+  "TARGET_PTX_7_3
+   && TARGET_SM52"
+  "%.\\tstackrestore%u1\\t%1;")
+
 (define_expand "save_stack_function"
   [(match_operand 0 "register_operand" "")
    (match_operand 1 "register_operand" "")]
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 4583181f4f53..17fe2c64c1f8 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -30232,8 +30232,19 @@ Apply partitioned execution optimizations.  This is the default when any
 level of optimization is selected.
 
 @opindex msoft-stack
+@opindex mno-soft-stack
 @item -msoft-stack
-Generate code that does not use @code{.local} memory
+@itemx -mno-soft-stack
+For @option{-mno-soft-stack} (the default, unless @option{-mgomp} has
+been specified), use PTX ``native'' stacks, that is,
+generate code that uses @code{.local} memory or PTX @code{alloca}
+directly for stack storage.
+Unless @option{-mptx=7.3} or higher and @option{-march=sm_52} or
+higher are active, variable-length arrays and dynamically allocating
+memory on the stack with @code{alloca} are not supported.
+
+For @option{-msoft-stack} (implied by @option{-mgomp}),
+generate code that does not use @code{.local} memory or PTX @code{alloca}
 directly for stack storage. Instead, a per-warp stack pointer is
 maintained explicitly. This enables variable-length stack allocation (with
 variable-length arrays or @code{alloca}), and when global memory is used for
diff --git a/gcc/doc/sourcebuild.texi b/gcc/doc/sourcebuild.texi
index 0e0a7c806ebc..b5c1b23e5271 100644
--- a/gcc/doc/sourcebuild.texi
+++ b/gcc/doc/sourcebuild.texi
@@ -2434,6 +2434,9 @@ nvptx code by default compiles for at least PTX ISA version 6.0.
 The nvptx runtime environment supports the PTX ISA directive
 @code{.alias}.
 
+@item nvptx_runtime_alloca_ptx
+The nvptx runtime environment supports PTX 'alloca'.
+
 @item nvptx_softstack
 nvptx @option{-msoft-stack} is enabled.
 @end table
@@ -3359,6 +3362,9 @@ Only MIPS targets support this feature, and only then in certain modes.
 @item nvptx_alias_ptx
 Enable using the PTX ISA directive @code{.alias} on nvptx targets.
 
+@item nvptx_alloca_ptx
+Enable PTX 'alloca' on nvptx targets.
+
 @item riscv_a
 Add the 'A' extension to the -march string on RISC-V targets.
 
diff --git a/gcc/testsuite/gcc.c-torture/execute/pr36321.c b/gcc/testsuite/gcc.c-torture/execute/pr36321.c
index 4af993dc0fd9..8fd91d8c768b 100644
--- a/gcc/testsuite/gcc.c-torture/execute/pr36321.c
+++ b/gcc/testsuite/gcc.c-torture/execute/pr36321.c
@@ -1,4 +1,7 @@
 /* { dg-skip-if "requires alloca" { ! alloca } { "-O0" } { "" } } */
+/* See 'gcc.target/nvptx/__builtin_alloca_0-1-O0.c'.
+   { dg-xfail-if TODO { nvptx-*-* && { ! nvptx_softstack } } { "-O0" } { "" } } */
+
 extern void abort (void);
 
 extern __SIZE_TYPE__ strlen (const char *);
diff --git a/gcc/testsuite/gcc.target/nvptx/__builtin_alloca_0-1-O0.c b/gcc/testsuite/gcc.target/nvptx/__builtin_alloca_0-1-O0.c
index 8c00a66c7387..4b8a676da1dc 100644
--- a/gcc/testsuite/gcc.target/nvptx/__builtin_alloca_0-1-O0.c
+++ b/gcc/testsuite/gcc.target/nvptx/__builtin_alloca_0-1-O0.c
@@ -6,6 +6,8 @@
 /* { dg-additional-options -save-temps } */
 /* { dg-final { check-function-bodies {** } {} } } */
 
+/* See 'gcc.c-torture/execute/pr36321.c', '-O0'.  */
+
 void sink(void *);
 
 void f(void)
diff --git a/gcc/testsuite/gcc.target/nvptx/__builtin_alloca_0-1-O1.c b/gcc/testsuite/gcc.target/nvptx/__builtin_alloca_0-1-O1.c
index 2d61065d44d6..e81eeb5597fa 100644
--- a/gcc/testsuite/gcc.target/nvptx/__builtin_alloca_0-1-O1.c
+++ b/gcc/testsuite/gcc.target/nvptx/__builtin_alloca_0-1-O1.c
@@ -6,6 +6,8 @@
 /* { dg-additional-options -save-temps } */
 /* { dg-final { check-function-bodies {** } {} } } */
 
+/* See 'gcc.c-torture/execute/pr36321.c', '-O0'.  */
+
 void sink(void *);
 
 void f(void)
diff --git a/gcc/testsuite/gcc.target/nvptx/__builtin_stack_save___builtin_stack_restore-1-sm_30.c b/gcc/testsuite/gcc.target/nvptx/__builtin_stack_save___builtin_stack_restore-1-sm_30.c
new file mode 100644
index 000000000000..c2ac4915d15d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/nvptx/__builtin_stack_save___builtin_stack_restore-1-sm_30.c
@@ -0,0 +1,28 @@
+/* Document what we do for '__builtin_stack_save()', '__builtin_stack_restore()'.  */
+
+/* { dg-do assemble } */
+/* { dg-options {-O3 -mno-soft-stack} } */
+/* { dg-additional-options -march=sm_30 } */
+/* { dg-additional-options -save-temps } */
+/* { dg-final { check-function-bodies {** } {} } } */
+
+void *p;
+
+void f(void)
+{
+  // 0xdeadbeef
+  p = __builtin_stack_save();
+  asm volatile ("" : : : "memory");
+  // no-op
+  __builtin_stack_restore(p);
+  asm volatile ("" : : : "memory");
+}
+/*
+** f:
+** \.visible \.func f
+** {
+**     \.reg\.u64 (%r[0-9]+);
+**             mov\.u64        \1, 3735928559;
+**             st\.global\.u64 \[p\], \1;
+**     ret;
+*/
diff --git a/gcc/testsuite/gcc.target/nvptx/__builtin_stack_save___builtin_stack_restore-1.c b/gcc/testsuite/gcc.target/nvptx/__builtin_stack_save___builtin_stack_restore-1.c
index 35a879fd5973..a8eda0e04afa 100644
--- a/gcc/testsuite/gcc.target/nvptx/__builtin_stack_save___builtin_stack_restore-1.c
+++ b/gcc/testsuite/gcc.target/nvptx/__builtin_stack_save___builtin_stack_restore-1.c
@@ -2,6 +2,7 @@
 
 /* { dg-do assemble } */
 /* { dg-options {-O3 -mno-soft-stack} } */
+/* { dg-add-options nvptx_alloca_ptx } */
 /* { dg-additional-options -save-temps } */
 /* { dg-final { check-function-bodies {** } {} } } */
 
@@ -9,10 +10,8 @@ void *p;
 
 void f(void)
 {
-  // 0xdeadbeef
   p = __builtin_stack_save();
   asm volatile ("" : : : "memory");
-  // no-op
   __builtin_stack_restore(p);
   asm volatile ("" : : : "memory");
 }
@@ -21,7 +20,10 @@ void f(void)
 ** \.visible \.func f
 ** {
 **     \.reg\.u64 (%r[0-9]+);
-**             mov\.u64        \1, 3735928559;
+**     \.reg\.u64 (%r[0-9]+);
+**             stacksave\.u64  \1;
 **             st\.global\.u64 \[p\], \1;
+**             ld\.global\.u64 \2, \[p\];
+**             stackrestore\.u64       \2;
 **     ret;
 */
diff --git a/gcc/testsuite/gcc.target/nvptx/alloca-1-O0.c b/gcc/testsuite/gcc.target/nvptx/alloca-1-O0.c
new file mode 100644
index 000000000000..1fa8fb5873ab
--- /dev/null
+++ b/gcc/testsuite/gcc.target/nvptx/alloca-1-O0.c
@@ -0,0 +1,49 @@
+/* { dg-do assemble } */
+/* { dg-options {-O0 -mno-soft-stack} } */
+/* { dg-add-options nvptx_alloca_ptx } */
+/* { dg-additional-options -save-temps } */
+/* { dg-final { check-function-bodies {** } {} } } */
+
+void sink(void *);
+
+void f(void)
+{
+  sink(__builtin_alloca(123));
+}
+/*
+** f:
+** \.visible \.func f
+** {
+**     \.reg\.u64 (%r[0-9]+);
+**     \.reg\.u64 (%r[0-9]+);
+**     \.reg\.u64 (%r[0-9]+);
+**     \.reg\.u64 (%r[0-9]+);
+**     \.reg\.u64 (%r[0-9]+);
+**     \.reg\.u64 (%r[0-9]+);
+**     \.reg\.u64 (%r[0-9]+);
+**     \.reg\.u64 (%r[0-9]+);
+**     \.reg\.u64 (%r[0-9]+);
+**     \.reg\.u64 (%r[0-9]+);
+**     \.reg\.u64 (%r[0-9]+);
+**             mov\.u64        \11, 16;
+**             add\.u64        \2, \11, -1;
+**             add\.u64        \3, \2, 123;
+**             div\.u64        \4, \3, 16;
+**             mul\.lo\.u64    \5, \4, 16;
+**     {
+**             \.reg\.u64      \6_local;
+**             alloca\.u64     \6_local, \5;
+**             cvta\.local\.u64        \6, \6_local;
+**     }
+**             add\.u64        \7, \6, 15;
+**             shr\.u64        \8, \7, 4;
+**             shl\.b64        \9, \8, 4;
+**             mov\.u64        \1, \9;
+**             mov\.u64        \10, \1;
+**     {
+**             \.param\.u64 %out_arg1;
+**             st\.param\.u64 \[%out_arg1\], \10;
+**             call sink, \(%out_arg1\);
+**     }
+**     ret;
+*/
diff --git a/gcc/testsuite/gcc.target/nvptx/alloca-1-O1.c b/gcc/testsuite/gcc.target/nvptx/alloca-1-O1.c
new file mode 100644
index 000000000000..9ef9d4fd41ba
--- /dev/null
+++ b/gcc/testsuite/gcc.target/nvptx/alloca-1-O1.c
@@ -0,0 +1,33 @@
+/* { dg-do assemble } */
+/* { dg-options {-O1 -mno-soft-stack} } */
+/* { dg-add-options nvptx_alloca_ptx } */
+/* { dg-additional-options -save-temps } */
+/* { dg-final { check-function-bodies {** } {} } } */
+
+void sink(void *);
+
+void f(void)
+{
+  sink(__builtin_alloca(123));
+}
+/*
+** f:
+** \.visible \.func f
+** {
+**     \.reg\.u64 (%r[0-9]+);
+**     \.reg\.u64 (%r[0-9]+);
+**     \.reg\.u64 (%r[0-9]+);
+**     {
+**             \.reg\.u64      \1_local;
+**             alloca\.u64     \1_local, 128;
+**             cvta\.local\.u64        \1, \1_local;
+**     }
+**             add\.u64        \2, \1, 15;
+**             and\.b64        \3, \2, -16;
+**     {
+**             \.param\.u64 %out_arg1;
+**             st\.param\.u64 \[%out_arg1\], \3;
+**             call sink, \(%out_arg1\);
+**     }
+**     ret;
+*/
diff --git a/gcc/testsuite/gcc.target/nvptx/alloca-1.c b/gcc/testsuite/gcc.target/nvptx/alloca-1-sm_30.c
similarity index 83%
rename from gcc/testsuite/gcc.target/nvptx/alloca-1.c
rename to gcc/testsuite/gcc.target/nvptx/alloca-1-sm_30.c
index 0aa6f107b526..261a603ec4ce 100644
--- a/gcc/testsuite/gcc.target/nvptx/alloca-1.c
+++ b/gcc/testsuite/gcc.target/nvptx/alloca-1-sm_30.c
@@ -1,5 +1,6 @@
 /* { dg-do compile } */
 /* { dg-options -mno-soft-stack } */
+/* { dg-additional-options -march=sm_30 } */
 
 void sink(void *);
 
diff --git a/gcc/testsuite/gcc.target/nvptx/alloca-2-O0.c b/gcc/testsuite/gcc.target/nvptx/alloca-2-O0.c
new file mode 100644
index 000000000000..cadb629c74da
--- /dev/null
+++ b/gcc/testsuite/gcc.target/nvptx/alloca-2-O0.c
@@ -0,0 +1,12 @@
+/* { dg-do link } */
+/* { dg-do run { target nvptx_runtime_alloca_ptx } } */
+/* { dg-options {-O0 -mno-soft-stack} } */
+/* { dg-add-options nvptx_alloca_ptx } */
+/* { dg-additional-options -save-temps } */
+
+int
+main(void)
+{
+  return !(__builtin_alloca(100) != __builtin_alloca(10));
+}
+/* { dg-final { scan-assembler-times {(?n)\talloca\.u64\t%r[0-9]+_local, %r[0-9]+;$} 2 } } */
diff --git a/gcc/testsuite/gcc.target/nvptx/alloca-3-O1.c b/gcc/testsuite/gcc.target/nvptx/alloca-3-O1.c
new file mode 100644
index 000000000000..78105760e970
--- /dev/null
+++ b/gcc/testsuite/gcc.target/nvptx/alloca-3-O1.c
@@ -0,0 +1,40 @@
+/* { dg-do assemble } */
+/* { dg-options {-O1 -mno-soft-stack} } */
+/* { dg-add-options nvptx_alloca_ptx } */
+/* { dg-additional-options -save-temps } */
+/* { dg-final { check-function-bodies {** } {} } } */
+
+void sink(void *);
+
+void *p;
+
+void f(void)
+{
+  p = __builtin_stack_save();
+  sink(__builtin_alloca(25));
+  __builtin_stack_restore(p);
+}
+/*
+** f:
+** \.visible \.func f
+** {
+**     \.reg\.u64 (%r[0-9]+);
+**     \.reg\.u64 (%r[0-9]+);
+**     \.reg\.u64 (%r[0-9]+);
+**     \.reg\.u64 (%r[0-9]+);
+**             stacksave\.u64  \1;
+**             st\.global\.u64 \[p\], \1;
+**     {
+**             \.reg\.u64      \2_local;
+**             alloca\.u64     \2_local, 32;
+**             cvta\.local\.u64        \2, \2_local;
+**     }
+**             add\.u64        \3, \2, 15;
+**             and\.b64        \4, \3, -16;
+**     {
+**             \.param\.u64 %out_arg1;
+**             st\.param\.u64 \[%out_arg1\], \4;
+**             call sink, \(%out_arg1\);
+**     }
+**     ret;
+*/
diff --git a/gcc/testsuite/gcc.target/nvptx/alloca-4-O3.c b/gcc/testsuite/gcc.target/nvptx/alloca-4-O3.c
new file mode 100644
index 000000000000..df1320ea2642
--- /dev/null
+++ b/gcc/testsuite/gcc.target/nvptx/alloca-4-O3.c
@@ -0,0 +1,55 @@
+/* { dg-do assemble } */
+/* { dg-options {-O3 -mno-soft-stack} } */
+/* { dg-add-options nvptx_alloca_ptx } */
+/* { dg-additional-options -save-temps } */
+/* { dg-final { check-function-bodies {** } {} } } */
+
+void sink(void *);
+
+void f(void)
+{
+  void *p;
+  p = __builtin_stack_save();
+  sink(__builtin_alloca(25));
+  __builtin_stack_restore(p);
+  sink(__builtin_alloca(13));
+}
+/*
+** f:
+** .visible .func f
+** {
+**     \.reg\.u64 (%r[0-9]+);
+**     \.reg\.u64 (%r[0-9]+);
+**     \.reg\.u64 (%r[0-9]+);
+**     \.reg\.u64 (%r[0-9]+);
+**     \.reg\.u64 (%r[0-9]+);
+**     \.reg\.u64 (%r[0-9]+);
+**     \.reg\.u64 (%r[0-9]+);
+**             stacksave\.u64  \1;
+**     {
+**             \.reg\.u64      \2_local;
+**             alloca\.u64     \2_local, 32;
+**             cvta\.local\.u64        \2, \2_local;
+**     }
+**             add\.u64        \3, \2, 15;
+**             and\.b64        \4, \3, -16;
+**     {
+**             \.param\.u64 %out_arg1;
+**             st\.param\.u64 \[%out_arg1\], \4;
+**             call sink, \(%out_arg1\);
+**     }
+**             stackrestore\.u64       \1;
+**     {
+**             \.reg\.u64      \5_local;
+**             alloca\.u64     \5_local, 16;
+**             cvta\.local\.u64        \5, \5_local;
+**     }
+**             add\.u64        \6, \5, 15;
+**             and\.b64        \7, \6, -16;
+**     {
+**             \.param\.u64 %out_arg1;
+**             st\.param\.u64 \[%out_arg1\], \7;
+**             call sink, \(%out_arg1\);
+**     }
+**     ret;
+*/
diff --git a/gcc/testsuite/gcc.target/nvptx/alloca-5.c b/gcc/testsuite/gcc.target/nvptx/alloca-5.c
new file mode 100644
index 000000000000..ada0df0d065a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/nvptx/alloca-5.c
@@ -0,0 +1,107 @@
+/* { dg-do link } */
+/* { dg-do run { target nvptx_runtime_alloca_ptx } } */
+/* { dg-options {-O2 -mno-soft-stack} } */
+/* { dg-add-options nvptx_alloca_ptx } */
+/* { dg-additional-options -save-temps } */
+/* { dg-final { check-function-bodies {** } {} } } */
+
+/* See also 'gcc.target/nvptx/softstack.c'.  */
+
+static __attribute__((noipa)) int f(int *p)
+{
+  return __sync_lock_test_and_set(p, 1);
+}
+/*
+** f:
+** \.func \(\.param\.u32 %value_out\) f \(\.param\.u64 %in_ar0\)
+** {
+**     \.reg\.u32 %value;
+**     \.reg\.u64 %ar0;
+**     ld\.param\.u64 %ar0, \[%in_ar0\];
+**     \.reg\.u32 (%r[0-9]+);
+**     \.reg\.u64 (%r[0-9]+);
+**             mov\.u64        \2, %ar0;
+**             atom\.exch\.b32 \1, \[\2\], 1;
+**             membar\.sys;
+**             mov\.u32        %value, \1;
+**     st\.param\.u32  \[%value_out\], %value;
+**     ret;
+*/
+
+static __attribute__((noipa)) int g(int n)
+{
+  /* Check that variable-length stack allocation works.  */
+  int v[n];
+  v[0] = 0;
+  /* Check that atomic operations can be applied to auto data.  */
+  return f(v) == 0 && v[0] == 1;
+}
+/*
+** g:
+** \.func \(\.param\.u32 %value_out\) g \(\.param\.u32 %in_ar0\)
+** {
+**     \.reg\.u32 %value;
+**     \.reg\.u32 %ar0;
+**     ld\.param\.u32 %ar0, \[%in_ar0\];
+**     \.reg\.u32 (%r[0-9]+);
+**     \.reg\.u32 (%r[0-9]+);
+**     \.reg\.u64 (%r[0-9]+);
+**     \.reg\.u64 (%r[0-9]+);
+**     \.reg\.u64 (%r[0-9]+);
+**     \.reg\.u64 (%r[0-9]+);
+**     \.reg\.u64 (%r[0-9]+);
+**     \.reg\.u64 (%r[0-9]+);
+**     \.reg\.u64 (%r[0-9]+);
+**     \.reg\.u32 (%r[0-9]+);
+**     \.reg\.u32 (%r[0-9]+);
+**     \.reg\.pred (%r[0-9]+);
+**     \.reg\.u32 (%r[0-9]+);
+**     \.reg\.pred (%r[0-9]+);
+**             mov\.u32        \2, %ar0;
+**             cvt\.s64\.s32   \3, \2;
+**             shl\.b64        \4, \3, 2;
+**             add\.u64        \5, \4, 15;
+**             and\.b64        \6, \5, -16;
+**     {
+**             \.reg\.u64      \7_local;
+**             alloca\.u64     \7_local, \6;
+**             cvta\.local\.u64        \7, \7_local;
+**     }
+**             add\.u64        \8, \7, 3;
+**             and\.b64        \9, \8, -4;
+**             mov\.u32        \10, 0;
+**             st\.u32 \[\9\], \10;
+**     {
+**             \.param\.u32 %value_in;
+**             \.param\.u64 %out_arg1;
+**             st\.param\.u64 \[%out_arg1\], \9;
+**             call \(%value_in\), f, \(%out_arg1\);
+**             ld\.param\.u32  \11, \[%value_in\];
+**     }
+**             setp\.ne\.u32   \12, \11, 0;
+**     @\12    bra     (\$L[0-9]+);
+**             ld\.u32 \13, \[\9\];
+**             setp\.eq\.u32   \14, \13, 1;
+**             selp\.u32       \1, 1, 0, \14;
+**             bra     (\$L[0-9]+);
+** \15:
+**             mov\.u32        \1, \10;
+** \16:
+**             mov\.u32        %value, \1;
+**     st\.param\.u32  \[%value_out\], %value;
+**     ret;
+*/
+
+int main()
+{
+  if (!g(1))
+    __builtin_abort();
+  return 0;
+}
+
+/* PTX 'atom' isn't acceptable for '.local' memory:
+   'operation not supported on global/shared address space' [sic]
+   ('CUDA_ERROR_INVALID_ADDRESS_SPACE'), thus FAILs for 'alloca'ed memory.
+   We'd have to use the 'nvptx_mem_local_p' replacements, but currently lack a
+   mechanism for doing so (TODO).
+   { dg-xfail-run-if TODO { *-*-* } } */
diff --git a/gcc/testsuite/gcc.target/nvptx/softstack.c b/gcc/testsuite/gcc.target/nvptx/softstack.c
index 73e60f282a74..7b84a21bd4f3 100644
--- a/gcc/testsuite/gcc.target/nvptx/softstack.c
+++ b/gcc/testsuite/gcc.target/nvptx/softstack.c
@@ -1,6 +1,8 @@
 /* { dg-options "-O2 -msoft-stack" } */
 /* { dg-do run } */
 
+/* See also 'gcc.target/nvptx/alloca-5.c'.  */
+
 static __attribute__((noinline,noclone)) int f(int *p)
 {
   return __sync_lock_test_and_set(p, 1);
diff --git a/gcc/testsuite/gcc.target/nvptx/vla-1-O0.c b/gcc/testsuite/gcc.target/nvptx/vla-1-O0.c
new file mode 100644
index 000000000000..622011f0cdd7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/nvptx/vla-1-O0.c
@@ -0,0 +1,29 @@
+/* { dg-do assemble } */
+/* { dg-options {-O0 -mno-soft-stack} } */
+/* { dg-add-options nvptx_alloca_ptx } */
+/* { dg-additional-options -save-temps } */
+/* { dg-final { check-function-bodies {**} {} } } */
+
+void sink(void *);
+
+void f(int s)
+{
+  char a[s];
+  sink(a);
+}
+/*
+** f:
+**     ...
+**             cvt\.s64\.s32   (%r[0-9]+), (%r[0-9]+);
+**             mov\.u64        (%r[0-9]+), 16;
+**             add\.u64        (%r[0-9]+), \3, -1;
+**             add\.u64        (%r[0-9]+), \1, \4;
+**             div\.u64        (%r[0-9]+), \5, 16;
+**             mul\.lo\.u64    (%r[0-9]+), \6, 16;
+**     {
+**             \.reg\.u64      (%r[0-9]+)_local;
+**             alloca\.u64     \8_local, \7;
+**             cvta\.local\.u64        \8, \8_local;
+**     }
+**     ...
+*/
diff --git a/gcc/testsuite/gcc.target/nvptx/vla-1-O1.c b/gcc/testsuite/gcc.target/nvptx/vla-1-O1.c
new file mode 100644
index 000000000000..7f297a619389
--- /dev/null
+++ b/gcc/testsuite/gcc.target/nvptx/vla-1-O1.c
@@ -0,0 +1,40 @@
+/* { dg-do assemble } */
+/* { dg-options {-O1 -mno-soft-stack} } */
+/* { dg-add-options nvptx_alloca_ptx } */
+/* { dg-additional-options -save-temps } */
+/* { dg-final { check-function-bodies {** } {} } } */
+
+void sink(void *);
+
+void f(int s)
+{
+  char a[s];
+  sink(a);
+}
+/*
+** f:
+** \.visible \.func f \(\.param\.u32 %in_ar0\)
+** {
+**     \.reg\.u32 %ar0;
+**     ld\.param\.u32 %ar0, \[%in_ar0\];
+**     \.reg\.u32 (%r[0-9]+);
+**     \.reg\.u64 (%r[0-9]+);
+**     \.reg\.u64 (%r[0-9]+);
+**     \.reg\.u64 (%r[0-9]+);
+**     \.reg\.u64 (%r[0-9]+);
+**             mov\.u32        \1, %ar0;
+**             cvt\.s64\.s32   \2, \1;
+**             add\.u64        \3, \2, 15;
+**             and\.b64        \4, \3, -16;
+**     {
+**             \.reg\.u64      \5_local;
+**             alloca\.u64     \5_local, \4;
+**             cvta\.local\.u64        \5, \5_local;
+**     }
+**     {
+**             \.param\.u64 %out_arg1;
+**             st\.param\.u64 \[%out_arg1\], \5;
+**             call sink, \(%out_arg1\);
+**     }
+**     ret;
+*/
diff --git a/gcc/testsuite/gcc.target/nvptx/vla-1.c b/gcc/testsuite/gcc.target/nvptx/vla-1-sm_30.c
similarity index 83%
rename from gcc/testsuite/gcc.target/nvptx/vla-1.c
rename to gcc/testsuite/gcc.target/nvptx/vla-1-sm_30.c
index 5baf95cecfcc..2bf2c91d60e9 100644
--- a/gcc/testsuite/gcc.target/nvptx/vla-1.c
+++ b/gcc/testsuite/gcc.target/nvptx/vla-1-sm_30.c
@@ -1,5 +1,6 @@
 /* { dg-do compile } */
 /* { dg-options -mno-soft-stack } */
+/* { dg-additional-options -march=sm_30 } */
 
 void sink(void *);
 
diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp
index d7d7217be058..a89f531f8876 100644
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -1009,9 +1009,37 @@ proc check_effective_target_alloca {} {
       return 0
     }
     if { [istarget nvptx-*-*] } {
+       # For nvptx, 'alloca' support depends on the configuration.  In case
+       # of PTX "native" stacks, for 'dg-do run', it additionally depends on
+       # runtime support.
        if { ![check_effective_target_nvptx_softstack] } {
-           return 0
+           # '-mno-soft-stack': PTX "native" stacks
+
+           # Not supported unless '-mptx=7.3'+ and '-march=sm_52'+.
+           if { !([check_nvptx_default_ptx_isa_version_at_least 7 3]
+                  && [check_nvptx_default_ptx_isa_target_architecture_at_least sm_52]) } {
+               return 0
+           }
+
+           # Find 'dg-do-what' in an outer frame.
+           set level 1
+           while true {
+               upvar $level dg-do-what dg-do-what
+               if [info exists dg-do-what] then break
+               incr level
+           }
+           verbose "check_effective_target_alloca: found dg-do-what at level $level" 2
+
+           if { [string equal [lindex ${dg-do-what} 0] run] } {
+               # For 'dg-do run', it additionally depends on runtime support.
+               # (If not supported, we don't try to demote 'run' to 'link',
+               # but instead simply fail the effective-target 'alloca' check.)
+               return [check_effective_target_nvptx_runtime_alloca_ptx]
+           } else {
+               return 1
+           }
        } else {
+           # '-msoft-stack'
            return 1
        }
     }
@@ -14100,6 +14128,35 @@ proc check_effective_target_nvptx_default_ptx_isa_version_at_least_6_0 { } {
     return [check_nvptx_default_ptx_isa_version_at_least 6 0]
 }
 
+# Return 1 if nvptx code by default compiles for at least the specified PTX ISA
+# target architecture.
+
+proc check_nvptx_default_ptx_isa_target_architecture_at_least { ta } {
+    set name nvptx_default_ptx_isa_target_architecture_at_least_${ta}
+
+    if [regexp {^sm_(\d+)$} $ta dummy ptx_sm] {
+       set ptx_sm "${ptx_sm}0"
+    } else {
+       error "check_nvptx_default_ptx_isa_target_architecture_at_least: illegal argument: $ta"
+    }
+
+    set supported_p \
+       [concat \
+            "(__PTX_SM__ >= $ptx_sm)"]
+
+    set src \
+       [list \
+            "#if $supported_p" \
+            "#else" \
+            "#error unsupported" \
+            "#endif"]
+    set src [join $src "\n"]
+    
+    set res [check_no_compiler_messages $name assembly $src ""]
+
+    return $res
+}
+
 # Return 1 if nvptx '-msoft-stack' is enabled.
 
 proc check_effective_target_nvptx_softstack { } {
@@ -14132,6 +14189,28 @@ proc check_nvptx_runtime_ptx_isa_version_at_least { major minor } {
     return $res
 }
 
+# Return 1 if nvptx code with the specified PTX ISA target architecture or
+# higher can be run.
+
+proc check_nvptx_runtime_ptx_isa_target_architecture_at_least { ta } {
+    set name nvptx_runtime_ptx_isa_target_architecture_${ta}
+
+    set default \
+       [check_nvptx_default_ptx_isa_target_architecture_at_least ${ta}]
+
+    if { $default } {
+       set flag ""
+    } else {
+       set flag "-march=$ta -mptx=_"
+    }
+
+    set res [check_runtime $name {
+       int main (void) { return 0; }
+    } $flag]
+
+    return $res
+}
+
 # Return 1 if the nvptx runtime environment supports the PTX ISA directive
 # '.alias'.
 
@@ -14139,6 +14218,13 @@ proc check_effective_target_nvptx_runtime_alias_ptx { } {
     return [check_nvptx_runtime_ptx_isa_version_at_least 6 3]
 }
 
+# Return 1 if the nvptx runtime environment supports PTX 'alloca'.
+
+proc check_effective_target_nvptx_runtime_alloca_ptx { } {
+    return [expr { [check_nvptx_runtime_ptx_isa_version_at_least 7 3]
+                  && [check_nvptx_runtime_ptx_isa_target_architecture_at_least sm_52] }]
+}
+
 # Add options to enable nvptx using the PTX ISA directive '.alias'.
 
 proc add_options_for_nvptx_alias_ptx { flags } {
@@ -14150,3 +14236,20 @@ proc add_options_for_nvptx_alias_ptx { flags } {
 
     return $flags
 }
+
+# Add options to enable nvptx using PTX 'alloca'.
+
+proc add_options_for_nvptx_alloca_ptx { flags } {
+    # We don't add '-mno-soft-stack' here; the users should take care of that
+    # explicitly.
+
+    if { ![check_nvptx_default_ptx_isa_version_at_least 7 3] } {
+       append flags " -mptx=7.3"
+    }
+
+    if { ![check_nvptx_default_ptx_isa_target_architecture_at_least sm_52] } {
+       append flags " -march=sm_52"
+    }
+
+    return $flags
+}
diff --git a/libgomp/fortran.c b/libgomp/fortran.c
index 087cb49945ca..7a0386e5ccb3 100644
--- a/libgomp/fortran.c
+++ b/libgomp/fortran.c
@@ -846,8 +846,8 @@ omp_get_device_from_uid_ (const char *uid, size_t uid_len)
   /* Inside the target region, invoking this routine is undefined
      behavior; thus, resolve it already here - instead of inside
      libgomp/config/.../target.c.
-     Note that on nvptx __builtin_alloca is defined, but fails with a sorry
-     during compilation, as it is unsupported until isa 7.3 / sm_52.  */
+     This also circumvents issues due to not all nvptx configurations
+     supporting 'alloca'.  */
   return omp_invalid_device;
 #endif
 }
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/privatized-ref-2.f90 b/libgomp/testsuite/libgomp.oacc-fortran/privatized-ref-2.f90
index 8cf79a10e8d2..14e8f99d391e 100644
--- a/libgomp/testsuite/libgomp.oacc-fortran/privatized-ref-2.f90
+++ b/libgomp/testsuite/libgomp.oacc-fortran/privatized-ref-2.f90
@@ -1,12 +1,5 @@
 ! { dg-do run }
 
-! PR65181 "Support for alloca in nvptx"
-! { dg-excess-errors "lto1, mkoffload and lto-wrapper fatal errors" { target openacc_nvidia_accel_selected } }
-! Aside from restricting this testcase to non-nvptx offloading, and duplicating
-! it with 'dg-do link' for nvptx offloading, there doesn't seem to be a way to
-! XFAIL the "UNRESOLVED: [...] compilation failed to produce executable", or
-! get rid of it, unfortunately.
-
 ! { dg-additional-options "-fopt-info-note-omp" }
 ! { dg-additional-options "--param=openacc-privatization=noisy" }
 ! { dg-additional-options "-foffload=-fopt-info-note-omp" }
@@ -59,7 +52,6 @@ contains
     ! { dg-note {variable 'array' in 'private' clause is candidate for adjusting OpenACC privatization level} "" { target *-*-* } l_loop$c_loop }
     ! { dg-note {variable 'array' ought to be adjusted for OpenACC privatization level: 'gang'} "" { target *-*-* } l_loop$c_loop }
     ! { dg-note {variable 'array' adjusted for OpenACC privatization level: 'gang'} "" { target { ! { openacc_host_selected || { openacc_nvidia_accel_selected && __OPTIMIZE__ } } } } l_loop$c_loop }
-    ! { dg-message {sorry, unimplemented: target cannot support alloca} PR65181 { target openacc_nvidia_accel_selected } l_loop$c_loop }
     do i = 1, 10
       array(i) = i
     end do
@@ -91,7 +83,6 @@ contains
     ! { dg-note {variable 'array\.[0-9]+' in 'private' clause is candidate for adjusting OpenACC privatization level} "" { target *-*-* } l_loop$c_loop }
     ! { dg-note {variable 'array\.[0-9]+' ought to be adjusted for OpenACC privatization level: 'gang'} "" { target *-*-* } l_loop$c_loop }
     ! { dg-note {variable 'array\.[0-9]+' adjusted for OpenACC privatization level: 'gang'} "" { target { ! { openacc_host_selected || { openacc_nvidia_accel_selected && __OPTIMIZE__ } } } } l_loop$c_loop }
-    ! { dg-message {sorry, unimplemented: target cannot support alloca} PR65181 { target openacc_nvidia_accel_selected } l_loop$c_loop }
     do i = 1, 10
       array(i) = 9*i
     end do
@@ -117,7 +108,6 @@ contains
     ! { dg-note {variable 'str' ought to be adjusted for OpenACC privatization level: 'gang'} "" { target *-*-* } l_loop$c_loop }
     ! { dg-note {variable 'str' adjusted for OpenACC privatization level: 'gang'} "" { target { ! { openacc_host_selected || { openacc_nvidia_accel_selected && __OPTIMIZE__ } } } } l_loop$c_loop }
     ! { dg-note {variable 'char\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: artificial} "" { target *-*-* } l_loop$c_loop }
-    ! { dg-message {sorry, unimplemented: target cannot support alloca} PR65181 { target openacc_nvidia_accel_selected } l_loop$c_loop }
     do i = 1, 10
       str(i:i) = achar(ichar('A') + i)
     end do

Reply via email to