https://gcc.gnu.org/g:5c438e57d7ebef246b1c61be06983d507bf23a40
commit 5c438e57d7ebef246b1c61be06983d507bf23a40 Author: Thomas Schwinge <tschwi...@baylibre.com> Date: Mon Feb 24 16:13:11 2025 +0100 nvptx: Support '-mfake-ptx-alloca' With '-mfake-ptx-alloca' enabled, the user-visible behavior changes only for configurations where PTX 'alloca' is not available. Rather than a compile-time 'sorry, unimplemented: dynamic stack allocation not supported' in the presence of dynamic stack allocation, compilation and assembly then succeed. However, attempting to link in such '*.o' files then fails due to the unresolved symbol '__GCC_nvptx__PTX_alloca_not_supported'. This is meant to be used in scenarios where large volumes of code are compiled, a small fraction of which runs into dynamic stack allocation, but these parts are not important for specific use cases, and we'd thus like the build to succeed, and error out just upon actual, very rare use of the offending '*.o' files. gcc/ * config/nvptx/nvptx.opt (-mfake-ptx-alloca): New. * config/nvptx/nvptx-protos.h (nvptx_output_fake_ptx_alloca): Declare. * config/nvptx/nvptx.cc (nvptx_output_fake_ptx_alloca): New. * config/nvptx/nvptx.md (define_insn "@nvptx_alloca_<mode>") [!(TARGET_PTX_7_3 && TARGET_SM52)]: Use it for '-mfake-ptx-alloca'. gcc/testsuite/ * gcc.target/nvptx/alloca-1-O0_-mfake-ptx-alloca.c: New. * gcc.target/nvptx/alloca-2-O0_-mfake-ptx-alloca.c: Likewise. * gcc.target/nvptx/alloca-4-O3_-mfake-ptx-alloca.c: Likewise. * gcc.target/nvptx/vla-1-O0_-mfake-ptx-alloca.c: Likewise. * gcc.target/nvptx/alloca-4-O3.c: 'dg-additional-options -mfake-ptx-alloca'. 
(cherry picked from commit 1146410c0feb0e82c689b1333fdf530a2b34dc2b) Diff: --- gcc/ChangeLog.omp | 11 +++++ gcc/config/nvptx/nvptx-protos.h | 1 + gcc/config/nvptx/nvptx.cc | 21 ++++++++++ gcc/config/nvptx/nvptx.md | 4 ++ gcc/config/nvptx/nvptx.opt | 15 +++++++ gcc/testsuite/ChangeLog.omp | 10 +++++ .../nvptx/alloca-1-O0_-mfake-ptx-alloca.c | 49 ++++++++++++++++++++++ .../nvptx/alloca-2-O0_-mfake-ptx-alloca.c | 18 ++++++++ gcc/testsuite/gcc.target/nvptx/alloca-4-O3.c | 2 + .../nvptx/alloca-4-O3_-mfake-ptx-alloca.c | 48 +++++++++++++++++++++ .../gcc.target/nvptx/vla-1-O0_-mfake-ptx-alloca.c | 29 +++++++++++++ 11 files changed, 208 insertions(+) diff --git a/gcc/ChangeLog.omp b/gcc/ChangeLog.omp index e578ad89f743..b7743c80926f 100644 --- a/gcc/ChangeLog.omp +++ b/gcc/ChangeLog.omp @@ -3,6 +3,17 @@ Backported from trunk: 2025-02-27 Thomas Schwinge <tschwi...@baylibre.com> + * config/nvptx/nvptx.opt (-mfake-ptx-alloca): New. + * config/nvptx/nvptx-protos.h (nvptx_output_fake_ptx_alloca): + Declare. + * config/nvptx/nvptx.cc (nvptx_output_fake_ptx_alloca): New. + * config/nvptx/nvptx.md (define_insn "@nvptx_alloca_<mode>") + [!(TARGET_PTX_7_3 && TARGET_SM52)]: Use it for + '-mfake-ptx-alloca'. + + Backported from trunk: + 2025-02-27 Thomas Schwinge <tschwi...@baylibre.com> + * config/nvptx/nvptx.md (define_expand "allocate_stack") [!TARGET_SOFT_STACK]: Move 'sorry ("dynamic stack allocation not supported");'... 
diff --git a/gcc/config/nvptx/nvptx-protos.h b/gcc/config/nvptx/nvptx-protos.h index ed2ec0e3282a..540497507275 100644 --- a/gcc/config/nvptx/nvptx-protos.h +++ b/gcc/config/nvptx/nvptx-protos.h @@ -56,6 +56,7 @@ extern rtx nvptx_expand_compare (rtx); extern const char *nvptx_ptx_type_from_mode (machine_mode, bool); extern const char *nvptx_output_mov_insn (rtx, rtx); extern const char *nvptx_output_call_insn (rtx_insn *, rtx, rtx); +extern const char *nvptx_output_fake_ptx_alloca (void); extern const char *nvptx_output_return (void); extern const char *nvptx_output_set_softstack (unsigned); extern const char *nvptx_output_simt_enter (rtx, rtx, rtx); diff --git a/gcc/config/nvptx/nvptx.cc b/gcc/config/nvptx/nvptx.cc index 14766b8d769f..6dd358bf3d58 100644 --- a/gcc/config/nvptx/nvptx.cc +++ b/gcc/config/nvptx/nvptx.cc @@ -1813,6 +1813,27 @@ nvptx_output_set_softstack (unsigned src_regno) } return ""; } + +/* Output a fake PTX 'alloca'. */ + +const char * +nvptx_output_fake_ptx_alloca (void) +{ +#define FAKE_PTX_ALLOCA_NAME "__GCC_nvptx__PTX_alloca_not_supported" + static tree decl; + if (!decl) + { + tree alloca_type = TREE_TYPE (builtin_decl_explicit (BUILT_IN_ALLOCA)); + decl = build_decl (UNKNOWN_LOCATION, FUNCTION_DECL, + get_identifier (FAKE_PTX_ALLOCA_NAME), alloca_type); + DECL_EXTERNAL (decl) = 1; + TREE_PUBLIC (decl) = 1; + nvptx_record_needed_fndecl (decl); + } + return "\tcall\t(%0), " FAKE_PTX_ALLOCA_NAME ", (%1);"; +#undef FAKE_PTX_ALLOCA_NAME +} + /* Output a return instruction. Also copy the return value to its outgoing location. 
*/ diff --git a/gcc/config/nvptx/nvptx.md b/gcc/config/nvptx/nvptx.md index 097b1751402a..4458959f2f85 100644 --- a/gcc/config/nvptx/nvptx.md +++ b/gcc/config/nvptx/nvptx.md @@ -1726,6 +1726,8 @@ output_asm_insn ("}", NULL); return ""; } + else if (nvptx_fake_ptx_alloca) + return nvptx_output_fake_ptx_alloca (); else { sorry_at (INSN_LOCATION (insn), @@ -1754,6 +1756,7 @@ gcc_checking_assert (REG_P (operands[0])); emit_insn (gen_nvptx_stacksave (Pmode, operands[0], operands[1])); } + /* We don't bother to special-case '-mfake-ptx-alloca' here. */ else { /* The concept of a '%stack' pointer doesn't apply like this. @@ -1786,6 +1789,7 @@ operands[1] = force_reg (Pmode, operands[1]); emit_insn (gen_nvptx_stackrestore (Pmode, operands[0], operands[1])); } + /* We don't bother to special-case '-mfake-ptx-alloca' here. */ else if (!TARGET_SOFT_STACK) ; /* See 'save_stack_block'. */ else if (TARGET_SOFT_STACK) diff --git a/gcc/config/nvptx/nvptx.opt b/gcc/config/nvptx/nvptx.opt index c0636279eb24..1e70d23dfd0d 100644 --- a/gcc/config/nvptx/nvptx.opt +++ b/gcc/config/nvptx/nvptx.opt @@ -167,3 +167,18 @@ Target Var(nvptx_alias) Init(0) Undocumented mexperimental Target Var(nvptx_experimental) Init(0) Undocumented + +mfake-ptx-alloca +Target Var(nvptx_fake_ptx_alloca) Init(0) Undocumented +; With '-mfake-ptx-alloca' enabled, the user-visible behavior changes only +; for configurations where PTX 'alloca' is not available. Rather than a +; compile-time 'sorry, unimplemented: dynamic stack allocation not supported' +; in presence of dynamic stack allocation, compilation and assembly then +; succeeds. However, attempting to link in such '*.o' files then fails due +; to unresolved symbol '__GCC_nvptx__PTX_alloca_not_supported'. 
+; +; This is meant to be used in scenarios where large volumes of code are +; compiled, a small fraction of which runs into dynamic stack allocation, but +; these parts are not important for specific use cases, and we'd thus like the +; build to succeed, and error out just upon actual, very rare use of the +; offending '*.o' files. diff --git a/gcc/testsuite/ChangeLog.omp b/gcc/testsuite/ChangeLog.omp index 4f04e8e47f85..514874cf01be 100644 --- a/gcc/testsuite/ChangeLog.omp +++ b/gcc/testsuite/ChangeLog.omp @@ -3,6 +3,16 @@ Backported from trunk: 2025-02-27 Thomas Schwinge <tschwi...@baylibre.com> + * gcc.target/nvptx/alloca-1-O0_-mfake-ptx-alloca.c: New. + * gcc.target/nvptx/alloca-2-O0_-mfake-ptx-alloca.c: Likewise. + * gcc.target/nvptx/alloca-4-O3_-mfake-ptx-alloca.c: Likewise. + * gcc.target/nvptx/vla-1-O0_-mfake-ptx-alloca.c: Likewise. + * gcc.target/nvptx/alloca-4-O3.c: + 'dg-additional-options -mfake-ptx-alloca'. + + Backported from trunk: + 2025-02-27 Thomas Schwinge <tschwi...@baylibre.com> + * gcc.target/nvptx/alloca-1-unused-O0-sm_30.c: Adjust. 
Backported from trunk: diff --git a/gcc/testsuite/gcc.target/nvptx/alloca-1-O0_-mfake-ptx-alloca.c b/gcc/testsuite/gcc.target/nvptx/alloca-1-O0_-mfake-ptx-alloca.c new file mode 100644 index 000000000000..7c405592e693 --- /dev/null +++ b/gcc/testsuite/gcc.target/nvptx/alloca-1-O0_-mfake-ptx-alloca.c @@ -0,0 +1,49 @@ +/* { dg-do assemble } */ +/* { dg-options {-O0 -mno-soft-stack} } */ +/* { dg-additional-options -march=sm_30 } */ +/* { dg-additional-options -mfake-ptx-alloca } */ +/* { dg-additional-options -save-temps } */ +/* { dg-final { check-function-bodies {** } {} } } */ + +void sink(void *); + +void f(void) +{ + sink(__builtin_alloca(123)); + /* { dg-bogus {sorry, unimplemented: dynamic stack allocation not supported} {} { target *-*-* } .-1 } */ +} +/* +** f: +** \.visible \.func f +** { +** \.reg\.u64 (%r[0-9]+); +** \.reg\.u64 (%r[0-9]+); +** \.reg\.u64 (%r[0-9]+); +** \.reg\.u64 (%r[0-9]+); +** \.reg\.u64 (%r[0-9]+); +** \.reg\.u64 (%r[0-9]+); +** \.reg\.u64 (%r[0-9]+); +** \.reg\.u64 (%r[0-9]+); +** \.reg\.u64 (%r[0-9]+); +** \.reg\.u64 (%r[0-9]+); +** \.reg\.u64 (%r[0-9]+); +** mov\.u64 \11, 16; +** add\.u64 \2, \11, -1; +** add\.u64 \3, \2, 123; +** div\.u64 \4, \3, 16; +** mul\.lo\.u64 \5, \4, 16; +** call \(\6\), __GCC_nvptx__PTX_alloca_not_supported, \(\5\); +** add\.u64 \7, \6, 15; +** shr\.u64 \8, \7, 4; +** shl\.b64 \9, \8, 4; +** mov\.u64 \1, \9; +** mov\.u64 \10, \1; +** { +** \.param\.u64 %out_arg1; +** st\.param\.u64 \[%out_arg1\], \10; +** call sink, \(%out_arg1\); +** } +** ret; +*/ + +/* { dg-final { scan-assembler-times {(?n)^\.extern \.func \(\.param\.u64 %value_out\) __GCC_nvptx__PTX_alloca_not_supported \(\.param\.u64 %in_ar0\);$} 1 } } */ diff --git a/gcc/testsuite/gcc.target/nvptx/alloca-2-O0_-mfake-ptx-alloca.c b/gcc/testsuite/gcc.target/nvptx/alloca-2-O0_-mfake-ptx-alloca.c new file mode 100644 index 000000000000..4cc4d0c93f28 --- /dev/null +++ b/gcc/testsuite/gcc.target/nvptx/alloca-2-O0_-mfake-ptx-alloca.c @@ -0,0 +1,18 @@ +/* 
{ dg-do link } */ +/* { dg-options {-O0 -mno-soft-stack} } */ +/* { dg-additional-options -march=sm_30 } */ +/* { dg-additional-options -mfake-ptx-alloca } */ +/* { dg-additional-options -save-temps } */ + +int +main(void) +{ + return !(__builtin_alloca(100) != __builtin_alloca(10)); +} +/* { dg-final { scan-assembler-times {(?n)\tcall\t\(%r[0-9]+\), __GCC_nvptx__PTX_alloca_not_supported, \(%r[0-9]+\);$} 2 } } */ + +/* { dg-final { scan-assembler-times {(?n)^\.extern \.func \(\.param\.u64 %value_out\) __GCC_nvptx__PTX_alloca_not_supported \(\.param\.u64 %in_ar0\);$} 1 } } */ + +/* { dg-message __GCC_nvptx__PTX_alloca_not_supported {unresolved symbol} { target *-*-* } 0 } */ + +/* { dg-final output-exists-not } */ diff --git a/gcc/testsuite/gcc.target/nvptx/alloca-4-O3.c b/gcc/testsuite/gcc.target/nvptx/alloca-4-O3.c index df1320ea2642..4374bb54ab37 100644 --- a/gcc/testsuite/gcc.target/nvptx/alloca-4-O3.c +++ b/gcc/testsuite/gcc.target/nvptx/alloca-4-O3.c @@ -1,6 +1,8 @@ /* { dg-do assemble } */ /* { dg-options {-O3 -mno-soft-stack} } */ /* { dg-add-options nvptx_alloca_ptx } */ +/* Verify the fake one isn't used if the real PTX 'alloca' is available. 
+ { dg-additional-options -mfake-ptx-alloca } */ /* { dg-additional-options -save-temps } */ /* { dg-final { check-function-bodies {** } {} } } */ diff --git a/gcc/testsuite/gcc.target/nvptx/alloca-4-O3_-mfake-ptx-alloca.c b/gcc/testsuite/gcc.target/nvptx/alloca-4-O3_-mfake-ptx-alloca.c new file mode 100644 index 000000000000..e394763f01b4 --- /dev/null +++ b/gcc/testsuite/gcc.target/nvptx/alloca-4-O3_-mfake-ptx-alloca.c @@ -0,0 +1,48 @@ +/* { dg-do assemble } */ +/* { dg-options {-O3 -mno-soft-stack} } */ +/* { dg-additional-options {-march=sm_30 -mfake-ptx-alloca} } */ +/* { dg-additional-options -save-temps } */ +/* { dg-final { check-function-bodies {** } {} } } */ + +void sink(void *); + +void f(void) +{ + void *p; + p = __builtin_stack_save(); + sink(__builtin_alloca(25)); + /* { dg-bogus {sorry, unimplemented: dynamic stack allocation not supported} {} { target *-*-* } .-1 } */ + __builtin_stack_restore(p); + sink(__builtin_alloca(13)); + /* { dg-bogus {sorry, unimplemented: dynamic stack allocation not supported} {} { target *-*-* } .-1 } */ +} +/* +** f: +** .visible .func f +** { +** \.reg\.u64 (%r[0-9]+); +** \.reg\.u64 (%r[0-9]+); +** \.reg\.u64 (%r[0-9]+); +** \.reg\.u64 (%r[0-9]+); +** \.reg\.u64 (%r[0-9]+); +** \.reg\.u64 (%r[0-9]+); +** call \(\1\), __GCC_nvptx__PTX_alloca_not_supported, \(32\); +** add\.u64 \2, \1, 15; +** and\.b64 \3, \2, -16; +** { +** \.param\.u64 %out_arg1; +** st\.param\.u64 \[%out_arg1\], \3; +** call sink, \(%out_arg1\); +** } +** call \(\4\), __GCC_nvptx__PTX_alloca_not_supported, \(16\); +** add\.u64 \5, \4, 15; +** and\.b64 \6, \5, -16; +** { +** \.param\.u64 %out_arg1; +** st\.param\.u64 \[%out_arg1\], \6; +** call sink, \(%out_arg1\); +** } +** ret; +*/ + +/* { dg-final { scan-assembler-times {(?n)^\.extern \.func \(\.param\.u64 %value_out\) __GCC_nvptx__PTX_alloca_not_supported \(\.param\.u64 %in_ar0\);$} 1 } } */ diff --git a/gcc/testsuite/gcc.target/nvptx/vla-1-O0_-mfake-ptx-alloca.c 
b/gcc/testsuite/gcc.target/nvptx/vla-1-O0_-mfake-ptx-alloca.c new file mode 100644 index 000000000000..3e5134ae3807 --- /dev/null +++ b/gcc/testsuite/gcc.target/nvptx/vla-1-O0_-mfake-ptx-alloca.c @@ -0,0 +1,29 @@ +/* { dg-do assemble } */ +/* { dg-options {-O0 -mno-soft-stack} } */ +/* { dg-additional-options -march=sm_30 } */ +/* { dg-additional-options -mfake-ptx-alloca } */ +/* { dg-additional-options -save-temps } */ +/* { dg-final { check-function-bodies {** } {} } } */ + +void sink(void *); + +void f(int s) +{ + char a[s]; + /* { dg-bogus {sorry, unimplemented: dynamic stack allocation not supported} {} { target *-*-* } .-1 } */ + sink(a); +} +/* +** f: +** ... +** cvt\.s64\.s32 (%r[0-9]+), (%r[0-9]+); +** mov\.u64 (%r[0-9]+), 16; +** add\.u64 (%r[0-9]+), \3, -1; +** add\.u64 (%r[0-9]+), \1, \4; +** div\.u64 (%r[0-9]+), \5, 16; +** mul\.lo\.u64 (%r[0-9]+), \6, 16; +** call \((%r[0-9]+)\), __GCC_nvptx__PTX_alloca_not_supported, \(\7\); +** ... +*/ + +/* { dg-final { scan-assembler-times {(?n)^\.extern \.func \(\.param\.u64 %value_out\) __GCC_nvptx__PTX_alloca_not_supported \(\.param\.u64 %in_ar0\);$} 1 } } */