Hi Prathamesh! On 2024-09-23T08:24:36+0000, Prathamesh Kulkarni <prathame...@nvidia.com> wrote: > Thanks for the review and sorry for the late reply.
No worries. My replies often are way more delayed... ;'-| > The attached patch addresses the above suggestions. > Does it look OK ? ACK, thanks! > (Also, could you please test it at your end as well?) As expected: PASS: gcc.target/nvptx/alias-to-alias-1.c (test for excess errors) +PASS: gcc.target/nvptx/alias-to-alias-1.c execution test PASS: gcc.target/nvptx/alias-to-alias-1.c scan-assembler-times (?n)\\tcall bar;$ 0 PASS: gcc.target/nvptx/alias-to-alias-1.c scan-assembler-times (?n)\\tcall baz;$ 1 PASS: gcc.target/nvptx/alias-to-alias-1.c scan-assembler-times (?n)\\tcall foo;$ 0 PASS: gcc.target/nvptx/alias-to-alias-1.c scan-assembler-times (?n)^// BEGIN GLOBAL FUNCTION DECL: bar$ 1 PASS: gcc.target/nvptx/alias-to-alias-1.c scan-assembler-times (?n)^// BEGIN GLOBAL FUNCTION DECL: baz$ 1 PASS: gcc.target/nvptx/alias-to-alias-1.c scan-assembler-times (?n)^// BEGIN GLOBAL FUNCTION DECL: foo$ 1 PASS: gcc.target/nvptx/alias-to-alias-1.c scan-assembler-times (?n)^// BEGIN GLOBAL FUNCTION DEF: bar$ 1 PASS: gcc.target/nvptx/alias-to-alias-1.c scan-assembler-times (?n)^// BEGIN GLOBAL FUNCTION DEF: baz$ 1 PASS: gcc.target/nvptx/alias-to-alias-1.c scan-assembler-times (?n)^// BEGIN GLOBAL FUNCTION DEF: foo$ 1 PASS: gcc.target/nvptx/alias-to-alias-1.c scan-assembler-times (?n)^\\.alias bar,foo;$ 1 -PASS: gcc.target/nvptx/alias-to-alias-1.c scan-assembler-times (?n)^\\.alias baz,bar;$ 1 +PASS: gcc.target/nvptx/alias-to-alias-1.c scan-assembler-times (?n)^\\.alias baz,foo;$ 1 PASS: gcc.target/nvptx/alias-to-alias-1.c scan-assembler-times (?n)^\\.visible \\.func bar;$ 1 PASS: gcc.target/nvptx/alias-to-alias-1.c scan-assembler-times (?n)^\\.visible \\.func baz;$ 1 PASS: gcc.target/nvptx/alias-to-alias-1.c scan-assembler-times (?n)^\\.visible \\.func foo$ 1 PASS: gcc.target/nvptx/alias-to-alias-1.c scan-assembler-times (?n)^\\.visible \\.func foo;$ 1 Grüße Thomas > nvptx: Partial support for aliases to aliases. 
> > For the following test (adapted from pr96390.c): > > __attribute__((noipa)) int foo () { return 42; } > int bar () __attribute__((alias ("foo"))); > int baz () __attribute__((alias ("bar"))); > > int main () > { > int n; > #pragma omp target map(from:n) > n = baz (); > return n; > } > > gcc emits following ptx for baz: > .visible .func (.param.u32 %value_out) bar; > .alias bar,foo; > .visible .func (.param.u32 %value_out) baz; > .alias baz,bar; > > which is incorrect since PTX requires aliasee to be a defined function. > The patch instead uses cgraph_node::get(name)->ultimate_alias_target, > which generates the following PTX: > > .visible .func (.param.u32 %value_out) baz; > .alias baz,foo; > > gcc/ChangeLog: > PR target/104957 > * config/nvptx/nvptx.cc (nvptx_asm_output_def_from_decls): Use > cgraph_node::get(name)->ultimate_alias_target instead of value. > > gcc/testsuite/ChangeLog: > PR target/104957 > * gcc.target/nvptx/alias-to-alias-1.c: Adjust. > > Signed-off-by: Prathamesh Kulkarni <prathame...@nvidia.com> > Co-authored-by: Thomas Schwinge <tschwi...@baylibre.com> > > diff --git a/gcc/config/nvptx/nvptx.cc b/gcc/config/nvptx/nvptx.cc > index 4a7c64f05eb..96a1134220e 100644 > --- a/gcc/config/nvptx/nvptx.cc > +++ b/gcc/config/nvptx/nvptx.cc > @@ -7582,7 +7582,8 @@ nvptx_mem_local_p (rtx mem) > while (0) > > void > -nvptx_asm_output_def_from_decls (FILE *stream, tree name, tree value) > +nvptx_asm_output_def_from_decls (FILE *stream, tree name, > + tree value ATTRIBUTE_UNUSED) > { > if (nvptx_alias == 0 || !TARGET_PTX_6_3) > { > @@ -7617,7 +7618,8 @@ nvptx_asm_output_def_from_decls (FILE *stream, tree > name, tree value) > return; > } > > - if (!cgraph_node::get (name)->referred_to_p ()) > + cgraph_node *cnode = cgraph_node::get (name); > + if (!cnode->referred_to_p ()) > /* Prevent "Internal error: reference to deleted section". 
*/ > return; > > @@ -7626,11 +7628,27 @@ nvptx_asm_output_def_from_decls (FILE *stream, tree > name, tree value) > fputs (s.str ().c_str (), stream); > > tree id = DECL_ASSEMBLER_NAME (name); > + > + /* Walk alias chain to get reference callgraph node. > + The rationale of using ultimate_alias_target here is that > + PTX's .alias directive only supports 1-level aliasing where > + aliasee is function defined in same module. > + > + So for the following case: > + int foo() { return 42; } > + int bar () __attribute__((alias ("foo"))); > + int baz () __attribute__((alias ("bar"))); > + > + should resolve baz to foo: > + .visible .func (.param.u32 %value_out) baz; > + .alias baz,foo; */ > + symtab_node *alias_target_node = cnode->ultimate_alias_target (); > + tree alias_target_id = DECL_ASSEMBLER_NAME (alias_target_node->decl); > std::stringstream s_def; > write_fn_marker (s_def, true, TREE_PUBLIC (name), IDENTIFIER_POINTER (id)); > fputs (s_def.str ().c_str (), stream); > NVPTX_ASM_OUTPUT_DEF (stream, IDENTIFIER_POINTER (id), > - IDENTIFIER_POINTER (value)); > + IDENTIFIER_POINTER (alias_target_id)); > } > > #undef NVPTX_ASM_OUTPUT_DEF > diff --git a/gcc/testsuite/gcc.target/nvptx/alias-to-alias-1.c > b/gcc/testsuite/gcc.target/nvptx/alias-to-alias-1.c > index 7bce7a358c7..08de9e6d69d 100644 > --- a/gcc/testsuite/gcc.target/nvptx/alias-to-alias-1.c > +++ b/gcc/testsuite/gcc.target/nvptx/alias-to-alias-1.c > @@ -1,6 +1,8 @@ > /* Alias to alias; 'libgomp.c-c++-common/pr96390.c'. 
*/ > > -/* { dg-do compile } */ > +/* { dg-do link } */ > +/* { dg-do run { target nvptx_runtime_alias_ptx } } */ > +/* { dg-options -save-temps } */ > /* { dg-add-options nvptx_alias_ptx } */ > > int v; > @@ -32,7 +34,7 @@ main (void) > /* { dg-final { scan-assembler-times {(?n)^// BEGIN GLOBAL FUNCTION DECL: > baz$} 1 } } > { dg-final { scan-assembler-times {(?n)^\.visible \.func baz;$} 1 } } > { dg-final { scan-assembler-times {(?n)^// BEGIN GLOBAL FUNCTION DEF: > baz$} 1 } } > - { dg-final { scan-assembler-times {(?n)^\.alias baz,bar;$} 1 } } */ > + { dg-final { scan-assembler-times {(?n)^\.alias baz,foo;$} 1 } } */ > > /* { dg-final { scan-assembler-times {(?n)\tcall foo;$} 0 } } > { dg-final { scan-assembler-times {(?n)\tcall bar;$} 0 } }