I've applied this patch to the gomp4 branch. It ports a bunch of cleanups and fixes from trunk.
nathan
2015-09-09 Nathan Sidwell <nat...@codesourcery.com> Port from trunk: * config/nvptx/nvptx.md (allocate_stack): Emit sorry. (atomic_compare_and_swap<mode>_1): Use sel_truesi. * config/nvptx/nvptx.c (nvptx_write_function_decl): Reformat. (nvptx_reorg_subreg): Pass insn pattern to asm_operands. (walk_args_for_param): Use arg_promotion for both split and non-split args. (nvptx_declare_function_name): Insert formatting tabs for consistency. Look at crtl->stack_alignment_needed to determine alignment. (nvptx_get_drap_rtx): New. (TARGET_GET_DRAP_RTX): Override. * config/nvptx/nvptx.h (MAX_STACK_ALIGNMENT): Set. Index: gcc/config/nvptx/nvptx.c =================================================================== --- gcc/config/nvptx/nvptx.c (revision 227598) +++ gcc/config/nvptx/nvptx.c (working copy) @@ -425,7 +425,8 @@ nvptx_write_function_decl (std::stringst /* Declare argument types. */ if ((args != NULL_TREE - && !(TREE_CODE (args) == TREE_LIST && TREE_VALUE (args) == void_type_node)) + && !(TREE_CODE (args) == TREE_LIST + && TREE_VALUE (args) == void_type_node)) || is_main || return_in_mem || DECL_STATIC_CHAIN (decl)) @@ -509,8 +510,8 @@ walk_args_for_param (FILE *file, tree ar mode = DFmode; } - mode = arg_promotion (mode); } + mode = arg_promotion (mode); while (count-- > 0) { i++; @@ -649,7 +650,7 @@ nvptx_declare_function_name (FILE *file, else if (TYPE_MODE (result_type) != VOIDmode) { machine_mode mode = arg_promotion (TYPE_MODE (result_type)); - fprintf (file, ".reg%s %%retval;\n", + fprintf (file, "\t.reg%s %%retval;\n", nvptx_ptx_type_from_mode (mode, false)); } @@ -701,9 +702,11 @@ nvptx_declare_function_name (FILE *file, sz = get_frame_size (); if (sz > 0 || cfun->machine->has_call_with_sc) { + int alignment = crtl->stack_alignment_needed / BITS_PER_UNIT; + fprintf (file, "\t.reg.u%d %%frame;\n" - "\t.local.align 8 .b8 %%farray[" HOST_WIDE_INT_PRINT_DEC"];\n", - BITS_PER_WORD, sz == 0 ? 
1 : sz); + "\t.local.align %d .b8 %%farray[" HOST_WIDE_INT_PRINT_DEC"];\n", + BITS_PER_WORD, alignment, sz == 0 ? 1 : sz); fprintf (file, "\tcvta.local.u%d %%frame, %%farray;\n", BITS_PER_WORD); } @@ -719,10 +722,10 @@ nvptx_declare_function_name (FILE *file, walk_args_for_param (file, TYPE_ARG_TYPES (fntype), DECL_ARGUMENTS (decl), true, return_in_mem); if (return_in_mem) - fprintf (file, "ld.param.u%d %%ar1, [%%in_ar1];\n", + fprintf (file, "\tld.param.u%d %%ar1, [%%in_ar1];\n", GET_MODE_BITSIZE (Pmode)); if (stdarg_p (fntype)) - fprintf (file, "ld.param.u%d %%argp, [%%in_argp];\n", + fprintf (file, "\tld.param.u%d %%argp, [%%in_argp];\n", GET_MODE_BITSIZE (Pmode)); } @@ -830,6 +833,14 @@ nvptx_function_ok_for_sibcall (tree, tre return false; } +/* Return Dynamic ReAlignment Pointer RTX. For PTX there isn't any. */ + +static rtx +nvptx_get_drap_rtx (void) +{ + return NULL_RTX; +} + /* Implement the TARGET_CALL_ARGS hook. Record information about one argument to the next call. */ @@ -4330,6 +4341,8 @@ nvptx_goacc_reduction (gimple call) #define TARGET_LIBCALL_VALUE nvptx_libcall_value #undef TARGET_FUNCTION_OK_FOR_SIBCALL #define TARGET_FUNCTION_OK_FOR_SIBCALL nvptx_function_ok_for_sibcall +#undef TARGET_GET_DRAP_RTX +#define TARGET_GET_DRAP_RTX nvptx_get_drap_rtx #undef TARGET_SPLIT_COMPLEX_ARG #define TARGET_SPLIT_COMPLEX_ARG hook_bool_const_tree_true #undef TARGET_RETURN_IN_MEMORY Index: gcc/config/nvptx/nvptx.h =================================================================== --- gcc/config/nvptx/nvptx.h (revision 227598) +++ gcc/config/nvptx/nvptx.h (working copy) @@ -58,6 +58,8 @@ #define BIGGEST_ALIGNMENT 64 #define STRICT_ALIGNMENT 1 +#define MAX_STACK_ALIGNMENT (1024 * 8) + /* Copied from elf.h and other places. We'd otherwise use BIGGEST_ALIGNMENT and fail a number of testcases. 
*/ #define MAX_OFILE_ALIGNMENT (32768 * 8) Index: gcc/config/nvptx/nvptx.md =================================================================== --- gcc/config/nvptx/nvptx.md (revision 227598) +++ gcc/config/nvptx/nvptx.md (working copy) @@ -1274,6 +1274,12 @@ (match_operand 1 "nvptx_register_operand")] "" { + /* The ptx documentation specifies an alloca intrinsic (for 32 bit + only) but notes it is not implemented. The assembler emits a + confused error message. Issue a blunt one now instead. */ + sorry ("target cannot support alloca."); + emit_insn (gen_nop ()); + DONE; if (TARGET_ABI64) emit_insn (gen_allocate_stack_di (operands[0], operands[1])); else @@ -1498,14 +1504,12 @@ (match_operand:SI 7 "const_int_operand")] ;; failure model "" { - emit_insn (gen_atomic_compare_and_swap<mode>_1 (operands[1], operands[2], operands[3], - operands[4], operands[6])); + emit_insn (gen_atomic_compare_and_swap<mode>_1 + (operands[1], operands[2], operands[3], operands[4], operands[6])); - rtx tmp = gen_reg_rtx (GET_MODE (operands[0])); - emit_insn (gen_cstore<mode>4 (tmp, - gen_rtx_EQ (SImode, operands[1], operands[3]), - operands[1], operands[3])); - emit_insn (gen_andsi3 (operands[0], tmp, GEN_INT (1))); + rtx cond = gen_reg_rtx (BImode); + emit_move_insn (cond, gen_rtx_EQ (BImode, operands[1], operands[3])); + emit_insn (gen_sel_truesi (operands[0], cond, GEN_INT (1), GEN_INT (0))); DONE; })