Hi. The following small series enhances the HSA back-end in the following manner:
1) HSA: support alloca builtin; 2) HSA: dump alignment of mem and alloca instructions; 3) HSA: write back OMP arguments after a kernel dispatch. All patches have been committed to the branch. Martin
>From 860520b994f96e3a12d85f4dc8185c502df46942 Mon Sep 17 00:00:00 2001 From: marxin <mli...@suse.cz> Date: Tue, 24 Nov 2015 15:48:12 +0100 Subject: [PATCH 1/4] HSA: support alloca builtin gcc/ChangeLog: 2015-11-24 Martin Liska <mli...@suse.cz> * hsa-brig.c (emit_alloca_insn): New function. (emit_insn): Handle hsa_insn_alloca. * hsa-gen.c (hsa_init_data_for_cfun): Add new pool allocator for hsa_insn_alloca. (hsa_deinit_data_for_cfun): Release the pool. (hsa_insn_alloca::operator new): New function. (hsa_insn_alloca::hsa_insn_alloca): Likewise. (gen_hsa_alloca): Likewise. (gen_hsa_insns_for_call): Handle __builtin_alloca and __builtin_alloca_with_align. * hsa.h (is_a_helper ::test): New function. --- gcc/hsa-brig.c | 31 +++++++++++++++++++++++ gcc/hsa-gen.c | 78 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ gcc/hsa.h | 25 +++++++++++++++++++ 3 files changed, 134 insertions(+) diff --git a/gcc/hsa-brig.c b/gcc/hsa-brig.c index fbb08fb..fd60663 100644 --- a/gcc/hsa-brig.c +++ b/gcc/hsa-brig.c @@ -1295,6 +1295,35 @@ emit_segment_insn (hsa_insn_seg *seg) brig_insn_count++; } +/* Emit an HSA alloca instruction and all necessary directives, + schedule necessary operands for writing . */ + +static void +emit_alloca_insn (hsa_insn_alloca *alloca) +{ + struct BrigInstMem repr; + gcc_checking_assert (alloca->operand_count () == 2); + + /* This is necessary because of the erroneous typedef of + BrigMemoryModifier8_t which introduces padding which may then contain + random stuff (which we do not want so that we can test things don't + change). 
*/ + memset (&repr, 0, sizeof (repr)); + repr.base.base.byteCount = htole16 (sizeof (repr)); + repr.base.base.kind = htole16 (BRIG_KIND_INST_MEM); + repr.base.opcode = htole16 (alloca->m_opcode); + repr.base.type = htole16 (alloca->m_type); + repr.base.operands = htole32 (emit_insn_operands (alloca)); + repr.segment = BRIG_SEGMENT_PRIVATE; + repr.modifier.allBits = 0 ; + repr.equivClass = 0; + repr.align = alloca->m_align; + repr.width = BRIG_WIDTH_NONE; + memset (&repr.reserved, 0, sizeof (repr.reserved)); + brig_code.add (&repr, sizeof (repr)); + brig_insn_count++; +} + /* Emit an HSA comparison instruction and all necessary directives, schedule necessary operands for writing . */ @@ -1699,6 +1728,8 @@ emit_insn (hsa_insn_basic *insn) emit_packed_insn (packed); else if (hsa_insn_cvt *cvt = dyn_cast <hsa_insn_cvt *> (insn)) emit_cvt_insn (cvt); + else if (hsa_insn_alloca *alloca = dyn_cast <hsa_insn_alloca *> (insn)) + emit_alloca_insn (alloca); else emit_basic_insn (insn); } diff --git a/gcc/hsa-gen.c b/gcc/hsa-gen.c index e9c67eb..b39123d 100644 --- a/gcc/hsa-gen.c +++ b/gcc/hsa-gen.c @@ -144,6 +144,7 @@ static object_allocator<hsa_insn_comment> *hsa_allocp_inst_comment; static object_allocator<hsa_insn_queue> *hsa_allocp_inst_queue; static object_allocator<hsa_insn_packed> *hsa_allocp_inst_packed; static object_allocator<hsa_insn_cvt> *hsa_allocp_inst_cvt; +static object_allocator<hsa_insn_alloca> *hsa_allocp_inst_alloca; static object_allocator<hsa_bb> *hsa_allocp_bb; /* List of pointers to all instructions that come from an object allocator. 
*/ @@ -354,6 +355,8 @@ hsa_init_data_for_cfun () = new object_allocator<hsa_insn_packed> ("HSA packed instructions"); hsa_allocp_inst_cvt = new object_allocator<hsa_insn_cvt> ("HSA convert instructions"); + hsa_allocp_inst_alloca + = new object_allocator<hsa_insn_alloca> ("HSA alloca instructions"); hsa_allocp_bb = new object_allocator<hsa_bb> ("HSA basic blocks"); } @@ -402,6 +405,7 @@ hsa_deinit_data_for_cfun (void) delete hsa_allocp_inst_queue; delete hsa_allocp_inst_packed; delete hsa_allocp_inst_cvt; + delete hsa_allocp_inst_alloca; delete hsa_allocp_bb; delete hsa_cfun; } @@ -1608,6 +1612,26 @@ hsa_insn_cvt::hsa_insn_cvt (hsa_op_with_type *dest, hsa_op_with_type *src) { } +/* New operator to allocate alloca from pool alloc. */ + +void * +hsa_insn_alloca::operator new (size_t) +{ + return hsa_allocp_inst_alloca->allocate_raw (); +} + +/* Constructor of class representing the alloca in HSAIL. */ + +hsa_insn_alloca::hsa_insn_alloca (hsa_op_with_type *dest, + hsa_op_with_type *size, unsigned alignment) + : hsa_insn_basic (2, BRIG_OPCODE_ALLOCA, dest->m_type, dest, size), + m_align (BRIG_ALIGNMENT_8) +{ + gcc_assert (dest->m_type == BRIG_TYPE_U32); + if (alignment) + m_align = hsa_alignment_encoding (alignment); +} + /* Append an instruction INSN into the basic block. */ void @@ -3562,6 +3586,53 @@ gen_get_team_num (gimple *stmt, hsa_bb *hbb) hbb->append_insn (basic); } +/* Emit instructions that implement alloca builtin gimple STMT. + Instructions are appended to basic block HBB. 
*/ + +static void +gen_hsa_alloca (gcall *call, hsa_bb *hbb) +{ + tree lhs = gimple_call_lhs (call); + if (lhs == NULL_TREE) + return; + + built_in_function fn = DECL_FUNCTION_CODE (gimple_call_fndecl (call)); + + gcc_checking_assert (fn == BUILT_IN_ALLOCA + || fn == BUILT_IN_ALLOCA_WITH_ALIGN); + + unsigned bit_alignment = 0; + + if (fn == BUILT_IN_ALLOCA_WITH_ALIGN) + { + tree alignment_tree = gimple_call_arg (call, 1); + if (TREE_CODE (alignment_tree) != INTEGER_CST) + { + HSA_SORRY_ATV + (gimple_location (call), "support for HSA does not implement " + "__builtin_alloca_with_align with a non-constant " + "alignment: %E", alignment_tree); + } + + bit_alignment = tree_to_uhwi (alignment_tree); + } + + tree rhs1 = gimple_call_arg (call, 0); + hsa_op_with_type *size = hsa_reg_or_immed_for_gimple_op (rhs1, hbb) + ->get_in_type (BRIG_TYPE_U32, hbb); + hsa_op_with_type *dest = hsa_cfun->reg_for_gimple_ssa (lhs); + + hsa_op_reg *tmp = new hsa_op_reg + (hsa_get_segment_addr_type (BRIG_SEGMENT_PRIVATE)); + hsa_insn_alloca *a = new hsa_insn_alloca (tmp, size, bit_alignment); + hbb->append_insn (a); + + hsa_insn_seg *seg = new hsa_insn_seg + (BRIG_OPCODE_STOF, hsa_get_segment_addr_type (BRIG_SEGMENT_FLAT), + tmp->m_type, BRIG_SEGMENT_PRIVATE, dest, tmp); + hbb->append_insn (seg); +} + /* Set VALUE to a shadow kernel debug argument and append a new instruction to HBB basic block. 
*/ @@ -4333,6 +4404,7 @@ gen_hsa_ternary_atomic_for_builtin (bool ret_orig, static void gen_hsa_insns_for_call (gimple *stmt, hsa_bb *hbb) { + gcall *call = as_a <gcall *> (stmt); tree lhs = gimple_call_lhs (stmt); hsa_op_reg *dest; @@ -4711,6 +4783,12 @@ gen_hsa_insns_for_call (gimple *stmt, hsa_bb *hbb) break; } + case BUILT_IN_ALLOCA: + case BUILT_IN_ALLOCA_WITH_ALIGN: + { + gen_hsa_alloca (call, hbb); + break; + } default: { gen_hsa_insns_for_direct_call (stmt, hbb); diff --git a/gcc/hsa.h b/gcc/hsa.h index f0c3d80..d697542 100644 --- a/gcc/hsa.h +++ b/gcc/hsa.h @@ -941,6 +941,31 @@ is_a_helper <hsa_insn_cvt *>::test (hsa_insn_basic *p) return (p->m_opcode == BRIG_OPCODE_CVT); } +/* HSA alloca instruction. */ + +class hsa_insn_alloca: public hsa_insn_basic +{ +public: + hsa_insn_alloca (hsa_op_with_type *dest, hsa_op_with_type *size, + unsigned alignment = 0); + + /* Required alignment of the allocation. */ + BrigAlignment8_t m_align; + + /* Pool allocator. */ + void *operator new (size_t); +}; + +/* Report whether or not P is an alloca instruction. */ + +template <> +template <> +inline bool +is_a_helper <hsa_insn_alloca *>::test (hsa_insn_basic *p) +{ + return (p->m_opcode == BRIG_OPCODE_ALLOCA); +} + /* Basic block of HSA instructions. */ class hsa_bb -- 2.6.3
>From 90f566eadff12095b9b85f709aa25adcc8c3414a Mon Sep 17 00:00:00 2001 From: marxin <mli...@suse.cz> Date: Tue, 24 Nov 2015 16:17:44 +0100 Subject: [PATCH 2/4] HSA: dump alignment of mem and alloca instructions gcc/ChangeLog: 2015-11-24 Martin Liska <mli...@suse.cz> * hsa-dump.c (hsa_byte_alignment): New function. (dump_hsa_insn_1): Use the function. --- gcc/hsa-dump.c | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/gcc/hsa-dump.c b/gcc/hsa-dump.c index 1391f7b..70c71bc 100644 --- a/gcc/hsa-dump.c +++ b/gcc/hsa-dump.c @@ -621,6 +621,16 @@ hsa_m_atomicop_name (enum BrigAtomicOperation op) } } +/* Return byte alignment for given BrigAlignment8_t value. */ + +static unsigned +hsa_byte_alignment (BrigAlignment8_t alignment) +{ + gcc_assert (alignment != BRIG_ALIGNMENT_NONE); + + return 1 << (alignment - 1); +} + /* Dump textual representation of HSA IL register REG to file F. */ static void @@ -829,6 +839,8 @@ dump_hsa_insn_1 (FILE *f, hsa_insn_basic *insn, int *indent) fprintf (f, "%s", hsa_opcode_name (mem->m_opcode)); if (addr->m_symbol) fprintf (f, "_%s", hsa_seg_name (addr->m_symbol->m_segment)); + if (mem->m_align != BRIG_ALIGNMENT_NONE) + fprintf (f, "_align(%u)", hsa_byte_alignment (mem->m_align)); if (mem->m_equiv_class != 0) fprintf (f, "_equiv(%i)", mem->m_equiv_class); fprintf (f, "_%s ", hsa_type_name (mem->m_type)); @@ -987,6 +999,16 @@ dump_hsa_insn_1 (FILE *f, hsa_insn_basic *insn, int *indent) else gcc_unreachable (); } + else if (is_a <hsa_insn_alloca *> (insn)) + { + hsa_insn_alloca *alloca = as_a <hsa_insn_alloca *> (insn); + + fprintf (f, "%s_align(%u)_%s ", hsa_opcode_name (insn->m_opcode), + hsa_byte_alignment (alloca->m_align), + hsa_type_name (insn->m_type)); + + dump_hsa_operands (f, insn); + } else { fprintf (f, "%s_%s ", hsa_opcode_name (insn->m_opcode), -- 2.6.3
>From df55b9103bf00e435984eab7a2cd50a3eaf80ef3 Mon Sep 17 00:00:00 2001 From: marxin <mli...@suse.cz> Date: Tue, 24 Nov 2015 16:55:55 +0100 Subject: [PATCH 3/4] HSA: write back OMP arguments after a kernel dispatch gcc/ChangeLog: 2015-11-24 Martin Liska <mli...@suse.cz> * hsa-gen.c (gen_hsa_insns_for_kernel_call): Copy back OMP argument that is copied to a dispatched kernel. --- gcc/hsa-gen.c | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/gcc/hsa-gen.c b/gcc/hsa-gen.c index b39123d..75facec 100644 --- a/gcc/hsa-gen.c +++ b/gcc/hsa-gen.c @@ -4009,10 +4009,10 @@ gen_hsa_insns_for_kernel_call (hsa_bb *hbb, gcall *call) tree argument = gimple_call_arg (call, 1); + hsa_symbol *omp_var_decl = NULL; if (TREE_CODE (argument) == ADDR_EXPR) { /* Emit instructions that copy OMP arguments. */ - tree d = TREE_TYPE (TREE_OPERAND (argument, 0)); unsigned omp_data_size = tree_to_uhwi (TYPE_SIZE_UNIT (d)); gcc_checking_assert (omp_data_size > 0); @@ -4020,12 +4020,12 @@ gen_hsa_insns_for_kernel_call (hsa_bb *hbb, gcall *call) if (omp_data_size > hsa_cfun->m_maximum_omp_data_size) hsa_cfun->m_maximum_omp_data_size = omp_data_size; - hsa_symbol *var_decl = get_symbol_for_decl (TREE_OPERAND (argument, 0)); + omp_var_decl = get_symbol_for_decl (TREE_OPERAND (argument, 0)); - hbb->append_insn (new hsa_insn_comment ("memory copy instructions")); + hbb->append_insn (new hsa_insn_comment ("OMP arg memcpy instructions")); - hsa_op_address *src_addr = new hsa_op_address (var_decl); - gen_hsa_memory_copy (hbb, dst_addr, src_addr, var_decl->m_dim); + hsa_op_address *src_addr = new hsa_op_address (omp_var_decl); + gen_hsa_memory_copy (hbb, dst_addr, src_addr, omp_var_decl->m_dim); } else if (integer_zerop (argument)) { @@ -4107,6 +4107,8 @@ gen_hsa_insns_for_kernel_call (hsa_bb *hbb, gcall *call) basic_block dest = split_edge (e); edge false_e = EDGE_SUCC (dest, 0); + basic_block memcpy_dest = split_edge (false_e); + false_e->flags &= ~EDGE_FALLTHRU; 
false_e->flags |= EDGE_FALSE_VALUE; @@ -4114,6 +4116,7 @@ gen_hsa_insns_for_kernel_call (hsa_bb *hbb, gcall *call) /* Emit blocking signal waiting instruction. */ hsa_bb *new_hbb = hsa_init_new_bb (dest); + hsa_bb *memcpy_hbb = hsa_init_new_bb (memcpy_dest); hbb->append_insn (new hsa_insn_comment ("wait for the signal")); @@ -4137,6 +4140,16 @@ gen_hsa_insns_for_kernel_call (hsa_bb *hbb, gcall *call) new_hbb->append_insn (cmp); new_hbb->append_insn (new hsa_insn_br (ctrl)); + if (TREE_CODE (argument) == ADDR_EXPR) + { + /* Emit instructions that copy back OMP arguments to a caller kernel. */ + memcpy_hbb->append_insn + (new hsa_insn_comment ("OMP arg memcpy back instructions")); + + hsa_op_address *src_addr = new hsa_op_address (omp_var_decl); + gen_hsa_memory_copy (memcpy_hbb, src_addr, dst_addr, omp_var_decl->m_dim); + } + hsa_cfun->m_kernel_dispatch_count++; } -- 2.6.3