Hi, the patch below adds support for HSA vector immediates and instructions storing them directly to memory, which was hitherto missing on the branch.
Committed as r224554. Thanks, Martin 2015-06-16 Martin Jambor <mjam...@suse.cz> * hsa-brig.c (hsa_get_imm_brig_type_len): New function. (emit_immediate_scalar_to_data_section): Likewise. (emit_immediate_operand): Reimplemented. * hsa-gen.c (gen_hsa_insns_for_load): Trimmed long line. (gen_hsa_insns_for_store): Added missing comment, trimmed long line, added another type exception for vector immediates. diff --git a/gcc/hsa-brig.c b/gcc/hsa-brig.c index d28634d..bb4a2c1 100644 --- a/gcc/hsa-brig.c +++ b/gcc/hsa-brig.c @@ -785,118 +785,170 @@ enqueue_op (hsa_op_base *op) return ret; } -/* Emit an immediate BRIG operand IMM. */ +/* Return the length of the BRIG type TYPE that is going to be streamed out as + an immediate constant (so it must not be B1). */ -static void -emit_immediate_operand (hsa_op_immed *imm) +static unsigned +hsa_get_imm_brig_type_len (BrigType16_t type) { - struct BrigOperandConstantBytes out; - uint32_t byteCount; + BrigType16_t base_type = type & BRIG_TYPE_BASE_MASK; + BrigType16_t pack_type = type & BRIG_TYPE_PACK_MASK; - union - { - uint8_t b8; - uint16_t b16; - uint32_t b32; - uint64_t b64; - } bytes; - unsigned len; + switch (pack_type) + { + case BRIG_TYPE_PACK_NONE: + break; + case BRIG_TYPE_PACK_32: + return 4; + case BRIG_TYPE_PACK_64: + return 8; + case BRIG_TYPE_PACK_128: + return 16; + default: + gcc_unreachable (); + } - memset (&out, 0, sizeof (out)); - switch (imm->type) + switch (base_type) { case BRIG_TYPE_U8: case BRIG_TYPE_S8: - len = 1; - bytes.b8 = (uint8_t) TREE_INT_CST_LOW (imm->value); - break; + case BRIG_TYPE_B8: + return 1; case BRIG_TYPE_U16: case BRIG_TYPE_S16: - bytes.b16 = (uint16_t) TREE_INT_CST_LOW (imm->value); - len = 2; - break; - case BRIG_TYPE_F16: - sorry ("Support for HSA does not implement immediate 16 bit FPU " - "operands"); - len = 2; - break; - + case BRIG_TYPE_B16: + return 2; case BRIG_TYPE_U32: case BRIG_TYPE_S32: - bytes.b32 = (uint32_t) TREE_INT_CST_LOW (imm->value); - len = 4; - break; - + case 
BRIG_TYPE_F32: + case BRIG_TYPE_B32: + return 4; case BRIG_TYPE_U64: case BRIG_TYPE_S64: - bytes.b64 = (uint64_t) int_cst_value (imm->value); - len = 8; - break; - - case BRIG_TYPE_F32: case BRIG_TYPE_F64: - { - tree expr = imm->value; - tree type = TREE_TYPE (expr); + case BRIG_TYPE_B64: + return 8; + case BRIG_TYPE_B128: + return 16; + default: + gcc_unreachable (); + } +} - len = GET_MODE_SIZE (TYPE_MODE (type)); +/* Emit one scalar VALUE to the data BRIG section. If NEED_LEN is not equal to + zero, shrink or extend the value to NEED_LEN bytes. Return how many bytes + were written. */ - /* There are always 32 bits in each long, no matter the size of - the hosts long. */ - long tmp[6]; +static int +emit_immediate_scalar_to_data_section (tree value, unsigned need_len) +{ + union + { + uint8_t b8; + uint16_t b16; + uint32_t b32; + uint64_t b64; + } bytes; - gcc_assert (len == 4 || len == 8); + memset (&bytes, 0, sizeof (bytes)); + tree type = TREE_TYPE (value); + gcc_checking_assert (TREE_CODE (type) != VECTOR_TYPE); + unsigned data_len = tree_to_uhwi (TYPE_SIZE (type))/BITS_PER_UNIT; + if (INTEGRAL_TYPE_P (type)) + switch (data_len) + { + case 1: + bytes.b8 = (uint8_t) TREE_INT_CST_LOW (value); + break; + case 2: + bytes.b16 = (uint16_t) TREE_INT_CST_LOW (value); + break; + case 4: + bytes.b32 = (uint32_t) TREE_INT_CST_LOW (value); + break; + case 8: + bytes.b64 = (uint64_t) int_cst_value (value); + break; + default: + gcc_unreachable (); + } + else if (SCALAR_FLOAT_TYPE_P (type)) + { + if (data_len == 2) + { + sorry ("Support for HSA does not implement immediate 16 bit FPU " + "operands"); + return 2; + } + unsigned int_len = GET_MODE_SIZE (TYPE_MODE (type)); + /* There are always 32 bits in each long, no matter the size of + the hosts long. 
*/ + long tmp[6]; - real_to_target (tmp, TREE_REAL_CST_PTR (expr), TYPE_MODE (type)); + real_to_target (tmp, TREE_REAL_CST_PTR (value), TYPE_MODE (type)); - if (len == 4) - bytes.b32 = (uint32_t) tmp[0]; - else - { - bytes.b64 = (uint64_t)(uint32_t) tmp[1]; - bytes.b64 <<= 32; - bytes.b64 |= (uint32_t) tmp[0]; - } + if (int_len == 4) + bytes.b32 = (uint32_t) tmp[0]; + else + { + bytes.b64 = (uint64_t)(uint32_t) tmp[1]; + bytes.b64 <<= 32; + bytes.b64 |= (uint32_t) tmp[0]; + } + } + else + gcc_unreachable (); - break; - } + int len; + if (need_len == 0) + len = data_len; + else + len = need_len; - case BRIG_TYPE_U8X4: - case BRIG_TYPE_S8X4: - case BRIG_TYPE_U16X2: - case BRIG_TYPE_S16X2: - case BRIG_TYPE_F16X2: - len = 4; - sorry ("Support for HSA does not implement immediate 32bit " - "vector operands. "); - break; + brig_data.add (&bytes, len); + return len; +} - case BRIG_TYPE_U8X8: - case BRIG_TYPE_S8X8: - case BRIG_TYPE_U16X4: - case BRIG_TYPE_S16X4: - case BRIG_TYPE_F16X4: - case BRIG_TYPE_U32X2: - case BRIG_TYPE_S32X2: - case BRIG_TYPE_F32X2: - len = 8; - sorry ("Support for HSA does not implement immediate 32bit " - "vector operands. "); - break; +/* Emit an immediate BRIG operand IMM. The BRIG type of the immediate might + have been massaged to comply with various HSA/BRIG type requirements, so the + only important aspect of that is the length (because HSAIL might expect + smaller constants or become bit-data). The data should be represented + according to what is in the tree representation. */ - default: - gcc_unreachable (); - } +static void +emit_immediate_operand (hsa_op_immed *imm) +{ + struct BrigOperandConstantBytes out; + unsigned total_len = hsa_get_imm_brig_type_len (imm->type); + + /* We do not produce HSAIL array types anywhere. 
*/ + gcc_assert (!(imm->type & BRIG_TYPE_ARRAY)); + memset (&out, 0, sizeof (out)); out.base.byteCount = htole16 (sizeof (out)); out.base.kind = htole16 (BRIG_KIND_OPERAND_CONSTANT_BYTES); - byteCount = htole32 (len); + uint32_t byteCount = htole32 (total_len); out.type = htole16 (imm->type); - out.bytes = brig_data.add (&byteCount, sizeof (byteCount)); - brig_data.add (&bytes, len); - + out.bytes = htole32 (brig_data.add (&byteCount, sizeof (byteCount))); brig_operand.add (&out, sizeof(out)); + + if (TREE_CODE (imm->value) == VECTOR_CST) + { + int i, num = VECTOR_CST_NELTS (imm->value); + for (i = 0; i < num; i++) + { + unsigned actual; + actual = emit_immediate_scalar_to_data_section + (VECTOR_CST_ELT (imm->value, i), 0); + total_len -= actual; + } + /* Vectors should have the exact size. */ + gcc_assert (total_len == 0); + } + else + emit_immediate_scalar_to_data_section (imm->value, total_len); + brig_data.round_size_up (4); } diff --git a/gcc/hsa-gen.c b/gcc/hsa-gen.c index db5200d..0349efd 100644 --- a/gcc/hsa-gen.c +++ b/gcc/hsa-gen.c @@ -1318,7 +1318,8 @@ gen_hsa_insns_for_load (hsa_op_reg *dest, tree rhs, tree type, hsa_bb *hbb, addr = gen_hsa_addr (rhs, hbb, ssa_map); mem->opcode = BRIG_OPCODE_LD; /* Not dest->type, that's possibly extended. */ - mem->type = mem_type_for_type (hsa_type_for_scalar_tree_type (type, false)); + mem->type = mem_type_for_type (hsa_type_for_scalar_tree_type (type, + false)); mem->operands[0] = dest; mem->operands[1] = addr; set_reg_def (dest, mem); @@ -1331,6 +1332,9 @@ gen_hsa_insns_for_load (hsa_op_reg *dest, tree rhs, tree type, hsa_bb *hbb, rhs); } +/* Generate HSAIL instructions storing into memory. LHS is the destination of + the store, SRC is the source operand. Add instructions to HBB, use SSA_MAP + for HSA SSA lookup. 
*/ static void gen_hsa_insns_for_store (tree lhs, hsa_op_base *src, hsa_bb *hbb, @@ -1343,7 +1347,8 @@ gen_hsa_insns_for_store (tree lhs, hsa_op_base *src, hsa_bb *hbb, mem->opcode = BRIG_OPCODE_ST; if (hsa_op_reg *reg = dyn_cast <hsa_op_reg *> (src)) reg->uses.safe_push (mem); - mem->type = mem_type_for_type (hsa_type_for_scalar_tree_type (TREE_TYPE (lhs), false)); + mem->type = mem_type_for_type (hsa_type_for_scalar_tree_type (TREE_TYPE (lhs), + false)); /* XXX The HSAIL disasm has another constraint: if the source is an immediate then it must match the destination type. If @@ -1351,7 +1356,32 @@ gen_hsa_insns_for_store (tree lhs, hsa_op_base *src, hsa_bb *hbb, We're always allocating new operands so we can modify the above in place. */ if (hsa_op_immed *imm = dyn_cast <hsa_op_immed *> (src)) - imm->type = mem->type; + { + if ((imm->type & BRIG_TYPE_PACK_MASK) == BRIG_TYPE_PACK_NONE) + imm->type = mem->type; + else + { + /* ...and all vector immediates apparently need to be vectors of + unsigned bytes. */ + BrigType16_t bt = bittype_for_type (imm->type); + gcc_assert (bt == bittype_for_type (mem->type)); + switch (bt) + { + case BRIG_TYPE_B32: + imm->type = BRIG_TYPE_U8X4; + break; + case BRIG_TYPE_B64: + imm->type = BRIG_TYPE_U8X8; + break; + case BRIG_TYPE_B128: + imm->type = BRIG_TYPE_U8X16; + break; + default: + gcc_unreachable (); + } + } + } + mem->operands[0] = src; mem->operands[1] = addr; if (addr->reg)