Hi,
Add GIMPLE folding for the unaligned vector load and store built-ins.
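With the fold in place, a vec_vsx_ld / vec_vsx_st call is rewritten during
GIMPLE folding into a plain MEM_REF access.  Roughly (illustrative only,
not verbatim dump output), a V4SI load and store end up as:

    _1 = (sizetype) offset;
    _2 = addr + _1;
    vresult = MEM[(vector(4) int *)_2];      /* was vec_vsx_ld */

    _3 = (sizetype) offset;
    _4 = addr + _3;
    MEM[(vector(4) int *)_4] = vvalue;       /* was vec_vsx_st */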
Regtests completed across a variety of systems (P6, P7, P8, P9).
[v2] Added the type for the MEM_REF, per feedback.
Testcases for the GIMPLE folding of these built-ins are already in-tree
as powerpc/fold-vec-load-*.c and powerpc/fold-vec-store-*.c.
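For reference, a minimal sketch (illustrative only, not one of those
in-tree tests) of the kind of source they exercise:

    #include <altivec.h>

    vector signed int
    get_vsi (int off, const vector signed int *p)
    {
      return vec_vsx_ld (off, p);    /* lxvw4x built-in; now folded.  */
    }

    void
    put_vsi (vector signed int v, int off, vector signed int *p)
    {
      vec_vsx_st (v, off, p);        /* stxvw4x built-in; now folded.  */
    }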
Re-tested, still looks good. :-)
Thanks
-Will
[gcc]
2018-06-12 Will Schmidt <[email protected]>
* config/rs6000/rs6000.c (rs6000_builtin_valid_without_lhs): Add
vec_xst variants to the list.
(rs6000_gimple_fold_builtin): Add support for folding unaligned
vector loads and stores.
diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index d62abdf..374666c 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -15360,10 +15360,16 @@ rs6000_builtin_valid_without_lhs (enum rs6000_builtins fn_code)
case ALTIVEC_BUILTIN_STVX_V8HI:
case ALTIVEC_BUILTIN_STVX_V4SI:
case ALTIVEC_BUILTIN_STVX_V4SF:
case ALTIVEC_BUILTIN_STVX_V2DI:
case ALTIVEC_BUILTIN_STVX_V2DF:
+ case VSX_BUILTIN_STXVW4X_V16QI:
+ case VSX_BUILTIN_STXVW4X_V8HI:
+ case VSX_BUILTIN_STXVW4X_V4SF:
+ case VSX_BUILTIN_STXVW4X_V4SI:
+ case VSX_BUILTIN_STXVD2X_V2DF:
+ case VSX_BUILTIN_STXVD2X_V2DI:
return true;
default:
return false;
}
}
@@ -15869,10 +15875,78 @@ rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi)
gimple_set_location (g, loc);
gsi_replace (gsi, g, true);
return true;
}
+ /* Unaligned vector loads.  */
+ case VSX_BUILTIN_LXVW4X_V16QI:
+ case VSX_BUILTIN_LXVW4X_V8HI:
+ case VSX_BUILTIN_LXVW4X_V4SF:
+ case VSX_BUILTIN_LXVW4X_V4SI:
+ case VSX_BUILTIN_LXVD2X_V2DF:
+ case VSX_BUILTIN_LXVD2X_V2DI:
+ {
+ arg0 = gimple_call_arg (stmt, 0); /* Offset.  */
+ arg1 = gimple_call_arg (stmt, 1); /* Address.  */
+ lhs = gimple_call_lhs (stmt);
+ location_t loc = gimple_location (stmt);
+ /* Since arg1 may be cast to a different type, just use ptr_type_node
+ here instead of trying to enforce TBAA on pointer types. */
+ tree arg1_type = ptr_type_node;
+ tree lhs_type = TREE_TYPE (lhs);
+ /* POINTER_PLUS_EXPR wants the offset to be of type 'sizetype'. Create
+ the tree using the value from arg0. The resulting type will match
+ the type of arg1. */
+ gimple_seq stmts = NULL;
+ tree temp_offset = gimple_convert (&stmts, loc, sizetype, arg0);
+ tree temp_addr = gimple_build (&stmts, loc, POINTER_PLUS_EXPR,
+ arg1_type, arg1, temp_offset);
+ gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
+ /* Use the build2 helper to set up the mem_ref. The MEM_REF could also
+ take an offset, but since we've already incorporated the offset
+ above, here we just pass in a zero. */
+ gimple *g;
+ tree align_ltype = build_aligned_type (lhs_type, TYPE_ALIGN (lhs_type));
+ g = gimple_build_assign (lhs, build2 (MEM_REF, align_ltype, temp_addr,
+ build_int_cst (arg1_type, 0)));
+ gimple_set_location (g, loc);
+ gsi_replace (gsi, g, true);
+ return true;
+ }
+
+ /* Unaligned vector stores.  */
+ case VSX_BUILTIN_STXVW4X_V16QI:
+ case VSX_BUILTIN_STXVW4X_V8HI:
+ case VSX_BUILTIN_STXVW4X_V4SF:
+ case VSX_BUILTIN_STXVW4X_V4SI:
+ case VSX_BUILTIN_STXVD2X_V2DF:
+ case VSX_BUILTIN_STXVD2X_V2DI:
+ {
+ arg0 = gimple_call_arg (stmt, 0); /* Value to be stored. */
+ arg1 = gimple_call_arg (stmt, 1); /* Offset. */
+ tree arg2 = gimple_call_arg (stmt, 2); /* Store-to address. */
+ location_t loc = gimple_location (stmt);
+ tree arg0_type = TREE_TYPE (arg0);
+ /* Use ptr_type_node (no TBAA) for the arg2_type. */
+ tree arg2_type = ptr_type_node;
+ /* POINTER_PLUS_EXPR wants the offset to be of type 'sizetype'. Create
+ the tree using the value from arg1. The resulting type will match
+ the type of arg2. */
+ gimple_seq stmts = NULL;
+ tree temp_offset = gimple_convert (&stmts, loc, sizetype, arg1);
+ tree temp_addr = gimple_build (&stmts, loc, POINTER_PLUS_EXPR,
+ arg2_type, arg2, temp_offset);
+ gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
+ gimple *g;
+ g = gimple_build_assign (build2 (MEM_REF, arg0_type, temp_addr,
+ build_int_cst (arg2_type, 0)), arg0);
+ gimple_set_location (g, loc);
+ gsi_replace (gsi, g, true);
+ return true;
+ }
+
/* Vector Fused multiply-add (fma). */
case ALTIVEC_BUILTIN_VMADDFP:
case VSX_BUILTIN_XVMADDDP:
case ALTIVEC_BUILTIN_VMLADDUHM:
{