When I forward-ported the scatter/gather patterns from my GCC 9
implementation, I didn't notice that GCC 10 has a different naming
scheme. :-(
The patterns were still being used, because all GCN vector loads end up
being scatter/gather, but they were not being used by the actual
vectorizer. The test failures were there to see, but there are still a
lot of those to work through.
This patch uses the new two-mode naming scheme and implements the
offsets correctly. This is actually a step forward for GCN because the
offsets are always SImode, regardless of the primary mode.
Andrew
amdgcn: Use correct offset mode for gather/scatter
The scatter/gather pattern names changed for GCC 10, but I hadn't noticed.
This switches the patterns to the new offset mode scheme.
2020-02-21 Andrew Stubbs <a...@codesourcery.com>
gcc/
* config/gcn/gcn-valu.md (gather_load<mode>): Rename to ...
(gather_load<mode>v64si): ... this and set operand 2 to V64SI.
(scatter_store<mode>): Rename to ...
(scatter_store<mode>v64si): ... this and set operand 1 to V64SI.
(scatter<mode>_exec): Delete. Move contents ...
(mask_scatter_store<mode>): ... here, and rename that to ...
(mask_scatter_store<mode>v64si): ... this. Set operand 1 to V64SI.
Remove mode conversion.
(mask_gather_load<mode>): Rename to ...
(mask_gather_load<mode>v64si): ... this. Set operand 2 to V64SI.
Remove mode conversion.
* config/gcn/gcn.c (gcn_expand_scaled_offsets): Remove mode conversion.
diff --git a/gcc/config/gcn/gcn-valu.md b/gcc/config/gcn/gcn-valu.md
index d5e6d0a625a..a0cc9a2d8fc 100644
--- a/gcc/config/gcn/gcn-valu.md
+++ b/gcc/config/gcn/gcn-valu.md
@@ -679,10 +679,10 @@
;; fields normally found in a MEM.
;; - Multiple forms of address expression are supported, below.
-(define_expand "gather_load<mode>"
+(define_expand "gather_load<mode>v64si"
[(match_operand:VEC_ALLREG_MODE 0 "register_operand")
(match_operand:DI 1 "register_operand")
- (match_operand 2 "register_operand")
+ (match_operand:V64SI 2 "register_operand")
(match_operand 3 "immediate_operand")
(match_operand:SI 4 "gcn_alu_operand")]
""
@@ -811,9 +811,9 @@
[(set_attr "type" "flat")
(set_attr "length" "12")])
-(define_expand "scatter_store<mode>"
+(define_expand "scatter_store<mode>v64si"
[(match_operand:DI 0 "register_operand")
- (match_operand 1 "register_operand")
+ (match_operand:V64SI 1 "register_operand")
(match_operand 2 "immediate_operand")
(match_operand:SI 3 "gcn_alu_operand")
(match_operand:VEC_ALLREG_MODE 4 "register_operand")]
@@ -833,34 +833,6 @@
DONE;
})
-(define_expand "scatter<mode>_exec"
- [(match_operand:DI 0 "register_operand")
- (match_operand 1 "register_operand")
- (match_operand 2 "immediate_operand")
- (match_operand:SI 3 "gcn_alu_operand")
- (match_operand:VEC_ALLREG_MODE 4 "register_operand")
- (match_operand:DI 5 "gcn_exec_reg_operand")]
- ""
- {
- operands[5] = force_reg (DImode, operands[5]);
-
- rtx addr = gcn_expand_scaled_offsets (DEFAULT_ADDR_SPACE, operands[0],
- operands[1], operands[3],
- INTVAL (operands[2]), operands[5]);
-
- if (GET_MODE (addr) == V64DImode)
- emit_insn (gen_scatter<mode>_insn_1offset_exec (addr, const0_rtx,
- operands[4], const0_rtx,
- const0_rtx,
- operands[5]));
- else
- emit_insn (gen_scatter<mode>_insn_2offsets_exec (operands[0], addr,
- const0_rtx, operands[4],
- const0_rtx, const0_rtx,
- operands[5]));
- DONE;
- })
-
; Allow any address expression
(define_expand "scatter<mode>_expr<exec_scatter>"
[(set (mem:BLK (scratch))
@@ -2795,10 +2767,10 @@
DONE;
})
-(define_expand "mask_gather_load<mode>"
+(define_expand "mask_gather_load<mode>v64si"
[(match_operand:VEC_ALLREG_MODE 0 "register_operand")
(match_operand:DI 1 "register_operand")
- (match_operand 2 "register_operand")
+ (match_operand:V64SI 2 "register_operand")
(match_operand 3 "immediate_operand")
(match_operand:SI 4 "gcn_alu_operand")
(match_operand:DI 5 "")]
@@ -2806,16 +2778,6 @@
{
rtx exec = force_reg (DImode, operands[5]);
- /* TODO: more conversions will be needed when more types are vectorized. */
- if (GET_MODE (operands[2]) == V64DImode)
- {
- rtx tmp = gen_reg_rtx (V64SImode);
- emit_insn (gen_truncv64div64si2_exec (tmp, operands[2],
- gcn_gen_undef (V64SImode),
- exec));
- operands[2] = tmp;
- }
-
rtx addr = gcn_expand_scaled_offsets (DEFAULT_ADDR_SPACE, operands[1],
operands[2], operands[4],
INTVAL (operands[3]), exec);
@@ -2836,9 +2798,9 @@
DONE;
})
-(define_expand "mask_scatter_store<mode>"
+(define_expand "mask_scatter_store<mode>v64si"
[(match_operand:DI 0 "register_operand")
- (match_operand 1 "register_operand")
+ (match_operand:V64SI 1 "register_operand")
(match_operand 2 "immediate_operand")
(match_operand:SI 3 "gcn_alu_operand")
(match_operand:VEC_ALLREG_MODE 4 "register_operand")
@@ -2847,18 +2809,20 @@
{
rtx exec = force_reg (DImode, operands[5]);
- /* TODO: more conversions will be needed when more types are vectorized. */
- if (GET_MODE (operands[1]) == V64DImode)
- {
- rtx tmp = gen_reg_rtx (V64SImode);
- emit_insn (gen_truncv64div64si2_exec (tmp, operands[1],
- gcn_gen_undef (V64SImode),
- exec));
- operands[1] = tmp;
- }
+ rtx addr = gcn_expand_scaled_offsets (DEFAULT_ADDR_SPACE, operands[0],
+ operands[1], operands[3],
+ INTVAL (operands[2]), exec);
- emit_insn (gen_scatter<mode>_exec (operands[0], operands[1], operands[2],
- operands[3], operands[4], exec));
+ if (GET_MODE (addr) == V64DImode)
+ emit_insn (gen_scatter<mode>_insn_1offset_exec (addr, const0_rtx,
+ operands[4], const0_rtx,
+ const0_rtx,
+ exec));
+ else
+ emit_insn (gen_scatter<mode>_insn_2offsets_exec (operands[0], addr,
+ const0_rtx, operands[4],
+ const0_rtx, const0_rtx,
+ exec));
DONE;
})
diff --git a/gcc/config/gcn/gcn.c b/gcc/config/gcn/gcn.c
index 531f47aff4c..a8fd7d6d52f 100644
--- a/gcc/config/gcn/gcn.c
+++ b/gcc/config/gcn/gcn.c
@@ -1861,15 +1861,6 @@ rtx
gcn_expand_scaled_offsets (addr_space_t as, rtx base, rtx offsets, rtx scale,
bool unsigned_p, rtx exec)
{
- /* Convert the offsets to V64SImode.
- TODO: more conversions will be needed when more types are vectorized. */
- if (GET_MODE (offsets) == V64DImode)
- {
- rtx tmp = gen_reg_rtx (V64SImode);
- emit_insn (gen_truncv64div64si2 (tmp, offsets));
- offsets = tmp;
- }
-
rtx tmpsi = gen_reg_rtx (V64SImode);
rtx tmpdi = gen_reg_rtx (V64DImode);
rtx undefsi = exec ? gcn_gen_undef (V64SImode) : NULL;