This patch adds auto-vectorization patterns for the vector BF16 extensions
(Zvfbfmin and Zvfbfwma).  As with the existing vector extension patterns, they
let the auto-vectorizer use vector BF16 instructions when vectorizing loops.
gcc/ChangeLog:
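* config/riscv/vector-bfloat16.md (VSF, VDF): New mode iterators.
(V_FP32TOBF16_TRUNC): Rename to ...
(V_FPWIDETOBF16_TRUNC): ... this, and add DF modes.
(v_fpwidetobf16_trunc): New mode attribute.
(extend<v_fpwidetobf16_trunc><mode>2): Add auto-vect pattern for
vector-bfloat16.
(trunc<mode><v_fpwidetobf16_trunc>2): Ditto.
(*widen_bf16_fma<mode>): Ditto.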
gcc/testsuite/ChangeLog:
* gcc.target/riscv/rvv/autovec/vfncvt-auto-vect.c: New test.
* gcc.target/riscv/rvv/autovec/vfwcvt-auto-vect.c: New test.
* gcc.target/riscv/rvv/autovec/vfwmacc-auto-vect.c: New test.
Signed-off-by: Feng Wang <[email protected]>
---
gcc/config/riscv/vector-bfloat16.md | 144 ++++++++++++++++--
.../riscv/rvv/autovec/vfncvt-auto-vect.c | 19 +++
.../riscv/rvv/autovec/vfwcvt-auto-vect.c | 19 +++
.../riscv/rvv/autovec/vfwmacc-auto-vect.c | 14 ++
4 files changed, 182 insertions(+), 14 deletions(-)
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/vfncvt-auto-vect.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/vfwcvt-auto-vect.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/vfwmacc-auto-vect.c
diff --git a/gcc/config/riscv/vector-bfloat16.md b/gcc/config/riscv/vector-bfloat16.md
index 562aa8ee5ed..e6482a83356 100644
--- a/gcc/config/riscv/vector-bfloat16.md
+++ b/gcc/config/riscv/vector-bfloat16.md
@@ -25,8 +25,24 @@
(RVVMF2SF "TARGET_VECTOR_ELEN_BF_16 && TARGET_VECTOR_ELEN_FP_32 &&
TARGET_MIN_VLEN > 32")
])
-(define_mode_attr V_FP32TOBF16_TRUNC [
+(define_mode_iterator VSF [
+ (RVVM8SF "TARGET_VECTOR_ELEN_FP_32") (RVVM4SF "TARGET_VECTOR_ELEN_FP_32")
(RVVM2SF "TARGET_VECTOR_ELEN_FP_32")
+ (RVVM1SF "TARGET_VECTOR_ELEN_FP_32") (RVVMF2SF "TARGET_VECTOR_ELEN_FP_32 &&
TARGET_MIN_VLEN > 32")
+])
+
+(define_mode_iterator VDF [
+ (RVVM8DF "TARGET_VECTOR_ELEN_FP_64") (RVVM4DF "TARGET_VECTOR_ELEN_FP_64")
+ (RVVM2DF "TARGET_VECTOR_ELEN_FP_64") (RVVM1DF "TARGET_VECTOR_ELEN_FP_64")
+])
+
+(define_mode_attr V_FPWIDETOBF16_TRUNC [
(RVVM8SF "RVVM4BF") (RVVM4SF "RVVM2BF") (RVVM2SF "RVVM1BF") (RVVM1SF
"RVVMF2BF") (RVVMF2SF "RVVMF4BF")
+ (RVVM8DF "RVVM2BF") (RVVM4DF "RVVM1BF") (RVVM2DF "RVVMF2BF") (RVVM1DF
"RVVMF4BF")
+])
+
+(define_mode_attr v_fpwidetobf16_trunc [
+ (RVVM8SF "rvvm4bf") (RVVM4SF "rvvm2bf") (RVVM2SF "rvvm1bf") (RVVM1SF
"rvvmf2bf") (RVVMF2SF "rvvmf4bf")
+ (RVVM8DF "rvvm2bf") (RVVM4DF "rvvm1bf") (RVVM2DF "rvvmf2bf") (RVVM1DF
"rvvmf4bf")
])
(define_mode_attr VF32_SUBEL [
@@ -35,8 +51,8 @@
;; Zvfbfmin extension
(define_insn "@pred_trunc<mode>_to_bf16"
- [(set (match_operand:<V_FP32TOBF16_TRUNC> 0 "register_operand" "=vd, vd,
vr, vr, &vr, &vr")
- (if_then_else:<V_FP32TOBF16_TRUNC>
+ [(set (match_operand:<V_FPWIDETOBF16_TRUNC> 0 "register_operand" "=vd, vd,
vr, vr, &vr, &vr")
+ (if_then_else:<V_FPWIDETOBF16_TRUNC>
(unspec:<VM>
[(match_operand:<VM> 1 "vector_mask_operand" " vm,
vm,Wc1,Wc1,vmWc1,vmWc1")
(match_operand 4 "vector_length_operand" " rK, rK,
rK, rK, rK, rK")
@@ -47,13 +63,13 @@
(reg:SI VL_REGNUM)
(reg:SI VTYPE_REGNUM)
(reg:SI FRM_REGNUM)] UNSPEC_VPREDICATE)
- (float_truncate:<V_FP32TOBF16_TRUNC>
+ (float_truncate:<V_FPWIDETOBF16_TRUNC>
(match_operand:VWEXTF_ZVFBF 3 "register_operand" " 0, 0,
0, 0, vr, vr"))
- (match_operand:<V_FP32TOBF16_TRUNC> 2 "vector_merge_operand" " vu, 0,
vu, 0, vu, 0")))]
+ (match_operand:<V_FPWIDETOBF16_TRUNC> 2 "vector_merge_operand" " vu,
0, vu, 0, vu, 0")))]
"TARGET_ZVFBFMIN"
"vfncvtbf16.f.f.w\t%0,%3%p1"
[(set_attr "type" "vfncvtbf16")
- (set_attr "mode" "<V_FP32TOBF16_TRUNC>")
+ (set_attr "mode" "<V_FPWIDETOBF16_TRUNC>")
(set (attr "frm_mode")
(symbol_ref "riscv_vector::get_frm_mode (operands[8])"))])
@@ -69,12 +85,12 @@
(reg:SI VL_REGNUM)
(reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
(float_extend:VWEXTF_ZVFBF
- (match_operand:<V_FP32TOBF16_TRUNC> 3 "register_operand" " vr,
vr"))
+ (match_operand:<V_FPWIDETOBF16_TRUNC> 3 "register_operand" " vr,
vr"))
(match_operand:VWEXTF_ZVFBF 2 "vector_merge_operand" " vu,
0")))]
"TARGET_ZVFBFMIN"
"vfwcvtbf16.f.f.v\t%0,%3%p1"
[(set_attr "type" "vfwcvtbf16")
- (set_attr "mode" "<V_FP32TOBF16_TRUNC>")])
+ (set_attr "mode" "<V_FPWIDETOBF16_TRUNC>")])
(define_insn "@pred_widen_bf16_mul_<mode>"
@@ -93,15 +109,15 @@
(plus:VWEXTF_ZVFBF
(mult:VWEXTF_ZVFBF
(float_extend:VWEXTF_ZVFBF
- (match_operand:<V_FP32TOBF16_TRUNC> 3 "register_operand" " vr"))
+ (match_operand:<V_FPWIDETOBF16_TRUNC> 3 "register_operand" "
vr"))
(float_extend:VWEXTF_ZVFBF
- (match_operand:<V_FP32TOBF16_TRUNC> 4 "register_operand" " vr")))
+ (match_operand:<V_FPWIDETOBF16_TRUNC> 4 "register_operand" "
vr")))
(match_operand:VWEXTF_ZVFBF 2 "register_operand" " 0"))
(match_dup 2)))]
"TARGET_ZVFBFWMA"
"vfwmaccbf16.vv\t%0,%3,%4%p1"
[(set_attr "type" "vfwmaccbf16")
- (set_attr "mode" "<V_FP32TOBF16_TRUNC>")
+ (set_attr "mode" "<V_FPWIDETOBF16_TRUNC>")
(set (attr "frm_mode")
(symbol_ref "riscv_vector::get_frm_mode (operands[9])"))])
@@ -121,15 +137,115 @@
(plus:VWEXTF_ZVFBF
(mult:VWEXTF_ZVFBF
(float_extend:VWEXTF_ZVFBF
- (vec_duplicate:<V_FP32TOBF16_TRUNC>
+ (vec_duplicate:<V_FPWIDETOBF16_TRUNC>
(match_operand:<VF32_SUBEL> 3 "register_operand" " f")))
(float_extend:VWEXTF_ZVFBF
- (match_operand:<V_FP32TOBF16_TRUNC> 4 "register_operand" " vr")))
+ (match_operand:<V_FPWIDETOBF16_TRUNC> 4 "register_operand" "
vr")))
(match_operand:VWEXTF_ZVFBF 2 "register_operand" " 0"))
(match_dup 2)))]
"TARGET_ZVFBFWMA"
"vfwmaccbf16.vf\t%0,%3,%4%p1"
[(set_attr "type" "vfwmaccbf16")
- (set_attr "mode" "<V_FP32TOBF16_TRUNC>")
+ (set_attr "mode" "<V_FPWIDETOBF16_TRUNC>")
(set (attr "frm_mode")
(symbol_ref "riscv_vector::get_frm_mode (operands[9])"))])
+
+;; Auto-vectorization patterns.
+
+;; -------------------------------------------------------------------------
+;; ---- [BF16] Widening.
+;; -------------------------------------------------------------------------
+;; - vfwcvtbf16.f.f.v
+;; -------------------------------------------------------------------------
+(define_insn_and_split "extend<v_fpwidetobf16_trunc><mode>2"
+ [(set (match_operand:VWEXTF_ZVFBF 0 "register_operand" "=&vr")
+ (float_extend:VWEXTF_ZVFBF
+ (match_operand:<V_FPWIDETOBF16_TRUNC> 1 "register_operand" " vr")))]
+ "TARGET_ZVFBFMIN && can_create_pseudo_p ()"
+ "#"
+ "&& 1"
+ [(const_int 0)]
+{
+ insn_code icode = code_for_pred_extend_bf16_to (<MODE>mode);
+ riscv_vector::emit_vlmax_insn (icode, riscv_vector::UNARY_OP, operands);
+ DONE;
+}
+ [(set_attr "type" "vfwcvtbf16")
+ (set_attr "mode" "<MODE>")])
+
+(define_expand "extend<v_fpwidetobf16_trunc><mode>2"
+ [(set (match_operand:VDF 0 "register_operand")
+ (float_extend:VDF
+ (match_operand:<V_FPWIDETOBF16_TRUNC> 1 "register_operand")))]
+ "TARGET_ZVFBFMIN"
+{
+ rtx dblw = gen_reg_rtx (<V_DOUBLE_TRUNC>mode);
+ emit_insn (gen_extend<v_fpwidetobf16_trunc><v_double_trunc>2 (dblw,
operands[1]));
+ emit_insn (gen_extend<v_double_trunc><mode>2 (operands[0], dblw));
+ DONE;
+})
+
+;; -------------------------------------------------------------------------
+;; ---- [BF16] Narrowing.
+;; -------------------------------------------------------------------------
+;; - vfncvtbf16.f.f.w
+;; -------------------------------------------------------------------------
+(define_insn_and_split "trunc<mode><v_fpwidetobf16_trunc>2"
+ [(set (match_operand:<V_FPWIDETOBF16_TRUNC> 0 "register_operand" "=vr")
+ (float_truncate:<V_FPWIDETOBF16_TRUNC>
+ (match_operand:VSF 1 "register_operand" " vr")))]
+ "TARGET_ZVFBFMIN && can_create_pseudo_p ()"
+ "#"
+ "&& 1"
+ [(const_int 0)]
+{
+ insn_code icode = code_for_pred_trunc_to_bf16 (<MODE>mode);
+ riscv_vector::emit_vlmax_insn (icode, riscv_vector::UNARY_OP_FRM_DYN,
operands);
+ DONE;
+}
+ [(set_attr "type" "vfncvtbf16")
+ (set_attr "mode" "<MODE>")])
+
+(define_expand "trunc<mode><v_fpwidetobf16_trunc>2"
+ [(set (match_operand:<V_FPWIDETOBF16_TRUNC> 0 "register_operand")
+ (float_truncate:<V_FPWIDETOBF16_TRUNC>
+ (match_operand:VDF 1 "register_operand")))]
+ "TARGET_ZVFBFMIN"
+{
+ rtx half = gen_reg_rtx (<V_DOUBLE_TRUNC>mode);
+ rtx opshalf[] = {half, operands[1]};
+
+ /* Per the RISC-V V spec 13.19, every narrowing step except the last must
+ use vfncvt.rod.f.f.w (round-towards-odd) to avoid double rounding. */
+ insn_code icode = code_for_pred_rod_trunc (<MODE>mode);
+ riscv_vector::emit_vlmax_insn (icode, riscv_vector::UNARY_OP, opshalf);
+
+ emit_insn (gen_trunc<v_double_trunc><v_fpwidetobf16_trunc>2 (operands[0],
half));
+ DONE;
+})
+
+;; -------------------------------------------------------------------------
+;; - vfwmaccbf16
+;; -------------------------------------------------------------------------
+;; Combine extend + multiply-add into a widening FMA (vfwmaccbf16.vv)
+(define_insn_and_split "*widen_bf16_fma<mode>"
+ [(set (match_operand:VWEXTF_ZVFBF 0 "register_operand")
+ (plus:VWEXTF_ZVFBF
+ (mult:VWEXTF_ZVFBF
+ (float_extend:VWEXTF_ZVFBF
+ (match_operand:<V_FPWIDETOBF16_TRUNC> 2 "register_operand"))
+ (float_extend:VWEXTF_ZVFBF
+ (match_operand:<V_FPWIDETOBF16_TRUNC> 3 "register_operand")))
+ (match_operand:VWEXTF_ZVFBF 1 "register_operand")))]
+ "TARGET_ZVFBFWMA && can_create_pseudo_p ()"
+ "#"
+ "&& 1"
+ [(const_int 0)]
+ {
+ rtx ops[] = {operands[0], operands[1], operands[2], operands[3]};
+ riscv_vector::emit_vlmax_insn (code_for_pred_widen_bf16_mul (<MODE>mode),
+ riscv_vector::WIDEN_TERNARY_OP_FRM_DYN, ops);
+ DONE;
+ }
+ [(set_attr "type" "vfwmaccbf16")
+ (set_attr "mode" "<MODE>")])
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vfncvt-auto-vect.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vfncvt-auto-vect.c
new file mode 100644
index 00000000000..7ba3615ccf1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vfncvt-auto-vect.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-march=rv32gcv_zvfbfmin -mabi=ilp32d" } */
+
+__attribute__((noipa))
+void vfncvt_float_BFloat16 (__bf16 *dst, float *a, int n)
+{
+ for (int i = 0; i < n; i++)
+ dst[i] = (__bf16)a[i];
+}
+
+__attribute__((noipa))
+void vfncvt_double_BFloat16 (__bf16 *dst, double *a, int n)
+{
+ for (int i = 0; i < n; i++)
+ dst[i] = (__bf16)a[i];
+}
+
+/* { dg-final { scan-assembler-times {\tvfncvtbf16\.f\.f\.w} 2 } } */
+/* { dg-final { scan-assembler-times {\tvfncvt\.rod\.f\.f\.w} 1 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vfwcvt-auto-vect.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vfwcvt-auto-vect.c
new file mode 100644
index 00000000000..6629dd909a0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vfwcvt-auto-vect.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-march=rv32gcv_zvfbfmin -mabi=ilp32d" } */
+
+__attribute__((noipa))
+void vfwcvt__BFloat16float (float *dst, __bf16 *a, int n)
+{
+ for (int i = 0; i < n; i++)
+ dst[i] = (float)a[i];
+}
+
+__attribute__((noipa))
+void vfwcvt__BFloat16double (double *dst, __bf16 *a, int n)
+{
+ for (int i = 0; i < n; i++)
+ dst[i] = (double)a[i];
+}
+
+/* { dg-final { scan-assembler-times {\tvfwcvtbf16\.f\.f\.v} 2 } } */
+/* { dg-final { scan-assembler-times {\tvfwcvt\.f\.f\.v} 1 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vfwmacc-auto-vect.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vfwmacc-auto-vect.c
new file mode 100644
index 00000000000..a767f2c8ef8
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vfwmacc-auto-vect.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-march=rv32gcv_zvfbfwma -mabi=ilp32d -ffast-math" } */
+
+__attribute__ ((noipa))
+void vwmacc_float_bf16 (float *__restrict dst,
+ __bf16 *__restrict a,
+ __bf16 *__restrict b,
+ int n)
+{
+ for (int i = 0; i < n; i++)
+ dst[i] += (float) (a[i] * b[i]);
+}
+
+/* { dg-final { scan-assembler-times {\tvfwmaccbf16\.vv} 1 } } */
--
2.17.1