On 2024-10-18 17:53  钟居哲 <juzhe.zh...@rivai.ai> wrote:
>
>Could you add a run test case (verified by QEMU or Spike)?
> 
I added a run test case and verified it with QEMU. While doing so, I found
that another change is needed at the same time: the QEMU flags (zvfbfmin and
zvfbfwma) have to be added to the march-to-cpu-opt Python script.
@Kito, should I submit the script change on GitHub?
Thanks.
>
>
>juzhe.zh...@rivai.ai
>
>From: Feng Wang
>Date: 2024-10-18 15:24
>To: gcc-patches
>CC: kito.cheng; juzhe.zhong; Feng Wang
>Subject: [PATCH v2] RISC-V:Auto vect for vector-bfloat16
>This patch adds auto-vect patterns for the vector-bfloat16 extensions.
>As with the other vector extensions, these patterns allow vector
>BF16 instructions to be used when auto-vectorizing for loops.
>gcc/ChangeLog:
>
>* config/riscv/autovec-opt.md (*widen_bf16_fma<mode>):
>Add vfwmacc auto-vect opt pattern for vector-bfloat16.
>* config/riscv/vector-bfloat16.md (extend<v_fpwidetobf16_trunc><mode>2):
>Add auto-vect pattern for Zvfbfmin extension.
>(trunc<mode><v_fpwidetobf16_trunc>2): Ditto.
>* config/riscv/vector-iterators.md:
>Move vector-bfloat16 iterator definitions from vector-bfloat16.md.
>
>gcc/testsuite/ChangeLog:
>
>* gcc.target/riscv/rvv/autovec/vfncvt-auto-vect.c: New test.
>* gcc.target/riscv/rvv/autovec/vfwcvt-auto-vect.c: New test.
>* gcc.target/riscv/rvv/autovec/vfwmacc-auto-vect.c: New test.
>
>Signed-off-by: Feng Wang <wangf...@eswincomputing.com>
>---
>gcc/config/riscv/autovec-opt.md               |  23 ++++
>gcc/config/riscv/vector-bfloat16.md           | 116 +++++++++++++-----
>gcc/config/riscv/vector-iterators.md          |  32 +++++
>.../riscv/rvv/autovec/vfncvt-auto-vect.c      |  19 +++
>.../riscv/rvv/autovec/vfwcvt-auto-vect.c      |  19 +++
>.../riscv/rvv/autovec/vfwmacc-auto-vect.c     |  14 +++
>6 files changed, 195 insertions(+), 28 deletions(-)
>create mode 100644 
>gcc/testsuite/gcc.target/riscv/rvv/autovec/vfncvt-auto-vect.c
>create mode 100644 
>gcc/testsuite/gcc.target/riscv/rvv/autovec/vfwcvt-auto-vect.c
>create mode 100644 
>gcc/testsuite/gcc.target/riscv/rvv/autovec/vfwmacc-auto-vect.c
>
>diff --git a/gcc/config/riscv/autovec-opt.md b/gcc/config/riscv/autovec-opt.md
>index 4b33a145c17..0c6722601ff 100644
>--- a/gcc/config/riscv/autovec-opt.md
>+++ b/gcc/config/riscv/autovec-opt.md
>@@ -1009,6 +1009,29 @@
>   }
>   [(set_attr "type" "vfwmuladd")])
>+;; vfwmacc for vector_bfloat16
>+(define_insn_and_split "*widen_bf16_fma<mode>"
>+  [(set (match_operand:VWEXTF_ZVFBF 0 "register_operand")
>+        (plus:VWEXTF_ZVFBF
>+   (mult:VWEXTF_ZVFBF
>+            (float_extend:VWEXTF_ZVFBF
>+       (match_operand:<V_FPWIDETOBF16_TRUNC> 2 "register_operand"))
>+            (float_extend:VWEXTF_ZVFBF
>+       (match_operand:<V_FPWIDETOBF16_TRUNC> 3 "register_operand")))
>+   (match_operand:VWEXTF_ZVFBF 1 "register_operand")))]
>+  "TARGET_ZVFBFWMA && can_create_pseudo_p ()"
>+  "#"
>+  "&& 1"
>+  [(const_int 0)]
>+  {
>+    rtx ops[] = {operands[0], operands[1], operands[2], operands[3]};
>+    riscv_vector::emit_vlmax_insn (code_for_pred_widen_bf16_mul (<MODE>mode),
>+    riscv_vector::WIDEN_TERNARY_OP_FRM_DYN, ops);
>+    DONE;
>+  }
>+  [(set_attr "type" "vfwmaccbf16")
>+   (set_attr "mode" "<MODE>")])
>+
>;; This combine pattern does not correspond to an single instruction.
>;; This is a temporary pattern produced by a combine pass and if there
>;; is no further combine into widen pattern, then fall back to extend
>diff --git a/gcc/config/riscv/vector-bfloat16.md 
>b/gcc/config/riscv/vector-bfloat16.md
>index 562aa8ee5ed..90b174be2e7 100644
>--- a/gcc/config/riscv/vector-bfloat16.md
>+++ b/gcc/config/riscv/vector-bfloat16.md
>@@ -17,26 +17,11 @@
>;; along with GCC; see the file COPYING3.  If not see
>;; <http://www.gnu.org/licenses/>.
>-(define_mode_iterator VWEXTF_ZVFBF [
>-  (RVVM8SF  "TARGET_VECTOR_ELEN_BF_16 && TARGET_VECTOR_ELEN_FP_32")
>-  (RVVM4SF  "TARGET_VECTOR_ELEN_BF_16 && TARGET_VECTOR_ELEN_FP_32")
>-  (RVVM2SF  "TARGET_VECTOR_ELEN_BF_16 && TARGET_VECTOR_ELEN_FP_32")
>-  (RVVM1SF  "TARGET_VECTOR_ELEN_BF_16 && TARGET_VECTOR_ELEN_FP_32")
>-  (RVVMF2SF "TARGET_VECTOR_ELEN_BF_16 && TARGET_VECTOR_ELEN_FP_32 && 
>TARGET_MIN_VLEN > 32")
>-])
>-
>-(define_mode_attr V_FP32TOBF16_TRUNC [
>-  (RVVM8SF "RVVM4BF") (RVVM4SF "RVVM2BF") (RVVM2SF "RVVM1BF") (RVVM1SF 
>"RVVMF2BF") (RVVMF2SF "RVVMF4BF")
>-])
>-
>-(define_mode_attr VF32_SUBEL [
>-   (RVVM8SF "BF") (RVVM4SF "BF") (RVVM2SF "BF") (RVVM1SF "BF") (RVVMF2SF 
>"BF")])
>-
>;; Zvfbfmin extension
>(define_insn "@pred_trunc<mode>_to_bf16"
>-  [(set (match_operand:<V_FP32TOBF16_TRUNC> 0 "register_operand"   "=vd, vd, 
>vr, vr,  &vr,  &vr")
>-     (if_then_else:<V_FP32TOBF16_TRUNC>
>+  [(set (match_operand:<V_FPWIDETOBF16_TRUNC> 0 "register_operand"   "=vd, 
>vd, vr, vr,  &vr,  &vr")
>+     (if_then_else:<V_FPWIDETOBF16_TRUNC>
>        (unspec:<VM>
>          [(match_operand:<VM> 1 "vector_mask_operand"              " vm, 
>vm,Wc1,Wc1,vmWc1,vmWc1")
>           (match_operand 4 "vector_length_operand"                 " rK, rK, 
>rK, rK,   rK,   rK")
>@@ -47,13 +32,13 @@
>           (reg:SI VL_REGNUM)
>           (reg:SI VTYPE_REGNUM)
>           (reg:SI FRM_REGNUM)] UNSPEC_VPREDICATE)
>-       (float_truncate:<V_FP32TOBF16_TRUNC>
>+       (float_truncate:<V_FPWIDETOBF16_TRUNC>
>           (match_operand:VWEXTF_ZVFBF 3 "register_operand"          "  0,  0, 
> 0,  0,   vr,   vr"))
>-       (match_operand:<V_FP32TOBF16_TRUNC> 2 "vector_merge_operand" " vu,  0, 
>vu,  0,   vu,    0")))]
>+       (match_operand:<V_FPWIDETOBF16_TRUNC> 2 "vector_merge_operand" " vu,  
>0, vu,  0,   vu,    0")))]
>   "TARGET_ZVFBFMIN"
>   "vfncvtbf16.f.f.w\t%0,%3%p1"
>   [(set_attr "type" "vfncvtbf16")
>-   (set_attr "mode" "<V_FP32TOBF16_TRUNC>")
>+   (set_attr "mode" "<V_FPWIDETOBF16_TRUNC>")
>    (set (attr "frm_mode")
>(symbol_ref "riscv_vector::get_frm_mode (operands[8])"))])
>@@ -69,12 +54,12 @@
>          (reg:SI VL_REGNUM)
>          (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
>       (float_extend:VWEXTF_ZVFBF
>-         (match_operand:<V_FP32TOBF16_TRUNC> 3 "register_operand" "   vr,   
>vr"))
>+         (match_operand:<V_FPWIDETOBF16_TRUNC> 3 "register_operand" "   vr,   
>vr"))
>       (match_operand:VWEXTF_ZVFBF 2 "vector_merge_operand"        "   vu,    
>0")))]
>   "TARGET_ZVFBFMIN"
>   "vfwcvtbf16.f.f.v\t%0,%3%p1"
>   [(set_attr "type" "vfwcvtbf16")
>-   (set_attr "mode" "<V_FP32TOBF16_TRUNC>")])
>+   (set_attr "mode" "<V_FPWIDETOBF16_TRUNC>")])
>(define_insn "@pred_widen_bf16_mul_<mode>"
>@@ -93,15 +78,15 @@
>       (plus:VWEXTF_ZVFBF
>         (mult:VWEXTF_ZVFBF
>           (float_extend:VWEXTF_ZVFBF
>-            (match_operand:<V_FP32TOBF16_TRUNC> 3 "register_operand" "   vr"))
>+            (match_operand:<V_FPWIDETOBF16_TRUNC> 3 "register_operand" "   
>vr"))
>           (float_extend:VWEXTF_ZVFBF
>-            (match_operand:<V_FP32TOBF16_TRUNC> 4 "register_operand" "   
>vr")))
>+            (match_operand:<V_FPWIDETOBF16_TRUNC> 4 "register_operand" "   
>vr")))
>         (match_operand:VWEXTF_ZVFBF 2 "register_operand"             "    0"))
>       (match_dup 2)))]
>   "TARGET_ZVFBFWMA"
>   "vfwmaccbf16.vv\t%0,%3,%4%p1"
>   [(set_attr "type" "vfwmaccbf16")
>-   (set_attr "mode" "<V_FP32TOBF16_TRUNC>")
>+   (set_attr "mode" "<V_FPWIDETOBF16_TRUNC>")
>    (set (attr "frm_mode")
>(symbol_ref "riscv_vector::get_frm_mode (operands[9])"))])
>@@ -121,15 +106,90 @@
>       (plus:VWEXTF_ZVFBF
>         (mult:VWEXTF_ZVFBF
>           (float_extend:VWEXTF_ZVFBF
>-            (vec_duplicate:<V_FP32TOBF16_TRUNC>
>+            (vec_duplicate:<V_FPWIDETOBF16_TRUNC>
>               (match_operand:<VF32_SUBEL> 3 "register_operand"       "    
>f")))
>           (float_extend:VWEXTF_ZVFBF
>-            (match_operand:<V_FP32TOBF16_TRUNC> 4 "register_operand" "   
>vr")))
>+            (match_operand:<V_FPWIDETOBF16_TRUNC> 4 "register_operand" "   
>vr")))
>         (match_operand:VWEXTF_ZVFBF 2 "register_operand"             "    0"))
>       (match_dup 2)))]
>   "TARGET_ZVFBFWMA"
>   "vfwmaccbf16.vf\t%0,%3,%4%p1"
>   [(set_attr "type" "vfwmaccbf16")
>-   (set_attr "mode" "<V_FP32TOBF16_TRUNC>")
>+   (set_attr "mode" "<V_FPWIDETOBF16_TRUNC>")
>    (set (attr "frm_mode")
>(symbol_ref "riscv_vector::get_frm_mode (operands[9])"))])
>+
>+;; Auto vect pattern
>+
>+;; -------------------------------------------------------------------------
>+;; ---- [BF16] Widening.
>+;; -------------------------------------------------------------------------
>+;; - vfwcvtbf16.f.f.v
>+;; -------------------------------------------------------------------------
>+(define_insn_and_split "extend<v_fpwidetobf16_trunc><mode>2"
>+  [(set (match_operand:VWEXTF_ZVFBF 0 "register_operand" "=&vr")
>+    (float_extend:VWEXTF_ZVFBF
>+     (match_operand:<V_FPWIDETOBF16_TRUNC>  1 "register_operand" "  vr")))]
>+  "TARGET_ZVFBFMIN && can_create_pseudo_p ()"
>+  "#"
>+  "&& 1"
>+  [(const_int 0)]
>+{
>+  insn_code icode = code_for_pred_extend_bf16_to (<MODE>mode);
>+  riscv_vector::emit_vlmax_insn (icode, riscv_vector::UNARY_OP, operands);
>+  DONE;
>+}
>+  [(set_attr "type" "vfwcvtbf16")
>+   (set_attr "mode" "<MODE>")])
>+
>+(define_expand "extend<v_fpwidetobf16_trunc><mode>2"
>+  [(set (match_operand:VDF 0 "register_operand")
>+    (float_extend:VDF
>+     (match_operand:<V_FPWIDETOBF16_TRUNC> 1 "register_operand")))]
>+  "TARGET_ZVFBFMIN"
>+{
>+  rtx dblw = gen_reg_rtx (<V_DOUBLE_TRUNC>mode);
>+  emit_insn (gen_extend<v_fpwidetobf16_trunc><v_double_trunc>2 (dblw, 
>operands[1]));
>+  emit_insn (gen_extend<v_double_trunc><mode>2 (operands[0], dblw));
>+  DONE;
>+})
>+
>+;; -------------------------------------------------------------------------
>+;; ---- [BF16] Narrowing.
>+;; -------------------------------------------------------------------------
>+;; - vfncvtbf16.f.f.w
>+;; -------------------------------------------------------------------------
>+(define_insn_and_split "trunc<mode><v_fpwidetobf16_trunc>2"
>+  [(set (match_operand:<V_FPWIDETOBF16_TRUNC> 0 "register_operand" "=vr")
>+    (float_truncate:<V_FPWIDETOBF16_TRUNC>
>+     (match_operand:VSF 1 "register_operand"      " vr")))]
>+  "TARGET_ZVFBFMIN && can_create_pseudo_p ()"
>+  "#"
>+  "&& 1"
>+  [(const_int 0)]
>+{
>+  insn_code icode = code_for_pred_trunc_to_bf16 (<MODE>mode);
>+  riscv_vector::emit_vlmax_insn (icode, riscv_vector::UNARY_OP_FRM_DYN, 
>operands);
>+  DONE;
>+}
>+  [(set_attr "type" "vfncvtbf16")
>+   (set_attr "mode" "<MODE>")])
>+
>+(define_expand "trunc<mode><v_fpwidetobf16_trunc>2"
>+  [(set (match_operand:<V_FPWIDETOBF16_TRUNC> 0 "register_operand")
>+    (float_truncate:<V_FPWIDETOBF16_TRUNC>
>+     (match_operand:VDF 1 "register_operand")))]
>+  "TARGET_ZVFBFMIN"
>+{
>+  rtx half = gen_reg_rtx (<V_DOUBLE_TRUNC>mode);
>+  rtx opshalf[] = {half, operands[1]};
>+
>+  /* According to the RISC-V V Spec 13.19. we need to use
>+     vfncvt.rod.f.f.w for all steps but the last.  */
>+  insn_code icode = code_for_pred_rod_trunc (<MODE>mode);
>+  riscv_vector::emit_vlmax_insn (icode, riscv_vector::UNARY_OP, opshalf);
>+
>+  emit_insn (gen_trunc<v_double_trunc><v_fpwidetobf16_trunc>2 (operands[0], 
>half));
>+  DONE;
>+})
>+
>diff --git a/gcc/config/riscv/vector-iterators.md 
>b/gcc/config/riscv/vector-iterators.md
>index 43325d1ba87..a53c5233839 100644
>--- a/gcc/config/riscv/vector-iterators.md
>+++ b/gcc/config/riscv/vector-iterators.md
>@@ -4512,3 +4512,35 @@
>   (V256DF "v64df")
>   (V512DF "v128df")
>])
>+
>+;;vector bfloat16
>+(define_mode_iterator VWEXTF_ZVFBF [
>+  (RVVM8SF  "TARGET_VECTOR_ELEN_BF_16 && TARGET_VECTOR_ELEN_FP_32")
>+  (RVVM4SF  "TARGET_VECTOR_ELEN_BF_16 && TARGET_VECTOR_ELEN_FP_32")
>+  (RVVM2SF  "TARGET_VECTOR_ELEN_BF_16 && TARGET_VECTOR_ELEN_FP_32")
>+  (RVVM1SF  "TARGET_VECTOR_ELEN_BF_16 && TARGET_VECTOR_ELEN_FP_32")
>+  (RVVMF2SF "TARGET_VECTOR_ELEN_BF_16 && TARGET_VECTOR_ELEN_FP_32 && 
>TARGET_MIN_VLEN > 32")
>+])
>+
>+(define_mode_iterator VSF [
>+  (RVVM8SF "TARGET_VECTOR_ELEN_FP_32") (RVVM4SF "TARGET_VECTOR_ELEN_FP_32") 
>(RVVM2SF "TARGET_VECTOR_ELEN_FP_32")
>+  (RVVM1SF "TARGET_VECTOR_ELEN_FP_32") (RVVMF2SF "TARGET_VECTOR_ELEN_FP_32 && 
>TARGET_MIN_VLEN > 32")
>+])
>+
>+(define_mode_iterator VDF [
>+  (RVVM8DF "TARGET_VECTOR_ELEN_FP_64") (RVVM4DF "TARGET_VECTOR_ELEN_FP_64")
>+  (RVVM2DF "TARGET_VECTOR_ELEN_FP_64") (RVVM1DF "TARGET_VECTOR_ELEN_FP_64")
>+])
>+
>+(define_mode_attr V_FPWIDETOBF16_TRUNC [
>+  (RVVM8SF "RVVM4BF") (RVVM4SF "RVVM2BF") (RVVM2SF "RVVM1BF") (RVVM1SF 
>"RVVMF2BF") (RVVMF2SF "RVVMF4BF")
>+  (RVVM8DF "RVVM2BF") (RVVM4DF "RVVM1BF") (RVVM2DF "RVVMF2BF") (RVVM1DF 
>"RVVMF4BF")
>+])
>+
>+(define_mode_attr v_fpwidetobf16_trunc [
>+  (RVVM8SF "rvvm4bf") (RVVM4SF "rvvm2bf") (RVVM2SF "rvvm1bf") (RVVM1SF 
>"rvvmf2bf") (RVVMF2SF "rvvmf4bf")
>+  (RVVM8DF "rvvm2bf") (RVVM4DF "rvvm1bf") (RVVM2DF "rvvmf2bf") (RVVM1DF 
>"rvvmf4bf")
>+])
>+
>+(define_mode_attr VF32_SUBEL [
>+   (RVVM8SF "BF") (RVVM4SF "BF") (RVVM2SF "BF") (RVVM1SF "BF") (RVVMF2SF 
>"BF")])
>diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vfncvt-auto-vect.c 
>b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vfncvt-auto-vect.c
>new file mode 100644
>index 00000000000..7ba3615ccf1
>--- /dev/null
>+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vfncvt-auto-vect.c
>@@ -0,0 +1,19 @@
>+/* { dg-do compile } */
>+/* { dg-additional-options "-march=rv32gcv_zvfbfmin -mabi=ilp32d" } */
>+
>+__attribute__((noipa))
>+void vfncvt_float_BFloat16 (__bf16 *dst, float *a, int n)
>+{
>+  for (int i = 0; i < n; i++)
>+    dst[i] = (__bf16)a[i];
>+}
>+
>+__attribute__((noipa))
>+void vfncvt_double_BFloat16 (__bf16 *dst, double *a, int n)
>+{
>+  for (int i = 0; i < n; i++)
>+    dst[i] = (__bf16)a[i];
>+}
>+
>+/* { dg-final { scan-assembler-times {\tvfncvtbf16\.f\.f\.w} 2 } } */
>+/* { dg-final { scan-assembler-times {\tvfncvt\.rod\.f\.f\.w} 1 } } */
>diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vfwcvt-auto-vect.c 
>b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vfwcvt-auto-vect.c
>new file mode 100644
>index 00000000000..6629dd909a0
>--- /dev/null
>+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vfwcvt-auto-vect.c
>@@ -0,0 +1,19 @@
>+/* { dg-do compile } */
>+/* { dg-additional-options "-march=rv32gcv_zvfbfmin -mabi=ilp32d" } */
>+
>+__attribute__((noipa))
>+void vfwcvt__BFloat16float (float *dst, __bf16 *a, int n)
>+{
>+  for (int i = 0; i < n; i++)
>+    dst[i] = (float)a[i];
>+}
>+
>+__attribute__((noipa))
>+void vfwcvt__BFloat16double (double *dst, __bf16 *a, int n)
>+{
>+  for (int i = 0; i < n; i++)
>+    dst[i] = (double)a[i];
>+}
>+
>+/* { dg-final { scan-assembler-times {\tvfwcvtbf16\.f\.f\.v} 2 } } */
>+/* { dg-final { scan-assembler-times {\tvfwcvt\.f\.f\.v} 1 } } */
>diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vfwmacc-auto-vect.c 
>b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vfwmacc-auto-vect.c
>new file mode 100644
>index 00000000000..a767f2c8ef8
>--- /dev/null
>+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vfwmacc-auto-vect.c
>@@ -0,0 +1,14 @@
>+/* { dg-do compile } */
>+/* { dg-additional-options "-march=rv32gcv_zvfbfwma -mabi=ilp32d -ffast-math" 
>} */
>+
>+__attribute__ ((noipa))
>+void vwmacc_float_bf16 (float *__restrict dst,
>+ __bf16 *__restrict a,
>+ __bf16 *__restrict b,
>+ int n)
>+{
>+  for (int i = 0; i < n; i++)
>+    dst[i] += (float) (a[i] * b[i]);
>+}
>+
>+/* { dg-final { scan-assembler-times {\tvfwmaccbf16\.vv} 1 } } */
>--
>2.17.1
>
>

Reply via email to