[GCC][PATCH][Aarch64] Add Bfloat16_t scalar type, vector types and machine modes to Aarch64 back-end [1/2]

Stam Markianos-Wright Wed, 18 Dec 2019 08:35:43 -0800

Hi all,

This patch adds Bfloat type support to the ARM back-end.
It also adds a new machine_mode (BFmode) for this type and accompanying Vector 
modes V4BFmode and V8BFmode.


The second patch in this series uses existing target hooks to restrict type use.

Regression testing on aarch64-none-elf passed successfully.

This patch depends on:

https://gcc.gnu.org/ml/gcc-patches/2019-12/msg00857.html

for test suite effective_target update.

Ok for trunk?

Cheers,
Stam


ACLE documents are at https://developer.arm.com/docs/101028/latest
ISA documents are at https://developer.arm.com/docs/ddi0596/latest

Details on ARM Bfloat can be found here:
https://community.arm.com/developer/ip-products/processors/b/ml-ip-blog/posts/bfloat16-processing-for-neural-networks-on-armv8_2d00_a
 


PS. I don't have commit rights, so if someone could commit on my behalf,
that would be great :)



gcc/ChangeLog:

2019-12-16  Stam Markianos-Wright  <stam.markianos-wri...@arm.com>

        * config.gcc: Add arm_bf16.h.
        * config/aarch64/aarch64-builtins.c
         (aarch64_simd_builtin_std_type): Add BFmode.
         (aarch64_init_simd_builtin_types): Add element types for vector types.
        (aarch64_init_bf16_types): New function.
        (aarch64_general_init_builtins): Add arm_init_bf16_types function call.
        * config/aarch64/aarch64-modes.def: Add BFmode and vector modes.
        * config/aarch64/aarch64-simd-builtin-types.def:
        * config/aarch64/aarch64-simd.md: Add BF types to NEON move patterns.
        * config/aarch64/aarch64.c (aarch64_classify_vector_mode): Add BF modes.
        (aarch64_gimplify_va_arg_expr): Add BFmode.
        * config/aarch64/aarch64.h (AARCH64_VALID_SIMD_DREG_MODE): Add V4BF.
        (AARCH64_VALID_SIMD_QREG_MODE): Add V8BF.
        * config/aarch64/aarch64.md: New enabled_for_bfmode_scalar,
          enabled_for_bfmode_vector attributes. Add BFmode to movhf pattern.
        * config/aarch64/arm_bf16.h: New file.
        * config/aarch64/arm_neon.h: Add arm_bf16.h and Bfloat vector types.
        * config/aarch64/iterators.md
          (HFBF, GPF_TF_F16_MOV, VDMOV, VQMOV, VALL_F16MOV): New.



gcc/testsuite/ChangeLog:

2019-12-16  Stam Markianos-Wright  <stam.markianos-wri...@arm.com>

        * gcc.target/aarch64/bfloat16_compile.c: New test.

diff --git a/gcc/config.gcc b/gcc/config.gcc
index 9802f436e06..b49c110ccaf 100644
--- a/gcc/config.gcc
+++ b/gcc/config.gcc
@@ -315,7 +315,7 @@ m32c*-*-*)
         ;;
 aarch64*-*-*)
 	cpu_type=aarch64
-	extra_headers="arm_fp16.h arm_neon.h arm_acle.h arm_sve.h"
+	extra_headers="arm_fp16.h arm_neon.h arm_bf16.h arm_acle.h arm_sve.h"
 	c_target_objs="aarch64-c.o"
 	cxx_target_objs="aarch64-c.o"
 	d_target_objs="aarch64-d.o"
diff --git a/gcc/config/aarch64/aarch64-builtins.c b/gcc/config/aarch64/aarch64-builtins.c
index c35a1b1f029..3ba2f12166f 100644
--- a/gcc/config/aarch64/aarch64-builtins.c
+++ b/gcc/config/aarch64/aarch64-builtins.c
@@ -68,6 +68,9 @@
 #define hi_UP    E_HImode
 #define hf_UP    E_HFmode
 #define qi_UP    E_QImode
+#define bf_UP    E_BFmode
+#define v4bf_UP  E_V4BFmode
+#define v8bf_UP  E_V8BFmode
 #define UP(X) X##_UP
 
 #define SIMD_MAX_BUILTIN_ARGS 5
@@ -568,6 +571,10 @@ static tree aarch64_simd_intXI_type_node = NULL_TREE;
 tree aarch64_fp16_type_node = NULL_TREE;
 tree aarch64_fp16_ptr_type_node = NULL_TREE;
 
+/* Back-end node type for brain float (bfloat) types.  */
+tree aarch64_bf16_type_node = NULL_TREE;
+tree aarch64_bf16_ptr_type_node = NULL_TREE;
+
 /* Wrapper around add_builtin_function.  NAME is the name of the built-in
    function, TYPE is the function type, and CODE is the function subcode
    (relative to AARCH64_BUILTIN_GENERAL).  */
@@ -659,6 +666,8 @@ aarch64_simd_builtin_std_type (machine_mode mode,
       return float_type_node;
     case E_DFmode:
       return double_type_node;
+    case E_BFmode:
+      return aarch64_bf16_type_node;
     default:
       gcc_unreachable ();
     }
@@ -750,6 +759,11 @@ aarch64_init_simd_builtin_types (void)
   aarch64_simd_types[Float64x1_t].eltype = double_type_node;
   aarch64_simd_types[Float64x2_t].eltype = double_type_node;
 
+
+/* Init Bfloat vector types with underlying uint types.  */
+  aarch64_simd_types[Bfloat16x4_t].eltype = aarch64_bf16_type_node;
+  aarch64_simd_types[Bfloat16x8_t].eltype = aarch64_bf16_type_node;
+
   for (i = 0; i < nelts; i++)
     {
       tree eltype = aarch64_simd_types[i].eltype;
@@ -1059,6 +1073,19 @@ aarch64_init_fp16_types (void)
   aarch64_fp16_ptr_type_node = build_pointer_type (aarch64_fp16_type_node);
 }
 
+/* Initialize the backend REAL_TYPE type supporting bfloat types.  */
+static void
+aarch64_init_bf16_types (void)
+{
+  aarch64_bf16_type_node = make_node (REAL_TYPE);
+  TYPE_PRECISION (aarch64_bf16_type_node) = 16;
+  SET_TYPE_MODE (aarch64_bf16_type_node, BFmode);
+  layout_type (aarch64_bf16_type_node);
+
+  (*lang_hooks.types.register_builtin_type) (aarch64_bf16_type_node, "__bf16");
+  aarch64_bf16_ptr_type_node = build_pointer_type (aarch64_bf16_type_node);
+}
+
 /* Pointer authentication builtins that will become NOP on legacy platform.
    Currently, these builtins are for internal use only (libgcc EH unwinder).  */
 
@@ -1214,6 +1241,8 @@ aarch64_general_init_builtins (void)
 
   aarch64_init_fp16_types ();
 
+  aarch64_init_bf16_types ();
+
   if (TARGET_SIMD)
     aarch64_init_simd_builtins ();
 
diff --git a/gcc/config/aarch64/aarch64-modes.def b/gcc/config/aarch64/aarch64-modes.def
index 3c698b620cd..59f2ec4eaec 100644
--- a/gcc/config/aarch64/aarch64-modes.def
+++ b/gcc/config/aarch64/aarch64-modes.def
@@ -69,6 +69,13 @@ VECTOR_MODES (FLOAT, 16);     /*            V4SF V2DF.  */
 VECTOR_MODE (FLOAT, DF, 1);   /*                 V1DF.  */
 VECTOR_MODE (FLOAT, HF, 2);   /*                 V2HF.  */
 
+/* Bfloat16 modes.  */
+FLOAT_MODE (BF, 2, 0);
+ADJUST_FLOAT_FORMAT (BF, &arm_bfloat_half_format);
+
+VECTOR_MODE (FLOAT, BF, 4);   /*		 V4BF.  */
+VECTOR_MODE (FLOAT, BF, 8);   /*		 V8BF.  */
+
 /* Oct Int: 256-bit integer mode needed for 32-byte vector arguments.  */
 INT_MODE (OI, 32);
 
diff --git a/gcc/config/aarch64/aarch64-simd-builtin-types.def b/gcc/config/aarch64/aarch64-simd-builtin-types.def
index b015694293c..3b387377f38 100644
--- a/gcc/config/aarch64/aarch64-simd-builtin-types.def
+++ b/gcc/config/aarch64/aarch64-simd-builtin-types.def
@@ -50,3 +50,5 @@
   ENTRY (Float32x4_t, V4SF, none, 13)
   ENTRY (Float64x1_t, V1DF, none, 13)
   ENTRY (Float64x2_t, V2DF, none, 13)
+  ENTRY (Bfloat16x4_t, V4BF, none, 15)
+  ENTRY (Bfloat16x8_t, V8BF, none, 15)
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index ad4676bc167..c4858ab7cff 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -19,8 +19,8 @@
 ;; <http://www.gnu.org/licenses/>.
 
 (define_expand "mov<mode>"
-  [(set (match_operand:VALL_F16 0 "nonimmediate_operand")
-	(match_operand:VALL_F16 1 "general_operand"))]
+  [(set (match_operand:VALL_F16MOV 0 "nonimmediate_operand")
+	(match_operand:VALL_F16MOV 1 "general_operand"))]
   "TARGET_SIMD"
   "
   /* Force the operand into a register if it is not an
@@ -101,10 +101,10 @@
   [(set_attr "type" "neon_dup<q>")]
 )
 
-(define_insn "*aarch64_simd_mov<VD:mode>"
-  [(set (match_operand:VD 0 "nonimmediate_operand"
+(define_insn "*aarch64_simd_mov<VDMOV:mode>"
+  [(set (match_operand:VDMOV 0 "nonimmediate_operand"
 		"=w, m,  m,  w, ?r, ?w, ?r, w")
-	(match_operand:VD 1 "general_operand"
+	(match_operand:VDMOV 1 "general_operand"
 		"m,  Dz, w,  w,  w,  r,  r, Dn"))]
   "TARGET_SIMD
    && (register_operand (operands[0], <MODE>mode)
@@ -126,13 +126,14 @@
 }
   [(set_attr "type" "neon_load1_1reg<q>, store_8, neon_store1_1reg<q>,\
 		     neon_logic<q>, neon_to_gp<q>, f_mcr,\
-		     mov_reg, neon_move<q>")]
+		     mov_reg, neon_move<q>")
+    (set_attr "arch" "*,notbf16,*,*,*,*,*,notbf16")]
 )
 
-(define_insn "*aarch64_simd_mov<VQ:mode>"
-  [(set (match_operand:VQ 0 "nonimmediate_operand"
+(define_insn "*aarch64_simd_mov<VQMOV:mode>"
+  [(set (match_operand:VQMOV 0 "nonimmediate_operand"
 		"=w, Umn,  m,  w, ?r, ?w, ?r, w")
-	(match_operand:VQ 1 "general_operand"
+	(match_operand:VQMOV 1 "general_operand"
 		"m,  Dz, w,  w,  w,  r,  r, Dn"))]
   "TARGET_SIMD
    && (register_operand (operands[0], <MODE>mode)
@@ -161,7 +162,8 @@
   [(set_attr "type" "neon_load1_1reg<q>, store_16, neon_store1_1reg<q>,\
 		     neon_logic<q>, multiple, multiple,\
 		     multiple, neon_move<q>")
-   (set_attr "length" "4,4,4,4,8,8,8,4")]
+   (set_attr "length" "4,4,4,4,8,8,8,4")
+   (set_attr "arch" "*,notbf16,*,*,*,*,*,notbf16")]
 )
 
 ;; When storing lane zero we can use the normal STR and its more permissive
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index b0aca03bcb4..f57469b6e23 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -1692,6 +1692,7 @@ aarch64_classify_vector_mode (machine_mode mode)
     case E_V2SImode:
     /* ...E_V1DImode doesn't exist.  */
     case E_V4HFmode:
+    case E_V4BFmode:
     case E_V2SFmode:
     case E_V1DFmode:
     /* 128-bit Advanced SIMD vectors.  */
@@ -1700,6 +1701,7 @@ aarch64_classify_vector_mode (machine_mode mode)
     case E_V4SImode:
     case E_V2DImode:
     case E_V8HFmode:
+    case E_V8BFmode:
     case E_V4SFmode:
     case E_V2DFmode:
       return TARGET_SIMD ? VEC_ADVSIMD : 0;
@@ -15548,6 +15550,10 @@ aarch64_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
 	  field_t = aarch64_fp16_type_node;
 	  field_ptr_t = aarch64_fp16_ptr_type_node;
 	  break;
+	case E_BFmode:
+	  field_t = aarch64_bf16_type_node;
+	  field_ptr_t = aarch64_bf16_ptr_type_node;
+	  break;
 	case E_V2SImode:
 	case E_V4SImode:
 	    {
diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
index 2bb5a208720..857e2b8f90e 100644
--- a/gcc/config/aarch64/aarch64.h
+++ b/gcc/config/aarch64/aarch64.h
@@ -1120,13 +1120,13 @@ extern enum aarch64_code_model aarch64_cmodel;
 #define AARCH64_VALID_SIMD_DREG_MODE(MODE) \
   ((MODE) == V2SImode || (MODE) == V4HImode || (MODE) == V8QImode \
    || (MODE) == V2SFmode || (MODE) == V4HFmode || (MODE) == DImode \
-   || (MODE) == DFmode)
+   || (MODE) == DFmode || (MODE) == V4BFmode)
 
 /* Modes valid for AdvSIMD Q registers.  */
 #define AARCH64_VALID_SIMD_QREG_MODE(MODE) \
   ((MODE) == V4SImode || (MODE) == V8HImode || (MODE) == V16QImode \
    || (MODE) == V4SFmode || (MODE) == V8HFmode || (MODE) == V2DImode \
-   || (MODE) == V2DFmode)
+   || (MODE) == V2DFmode || (MODE) == V8BFmode)
 
 #define ENDIAN_LANE_N(NUNITS, N) \
   (BYTES_BIG_ENDIAN ? NUNITS - 1 - N : N)
@@ -1174,6 +1174,11 @@ extern const char *host_detect_local_cpu (int argc, const char **argv);
 extern tree aarch64_fp16_type_node;
 extern tree aarch64_fp16_ptr_type_node;
 
+/* This type is the user-visible __bf16, and a pointer to that type.  We
+   need it in many places in the backend.  Defined in aarch64-builtins.c.  */
+extern tree aarch64_bf16_type_node;
+extern tree aarch64_bf16_ptr_type_node;
+
 /* The generic unwind code in libgcc does not initialize the frame pointer.
    So in order to unwind a function using a frame pointer, the very first
    function that is unwound must save the frame pointer.  That way the frame
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index b11ead7ab23..6c1cd76bb16 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -344,7 +344,7 @@
 ;; alternative). This attribute is used to compute attribute "enabled", use type
 ;; "any" to enable an alternative in all cases.
 
-(define_enum "arches" [ any rcpc8_4 fp simd sve fp16])
+(define_enum "arches" [ any rcpc8_4 fp simd sve fp16 fp16_notbf16 notbf16])
 
 (define_enum_attr "arch" "arches" (const_string "any"))
 
@@ -378,6 +378,12 @@
 	(and (eq_attr "arch" "fp16")
 	     (match_test "TARGET_FP_F16INST"))
 
+	(and (eq_attr "arch" "fp16_notbf16")
+	     (match_test "TARGET_FP_F16INST && !TARGET_BF16_FP"))
+
+	(and (eq_attr "arch" "notbf16")
+	     (match_test "!TARGET_BF16_SIMD"))
+
 	(and (eq_attr "arch" "sve")
 	     (match_test "TARGET_SVE")))
     (const_string "yes")
@@ -1304,8 +1310,8 @@
 })
 
 (define_expand "mov<mode>"
-  [(set (match_operand:GPF_TF_F16 0 "nonimmediate_operand")
-	(match_operand:GPF_TF_F16 1 "general_operand"))]
+  [(set (match_operand:GPF_TF_F16_MOV 0 "nonimmediate_operand")
+	(match_operand:GPF_TF_F16_MOV 1 "general_operand"))]
   ""
   {
     if (!TARGET_FLOAT)
@@ -1321,11 +1327,11 @@
   }
 )
 
-(define_insn "*movhf_aarch64"
-  [(set (match_operand:HF 0 "nonimmediate_operand" "=w,w  , w,?r,w,w  ,w  ,w,m,r,m ,r")
-	(match_operand:HF 1 "general_operand"      "Y ,?rY,?r, w,w,Ufc,Uvi,m,w,m,rY,r"))]
-  "TARGET_FLOAT && (register_operand (operands[0], HFmode)
-    || aarch64_reg_or_fp_zero (operands[1], HFmode))"
+(define_insn "*mov<mode>_aarch64"
+  [(set (match_operand:HFBF 0 "nonimmediate_operand" "=w,w  , w,?r,w,w  ,w  ,w,m,r,m ,r")
+	(match_operand:HFBF 1 "general_operand"      "Y ,?rY,?r, w,w,Ufc,Uvi,m,w,m,rY,r"))]
+  "TARGET_FLOAT && (register_operand (operands[0], <MODE>mode)
+    || aarch64_reg_or_fp_zero (operands[1], <MODE>mode))"
   "@
    movi\\t%0.4h, #0
    fmov\\t%h0, %w1
@@ -1341,7 +1347,7 @@
    mov\\t%w0, %w1"
   [(set_attr "type" "neon_move,f_mcr,neon_move,neon_to_gp, neon_move,fconsts, \
 		     neon_move,f_loads,f_stores,load_4,store_4,mov_reg")
-   (set_attr "arch" "simd,fp16,simd,simd,simd,fp16,simd,*,*,*,*,*")]
+   (set_attr "arch" "simd,fp16,simd,simd,simd,fp16_notbf16,simd,*,*,*,*,*")]
 )
 
 (define_insn "*movsf_aarch64"
diff --git a/gcc/config/aarch64/arm_bf16.h b/gcc/config/aarch64/arm_bf16.h
new file mode 100644
index 00000000000..aedb0972735
--- /dev/null
+++ b/gcc/config/aarch64/arm_bf16.h
@@ -0,0 +1,42 @@
+/* Arm BF16 instrinsics include file.
+
+   Copyright (C) 2019 Free Software Foundation, Inc.
+   Contributed by Arm.
+
+   This file is part of GCC.
+
+   GCC is free software; you can redistribute it and/or modify it
+   under the terms of the GNU General Public License as published
+   by the Free Software Foundation; either version 3, or (at your
+   option) any later version.
+
+   GCC is distributed in the hope that it will be useful, but WITHOUT
+   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+   License for more details.
+
+   Under Section 7 of GPL version 3, you are granted additional
+   permissions described in the GCC Runtime Library Exception, version
+   3.1, as published by the Free Software Foundation.
+
+   You should have received a copy of the GNU General Public License and
+   a copy of the GCC Runtime Library Exception along with this program;
+   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#ifndef _AARCH64_BF16_H_
+#define _AARCH64_BF16_H_
+
+#include <stdint.h>
+
+#pragma GCC push_options
+#pragma GCC target ("arch=armv8.2-a+bf16")
+#ifdef __ARM_FEATURE_BF16_SCALAR_ARITHMETIC
+
+typedef __bf16 bfloat16_t;
+
+
+#endif
+#pragma GCC pop_options
+
+#endif
diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h
index 8b861601a48..5996df0a612 100644
--- a/gcc/config/aarch64/arm_neon.h
+++ b/gcc/config/aarch64/arm_neon.h
@@ -73,6 +73,10 @@ typedef __fp16 float16_t;
 typedef float float32_t;
 typedef double float64_t;
 
+typedef __bf16 bfloat16_t;
+typedef __Bfloat16x4_t bfloat16x4_t;
+typedef __Bfloat16x8_t bfloat16x8_t;
+
 typedef struct int8x8x2_t
 {
   int8x8_t val[2];
@@ -34606,6 +34610,8 @@ vrnd64xq_f64 (float64x2_t __a)
 
 #pragma GCC pop_options
 
+#include "arm_bf16.h"
+
 #undef __aarch64_vget_lane_any
 
 #undef __aarch64_vdup_lane_any
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index 1ca5ed1ef1b..9480efef47c 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -57,9 +57,17 @@
 ;; Iterator for all scalar floating point modes (HF, SF, DF)
 (define_mode_iterator GPF_HF [HF SF DF])
 
+;; Iterator for all 16-bit scalar floating point modes (HF, BF)
+(define_mode_iterator HFBF [HF BF])
+
 ;; Iterator for all scalar floating point modes (HF, SF, DF and TF)
 (define_mode_iterator GPF_TF_F16 [HF SF DF TF])
 
+;; Iterator for all scalar floating point modes suitable for moving, including
+;; special BF type.(HF, SF, DF, TF and BF)
+(define_mode_iterator GPF_TF_F16_MOV [(HF "") (BF "TARGET_BF16_FP") (SF "")
+				      (DF "") (TF "")])
+
 ;; Double vector modes.
 (define_mode_iterator VDF [V2SF V4HF])
 
@@ -79,6 +87,9 @@
 ;; Double vector modes.
 (define_mode_iterator VD [V8QI V4HI V4HF V2SI V2SF])
 
+;; Double vector modes suitable for moving.  Includes BFmode.
+(define_mode_iterator VDMOV [V8QI V4HI V4HF V4BF V2SI V2SF])
+
 ;; All modes stored in registers d0-d31.
 (define_mode_iterator DREG [V8QI V4HI V4HF V2SI V2SF DF])
 
@@ -94,6 +105,9 @@
 ;; Quad vector modes.
 (define_mode_iterator VQ [V16QI V8HI V4SI V2DI V8HF V4SF V2DF])
 
+;; Quad vector modes suitable for moving.  Includes BFmode.
+(define_mode_iterator VQMOV [V16QI V8HI V4SI V2DI V8HF V8BF V4SF V2DF])
+
 ;; Copy of the above.
 (define_mode_iterator VQ2 [V16QI V8HI V4SI V2DI V8HF V4SF V2DF])
 
@@ -160,6 +174,15 @@
 (define_mode_iterator VALL_F16 [V8QI V16QI V4HI V8HI V2SI V4SI V2DI
 				V4HF V8HF V2SF V4SF V2DF])
 
+;; All Advanced SIMD modes suitable for moving, loading, and storing,
+;; including special Bfloat vector types.
+(define_mode_iterator VALL_F16MOV [(V8QI "") (V16QI "") (V4HI "") (V8HI "")
+				   (V2SI "") (V4SI "") (V2DI "")
+				   (V4HF "") (V8HF "")
+				   (V4BF "TARGET_BF16_SIMD")
+				   (V8BF "TARGET_BF16_SIMD")
+				   (V2SF "") (V4SF "") (V2DF "")])
+
 ;; The VALL_F16 modes except the 128-bit 2-element ones.
 (define_mode_iterator VALL_F16_NO_V2Q [V8QI V16QI V4HI V8HI V2SI V4SI
 				V4HF V8HF V2SF V4SF])
@@ -885,7 +908,8 @@
 			  (V8HF "16b") (V2SF  "8b")
 			  (V4SF "16b") (V2DF  "16b")
 			  (DI   "8b")  (DF    "8b")
-			  (SI   "8b")  (SF    "8b")])
+			  (SI   "8b")  (SF    "8b")
+			  (V4BF "8b")  (V8HF  "16b")])
 
 ;; Define element mode for each vector mode.
 (define_mode_attr VEL [(V8QI  "QI") (V16QI "QI")
@@ -1265,6 +1289,7 @@
 		     (V2SI "") (V4SI  "_q")
 		     (DI   "") (V2DI  "_q")
 		     (V4HF "") (V8HF "_q")
+		     (V4BF "") (V8BF "_q")
 		     (V2SF "") (V4SF  "_q")
 			       (V2DF  "_q")
 		     (QI "") (HI "") (SI "") (DI "") (HF "") (SF "") (DF "")])
diff --git a/gcc/testsuite/gcc.target/aarch64/bfloat16_compile.c b/gcc/testsuite/gcc.target/aarch64/bfloat16_compile.c
new file mode 100644
index 00000000000..f2bef671deb
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/bfloat16_compile.c
@@ -0,0 +1,51 @@
+/* { dg-do assemble { target { aarch64*-*-* } } } */
+/* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */
+/* { dg-add-options arm_v8_2a_bf16_neon }  */
+/* { dg-additional-options "-O3 --save-temps" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include <arm_neon.h>
+
+/*
+**stacktest1:
+**	...
+**	str	h0, \[sp, [0-9]+\]
+**	ldr	h0, \[sp, [0-9]+\]
+**	...
+**	ret
+*/
+bfloat16_t stacktest1 (bfloat16_t __a)
+{
+  volatile bfloat16_t b = __a;
+  return b;
+}
+
+/*
+**stacktest2:
+**	...
+**	str	d0, \[sp, [0-9]+\]
+**	ldr	d0, \[sp, [0-9]+\]
+**	...
+**	ret
+*/
+bfloat16x4_t stacktest2 (bfloat16x4_t __a)
+{
+  volatile bfloat16x4_t b = __a;
+  return b;
+}
+
+/*
+**stacktest3:
+**	...
+**	str	q0, \[sp\]
+**	ldr	q0, \[sp\]
+**	...
+**	ret
+*/
+bfloat16x8_t stacktest3 (bfloat16x8_t __a)
+{
+  volatile bfloat16x8_t b = __a;
+  return b;
+}
+
+

[GCC][PATCH][Aarch64] Add Bfloat16_t scalar type, vector types and machine modes to Aarch64 back-end [1/2]

Reply via email to