diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index af95ac1..165132d 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -19642,6 +19642,7 @@ arm_debugger_arg_offset (int value, rtx addr)
 typedef enum {
   T_V8QI,
   T_V4HI,
+  T_V4HF,
   T_V2SI,
   T_V2SF,
   T_DI,
@@ -19659,14 +19660,15 @@ typedef enum {
 #define TYPE_MODE_BIT(X) (1 << (X))
 
 #define TB_DREG (TYPE_MODE_BIT (T_V8QI) | TYPE_MODE_BIT (T_V4HI)	\
-		 | TYPE_MODE_BIT (T_V2SI) | TYPE_MODE_BIT (T_V2SF)	\
-		 | TYPE_MODE_BIT (T_DI))
+		 | TYPE_MODE_BIT (T_V4HF) | TYPE_MODE_BIT (T_V2SI)	\
+		 | TYPE_MODE_BIT (T_V2SF) | TYPE_MODE_BIT (T_DI))
 #define TB_QREG (TYPE_MODE_BIT (T_V16QI) | TYPE_MODE_BIT (T_V8HI)	\
 		 | TYPE_MODE_BIT (T_V4SI) | TYPE_MODE_BIT (T_V4SF)	\
 		 | TYPE_MODE_BIT (T_V2DI) | TYPE_MODE_BIT (T_TI))
 
 #define v8qi_UP  T_V8QI
 #define v4hi_UP  T_V4HI
+#define v4hf_UP  T_V4HF
 #define v2si_UP  T_V2SI
 #define v2sf_UP  T_V2SF
 #define di_UP    T_DI
@@ -19702,6 +19704,8 @@ typedef enum {
   NEON_SCALARMULH,
   NEON_SCALARMAC,
   NEON_CONVERT,
+  NEON_FLOAT_WIDEN,
+  NEON_FLOAT_NARROW,
   NEON_FIXCONV,
   NEON_SELECT,
   NEON_RESULTPAIR,
@@ -20095,6 +20099,7 @@ arm_init_neon_builtins (void)
 
   tree neon_intQI_type_node;
   tree neon_intHI_type_node;
+  tree neon_floatHF_type_node;
   tree neon_polyQI_type_node;
   tree neon_polyHI_type_node;
   tree neon_intSI_type_node;
@@ -20121,6 +20126,7 @@ arm_init_neon_builtins (void)
 
   tree V8QI_type_node;
   tree V4HI_type_node;
+  tree V4HF_type_node;
   tree V2SI_type_node;
   tree V2SF_type_node;
   tree V16QI_type_node;
@@ -20175,6 +20181,9 @@ arm_init_neon_builtins (void)
   neon_float_type_node = make_node (REAL_TYPE);
   TYPE_PRECISION (neon_float_type_node) = FLOAT_TYPE_SIZE;
   layout_type (neon_float_type_node);
+  neon_floatHF_type_node = make_node (REAL_TYPE);
+  TYPE_PRECISION (neon_floatHF_type_node) = GET_MODE_PRECISION (HFmode);
+  layout_type (neon_floatHF_type_node);
 
   /* Define typedefs which exactly correspond to the modes we are basing vector
      types on.  If you change these names you'll need to change
@@ -20183,6 +20192,8 @@ arm_init_neon_builtins (void)
 					     "__builtin_neon_qi");
   (*lang_hooks.types.register_builtin_type) (neon_intHI_type_node,
 					     "__builtin_neon_hi");
+  (*lang_hooks.types.register_builtin_type) (neon_floatHF_type_node,
+					     "__builtin_neon_hf");
   (*lang_hooks.types.register_builtin_type) (neon_intSI_type_node,
 					     "__builtin_neon_si");
   (*lang_hooks.types.register_builtin_type) (neon_float_type_node,
@@ -20224,6 +20235,8 @@ arm_init_neon_builtins (void)
     build_vector_type_for_mode (neon_intQI_type_node, V8QImode);
   V4HI_type_node =
     build_vector_type_for_mode (neon_intHI_type_node, V4HImode);
+  V4HF_type_node =
+    build_vector_type_for_mode (neon_floatHF_type_node, V4HFmode);
   V2SI_type_node =
     build_vector_type_for_mode (neon_intSI_type_node, V2SImode);
   V2SF_type_node =
@@ -20346,14 +20359,13 @@ arm_init_neon_builtins (void)
       neon_builtin_datum *d = &neon_builtin_data[i];
 
       const char* const modenames[] = {
-	"v8qi", "v4hi", "v2si", "v2sf", "di",
+	"v8qi", "v4hi", "v4hf", "v2si", "v2sf", "di",
 	"v16qi", "v8hi", "v4si", "v4sf", "v2di",
 	"ti", "ei", "oi"
       };
       char namebuf[60];
       tree ftype = NULL;
       int is_load = 0, is_store = 0;
-
       gcc_assert (ARRAY_SIZE (modenames) == T_MAX);
 
       d->fcode = fcode;
@@ -20549,8 +20561,9 @@ arm_init_neon_builtins (void)
 	case NEON_REINTERP:
 	  {
 	    /* We iterate over 5 doubleword types, then 5 quadword
-	       types.  */
-	    int rhs = d->mode % 5;
+	       types. V4HF is not a type used in reinterpret, so we translate
+	       d->mode to the correct index in reinterp_ftype_dreg.  */
+	    int rhs = (d->mode - ((d->mode > T_V4HF) ? 1 : 0)) % 5;
 	    switch (insn_data[d->code].operand[0].mode)
 	      {
 	      case V8QImode: ftype = reinterp_ftype_dreg[0][rhs]; break;
@@ -20567,7 +20580,36 @@ arm_init_neon_builtins (void)
 	      }
 	  }
 	  break;
-
+	case NEON_FLOAT_WIDEN:
+	  {
+	    tree eltype = NULL_TREE;
+	    tree return_type = NULL_TREE;
+	    switch (insn_data[d->code].operand[1].mode)
+	    {
+	      case V4HFmode:
+	        eltype = V4HF_type_node;
+	        return_type = V4SF_type_node;
+	        break;
+	      default: gcc_unreachable ();
+	    }
+	    ftype = build_function_type_list (return_type, eltype, NULL);
+	    break;
+	  }
+	case NEON_FLOAT_NARROW:
+	  {
+	    tree eltype = NULL_TREE;
+	    tree return_type = NULL_TREE;
+	    switch (insn_data[d->code].operand[1].mode)
+	    {
+	      case V4SFmode:
+	        eltype = V4SF_type_node;
+	        return_type = V4HF_type_node;
+	        break;
+	      default: gcc_unreachable ();
+	    }
+	    ftype = build_function_type_list (return_type, eltype, NULL);
+	    break;
+	  }
 	default:
 	  gcc_unreachable ();
 	}
@@ -21564,6 +21606,8 @@ arm_expand_neon_builtin (int fcode, tree exp, rtx target)
     case NEON_DUP:
     case NEON_RINT:
     case NEON_SPLIT:
+    case NEON_FLOAT_WIDEN:
+    case NEON_FLOAT_NARROW:
     case NEON_REINTERP:
       return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
         NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
@@ -25190,7 +25234,7 @@ arm_vector_mode_supported_p (enum machine_mode mode)
 {
   /* Neon also supports V2SImode, etc. listed in the clause below.  */
   if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
-      || mode == V16QImode || mode == V4SFmode || mode == V2DImode))
+      || mode == V4HFmode || mode == V16QImode || mode == V4SFmode || mode == V2DImode))
     return true;
 
   if ((TARGET_NEON || TARGET_IWMMXT)
@@ -26034,6 +26078,7 @@ static arm_mangle_map_entry arm_mangle_map[] = {
   { V8QImode,  "__builtin_neon_uqi",    "16__simd64_uint8_t" },
   { V4HImode,  "__builtin_neon_hi",     "16__simd64_int16_t" },
   { V4HImode,  "__builtin_neon_uhi",    "17__simd64_uint16_t" },
+  { V4HFmode,  "__builtin_neon_hf",     "18__simd64_float16_t" },
   { V2SImode,  "__builtin_neon_si",     "16__simd64_int32_t" },
   { V2SImode,  "__builtin_neon_usi",    "17__simd64_uint32_t" },
   { V2SFmode,  "__builtin_neon_sf",     "18__simd64_float32_t" },
diff --git a/gcc/config/arm/arm.h b/gcc/config/arm/arm.h
index cc1774b..c47fdf6 100644
--- a/gcc/config/arm/arm.h
+++ b/gcc/config/arm/arm.h
@@ -1052,7 +1052,7 @@ extern int prefer_neon_for_64bits;
 /* Modes valid for Neon D registers.  */
 #define VALID_NEON_DREG_MODE(MODE) \
   ((MODE) == V2SImode || (MODE) == V4HImode || (MODE) == V8QImode \
-   || (MODE) == V2SFmode || (MODE) == DImode)
+   || (MODE) == V4HFmode || (MODE) == V2SFmode || (MODE) == DImode)
 
 /* Modes valid for Neon Q registers.  */
 #define VALID_NEON_QREG_MODE(MODE) \
diff --git a/gcc/config/arm/arm_neon.h b/gcc/config/arm/arm_neon.h
index 4d945ce..e23d03b 100644
--- a/gcc/config/arm/arm_neon.h
+++ b/gcc/config/arm/arm_neon.h
@@ -43,6 +43,7 @@ typedef __builtin_neon_hi int16x4_t	__attribute__ ((__vector_size__ (8)));
 typedef __builtin_neon_si int32x2_t	__attribute__ ((__vector_size__ (8)));
 typedef __builtin_neon_di int64x1_t;
 typedef __builtin_neon_sf float32x2_t	__attribute__ ((__vector_size__ (8)));
+typedef __builtin_neon_hf float16x4_t	__attribute__ ((__vector_size__ (8)));
 typedef __builtin_neon_poly8 poly8x8_t	__attribute__ ((__vector_size__ (8)));
 typedef __builtin_neon_poly16 poly16x4_t	__attribute__ ((__vector_size__ (8)));
 typedef __builtin_neon_uqi uint8x8_t	__attribute__ ((__vector_size__ (8)));
@@ -6016,6 +6017,22 @@ vcvtq_u32_f32 (float32x4_t __a)
   return (uint32x4_t)__builtin_neon_vcvtv4sf (__a, 0);
 }
 
+#if ((__ARM_FP & 0x2) != 0)
+__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
+vcvt_f16_f32 (float32x4_t __a)
+{
+  return (float16x4_t)__builtin_neon_vcvtv4hfv4sf (__a);
+}
+
+#endif
+#if ((__ARM_FP & 0x2) != 0)
+__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
+vcvt_f32_f16 (float16x4_t __a)
+{
+  return (float32x4_t)__builtin_neon_vcvtv4sfv4hf (__a);
+}
+
+#endif
 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
 vcvt_n_s32_f32 (float32x2_t __a, const int __b)
 {
diff --git a/gcc/config/arm/arm_neon_builtins.def b/gcc/config/arm/arm_neon_builtins.def
index 5bf6d31..92f1d7a 100644
--- a/gcc/config/arm/arm_neon_builtins.def
+++ b/gcc/config/arm/arm_neon_builtins.def
@@ -132,6 +132,8 @@ VAR4 (UNOP, vrev32, v8qi, v4hi, v16qi, v8hi),
 VAR2 (UNOP, vrev16, v8qi, v16qi),
 VAR4 (CONVERT, vcvt, v2si, v2sf, v4si, v4sf),
 VAR4 (FIXCONV, vcvt_n, v2si, v2sf, v4si, v4sf),
+VAR1 (FLOAT_WIDEN, vcvtv4sf, v4hf),
+VAR1 (FLOAT_NARROW, vcvtv4hf, v4sf),
 VAR10 (SELECT, vbsl,
 	 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
 VAR2 (RINT, vrintn, v2sf, v4sf),
diff --git a/gcc/config/arm/neon-gen.ml b/gcc/config/arm/neon-gen.ml
index a811e15..42b36c2 100644
--- a/gcc/config/arm/neon-gen.ml
+++ b/gcc/config/arm/neon-gen.ml
@@ -121,6 +121,7 @@ let rec signed_ctype = function
   | T_uint16 | T_int16 -> T_intHI
   | T_uint32 | T_int32 -> T_intSI
   | T_uint64 | T_int64 -> T_intDI
+  | T_float16 -> T_floatHF
   | T_float32 -> T_floatSF
   | T_poly8 -> T_intQI
   | T_poly16 -> T_intHI
@@ -272,12 +273,11 @@ let infoword_value elttype features =
    with multiple modes (<to><from>).  *)
 let rec mode_suffix elttype shape =
   try
-    let mode = mode_of_elt elttype shape in
-    string_of_mode mode
+    mode_of_elt_str elttype shape
   with MixedMode (dst, src) ->
-    let dstmode = mode_of_elt dst shape
-    and srcmode = mode_of_elt src shape in
-    string_of_mode dstmode ^ string_of_mode srcmode
+    let dstmode = mode_of_elt_str dst shape
+    and srcmode = mode_of_elt_str src shape in
+    dstmode ^ srcmode
 
 let get_shuffle features =
   try
@@ -291,19 +291,24 @@ let print_feature_test_start features =
     match List.find (fun feature ->
                        match feature with Requires_feature _ -> true
                                         | Requires_arch _ -> true
+                                        | Requires_FP_bit _ -> true
                                         | _ -> false)
                      features with
-      Requires_feature feature -> 
+      Requires_feature feature ->
         Format.printf "#ifdef __ARM_FEATURE_%s@\n" feature
     | Requires_arch arch ->
         Format.printf "#if __ARM_ARCH >= %d@\n" arch
+    | Requires_FP_bit bit ->
+        Format.printf "#if ((__ARM_FP & 0x%X) != 0)@\n"
+                      (1 lsl bit)
     | _ -> assert false
   with Not_found -> assert true
 
 let print_feature_test_end features =
   let feature =
-    List.exists (function Requires_feature x -> true
-                          | Requires_arch x -> true
+    List.exists (function Requires_feature _ -> true
+                          | Requires_arch _ -> true
+                          | Requires_FP_bit _ -> true
                           |  _ -> false) features in
   if feature then Format.printf "#endif@\n"
 
@@ -365,6 +370,7 @@ let deftypes () =
     "__builtin_neon_hi", "int", 16, 4;
     "__builtin_neon_si", "int", 32, 2;
     "__builtin_neon_di", "int", 64, 1;
+    "__builtin_neon_hf", "float", 16, 4;
     "__builtin_neon_sf", "float", 32, 2;
     "__builtin_neon_poly8", "poly", 8, 8;
     "__builtin_neon_poly16", "poly", 16, 4;
diff --git a/gcc/config/arm/neon-testgen.ml b/gcc/config/arm/neon-testgen.ml
index d855c29..543318b 100644
--- a/gcc/config/arm/neon-testgen.ml
+++ b/gcc/config/arm/neon-testgen.ml
@@ -163,10 +163,12 @@ let effective_target features =
     match List.find (fun feature ->
                        match feature with Requires_feature _ -> true
                                         | Requires_arch _ -> true
+                                        | Requires_FP_bit 1 -> true
                                         | _ -> false)
                      features with
       Requires_feature "FMA" -> "arm_neonv2"
     | Requires_arch 8 -> "arm_v8_neon"
+    | Requires_FP_bit 1 -> "arm_neon_fp16"
     | _ -> assert false
   with Not_found -> "arm_neon"
 
diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md
index d7c2cb3..33a7868 100644
--- a/gcc/config/arm/neon.md
+++ b/gcc/config/arm/neon.md
@@ -3281,6 +3281,24 @@
                    (const_string "neon_fp_vadd_qqq_vabs_qq")))]
 )
 
+(define_insn "neon_vcvtv4sfv4hf"
+  [(set (match_operand:V4SF 0 "s_register_operand" "=w")
+	(unspec:V4SF [(match_operand:V4HF 1 "s_register_operand" "w")]
+			  UNSPEC_VCVT))]
+  "TARGET_NEON && TARGET_FP16"
+  "vcvt.f32.f16\t%q0, %P1"
+  [(set_attr "neon_type" "neon_fp_vadd_ddd_vabs_dd")]
+)
+
+(define_insn "neon_vcvtv4hfv4sf"
+  [(set (match_operand:V4HF 0 "s_register_operand" "=w")
+	(unspec:V4HF [(match_operand:V4SF 1 "s_register_operand" "w")]
+			  UNSPEC_VCVT))]
+  "TARGET_NEON && TARGET_FP16"
+  "vcvt.f16.f32\t%P0, %q1"
+  [(set_attr "neon_type" "neon_fp_vadd_ddd_vabs_dd")]
+)
+
 (define_insn "neon_vcvt_n<mode>"
   [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
 	(unspec:<V_CVTTO> [(match_operand:VCVTF 1 "s_register_operand" "w")
diff --git a/gcc/config/arm/neon.ml b/gcc/config/arm/neon.ml
index 34090c9..6a0e1a7 100644
--- a/gcc/config/arm/neon.ml
+++ b/gcc/config/arm/neon.ml
@@ -21,7 +21,7 @@
    <http://www.gnu.org/licenses/>.  *)
 
 (* Shorthand types for vector elements.  *)
-type elts = S8 | S16 | S32 | S64 | F32 | U8 | U16 | U32 | U64 | P8 | P16
+type elts = S8 | S16 | S32 | S64 | F16 | F32 | U8 | U16 | U32 | U64 | P8 | P16
           | I8 | I16 | I32 | I64 | B8 | B16 | B32 | B64 | Conv of elts * elts
           | Cast of elts * elts | NoElts
 
@@ -37,6 +37,7 @@ type vectype = T_int8x8    | T_int8x16
 	     | T_uint16x4  | T_uint16x8
 	     | T_uint32x2  | T_uint32x4
 	     | T_uint64x1  | T_uint64x2
+	     | T_float16x4
 	     | T_float32x2 | T_float32x4
 	     | T_poly8x8   | T_poly8x16
 	     | T_poly16x4  | T_poly16x8
@@ -46,11 +47,13 @@ type vectype = T_int8x8    | T_int8x16
              | T_uint8     | T_uint16
              | T_uint32    | T_uint64
              | T_poly8     | T_poly16
-             | T_float32   | T_arrayof of int * vectype
+             | T_float16   | T_float32
+             | T_arrayof of int * vectype
              | T_ptrto of vectype | T_const of vectype
              | T_void      | T_intQI
              | T_intHI     | T_intSI
-             | T_intDI     | T_floatSF
+             | T_intDI     | T_floatHF
+             | T_floatSF
 
 (* The meanings of the following are:
      TImode : "Tetra", two registers (four words).
@@ -93,7 +96,7 @@ type arity = Arity0 of vectype
            | Arity4 of vectype * vectype * vectype * vectype * vectype
 
 type vecmode = V8QI | V4HI | V2SI | V2SF | DI
-             | V16QI | V8HI | V4SI | V4SF | V2DI
+             | V16QI | V8HI | V4SI | V4SF | V4HF | V2DI
              | QI | HI | SI | SF
 
 type opcode =
@@ -208,6 +211,12 @@ type opcode =
   (* Reinterpret casts.  *)
   | Vreinterp
 
+let string_of_mode = function
+    V8QI -> "v8qi" | V4HI  -> "v4hi"  | V4HF -> "v4hf" |V2SI -> "v2si"
+  | V2SF -> "v2sf" | DI   -> "di"   | V16QI -> "v16qi" | V8HI -> "v8hi"
+  | V4SI -> "v4si" | V4SF -> "v4sf" | V2DI  -> "v2di"  | QI -> "qi"
+  | HI -> "hi" | SI -> "si" | SF -> "sf"
+
 let rev_elems revsize elsize nelts _ =
   let mask = (revsize / elsize) - 1 in
   let arr = Array.init nelts
@@ -284,13 +293,17 @@ type features =
   | Fixed_core_reg
     (* Mark that the intrinsic requires __ARM_FEATURE_string to be defined.  *)
   | Requires_feature of string
+    (* Mark that the intrinsic requires a particular architecture version.  *)
   | Requires_arch of int
+    (* Mark that the intrinsic requires a particular bit in __ARM_FP to
+    be set.   *)
+  | Requires_FP_bit of int
 
 exception MixedMode of elts * elts
 
 let rec elt_width = function
     S8 | U8 | P8 | I8 | B8 -> 8
-  | S16 | U16 | P16 | I16 | B16 -> 16
+  | S16 | U16 | P16 | I16 | B16 | F16 -> 16
   | S32 | F32 | U32 | I32 | B32 -> 32
   | S64 | U64 | I64 | B64 -> 64
   | Conv (a, b) ->
@@ -303,7 +316,7 @@ let rec elt_class = function
     S8 | S16 | S32 | S64 -> Signed
   | U8 | U16 | U32 | U64 -> Unsigned
   | P8 | P16 -> Poly
-  | F32 -> Float
+  | F16 | F32 -> Float
   | I8 | I16 | I32 | I64 -> Int
   | B8 | B16 | B32 | B64 -> Bits
   | Conv (a, b) | Cast (a, b) -> ConvClass (elt_class a, elt_class b)
@@ -315,6 +328,7 @@ let elt_of_class_width c w =
   | Signed, 16 -> S16
   | Signed, 32 -> S32
   | Signed, 64 -> S64
+  | Float, 16 -> F16
   | Float, 32 -> F32
   | Unsigned, 8 -> U8
   | Unsigned, 16 -> U16
@@ -330,7 +344,7 @@ let elt_of_class_width c w =
   | Bits, 16 -> B16
   | Bits, 32 -> B32
   | Bits, 64 -> B64
-  | _ -> failwith "Bad element type"
+  | _ -> failwith ("Bad element type" ^ (string_of_int w))
 
 (* Return unsigned integer element the same width as argument.  *)
 let unsigned_of_elt elt =
@@ -407,6 +421,14 @@ let rec mode_of_elt elt shape =
   | Use_operands ops -> mode_of_elt elt (All (0, (find_key_operand ops)))
   | _ -> failwith "invalid shape"
 
+let mode_of_elt_str elt shape =
+  match elt with
+  Cast (F16, F32) -> "v4hfv4sf"
+  | Cast (F32, F16) -> "v4sfv4hf"
+  | _ ->
+  let mode = mode_of_elt elt shape in
+  string_of_mode mode
+
 (* Modify an element type dependent on the shape of the instruction and the
    operand number.  *)
 
@@ -454,10 +476,11 @@ let type_for_elt shape elt no =
         | U16 -> T_uint16x4
         | U32 -> T_uint32x2
         | U64 -> T_uint64x1
+        | F16 -> T_float16x4
         | F32 -> T_float32x2
         | P8 -> T_poly8x8
         | P16 -> T_poly16x4
-        | _ -> failwith "Bad elt type"
+        | _ -> failwith "Bad elt type for Dreg"
         end
     | Qreg ->
         begin match elt with
@@ -472,7 +495,7 @@ let type_for_elt shape elt no =
         | F32 -> T_float32x4
         | P8 -> T_poly8x16
         | P16 -> T_poly16x8
-        | _ -> failwith "Bad elt type"
+        | _ -> failwith "Bad elt type for Qreg"
         end
     | Corereg ->
         begin match elt with
@@ -487,7 +510,7 @@ let type_for_elt shape elt no =
         | P8 -> T_poly8
         | P16 -> T_poly16
         | F32 -> T_float32
-        | _ -> failwith "Bad elt type"
+        | _ -> failwith "Bad elt type for Corereg"
         end
     | Immed ->
         T_immediate (0, 0)
@@ -506,7 +529,7 @@ let type_for_elt shape elt no =
 let vectype_size = function
     T_int8x8 | T_int16x4 | T_int32x2 | T_int64x1
   | T_uint8x8 | T_uint16x4 | T_uint32x2 | T_uint64x1
-  | T_float32x2 | T_poly8x8 | T_poly16x4 -> 64
+  | T_float32x2 | T_poly8x8 | T_poly16x4 | T_float16x4 -> 64
   | T_int8x16 | T_int16x8 | T_int32x4 | T_int64x2
   | T_uint8x16 | T_uint16x8  | T_uint32x4  | T_uint64x2
   | T_float32x4 | T_poly8x16 | T_poly16x8 -> 128
@@ -621,6 +644,11 @@ let shift_right_acc shape elt =
   const_shift (fun imm -> T_immediate (1, imm))
     ~arity:(fun dst op1 op2 -> Arity3 (dst, dst, op1, op2)) shape elt
 
+let vcvt_sh shape elt =
+  match elt with
+  Cast (el1, el2) -> let vt1,vt2 = type_for_elt shape el1, type_for_elt shape el2 in Arity1 (vt1 0, vt2 1), elt
+  | _ -> failwith "Error in vcvt_sh"
+
 (* Use for immediate right-shifts when the operation doesn't care about
    signedness.  *)
 
@@ -1217,6 +1245,12 @@ let ops =
       [Conv (S32, F32); Conv (U32, F32); Conv (F32, S32); Conv (F32, U32)];
     Vcvt, [InfoWord], All (2, Qreg), "vcvtQ", conv_1,
       [Conv (S32, F32); Conv (U32, F32); Conv (F32, S32); Conv (F32, U32)];
+    Vcvt, [Builtin_name "vcvt" ; Requires_FP_bit 1],
+          Use_operands [| Dreg; Qreg; |], "vcvt", vcvt_sh,
+      [Cast (F16, F32)];
+    Vcvt, [Builtin_name "vcvt" ; Requires_FP_bit 1],
+          Use_operands [| Qreg; Dreg; |], "vcvt", vcvt_sh,
+      [Cast (F32, F16)];
     Vcvt_n, [InfoWord], Use_operands [| Dreg; Dreg; Immed |], "vcvt_n", conv_2,
       [Conv (S32, F32); Conv (U32, F32); Conv (F32, S32); Conv (F32, U32)];
     Vcvt_n, [InfoWord], Use_operands [| Qreg; Qreg; Immed |], "vcvtQ_n", conv_2,
@@ -1782,7 +1816,7 @@ let rec string_of_elt = function
   | U8 -> "u8" | U16 -> "u16" | U32 -> "u32" | U64 -> "u64"
   | I8 -> "i8" | I16 -> "i16" | I32 -> "i32" | I64 -> "i64"
   | B8 -> "8" | B16 -> "16" | B32 -> "32" | B64 -> "64"
-  | F32 -> "f32" | P8 -> "p8" | P16 -> "p16"
+  | F32 -> "f32" | P8 -> "p8" | P16 -> "p16" | F16 -> "f16"
   | Conv (a, b) | Cast (a, b) -> string_of_elt a ^ "_" ^ string_of_elt b
   | NoElts -> failwith "No elts"
 
@@ -1809,6 +1843,7 @@ let string_of_vectype vt =
   | T_uint32x4 -> affix "uint32x4"
   | T_uint64x1 -> affix "uint64x1"
   | T_uint64x2 -> affix "uint64x2"
+  | T_float16x4 -> affix "float16x4"
   | T_float32x2 -> affix "float32x2"
   | T_float32x4 -> affix "float32x4"
   | T_poly8x8 -> affix "poly8x8"
@@ -1825,6 +1860,7 @@ let string_of_vectype vt =
   | T_uint64 -> affix "uint64"
   | T_poly8 -> affix "poly8"
   | T_poly16 -> affix "poly16"
+  | T_float16 -> affix "float16"
   | T_float32 -> affix "float32"
   | T_immediate _ -> "const int"
   | T_void -> "void"
@@ -1832,6 +1868,7 @@ let string_of_vectype vt =
   | T_intHI -> "__builtin_neon_hi"
   | T_intSI -> "__builtin_neon_si"
   | T_intDI -> "__builtin_neon_di"
+  | T_floatHF -> "__builtin_neon_hf"
   | T_floatSF -> "__builtin_neon_sf"
   | T_arrayof (num, base) ->
       let basename = name (fun x -> x) base in
@@ -1852,12 +1889,6 @@ let string_of_inttype = function
   | B_CImode -> "__builtin_neon_ci"
   | B_XImode -> "__builtin_neon_xi"
 
-let string_of_mode = function
-    V8QI -> "v8qi" | V4HI  -> "v4hi"  | V2SI -> "v2si" | V2SF -> "v2sf"
-  | DI   -> "di"   | V16QI -> "v16qi" | V8HI -> "v8hi" | V4SI -> "v4si"
-  | V4SF -> "v4sf" | V2DI  -> "v2di"  | QI -> "qi" | HI -> "hi" | SI -> "si"
-  | SF -> "sf"
-
 (* Use uppercase chars for letters which form part of the intrinsic name, but
    should be omitted from the builtin name (the info is passed in an extra
    argument, instead).  *)