[PATCH] D26534: [PPC] add altivec.h functions for converting a vector of half precision to a vector of single precision

Sean Fertile via cfe-commits Thu, 10 Nov 2016 18:07:54 -0800

sfertile created this revision.
sfertile added reviewers: nemanjai, amehsan, kbarton, syzaara, jtony, lei.
sfertile added subscribers: cfe-commits, echristo.
sfertile set the repository for this revision to rL LLVM.


Change adds 2 functions to altivec.h

vector float vec_extract_fp32_from_shorth (vector unsigned short);
Purpose:
Extracts four single-precision floating-point numbers from the high elements of 
a vector of eight 16-bit elements, interpreting each element as a 16-bit 
floating-point number in IEEE format.
Result value:
The first four elements are interpreted as 16-bit floating-point numbers in 
IEEE format, and extended to single-precision format, returning a vector with 
four single-precision IEEE numbers.

vector float vec_extract_fp32_from_shortl (vector unsigned short);
Purpose
Extracts four single-precision floating-point numbers from the low elements of 
a vector of eight 16-bit elements, interpreting each element as a 16-bit 
floating-point number in IEEE format.
Result value:
The last four elements are interpreted as 16-bit floating-point numbers in IEEE 
format, and extended to single-precision format, returning a vector with four 
single-precision IEEE numbers.

Adds a builtin that gets mapped to the xscvhpsp instruction. (VSX Scalar 
Convert Half-Precision to Double-Precision)


Repository:
  rL LLVM

https://reviews.llvm.org/D26534

Files:
  include/clang/Basic/BuiltinsPPC.def
  lib/Headers/altivec.h
  test/CodeGen/builtins-ppc-p9vector.c


Index: test/CodeGen/builtins-ppc-p9vector.c
===================================================================
--- test/CodeGen/builtins-ppc-p9vector.c
+++ test/CodeGen/builtins-ppc-p9vector.c
@@ -827,4 +827,21 @@
 // CHECK-NEXT: ret <16 x i8>
   return vec_srv (vuca, vucb);
 }
-
+vector float test74(void) {
+// CHECK-BE: shufflevector <8 x i16> {{.+}}, <8 x i16> {{.+}}, <8 x i32> <i32 
undef, i32 0, i32 undef, i32 1, i32 undef, i32 2, i32 undef, i32 3>
+// CHECK-BE: @llvm.ppc.vsx.xvcvhpsp(<8 x i16> {{.+}})
+// CHECK-BE-NEXT: ret <4 x float>
+// CHECK-LE: shufflevector <8 x i16> {{.+}}, <8 x i16> {{.+}}, <8 x i32> <i32 
0, i32 undef, i32 1, i32 undef, i32 2, i32 undef, i32 3, i32 undef>
+// CHECK-LE: @llvm.ppc.vsx.xvcvhpsp(<8 x i16> {{.+}})
+// CHECK-LE-NEXT: ret <4 x float>
+  return vec_extract_fp32_from_shorth(vusa);
+}
+vector float test75(void) {
+// CHECK-BE: shufflevector <8 x i16> {{.+}}, <8 x i16> {{.+}}, <8 x i32> <i32 
undef, i32 4, i32 undef, i32 5, i32 undef, i32 6, i32 undef, i32 7>
+// CHECK-BE: @llvm.ppc.vsx.xvcvhpsp(<8 x i16> {{.+}})
+// CHECK-BE-NEXT: ret <4 x float>
+// CHECK-LE: shufflevector <8 x i16> {{.+}}, <8 x i16> {{.+}}, <8 x i32> <i32 
4, i32 undef, i32 5, i32 undef, i32 6, i32 undef, i32 7, i32 undef>
+// CHECK-LE: @llvm.ppc.vsx.xvcvhpsp(<8 x i16> {{.+}})
+// CHECK-LE-NEXT: ret <4 x float>
+  return vec_extract_fp32_from_shortl(vusa);
+}
Index: lib/Headers/altivec.h
===================================================================
--- lib/Headers/altivec.h
+++ lib/Headers/altivec.h
@@ -11902,6 +11902,30 @@
   return __a[__b];
 }
 
+#if defined(__POWER9_VECTOR__)
+static __inline__ vector float __ATTRS_o_ai
+vec_extract_fp32_from_shorth(vector unsigned short __a) {
+  vector unsigned short __b =
+#ifdef __LITTLE_ENDIAN__
+            __builtin_shufflevector(__a, __a, 0, -1, 1, -1, 2, -1, 3, -1);
+#else
+            __builtin_shufflevector(__a, __a, -1, 0, -1, 1, -1, 2, -1, 3);
+#endif
+  return __builtin_vsx_xvcvhpsp(__b);
+}
+
+static __inline__ vector float __ATTRS_o_ai
+vec_extract_fp32_from_shortl(vector unsigned short __a) {
+  vector unsigned short __b =
+#ifdef __LITTLE_ENDIAN__
+            __builtin_shufflevector(__a, __a, 4, -1, 5, -1, 6, -1, 7, -1);
+#else
+            __builtin_shufflevector(__a, __a, -1, 4, -1, 5, -1, 6, -1, 7);
+#endif
+  return __builtin_vsx_xvcvhpsp(__b);
+}
+#endif /* __POWER9_VECTOR__ */
+
 /* vec_insert */
 
 static __inline__ vector signed char __ATTRS_o_ai
Index: include/clang/Basic/BuiltinsPPC.def
===================================================================
--- include/clang/Basic/BuiltinsPPC.def
+++ include/clang/Basic/BuiltinsPPC.def
@@ -380,6 +380,8 @@
 BUILTIN(__builtin_vsx_xviexpdp, "V2dV2ULLiV2ULLi", "")
 BUILTIN(__builtin_vsx_xviexpsp, "V4fV4UiV4Ui", "")
 
+BUILTIN(__builtin_vsx_xvcvhpsp, "V4fV8Us", "")
+
 // HTM builtins
 BUILTIN(__builtin_tbegin, "UiUIi", "")
 BUILTIN(__builtin_tend, "UiUIi", "")

Index: test/CodeGen/builtins-ppc-p9vector.c
===================================================================
--- test/CodeGen/builtins-ppc-p9vector.c
+++ test/CodeGen/builtins-ppc-p9vector.c
@@ -827,4 +827,21 @@
 // CHECK-NEXT: ret <16 x i8>
   return vec_srv (vuca, vucb);
 }
-
+vector float test74(void) {
+// CHECK-BE: shufflevector <8 x i16> {{.+}}, <8 x i16> {{.+}}, <8 x i32> <i32 undef, i32 0, i32 undef, i32 1, i32 undef, i32 2, i32 undef, i32 3>
+// CHECK-BE: @llvm.ppc.vsx.xvcvhpsp(<8 x i16> {{.+}})
+// CHECK-BE-NEXT: ret <4 x float>
+// CHECK-LE: shufflevector <8 x i16> {{.+}}, <8 x i16> {{.+}}, <8 x i32> <i32 0, i32 undef, i32 1, i32 undef, i32 2, i32 undef, i32 3, i32 undef>
+// CHECK-LE: @llvm.ppc.vsx.xvcvhpsp(<8 x i16> {{.+}})
+// CHECK-LE-NEXT: ret <4 x float>
+  return vec_extract_fp32_from_shorth(vusa);
+}
+vector float test75(void) {
+// CHECK-BE: shufflevector <8 x i16> {{.+}}, <8 x i16> {{.+}}, <8 x i32> <i32 undef, i32 4, i32 undef, i32 5, i32 undef, i32 6, i32 undef, i32 7>
+// CHECK-BE: @llvm.ppc.vsx.xvcvhpsp(<8 x i16> {{.+}})
+// CHECK-BE-NEXT: ret <4 x float>
+// CHECK-LE: shufflevector <8 x i16> {{.+}}, <8 x i16> {{.+}}, <8 x i32> <i32 4, i32 undef, i32 5, i32 undef, i32 6, i32 undef, i32 7, i32 undef>
+// CHECK-LE: @llvm.ppc.vsx.xvcvhpsp(<8 x i16> {{.+}})
+// CHECK-LE-NEXT: ret <4 x float>
+  return vec_extract_fp32_from_shortl(vusa);
+}
Index: lib/Headers/altivec.h
===================================================================
--- lib/Headers/altivec.h
+++ lib/Headers/altivec.h
@@ -11902,6 +11902,30 @@
   return __a[__b];
 }
 
+#if defined(__POWER9_VECTOR__)
+static __inline__ vector float __ATTRS_o_ai
+vec_extract_fp32_from_shorth(vector unsigned short __a) {
+  vector unsigned short __b =
+#ifdef __LITTLE_ENDIAN__
+            __builtin_shufflevector(__a, __a, 0, -1, 1, -1, 2, -1, 3, -1);
+#else
+            __builtin_shufflevector(__a, __a, -1, 0, -1, 1, -1, 2, -1, 3);
+#endif
+  return __builtin_vsx_xvcvhpsp(__b);
+}
+
+static __inline__ vector float __ATTRS_o_ai
+vec_extract_fp32_from_shortl(vector unsigned short __a) {
+  vector unsigned short __b =
+#ifdef __LITTLE_ENDIAN__
+            __builtin_shufflevector(__a, __a, 4, -1, 5, -1, 6, -1, 7, -1);
+#else
+            __builtin_shufflevector(__a, __a, -1, 4, -1, 5, -1, 6, -1, 7);
+#endif
+  return __builtin_vsx_xvcvhpsp(__b);
+}
+#endif /* __POWER9_VECTOR__ */
+
 /* vec_insert */
 
 static __inline__ vector signed char __ATTRS_o_ai
Index: include/clang/Basic/BuiltinsPPC.def
===================================================================
--- include/clang/Basic/BuiltinsPPC.def
+++ include/clang/Basic/BuiltinsPPC.def
@@ -380,6 +380,8 @@
 BUILTIN(__builtin_vsx_xviexpdp, "V2dV2ULLiV2ULLi", "")
 BUILTIN(__builtin_vsx_xviexpsp, "V4fV4UiV4Ui", "")
 
+BUILTIN(__builtin_vsx_xvcvhpsp, "V4fV8Us", "")
+
 // HTM builtins
 BUILTIN(__builtin_tbegin, "UiUIi", "")
 BUILTIN(__builtin_tend, "UiUIi", "")

_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D26534: [PPC] add altivec.h functions for converting a vector of half precision to a vector of single precision

Reply via email to