Kewen, GCC maintainers:
Version 3, was able to get the overloaded version of scalar_insert_exp
to work and the change to xsxexpqp_f128_<mode> define instruction to
work with the suggestions from Kewen.
Version 2, I have addressed the various comments from Kewen. I had
issues with adding an additional overloaded version of
scalar_insert_exp with vector arguments. The overload infrastructure
didn't work with a mix of scalar and vector arguments. I did rename
the __builtin_insertf128_exp to __builtin_vsx_scalar_insert_exp_qp make
it similar to the existing builtin. I also wasn't able to get the
suggested merge of xsxexpqp_f128_<mode> with xsxexpqp_<mode> to work so
I left the two simpler definitiions.
The patch add three new builtins to extract the significand and
exponent of an IEEE float 128-bit value where the builtin argument is a
vector. Additionally, a builtin to insert the exponent into an IEEE
float 128-bit vector argument is added. These builtins were requested
since there is no clean and optimal way to transfer between a vector
and a scalar IEEE 128 bit value.
The patch has been tested on Power 10 with no regressions. Please let
me know if the patch is acceptable or not. Thanks.
Carl
---------------------------------------
rs6000: Add builtins for IEEE 128-bit floating point values
Add support for the following builtins:
__vector unsigned long long int
__builtin_scalar_extract_exp_to_vec (__ieee128);
__vector unsigned __int128
__builtin_scalar_extract_sig_to_vec (__ieee128);
__ieee128 scalar_insert_exp (__vector unsigned __int128,
__vector unsigned long long);
These builtins were requesed since there is no clean and performant way to
transfer a value from a vector type and scalar type, despite the fact
that they both reside in vector registers.
gcc/
* config/rs6000/rs6000-builtin.cc (rs6000_expand_builtin):
Rename CCDE_FOR_xsxexpqp_tf to CODE_FOR_xsxexpqp_tf_di.
Rename CODE_FOR_xsxexpqp_kf to CODE_FOR_xsxexpqp_kf_di.
* config/rs6000/rs6000-buildin.def (__builtin_extractf128_exp,
__builtin_extractf128_sig, __builtin_insertf128_exp): Add new
builtin definitions.
Rename xsxexpqp_kf to xsxexpqp_kf_di.
* config/rs6000/rs6000-c.cc (altivec_resolve_overloaded_builtin):
Add else if for MODE_VECTOR_INT. Update comments.
* config/rs6000/rs6000-overload.def
(__builtin_vec_scalar_insert_exp): Add new overload definition with
vector arguments.
* config/vsx.md (VSEEQP_DI): New mode iterator.
Rename define_insn xsxexpqp_<mode> to
sxexpqp_<IEEE128:mode>_<VSEEQP_DI:mode>.
(xsxsigqp_f128_<mode>, xsiexpqpf_f128_<mode>): Add define_insn for
new builtins.
* doc/extend.texi (__builtin_extractf128_exp,
__builtin_extractf128_sig): Add documentation for new builtins.
(scalar_insert_exp): Add new overloaded builtin definition.
gcc/testsuite/
* gcc.target/powerpc/bfp/extract-exp-ieee128.c: New test case.
* gcc.target/powerpc/bfp/extract-sig-ieee128.c: New test case.
* gcc.target/powerpc/bfp/insert-exp-ieee128.c: New test case.
---
gcc/config/rs6000/rs6000-builtin.cc | 4 +-
gcc/config/rs6000/rs6000-builtins.def | 11 ++-
gcc/config/rs6000/rs6000-c.cc | 10 +-
gcc/config/rs6000/rs6000-overload.def | 2 +
gcc/config/rs6000/vsx.md | 28 +++++-
gcc/doc/extend.texi | 9 ++
.../powerpc/bfp/extract-exp-ieee128.c | 50 ++++++++++
.../powerpc/bfp/extract-sig-ieee128.c | 57 ++++++++++++
.../powerpc/bfp/insert-exp-ieee128.c | 91 +++++++++++++++++++
9 files changed, 253 insertions(+), 9 deletions(-)
create mode 100644 gcc/testsuite/gcc.target/powerpc/bfp/extract-exp-ieee128.c
create mode 100644 gcc/testsuite/gcc.target/powerpc/bfp/extract-sig-ieee128.c
create mode 100644 gcc/testsuite/gcc.target/powerpc/bfp/insert-exp-ieee128.c
diff --git a/gcc/config/rs6000/rs6000-builtin.cc
b/gcc/config/rs6000/rs6000-builtin.cc
index 534698e7d3e..d99f0ae5dda 100644
--- a/gcc/config/rs6000/rs6000-builtin.cc
+++ b/gcc/config/rs6000/rs6000-builtin.cc
@@ -3326,8 +3326,8 @@ rs6000_expand_builtin (tree exp, rtx target, rtx /*
subtarget */,
case CODE_FOR_fmakf4_odd:
icode = CODE_FOR_fmatf4_odd;
break;
- case CODE_FOR_xsxexpqp_kf:
- icode = CODE_FOR_xsxexpqp_tf;
+ case CODE_FOR_xsxexpqp_kf_di:
+ icode = CODE_FOR_xsxexpqp_tf_di;
break;
case CODE_FOR_xsxsigqp_kf:
icode = CODE_FOR_xsxsigqp_tf;
diff --git a/gcc/config/rs6000/rs6000-builtins.def
b/gcc/config/rs6000/rs6000-builtins.def
index 638d0bc72ca..dcd4a393906 100644
--- a/gcc/config/rs6000/rs6000-builtins.def
+++ b/gcc/config/rs6000/rs6000-builtins.def
@@ -2901,8 +2901,14 @@
fpmath double __builtin_truncf128_round_to_odd (_Float128);
TRUNCF128_ODD trunckfdf2_odd {}
+ vull __builtin_scalar_extract_exp_to_vec (_Float128);
+ EEXPKF xsxexpqp_kf_v2di {}
+
+ vuq __builtin_scalar_extract_sig_to_vec (_Float128);
+ ESIGKF xsxsigqp_f128_kf {}
+
const signed long long __builtin_vsx_scalar_extract_expq (_Float128);
- VSEEQP xsxexpqp_kf {}
+ VSEEQP xsxexpqp_kf_di {}
const signed __int128 __builtin_vsx_scalar_extract_sigq (_Float128);
VSESQP xsxsigqp_kf {}
@@ -2915,6 +2921,9 @@
unsigned long long);
VSIEQPF xsiexpqpf_kf {}
+ const _Float128 __builtin_vsx_scalar_insert_exp_vqp (vuq, vull);
+ VSIEQPV xsiexpqpf_f128_kf {}
+
const signed int __builtin_vsx_scalar_test_data_class_qp (_Float128, \
const int<7>);
VSTDCQP xststdcqp_kf {}
diff --git a/gcc/config/rs6000/rs6000-c.cc b/gcc/config/rs6000/rs6000-c.cc
index 8555174d36e..11060f697db 100644
--- a/gcc/config/rs6000/rs6000-c.cc
+++ b/gcc/config/rs6000/rs6000-c.cc
@@ -1929,11 +1929,15 @@ altivec_resolve_overloaded_builtin (location_t loc,
tree fndecl,
128-bit variant of built-in function. */
if (GET_MODE_PRECISION (arg1_mode) > 64)
{
- /* If first argument is of float variety, choose variant
- that expects __ieee128 argument. Otherwise, expect
- __int128 argument. */
+ /* If first argument is of float variety, choose the variant that
+ expects __ieee128 argument. If the first argument is vector
+ int, choose the variant that expects vector unsigned
+ __int128 argument. Otherwise, expect scalar __int128 argument.
+ */
if (GET_MODE_CLASS (arg1_mode) == MODE_FLOAT)
instance_code = RS6000_BIF_VSIEQPF;
+ else if (GET_MODE_CLASS (arg1_mode) == MODE_VECTOR_INT)
+ instance_code = RS6000_BIF_VSIEQPV;
else
instance_code = RS6000_BIF_VSIEQP;
}
diff --git a/gcc/config/rs6000/rs6000-overload.def
b/gcc/config/rs6000/rs6000-overload.def
index c582490c084..84743114c8e 100644
--- a/gcc/config/rs6000/rs6000-overload.def
+++ b/gcc/config/rs6000/rs6000-overload.def
@@ -4515,6 +4515,8 @@
VSIEQP
_Float128 __builtin_vec_scalar_insert_exp (_Float128, unsigned long long);
VSIEQPF
+ _Float128 __builtin_vec_scalar_insert_exp (vuq, vull);
+ VSIEQPV
[VEC_VSTDC, scalar_test_data_class, __builtin_vec_scalar_test_data_class]
unsigned int __builtin_vec_scalar_test_data_class (float, const int);
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index 7d845df5c2d..92a34dec1be 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -396,6 +396,7 @@
V4SF
V2DF
V2DI])
+(define_mode_iterator VSEEQP_DI [V2DI DI])
(define_mode_attr VM3_char [(V2DI "d")
(V4SI "w")
@@ -5008,9 +5009,10 @@
;; ISA 3.0 Binary Floating-Point Support
;; VSX Scalar Extract Exponent Quad-Precision
-(define_insn "xsxexpqp_<mode>"
- [(set (match_operand:DI 0 "altivec_register_operand" "=v")
- (unspec:DI [(match_operand:IEEE128 1 "altivec_register_operand" "v")]
+(define_insn "xsxexpqp_<IEEE128:mode>_<VSEEQP_DI:mode>"
+ [(set (match_operand:VSEEQP_DI 0 "altivec_register_operand" "=v")
+ (unspec:VSEEQP_DI
+ [(match_operand:IEEE128 1 "altivec_register_operand" "v")]
UNSPEC_VSX_SXEXPDP))]
"TARGET_P9_VECTOR"
"xsxexpqp %0,%1"
@@ -5034,6 +5036,15 @@
"xsxsigqp %0,%1"
[(set_attr "type" "vecmove")])
+;; VSX Scalar to Vector Extract Significand IEEE 128-bit floating point format
+(define_insn "xsxsigqp_f128_<mode>"
+ [(set (match_operand:V1TI 0 "altivec_register_operand" "=v")
+ (unspec:V1TI [(match_operand:IEEE128 1 "altivec_register_operand" "v")]
+ UNSPEC_VSX_SXSIG))]
+ "TARGET_P9_VECTOR"
+ "xsxsigqp %0,%1"
+ [(set_attr "type" "vecmove")])
+
;; VSX Scalar Extract Significand Double-Precision
(define_insn "xsxsigdp"
[(set (match_operand:DI 0 "register_operand" "=r")
@@ -5054,6 +5065,17 @@
"xsiexpqp %0,%1,%2"
[(set_attr "type" "vecmove")])
+;; VSX Insert Exponent IEEE 128-bit Floating point format
+(define_insn "xsiexpqpf_f128_<mode>"
+ [(set (match_operand:IEEE128 0 "altivec_register_operand" "=v")
+ (unspec:IEEE128
+ [(match_operand:V1TI 1 "altivec_register_operand" "v")
+ (match_operand:V2DI 2 "altivec_register_operand" "v")]
+ UNSPEC_VSX_SIEXPQP))]
+ "TARGET_P9_VECTOR"
+ "xsiexpqp %0,%1,%2"
+ [(set_attr "type" "vecmove")])
+
;; VSX Scalar Insert Exponent Quad-Precision
(define_insn "xsiexpqp_<mode>"
[(set (match_operand:IEEE128 0 "altivec_register_operand" "=v")
diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
index e426a2eb7d8..625df24b62f 100644
--- a/gcc/doc/extend.texi
+++ b/gcc/doc/extend.texi
@@ -19724,6 +19724,10 @@ double scalar_insert_exp (double significand, unsigned
long long int exponent);
ieee_128 scalar_insert_exp (unsigned __int128 significand,
unsigned long long int exponent);
ieee_128 scalar_insert_exp (ieee_128 significand, unsigned long long int
exponent);
+vector ieee_128 scalar_insert_exp (vector unsigned __int128,
+ vector unsigned long long);
+vector unsigned long long __builtin_scalar_extract_exp_to_vec (ieee_128);
+vector unsigned __int128 __builtin_scalar_extract_sig_to_vec (ieee_128);
int scalar_cmp_exp_gt (double arg1, double arg2);
int scalar_cmp_exp_lt (double arg1, double arg2);
@@ -19777,6 +19781,11 @@ The significand and exponent components of the result
are composed of
the least significant 15 bits of the @code{exponent} argument and the
least significant 112 bits of the @code{significand} argument respectively.
+The @code{__builtin_scalar_extract_exp_to_vec},
+and @code{__builtin_scalar_extract_sig_to_vec} are similar to
+@code{scalar_extract_exp}, @code{scalar_extract_sig} except they operate
+on vector arguments.
+
The @code{scalar_cmp_exp_gt}, @code{scalar_cmp_exp_lt},
@code{scalar_cmp_exp_eq}, and @code{scalar_cmp_exp_unordered} built-in
functions return a non-zero value if @code{arg1} is greater than, less
diff --git a/gcc/testsuite/gcc.target/powerpc/bfp/extract-exp-ieee128.c
b/gcc/testsuite/gcc.target/powerpc/bfp/extract-exp-ieee128.c
new file mode 100644
index 00000000000..39981f0a274
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/bfp/extract-exp-ieee128.c
@@ -0,0 +1,50 @@
+/* { dg-do run { target { powerpc*-*-* } } } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-require-effective-target p9vector_hw } */
+/* { dg-options "-mdejagnu-cpu=power9" } */
+
+#include <altivec.h>
+#include <stdlib.h>
+
+#if DEBUG
+#include <stdio.h>
+#endif
+
+vector unsigned long long int
+get_exponents (__ieee128 *p)
+{
+ __ieee128 source = *p;
+
+ return __builtin_scalar_extract_exp_to_vec (source);
+}
+
+int
+main ()
+{
+ vector unsigned long long int result, exp_result;
+ union conv128_t
+ {
+ __ieee128 val_ieee128;
+ __int128 val_int128;
+ } source;
+
+ exp_result[0] = 0x0ULL;
+ exp_result[1] = 0x1234ULL;
+ source.val_int128 = 0x923456789ABCDEF0ULL;
+ source.val_int128 = (source.val_int128 << 64) | 0x123456789ABCDEFULL;
+
+ result = get_exponents (&source.val_ieee128);
+
+ if ((result[0] != exp_result[0]) || (result[1] != exp_result[1]))
+#if DEBUG
+ {
+ printf("result[0] = 0x%llx; exp_result[0] = 0x%llx\n",
+ result[0], exp_result[0]);
+ printf("result[1] = 0x%llx; exp_result[1] = 0x%llx\n",
+ result[1], exp_result[1]);
+ }
+#else
+ abort();
+#endif
+ return 0;
+}
diff --git a/gcc/testsuite/gcc.target/powerpc/bfp/extract-sig-ieee128.c
b/gcc/testsuite/gcc.target/powerpc/bfp/extract-sig-ieee128.c
new file mode 100644
index 00000000000..f7b3aedb832
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/bfp/extract-sig-ieee128.c
@@ -0,0 +1,57 @@
+/* { dg-do run { target { powerpc*-*-* } } } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-require-effective-target p9vector_hw } */
+/* { dg-options "-mdejagnu-cpu=power9" } */
+
+#include <altivec.h>
+#include <stdlib.h>
+
+#if DEBUG
+#include <stdio.h>
+#endif
+
+vector unsigned __int128
+get_significand (__ieee128 *p)
+{
+ __ieee128 source = *p;
+
+ return __builtin_scalar_extract_sig_to_vec(source);
+}
+
+int
+main ()
+{
+ #define NOT_ZERO_OR_DENORMAL 0x1000000000000
+
+ union conv128_t
+ {
+ __ieee128 val_ieee128;
+ unsigned long long int val_ull[2];
+ unsigned __int128 val_uint128;
+ __vector unsigned __int128 val_vuint128;
+ } source, result, exp_result;
+
+ /* Result is not zero or denormal. */
+ exp_result.val_ull[1] = 0x00056789ABCDEF0ULL | NOT_ZERO_OR_DENORMAL;
+ exp_result.val_ull[0] = 0x123456789ABCDEFULL;
+ source.val_uint128 = 0x923456789ABCDEF0ULL;
+ source.val_uint128 = (source.val_uint128 << 64) | 0x123456789ABCDEFULL;
+
+ /* Note, bits[0:14] are set to 0, bit[15] is 0 if the input was zero or
+ Denormal, 1 otherwise. */
+ result.val_vuint128 = get_significand (&source.val_ieee128);
+
+ if ((result.val_ull[0] != exp_result.val_ull[0])
+ || (result.val_ull[1] != exp_result.val_ull[1]))
+#if DEBUG
+ {
+ printf("result[0] = 0x%llx; exp_result[0] = 0x%llx\n",
+ result.val_ull[0], exp_result.val_ull[0]);
+ printf("result[1] = 0x%llx; exp_result[1] = 0x%llx\n",
+ result.val_ull[1], exp_result.val_ull[1]);
+ }
+#else
+ abort();
+#endif
+ return 0;
+}
diff --git a/gcc/testsuite/gcc.target/powerpc/bfp/insert-exp-ieee128.c
b/gcc/testsuite/gcc.target/powerpc/bfp/insert-exp-ieee128.c
new file mode 100644
index 00000000000..402dc48ad9d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/bfp/insert-exp-ieee128.c
@@ -0,0 +1,91 @@
+/* { dg-do run { target { powerpc*-*-* } } } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-require-effective-target p9vector_hw } */
+/* { dg-options "-mdejagnu-cpu=power9" } */
+
+#include <altivec.h>
+#include <stdlib.h>
+
+#ifdef DEBUG
+#include <stdio.h>
+#endif
+
+__ieee128
+insert_exponent (__vector unsigned __int128 *significand_p,
+ __vector unsigned long long int *exponent_p)
+{
+ __vector unsigned __int128 significand = *significand_p;
+ __vector unsigned long long int exponent = *exponent_p;
+
+ return scalar_insert_exp (significand, exponent);
+}
+
+__ieee128
+insert_exponent2 (unsigned __int128 significand,
+ unsigned long long int exponent)
+{
+ return scalar_insert_exp (significand, exponent);
+}
+
+int
+main ()
+{
+ __ieee128 val_ieee128, result_ieee128, exp_result_ieee128;
+ unsigned __int128 val_int128;
+ unsigned long long int val_ull;
+ union conv128_t
+ {
+ __ieee128 val_ieee128;
+ __vector unsigned __int128 val_vint128;
+ __vector unsigned long long int val_vull;
+ } result, exp_result, significand;
+
+ __vector unsigned long long int exponent;
+
+ /* Scalar argument test */
+ val_ieee128 = 0xFEDCBA9876543210ULL;
+ val_ull = 0x5678;
+ exp_result.val_vull[1] = 0x5678000000000000ULL;
+ exp_result.val_vull[0] = 0xfedcba9876543210;
+ result_ieee128 = insert_exponent2 (val_ieee128, val_ull);
+
+ if (result_ieee128 != exp_result.val_ieee128)
+#ifdef DEBUG
+ {
+ result.val_ieee128 = result_ieee128;
+ printf("Scalar argument ERROR:\n");
+ printf(" val_ieee128 = 0x%llx %llx\n",
+ result.val_vull[1], result.val_vull[0]);
+ printf(" val_ieee128 = 0x%llx %llx\n",
+ exp_result.val_vull[1], exp_result.val_vull[0]);
+ }
+#else
+ abort ();
+#endif
+
+ /* Vector argument test */
+ significand.val_vull[0] = 0xFEDCBA9876543210ULL;
+ significand.val_vull[1] = 0x7FFF12345678ABCDULL; /* positive value */
+
+ exponent[0] = 0x5678;
+ exponent[1] = 0x1234;
+
+ exp_result.val_vull[0] = 0xFEDCBA9876543210ULL;
+ exp_result.val_vull[1] = 0x123412345678ABCDULL;
+
+ result.val_ieee128 = insert_exponent(&significand.val_vint128, &exponent);
+
+ if (result.val_ieee128 != exp_result.val_ieee128)
+#ifdef DEBUG
+ {
+ printf("Vector argument ERROR:\n");
+ printf(" result = 0x%llx %llx\n",
+ result.val_vull[1], result.val_vull[0]);
+ printf(" exp_result = 0x%llx %llx\n",
+ exp_result.val_vull[1], exp_result.val_vull[0]);
+ }
+#else
+ abort ();
+#endif
+
+}
--
2.37.2