From: Matthew Malcomson <mmalcom...@nvidia.com> This commit just defines the new names -- it does not yet implement them. It is kept as a separate commit because it represents a single decision, and it records what that decision was and why:
This adds new floating point builtins for each floating point type that is defined in the general code *except* f128x (which would have a size greater than 16 bytes -- the largest integral atomic operation we currently support). We have to base our naming on floating point *types* rather than sizes since different types can have the same size and the operations need to be distinguished based on type. N.b. one could make size-suffixed builtins that are still overloaded based on types but I thought that this was the cleaner approach. (The actual requirement is distinction based on mode; this is how I choose which internal function to use in a later patch. I believe that defining the function in terms of types and internally mapping to modes is a sensible split between user interface and internal implementation.) N.b. to detect whether these operations are available in something like libstdc++ I use something like `__has_builtin(__atomic_fetch_add_fp)`. This happens to be the builtin for implementing the relevant operation on doubles, but it also seems like a nice name to check. - This would require that all compiler implementations have floating point atomics for all floating point types they support available at the same time. I don't expect this is much of a problem but invite dissent. N.b. I used the below type suffixes (following what seems like the existing convention for builtins): - float -> f - double -> <no suffix> - long double -> l - _FloatN -> fN (for N <- (16, 32, 64, 128)) - _FloatNx -> fNx (for N <- (32, 64)) Richi suggested doing this expansion generally for all these builtins following Cxy _Atomic semantics on IRC. 
Since C hasn't specified any fetch_<op> semantics for floating point types, C++ has only specified `atomic<>::fetch_{add,sub}`, and the operations other than these are all bitwise operations (which don't map well to floating point), I believe I have followed that suggestion by implementing all fetch_{sub,add}/{add,sub}_fetch operations. I have not implemented anything for the __sync_* builtins on the belief that these are legacy and new code should use the __atomic_* builtins. Happy to adjust if that is a bad choice. Only the new function types were needed for most cases. The Fortran frontend does not use `builtin-types.def` so it needed the Fortran `types.def` to be updated to include the floating point built-in types in `enum builtin_type` local to `gfc_init_builtin_functions`. - N.b. these types are already available in the Fortran frontend (being defined by `build_common_tree_nodes`), it's just that they were not available for sync-builtins.def functions until this commit. ------------------------------ N.b. for this RFC I've not checked that any other frontends can access these builtins. Even for the Fortran frontend I've only adjusted things enough to ensure that it builds. 
Signed-off-by: Matthew Malcomson <mmalcom...@nvidia.com> --- gcc/builtin-types.def | 20 ++++++++++++++++++ gcc/fortran/types.def | 48 +++++++++++++++++++++++++++++++++++++++++++ gcc/sync-builtins.def | 40 ++++++++++++++++++++++++++++++++++++ 3 files changed, 108 insertions(+) diff --git a/gcc/builtin-types.def b/gcc/builtin-types.def index c97d6bad1de..97ccd945b55 100644 --- a/gcc/builtin-types.def +++ b/gcc/builtin-types.def @@ -802,6 +802,26 @@ DEF_FUNCTION_TYPE_3 (BT_FN_VOID_VPTR_I2_INT, BT_VOID, BT_VOLATILE_PTR, BT_I2, BT DEF_FUNCTION_TYPE_3 (BT_FN_VOID_VPTR_I4_INT, BT_VOID, BT_VOLATILE_PTR, BT_I4, BT_INT) DEF_FUNCTION_TYPE_3 (BT_FN_VOID_VPTR_I8_INT, BT_VOID, BT_VOLATILE_PTR, BT_I8, BT_INT) DEF_FUNCTION_TYPE_3 (BT_FN_VOID_VPTR_I16_INT, BT_VOID, BT_VOLATILE_PTR, BT_I16, BT_INT) +DEF_FUNCTION_TYPE_3 (BT_FN_FLOAT_VPTR_FLOAT_INT, BT_FLOAT, BT_VOLATILE_PTR, + BT_FLOAT, BT_INT) +DEF_FUNCTION_TYPE_3 (BT_FN_DOUBLE_VPTR_DOUBLE_INT, BT_DOUBLE, BT_VOLATILE_PTR, + BT_DOUBLE, BT_INT) +DEF_FUNCTION_TYPE_3 (BT_FN_LONGDOUBLE_VPTR_LONGDOUBLE_INT, BT_LONGDOUBLE, + BT_VOLATILE_PTR, BT_LONGDOUBLE, BT_INT) +DEF_FUNCTION_TYPE_3 (BT_FN_BFLOAT16_VPTR_BFLOAT16_INT, BT_BFLOAT16, BT_VOLATILE_PTR, + BT_BFLOAT16, BT_INT) +DEF_FUNCTION_TYPE_3 (BT_FN_FLOAT16_VPTR_FLOAT16_INT, BT_FLOAT16, BT_VOLATILE_PTR, + BT_FLOAT16, BT_INT) +DEF_FUNCTION_TYPE_3 (BT_FN_FLOAT32_VPTR_FLOAT32_INT, BT_FLOAT32, BT_VOLATILE_PTR, + BT_FLOAT32, BT_INT) +DEF_FUNCTION_TYPE_3 (BT_FN_FLOAT64_VPTR_FLOAT64_INT, BT_FLOAT64, BT_VOLATILE_PTR, + BT_FLOAT64, BT_INT) +DEF_FUNCTION_TYPE_3 (BT_FN_FLOAT128_VPTR_FLOAT128_INT, BT_FLOAT128, BT_VOLATILE_PTR, + BT_FLOAT128, BT_INT) +DEF_FUNCTION_TYPE_3 (BT_FN_FLOAT32X_VPTR_FLOAT32X_INT, BT_FLOAT32X, BT_VOLATILE_PTR, + BT_FLOAT32X, BT_INT) +DEF_FUNCTION_TYPE_3 (BT_FN_FLOAT64X_VPTR_FLOAT64X_INT, BT_FLOAT64X, BT_VOLATILE_PTR, + BT_FLOAT64X, BT_INT) DEF_FUNCTION_TYPE_3 (BT_FN_INT_PTRPTR_SIZE_SIZE, BT_INT, BT_PTR_PTR, BT_SIZE, BT_SIZE) DEF_FUNCTION_TYPE_3 
(BT_FN_PTR_CONST_PTR_CONST_PTR_SIZE, BT_PTR, BT_CONST_PTR, BT_CONST_PTR, BT_SIZE) DEF_FUNCTION_TYPE_3 (BT_FN_BOOL_INT_INT_INTPTR, BT_BOOL, BT_INT, BT_INT, diff --git a/gcc/fortran/types.def b/gcc/fortran/types.def index 390cc9542f7..52695a39047 100644 --- a/gcc/fortran/types.def +++ b/gcc/fortran/types.def @@ -60,6 +60,34 @@ DEF_PRIMITIVE_TYPE (BT_I4, builtin_type_for_size (BITS_PER_UNIT*4, 1)) DEF_PRIMITIVE_TYPE (BT_I8, builtin_type_for_size (BITS_PER_UNIT*8, 1)) DEF_PRIMITIVE_TYPE (BT_I16, builtin_type_for_size (BITS_PER_UNIT*16, 1)) +DEF_PRIMITIVE_TYPE (BT_FLOAT, float_type_node) +DEF_PRIMITIVE_TYPE (BT_DOUBLE, double_type_node) +DEF_PRIMITIVE_TYPE (BT_LONGDOUBLE, long_double_type_node) +DEF_PRIMITIVE_TYPE (BT_BFLOAT16, (bfloat16_type_node + ? bfloat16_type_node + : error_mark_node)) +DEF_PRIMITIVE_TYPE (BT_FLOAT16, (float16_type_node + ? float16_type_node + : error_mark_node)) +DEF_PRIMITIVE_TYPE (BT_FLOAT32, (float32_type_node + ? float32_type_node + : error_mark_node)) +DEF_PRIMITIVE_TYPE (BT_FLOAT64, (float64_type_node + ? float64_type_node + : error_mark_node)) +DEF_PRIMITIVE_TYPE (BT_FLOAT128, (float128_type_node + ? float128_type_node + : error_mark_node)) +DEF_PRIMITIVE_TYPE (BT_FLOAT32X, (float32x_type_node + ? float32x_type_node + : error_mark_node)) +DEF_PRIMITIVE_TYPE (BT_FLOAT64X, (float64x_type_node + ? float64x_type_node + : error_mark_node)) +DEF_PRIMITIVE_TYPE (BT_FLOAT128X, (float128x_type_node + ? 
float128x_type_node + : error_mark_node)) + DEF_PRIMITIVE_TYPE (BT_PTR, ptr_type_node) DEF_PRIMITIVE_TYPE (BT_CONST_PTR, const_ptr_type_node) DEF_PRIMITIVE_TYPE (BT_VOLATILE_PTR, @@ -144,6 +172,26 @@ DEF_FUNCTION_TYPE_3 (BT_FN_I2_VPTR_I2_INT, BT_I2, BT_VOLATILE_PTR, BT_I2, BT_INT DEF_FUNCTION_TYPE_3 (BT_FN_I4_VPTR_I4_INT, BT_I4, BT_VOLATILE_PTR, BT_I4, BT_INT) DEF_FUNCTION_TYPE_3 (BT_FN_I8_VPTR_I8_INT, BT_I8, BT_VOLATILE_PTR, BT_I8, BT_INT) DEF_FUNCTION_TYPE_3 (BT_FN_I16_VPTR_I16_INT, BT_I16, BT_VOLATILE_PTR, BT_I16, BT_INT) +DEF_FUNCTION_TYPE_3 (BT_FN_FLOAT_VPTR_FLOAT_INT, BT_FLOAT, BT_VOLATILE_PTR, + BT_FLOAT, BT_INT) +DEF_FUNCTION_TYPE_3 (BT_FN_DOUBLE_VPTR_DOUBLE_INT, BT_DOUBLE, BT_VOLATILE_PTR, + BT_DOUBLE, BT_INT) +DEF_FUNCTION_TYPE_3 (BT_FN_LONGDOUBLE_VPTR_LONGDOUBLE_INT, BT_LONGDOUBLE, + BT_VOLATILE_PTR, BT_LONGDOUBLE, BT_INT) +DEF_FUNCTION_TYPE_3 (BT_FN_BFLOAT16_VPTR_BFLOAT16_INT, BT_BFLOAT16, BT_VOLATILE_PTR, + BT_BFLOAT16, BT_INT) +DEF_FUNCTION_TYPE_3 (BT_FN_FLOAT16_VPTR_FLOAT16_INT, BT_FLOAT16, BT_VOLATILE_PTR, + BT_FLOAT16, BT_INT) +DEF_FUNCTION_TYPE_3 (BT_FN_FLOAT32_VPTR_FLOAT32_INT, BT_FLOAT32, BT_VOLATILE_PTR, + BT_FLOAT32, BT_INT) +DEF_FUNCTION_TYPE_3 (BT_FN_FLOAT64_VPTR_FLOAT64_INT, BT_FLOAT64, BT_VOLATILE_PTR, + BT_FLOAT64, BT_INT) +DEF_FUNCTION_TYPE_3 (BT_FN_FLOAT128_VPTR_FLOAT128_INT, BT_FLOAT128, BT_VOLATILE_PTR, + BT_FLOAT128, BT_INT) +DEF_FUNCTION_TYPE_3 (BT_FN_FLOAT32X_VPTR_FLOAT32X_INT, BT_FLOAT32X, BT_VOLATILE_PTR, + BT_FLOAT32X, BT_INT) +DEF_FUNCTION_TYPE_3 (BT_FN_FLOAT64X_VPTR_FLOAT64X_INT, BT_FLOAT64X, BT_VOLATILE_PTR, + BT_FLOAT64X, BT_INT) DEF_FUNCTION_TYPE_3 (BT_FN_VOID_VPTR_I1_INT, BT_VOID, BT_VOLATILE_PTR, BT_I1, BT_INT) DEF_FUNCTION_TYPE_3 (BT_FN_VOID_VPTR_I2_INT, BT_VOID, BT_VOLATILE_PTR, BT_I2, BT_INT) DEF_FUNCTION_TYPE_3 (BT_FN_VOID_VPTR_I4_INT, BT_VOID, BT_VOLATILE_PTR, BT_I4, BT_INT) diff --git a/gcc/sync-builtins.def b/gcc/sync-builtins.def index b4ec3782799..89cc564a8f6 100644 --- a/gcc/sync-builtins.def +++ 
b/gcc/sync-builtins.def @@ -28,6 +28,30 @@ along with GCC; see the file COPYING3. If not see is supposed to be using. It's overloaded, and is resolved to one of the "_1" through "_16" versions, plus some extra casts. */ + +/* Same as DEF_GCC_FLOATN_NX_BUILTINS, except for sync builtins. + N.b. we do not define the f128x type because this would be larger than the + 16 byte integral types that we have atomic support for. That would mean + we couldn't implement them without adding special extra handling -- + especially because to act atomically on such large sizes all architectures + would require locking implementations added in libatomic. */ +#undef DEF_SYNC_FLOATN_NX_BUILTINS +#define DEF_SYNC_FLOATN_NX_BUILTINS(ENUM, NAME, TYPE_MACRO, ATTRS) \ + DEF_SYNC_BUILTIN (ENUM ## F16, NAME "f16", TYPE_MACRO (FLOAT16), ATTRS) \ + DEF_SYNC_BUILTIN (ENUM ## F32, NAME "f32", TYPE_MACRO (FLOAT32), ATTRS) \ + DEF_SYNC_BUILTIN (ENUM ## F64, NAME "f64", TYPE_MACRO (FLOAT64), ATTRS) \ + DEF_SYNC_BUILTIN (ENUM ## F128, NAME "f128", TYPE_MACRO (FLOAT128), ATTRS) \ + DEF_SYNC_BUILTIN (ENUM ## F32X, NAME "f32x", TYPE_MACRO (FLOAT32X), ATTRS) \ + DEF_SYNC_BUILTIN (ENUM ## F64X, NAME "f64x", TYPE_MACRO (FLOAT64X), ATTRS) + +#undef DEF_SYNC_FLOAT_BUILTINS +#define DEF_SYNC_FLOAT_BUILTINS(ENUM, NAME, TYPE_MACRO, ATTRS) \ + DEF_SYNC_BUILTIN (ENUM ## _FPF, NAME "_fpf", TYPE_MACRO (FLOAT), ATTRS) \ + DEF_SYNC_BUILTIN (ENUM ## _FP, NAME "_fp", TYPE_MACRO (DOUBLE), ATTRS) \ + DEF_SYNC_BUILTIN (ENUM ## _FPL, NAME "_fpl", TYPE_MACRO (LONGDOUBLE), ATTRS) \ + DEF_SYNC_BUILTIN (ENUM ## _FPF16B, NAME "_fpf16b", TYPE_MACRO (BFLOAT16), ATTRS) \ + DEF_SYNC_FLOATN_NX_BUILTINS (ENUM ## _FP, NAME "_fp", TYPE_MACRO, ATTRS) + DEF_SYNC_BUILTIN (BUILT_IN_SYNC_FETCH_AND_ADD_N, "__sync_fetch_and_add", BT_FN_VOID_VAR, ATTR_NOTHROWCALL_LEAF_LIST) DEF_SYNC_BUILTIN (BUILT_IN_SYNC_FETCH_AND_ADD_1, "__sync_fetch_and_add_1", @@ -378,6 +402,10 @@ DEF_SYNC_BUILTIN (BUILT_IN_ATOMIC_ADD_FETCH_8, DEF_SYNC_BUILTIN 
(BUILT_IN_ATOMIC_ADD_FETCH_16, "__atomic_add_fetch_16", BT_FN_I16_VPTR_I16_INT, ATTR_NOTHROWCALL_LEAF_LIST) +#define ADD_FETCH_TYPE(F) BT_FN_##F##_VPTR_##F##_INT +DEF_SYNC_FLOAT_BUILTINS (BUILT_IN_ATOMIC_ADD_FETCH, "__atomic_add_fetch", + ADD_FETCH_TYPE, ATTR_NOTHROWCALL_LEAF_LIST) +#undef ADD_FETCH_TYPE DEF_SYNC_BUILTIN (BUILT_IN_ATOMIC_SUB_FETCH_N, "__atomic_sub_fetch", @@ -397,6 +425,10 @@ DEF_SYNC_BUILTIN (BUILT_IN_ATOMIC_SUB_FETCH_8, DEF_SYNC_BUILTIN (BUILT_IN_ATOMIC_SUB_FETCH_16, "__atomic_sub_fetch_16", BT_FN_I16_VPTR_I16_INT, ATTR_NOTHROWCALL_LEAF_LIST) +#define SUB_FETCH_TYPE(F) BT_FN_##F##_VPTR_##F##_INT +DEF_SYNC_FLOAT_BUILTINS (BUILT_IN_ATOMIC_SUB_FETCH, "__atomic_sub_fetch", + SUB_FETCH_TYPE, ATTR_NOTHROWCALL_LEAF_LIST) +#undef SUB_FETCH_TYPE DEF_SYNC_BUILTIN (BUILT_IN_ATOMIC_AND_FETCH_N, "__atomic_and_fetch", @@ -492,6 +524,10 @@ DEF_SYNC_BUILTIN (BUILT_IN_ATOMIC_FETCH_ADD_8, DEF_SYNC_BUILTIN (BUILT_IN_ATOMIC_FETCH_ADD_16, "__atomic_fetch_add_16", BT_FN_I16_VPTR_I16_INT, ATTR_NOTHROWCALL_LEAF_LIST) +#define FETCH_ADD_TYPE(F) BT_FN_##F##_VPTR_##F##_INT +DEF_SYNC_FLOAT_BUILTINS (BUILT_IN_ATOMIC_FETCH_ADD, "__atomic_fetch_add", + FETCH_ADD_TYPE, ATTR_NOTHROWCALL_LEAF_LIST) +#undef FETCH_ADD_TYPE DEF_SYNC_BUILTIN (BUILT_IN_ATOMIC_FETCH_SUB_N, "__atomic_fetch_sub", @@ -511,6 +547,10 @@ DEF_SYNC_BUILTIN (BUILT_IN_ATOMIC_FETCH_SUB_8, DEF_SYNC_BUILTIN (BUILT_IN_ATOMIC_FETCH_SUB_16, "__atomic_fetch_sub_16", BT_FN_I16_VPTR_I16_INT, ATTR_NOTHROWCALL_LEAF_LIST) +#define FETCH_SUB_TYPE(F) BT_FN_##F##_VPTR_##F##_INT +DEF_SYNC_FLOAT_BUILTINS (BUILT_IN_ATOMIC_FETCH_SUB, "__atomic_fetch_sub", + FETCH_SUB_TYPE, ATTR_NOTHROWCALL_LEAF_LIST) +#undef FETCH_SUB_TYPE DEF_SYNC_BUILTIN (BUILT_IN_ATOMIC_FETCH_AND_N, "__atomic_fetch_and", -- 2.43.0