On Wed, 9 Aug 2023, Jakub Jelinek wrote: > Hi! > > The following patch introduces some -fsanitize=undefined support for _BitInt, > but some of the diagnostics are limited by the lack of proper support in the > library. > I've filed https://github.com/llvm/llvm-project/issues/64100 to request > proper support; for now some of the diagnostics might have more or less > confusing or inaccurate wording, but UB should still be diagnosed when it > happens.
OK, you're the expert here. Richard. > 2023-08-09 Jakub Jelinek <ja...@redhat.com> > > PR c/102989 > gcc/ > * internal-fn.cc (expand_ubsan_result_store): Add LHS, MODE and > DO_ERROR arguments. For non-mode precision BITINT_TYPE results > check if all padding bits up to mode precision are zeros or sign > bit copies and if not, jump to DO_ERROR. > (expand_addsub_overflow, expand_neg_overflow, expand_mul_overflow): > Adjust expand_ubsan_result_store callers. > * ubsan.cc: Include target.h and langhooks.h. > (ubsan_encode_value): Pass BITINT_TYPE values which fit into pointer > size converted to pointer sized integer, pass BITINT_TYPE values > which fit into TImode (if supported) or DImode as those integer types > or otherwise for now punt (pass 0). > (ubsan_type_descriptor): Handle BITINT_TYPE. For pstyle of > UBSAN_PRINT_FORCE_INT use TK_Integer (0x0000) mode with a > TImode/DImode precision rather than TK_Unknown used otherwise for > large/huge BITINT_TYPEs. > (instrument_si_overflow): Instrument BITINT_TYPE operations even when > they don't have mode precision. > * ubsan.h (enum ubsan_print_style): New enumerator. > gcc/c-family/ > * c-ubsan.cc (ubsan_instrument_shift): Use UBSAN_PRINT_FORCE_INT > for type0 type descriptor. > > --- gcc/ubsan.cc.jj 2023-08-08 15:54:35.443599459 +0200 > +++ gcc/ubsan.cc 2023-08-08 16:12:02.329939798 +0200 > @@ -50,6 +50,8 @@ along with GCC; see the file COPYING3. > #include "gimple-fold.h" > #include "varasm.h" > #include "realmpfr.h" > +#include "target.h" > +#include "langhooks.h" > > /* Map from a tree to a VAR_DECL tree. */ > > @@ -125,6 +127,25 @@ tree > ubsan_encode_value (tree t, enum ubsan_encode_value_phase phase) > { > tree type = TREE_TYPE (t); > + if (TREE_CODE (type) == BITINT_TYPE) > + { > + if (TYPE_PRECISION (type) <= POINTER_SIZE) > + { > + type = pointer_sized_int_node; > + t = fold_build1 (NOP_EXPR, type, t); > + } > + else > + { > + scalar_int_mode arith_mode > + = (targetm.scalar_mode_supported_p (TImode) ? 
TImode : DImode); > + if (TYPE_PRECISION (type) > GET_MODE_PRECISION (arith_mode)) > + return build_zero_cst (pointer_sized_int_node); > + type > + = build_nonstandard_integer_type (GET_MODE_PRECISION (arith_mode), > + TYPE_UNSIGNED (type)); > + t = fold_build1 (NOP_EXPR, type, t); > + } > + } > scalar_mode mode = SCALAR_TYPE_MODE (type); > const unsigned int bitsize = GET_MODE_BITSIZE (mode); > if (bitsize <= POINTER_SIZE) > @@ -355,14 +376,32 @@ ubsan_type_descriptor (tree type, enum u > { > /* See through any typedefs. */ > type = TYPE_MAIN_VARIANT (type); > + tree type3 = type; > + if (pstyle == UBSAN_PRINT_FORCE_INT) > + { > + /* Temporary hack for -fsanitize=shift with _BitInt(129) and more. > + libubsan crashes if it is not TK_Integer type. */ > + if (TREE_CODE (type) == BITINT_TYPE) > + { > + scalar_int_mode arith_mode > + = (targetm.scalar_mode_supported_p (TImode) > + ? TImode : DImode); > + if (TYPE_PRECISION (type) > GET_MODE_PRECISION (arith_mode)) > + type3 = build_qualified_type (type, TYPE_QUAL_CONST); > + } > + if (type3 == type) > + pstyle = UBSAN_PRINT_NORMAL; > + } > > - tree decl = decl_for_type_lookup (type); > + tree decl = decl_for_type_lookup (type3); > /* It is possible that some of the earlier created DECLs were found > unused, in that case they weren't emitted and varpool_node::get > returns NULL node on them. But now we really need them. Thus, > renew them here. */ > if (decl != NULL_TREE && varpool_node::get (decl)) > - return build_fold_addr_expr (decl); > + { > + return build_fold_addr_expr (decl); > + } > > tree dtype = ubsan_get_type_descriptor_type (); > tree type2 = type; > @@ -370,6 +409,7 @@ ubsan_type_descriptor (tree type, enum u > pretty_printer pretty_name; > unsigned char deref_depth = 0; > unsigned short tkind, tinfo; > + char tname_bitint[sizeof ("unsigned _BitInt(2147483647)")]; > > /* Get the name of the type, or the name of the pointer type. 
*/ > if (pstyle == UBSAN_PRINT_POINTER) > @@ -403,8 +443,18 @@ ubsan_type_descriptor (tree type, enum u > } > > if (tname == NULL) > - /* We weren't able to determine the type name. */ > - tname = "<unknown>"; > + { > + if (TREE_CODE (type2) == BITINT_TYPE) > + { > + snprintf (tname_bitint, sizeof (tname_bitint), > + "%s_BitInt(%d)", TYPE_UNSIGNED (type2) ? "unsigned " : "", > + TYPE_PRECISION (type2)); > + tname = tname_bitint; > + } > + else > + /* We weren't able to determine the type name. */ > + tname = "<unknown>"; > + } > > pp_quote (&pretty_name); > > @@ -472,6 +522,18 @@ ubsan_type_descriptor (tree type, enum u > case INTEGER_TYPE: > tkind = 0x0000; > break; > + case BITINT_TYPE: > + { > + /* FIXME: libubsan right now only supports _BitInts which > + fit into DImode or TImode. */ > + scalar_int_mode arith_mode = (targetm.scalar_mode_supported_p (TImode) > + ? TImode : DImode); > + if (TYPE_PRECISION (eltype) <= GET_MODE_PRECISION (arith_mode)) > + tkind = 0x0000; > + else > + tkind = 0xffff; > + } > + break; > case REAL_TYPE: > /* FIXME: libubsan right now only supports float, double and > long double type formats. */ > @@ -486,7 +548,17 @@ ubsan_type_descriptor (tree type, enum u > tkind = 0xffff; > break; > } > - tinfo = get_ubsan_type_info_for_type (eltype); > + tinfo = tkind == 0xffff ? 0 : get_ubsan_type_info_for_type (eltype); > + > + if (pstyle == UBSAN_PRINT_FORCE_INT) > + { > + tkind = 0x0000; > + scalar_int_mode arith_mode = (targetm.scalar_mode_supported_p (TImode) > + ? TImode : DImode); > + tree t = lang_hooks.types.type_for_mode (arith_mode, > + TYPE_UNSIGNED (eltype)); > + tinfo = get_ubsan_type_info_for_type (t); > + } > > /* Create a new VAR_DECL of type descriptor. */ > const char *tmp = pp_formatted_text (&pretty_name); > @@ -522,7 +594,7 @@ ubsan_type_descriptor (tree type, enum u > varpool_node::finalize_decl (decl); > > /* Save the VAR_DECL into the hash table. 
*/ > - decl_for_type_insert (type, decl); > + decl_for_type_insert (type3, decl); > > return build_fold_addr_expr (decl); > } > @@ -1604,8 +1676,9 @@ instrument_si_overflow (gimple_stmt_iter > Also punt on bit-fields. */ > if (!INTEGRAL_TYPE_P (lhsinner) > || TYPE_OVERFLOW_WRAPS (lhsinner) > - || maybe_ne (GET_MODE_BITSIZE (TYPE_MODE (lhsinner)), > - TYPE_PRECISION (lhsinner))) > + || (TREE_CODE (lhsinner) != BITINT_TYPE > + && maybe_ne (GET_MODE_BITSIZE (TYPE_MODE (lhsinner)), > + TYPE_PRECISION (lhsinner)))) > return; > > switch (code) > --- gcc/ubsan.h.jj 2023-08-08 15:54:35.460599221 +0200 > +++ gcc/ubsan.h 2023-08-08 16:12:02.329939798 +0200 > @@ -39,7 +39,8 @@ enum ubsan_null_ckind { > enum ubsan_print_style { > UBSAN_PRINT_NORMAL, > UBSAN_PRINT_POINTER, > - UBSAN_PRINT_ARRAY > + UBSAN_PRINT_ARRAY, > + UBSAN_PRINT_FORCE_INT > }; > > /* This controls ubsan_encode_value behavior. */ > --- gcc/internal-fn.cc.jj 2023-08-08 15:55:06.709161614 +0200 > +++ gcc/internal-fn.cc 2023-08-08 16:22:09.404440148 +0200 > @@ -981,8 +981,38 @@ expand_arith_overflow_result_store (tree > /* Helper for expand_*_overflow. Store RES into TARGET. 
*/ > > static void > -expand_ubsan_result_store (rtx target, rtx res) > +expand_ubsan_result_store (tree lhs, rtx target, scalar_int_mode mode, > + rtx res, rtx_code_label *do_error) > { > + if (TREE_CODE (TREE_TYPE (lhs)) == BITINT_TYPE > + && TYPE_PRECISION (TREE_TYPE (lhs)) < GET_MODE_PRECISION (mode)) > + { > + int uns = TYPE_UNSIGNED (TREE_TYPE (lhs)); > + int prec = TYPE_PRECISION (TREE_TYPE (lhs)); > + int tgtprec = GET_MODE_PRECISION (mode); > + rtx resc = gen_reg_rtx (mode), lres; > + emit_move_insn (resc, res); > + if (uns) > + { > + rtx mask > + = immed_wide_int_const (wi::shifted_mask (0, prec, false, tgtprec), > + mode); > + lres = expand_simple_binop (mode, AND, res, mask, NULL_RTX, > + true, OPTAB_LIB_WIDEN); > + } > + else > + { > + lres = expand_shift (LSHIFT_EXPR, mode, res, tgtprec - prec, > + NULL_RTX, 1); > + lres = expand_shift (RSHIFT_EXPR, mode, lres, tgtprec - prec, > + NULL_RTX, 0); > + } > + if (lres != res) > + emit_move_insn (res, lres); > + do_compare_rtx_and_jump (res, resc, > + NE, true, mode, NULL_RTX, NULL, do_error, > + profile_probability::very_unlikely ()); > + } > if (GET_CODE (target) == SUBREG && SUBREG_PROMOTED_VAR_P (target)) > /* If this is a scalar in a register that is stored in a wider mode > than the declared mode, compute the result into its declared mode > @@ -1431,7 +1461,7 @@ expand_addsub_overflow (location_t loc, > if (lhs) > { > if (is_ubsan) > - expand_ubsan_result_store (target, res); > + expand_ubsan_result_store (lhs, target, mode, res, do_error); > else > { > if (do_xor) > @@ -1528,7 +1558,7 @@ expand_neg_overflow (location_t loc, tre > if (lhs) > { > if (is_ubsan) > - expand_ubsan_result_store (target, res); > + expand_ubsan_result_store (lhs, target, mode, res, do_error); > else > expand_arith_overflow_result_store (lhs, target, mode, res); > } > @@ -2414,7 +2450,7 @@ expand_mul_overflow (location_t loc, tre > if (lhs) > { > if (is_ubsan) > - expand_ubsan_result_store (target, res); > + 
expand_ubsan_result_store (lhs, target, mode, res, do_error); > else > expand_arith_overflow_result_store (lhs, target, mode, res); > } > --- gcc/c-family/c-ubsan.cc.jj 2023-08-08 15:54:33.596625322 +0200 > +++ gcc/c-family/c-ubsan.cc 2023-08-08 16:12:02.332939756 +0200 > @@ -256,8 +256,8 @@ ubsan_instrument_shift (location_t loc, > tt = build_call_expr_loc (loc, builtin_decl_explicit (BUILT_IN_TRAP), 0); > else > { > - tree data = ubsan_create_data ("__ubsan_shift_data", 1, &loc, > - ubsan_type_descriptor (type0), > + tree utd0 = ubsan_type_descriptor (type0, UBSAN_PRINT_FORCE_INT); > + tree data = ubsan_create_data ("__ubsan_shift_data", 1, &loc, utd0, > ubsan_type_descriptor (type1), NULL_TREE, > NULL_TREE); > data = build_fold_addr_expr_loc (loc, data); > > Jakub > > -- Richard Biener <rguent...@suse.de> SUSE Software Solutions Germany GmbH, Frankenstrasse 146, 90461 Nuernberg, Germany; GF: Ivo Totev, Andrew McDonald, Werner Knoblich; (HRB 36809, AG Nuernberg)