On 7/26/21 4:36 PM, Jakub Jelinek wrote:
On Mon, Jul 26, 2021 at 10:33:51PM +0200, Jakub Jelinek via Gcc-patches wrote: @@ -835,6 +836,43 @@ fold_using_range::range_of_builtin_ubsan_call (irange &r, gcall *call, r.set_varying (type); } +// Return TRUE if we recognize the target character set and return the +// range for lower case and upper case letters. + +static bool +get_letter_range (tree type, irange &lowers, irange &uppers) +{ + // ASCII + if (lang_hooks.to_target_charset (' ') == 0x20) + { + lowers = int_range<2> (build_int_cst (type, 'a'), + build_int_cst (type, 'z')); + uppers = int_range<2> (build_int_cst (type, 'A'), + build_int_cst (type, 'Z')); Wouldn't it be safer to lang_hooks.to_target_charset ('a') (and 'z') and just verify that their difference is that of 'z' - 'a'? David is right, that their difference is 25. I think we don't support EBCDIC on the host, but what if. Jakub
Fine by me. This is running through testing now :-) This seems good, yes? Andrew
commit f13a661874b772e473ee78fe8133899db51ea642 Author: Andrew MacLeod <[email protected]> Date: Mon Jul 26 15:38:42 2021 -0400 Handle ASCII and EBCDIC in toupper and tolower ranges. gcc/ PR tree-optimization/78888 * gimple-range-fold.cc (get_letter_range): New. (fold_using_range::range_of_builtin_call): Call get_letter_range. gcc/testsuite/ * gcc.dg/pr78888.c: Add extra non-standard verifications. diff --git a/gcc/gimple-range-fold.cc b/gcc/gimple-range-fold.cc index 8465b4a82f6..a952f693c77 100644 --- a/gcc/gimple-range-fold.cc +++ b/gcc/gimple-range-fold.cc @@ -37,6 +37,7 @@ along with GCC; see the file COPYING3. If not see #include "cfgloop.h" #include "tree-ssa-loop.h" #include "tree-scalar-evolution.h" +#include "langhooks.h" #include "vr-values.h" #include "range.h" #include "value-query.h" @@ -835,6 +836,30 @@ fold_using_range::range_of_builtin_ubsan_call (irange &r, gcall *call, r.set_varying (type); } +// Return TRUE if we recognize the target character set and return the +// range for lower case and upper case letters. + +static bool +get_letter_range (tree type, irange &lowers, irange &uppers) +{ + // ASCII + int a = lang_hooks.to_target_charset ('a'); + int z = lang_hooks.to_target_charset ('z'); + int A = lang_hooks.to_target_charset ('A'); + int Z = lang_hooks.to_target_charset ('Z'); + + if ((z - a == 25) && (Z - A == 25)) + { + lowers = int_range<2> (build_int_cst (type, 'a'), + build_int_cst (type, 'z')); + uppers = int_range<2> (build_int_cst (type, 'A'), + build_int_cst (type, 'Z')); + return true; + } + // Unknown character set. + return false; +} + // For a builtin in CALL, return a range in R if known and return // TRUE. Otherwise return FALSE. 
@@ -873,13 +898,16 @@ fold_using_range::range_of_builtin_call (irange &r, gcall *call, arg = gimple_call_arg (call, 0); if (!src.get_operand (r, arg)) return false; + + int_range<3> lowers; + int_range<3> uppers; + if (!get_letter_range (type, lowers, uppers)) + return false; + // Return the range passed in without any lower case characters, // but including all the upper case ones. - int_range<2> exclude (build_int_cst (type, 'a'), - build_int_cst (type, 'z'), VR_ANTI_RANGE); - r.intersect (exclude); - int_range<2> uppers (build_int_cst (type, 'A'), - build_int_cst (type, 'Z')); + lowers.invert (); + r.intersect (lowers); r.union_ (uppers); return true; } @@ -889,13 +917,16 @@ fold_using_range::range_of_builtin_call (irange &r, gcall *call, arg = gimple_call_arg (call, 0); if (!src.get_operand (r, arg)) return false; + + int_range<3> lowers; + int_range<3> uppers; + if (!get_letter_range (type, lowers, uppers)) + return false; + // Return the range passed in without any upper case characters, // but including all the lower case ones. - int_range<2> exclude (build_int_cst (type, 'A'), - build_int_cst (type, 'Z'), VR_ANTI_RANGE); - r.intersect (exclude); - int_range<2> lowers (build_int_cst (type, 'a'), - build_int_cst (type, 'z')); + uppers.invert (); + r.intersect (uppers); r.union_ (lowers); return true; }
