https://github.com/keinflue created https://github.com/llvm/llvm-project/pull/154807
This adds a range attribute to function parameters to enable optimization with regards to restricted value ranges of enumerations without fixed underlying type in C++ with the -fstrict-enums option. Loads for such enumerations were already annotated previously but do not have the desired effect for by-value parameters. As a side effect this also applies these range attributes to types with boolean representation which are not represented as i1 in parameters (if these exist). The optimization is disabled with -fsanitize=bool and -fsanitize=enum to avoid removal of their checks. This also slighlty refactors the getRangeForType to be reusable for this purpose. Fixes #154022 >From 35308dce96c6901a088422ff85609789064d40e5 Mon Sep 17 00:00:00 2001 From: keinflue <keinf...@posteo.de> Date: Thu, 21 Aug 2025 18:32:21 +0200 Subject: [PATCH] [clang] -fstrict-enums optimization for by-value parameters This adds a range attribute to function parameters to enable optimization with regards to restricted value ranges of enumerations without fixed underlying type in C++ with the -fstrict-enums option. Loads for such enumerations were already annotated previously but do not have the desired effect for by-value parameters. As a side effect this also applies these range attributes to types with boolean representation which are not represented as i1 in parameters (if these exist). The optimization is disabled with -fsanitize=bool and -fsanitize=enum to avoid removal of their checks. This also slighlty refactors the getRangeForType to be reusable for this purpose. Fixes #154022 --- clang/lib/CodeGen/CGCall.cpp | 11 ++ clang/lib/CodeGen/CGExpr.cpp | 68 +++++-- clang/lib/CodeGen/CodeGenFunction.h | 5 + clang/test/CodeGenCXX/gh154022.cpp | 288 ++++++++++++++++++++++++++++ 4 files changed, 354 insertions(+), 18 deletions(-) create mode 100644 clang/test/CodeGenCXX/gh154022.cpp diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp index b959982809911..dfba41569fa28 100644 --- a/clang/lib/CodeGen/CGCall.cpp +++ b/clang/lib/CodeGen/CGCall.cpp @@ -37,6 +37,7 @@ #include "llvm/IR/AttributeMask.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/CallingConv.h" +#include "llvm/IR/ConstantRange.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/InlineAsm.h" #include "llvm/IR/IntrinsicInst.h" @@ -3209,6 +3210,16 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI, auto AI = Fn->getArg(FirstIRArg); llvm::Type *LTy = ConvertType(Arg->getType()); + bool HasBoolCheck = SanOpts.has(SanitizerKind::Bool); + bool HasEnumCheck = SanOpts.has(SanitizerKind::Enum); + if (!HasBoolCheck && !HasEnumCheck && LTy->isIntegerTy()) { + const std::optional<llvm::ConstantRange> Range = + getRangeForType(Arg->getType(), LTy->getIntegerBitWidth()); + if (Range && !Range->isFullSet()) + AI->addAttr(llvm::Attribute::get(getLLVMContext(), + llvm::Attribute::Range, *Range)); + } + // Prepare parameter attributes. So far, only attributes for pointer // parameters are prepared. See // http://llvm.org/docs/LangRef.html#paramattrs. diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp index d5df6dd3e303c..418120c596ed1 100644 --- a/clang/lib/CodeGen/CGExpr.cpp +++ b/clang/lib/CodeGen/CGExpr.cpp @@ -37,6 +37,7 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/ScopeExit.h" #include "llvm/ADT/StringExtras.h" +#include "llvm/IR/ConstantRange.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/LLVMContext.h" @@ -52,6 +53,7 @@ #include <numeric> #include <optional> #include <string> +#include <utility> using namespace clang; using namespace CodeGen; @@ -1917,34 +1919,59 @@ llvm::Value *CodeGenFunction::EmitLoadOfScalar(LValue lvalue, lvalue.getTBAAInfo(), lvalue.isNontemporal()); } -static bool getRangeForType(CodeGenFunction &CGF, QualType Ty, - llvm::APInt &Min, llvm::APInt &End, - bool StrictEnums, bool IsBool) { +std::optional<llvm::ConstantRange> +CodeGenFunction::getRangeForType(QualType Ty, unsigned BitWidth, + bool ForceStrictEnums, + bool AssumeBooleanRepresentation) { + assert(BitWidth >= 1 && "Zero bits value range not meaningful."); + + if (AssumeBooleanRepresentation || + (Ty->hasBooleanRepresentation() && !Ty->isVectorType())) { + if (BitWidth > 1) + return llvm::ConstantRange(llvm::APInt(BitWidth, 0), + llvm::APInt(BitWidth, 2)); + else + return llvm::ConstantRange(BitWidth, /*isFullSet=*/true); + } + const EnumType *ET = Ty->getAs<EnumType>(); const EnumDecl *ED = ET ? ET->getOriginalDecl()->getDefinitionOrSelf() : nullptr; + bool StrictEnums = ForceStrictEnums || CGM.getCodeGenOpts().StrictEnums; bool IsRegularCPlusPlusEnum = - CGF.getLangOpts().CPlusPlus && StrictEnums && ET && !ED->isFixed(); - if (!IsBool && !IsRegularCPlusPlusEnum) - return false; - - if (IsBool) { - Min = llvm::APInt(CGF.getContext().getTypeSize(Ty), 0); - End = llvm::APInt(CGF.getContext().getTypeSize(Ty), 2); - } else { + getLangOpts().CPlusPlus && StrictEnums && ET && !ED->isFixed(); + if (IsRegularCPlusPlusEnum) { + llvm::APInt Min, End; ED->getValueRange(End, Min); + // getValueRange may produce [0, 0] or [2**(w-1),2**(w-1)] to indicate a + // full range + auto Range = Min == End + ? llvm::ConstantRange(BitWidth, /*isFullSet=*/true) + : llvm::ConstantRange(std::move(Min), std::move(End)); + assert(Range.getActiveBits() <= BitWidth && + "Representing type with fewer bits than necessary for values?"); + if (Ty->isSignedIntegerOrEnumerationType()) + return Range.sextOrTrunc(BitWidth); + else { + assert(Ty->isUnsignedIntegerOrEnumerationType()); + return Range.zextOrTrunc(BitWidth); + } } - return true; + return std::nullopt; } llvm::MDNode *CodeGenFunction::getRangeForLoadFromType(QualType Ty) { - llvm::APInt Min, End; - if (!getRangeForType(*this, Ty, Min, End, CGM.getCodeGenOpts().StrictEnums, - Ty->hasBooleanRepresentation() && !Ty->isVectorType())) + llvm::Type *LTy = convertTypeForLoadStore(Ty); + if (!LTy->isIntegerTy()) + return nullptr; + + const std::optional<llvm::ConstantRange> Range = + getRangeForType(Ty, LTy->getIntegerBitWidth()); + if (!Range || Range->isFullSet() || Range->isEmptySet()) return nullptr; llvm::MDBuilder MDHelper(getLLVMContext()); - return MDHelper.createRange(Min, End); + return MDHelper.createRange(Range->getLower(), Range->getUpper()); } void CodeGenFunction::maybeAttachRangeForLoad(llvm::LoadInst *Load, QualType Ty, @@ -1986,10 +2013,15 @@ bool CodeGenFunction::EmitScalarRangeCheck(llvm::Value *Value, QualType Ty, getContext().isTypeIgnoredBySanitizer(SanitizerKind::Enum, Ty)) return false; - llvm::APInt Min, End; - if (!getRangeForType(*this, Ty, Min, End, /*StrictEnums=*/true, IsBool)) + const std::optional<llvm::ConstantRange> Range = getRangeForType( + Ty, getContext().getTypeSize(Ty), /*ForceStrictEnums=*/true, + /*AssumeBooleanRepresentation*/ IsBool); + if (!Range || Range->isFullSet()) return true; + llvm::APInt Min = Range->getLower(); + llvm::APInt End = Range->getUpper(); + SanitizerKind::SanitizerOrdinal Kind = NeedsEnumCheck ? SanitizerKind::SO_Enum : SanitizerKind::SO_Bool; diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h index ad318f289ee83..5ae485e9fd905 100644 --- a/clang/lib/CodeGen/CodeGenFunction.h +++ b/clang/lib/CodeGen/CodeGenFunction.h @@ -39,6 +39,7 @@ #include "llvm/ADT/MapVector.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Frontend/OpenMP/OMPIRBuilder.h" +#include "llvm/IR/ConstantRange.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/ValueHandle.h" #include "llvm/Support/Debug.h" @@ -5530,6 +5531,10 @@ class CodeGenFunction : public CodeGenTypeCache { llvm::Value *FormAArch64ResolverCondition(const FMVResolverOption &RO); llvm::Value *EmitAArch64CpuSupports(const CallExpr *E); llvm::Value *EmitAArch64CpuSupports(ArrayRef<StringRef> FeatureStrs); + + std::optional<llvm::ConstantRange> + getRangeForType(QualType Ty, unsigned BitWidth, bool ForceStrictEnums = false, + bool AssumeBooleanRepresentation = false); }; inline DominatingLLVMValue::saved_type diff --git a/clang/test/CodeGenCXX/gh154022.cpp b/clang/test/CodeGenCXX/gh154022.cpp new file mode 100644 index 0000000000000..6a2815afc351e --- /dev/null +++ b/clang/test/CodeGenCXX/gh154022.cpp @@ -0,0 +1,288 @@ +// RUN: %clang_cc1 %s -triple i386-unknown-unknown -emit-llvm -O1 -fstrict-enums -std=c++11 -o - | FileCheck %s +// RUN: %clang_cc1 %s -triple i386-unknown-unknown -emit-llvm -O3 -std=c++11 -o - | FileCheck --check-prefixes=NO-STRICT-ENUMS-C,NO-STRICT-ENUMS %s +// RUN: %clang_cc1 -x c %s -triple i386-unknown-unknown -emit-llvm -O1 -std=c23 -fstrict-enums -o - | FileCheck --check-prefix=NO-STRICT-ENUMS-C %s + +#ifdef __cplusplus +extern "C" { +#endif // __cplusplus + +#ifdef __cplusplus +enum e1 { }; +bool g1a(enum e1 x) { + return (unsigned int)x > 1; +} +// CHECK-LABEL: define{{.*}} i1 @g1a +// CHECK: ret i1 false +// NO-STRICT-ENUMS-LABEL: define{{.*}} i1 @g1a +// NO-STRICT-ENUMS: ret i1 %cmp +bool g1b(enum e1 x) { + return (unsigned int)x >= 1; +} +// CHECK-LABEL: define{{.*}} i1 @g1b +// CHECK: ret i1 %cmp +// NO-STRICT-ENUMS-LABEL: define{{.*}} i1 @g1b +// NO-STRICT-ENUMS: ret i1 %cmp +#endif // __cplusplus + +enum e2 { e2_a }; +bool g2a(enum e2 x) { + return (unsigned int)x > 1; +} +// CHECK-LABEL: define{{.*}} i1 @g2a +// CHECK: ret i1 false +// NO-STRICT-ENUMS-C-LABEL: define{{.*}} i1 @g2a +// NO-STRICT-ENUMS-C: ret i1 %cmp +bool g2b(enum e2 x) { + return (unsigned int)x >= 1; +} +// CHECK-LABEL: define{{.*}} i1 @g2b +// CHECK: ret i1 %cmp +// NO-STRICT-ENUMS-C-LABEL: define{{.*}} i1 @g2b +// NO-STRICT-ENUMS-C: ret i1 %cmp + +enum e3 { e3_a = 1 }; +bool g3a(enum e3 x) { + return (unsigned int)x > 1; +} +// CHECK-LABEL: define{{.*}} i1 @g3a +// CHECK: ret i1 false +// NO-STRICT-ENUMS-C-LABEL: define{{.*}} i1 @g3a +// NO-STRICT-ENUMS-C: ret i1 %cmp +bool g3b(enum e3 x) { + return (unsigned int)x >= 1; +} +// CHECK-LABEL: define{{.*}} i1 @g3b +// CHECK: ret i1 %cmp +// NO-STRICT-ENUMS-C-LABEL: define{{.*}} i1 @g3b +// NO-STRICT-ENUMS-C: ret i1 %cmp + +enum e4 { e4_a = -1 }; +bool g4a(enum e4 x) { + return (int)x > 0; +} +// CHECK-LABEL: define{{.*}} i1 @g4a +// CHECK: ret i1 false +// NO-STRICT-ENUMS-C-LABEL: define{{.*}} i1 @g4a +// NO-STRICT-ENUMS-C: ret i1 %cmp +bool g4b(enum e4 x) { + return (int)x >= 0; +} +// CHECK-LABEL: define{{.*}} i1 @g4b +// CHECK: ret i1 %cmp +// NO-STRICT-ENUMS-C-LABEL: define{{.*}} i1 @g4b +// NO-STRICT-ENUMS-C: ret i1 %cmp +bool g4c(enum e4 x) { + return (int)x < -1; +} +// CHECK-LABEL: define{{.*}} i1 @g4c +// CHECK: ret i1 false +// NO-STRICT-ENUMS-C-LABEL: define{{.*}} i1 @g4c +// NO-STRICT-ENUMS-C: ret i1 %cmp +bool g4d(enum e4 x) { + return (int)x <= -1; +} +// CHECK-LABEL: define{{.*}} i1 @g4d +// CHECK: ret i1 %cmp +// NO-STRICT-ENUMS-C-LABEL: define{{.*}} i1 @g4d +// NO-STRICT-ENUMS-C: ret i1 %cmp + +enum e5 { e5_a = 2 }; +bool g5a(enum e5 x) { + return (unsigned int)x > 3; +} +// CHECK-LABEL: define{{.*}} i1 @g5a +// CHECK: ret i1 false +// NO-STRICT-ENUMS-C-LABEL: define{{.*}} i1 @g5a +// NO-STRICT-ENUMS-C: ret i1 %cmp +bool g5b(enum e5 x) { + return (unsigned int)x >= 3; +} +// CHECK-LABEL: define{{.*}} i1 @g5b +// CHECK: ret i1 %cmp +// NO-STRICT-ENUMS-C-LABEL: define{{.*}} i1 @g5b +// NO-STRICT-ENUMS-C: ret i1 %cmp + +enum e6 { e6_a = -2 }; +bool g6a(enum e6 x) { + return (int)x > 1; +} +// CHECK-LABEL: define{{.*}} i1 @g6a +// CHECK: ret i1 false +// NO-STRICT-ENUMS-C-LABEL: define{{.*}} i1 @g6a +// NO-STRICT-ENUMS-C: ret i1 %cmp +bool g6b(enum e6 x) { + return (int)x >= 1; +} +// CHECK-LABEL: define{{.*}} i1 @g6b +// CHECK: ret i1 %cmp +// NO-STRICT-ENUMS-C-LABEL: define{{.*}} i1 @g6b +// NO-STRICT-ENUMS-C: ret i1 %cmp +bool g6c(enum e6 x) { + return (int)x < -2; +} +// CHECK-LABEL: define{{.*}} i1 @g6c +// CHECK: ret i1 false +// NO-STRICT-ENUMS-C-LABEL: define{{.*}} i1 @g6c +// NO-STRICT-ENUMS-C: ret i1 %cmp +bool g6d(enum e6 x) { + return (int)x <= -2; +} +// CHECK-LABEL: define{{.*}} i1 @g6d +// CHECK: ret i1 %cmp +// NO-STRICT-ENUMS-C-LABEL: define{{.*}} i1 @g6d +// NO-STRICT-ENUMS-C: ret i1 %cmp + +enum e7 { e7_a = -1, e7_b = 2 }; +bool g7a(enum e7 x) { + return (int)x > 3; +} +// CHECK-LABEL: define{{.*}} i1 @g7a +// CHECK: ret i1 false +// NO-STRICT-ENUMS-C-LABEL: define{{.*}} i1 @g7a +// NO-STRICT-ENUMS-C: ret i1 %cmp +bool g7b(enum e7 x) { + return (int)x >= 3; +} +// CHECK-LABEL: define{{.*}} i1 @g7b +// CHECK: ret i1 %cmp +// NO-STRICT-ENUMS-C-LABEL: define{{.*}} i1 @g7b +// NO-STRICT-ENUMS-C: ret i1 %cmp +bool g7c(enum e7 x) { + return (int)x < -4; +} +// CHECK-LABEL: define{{.*}} i1 @g7c +// CHECK: ret i1 false +// NO-STRICT-ENUMS-C-LABEL: define{{.*}} i1 @g7c +// NO-STRICT-ENUMS-C: ret i1 %cmp +bool g7d(enum e7 x) { + return (int)x <= -4; +} +// CHECK-LABEL: define{{.*}} i1 @g7d +// CHECK: ret i1 %cmp +// NO-STRICT-ENUMS-C-LABEL: define{{.*}} i1 @g7d +// NO-STRICT-ENUMS-C: ret i1 %cmp + +enum e8 { e8_a = (unsigned int)-1 }; +bool g8b(enum e8 x) { + return (unsigned int)x >= (unsigned int)-1; +} +// CHECK-LABEL: define{{.*}} i1 @g8b +// CHECK: ret i1 %cmp +// NO-STRICT-ENUMS-C-LABEL: define{{.*}} i1 @g8b +// NO-STRICT-ENUMS-C: ret i1 %cmp + +enum e9 { e9_a = (unsigned int)-2 }; +bool g9b(enum e9 x) { + return (unsigned int)x >= (unsigned int)-1; +} +// CHECK-LABEL: define{{.*}} i1 @g9b +// CHECK: ret i1 %cmp +// NO-STRICT-ENUMS-C-LABEL: define{{.*}} i1 @g9b +// NO-STRICT-ENUMS-C: ret i1 %cmp + +enum e10 { e10_a = (int)(((unsigned int)-1) >> 1) }; +bool g10a(enum e10 x) { + return (unsigned int)x > (int)(((unsigned int)-1) >> 1); +} +// CHECK-LABEL: define{{.*}} i1 @g10a +// CHECK: ret i1 false +// NO-STRICT-ENUMS-C-LABEL: define{{.*}} i1 @g10a +// NO-STRICT-ENUMS-C: ret i1 %cmp +bool g10b(enum e10 x) { + return (unsigned int)x >= (int)(((unsigned int)-1) >> 1); +} +// CHECK-LABEL: define{{.*}} i1 @g10b +// CHECK: ret i1 %cmp +// NO-STRICT-ENUMS-C-LABEL: define{{.*}} i1 @g10b +// NO-STRICT-ENUMS-C: ret i1 %cmp +bool g10d(enum e10 x) { + return (unsigned int)x <= 0; +} +// CHECK-LABEL: define{{.*}} i1 @g10d +// CHECK: ret i1 %cmp +// NO-STRICT-ENUMS-C-LABEL: define{{.*}} i1 @g10d +// NO-STRICT-ENUMS-C: ret i1 %cmp + +enum e11 { e11_a = -(int)(((unsigned int)-1) >> 1) }; +bool g11b(enum e11 x) { + return (int)x >= (int)(((unsigned int)-1) >> 1); +} +// CHECK-LABEL: define{{.*}} i1 @g11b +// CHECK: ret i1 %cmp +// NO-STRICT-ENUMS-C-LABEL: define{{.*}} i1 @g11b +// NO-STRICT-ENUMS-C: ret i1 %cmp +bool g11d(enum e11 x) { + return (int)x <= -(int)(((unsigned int)-1) >> 1) - 1; +} +// CHECK-LABEL: define{{.*}} i1 @g11d +// CHECK: ret i1 %cmp +// NO-STRICT-ENUMS-C-LABEL: define{{.*}} i1 @g11d +// NO-STRICT-ENUMS-C: ret i1 %cmp + +enum e12 { e12_a = -(long long)(((unsigned long long)-1) >> 2) }; +bool g12a(enum e12 x) { + return (long long)x > (long long)(((unsigned long long)-1) >> 2); +} +// CHECK-LABEL: define{{.*}} i1 @g12a +// CHECK: ret i1 false +// NO-STRICT-ENUMS-C-LABEL: define{{.*}} i1 @g12a +// NO-STRICT-ENUMS-C: ret i1 %cmp +bool g12b(enum e12 x) { + return (long long)x >= (long long)(((unsigned long long)-1) >> 2); +} +// CHECK-LABEL: define{{.*}} i1 @g12b +// CHECK: ret i1 %cmp +// NO-STRICT-ENUMS-C-LABEL: define{{.*}} i1 @g12b +// NO-STRICT-ENUMS-C: ret i1 %cmp +bool g12c(enum e12 x) { + return (long long)x < -(long long)(((unsigned long long)-1) >> 2) - 1; +} +// CHECK-LABEL: define{{.*}} i1 @g12c +// CHECK: ret i1 false +// NO-STRICT-ENUMS-C-LABEL: define{{.*}} i1 @g12c +// NO-STRICT-ENUMS-C: ret i1 %cmp +bool g12d(enum e12 x) { + return (long long)x <= -(long long)(((unsigned long long)-1) >> 2) - 1; +} +// CHECK-LABEL: define{{.*}} i1 @g12d +// CHECK: ret i1 %cmp +// NO-STRICT-ENUMS-C-LABEL: define{{.*}} i1 @g12d +// NO-STRICT-ENUMS-C: ret i1 %cmp + +enum e13 : long long { e13_a = -(long long)(((unsigned long long)-1) >> 2) }; +bool g13a(enum e13 x) { + return (long long)x > (long long)(((unsigned long long)-1) >> 2); +} +// CHECK-LABEL: define{{.*}} i1 @g13a +// CHECK: ret i1 %cmp +// NO-STRICT-ENUMS-C-LABEL: define{{.*}} i1 @g13a +// NO-STRICT-ENUMS-C: ret i1 %cmp +bool g13c(enum e13 x) { + return (long long)x < -(long long)(((unsigned long long)-1) >> 2) - 1; +} +// CHECK-LABEL: define{{.*}} i1 @g13c +// CHECK: ret i1 %cmp +// NO-STRICT-ENUMS-C-LABEL: define{{.*}} i1 @g13c +// NO-STRICT-ENUMS-C: ret i1 %cmp + +#ifdef __cplusplus +enum class e14 { e14_a = -(int)(((unsigned int)-1) >> 2) }; +bool g14a(enum e14 x) { + return (int)x > (int)(((unsigned int)-1) >> 2); +} +// CHECK-LABEL: define{{.*}} i1 @g14a +// CHECK: ret i1 %cmp +// NO-STRICT-ENUMS-LABEL: define{{.*}} i1 @g14a +// NO-STRICT-ENUMS: ret i1 %cmp +bool g14c(enum e14 x) { + return (int)x < -(int)(((unsigned int)-1) >> 2) - 1; +} +// CHECK-LABEL: define{{.*}} i1 @g14c +// CHECK: ret i1 %cmp +// NO-STRICT-ENUMS-LABEL: define{{.*}} i1 @g14c +// NO-STRICT-ENUMS: ret i1 %cmp +#endif // __cplusplus + +#ifdef __cplusplus +} +#endif // __cplusplus _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits