ziqingluo-90 updated this revision to Diff 490648.
ziqingluo-90 added a comment.
Rebased the code w.r.t. a series of refactoring in [-Wunsafe-buffer-usage].
Added a `FixableGadget` for array subscripts of the form `DRE[*]` in the
context of lvalue-to-rvalue casting.
Also did a refactoring at the place where matchers are applied: Matchers of a
`FixableGadget` and of a `WarningGadget` cat match for the same AST node. So
they should not be put in an `anyOf` group, otherwise they can disable each
other due to short-circuiting.
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D139737/new/
https://reviews.llvm.org/D139737
Files:
clang/include/clang/Analysis/Analyses/UnsafeBufferUsage.h
clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def
clang/lib/Analysis/UnsafeBufferUsage.cpp
clang/test/SemaCXX/warn-unsafe-buffer-usage-fixits-local-var-span.cpp
Index: clang/test/SemaCXX/warn-unsafe-buffer-usage-fixits-local-var-span.cpp
===================================================================
--- /dev/null
+++ clang/test/SemaCXX/warn-unsafe-buffer-usage-fixits-local-var-span.cpp
@@ -0,0 +1,86 @@
+// RUN: cp %s %t.cpp
+// RUN: %clang_cc1 -std=c++20 -Wunsafe-buffer-usage -fixit %t.cpp
+// RUN: grep -v CHECK %t.cpp | FileCheck %s
+typedef int * Int_ptr_t;
+typedef int Int_t;
+
+void local_array_subscript_simple() {
+ int tmp;
+// CHECK: std::span<int> p{new int [10], 10};
+// CHECK: std::span<const int> q{new int [10], 10};
+// CHECK: tmp = p[5];
+// CHECK: tmp = q[5];
+ int *p = new int[10];
+ const int *q = new int[10];
+ tmp = p[5];
+ tmp = q[5];
+
+// CHECK: std::span<int> x{new int [10], 10};
+// CHECK: std::span<int> y{new int, 1};
+// CHECK: std::span<Int_t> z{new int [10], 10};
+// CHECK: std::span<Int_t> w{new Int_t [10], 10};
+
+ Int_ptr_t x = new int[10];
+ Int_ptr_t y = new int;
+ Int_t * z = new int[10];
+ Int_t * w = new Int_t[10];
+
+ // CHECK: tmp = x[5];
+ tmp = x[5];
+ // CHECK: tmp = y[5];
+ tmp = y[5]; // y[5] will crash after being span
+ // CHECK: tmp = z[5];
+ tmp = z[5];
+ // CHECK: tmp = w[5];
+ tmp = w[5];
+}
+
+void local_array_subscript_auto() {
+ int tmp;
+// CHECK: std::span<int> p{new int [10], 10};
+// CHECK: tmp = p[5];
+ auto p = new int[10];
+ tmp = p[5];
+}
+
+void local_array_subscript_variable_extent() {
+ int n = 10;
+ int tmp;
+ //FIXME: need to think it twice about side effects in the expressions
+ //used to initialize span objects
+ // CHECK: std::span<int> p{new int [n], n};
+ // CHECK: std::span<int> q{new int [n++], <# placeholder #>};
+ // CHECK: tmp = p[5];
+ // CHECK: tmp = q[5];
+ int *p = new int[n];
+ // If the extent expression does not have a constant value, we cannot fill the extent for users...
+ int *q = new int[n++];
+ tmp = p[5];
+ tmp = q[5];
+}
+
+
+void local_ptr_to_array() {
+ int tmp;
+ int n = 10;
+ int a[10];
+ int b[n]; // If the extent expression does not have a constant value, we cannot fill the extent for users...
+ // CHECK: std::span<int> p{a, 10};
+ // CHECK: std::span<int> q{b, <# placeholder #>};
+ // CHECK: tmp = p[5];
+ // CHECK: tmp = q[5];
+ int *p = a;
+ int *q = b;
+ tmp = p[5];
+ tmp = q[5];
+}
+
+void local_ptr_addrof_init() {
+ int var;
+// CHECK: std::span<int> q{&var, 1};
+// CHECK: var = q[5];
+ int * q = &var;
+ // This expression involves unsafe buffer accesses, which will crash
+ // at runtime after applying the fix-it,
+ var = q[5];
+}
Index: clang/lib/Analysis/UnsafeBufferUsage.cpp
===================================================================
--- clang/lib/Analysis/UnsafeBufferUsage.cpp
+++ clang/lib/Analysis/UnsafeBufferUsage.cpp
@@ -115,6 +115,21 @@
MatchDescendantVisitor Visitor(&DTM, Finder, Builder, ASTMatchFinder::BK_All);
return Visitor.findMatch(DynTypedNode::create(Node));
}
+
+AST_MATCHER_P(CastExpr, castSubExpr, internal::Matcher<Expr>, innerMatcher) {
+ return innerMatcher.matches(*Node.getSubExpr(), Finder, Builder);
+}
+
+// Returns a matcher that matches any expression 'e' such that `innerMatcher`
+// matches 'e' and 'e' is in an Unspecified Lvalue Context.
+static auto isInUnspecifiedLvalueContext(internal::Matcher<Expr> innerMatcher) {
+ auto isLvalueToRvalueCast = [](internal::Matcher<Expr> M) {
+ return implicitCastExpr(hasCastKind(CastKind::CK_LValueToRValue),
+ castSubExpr(M));
+ };
+ //FIXME: add assignmentTo context...
+ return isLvalueToRvalueCast(innerMatcher);
+}
} // namespace clang::ast_matchers
namespace {
@@ -282,7 +297,7 @@
/// Array subscript expressions on raw pointers as if they're arrays. Unsafe as
/// it doesn't have any bounds checks for the array.
class ArraySubscriptGadget : public WarningGadget {
- static constexpr const char *const ArraySubscrTag = "arraySubscr";
+ static constexpr const char *const ArraySubscrTag = "ArraySubscript";
const ArraySubscriptExpr *ASE;
public:
@@ -366,6 +381,51 @@
// FIXME: pointer adding zero should be fine
//FIXME: this gadge will need a fix-it
};
+
+class Strategy;
+
+// Represents expressions of the form `DRE[*]` in the Unspecified Lvalue
+// Context (see `isInUnspecifiedLvalueContext`).
+// Note here `[]` is the built-in subscript operator.
+class ULCArraySubscriptGadget : public FixableGadget {
+private:
+ static constexpr const char *const ULCArraySubscriptTag = "ULCArraySubscript";
+ const ArraySubscriptExpr *Node;
+
+public:
+ ULCArraySubscriptGadget(const MatchFinder::MatchResult &Result)
+ : FixableGadget(Kind::ULCArraySubscript),
+ Node(Result.Nodes.getNodeAs<ArraySubscriptExpr>(ULCArraySubscriptTag)) {
+ assert(Node != nullptr && "Expecting a non-null matching result");
+ }
+
+ static bool classof(const Gadget *G) {
+ return G->getKind() == Kind::ULCArraySubscript;
+ }
+
+ static Matcher matcher() {
+ auto ArrayOrPtr = anyOf(hasPointerType(), hasArrayType());
+ auto BaseIsArrayOrPtrDRE =
+ hasBase(ignoringParenImpCasts(allOf(declRefExpr(), ArrayOrPtr)));
+ auto Target =
+ arraySubscriptExpr(BaseIsArrayOrPtrDRE,
+ unless(hasIndex(integerLiteral(equals(0)))))
+ .bind(ULCArraySubscriptTag);
+
+ return expr(isInUnspecifiedLvalueContext(Target));
+ }
+
+ virtual Optional<FixItList> getFixits(const Strategy &S) const override;
+
+ virtual const Stmt *getBaseStmt() const override { return Node; }
+
+ virtual DeclUseList getClaimedVarUseSites() const override {
+ if (const auto *DRE = dyn_cast<DeclRefExpr>(Node->getBase()->IgnoreImpCasts())) {
+ return {DRE};
+ }
+ return {};
+ }
+};
} // namespace
namespace {
@@ -519,26 +579,30 @@
// clang-format off
M.addMatcher(
stmt(forEveryDescendant(
+ eachOf(
+ // A `FixableGadget` matcher and a `WarningGadget` matcher should not disable
+ // each other (they could if they were put in the same `anyOf` group).
+ // We also should make sure no two `FixableGadget` (resp. `WarningGadget`) matchers
+ // match for the same node, so that we can group them
+ // in one `anyOf` group (for better performance via short-circuiting).
stmt(anyOf(
- // Add Gadget::matcher() for every gadget in the registry.
-#define GADGET(x) \
- x ## Gadget::matcher().bind(#x),
+#define FIXABLE_GADGET(x) \
+ x ## Gadget::matcher(),
#include "clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def"
- // In parallel, match all DeclRefExprs so that to find out
- // whether there are any uncovered by gadgets.
- declRefExpr(anyOf(hasPointerType(), hasArrayType()),
- to(varDecl())).bind("any_dre"),
// Also match DeclStmts because we'll need them when fixing
// their underlying VarDecls that otherwise don't have
// any backreferences to DeclStmts.
declStmt().bind("any_ds")
- ))
- // FIXME: Idiomatically there should be a forCallable(equalsNode(D))
- // here, to make sure that the statement actually belongs to the
- // function and not to a nested function. However, forCallable uses
- // ParentMap which can't be used before the AST is fully constructed.
- // The original problem doesn't sound like it needs ParentMap though,
- // maybe there's a more direct solution?
+ )),
+ stmt(anyOf(
+ // Add Gadget::matcher() for every gadget in the registry.
+#define WARNING_GADGET(x) \
+ x ## Gadget::matcher().bind(#x),
+#include "clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def"
+ // In parallel, match all DeclRefExprs so that to find out
+ // whether there are any uncovered by gadgets.
+ declRefExpr(anyOf(hasPointerType(), hasArrayType()), to(varDecl())).bind("any_dre")
+ )))
)),
&CB
);
@@ -607,11 +671,189 @@
return FixablesForUnsafeVars;
}
+static Strategy
+getNaiveStrategy(const llvm::SmallVectorImpl<const VarDecl *> &UnsafeVars) {
+ Strategy S;
+ for (const VarDecl *VD : UnsafeVars) {
+ S.set(VD, Strategy::Kind::Span);
+ }
+ return S;
+}
+
+Optional<FixItList>
+ULCArraySubscriptGadget::getFixits(const Strategy &S) const {
+ if (const auto *DRE = dyn_cast<DeclRefExpr>(Node->getBase()->IgnoreImpCasts()))
+ if (const auto *VD = dyn_cast<VarDecl>(DRE->getDecl())) {
+ switch (S.lookup(VD)) {
+ case Strategy::Kind::Span:
+ return FixItList{};
+ case Strategy::Kind::Wontfix:
+ case Strategy::Kind::Iterator:
+ case Strategy::Kind::Array:
+ case Strategy::Kind::Vector:
+ llvm_unreachable("unsupported strategies for FixableGadgets");
+ }
+ }
+ return std::nullopt;
+}
+
+// For a non-null initializer `Init` of `T *` type, this function writes
+// `{Init, S}` as a part of a fix-it to output stream . The list-initializer
+// `{Init, S}` belongs to a span constructor, where `Init` specifies the
+// address of the data and `S` is the extent. In many cases, this function
+// cannot figure out the actual extent `S`. It then will use a place holder to
+// replace `S` to ask users to fill `S` in. The span object being constructed
+// will have type `span<T, S>`.
+//
+// FIXME: This function so far only works for spanify "single-level" pointers.
+// When it comes to multi-level pointers, the data address is not necessarily
+// identical to `Init` and the element type of the constructing span object is
+// not necessarily `T`.
+//
+// Parameters:
+// `Init` a pointer to the initializer expression
+// `Ctx` a reference to the ASTContext
+// `OS` the output stream where fix-it texts being written to
+static void populateInitializerFixItWithSpan(const Expr *Init,
+ const ASTContext &Ctx, raw_ostream &OS) {
+ const PrintingPolicy &PP = Ctx.getPrintingPolicy();
+
+ // Prints the begin address of the span constructor:
+ OS << "{";
+ Init->printPretty(OS, nullptr, PP);
+ OS << ", ";
+
+ bool ExtKnown = false; // true iff the extent can be obtained
+ // Printers that print extent into OS and sets ExtKnown to true:
+ std::function PrintExpr = [&ExtKnown, &OS, &PP](const Expr *Size) {
+ Size->printPretty(OS, nullptr, PP);
+ ExtKnown = true;
+ };
+ std::function PrintAPInt = [&ExtKnown, &OS](const APInt &Size) {
+ Size.print(OS, false);
+ ExtKnown = true;
+ };
+ std::function PrintOne = [&ExtKnown, &OS](void) {
+ OS << "1";
+ ExtKnown = true;
+ };
+
+ // Try to get the data extent. Break into different cases:
+ if (auto CxxNew = dyn_cast<CXXNewExpr>(Init->IgnoreImpCasts())) {
+ // In cases `Init` is `new T[n]` and there is no explicit cast over
+ // `Init`, we know that `Init` must evaluates to a pointer to `n` objects
+ // of `T`. So the extent is `n` unless `n` has side effects. Similar but
+ // simpler for `Init` is `new T`.
+ if (const Expr *Ext = CxxNew->getArraySize().value_or(nullptr)) {
+ if (!Ext->HasSideEffects(Ctx))
+ PrintExpr(Ext);
+ } else if (!CxxNew->isArray())
+ PrintOne();
+ } else if (auto CArrTy = dyn_cast<ConstantArrayType>(
+ Init->IgnoreImpCasts()->getType().getTypePtr())) {
+ // In cases `Init` is of an array type after stripping off implicit casts,
+ // `PointeeTy` must besimilar to the element type of `CArrTy`, so the
+ // extent is the array size of `CArrTy`. Note that if the array size is
+ // not a constant, we cannot use it as the extent.
+ PrintAPInt(CArrTy->getSize());
+ // FIXME: local static array may be converted to std:array. Is there any
+ // dependency among these fix-its?
+ } else {
+ // In cases `Init` is of the form `&Var` after stripping of implicit
+ // casts, where `&` is the built-in operator, `PointeeTy` must be similar
+ // to the type of `Var`, so the extent is 1.
+ if (auto AddrOfExpr = dyn_cast<UnaryOperator>(Init->IgnoreImpCasts()))
+ if (AddrOfExpr->getOpcode() == UnaryOperatorKind::UO_AddrOf &&
+ isa_and_present<DeclRefExpr>(AddrOfExpr->getSubExpr()))
+ PrintOne();
+ // TODO: we can handle more cases, e.g., `&a[0]`, `&a`, std::addressof,...
+ // etc.
+ }
+ if (!ExtKnown)
+ OS << UnsafeBufferUsageHandler::getUserFillPlaceHolder();
+ OS << "}";
+}
+
+// For a `VarDecl` of the form `T * var (= Init)?`, this
+// function generates a fix-it for the declaration, which re-declares `var` to
+// be of `span<T>` type and transforms the initializer, if present, to a span
+// constructor---`span<T> var {Init, Extent}`, where `Extent` may need the user
+// to fill in.
+//
+// FIXME:
+// For now, we only consider single level pointer types, i.e., given `T` is
+// `int **`, the converted type will be `span<int*>`.
+// We need to handle multi-level pointers correctly later.
+//
+// Parameters:
+// `D` a pointer the variable declaration node
+// `Ctx` a reference to the ASTContext
+// Returns:
+// the generated fix-it
+static FixItHint fixVarDeclWithSpan(const VarDecl *D,
+ const ASTContext &Ctx) {
+ const QualType &T = D->getType().getDesugaredType(Ctx);
+ const QualType &SpanEltT = T->getPointeeType();
+ assert(!SpanEltT.isNull() && "Trying to fix a non-pointer type variable!");
+
+ SmallString<32> Replacement;
+ raw_svector_ostream OS(Replacement);
+
+ // Simply make `D` to be of span type:
+ OS << "std::span<" << SpanEltT.getAsString() << "> " << D->getName();
+ if (const Expr *Init = D->getInit())
+ populateInitializerFixItWithSpan(Init, Ctx, OS);
+
+ return FixItHint::CreateReplacement(
+ SourceRange{D->getInnerLocStart(), D->getEndLoc()}, OS.str());
+}
+
+static FixItList fixVariableWithSpan(const VarDecl *VD,
+ const DeclUseTracker &Tracker,
+ const ASTContext &Ctx) {
+ const DeclStmt *DS = Tracker.lookupDecl(VD);
+ assert(DS && "Fixing non-local variables not implemented yet!");
+ assert(DS->getSingleDecl() == VD &&
+ "Fixing non-single declarations not implemented yet!");
+ // Currently DS is an unused variable but we'll need it when
+ // non-single decls are implemented, where the pointee type name
+ // and the '*' are spread around the place.
+ (void)DS;
+
+ // FIXME: handle cases where DS has multiple declarations
+ FixItHint Fix = fixVarDeclWithSpan(VD, Ctx);
+ return {Fix};
+}
+
+static FixItList fixVariable(const VarDecl *VD, Strategy::Kind K,
+ const DeclUseTracker &Tracker,
+ const ASTContext &Ctx) {
+ switch (K) {
+ case Strategy::Kind::Span: {
+ if (VD->getType()->isPointerType())
+ return fixVariableWithSpan(VD, Tracker, Ctx);
+ return {};
+ }
+ case Strategy::Kind::Iterator:
+ case Strategy::Kind::Array:
+ case Strategy::Kind::Vector:
+ llvm_unreachable("Strategy not implemented yet!");
+ case Strategy::Kind::Wontfix:
+ llvm_unreachable("Invalid strategy!");
+ }
+}
+
static std::map<const VarDecl *, FixItList>
-getFixIts(FixableGadgetSets &FixablesForUnsafeVars, const Strategy &S) {
+getFixIts(FixableGadgetSets &FixablesForUnsafeVars, const Strategy &S, const DeclUseTracker &Tracker, const ASTContext &Ctx) {
std::map<const VarDecl *, FixItList> FixItsForVariable;
for (const auto &[VD, Fixables] : FixablesForUnsafeVars.byVar) {
- // TODO fixVariable - fixit for the variable itself
+ FixItsForVariable[VD] = fixVariable(VD, S.lookup(VD), Tracker, Ctx);
+ // If we fail to produce Fix-It for the declaration we have to skip the variable entirely.
+ if (FixItsForVariable[VD].empty()) {
+ FixItsForVariable.erase(VD);
+ continue;
+ }
+
bool ImpossibleToFix = false;
llvm::SmallVector<FixItHint, 16> FixItsForVD;
for (const auto &F : Fixables) {
@@ -634,15 +876,6 @@
return FixItsForVariable;
}
-static Strategy
-getNaiveStrategy(const llvm::SmallVectorImpl<const VarDecl *> &UnsafeVars) {
- Strategy S;
- for (const VarDecl *VD : UnsafeVars) {
- S.set(VD, Strategy::Kind::Span);
- }
- return S;
-}
-
void clang::checkUnsafeBufferUsage(const Decl *D,
UnsafeBufferUsageHandler &Handler) {
assert(D && D->getBody());
@@ -675,7 +908,7 @@
Strategy NaiveStrategy = getNaiveStrategy(UnsafeVars);
std::map<const VarDecl *, FixItList> FixItsForVariable =
- getFixIts(FixablesForUnsafeVars, NaiveStrategy);
+ getFixIts(FixablesForUnsafeVars, NaiveStrategy, Tracker, D->getASTContext());
// FIXME Detect overlapping FixIts.
Index: clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def
===================================================================
--- clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def
+++ clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def
@@ -29,6 +29,7 @@
WARNING_GADGET(Decrement)
WARNING_GADGET(ArraySubscript)
WARNING_GADGET(PointerArithmetic)
+FIXABLE_GADGET(ULCArraySubscript)
#undef FIXABLE_GADGET
#undef WARNING_GADGET
Index: clang/include/clang/Analysis/Analyses/UnsafeBufferUsage.h
===================================================================
--- clang/include/clang/Analysis/Analyses/UnsafeBufferUsage.h
+++ clang/include/clang/Analysis/Analyses/UnsafeBufferUsage.h
@@ -37,6 +37,15 @@
/// Invoked when a fix is suggested against a variable.
virtual void handleFixableVariable(const VarDecl *Variable,
FixItList &&List) = 0;
+
+ /// Returns the text indicating that the user needs to provide input there:
+ static std::string
+ getUserFillPlaceHolder(const StringRef &HintTextToUser = "placeholder") {
+ std::string s = std::string("<# ");
+ s += HintTextToUser;
+ s += " #>";
+ return s;
+ }
};
// This function invokes the analysis and allows the caller to react to it
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits