serge-sans-paille created this revision.
serge-sans-paille added a reviewer: erik.pilkington.
serge-sans-paille added a project: clang.
Herald added subscribers: cfe-commits, dexonsmith.

Implement a pessimistic evaluator of the minimal required size for a buffer 
based on the format string, and couple that with the fortified version to emit 
a warning when the buffer size is lower than the lower bound computed from the 
format string.

See the test cases for examples of warnings, and 
https://github.com/serge-sans-paille/llvm-project/pull/6/checks for the 
cross-platform validation.

Note: The lower bound could be improved, but I'd rather do that in an 
incremental commit, if that's okay with the reviewers.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D71566

Files:
  clang/include/clang/Basic/DiagnosticSemaKinds.td
  clang/lib/Sema/SemaChecking.cpp
  clang/test/Sema/warn-fortify-source.c

Index: clang/test/Sema/warn-fortify-source.c
===================================================================
--- clang/test/Sema/warn-fortify-source.c
+++ clang/test/Sema/warn-fortify-source.c
@@ -96,6 +96,21 @@
   __builtin_vsnprintf(buf, 11, "merp", list); // expected-warning {{'vsnprintf' size argument is too large; destination buffer has size 10, but size argument is 11}}
 }
 
+void call_sprintf_chk(char *buf) {
+  __builtin___sprintf_chk(buf, 1, 2, "a%d", 9);
+  __builtin___sprintf_chk(buf, 1, 1, "a%d", 9); // expected-warning {{'sprintf' will always overflow; destination buffer has size 1, but format string expands to at least 2}}
+  __builtin___sprintf_chk(buf, 1, 4, "a%#x", 9);
+  __builtin___sprintf_chk(buf, 1, 3, "a%#x", 9); // expected-warning {{'sprintf' will always overflow; destination buffer has size 3, but format string expands to at least 4}}
+  __builtin___sprintf_chk(buf, 1, 4, "a%p", (void *)9);
+  __builtin___sprintf_chk(buf, 1, 3, "a%p", (void *)9); // expected-warning {{'sprintf' will always overflow; destination buffer has size 3, but format string expands to at least 4}}
+  __builtin___sprintf_chk(buf, 1, 3, "a%+d", 9);
+  __builtin___sprintf_chk(buf, 1, 2, "a%+d", 9); // expected-warning {{'sprintf' will always overflow; destination buffer has size 2, but format string expands to at least 3}}
+  __builtin___sprintf_chk(buf, 1, 6, "a%5d", 9);
+  __builtin___sprintf_chk(buf, 1, 5, "a%5d", 9); // expected-warning {{'sprintf' will always overflow; destination buffer has size 5, but format string expands to at least 6}}
+  __builtin___sprintf_chk(buf, 1, 2, "%f", 9.f);
+  __builtin___sprintf_chk(buf, 1, 1, "a%f", 9.f); // expected-warning {{'sprintf' will always overflow; destination buffer has size 1, but format string expands to at least 2}}
+}
+
 #ifdef __cplusplus
 template <class> struct S {
   void mf() const {
Index: clang/lib/Sema/SemaChecking.cpp
===================================================================
--- clang/lib/Sema/SemaChecking.cpp
+++ clang/lib/Sema/SemaChecking.cpp
@@ -308,6 +308,58 @@
 
   return false;
 }
+class EstimateSizeFormatHandler
+    : public analyze_format_string::FormatStringHandler {
+  size_t Size;
+
+public:
+  EstimateSizeFormatHandler(const StringLiteral *fexpr)
+      : Size(fexpr->getLength()) {}
+
+  bool HandlePrintfSpecifier(const analyze_printf::PrintfSpecifier &FS,
+                             const char *startSpecifier,
+                             unsigned specifierLen) override {
+    const analyze_format_string::OptionalAmount &FW = FS.getFieldWidth();
+    if (FW.getHowSpecified() == analyze_format_string::OptionalAmount::Constant)
+      Size += FW.getConstantAmount();
+    else
+      Size += FS.getConversionSpecifier().isAnyIntArg() ||
+              FS.getConversionSpecifier().isDoubleArg();
+    Size += FS.hasPlusPrefix() || FS.hasSpacePrefix();
+    if (FS.hasAlternativeForm()) {
+      switch (FS.getConversionSpecifier().getKind()) {
+      default:
+        break;
+      case analyze_format_string::ConversionSpecifier::oArg: // leading 0
+      case analyze_format_string::ConversionSpecifier::aArg: // force a .
+      case analyze_format_string::ConversionSpecifier::AArg: // force a .
+      case analyze_format_string::ConversionSpecifier::eArg: // force a .
+      case analyze_format_string::ConversionSpecifier::EArg: // force a .
+      case analyze_format_string::ConversionSpecifier::fArg: // force a .
+      case analyze_format_string::ConversionSpecifier::FArg: // force a .
+      case analyze_format_string::ConversionSpecifier::gArg: // force a .
+      case analyze_format_string::ConversionSpecifier::GArg: // force a .
+        Size += 1;
+        break;
+      case analyze_format_string::ConversionSpecifier::xArg: // leading 0x
+      case analyze_format_string::ConversionSpecifier::XArg: // leading 0x
+        Size += 2;
+        break;
+      }
+    }
+    switch (FS.getConversionSpecifier().getKind()) {
+    case analyze_format_string::ConversionSpecifier::pArg: // leading 0x
+      Size += 3;
+      break;
+    default:
+      break;
+    }
+    Size -= specifierLen;
+    return true;
+  }
+
+  size_t getSizeLowerBound() const { return Size; }
+};
 
 /// Check a call to BuiltinID for buffer overflows. If BuiltinID is a
 /// __builtin_*_chk function, then use the object size argument specified in the
@@ -328,10 +380,35 @@
 
   unsigned DiagID = 0;
   bool IsChkVariant = false;
+  llvm::APSInt UsedSize = llvm::APSInt::get(-1);
   unsigned SizeIndex, ObjectIndex;
   switch (BuiltinID) {
   default:
     return;
+  case Builtin::BI__builtin___sprintf_chk: {
+    if (auto *StrE = dyn_cast<StringLiteral>(
+            TheCall->getArg(3)->IgnoreParenImpCasts())) {
+      EstimateSizeFormatHandler H(StrE);
+      StringRef StrRef = StrE->getString();
+      const char *Str = StrRef.data();
+      const ConstantArrayType *T =
+          Context.getAsConstantArrayType(StrE->getType());
+      assert(T && "String literal not of constant array type!");
+      size_t TypeSize = T->getSize().getZExtValue();
+      size_t StrLen =
+          std::min(std::max(TypeSize, size_t(1)) - 1, StrRef.size());
+      if (!analyze_format_string::ParsePrintfString(
+              H, Str, Str + StrLen, getLangOpts(), Context.getTargetInfo(),
+              false)) {
+        DiagID = diag::warn_fortify_source_format_overflow;
+        IsChkVariant = true;
+        UsedSize = H.getSizeLowerBound();
+        ObjectIndex = 2;
+        break;
+      }
+    }
+    return;
+  }
   case Builtin::BI__builtin___memcpy_chk:
   case Builtin::BI__builtin___memmove_chk:
   case Builtin::BI__builtin___memset_chk:
@@ -427,11 +504,13 @@
   }
 
   // Evaluate the number of bytes of the object that this call will use.
-  Expr::EvalResult Result;
-  Expr *UsedSizeArg = TheCall->getArg(SizeIndex);
-  if (!UsedSizeArg->EvaluateAsInt(Result, getASTContext()))
-    return;
-  llvm::APSInt UsedSize = Result.Val.getInt();
+  if (UsedSize == -1) {
+    Expr::EvalResult Result;
+    Expr *UsedSizeArg = TheCall->getArg(SizeIndex);
+    if (!UsedSizeArg->EvaluateAsInt(Result, getASTContext()))
+      return;
+    UsedSize = Result.Val.getInt();
+  }
 
   if (UsedSize.ule(ObjectSize))
     return;
@@ -8867,6 +8946,7 @@
                                                   S.Context.getTargetInfo(),
                                             Type == Sema::FST_FreeBSDKPrintf))
       H.DoneProcessing();
+
   } else if (Type == Sema::FST_Scanf) {
     CheckScanfHandler H(S, FExpr, OrigFormatExpr, Type, firstDataArg,
                         numDataArgs, Str, HasVAListArg, Args, format_idx,
Index: clang/include/clang/Basic/DiagnosticSemaKinds.td
===================================================================
--- clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -741,6 +741,12 @@
   "'%0' size argument is too large; destination buffer has size %1,"
   " but size argument is %2">, InGroup<FortifySource>;
 
+def warn_fortify_source_format_overflow : Warning<
+  "'%0' will always overflow; destination buffer has size %1,"
+  " but format string expands to at least %2">,
+  InGroup<FortifySource>;
+
+
 /// main()
 // static main() is not an error in C, just in C++.
 def warn_static_main : Warning<"'main' should not be declared static">,
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to