zukatsinadze created this revision.
zukatsinadze added reviewers: NoQ, Charusso, Szelethus.
zukatsinadze added a project: clang.
Herald added subscribers: cfe-commits, ASDenysPetrov, martong, dkrupp, 
donat.nagy, mikhail.ramalho, a.sidorin, szepet, baloghadamsoftware, xazax.hun, 
mgorny.

This patch introduces a new checker:
`alpha.security.cert.str.37c`

This checker is implemented based on the following rule:
https://wiki.sei.cmu.edu/confluence/x/BNcxBQ
The check warns if the argument of a character handling 
function is not representable as unsigned char.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D79358

Files:
  clang/docs/analyzer/checkers.rst
  clang/include/clang/StaticAnalyzer/Checkers/Checkers.td
  clang/lib/StaticAnalyzer/Checkers/CMakeLists.txt
  clang/lib/StaticAnalyzer/Checkers/cert/Str37Checker.cpp
  clang/test/Analysis/cert/str37-c-fp-suppression.cpp
  clang/test/Analysis/cert/str37-c.cpp

Index: clang/test/Analysis/cert/str37-c.cpp
===================================================================
--- /dev/null
+++ clang/test/Analysis/cert/str37-c.cpp
@@ -0,0 +1,29 @@
+// RUN: %clang_analyze_cc1 \
+// RUN:  -analyzer-checker=alpha.security.cert.str.37c\
+// RUN:  -verify %s
+
+// Examples from the CERT rule's page.
+// https://wiki.sei.cmu.edu/confluence/x/BNcxBQ
+
+#include "../Inputs/system-header-simulator.h"
+
+int isspace(int c);
+
+size_t bad_usage(const char *s) {
+  const char *t = s;
+  size_t length = strlen(s) + 1;
+  while (isspace(*t) && (t - s < length)) {
+    // expected-warning@-1 {{arguments to character-handling function `isspace` must be representable as an unsigned char}}
+    ++t;
+  }
+  return t - s;
+}
+
+size_t good_usage(const char *s) {
+  const char *t = s;
+  size_t length = strlen(s) + 1;
+  while (isspace((unsigned char)*t) && (t - s < length)) { // no warning
+    ++t;
+  }
+  return t - s;
+}
Index: clang/test/Analysis/cert/str37-c-fp-suppression.cpp
===================================================================
--- /dev/null
+++ clang/test/Analysis/cert/str37-c-fp-suppression.cpp
@@ -0,0 +1,200 @@
+// RUN: %clang_analyze_cc1 \
+// RUN:  -analyzer-checker=alpha.security.cert.str.37c\
+// RUN:  -verify %s
+
+#include "../Inputs/system-header-simulator.h"
+
+int isspace(int c);
+int toupper(int c);
+int isalnum(int c);
+int isalpha(int c);
+int isascii(int c);
+int isblank(int c);
+int iscntrl(int c);
+int isdigit(int c);
+int isgraph(int c);
+int islower(int c);
+int isprint(int c);
+int ispunct(int c);
+int isupper(int c);
+int isxdigit(int c);
+int toascii(int c);
+int tolower(int c);
+
+namespace correct_use {
+
+int test_tolower(const char *c) {
+  char lowA = tolower('A'); // no warning
+  return tolower(97);       // no warning
+}
+
+int test_toascii(const char *c) {
+  return toascii((unsigned char)*c); // no warning
+}
+
+void test_toupper(const char *s) {
+  char c = 98;
+  c = toupper(c); // no warning
+  char b = -2;
+  b = toupper((unsigned char)b); // no warning
+}
+
+int test_isalnum() {
+  int a = 50;
+  int testEOF = isalnum(EOF); // no warning
+  return isalnum(a);          // no warning
+}
+
+int test_isalpha() {
+  char c = 'a';
+  int b = isalpha(255); // no warning
+  return isalpha(c);    // no warning
+}
+
+int test_isascii() {
+  char c = 'a';
+  int b = isascii(200); // no warning
+  int A = isascii(65);  // no warning
+  return isascii(c);    // no warning
+}
+
+int test_isblank() {
+  char c = ' ';
+  bool isEOFBlank = isblank(EOF); // no warning
+  return isblank(c);              // no warning
+}
+
+int test_iscntrl() {
+  char c = 2;
+  return iscntrl(c); // no warning
+}
+
+int test_isdigit() {
+  char c = '2';
+  return isdigit(c); // no warning
+}
+
+int test_isgraph() {
+  char c = '9';
+  return isgraph(c); // no warning
+}
+
+int test_islower() {
+  char c = 'a';
+  return islower(c); // no warning
+}
+
+int test_isprint(char c) {
+  return isprint((unsigned char)c); // no warning
+}
+
+int test_ispunct() {
+  char c = 'a';
+  char b = -2;
+  bool test_unsigned = ispunct((unsigned char)b); // no warning
+  bool not_true = ispunct(c);                     // no warning
+  return ispunct(2);                              // no warning
+}
+
+int test_isupper(const char *b) {
+  char c = 'A';
+  bool A_is_upper = isupper(c);      // no warning
+  return isupper((unsigned char)*b); // no warning
+}
+
+int test_isxdigit() {
+  char hexa = '9';
+  char not_hexa = 'M';
+  return isxdigit(hexa) || isxdigit(not_hexa); // no warning
+}
+} // namespace correct_use
+
+namespace incorrect_use {
+
+int test_tolower() {
+  int a = 5;
+  int b = 7;
+  char c = a - b;
+  return tolower(c);
+  // expected-warning@-1 {{arguments to character-handling function `tolower` must be representable as an unsigned char}}
+}
+
+int test_toascii(const char *c) {
+  return toascii(*c);
+  // expected-warning@-1 {{arguments to character-handling function `toascii` must be representable as an unsigned char}}
+}
+
+void test_toupper(const char *s) {
+  char c = -2;
+  c = toupper(c);
+  // expected-warning@-1 {{arguments to character-handling function `toupper` must be representable as an unsigned char}}
+}
+
+int test_isalnum() {
+  char c = -2;
+  return isalnum(c);
+  // expected-warning@-1 {{arguments to character-handling function `isalnum` must be representable as an unsigned char}}
+}
+
+int test_isalpha() {
+  return isalpha(256);
+  // expected-warning@-1 {{arguments to character-handling function `isalpha` must be representable as an unsigned char}}
+}
+
+int test_isascii() {
+  int b = isascii(269);
+  // expected-warning@-1 {{arguments to character-handling function `isascii` must be representable as an unsigned char}}
+  return isascii(100); // no warning
+}
+
+int test_isblank() {
+  char c = -2;
+  return isblank(c);
+  // expected-warning@-1 {{arguments to character-handling function `isblank` must be representable as an unsigned char}}
+}
+
+int test_iscntrl() {
+  char c = -22;
+  return iscntrl(c);
+  // expected-warning@-1 {{arguments to character-handling function `iscntrl` must be representable as an unsigned char}}
+}
+
+int test_isdigit() {
+  char c = -2;
+  return isdigit(c);
+  // expected-warning@-1 {{arguments to character-handling function `isdigit` must be representable as an unsigned char}}
+}
+
+int test_isgraph() {
+  return isgraph(269);
+  // expected-warning@-1 {{arguments to character-handling function `isgraph` must be representable as an unsigned char}}
+}
+
+int test_islower() {
+  return islower(269);
+  // expected-warning@-1 {{arguments to character-handling function `islower` must be representable as an unsigned char}}
+}
+
+int test_isprint(char c) {
+  return isprint(c);
+  // expected-warning@-1 {{arguments to character-handling function `isprint` must be representable as an unsigned char}}
+}
+
+int test_ispunct() {
+  char c;
+  return ispunct(c);
+  // expected-warning@-1 {{arguments to character-handling function `ispunct` must be representable as an unsigned char}}
+}
+
+int test_isupper(const char *b) {
+  return isupper(*b);
+  // expected-warning@-1 {{arguments to character-handling function `isupper` must be representable as an unsigned char}}
+}
+
+int test_isxdigit() {
+  char hexa = -10;
+  return isxdigit(hexa);
+  // expected-warning@-1 {{arguments to character-handling function `isxdigit` must be representable as an unsigned char}}
+}
+
+} // namespace incorrect_use
Index: clang/lib/StaticAnalyzer/Checkers/cert/Str37Checker.cpp
===================================================================
--- /dev/null
+++ clang/lib/StaticAnalyzer/Checkers/cert/Str37Checker.cpp
@@ -0,0 +1,104 @@
+//== Str37Checker.cpp --------------------------------- -*- C++ -*--=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines Str37Checker which finds calls of character-handling
+// functions with arguments which are not representable as an unsigned char.
+// https://wiki.sei.cmu.edu/confluence/x/BNcxBQ
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h"
+#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
+#include "clang/StaticAnalyzer/Core/Checker.h"
+#include "clang/StaticAnalyzer/Core/CheckerManager.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
+
+using namespace clang;
+using namespace ento;
+
+namespace {
+
+class Str37Checker : public Checker<check::PreCall> {
+private:
+  BugType BT{this,
+             "arguments to character-handling functions must be representable "
+             "as an unsigned char",
+             categories::SecurityError};
+
+  using ArgCheck = std::function<void(const Str37Checker *, const CallEvent &,
+                                      CheckerContext &)>;
+  // The pairs are in the following form:
+  // {{function-name, argument-count}, {callback, index-of-argument}}
+  const CallDescriptionMap<std::pair<ArgCheck, unsigned>> CDM = {
+      {{"isalnum", 1}, {&Str37Checker::checkPreCall, 0}},
+      {{"isalpha", 1}, {&Str37Checker::checkPreCall, 0}},
+      {{"isascii", 1}, {&Str37Checker::checkPreCall, 0}},
+      {{"isblank", 1}, {&Str37Checker::checkPreCall, 0}},
+      {{"iscntrl", 1}, {&Str37Checker::checkPreCall, 0}},
+      {{"isdigit", 1}, {&Str37Checker::checkPreCall, 0}},
+      {{"isgraph", 1}, {&Str37Checker::checkPreCall, 0}},
+      {{"islower", 1}, {&Str37Checker::checkPreCall, 0}},
+      {{"isprint", 1}, {&Str37Checker::checkPreCall, 0}},
+      {{"ispunct", 1}, {&Str37Checker::checkPreCall, 0}},
+      {{"isspace", 1}, {&Str37Checker::checkPreCall, 0}},
+      {{"isupper", 1}, {&Str37Checker::checkPreCall, 0}},
+      {{"isxdigit", 1}, {&Str37Checker::checkPreCall, 0}},
+      {{"toascii", 1}, {&Str37Checker::checkPreCall, 0}},
+      {{"toupper", 1}, {&Str37Checker::checkPreCall, 0}},
+      {{"tolower", 1}, {&Str37Checker::checkPreCall, 0}}};
+
+public:
+  void checkPreCall(const CallEvent &Call, CheckerContext &C) const;
+};
+} // namespace
+
+void Str37Checker::checkPreCall(const CallEvent &Call,
+                                CheckerContext &C) const {
+  const auto *Lookup = CDM.lookup(Call);
+  if (!Lookup)
+    return;
+
+  unsigned ArgIndex = (*Lookup).second;
+  SValBuilder &SVB = C.getSValBuilder();
+  ProgramStateRef State = C.getState();
+
+  const Expr *ArgExpr = Call.getArgExpr(ArgIndex);
+  QualType Ty = ArgExpr->IgnoreImpCasts()->getType();
+  if (Ty->isPointerType())
+    Ty = Ty->getPointeeType();
+  const auto *B = dyn_cast<BuiltinType>(Ty);
+  if (!B)
+    return;
+  if (B->getKind() == BuiltinType::UChar)
+    return;
+
+  SVal ArgV = Call.getArgSVal(ArgIndex);
+  if (const auto AcutalVal = SVB.getKnownValue(State, ArgV))
+    // -1 is included because of EOF
+    if (-1 <= *AcutalVal && *AcutalVal <= 255)
+      return;
+
+  StringRef CallName = Call.getCalleeIdentifier()->getName();
+  SmallString<128> Msg;
+  llvm::raw_svector_ostream Out(Msg);
+  Out << '\'' << "arguments to character-handling function `" << CallName
+      << "` must be representable as an unsigned char";
+  std::string ReportMsg = Out.str().str();
+
+  ExplodedNode *N = C.generateErrorNode();
+  auto Report = std::make_unique<PathSensitiveBugReport>(BT, ReportMsg, N);
+
+  C.emitReport(std::move(Report));
+}
+
+void ento::registerStr37(CheckerManager &Mgr) {
+  Mgr.registerChecker<Str37Checker>();
+}
+
+bool ento::shouldRegisterStr37(const CheckerManager &) { return true; }
Index: clang/lib/StaticAnalyzer/Checkers/CMakeLists.txt
===================================================================
--- clang/lib/StaticAnalyzer/Checkers/CMakeLists.txt
+++ clang/lib/StaticAnalyzer/Checkers/CMakeLists.txt
@@ -97,6 +97,7 @@
   ReturnUndefChecker.cpp
   ReturnValueChecker.cpp
   RunLoopAutoreleaseLeakChecker.cpp
+  cert/Str37Checker.cpp
   SimpleStreamChecker.cpp
   SmartPtrModeling.cpp
   StackAddrEscapeChecker.cpp
Index: clang/include/clang/StaticAnalyzer/Checkers/Checkers.td
===================================================================
--- clang/include/clang/StaticAnalyzer/Checkers/Checkers.td
+++ clang/include/clang/StaticAnalyzer/Checkers/Checkers.td
@@ -73,6 +73,7 @@
 
 def CERT : Package<"cert">, ParentPackage<SecurityAlpha>;
 def POS : Package<"pos">, ParentPackage<CERT>;
+def STR : Package<"str">, ParentPackage<CERT>;
 
 def Unix : Package<"unix">;
 def UnixAlpha : Package<"unix">, ParentPackage<Alpha>;
@@ -848,6 +849,15 @@
 
 } // end "alpha.cert.pos"
 
+let ParentPackage = STR in {
+
+  def Str37 : Checker<"37c">,
+  HelpText<"Arguments to character-handling functions must be "
+           "representable as an unsigned char">,
+  Documentation<HasDocumentation>;
+
+} // end "alpha.cert.str"
+
 let ParentPackage = SecurityAlpha in {
 
 def ArrayBoundChecker : Checker<"ArrayBound">,
Index: clang/docs/analyzer/checkers.rst
===================================================================
--- clang/docs/analyzer/checkers.rst
+++ clang/docs/analyzer/checkers.rst
@@ -1930,19 +1930,6 @@
 alpha.security
 ^^^^^^^^^^^^^^
 
-
-alpha.security.cert
-^^^^^^^^^^^^^^^^^^^
-
-SEI CERT checkers which tries to find errors based on their `C coding rules <https://wiki.sei.cmu.edu/confluence/display/c/2+Rules>`_.
-
-.. _alpha-security-cert-pos-checkers:
-
-alpha.security.cert.pos
-^^^^^^^^^^^^^^^^^^^^^^^
-
-SEI CERT checkers of `POSIX C coding rules <https://wiki.sei.cmu.edu/confluence/pages/viewpage.action?pageId=87152405>`_.
-
 .. _alpha-security-cert-pos-34c:
 
 alpha.security.cert.pos.34c
@@ -1960,7 +1947,29 @@
  
     return putenv(env); // putenv function should not be called with auto variables
   }
-  
+
+
+.. _alpha-security-cert-str-37c:
+
+alpha.security.cert.str.37c
+"""""""""""""""""""""""""""
+
+Finds calls of character-handling functions with arguments which are not representable as an unsigned char.
+Rule applies to the following functions: ``isalnum``, ``isalpha``, ``isascii``, ``isblank``, ``iscntrl``, ``isdigit``, ``isgraph``, 
+``islower``, ``isprint``, ``ispunct``, ``isspace``, ``isupper``, ``isxdigit``, ``toascii``, ``toupper``, ``tolower``.
+
+.. code-block:: c
+
+  size_t count_preceding_whitespace(const char *s) {
+    const char *t = s;
+    size_t length = strlen(s) + 1;
+    while (isspace(*t) && (t - s < length)) { 
+      // The argument to isspace() must be EOF or representable as an unsigned char; otherwise, the result is undefined.
+      ++t;
+    }
+    return t - s;
+}
+
 .. _alpha-security-ArrayBound:
 
 alpha.security.ArrayBound (C)
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to