ztamas created this revision. Herald added subscribers: cfe-commits, xazax.hun, mgorny. Herald added a project: clang.
This check searches for signed char -> integer conversions which might indicate programming error, because of the misinterpretation of char values. A signed char might store the non-ASCII characters as negative values. The human programmer probably expects that after an integer conversion the converted value matches with the character code (a value from [0..255]), however, the actual value is in [-128..127] interval. See also: STR34-C. Cast characters to unsigned char before converting to larger integer sizes https://wiki.sei.cmu.edu/confluence/display/c/STR34-C.+Cast+characters+to+unsigned+char+before+converting+to+larger+integer+sizes By now this check is limited to assignment / variable declarations. If we would catch all signed char -> integer conversion, then it would produce a lot of findings and also false positives. So I added only this use case now, but this check can be extended with additional use cases later. The CERT documentation mentions another use case when the char is used for array subscript. Next to that a third use case can be the signed char - unsigned char comparison, which also a use case where things happen unexpectedly because of conversion to integer. Repository: rG LLVM Github Monorepo https://reviews.llvm.org/D71174 Files: clang-tools-extra/clang-tidy/bugprone/BugproneTidyModule.cpp clang-tools-extra/clang-tidy/bugprone/CMakeLists.txt clang-tools-extra/clang-tidy/bugprone/SignedCharMisuseCheck.cpp clang-tools-extra/clang-tidy/bugprone/SignedCharMisuseCheck.h clang-tools-extra/docs/ReleaseNotes.rst clang-tools-extra/docs/clang-tidy/checks/bugprone-signed-char-misuse.rst clang-tools-extra/docs/clang-tidy/checks/list.rst clang-tools-extra/test/clang-tidy/checkers/bugprone-signed-char-misuse-with-option.cpp clang-tools-extra/test/clang-tidy/checkers/bugprone-signed-char-misuse.cpp
Index: clang-tools-extra/test/clang-tidy/checkers/bugprone-signed-char-misuse.cpp =================================================================== --- /dev/null +++ clang-tools-extra/test/clang-tidy/checkers/bugprone-signed-char-misuse.cpp @@ -0,0 +1,123 @@ +// RUN: %check_clang_tidy %s bugprone-signed-char-misuse %t + +/////////////////////////////////////////////////////////////////// +/// Test cases correctly caught by the check. + +int SimpleVarDeclaration() { + signed char CCharacter = -5; + int NCharacter = CCharacter; + // CHECK-MESSAGES: [[@LINE-1]]:20: warning: singed char -> integer ('int') conversion; consider to cast to unsigned char first. [bugprone-signed-char-misuse] + + return NCharacter; +} + +int SimpleAssignment() { + signed char CCharacter = -5; + int NCharacter; + NCharacter = CCharacter; + // CHECK-MESSAGES: [[@LINE-1]]:16: warning: singed char -> integer ('int') conversion; consider to cast to unsigned char first. [bugprone-signed-char-misuse] + + return NCharacter; +} + +int CStyleCast() { + signed char CCharacter = -5; + int NCharacter; + NCharacter = (int)CCharacter; + // CHECK-MESSAGES: [[@LINE-1]]:21: warning: singed char -> integer ('int') conversion; consider to cast to unsigned char first. [bugprone-signed-char-misuse] + + return NCharacter; +} + +int StaticCast() { + signed char CCharacter = -5; + int NCharacter; + NCharacter = static_cast<int>(CCharacter); + // CHECK-MESSAGES: [[@LINE-1]]:33: warning: singed char -> integer ('int') conversion; consider to cast to unsigned char first. [bugprone-signed-char-misuse] + + return NCharacter; +} + +int FunctionalCast() { + signed char CCharacter = -5; + int NCharacter; + NCharacter = int(CCharacter); + // CHECK-MESSAGES: [[@LINE-1]]:20: warning: singed char -> integer ('int') conversion; consider to cast to unsigned char first. [bugprone-signed-char-misuse] + + return NCharacter; +} + +int NegativeConstValue() { + const signed char CCharacter = -5; + int NCharacter = CCharacter; + // CHECK-MESSAGES: [[@LINE-1]]:20: warning: singed char -> integer ('int') conversion; consider to cast to unsigned char first. [bugprone-signed-char-misuse] + + return NCharacter; +} + +int CharPointer(signed char *CCharacter) { + int NCharacter = *CCharacter; + // CHECK-MESSAGES: [[@LINE-1]]:20: warning: singed char -> integer ('int') conversion; consider to cast to unsigned char first. [bugprone-signed-char-misuse] + + return NCharacter; +} + +/////////////////////////////////////////////////////////////////// +/// Test cases correctly ignored by the check. + +int UnsignedCharCast() { + unsigned char CCharacter = 'a'; + int NCharacter = CCharacter; + + return NCharacter; +} + +int PositiveConstValue() { + const signed char CCharacter = 5; + int NCharacter = CCharacter; + + return NCharacter; +} + +// singed char -> integer cast is not the direct child of declaration expression. +int DescendantCast() { + signed char CCharacter = 'a'; + int NCharacter = 10 + CCharacter; + + return NCharacter; +} + +// singed char -> integer cast is not the direct child of assignment expression. +int DescendantCastAssignment() { + signed char CCharacter = 'a'; + int NCharacter; + NCharacter = 10 + CCharacter; + + return NCharacter; +} + +// bool is an integer type in clang; make sure to ignore it. +bool BoolVarDeclaration() { + signed char CCharacter = 'a'; + bool BCharacter = CCharacter == 'b'; + + return BCharacter; +} + +// bool is an integer type in clang; make sure to ignore it. +bool BoolAssignment() { + signed char CCharacter = 'a'; + bool BCharacter; + BCharacter = CCharacter == 'b'; + + return BCharacter; +} + +// char is an integer type in clang; make sure to ignore it. +unsigned char CharToCharCast() { + signed char SCCharacter = 'a'; + unsigned char USCharacter; + USCharacter = SCCharacter; + + return USCharacter; +} Index: clang-tools-extra/test/clang-tidy/checkers/bugprone-signed-char-misuse-with-option.cpp =================================================================== --- /dev/null +++ clang-tools-extra/test/clang-tidy/checkers/bugprone-signed-char-misuse-with-option.cpp @@ -0,0 +1,74 @@ +// RUN: %check_clang_tidy %s bugprone-signed-char-misuse %t \ +// RUN: -config='{CheckOptions: \ +// RUN: [{key: bugprone-signed-char-misuse.CharTypdefsToIgnore, value: "sal_Int8;int8_t"}]}' \ +// RUN: -- + +/////////////////////////////////////////////////////////////////// +/// Test cases correctly caught by the check. + +// Check that a simple test case is still caught. +int SimpleAssignment() { + signed char CCharacter = -5; + int NCharacter; + NCharacter = CCharacter; + // CHECK-MESSAGES: [[@LINE-1]]:16: warning: singed char -> integer ('int') conversion; consider to cast to unsigned char first. [bugprone-signed-char-misuse] + + return NCharacter; +} + +typedef signed char sal_Char; + +int TypedefNotInIgnorableList() { + sal_Char CCharacter = 'a'; + int NCharacter = CCharacter; + // CHECK-MESSAGES: [[@LINE-1]]:20: warning: singed char -> integer ('int') conversion; consider to cast to unsigned char first. [bugprone-signed-char-misuse] + + return NCharacter; +} + +/////////////////////////////////////////////////////////////////// +/// Test cases correctly ignored by the check. + +typedef signed char sal_Int8; + +int OneIgnorableTypedef() { + sal_Int8 CCharacter = 'a'; + int NCharacter = CCharacter; + + return NCharacter; +} + +typedef signed char int8_t; + +int OtherIgnorableTypedef() { + int8_t CCharacter = 'a'; + int NCharacter = CCharacter; + + return NCharacter; +} + +/////////////////////////////////////////////////////////////////// +/// Test cases which should be caught by the check. + +namespace boost { + +template <class T> +class optional { + T *member; + +public: + optional(T value) { + member = new T(value); + } + + T operator*() { return *member; } +}; + +} // namespace boost + +int DereferenceWithTypdef(boost::optional<sal_Int8> param) { + int NCharacter = *param; + // CHECK-MESSAGES: [[@LINE-1]]:20: warning: singed char -> integer ('int') conversion; consider to cast to unsigned char first. [bugprone-signed-char-misuse] + + return NCharacter; +} \ No newline at end of file Index: clang-tools-extra/docs/clang-tidy/checks/list.rst =================================================================== --- clang-tools-extra/docs/clang-tidy/checks/list.rst +++ clang-tools-extra/docs/clang-tidy/checks/list.rst @@ -63,6 +63,7 @@ bugprone-not-null-terminated-result bugprone-parent-virtual-call bugprone-posix-return + bugprone-signed-char-misuse bugprone-sizeof-container bugprone-sizeof-expression bugprone-string-constructor Index: clang-tools-extra/docs/clang-tidy/checks/bugprone-signed-char-misuse.rst =================================================================== --- /dev/null +++ clang-tools-extra/docs/clang-tidy/checks/bugprone-signed-char-misuse.rst @@ -0,0 +1,72 @@ +.. title:: clang-tidy - bugprone-signed-char-misuse + +bugprone-signed-char-misuse +=========================== + +Finds ``signed char`` -> integer conversions which might indicate a programming +error. The basic problem with the ``signed char``, that it might store the +non-ASCII characters as negative values. The human programmer probably +expects that after an integer conversion the converted value matches with the +character code (a value from [0..255]), however, the actual value is in +[-128..127] interval. This also applies to the plain ``char`` type on +those implementations which represent ``char`` similar to ``signed char``. + +To avoid this kind of misinterpretation, the desired way of converting from a +``signed char`` to an integer value is converting to ``unsigned char`` first, +which stores all the characters in the positive [0..255] interval which matches +with the known character codes. + +By now, this check is limited to assignments and variable declarations, +where a ``signed char`` is assigned to an integer variable. There are other +use cases where the same misinterpretation might lead to similar bugous +behavior. + +See also: +`STR34-C. Cast characters to unsigned char before converting to larger integer sizes +<https://wiki.sei.cmu.edu/confluence/display/c/STR34-C.+Cast+characters+to+unsigned+char+before+converting+to+larger+integer+sizes>`_ + +A good example from the CERT description when a ``char`` variable is used to +read from a file that might contain non-ASCII characters. The problem comes +up when the code uses the ``-1`` integer value as EOF, while the 255 character +code is also stored as ``-1`` in two's complement form of char type. +See a simple example of this bellow. This code stops not only when it reaches +the end of the file, but also when it gets a character with the 255 code. + +.. code-block:: c++ + + #define EOF (-1) + + int read(void) { + char CChar; + int IChar = EOF; + + if (readChar(CChar)) { + IChar = CChar; + } + return IChar; + } + +A proper way to fix the code above is converting the ``char`` variable to +an ``unsigned char`` value first. + +.. code-block:: c++ + + #define EOF (-1) + + int read(void) { + char CChar; + int IChar = EOF; + + if (readChar(CChar)) { + IChar = static_cast<unsigned char>(CChar); + } + return IChar; + } + +.. option:: CharTypdefsToIgnore + + A semicolon-separated list of typedef names. In this list, we can list + typedefs for ``char`` or ``signed char``, which will be ignored by the + check. This is useful when a typedef introduces an integer alias like + ``sal_Int8`` or ``int8_t``. In this case, human misinterpretation is not + an issue. \ No newline at end of file Index: clang-tools-extra/docs/ReleaseNotes.rst =================================================================== --- clang-tools-extra/docs/ReleaseNotes.rst +++ clang-tools-extra/docs/ReleaseNotes.rst @@ -94,6 +94,17 @@ Without the null terminator it can result in undefined behaviour when the string is read. +- New :doc:`bugprone-signed-char-misuse + <clang-tidy/checks/bugprone-signed-char-misuse>` check. + + Finds ``signed char`` -> integer conversions which might indicate a programming + error. The basic problem with the ``signed char``, that it might store the + non-ASCII characters as negative values. The human programmer probably + expects that after an integer conversion the converted value matches with the + character code (a value from [0..255]), however, the actual value is in + [-128..127] interval. This also applies to the plain ``char`` type on + those implementations which represent ``char`` similar to ``signed char``. + - New :doc:`cert-mem57-cpp <clang-tidy/checks/cert-mem57-cpp>` check. Index: clang-tools-extra/clang-tidy/bugprone/SignedCharMisuseCheck.h =================================================================== --- /dev/null +++ clang-tools-extra/clang-tidy/bugprone/SignedCharMisuseCheck.h @@ -0,0 +1,44 @@ +//===--- SignedCharMisuseCheck.h - clang-tidy -------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_BUGPRONE_SIGNEDCHARMISUSECHECK_H +#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_BUGPRONE_SIGNEDCHARMISUSECHECK_H + +#include "../ClangTidyCheck.h" + +namespace clang { +namespace tidy { +namespace bugprone { + +/// Finds ``signed char`` -> integer conversions which might indicate a programming +/// error. The basic problem with the ``signed char``, that it might store the +/// non-ASCII characters as negative values. The human programmer probably +/// expects that after an integer conversion the converted value matches with the +/// character code (a value from [0..255]), however, the actual value is in +/// [-128..127] interval. This also applies to the plain ``char`` type on +/// those implementations which represent ``char`` similar to ``signed char``. +/// +/// For the user-facing documentation see: +/// http://clang.llvm.org/extra/clang-tidy/checks/bugprone-signed-char-misuse.html +class SignedCharMisuseCheck : public ClangTidyCheck { +public: + SignedCharMisuseCheck(StringRef Name, ClangTidyContext *Context); + + void storeOptions(ClangTidyOptions::OptionMap &Opts) override; + void registerMatchers(ast_matchers::MatchFinder *Finder) override; + void check(const ast_matchers::MatchFinder::MatchResult &Result) override; + +private: + const std::string CharTypdefsToIgnoreList; +}; + +} // namespace bugprone +} // namespace tidy +} // namespace clang + +#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_BUGPRONE_SIGNEDCHARMISUSECHECK_H Index: clang-tools-extra/clang-tidy/bugprone/SignedCharMisuseCheck.cpp =================================================================== --- /dev/null +++ clang-tools-extra/clang-tidy/bugprone/SignedCharMisuseCheck.cpp @@ -0,0 +1,108 @@ +//===--- SignedCharMisuseCheck.cpp - clang-tidy ---------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "SignedCharMisuseCheck.h" +#include "../utils/OptionsUtils.h" +#include "clang/AST/ASTContext.h" +#include "clang/ASTMatchers/ASTMatchFinder.h" + +using namespace clang::ast_matchers; +using namespace clang::ast_matchers::internal; + +namespace clang { +namespace tidy { +namespace bugprone { + +namespace { +Matcher<TypedefDecl> hasAnyListedName(const std::string &Names) { + const std::vector<std::string> NameList = + utils::options::parseStringList(Names); + return hasAnyName(std::vector<StringRef>(NameList.begin(), NameList.end())); +} +} // namespace + +SignedCharMisuseCheck::SignedCharMisuseCheck(StringRef Name, + ClangTidyContext *Context) + : ClangTidyCheck(Name, Context), + CharTypdefsToIgnoreList(Options.get("CharTypdefsToIgnore", "")) {} + +void SignedCharMisuseCheck::storeOptions(ClangTidyOptions::OptionMap &Opts) { + Options.store(Opts, "CharTypdefsToIgnore", CharTypdefsToIgnoreList); +} + +void SignedCharMisuseCheck::registerMatchers(MatchFinder *Finder) { + // We can ignore typedefs which are some kind of integer types + // (e.g. typedef char sal_Int8). In this case, we don't need to + // worry about the misinterpretation of char values. + const auto IntTypedef = qualType( + hasDeclaration(typedefDecl(hasAnyListedName(CharTypdefsToIgnoreList)))); + + const auto SignedCharType = expr(hasType(qualType( + allOf(isAnyCharacter(), isSignedInteger(), unless(IntTypedef))))); + + const auto IntegerType = qualType(allOf(isInteger(), unless(isAnyCharacter()), + unless(booleanType()))) + .bind("integerType"); + ; + + // We are interested in signed char -> integer conversion. + const auto ImplicitCastExpr = + implicitCastExpr(hasSourceExpression(SignedCharType), + hasImplicitDestinationType(IntegerType)) + .bind("castExpression"); + ; + + const auto CStyleCastExpr = cStyleCastExpr(has(ImplicitCastExpr)); + const auto StaticCastExpr = cxxStaticCastExpr(has(ImplicitCastExpr)); + const auto FunctionalCastExpr = cxxFunctionalCastExpr(has(ImplicitCastExpr)); + + // We catch any type of casts to an integer. We need to have these cast + // expressions explicitly to catch only those casts which are direct children + // of an assignment/declaration. + const auto CastExpr = expr(anyOf(ImplicitCastExpr, CStyleCastExpr, + StaticCastExpr, FunctionalCastExpr)); + + // Catch assignments with the suspicious type conversion. + const auto AssignmentOperatorExpr = expr(binaryOperator( + hasOperatorName("="), hasLHS(hasType(IntegerType)), hasRHS(CastExpr))); + + Finder->addMatcher(AssignmentOperatorExpr, this); + + // Catch declarations with the suspicious type conversion. + const auto Declaration = + varDecl(isDefinition(), hasType(IntegerType), hasInitializer(CastExpr)); + + Finder->addMatcher(Declaration, this); +} + +void SignedCharMisuseCheck::check(const MatchFinder::MatchResult &Result) { + const auto *CastExpression = + Result.Nodes.getNodeAs<ImplicitCastExpr>("castExpression"); + const auto *IntegerType = Result.Nodes.getNodeAs<QualType>("integerType"); + assert(CastExpression); + assert(IntegerType); + + // Ignore the match if we know that the value is not negative. + // The potential misinterpretation happens for negative values only. + Expr::EvalResult EVResult; + if (!CastExpression->isValueDependent() && + CastExpression->getSubExpr()->EvaluateAsInt(EVResult, *Result.Context)) { + llvm::APSInt Value1 = EVResult.Val.getInt(); + if (Value1.isNonNegative()) + return; + } + + diag(CastExpression->getBeginLoc(), + "singed char -> integer (%0) conversion; " + "consider to cast to unsigned char first.") + << *IntegerType; +} + +} // namespace bugprone +} // namespace tidy +} // namespace clang Index: clang-tools-extra/clang-tidy/bugprone/CMakeLists.txt =================================================================== --- clang-tools-extra/clang-tidy/bugprone/CMakeLists.txt +++ clang-tools-extra/clang-tidy/bugprone/CMakeLists.txt @@ -28,6 +28,7 @@ NotNullTerminatedResultCheck.cpp ParentVirtualCallCheck.cpp PosixReturnCheck.cpp + SignedCharMisuseCheck.cpp SizeofContainerCheck.cpp SizeofExpressionCheck.cpp StringConstructorCheck.cpp Index: clang-tools-extra/clang-tidy/bugprone/BugproneTidyModule.cpp =================================================================== --- clang-tools-extra/clang-tidy/bugprone/BugproneTidyModule.cpp +++ clang-tools-extra/clang-tidy/bugprone/BugproneTidyModule.cpp @@ -36,6 +36,7 @@ #include "NotNullTerminatedResultCheck.h" #include "ParentVirtualCallCheck.h" #include "PosixReturnCheck.h" +#include "SignedCharMisuseCheck.h" #include "SizeofContainerCheck.h" #include "SizeofExpressionCheck.h" #include "StringConstructorCheck.h" @@ -119,6 +120,8 @@ "bugprone-parent-virtual-call"); CheckFactories.registerCheck<PosixReturnCheck>( "bugprone-posix-return"); + CheckFactories.registerCheck<SignedCharMisuseCheck>( + "bugprone-signed-char-misuse"); CheckFactories.registerCheck<SizeofContainerCheck>( "bugprone-sizeof-container"); CheckFactories.registerCheck<SizeofExpressionCheck>(
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits