ken-matsui created this revision.
ken-matsui added a reviewer: aaron.ballman.
Herald added a subscriber: mgorny.
Herald added a project: All.
ken-matsui requested review of this revision.
Herald added a project: clang.
Herald added a subscriber: cfe-commits.

This patch implements suggestion for typoed directives in preprocessor 
conditionals. The related issue is: 
https://github.com/llvm/llvm-project/issues/51598.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D124726

Files:
  clang/include/clang/Basic/DiagnosticGroups.td
  clang/include/clang/Basic/DiagnosticLexKinds.td
  clang/include/clang/Lex/Preprocessor.h
  clang/include/clang/Tooling/LevDistance.h
  clang/lib/Lex/CMakeLists.txt
  clang/lib/Lex/PPDirectives.cpp
  clang/lib/Tooling/CMakeLists.txt
  clang/lib/Tooling/LevDistance.cpp
  clang/test/OpenMP/predefined_macro.c
  clang/test/Preprocessor/suggest-typoed-directive.c
  clang/unittests/Tooling/CMakeLists.txt
  clang/unittests/Tooling/LevDistanceTest.cpp

Index: clang/unittests/Tooling/LevDistanceTest.cpp
===================================================================
--- /dev/null
+++ clang/unittests/Tooling/LevDistanceTest.cpp
@@ -0,0 +1,60 @@
+//===- unittest/Tooling/LevDistanceTest.cpp -------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/Tooling/LevDistance.h"
+#include "gtest/gtest.h"
+#include <array>
+
+using namespace clang;
+
+using tooling::levdistance::findSimilarStr;
+using tooling::levdistance::levDistance;
+
+namespace {
+
+TEST(LevDistanceTest, levDistance) {
+  EXPECT_EQ(0ul, levDistance("aaaa", "aaaa"));
+  EXPECT_EQ(1ul, levDistance("aaaa", "aaab"));
+  EXPECT_EQ(2ul, levDistance("aabc", "aacb"));
+  EXPECT_EQ(2ul, levDistance("aabc", "abca"));
+  EXPECT_EQ(3ul, levDistance("aabc", "adef"));
+  EXPECT_EQ(4ul, levDistance("abcd", "efgh"));
+}
+
+TEST(LevDistanceTest, findSimilarStr) {
+  std::array<std::string, 2> candidates{"aaab", "aaabc"};
+  EXPECT_EQ(std::string("aaab"), findSimilarStr(candidates, "aaaa"));
+
+  candidates = {"aab", "abc"};
+  EXPECT_EQ(std::string("aab"), findSimilarStr(candidates, "aaa"));
+
+  candidates = {"ab", "bc"};
+  EXPECT_EQ(std::string("ab"), findSimilarStr(candidates, "aa"));
+
+  candidates = {"b", "c"};
+  EXPECT_EQ(None, findSimilarStr(candidates, "a"));
+}
+
+TEST(LevDistanceTest, findSimilarStrToMacros) {
+  const std::array<std::string, 8> candidates{
+      "if", "ifdef", "ifndef", "elif", "elifdef", "elifndef", "else", "endif"};
+
+  EXPECT_EQ(std::string("elifdef"), findSimilarStr(candidates, "elfidef"));
+  EXPECT_EQ(std::string("elifdef"), findSimilarStr(candidates, "elifdef"));
+  EXPECT_EQ(None, findSimilarStr(candidates, "special_compiler_directive"));
+}
+
+TEST(LevDistanceTest, findSimilarStrInCaseInsensitive) {
+  const std::array<std::string, 8> candidates{
+      "if", "ifdef", "ifndef", "elif", "elifdef", "elifndef", "else", "endif"};
+
+  EXPECT_EQ(std::string("elifdef"), findSimilarStr(candidates, "elifdef"));
+  EXPECT_EQ(std::string("elifdef"), findSimilarStr(candidates, "ELIFDEF"));
+}
+
+} // end anonymous namespace
Index: clang/unittests/Tooling/CMakeLists.txt
===================================================================
--- clang/unittests/Tooling/CMakeLists.txt
+++ clang/unittests/Tooling/CMakeLists.txt
@@ -18,6 +18,7 @@
   FixItTest.cpp
   HeaderIncludesTest.cpp
   StandardLibraryTest.cpp
+  LevDistanceTest.cpp
   LexicallyOrderedRecursiveASTVisitorTest.cpp
   LookupTest.cpp
   QualTypeNamesTest.cpp
Index: clang/test/Preprocessor/suggest-typoed-directive.c
===================================================================
--- /dev/null
+++ clang/test/Preprocessor/suggest-typoed-directive.c
@@ -0,0 +1,30 @@
+// RUN: %clang_cc1 -std=c99 -fsyntax-only -verify -pedantic %s
+
+// expected-warning@+11 {{'#id' directive not found, did you mean '#if'?}}
+// expected-warning@+11 {{'#ifd' directive not found, did you mean '#if'?}}
+// expected-warning@+11 {{'#ifde' directive not found, did you mean '#ifdef'?}}
+// expected-warning@+11 {{'#elid' directive not found, did you mean '#elif'?}}
+// expected-warning@+11 {{'#elsif' directive not found, did you mean '#elif'?}}
+// expected-warning@+11 {{'#elseif' directive not found, did you mean '#elif'?}}
+// expected-warning@+11 {{'#elfidef' directive not found, did you mean '#elifdef'?}}
+// expected-warning@+11 {{'#elfindef' directive not found, did you mean '#elifdef'?}}
+// expected-warning@+11 {{'#elsi' directive not found, did you mean '#else'?}}
+// expected-warning@+11 {{'#endi' directive not found, did you mean '#endif'?}}
+#ifdef UNDEFINED
+#id
+#ifd
+#ifde
+#elid
+#elsif
+#elseif
+#elfidef
+#elfindef
+#elsi
+#endi
+#endif
+
+#if special_compiler
+#special_compiler_directive // no diagnostics
+#endif
+
+// expected-warning {{ISO C requires a translation unit to contain at least one declaration}}
Index: clang/test/OpenMP/predefined_macro.c
===================================================================
--- clang/test/OpenMP/predefined_macro.c
+++ clang/test/OpenMP/predefined_macro.c
@@ -4,7 +4,7 @@
 // RUN: %clang_cc1 -fopenmp-simd -verify -o - %s
 // RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -verify -o - %s
 // RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -verify -o - %s
-// expected-no-diagnostics
+// expected-warning@+5 {{'#elsif' directive not found, did you mean '#elif'?}}
 #ifdef FOPENMP
 // -fopenmp option is specified
 #ifndef _OPENMP
Index: clang/lib/Tooling/LevDistance.cpp
===================================================================
--- /dev/null
+++ clang/lib/Tooling/LevDistance.cpp
@@ -0,0 +1,59 @@
+//===--- LevDistance.cpp ----------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+//  This file contains implementations of Levenshtein distances.
+//
+//  The [Levenshtein distance] is a metric for measuring the difference between
+//  two strings.
+//
+//  [Levenshtein distance]: https://en.wikipedia.org/wiki/Levenshtein_distance
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/Tooling/LevDistance.h"
+
+namespace clang {
+namespace tooling {
+namespace levdistance {
+
+size_t levDistance(StringRef a, StringRef b) {
+  const size_t aSize = a.size();
+  const size_t bSize = b.size();
+
+  // for all i and j, d[i,j] will hold the Levenshtein distance between
+  // the first i characters of `a` and the first j characters of `b`
+  std::vector<std::vector<size_t>> d(aSize + 1, std::vector<size_t>(bSize + 1));
+  d[0][0] = 0;
+
+  // source prefixes can be transformed into empty string by
+  // dropping all characters
+  for (size_t i = 1; i <= aSize; ++i)
+    d[i][0] = i;
+
+  // target prefixes can be reached from empty source prefix
+  // by inserting every character
+  for (size_t j = 1; j <= bSize; ++j)
+    d[0][j] = j;
+
+  for (size_t i = 1; i <= aSize; ++i) {
+    for (size_t j = 1; j <= bSize; ++j) {
+      const size_t substCost = a[i - 1] == b[j - 1] ? 0 : 1;
+      d[i][j] = std::min({
+          d[i - 1][j] + 1,            // deletion
+          d[i][j - 1] + 1,            // insertion
+          d[i - 1][j - 1] + substCost // substitution
+      });
+    }
+  }
+
+  return d[aSize][bSize];
+}
+
+} // namespace levdistance
+} // namespace tooling
+} // namespace clang
Index: clang/lib/Tooling/CMakeLists.txt
===================================================================
--- clang/lib/Tooling/CMakeLists.txt
+++ clang/lib/Tooling/CMakeLists.txt
@@ -110,6 +110,7 @@
   GuessTargetAndModeCompilationDatabase.cpp
   InterpolatingCompilationDatabase.cpp
   JSONCompilationDatabase.cpp
+  LevDistance.cpp
   Refactoring.cpp
   RefactoringCallbacks.cpp
   StandaloneExecution.cpp
Index: clang/lib/Lex/PPDirectives.cpp
===================================================================
--- clang/lib/Lex/PPDirectives.cpp
+++ clang/lib/Lex/PPDirectives.cpp
@@ -32,17 +32,19 @@
 #include "clang/Lex/PreprocessorOptions.h"
 #include "clang/Lex/Token.h"
 #include "clang/Lex/VariadicMacroSupport.h"
+#include "clang/Tooling/LevDistance.h"
 #include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/ScopeExit.h"
 #include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/StringSwitch.h"
 #include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/StringSwitch.h"
 #include "llvm/Support/AlignOf.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/Path.h"
 #include <algorithm>
+#include <array>
 #include <cassert>
 #include <cstring>
 #include <new>
@@ -433,6 +435,24 @@
   return BytesToSkip - LengthDiff;
 }
 
+/// SuggestTypoedDirective - Provide a suggestion for a typoed directive. If
+/// there is no typo, then just skip suggesting.
+void Preprocessor::SuggestTypoedDirective(const Token &Tok, StringRef Directive,
+                                          const SourceLocation &endLoc) {
+  const std::array<std::string, 8> candidates{
+      "if", "ifdef", "ifndef", "elif", "elifdef", "elifndef", "else", "endif"};
+
+  if (const auto sugg =
+          tooling::levdistance::findSimilarStr(candidates, Directive)) {
+    CharSourceRange DirectiveRange =
+        CharSourceRange::getCharRange(Tok.getLocation(), endLoc);
+    const std::string suggValue = sugg.getValue();
+
+    auto Hint = FixItHint::CreateReplacement(DirectiveRange, "#" + suggValue);
+    Diag(Tok, diag::warn_pp_typo_directive) << Directive << suggValue << Hint;
+  }
+}
+
 /// SkipExcludedConditionalBlock - We just read a \#if or related directive and
 /// decided that the subsequent tokens are in the \#if'd out portion of the
 /// file.  Lex the rest of the file, until we see an \#endif.  If
@@ -556,6 +576,8 @@
         CurPPLexer->pushConditionalLevel(Tok.getLocation(), /*wasskipping*/true,
                                        /*foundnonskip*/false,
                                        /*foundelse*/false);
+      } else {
+        SuggestTypoedDirective(Tok, Directive, endLoc);
       }
     } else if (Directive[0] == 'e') {
       StringRef Sub = Directive.substr(1);
@@ -705,7 +727,11 @@
             break;
           }
         }
+      } else {
+        SuggestTypoedDirective(Tok, Directive, endLoc);
       }
+    } else {
+      SuggestTypoedDirective(Tok, Directive, endLoc);
     }
 
     CurPPLexer->ParsingPreprocessorDirective = false;
Index: clang/lib/Lex/CMakeLists.txt
===================================================================
--- clang/lib/Lex/CMakeLists.txt
+++ clang/lib/Lex/CMakeLists.txt
@@ -29,4 +29,5 @@
 
   LINK_LIBS
   clangBasic
+  clangTooling
   )
Index: clang/include/clang/Tooling/LevDistance.h
===================================================================
--- /dev/null
+++ clang/include/clang/Tooling/LevDistance.h
@@ -0,0 +1,98 @@
+//===--- LevDistance.h ------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+//  This file implements Levenshtein distances.
+//
+//  The [Levenshtein distance] is a metric for measuring the difference between
+//  two strings.
+//
+//  [Levenshtein distance]: https://en.wikipedia.org/wiki/Levenshtein_distance
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_TOOLING_LEVDISTANCE_H
+#define LLVM_CLANG_TOOLING_LEVDISTANCE_H
+
+#include "clang/Basic/LLVM.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/StringRef.h"
+#include <algorithm>
+#include <string>
+#include <utility>
+#include <vector>
+
+namespace clang {
+namespace tooling {
+namespace levdistance {
+
+// Calculate the difference between `a` and `b`.
+size_t levDistance(StringRef a, StringRef b);
+
+namespace internal {
+
+// Find the exact case-insensitive string for `word` in `rng`
+template <class SinglePassRange>
+Optional<std::string> findExactStr(const SinglePassRange &rng, StringRef word) {
+  auto first = std::cbegin(rng);
+  auto last = std::cend(rng);
+
+  const auto exactStr = std::find_if(first, last, [word](const auto &r) {
+    return std::equal(r.begin(), r.end(), word.begin(), word.end(),
+                      [](char a, char b) { return tolower(a) == tolower(b); });
+  });
+  if (exactStr == last) {
+    return None;
+  }
+  return *exactStr;
+}
+
+} // namespace internal
+
+// Find a similar string for `word` in `rng`.
+template <class SinglePassRange>
+Optional<std::string> findSimilarStr(const SinglePassRange &rng, StringRef word,
+                                     size_t dist = 0) {
+  // We need to check if `rng` has the exact case-insensitive string because the
+  // Levenshtein distance match does not care about it.
+  if (const auto exactStr = internal::findExactStr(rng, word)) {
+    return *exactStr;
+  }
+
+  // Keep going with the Levenshtein distance match.
+  // If dist is given, use the dist for maxDist; otherwise, if word size is
+  // less than 3, use the word size minus 1 and if not, use the word size
+  // divided by 3.
+  const size_t maxDist = dist != 0         ? dist
+                         : word.size() < 3 ? word.size() - 1
+                                           : word.size() / 3;
+
+  std::vector<std::pair<std::string, size_t>> candidates;
+  for (const auto &r : rng) {
+    const size_t curDist = levDistance(word, r);
+    if (curDist <= maxDist) {
+      candidates.emplace_back(r, curDist);
+    }
+  }
+
+  if (candidates.empty()) {
+    return None;
+  } else if (candidates.size() == 1) {
+    return candidates[0].first;
+  } else {
+    const auto similarStr = std::max_element(
+        candidates.cbegin(), candidates.cend(),
+        [](const auto &a, const auto &b) { return a.second < b.second; });
+    return (*similarStr).first;
+  }
+}
+
+} // namespace levdistance
+} // namespace tooling
+} // namespace clang
+
+#endif // LLVM_CLANG_TOOLING_LEVDISTANCE_H
Index: clang/include/clang/Lex/Preprocessor.h
===================================================================
--- clang/include/clang/Lex/Preprocessor.h
+++ clang/include/clang/Lex/Preprocessor.h
@@ -2241,6 +2241,11 @@
   /// Return true if an error occurs parsing the arg list.
   bool ReadMacroParameterList(MacroInfo *MI, Token& LastTok);
 
+  /// Provide a suggestion for a typoed directive. If there is no typo, then
+  /// just skip suggesting.
+  void SuggestTypoedDirective(const Token &Tok, StringRef Directive,
+                              const SourceLocation &endLoc);
+
   /// We just read a \#if or related directive and decided that the
   /// subsequent tokens are in the \#if'd out portion of the
   /// file.  Lex the rest of the file, until we see an \#endif.  If \p
Index: clang/include/clang/Basic/DiagnosticLexKinds.td
===================================================================
--- clang/include/clang/Basic/DiagnosticLexKinds.td
+++ clang/include/clang/Basic/DiagnosticLexKinds.td
@@ -360,6 +360,10 @@
   "%select{left|right}0 side of operator converted from negative value to "
   "unsigned: %1">;
 
+def warn_pp_typo_directive : Warning<
+  "'#%0' directive not found, did you mean '#%1'?">,
+  InGroup<TypoedDirective>;
+
 def ext_pp_import_directive : Extension<"#import is a language extension">,
   InGroup<DiagGroup<"import-preprocessor-directive-pedantic">>;
 def err_pp_import_directive_ms : Error<
Index: clang/include/clang/Basic/DiagnosticGroups.td
===================================================================
--- clang/include/clang/Basic/DiagnosticGroups.td
+++ clang/include/clang/Basic/DiagnosticGroups.td
@@ -1020,6 +1020,9 @@
 def ThreadSafetyVerbose : DiagGroup<"thread-safety-verbose">;
 def ThreadSafetyBeta : DiagGroup<"thread-safety-beta">;
 
+// Typoed directive warnings
+def TypoedDirective : DiagGroup<"typoed-directive">;
+
 // Uniqueness Analysis warnings
 def Consumed       : DiagGroup<"consumed">;
 
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to