sstwcw created this revision.
sstwcw added reviewers: HazardyKnusperkeks, MyDeveloperDay, curdeius, owenpan, 
sammccall.
Herald added a project: All.
sstwcw requested review of this revision.
Herald added a project: clang.
Herald added a subscriber: cfe-commits.

In 2183fe2 
<https://reviews.llvm.org/rG2183fe2160fbcb754893e98829f2bff4d0fccfa3> I didn't 
notice that `PPLevelBranchIndex` was common to the
entire file.  The commit caused problems when there are multiple
preprocessor if sections.  Using a single `PPLevelBranchIndex` also
brings the problem that the extra branches in an if section with more
branches than other sections are parsed without other sections.

This patch fixes the problem by storing the structure of the
preprocessor if sections.  This way, `PPLevelBranchIndex` is checked
against the stored structure, and if a nonexistent branch is called for
because another section has more branches, the first or second branch
will be parsed.  The change in 2183fe2 
<https://reviews.llvm.org/rG2183fe2160fbcb754893e98829f2bff4d0fccfa3> setting 
`PPLevelBranchIndex` is
reverted.  Instead the stored branch structure is used to determine when
the second branch should be parsed instead of the first.

The logic for determining when a branch is skipped in moved from
`conditionalCompilationStart` and `conditionalCompilationAlternative`
into `conditionalCompilationCondition` because the two cases now have
more overlap.  `PPChainbranchIndex` is changed to include the section
number in addition to the branch number.

Fixes https://github.com/llvm/llvm-project/issues/58188


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D135740

Files:
  clang/lib/Format/UnwrappedLineParser.cpp
  clang/lib/Format/UnwrappedLineParser.h
  clang/unittests/Format/FormatTest.cpp

Index: clang/unittests/Format/FormatTest.cpp
===================================================================
--- clang/unittests/Format/FormatTest.cpp
+++ clang/unittests/Format/FormatTest.cpp
@@ -6099,6 +6099,25 @@
                "#endif\n"
                "  x;\n"
                "}");
+  // Verify that an `#if 0` at the start of a file doesn't affect other parts of
+  // the file.
+  verifyFormat("#if 0\n"
+               "#endif\n"
+               "#if X\n"
+               "int something_fairly_long;\n"
+               "if (1) {\n"
+               "  int something_fairly_long;\n"
+               "}\n"
+               "#endif\n");
+  verifyFormat("#if 1\n"
+               "{\n"
+               "#endif\n"
+               "#if X\n"
+               "  x;\n"
+               "#else\n"
+               "  x;\n"
+               "#endif\n"
+               "}");
 }
 
 TEST_F(FormatTest, GraciouslyHandleIncorrectPreprocessorConditions) {
Index: clang/lib/Format/UnwrappedLineParser.h
===================================================================
--- clang/lib/Format/UnwrappedLineParser.h
+++ clang/lib/Format/UnwrappedLineParser.h
@@ -314,11 +314,75 @@
   // Contains the maximum number of branches at each nesting level.
   SmallVector<int, 8> PPLevelBranchCount;
 
-  // Contains the number of branches per nesting level we are currently
-  // in while parsing a preprocessor branch sequence.
+  // Contains the indices of the current if-section and branch in that section
+  // we are currently in while parsing a preprocessor branch sequence, with one
+  // entry per nesting level.
   // This is used to update PPLevelBranchCount at the end of a branch
   // sequence.
-  std::stack<int> PPChainBranchIndex;
+  // For example:
+  //   PPSectionsTop = 0
+  //   PPChainBranchIndex = []
+  //   #if 0
+  //     PPSectionsTop = 0 -- It gets reset to 0 when a new level is entered.
+  //     PPChainBranchIndex = [{Section: 0, Branch: 0}]
+  //   #elif 1
+  //     PPSectionsTop = 0
+  //     PPChainBranchIndex = [{Section: 0, Branch: 1}]
+  //   #endif
+  //   PPSectionsTop = 1
+  //   PPChainBranchindex = []
+  //   #if 2
+  //     PPSectionsTop = 0
+  //     PPChainBranchIndex = [{Section: 1, Branch: 0}]
+  //     #if 3
+  //       PPSectionsTop = 0
+  //       PPChainBranchIndex = [{Section: 1, Branch: 0},
+  //                             {Section: 0, Branch: 0}]
+  //     #endif
+  //     PPSectionsTop = 1 -- It gets incremented when a section ends.
+  //     PPChainBranchIndex = [{Section: 1, Branch: 0}]
+  //   #endif
+  //   PPSectionsTop = 2
+  //   PPChainBranchIndex = []
+  // After parsing all this, PPBranchStructure is:
+  // {
+  //   Unreachable: true
+  //   Sections: [
+  //     [
+  //       {Unreachable: false, Sections: []},
+  //       {Unreachable: false, Sections: []}
+  //     ],
+  //     [
+  //       {
+  //         Unreachable: false,
+  //         Sections: [[{Unreachable: false, Sections: []}]]
+  //       }
+  //     ]
+  //   ]
+  // }
+  struct PPSectionGroupId {
+    int Section;
+    int Branch;
+  };
+  SmallVector<PPSectionGroupId> PPChainBranchIndex;
+
+  // The number of sections seen at the current level.
+  int PPSectionsCurrent;
+
+  // The structure of the #if branches.  'Sections' contains one entry for each
+  // section between #if and #endif regardless of how many #elsif there are in
+  // between.  For each of the sections, there is one entry for each branch.
+  struct PPBranchStructureT {
+    std::vector<std::vector<PPBranchStructureT>> Sections;
+    bool Unreachable = false;
+  } PPBranchStructure;
+
+  // Update the saved branch structure based on the current number of branches
+  // seen.  Returns the structure addressed by the indices.
+  PPBranchStructureT *
+  findPPBranchStructure(PPBranchStructureT *Structure,
+                        const PPSectionGroupId *IndicesBegin,
+                        const PPSectionGroupId *IndicesEnd);
 
   // Include guard search state. Used to fixup preprocessor indent levels
   // so that include guards do not participate in indentation.
Index: clang/lib/Format/UnwrappedLineParser.cpp
===================================================================
--- clang/lib/Format/UnwrappedLineParser.cpp
+++ clang/lib/Format/UnwrappedLineParser.cpp
@@ -346,6 +346,7 @@
   NestedTooDeep.clear();
   PPStack.clear();
   Line->FirstStartColumn = FirstStartColumn;
+  PPSectionsCurrent = 0;
 }
 
 void UnwrappedLineParser::parse() {
@@ -1125,11 +1126,49 @@
 }
 
 void UnwrappedLineParser::conditionalCompilationCondition(bool Unreachable) {
+  bool Skip;
+  if (PPChainBranchIndex.empty()) {
+    // This condition occurs when there is an #elif without and #if.
+    Skip = false;
+  } else {
+    assert(PPBranchLevel >= 0);
+    const auto &CurrentIndex = PPChainBranchIndex.back();
+    PPBranchStructureT *Structure =
+        findPPBranchStructure(&PPBranchStructure, PPChainBranchIndex.begin(),
+                              PPChainBranchIndex.end() - 1);
+    findPPBranchStructure(Structure, PPChainBranchIndex.end() - 1,
+                          PPChainBranchIndex.end())
+        ->Unreachable = Unreachable;
+    // The default branch to be parsed when PPLevelBranchIndex calls for a
+    // nonexistent branch is the first branch when it is reachable, otherwise
+    // the second branch.
+    bool IsDefaultBranch =
+        CurrentIndex.Branch == 0 ||
+        (CurrentIndex.Branch == 1 &&
+         Structure->Sections[CurrentIndex.Section][0].Unreachable);
+    if (PPLevelBranchIndex[PPBranchLevel] == CurrentIndex.Branch) {
+      // The branch is parsed when called for.
+      Skip = false;
+    } else if (IsDefaultBranch && PPLevelBranchIndex[PPBranchLevel] == 0) {
+      // When the branch called for is the first one, but it is unreachable, the
+      // second one is parsed.
+      Skip = false;
+    } else if (IsDefaultBranch &&
+               PPLevelBranchIndex[PPBranchLevel] >=
+                   static_cast<ptrdiff_t>(
+                       Structure->Sections[CurrentIndex.Section].size())) {
+      // When the branch called for doesn't exist, the default branch is parsed.
+      Skip = false;
+    } else {
+      Skip = true;
+    }
+  }
+
   size_t Line = CurrentLines->size();
   if (CurrentLines == &PreprocessorDirectives)
     Line += Lines.size();
 
-  if (Unreachable ||
+  if (Unreachable || Skip ||
       (!PPStack.empty() && PPStack.back().Kind == PP_Unreachable)) {
     PPStack.push_back({PP_Unreachable, Line});
   } else {
@@ -1141,14 +1180,12 @@
   ++PPBranchLevel;
   assert(PPBranchLevel >= 0 && PPBranchLevel <= (int)PPLevelBranchIndex.size());
   if (PPBranchLevel == (int)PPLevelBranchIndex.size()) {
-    // If the first branch is unreachable, set the BranchIndex to 1.  This way
-    // the next branch will be parsed if there is one.
-    PPLevelBranchIndex.push_back(Unreachable ? 1 : 0);
+    PPLevelBranchIndex.push_back(0);
     PPLevelBranchCount.push_back(0);
   }
-  PPChainBranchIndex.push(0);
-  bool Skip = PPLevelBranchIndex[PPBranchLevel] > 0;
-  conditionalCompilationCondition(Unreachable || Skip);
+  PPChainBranchIndex.push_back({/*Section=*/PPSectionsCurrent, /*Branch=*/0});
+  PPSectionsCurrent = 0;
+  conditionalCompilationCondition(Unreachable);
 }
 
 void UnwrappedLineParser::conditionalCompilationAlternative() {
@@ -1156,27 +1193,51 @@
     PPStack.pop_back();
   assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
   if (!PPChainBranchIndex.empty())
-    ++PPChainBranchIndex.top();
-  conditionalCompilationCondition(
-      PPBranchLevel >= 0 && !PPChainBranchIndex.empty() &&
-      PPLevelBranchIndex[PPBranchLevel] != PPChainBranchIndex.top());
+    ++PPChainBranchIndex.back().Branch;
+  conditionalCompilationCondition(/*Unreachable=*/false);
 }
 
 void UnwrappedLineParser::conditionalCompilationEnd() {
   assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
   if (PPBranchLevel >= 0 && !PPChainBranchIndex.empty()) {
-    if (PPChainBranchIndex.top() + 1 > PPLevelBranchCount[PPBranchLevel])
-      PPLevelBranchCount[PPBranchLevel] = PPChainBranchIndex.top() + 1;
+    if (PPChainBranchIndex.back().Branch + 1 >
+        PPLevelBranchCount[PPBranchLevel]) {
+      PPLevelBranchCount[PPBranchLevel] = PPChainBranchIndex.back().Branch + 1;
+    }
   }
   // Guard against #endif's without #if.
   if (PPBranchLevel > -1)
     --PPBranchLevel;
-  if (!PPChainBranchIndex.empty())
-    PPChainBranchIndex.pop();
+  if (PPChainBranchIndex.empty()) {
+    PPSectionsCurrent = 0;
+  } else {
+    PPSectionsCurrent = PPChainBranchIndex.back().Section + 1;
+    PPChainBranchIndex.pop_back();
+  }
   if (!PPStack.empty())
     PPStack.pop_back();
 }
 
+UnwrappedLineParser::PPBranchStructureT *
+UnwrappedLineParser::findPPBranchStructure(PPBranchStructureT *Structure,
+                                           const PPSectionGroupId *IndicesBegin,
+                                           const PPSectionGroupId *IndicesEnd) {
+  if (IndicesBegin == IndicesEnd)
+    return Structure;
+  if (IndicesBegin->Section >=
+      static_cast<ptrdiff_t>(Structure->Sections.size())) {
+    Structure->Sections.resize(IndicesBegin->Section + 1);
+  }
+  if (IndicesBegin->Branch >=
+      static_cast<ptrdiff_t>(
+          Structure->Sections[IndicesBegin->Section].size())) {
+    Structure->Sections[IndicesBegin->Section].resize(IndicesBegin->Branch + 1);
+  }
+  return findPPBranchStructure(
+      &Structure->Sections[IndicesBegin->Section][IndicesBegin->Branch],
+      IndicesBegin + 1, IndicesEnd);
+}
+
 void UnwrappedLineParser::parsePPIf(bool IfDef) {
   bool IfNDef = FormatTok->is(tok::pp_ifndef);
   nextToken();
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to