juliehockett updated this revision to Diff 144011.
juliehockett marked 17 inline comments as done.
juliehockett added a comment.

Reorganizing and streamlining, particularly in decoupling the reader from the 
reduce process and redesigning a bit to allow for more flexible reducing. 
Currently implements an in-memory reducer, but could (theoretically) be 
extended.


https://reviews.llvm.org/D43341

Files:
  clang-doc/BitcodeReader.cpp
  clang-doc/BitcodeReader.h
  clang-doc/BitcodeWriter.cpp
  clang-doc/BitcodeWriter.h
  clang-doc/CMakeLists.txt
  clang-doc/Index.cpp
  clang-doc/Index.h
  clang-doc/Reducer.cpp
  clang-doc/Reducer.h
  clang-doc/Representation.cpp
  clang-doc/Representation.h
  clang-doc/tool/ClangDocMain.cpp
  docs/ReleaseNotes.rst
  test/clang-doc/bc-comment.cpp
  test/clang-doc/bc-namespace.cpp
  test/clang-doc/bc-record.cpp

Index: test/clang-doc/bc-record.cpp
===================================================================
--- /dev/null
+++ test/clang-doc/bc-record.cpp
@@ -0,0 +1,183 @@
+// This test requires Linux due to the system-dependent USR for the
+// inner class in function H.
+// REQUIRES: system-linux
+// RUN: rm -rf %t
+// RUN: mkdir %t
+// RUN: echo "" > %t/compile_flags.txt
+// RUN: cp "%s" "%t/test.cpp"
+// RUN: clang-doc --dump -doxygen -p %t %t/test.cpp -output=%t/docs
+// RUN: llvm-bcanalyzer %t/docs/bc/ACE81AFA6627B4CEF2B456FB6E1252925674AF7E.bc --dump | FileCheck %s --check-prefix CHECK-A
+// RUN: llvm-bcanalyzer %t/docs/bc/FC07BD34D5E77782C263FA944447929EA8753740.bc --dump | FileCheck %s --check-prefix CHECK-B
+// RUN: llvm-bcanalyzer %t/docs/bc/1E3438A08BA22025C0B46289FF0686F92C8924C5.bc --dump | FileCheck %s --check-prefix CHECK-BC
+// RUN: llvm-bcanalyzer %t/docs/bc/06B5F6A19BA9F6A832E127C9968282B94619B210.bc --dump | FileCheck %s --check-prefix CHECK-C
+// RUN: llvm-bcanalyzer %t/docs/bc/0921737541208B8FA9BB42B60F78AC1D779AA054.bc --dump | FileCheck %s --check-prefix CHECK-D
+// RUN: llvm-bcanalyzer %t/docs/bc/289584A8E0FF4178A794622A547AA622503967A1.bc --dump | FileCheck %s --check-prefix CHECK-E
+// RUN: llvm-bcanalyzer %t/docs/bc/DEB4AC1CD9253CD9EF7FBE6BCAC506D77984ABD4.bc --dump | FileCheck %s --check-prefix CHECK-ECON
+// RUN: llvm-bcanalyzer %t/docs/bc/BD2BDEBD423F80BACCEA75DE6D6622D355FC2D17.bc --dump | FileCheck %s --check-prefix CHECK-EDES
+// RUN: llvm-bcanalyzer %t/docs/bc/E3B54702FABFF4037025BA194FC27C47006330B5.bc --dump | FileCheck %s --check-prefix CHECK-F
+// RUN: llvm-bcanalyzer %t/docs/bc/B6AC4C5C9F2EA3F2B3ECE1A33D349F4EE502B24E.bc --dump | FileCheck %s --check-prefix CHECK-H
+// RUN: llvm-bcanalyzer %t/docs/bc/E81CE07BB3FCCC7A88D059FC33F4140DF74F63DA.bc --dump | FileCheck %s --check-prefix CHECK-I
+// RUN: llvm-bcanalyzer %t/docs/bc/5093D428CDC62096A67547BA52566E4FB9404EEE.bc --dump | FileCheck %s --check-prefix CHECK-PM
+// RUN: llvm-bcanalyzer %t/docs/bc/CA7C7935730B5EACD25F080E9C83FA087CCDC75E.bc --dump | FileCheck %s --check-prefix CHECK-X
+// RUN: llvm-bcanalyzer %t/docs/bc/641AB4A3D36399954ACDE29C7A8833032BF40472.bc --dump | FileCheck %s --check-prefix CHECK-Y
+
+union A { int X; int Y; };
+// CHECK-A: <RecordBlock NumWords={{[0-9]*}} BlockCodeSize=4>
+  // CHECK-A-NEXT: <USR abbrevid=4 op0=20 op1=172 op2=232 op3=26 op4=250 op5=102 op6=39 op7=180 op8=206 op9=242 op10=180 op11=86 op12=251 op13=110 op14=18 op15=82 op16=146 op17=86 op18=116 op19=175 op20=126/>
+  // CHECK-A-NEXT: <Name abbrevid=5 op0=1/> blob data = 'A'
+  // CHECK-A-NEXT: <DefLocation abbrevid=7 op0=24 op1={{[0-9]*}}/> blob data = '{{.*}}'
+  // CHECK-A-NEXT: <TagType abbrevid=9 op0=2/>
+  // CHECK-A-NEXT: <MemberTypeBlock NumWords={{[0-9]*}} BlockCodeSize=4>
+    // CHECK-A-NEXT: <Type abbrevid=4 op0=4 op1=3/> blob data = 'int'
+    // CHECK-A-NEXT: <Name abbrevid=5 op0=4/> blob data = 'A::X'
+    // CHECK-A-NEXT: <Access abbrevid=6 op0=3/>
+  // CHECK-A-NEXT: </MemberTypeBlock>
+  // CHECK-A-NEXT: <MemberTypeBlock NumWords={{[0-9]*}} BlockCodeSize=4>
+    // CHECK-A-NEXT: <Type abbrevid=4 op0=4 op1=3/> blob data = 'int'
+    // CHECK-A-NEXT: <Name abbrevid=5 op0=4/> blob data = 'A::Y'
+    // CHECK-A-NEXT: <Access abbrevid=6 op0=3/>
+  // CHECK-A-NEXT: </MemberTypeBlock>
+// CHECK-A-NEXT: </RecordBlock>
+
+enum B { X, Y };
+// CHECK-B: <EnumBlock NumWords={{[0-9]*}} BlockCodeSize=4>
+  // CHECK-B-NEXT: <USR abbrevid=4 op0=20 op1=252 op2=7 op3=189 op4=52 op5=213 op6=231 op7=119 op8=130 op9=194 op10=99 op11=250 op12=148 op13=68 op14=71 op15=146 op16=158 op17=168 op18=117 op19=55 op20=64/>
+  // CHECK-B-NEXT: <Name abbrevid=5 op0=1/> blob data = 'B'
+  // CHECK-B-NEXT: <DefLocation abbrevid=7 op0=42 op1={{[0-9]*}}/> blob data = '{{.*}}'
+  // CHECK-B-NEXT: <Member abbrevid=9 op0=1/> blob data = 'X'
+  // CHECK-B-NEXT: <Member abbrevid=9 op0=1/> blob data = 'Y'
+// CHECK-B-NEXT: </EnumBlock>
+
+enum class Bc { A, B };
+// CHECK-BC: <EnumBlock NumWords={{[0-9]*}} BlockCodeSize=4>
+  // CHECK-BC-NEXT: <USR abbrevid=4 op0=20 op1=30 op2=52 op3=56 op4=160 op5=139 op6=162 op7=32 op8=37 op9=192 op10=180 op11=98 op12=137 op13=255 op14=6 op15=134 op16=249 op17=44 op18=137 op19=36 op20=197/>
+  // CHECK-BC-NEXT: <Name abbrevid=5 op0=2/> blob data = 'Bc'
+  // CHECK-BC-NEXT: <DefLocation abbrevid=7 op0=51 op1={{[0-9]*}}/> blob data = '{{.*}}'
+  // CHECK-BC-NEXT: <Scoped abbrevid=10 op0=1/>
+  // CHECK-BC-NEXT: <Member abbrevid=9 op0=1/> blob data = 'A'
+  // CHECK-BC-NEXT: <Member abbrevid=9 op0=1/> blob data = 'B'
+// CHECK-BC-NEXT: </EnumBlock>
+
+struct C { int i; };
+// CHECK-C: <RecordBlock NumWords={{[0-9]*}} BlockCodeSize=4>
+  // CHECK-C-NEXT: <USR abbrevid=4 op0=20 op1=6 op2=181 op3=246 op4=161 op5=155 op6=169 op7=246 op8=168 op9=50 op10=225 op11=39 op12=201 op13=150 op14=130 op15=130 op16=185 op17=70 op18=25 op19=178 op20=16/>
+  // CHECK-C-NEXT: <Name abbrevid=5 op0=1/> blob data = 'C'
+  // CHECK-C-NEXT: <DefLocation abbrevid=7 op0=61 op1={{[0-9]*}}/> blob data = '{{.*}}'
+  // CHECK-C-NEXT: <MemberTypeBlock NumWords={{[0-9]*}} BlockCodeSize=4>
+    // CHECK-C-NEXT: <Type abbrevid=4 op0=4 op1=3/> blob data = 'int'
+    // CHECK-C-NEXT: <Name abbrevid=5 op0=4/> blob data = 'C::i'
+    // CHECK-C-NEXT: <Access abbrevid=6 op0=3/>
+  // CHECK-C-NEXT: </MemberTypeBlock>
+// CHECK-C-NEXT: </RecordBlock>
+
+class D {};
+// CHECK-D: <RecordBlock NumWords={{[0-9]*}} BlockCodeSize=4>
+  // CHECK-D-NEXT: <USR abbrevid=4 op0=20 op1=9 op2=33 op3=115 op4=117 op5=65 op6=32 op7=139 op8=143 op9=169 op10=187 op11=66 op12=182 op13=15 op14=120 op15=172 op16=29 op17=119 op18=154 op19=160 op20=84/>
+  // CHECK-D-NEXT: <Name abbrevid=5 op0=1/> blob data = 'D'
+  // CHECK-D-NEXT: <DefLocation abbrevid=7 op0=73 op1={{[0-9]*}}/> blob data = '{{.*}}'
+  // CHECK-D-NEXT: <TagType abbrevid=9 op0=3/>
+// CHECK-D-NEXT: </RecordBlock>
+
+class E {
+public:
+  E() {}
+  ~E() {}
+
+protected:
+  void ProtectedMethod();
+};
+// CHECK-E: <RecordBlock NumWords={{[0-9]*}} BlockCodeSize=4>
+  // CHECK-E-NEXT: <USR abbrevid=4 op0=20 op1=40 op2=149 op3=132 op4=168 op5=224 op6=255 op7=65 op8=120 op9=167 op10=148 op11=98 op12=42 op13=84 op14=122 op15=166 op16=34 op17=80 op18=57 op19=103 op20=161/>
+  // CHECK-E-NEXT: <Name abbrevid=5 op0=1/> blob data = 'E'
+  // CHECK-E-NEXT: <DefLocation abbrevid=7 op0=81 op1={{[0-9]*}}/> blob data = '{{.*}}'
+  // CHECK-E-NEXT: <TagType abbrevid=9 op0=3/>
+// CHECK-E-NEXT: </RecordBlock>
+
+// CHECK-ECON: <FunctionBlock NumWords={{[0-9]*}} BlockCodeSize=4>
+  // CHECK-ECON-NEXT: <USR abbrevid=4 op0=20 op1=222 op2=180 op3=172 op4=28 op5=217 op6=37 op7=60 op8=217 op9=239 op10=127 op11=190 op12=107 op13=202 op14=197 op15=6 op16=215 op17=121 op18=132 op19=171 op20=212/>
+  // CHECK-ECON-NEXT: <Name abbrevid=5 op0=1/> blob data = 'E'
+  // CHECK-ECON-NEXT: <Namespace abbrevid=6 op0=1 op1=40/> blob data = '289584A8E0FF4178A794622A547AA622503967A1'
+  // CHECK-ECON-NEXT: <IsMethod abbrevid=11 op0=1/>
+  // CHECK-ECON-NEXT: <DefLocation abbrevid=7 op0=83 op1={{[0-9]*}}/> blob data = '{{.*}}'
+  // CHECK-ECON-NEXT: <Parent abbrevid=9 op0=1 op1=40/> blob data = '289584A8E0FF4178A794622A547AA622503967A1'
+  // CHECK-ECON-NEXT: <TypeBlock NumWords={{[0-9]*}} BlockCodeSize=4>
+    // CHECK-ECON-NEXT: <Type abbrevid=4 op0=4 op1=4/> blob data = 'void'
+  // CHECK-ECON-NEXT: </TypeBlock>
+// CHECK-ECON-NEXT: </FunctionBlock>
+
+// CHECK-EDES: <FunctionBlock NumWords={{[0-9]*}} BlockCodeSize=4>
+  // CHECK-EDES-NEXT: <USR abbrevid=4 op0=20 op1=189 op2=43 op3=222 op4=189 op5=66 op6=63 op7=128 op8=186 op9=204 op10=234 op11=117 op12=222 op13=109 op14=102 op15=34 op16=211 op17=85 op18=252 op19=45 op20=23/>
+  // CHECK-EDES-NEXT: <Name abbrevid=5 op0=2/> blob data = '~E'
+  // CHECK-EDES-NEXT: <Namespace abbrevid=6 op0=1 op1=40/> blob data = '289584A8E0FF4178A794622A547AA622503967A1'
+  // CHECK-EDES-NEXT: <IsMethod abbrevid=11 op0=1/>
+  // CHECK-EDES-NEXT: <DefLocation abbrevid=7 op0=84 op1={{[0-9]*}}/> blob data = '{{.*}}'
+  // CHECK-EDES-NEXT: <Parent abbrevid=9 op0=1 op1=40/> blob data = '289584A8E0FF4178A794622A547AA622503967A1'
+  // CHECK-EDES-NEXT: <TypeBlock NumWords={{[0-9]*}} BlockCodeSize=4>
+    // CHECK-EDES-NEXT: <Type abbrevid=4 op0=4 op1=4/> blob data = 'void'
+  // CHECK-EDES-NEXT: </TypeBlock>
+// CHECK-EDES-NEXT: </FunctionBlock>
+
+void E::ProtectedMethod() {}
+// CHECK-PM: <FunctionBlock NumWords={{[0-9]*}} BlockCodeSize=4>
+  // CHECK-PM-NEXT: <USR abbrevid=4 op0=20 op1=80 op2=147 op3=212 op4=40 op5=205 op6=198 op7=32 op8=150 op9=166 op10=117 op11=71 op12=186 op13=82 op14=86 op15=110 op16=79 op17=185 op18=64 op19=78 op20=238/>
+  // CHECK-PM-NEXT: <Name abbrevid=5 op0=15/> blob data = 'ProtectedMethod'
+  // CHECK-PM-NEXT: <Namespace abbrevid=6 op0=1 op1=40/> blob data = '289584A8E0FF4178A794622A547AA622503967A1'
+  // CHECK-PM-NEXT: <IsMethod abbrevid=11 op0=1/>
+  // CHECK-PM-NEXT: <DefLocation abbrevid=7 op0=120 op1={{[0-9]*}}/> blob data = '{{.*}}'
+  // CHECK-PM-NEXT: <Location abbrevid=8 op0=87 op1={{[0-9]*}}/> blob data = '{{.*}}'
+  // CHECK-PM-NEXT: <Parent abbrevid=9 op0=1 op1=40/> blob data = '289584A8E0FF4178A794622A547AA622503967A1'
+  // CHECK-PM-NEXT: <TypeBlock NumWords={{[0-9]*}} BlockCodeSize=4>
+    // CHECK-PM-NEXT: <Type abbrevid=4 op0=4 op1=4/> blob data = 'void'
+  // CHECK-PM-NEXT: </TypeBlock>
+// CHECK-PM-NEXT: </FunctionBlock>
+
+
+
+class F : virtual private D, public E {};
+// CHECK-F: <RecordBlock NumWords={{[0-9]*}} BlockCodeSize=4>
+  // CHECK-F-NEXT: <USR abbrevid=4 op0=20 op1=227 op2=181 op3=71 op4=2 op5=250 op6=191 op7=244 op8=3 op9=112 op10=37 op11=186 op12=25 op13=79 op14=194 op15=124 op16=71 op17=0 op18=99 op19=48 op20=181/>
+  // CHECK-F-NEXT: <Name abbrevid=5 op0=1/> blob data = 'F'
+  // CHECK-F-NEXT: <DefLocation abbrevid=7 op0=136 op1={{[0-9]*}}/> blob data = '{{.*}}'
+  // CHECK-F-NEXT: <TagType abbrevid=9 op0=3/>
+  // CHECK-F-NEXT: <Parent abbrevid=10 op0=1 op1=40/> blob data = '289584A8E0FF4178A794622A547AA622503967A1'
+  // CHECK-F-NEXT: <VParent abbrevid=11 op0=1 op1=40/> blob data = '0921737541208B8FA9BB42B60F78AC1D779AA054'
+// CHECK-F-NEXT: </RecordBlock>
+
+class X {
+  class Y {};
+};
+// CHECK-X: <RecordBlock NumWords={{[0-9]*}} BlockCodeSize=4>
+  // CHECK-X-NEXT: <USR abbrevid=4 op0=20 op1=202 op2=124 op3=121 op4=53 op5=115 op6=11 op7=94 op8=172 op9=210 op10=95 op11=8 op12=14 op13=156 op14=131 op15=250 op16=8 op17=124 op18=205 op19=199 op20=94/>
+  // CHECK-X-NEXT: <Name abbrevid=5 op0=1/> blob data = 'X'
+  // CHECK-X-NEXT: <DefLocation abbrevid=7 op0=146 op1={{[0-9]*}}/> blob data = '{{.*}}'
+  // CHECK-X-NEXT: <TagType abbrevid=9 op0=3/>
+// CHECK-X-NEXT: </RecordBlock>
+
+// CHECK-Y: <RecordBlock NumWords={{[0-9]*}} BlockCodeSize=4>
+  // CHECK-Y-NEXT: <USR abbrevid=4 op0=20 op1=100 op2=26 op3=180 op4=163 op5=211 op6=99 op7=153 op8=149 op9=74 op10=205 op11=226 op12=156 op13=122 op14=136 op15=51 op16=3 op17=43 op18=244 op19=4 op20=114/>
+  // CHECK-Y-NEXT: <Name abbrevid=5 op0=1/> blob data = 'Y'
+  // CHECK-Y-NEXT: <Namespace abbrevid=6 op0=1 op1=40/> blob data = 'CA7C7935730B5EACD25F080E9C83FA087CCDC75E'
+  // CHECK-Y-NEXT: <DefLocation abbrevid=7 op0=147 op1={{[0-9]*}}/> blob data = '{{.*}}'
+  // CHECK-Y-NEXT: <TagType abbrevid=9 op0=3/>
+// CHECK-Y-NEXT: </RecordBlock>
+
+void H() {
+  class I {};
+}
+// CHECK-H: <FunctionBlock NumWords={{[0-9]*}} BlockCodeSize=4>
+  // CHECK-H-NEXT: <USR abbrevid=4 op0=20 op1=182 op2=172 op3=76 op4=92 op5=159 op6=46 op7=163 op8=242 op9=179 op10=236 op11=225 op12=163 op13=61 op14=52 op15=159 op16=78 op17=229 op18=2 op19=178 op20=78/>
+  // CHECK-H-NEXT: <Name abbrevid=5 op0=1/> blob data = 'H'
+  // CHECK-H-NEXT: <DefLocation abbrevid=7 op0=164 op1={{[0-9]*}}/> blob data = '{{.*}}'
+  // CHECK-H-NEXT: <TypeBlock NumWords={{[0-9]*}} BlockCodeSize=4>
+    // CHECK-H-NEXT: <Type abbrevid=4 op0=4 op1=4/> blob data = 'void'
+  // CHECK-H-NEXT: </TypeBlock>
+// CHECK-H-NEXT: </FunctionBlock>
+
+
+// CHECK-I: <RecordBlock NumWords={{[0-9]*}} BlockCodeSize=4>
+  // CHECK-I-NEXT: <USR abbrevid=4 op0=20 op1=232 op2=28 op3=224 op4=123 op5=179 op6=252 op7=204 op8=122 op9=136 op10=208 op11=89 op12=252 op13=51 op14=244 op15=20 op16=13 op17=247 op18=79 op19=99 op20=218/>
+  // CHECK-I-NEXT: <Name abbrevid=5 op0=1/> blob data = 'I'
+  // CHECK-I-NEXT: <Namespace abbrevid=6 op0=2 op1=40/> blob data = 'B6AC4C5C9F2EA3F2B3ECE1A33D349F4EE502B24E'
+  // CHECK-I-NEXT: <DefLocation abbrevid=7 op0=165 op1={{[0-9]*}}/> blob data = '{{.*}}'
+  // CHECK-I-NEXT: <TagType abbrevid=9 op0=3/>
+// CHECK-I-NEXT: </RecordBlock>
Index: test/clang-doc/bc-namespace.cpp
===================================================================
--- /dev/null
+++ test/clang-doc/bc-namespace.cpp
@@ -0,0 +1,80 @@
+// RUN: rm -rf %t
+// RUN: mkdir %t
+// RUN: echo "" > %t/compile_flags.txt
+// RUN: cp "%s" "%t/test.cpp"
+// RUN: clang-doc --dump -doxygen -p %t %t/test.cpp -output=%t/docs
+// RUN: llvm-bcanalyzer %t/docs/bc/8D042EFFC98B373450BC6B5B90A330C25A150E9C.bc --dump | FileCheck %s --check-prefix CHECK-A
+// RUN: llvm-bcanalyzer %t/docs/bc/E21AF79E2A9D02554BA090D10DF39FE273F5CDB5.bc --dump | FileCheck %s --check-prefix CHECK-B
+// RUN: llvm-bcanalyzer %t/docs/bc/39D3C95A5F7CE2BA4937BD7B01BAE09EBC2AD8AC.bc --dump | FileCheck %s --check-prefix CHECK-F
+// RUN: llvm-bcanalyzer %t/docs/bc/9A82CB33ED0FDF81EE383D31CD0957D153C5E840.bc --dump | FileCheck %s --check-prefix CHECK-FUNC
+// RUN: llvm-bcanalyzer %t/docs/bc/E9ABF7E7E2425B626723D41E76E4BC7E7A5BD775.bc --dump | FileCheck %s --check-prefix CHECK-E
+ 
+namespace A {
+// CHECK-A: <NamespaceBlock NumWords={{[0-9]*}} BlockCodeSize=4>
+  // CHECK-A-NEXT: <USR abbrevid=4 op0=20 op1=141 op2=4 op3=46 op4=255 op5=201 op6=139 op7=55 op8=52 op9=80 op10=188 op11=107 op12=91 op13=144 op14=163 op15=48 op16=194 op17=90 op18=21 op19=14 op20=156/>
+  // CHECK-A-NEXT: <Name abbrevid=5 op0=1/> blob data = 'A'
+// CHECK-A-NEXT: </NamespaceBlock>
+  
+void f();
+
+}  // namespace A
+
+namespace A {
+
+void f(){};
+// CHECK-F: <FunctionBlock NumWords={{[0-9]*}} BlockCodeSize=4>
+  // CHECK-F-NEXT: <USR abbrevid=4 op0=20 op1=57 op2=211 op3=201 op4=90 op5=95 op6=124 op7=226 op8=186 op9=73 op10=55 op11=189 op12=123 op13=1 op14=186 op15=224 op16=158 op17=188 op18=42 op19=216 op20=172/>
+  // CHECK-F-NEXT: <Name abbrevid=5 op0=1/> blob data = 'f'
+  // CHECK-F-NEXT: <Namespace abbrevid=6 op0=0 op1=40/> blob data = '8D042EFFC98B373450BC6B5B90A330C25A150E9C'
+  // CHECK-F-NEXT: <DefLocation abbrevid=7 op0=24 op1={{[0-9]*}}/> blob data = '{{.*}}'
+  // CHECK-F-NEXT: <Location abbrevid=8 op0=18 op1={{[0-9]*}}/> blob data = '{{.*}}'
+  // CHECK-F-NEXT: <TypeBlock NumWords={{[0-9]*}} BlockCodeSize=4>
+    // CHECK-F-NEXT: <Type abbrevid=4 op0=4 op1=4/> blob data = 'void'
+  // CHECK-F-NEXT: </TypeBlock>
+// CHECK-F-NEXT: </FunctionBlock>
+
+namespace B {
+// CHECK-B: <NamespaceBlock NumWords={{[0-9]*}} BlockCodeSize=4>
+  // CHECK-B-NEXT: <USR abbrevid=4 op0=20 op1=226 op2=26 op3=247 op4=158 op5=42 op6=157 op7=2 op8=85 op9=75 op10=160 op11=144 op12=209 op13=13 op14=243 op15=159 op16=226 op17=115 op18=245 op19=205 op20=181/>
+  // CHECK-B-NEXT: <Name abbrevid=5 op0=1/> blob data = 'B'
+  // CHECK-B-NEXT: <Namespace abbrevid=6 op0=0 op1=40/> blob data = '8D042EFFC98B373450BC6B5B90A330C25A150E9C'
+// CHECK-B-NEXT: </NamespaceBlock>
+
+enum E { X };
+// CHECK-E: <EnumBlock NumWords={{[0-9]*}} BlockCodeSize=4>
+  // CHECK-E-NEXT: <USR abbrevid=4 op0=20 op1=233 op2=171 op3=247 op4=231 op5=226 op6=66 op7=91 op8=98 op9=103 op10=35 op11=212 op12=30 op13=118 op14=228 op15=188 op16=126 op17=122 op18=91 op19=215 op20=117/>
+  // CHECK-E-NEXT: <Name abbrevid=5 op0=1/> blob data = 'E'
+  // CHECK-E-NEXT: <Namespace abbrevid=6 op0=0 op1=40/> blob data = 'E21AF79E2A9D02554BA090D10DF39FE273F5CDB5'
+  // CHECK-E-NEXT: <Namespace abbrevid=6 op0=0 op1=40/> blob data = '8D042EFFC98B373450BC6B5B90A330C25A150E9C'
+  // CHECK-E-NEXT: <DefLocation abbrevid=7 op0=43 op1={{[0-9]*}}/> blob data = '{{.*}}'
+  // CHECK-E-NEXT: <Member abbrevid=9 op0=1/> blob data = 'X'
+// CHECK-E-NEXT: </EnumBlock>
+
+E func(int i) { return X; }
+// CHECK-FUNC: <FunctionBlock NumWords={{[0-9]*}} BlockCodeSize=4>
+  // CHECK-FUNC-NEXT: <USR abbrevid=4 op0=20 op1=154 op2=130 op3=203 op4=51 op5=237 op6=15 op7=223 op8=129 op9=238 op10=56 op11=61 op12=49 op13=205 op14=9 op15=87 op16=209 op17=83 op18=197 op19=232 op20=64/>
+  // CHECK-FUNC-NEXT: <Name abbrevid=5 op0=4/> blob data = 'func'
+  // CHECK-FUNC-NEXT: <Namespace abbrevid=6 op0=0 op1=40/> blob data = 'E21AF79E2A9D02554BA090D10DF39FE273F5CDB5'
+  // CHECK-FUNC-NEXT: <Namespace abbrevid=6 op0=0 op1=40/> blob data = '8D042EFFC98B373450BC6B5B90A330C25A150E9C'
+  // CHECK-FUNC-NEXT: <DefLocation abbrevid=7 op0=53 op1={{[0-9]*}}/> blob data = '{{.*}}'
+  // CHECK-FUNC-NEXT: <TypeBlock NumWords={{[0-9]*}} BlockCodeSize=4>
+    // CHECK-FUNC-NEXT: <Type abbrevid=4 op0=4 op1=12/> blob data = 'enum A::B::E'
+  // CHECK-FUNC-NEXT: </TypeBlock>
+  // CHECK-FUNC-NEXT: <FieldTypeBlock NumWords={{[0-9]*}} BlockCodeSize=4>
+    // CHECK-FUNC-NEXT: <Type abbrevid=4 op0=4 op1=3/> blob data = 'int'
+    // CHECK-FUNC-NEXT: <Name abbrevid=5 op0=1/> blob data = 'i'
+  // CHECK-FUNC-NEXT: </FieldTypeBlock>
+// CHECK-FUNC-NEXT: </FunctionBlock>
+
+}  // namespace B
+}  // namespace A
+
+
+
+
+
+
+
+
+
+
Index: test/clang-doc/bc-comment.cpp
===================================================================
--- /dev/null
+++ test/clang-doc/bc-comment.cpp
@@ -0,0 +1,188 @@
+// RUN: rm -rf %t
+// RUN: mkdir %t
+// RUN: echo "" > %t/compile_flags.txt
+// RUN: cp "%s" "%t/test.cpp"
+// RUN: clang-doc --dump -doxygen -p %t %t/test.cpp -output=%t/docs
+// RUN: llvm-bcanalyzer %t/docs/bc/7574630614A535710E5A6ABCFFF98BCA2D06A4CA.bc --dump | FileCheck %s
+ 
+/// \brief Brief description.
+///
+/// Extended description that
+/// continues onto the next line.
+/// 
+/// <ul class="test">
+///   <li> Testing.
+/// </ul>
+///
+/// \verbatim
+/// The description continues.
+/// \endverbatim
+/// --
+/// \param [out] I is a parameter.
+/// \param J is a parameter.
+/// \return void
+void F(int I, int J);
+
+/// Bonus comment on definition
+void F(int I, int J) {}
+
+// CHECK: <BLOCKINFO_BLOCK/>
+// CHECK-NEXT: <VersionBlock NumWords={{[0-9]*}} BlockCodeSize=4>
+  // CHECK-NEXT: <Version abbrevid=4 op0=1/>
+// CHECK-NEXT: </VersionBlock>
+// CHECK-NEXT: <FunctionBlock NumWords={{[0-9]*}} BlockCodeSize=4>
+  // CHECK-NEXT: <USR abbrevid=4 op0=20 op1=117 op2=116 op3=99 op4=6 op5=20 op6=165 op7=53 op8=113 op9=14 op10=90 op11=106 op12=188 op13=255 op14=249 op15=139 op16=202 op17=45 op18=6 op19=164 op20=202/>
+  // CHECK-NEXT: <Name abbrevid=5 op0=1/> blob data = 'F'
+  // CHECK-NEXT: <CommentBlock NumWords={{[0-9]*}} BlockCodeSize=4>
+    // CHECK-NEXT: <Kind abbrevid=4 op0=11/> blob data = 'FullComment'
+    // CHECK-NEXT: <CommentBlock NumWords={{[0-9]*}} BlockCodeSize=4>
+      // CHECK-NEXT: <Kind abbrevid=4 op0=16/> blob data = 'ParagraphComment'
+      // CHECK-NEXT: <CommentBlock NumWords={{[0-9]*}} BlockCodeSize=4>
+        // CHECK-NEXT: <Kind abbrevid=4 op0=11/> blob data = 'TextComment'
+      // CHECK-NEXT: </CommentBlock>
+    // CHECK-NEXT: </CommentBlock>
+    // CHECK-NEXT: <CommentBlock NumWords={{[0-9]*}} BlockCodeSize=4>
+      // CHECK-NEXT: <Kind abbrevid=4 op0=19/> blob data = 'BlockCommandComment'
+      // CHECK-NEXT: <Name abbrevid=6 op0=5/> blob data = 'brief'
+      // CHECK-NEXT: <CommentBlock NumWords={{[0-9]*}} BlockCodeSize=4>
+        // CHECK-NEXT: <Kind abbrevid=4 op0=16/> blob data = 'ParagraphComment'
+        // CHECK-NEXT: <CommentBlock NumWords={{[0-9]*}} BlockCodeSize=4>
+          // CHECK-NEXT: <Kind abbrevid=4 op0=11/> blob data = 'TextComment'
+          // CHECK-NEXT: <Text abbrevid=5 op0=19/> blob data = ' Brief description.'
+        // CHECK-NEXT: </CommentBlock>
+      // CHECK-NEXT: </CommentBlock>
+    // CHECK-NEXT: </CommentBlock>
+    // CHECK-NEXT: <CommentBlock NumWords={{[0-9]*}} BlockCodeSize=4>
+      // CHECK-NEXT: <Kind abbrevid=4 op0=16/> blob data = 'ParagraphComment'
+      // CHECK-NEXT: <CommentBlock NumWords={{[0-9]*}} BlockCodeSize=4>
+        // CHECK-NEXT: <Kind abbrevid=4 op0=11/> blob data = 'TextComment'
+        // CHECK-NEXT: <Text abbrevid=5 op0=26/> blob data = ' Extended description that'
+      // CHECK-NEXT: </CommentBlock>
+      // CHECK-NEXT: <CommentBlock NumWords={{[0-9]*}} BlockCodeSize=4>
+        // CHECK-NEXT: <Kind abbrevid=4 op0=11/> blob data = 'TextComment'
+        // CHECK-NEXT: <Text abbrevid=5 op0=30/> blob data = ' continues onto the next line.'
+      // CHECK-NEXT: </CommentBlock>
+    // CHECK-NEXT: </CommentBlock>
+    // CHECK-NEXT: <CommentBlock NumWords={{[0-9]*}} BlockCodeSize=4>
+      // CHECK-NEXT: <Kind abbrevid=4 op0=16/> blob data = 'ParagraphComment'
+      // CHECK-NEXT: <CommentBlock NumWords={{[0-9]*}} BlockCodeSize=4>
+        // CHECK-NEXT: <Kind abbrevid=4 op0=11/> blob data = 'TextComment'
+      // CHECK-NEXT: </CommentBlock>
+      // CHECK-NEXT: <CommentBlock NumWords={{[0-9]*}} BlockCodeSize=4>
+        // CHECK-NEXT: <Kind abbrevid=4 op0=19/> blob data = 'HTMLStartTagComment'
+        // CHECK-NEXT: <Name abbrevid=6 op0=2/> blob data = 'ul'
+        // CHECK-NEXT: <AttrKey abbrevid=12 op0=5/> blob data = 'class'
+        // CHECK-NEXT: <AttrVal abbrevid=13 op0=4/> blob data = 'test'
+      // CHECK-NEXT: </CommentBlock>
+      // CHECK-NEXT: <CommentBlock NumWords={{[0-9]*}} BlockCodeSize=4>
+        // CHECK-NEXT: <Kind abbrevid=4 op0=11/> blob data = 'TextComment'
+      // CHECK-NEXT: </CommentBlock>
+      // CHECK-NEXT: <CommentBlock NumWords={{[0-9]*}} BlockCodeSize=4>
+        // CHECK-NEXT: <Kind abbrevid=4 op0=19/> blob data = 'HTMLStartTagComment'
+        // CHECK-NEXT: <Name abbrevid=6 op0=2/> blob data = 'li'
+      // CHECK-NEXT: </CommentBlock>
+      // CHECK-NEXT: <CommentBlock NumWords={{[0-9]*}} BlockCodeSize=4>
+        // CHECK-NEXT: <Kind abbrevid=4 op0=11/> blob data = 'TextComment'
+        // CHECK-NEXT: <Text abbrevid=5 op0=9/> blob data = ' Testing.'
+      // CHECK-NEXT: </CommentBlock>
+      // CHECK-NEXT: <CommentBlock NumWords={{[0-9]*}} BlockCodeSize=4>
+        // CHECK-NEXT: <Kind abbrevid=4 op0=11/> blob data = 'TextComment'
+      // CHECK-NEXT: </CommentBlock>
+      // CHECK-NEXT: <CommentBlock NumWords={{[0-9]*}} BlockCodeSize=4>
+        // CHECK-NEXT: <Kind abbrevid=4 op0=17/> blob data = 'HTMLEndTagComment'
+        // CHECK-NEXT: <Name abbrevid=6 op0=2/> blob data = 'ul'
+        // CHECK-NEXT: <SelfClosing abbrevid=10 op0=1/>
+      // CHECK-NEXT: </CommentBlock>
+    // CHECK-NEXT: </CommentBlock>
+    // CHECK-NEXT: <CommentBlock NumWords={{[0-9]*}} BlockCodeSize=4>
+      // CHECK-NEXT: <Kind abbrevid=4 op0=16/> blob data = 'ParagraphComment'
+      // CHECK-NEXT: <CommentBlock NumWords={{[0-9]*}} BlockCodeSize=4>
+        // CHECK-NEXT: <Kind abbrevid=4 op0=11/> blob data = 'TextComment'
+      // CHECK-NEXT: </CommentBlock>
+    // CHECK-NEXT: </CommentBlock>
+    // CHECK-NEXT: <CommentBlock NumWords={{[0-9]*}} BlockCodeSize=4>
+      // CHECK-NEXT: <Kind abbrevid=4 op0=20/> blob data = 'VerbatimBlockComment'
+      // CHECK-NEXT: <Name abbrevid=6 op0=8/> blob data = 'verbatim'
+      // CHECK-NEXT: <CloseName abbrevid=9 op0=11/> blob data = 'endverbatim'
+      // CHECK-NEXT: <CommentBlock NumWords={{[0-9]*}} BlockCodeSize=4>
+        // CHECK-NEXT: <Kind abbrevid=4 op0=24/> blob data = 'VerbatimBlockLineComment'
+        // CHECK-NEXT: <Text abbrevid=5 op0=27/> blob data = ' The description continues.'
+      // CHECK-NEXT: </CommentBlock>
+    // CHECK-NEXT: </CommentBlock>
+    // CHECK-NEXT: <CommentBlock NumWords={{[0-9]*}} BlockCodeSize=4>
+      // CHECK-NEXT: <Kind abbrevid=4 op0=16/> blob data = 'ParagraphComment'
+      // CHECK-NEXT: <CommentBlock NumWords={{[0-9]*}} BlockCodeSize=4>
+        // CHECK-NEXT: <Kind abbrevid=4 op0=11/> blob data = 'TextComment'
+        // CHECK-NEXT: <Text abbrevid=5 op0=3/> blob data = ' --'
+      // CHECK-NEXT: </CommentBlock>
+      // CHECK-NEXT: <CommentBlock NumWords={{[0-9]*}} BlockCodeSize=4>
+        // CHECK-NEXT: <Kind abbrevid=4 op0=11/> blob data = 'TextComment'
+      // CHECK-NEXT: </CommentBlock>
+    // CHECK-NEXT: </CommentBlock>
+    // CHECK-NEXT: <CommentBlock NumWords={{[0-9]*}} BlockCodeSize=4>
+      // CHECK-NEXT: <Kind abbrevid=4 op0=19/> blob data = 'ParamCommandComment'
+      // CHECK-NEXT: <Direction abbrevid=7 op0=5/> blob data = '[out]'
+      // CHECK-NEXT: <ParamName abbrevid=8 op0=1/> blob data = 'I'
+      // CHECK-NEXT: <Explicit abbrevid=11 op0=1/>
+      // CHECK-NEXT: <CommentBlock NumWords={{[0-9]*}} BlockCodeSize=4>
+        // CHECK-NEXT: <Kind abbrevid=4 op0=16/> blob data = 'ParagraphComment'
+        // CHECK-NEXT: <CommentBlock NumWords={{[0-9]*}} BlockCodeSize=4>
+          // CHECK-NEXT: <Kind abbrevid=4 op0=11/> blob data = 'TextComment'
+          // CHECK-NEXT: <Text abbrevid=5 op0=16/> blob data = ' is a parameter.'
+        // CHECK-NEXT: </CommentBlock>
+        // CHECK-NEXT: <CommentBlock NumWords={{[0-9]*}} BlockCodeSize=4>
+          // CHECK-NEXT: <Kind abbrevid=4 op0=11/> blob data = 'TextComment'
+        // CHECK-NEXT: </CommentBlock>
+      // CHECK-NEXT: </CommentBlock>
+    // CHECK-NEXT: </CommentBlock>
+    // CHECK-NEXT: <CommentBlock NumWords={{[0-9]*}} BlockCodeSize=4>
+      // CHECK-NEXT: <Kind abbrevid=4 op0=19/> blob data = 'ParamCommandComment'
+      // CHECK-NEXT: <Direction abbrevid=7 op0=4/> blob data = '[in]'
+      // CHECK-NEXT: <ParamName abbrevid=8 op0=1/> blob data = 'J'
+      // CHECK-NEXT: <CommentBlock NumWords={{[0-9]*}} BlockCodeSize=4>
+        // CHECK-NEXT: <Kind abbrevid=4 op0=16/> blob data = 'ParagraphComment'
+        // CHECK-NEXT: <CommentBlock NumWords={{[0-9]*}} BlockCodeSize=4>
+          // CHECK-NEXT: <Kind abbrevid=4 op0=11/> blob data = 'TextComment'
+          // CHECK-NEXT: <Text abbrevid=5 op0=16/> blob data = ' is a parameter.'
+        // CHECK-NEXT: </CommentBlock>
+        // CHECK-NEXT: <CommentBlock NumWords={{[0-9]*}} BlockCodeSize=4>
+          // CHECK-NEXT: <Kind abbrevid=4 op0=11/> blob data = 'TextComment'
+        // CHECK-NEXT: </CommentBlock>
+      // CHECK-NEXT: </CommentBlock>
+    // CHECK-NEXT: </CommentBlock>
+    // CHECK-NEXT: <CommentBlock NumWords={{[0-9]*}} BlockCodeSize=4>
+      // CHECK-NEXT: <Kind abbrevid=4 op0=19/> blob data = 'BlockCommandComment'
+      // CHECK-NEXT: <Name abbrevid=6 op0=6/> blob data = 'return'
+      // CHECK-NEXT: <CommentBlock NumWords={{[0-9]*}} BlockCodeSize=4>
+        // CHECK-NEXT: <Kind abbrevid=4 op0=16/> blob data = 'ParagraphComment'
+        // CHECK-NEXT: <CommentBlock NumWords={{[0-9]*}} BlockCodeSize=4>
+          // CHECK-NEXT: <Kind abbrevid=4 op0=11/> blob data = 'TextComment'
+          // CHECK-NEXT: <Text abbrevid=5 op0=5/> blob data = ' void'
+        // CHECK-NEXT: </CommentBlock>
+      // CHECK-NEXT: </CommentBlock>
+    // CHECK-NEXT: </CommentBlock>
+  // CHECK-NEXT: </CommentBlock>
+  // CHECK-NEXT: <CommentBlock NumWords={{[0-9]*}} BlockCodeSize=4>
+    // CHECK-NEXT: <Kind abbrevid=4 op0=11/> blob data = 'FullComment'
+    // CHECK-NEXT: <CommentBlock NumWords={{[0-9]*}} BlockCodeSize=4>
+      // CHECK-NEXT: <Kind abbrevid=4 op0=16/> blob data = 'ParagraphComment'
+      // CHECK-NEXT: <CommentBlock NumWords={{[0-9]*}} BlockCodeSize=4>
+        // CHECK-NEXT: <Kind abbrevid=4 op0=11/> blob data = 'TextComment'
+        // CHECK-NEXT: <Text abbrevid=5 op0=28/> blob data = ' Bonus comment on definition'
+      // CHECK-NEXT: </CommentBlock>
+    // CHECK-NEXT: </CommentBlock>
+  // CHECK-NEXT: </CommentBlock>
+  // CHECK-NEXT: <DefLocation abbrevid=7 op0=27 op1={{[0-9]*}}/> blob data = '{{.*}}'
+  // CHECK-NEXT: <Location abbrevid=8 op0=24 op1={{[0-9]*}}/> blob data = '{{.*}}'
+  // CHECK-NEXT: <TypeBlock NumWords={{[0-9]*}} BlockCodeSize=4>
+    // CHECK-NEXT: <Type abbrevid=4 op0=4 op1=4/> blob data = 'void'
+  // CHECK-NEXT: </TypeBlock>
+  // CHECK-NEXT: <FieldTypeBlock NumWords={{[0-9]*}} BlockCodeSize=4>
+    // CHECK-NEXT: <Type abbrevid=4 op0=4 op1=3/> blob data = 'int'
+    // CHECK-NEXT: <Name abbrevid=5 op0=1/> blob data = 'I'
+  // CHECK-NEXT: </FieldTypeBlock>
+  // CHECK-NEXT: <FieldTypeBlock NumWords={{[0-9]*}} BlockCodeSize=4>
+    // CHECK-NEXT: <Type abbrevid=4 op0=4 op1=3/> blob data = 'int'
+    // CHECK-NEXT: <Name abbrevid=5 op0=1/> blob data = 'J'
+  // CHECK-NEXT: </FieldTypeBlock>
+// CHECK-NEXT: </FunctionBlock>
Index: docs/ReleaseNotes.rst
===================================================================
--- docs/ReleaseNotes.rst
+++ docs/ReleaseNotes.rst
@@ -42,7 +42,8 @@
 Major New Features
 ------------------
 
-...
+New tool :doc:`clang-doc <clang-doc>`, a tool for generating C and C++ 
+documentation from source code and comments. 
 
 Improvements to clang-query
 ---------------------------
Index: clang-doc/tool/ClangDocMain.cpp
===================================================================
--- clang-doc/tool/ClangDocMain.cpp
+++ clang-doc/tool/ClangDocMain.cpp
@@ -19,6 +19,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "ClangDoc.h"
+#include "Reducer.h"
 #include "clang/AST/AST.h"
 #include "clang/AST/Decl.h"
 #include "clang/ASTMatchers/ASTMatchFinder.h"
@@ -54,14 +55,62 @@
                      llvm::cl::desc("Dump mapper results to bitcode file."),
                      llvm::cl::init(false), llvm::cl::cat(ClangDocCategory));
 
+static llvm::cl::opt<bool>
+    DumpResult("dump",
+               llvm::cl::desc("Dump intermediate results to bitcode file."),
+               llvm::cl::init(false), llvm::cl::cat(ClangDocCategory));
+
+static llvm::cl::opt<std::string> Format(
+    "format",
+    llvm::cl::desc("Format for outputted docs (Current options are yaml)."),
+    llvm::cl::init("yaml"), llvm::cl::cat(ClangDocCategory));
+
 static llvm::cl::opt<bool> DoxygenOnly(
     "doxygen",
     llvm::cl::desc("Use only doxygen-style comments to generate docs."),
     llvm::cl::init(false), llvm::cl::cat(ClangDocCategory));
 
+bool CreateDirectory(const Twine &DirName, bool ClearDirectory = false) {
+  // Returns true on failure (after logging to llvm::errs()), false on success.
+  std::error_code OK; // Default-constructed error_code denotes success.
+  if (ClearDirectory) {
+    std::error_code RemoveStatus = llvm::sys::fs::remove_directories(DirName);
+    if (RemoveStatus != OK) {
+      llvm::errs() << "Unable to remove existing documentation directory for "
+                   << DirName << ".\n";
+      return true;
+    }
+  }
+  std::error_code DirectoryStatus = llvm::sys::fs::create_directories(DirName);
+  if (DirectoryStatus != OK) {
+    llvm::errs() << "Unable to create documentation directories.\n";
+    return true;
+  }
+  return false;
+}
+
+bool DumpResultToFile(const Twine &DirName, const Twine &FileName,
+                      StringRef Buffer, bool ClearDirectory = false) {
+  std::error_code OK; // Success value; this function returns true on failure.
+  llvm::SmallString<128> IRRootPath;
+  llvm::sys::path::native(OutDirectory, IRRootPath);
+  llvm::sys::path::append(IRRootPath, DirName); // <OutDirectory>/<DirName>
+  if (CreateDirectory(IRRootPath, ClearDirectory)) // true means it failed.
+    return true;
+  llvm::sys::path::append(IRRootPath, FileName); // Full path of output file.
+  std::error_code OutErrorInfo;
+  llvm::raw_fd_ostream OS(IRRootPath, OutErrorInfo, llvm::sys::fs::F_None);
+  if (OutErrorInfo != OK) {
+    llvm::errs() << "Error opening documentation file.\n";
+    return true;
+  }
+  OS << Buffer;
+  OS.close(); // NOTE(review): write errors are not checked here.
+  return false;
+}
+
 int main(int argc, const char **argv) {
   llvm::sys::PrintStackTraceOnErrorSignal(argv[0]);
-  std::error_code OK;
 
   auto Exec = clang::tooling::createExecutorFromCommandLineArgs(
       argc, argv, ClangDocCategory);
@@ -80,35 +129,33 @@
 
   // Mapping phase
   llvm::outs() << "Mapping decls...\n";
-  auto Err = Exec->get()->execute(doc::newMapperActionFactory(
-                                      Exec->get()->getExecutionContext()),
-                                  ArgAdjuster);
-  if (Err)
+  auto Err = Exec->get()->execute(
+      doc::newMapperActionFactory(Exec->get()->getExecutionContext()),
+      ArgAdjuster);
+  if (Err) {
     llvm::errs() << toString(std::move(Err)) << "\n";
+    return 1;
+  }
 
   if (DumpMapperResult) {
-    Exec->get()->getToolResults()->forEachResult([&](StringRef Key,
-                                                     StringRef Value) {
-      SmallString<128> IRRootPath;
-      llvm::sys::path::native(OutDirectory, IRRootPath);
-      llvm::sys::path::append(IRRootPath, "bc");
-      std::error_code DirectoryStatus =
-          llvm::sys::fs::create_directories(IRRootPath);
-      if (DirectoryStatus != OK) {
-        llvm::errs() << "Unable to create documentation directories.\n";
-        return;
-      }
-      llvm::sys::path::append(IRRootPath, Key + ".bc");
-      std::error_code OutErrorInfo;
-      llvm::raw_fd_ostream OS(IRRootPath, OutErrorInfo, llvm::sys::fs::F_None);
-      if (OutErrorInfo != OK) {
-        llvm::errs() << "Error opening documentation file.\n";
-        return;
-      }
-      OS << Value;
-      OS.close();
-    });
+    bool Failed = false; // Sticky: a later success must not mask a failure.
+    Exec->get()->getToolResults()->forEachResult(
+        [&](StringRef Key, StringRef Value) {
+          Failed |= DumpResultToFile("bc", Key + ".bc", Value);
+        });
+    return Failed;
   }
 
-  return 0;
+  // Reducing phase
+  llvm::outs() << "Reducing infos...\n";
+  auto Index = doc::inMemoryReduceResults(Exec->get()->getToolResults());
+
+  if (DumpResult) {
+    llvm::outs() << "Writing intermediate results...\n";
+    for (const auto &Pair : Index.dumpInfos()) {
+      if (DumpResultToFile("bc", Pair.getKey() + ".bc", Pair.getValue()))
+        return 1;
+    }
+    return 0;
+  }
 }
Index: clang-doc/Representation.h
===================================================================
--- clang-doc/Representation.h
+++ clang-doc/Representation.h
@@ -1,4 +1,4 @@
-///===-- Representation.h - ClangDoc Represenation --------------*- C++ -*-===//
+///===-- Representation.h - ClangDoc Representation -------------*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -26,6 +26,7 @@
 namespace clang {
 namespace doc {
 
+// SHA1'd hash of a USR.
 using SymbolID = std::array<uint8_t, 20>;
 
 struct Info;
@@ -40,15 +41,16 @@
 // A representation of a parsed comment.
 struct CommentInfo {
   CommentInfo() = default;
-  CommentInfo(CommentInfo &&Other) : Children(std::move(Other.Children)) {}
-
-  SmallString<16>
-      Kind; // Kind of comment (TextComment, InlineCommandComment,
-            // HTMLStartTagComment, HTMLEndTagComment, BlockCommandComment,
-            // ParamCommandComment, TParamCommandComment, VerbatimBlockComment,
-            // VerbatimBlockLineComment, VerbatimLineComment).
-  SmallString<64> Text;      // Text of the comment.
-  SmallString<16> Name;      // Name of the comment (for Verbatim and HTML).
+  CommentInfo(CommentInfo &Other) = delete;
+  CommentInfo(CommentInfo &&Other) = default;
+
+  SmallString<16> Kind; // Kind of comment (TextComment, InlineCommandComment,
+                        // HTMLStartTagComment, HTMLEndTagComment,
+                        // BlockCommandComment, ParamCommandComment,
+                        // TParamCommandComment, VerbatimBlockComment,
+                        // VerbatimBlockLineComment, VerbatimLineComment).
+  SmallString<64> Text; // Text of the comment.
+  SmallString<16> Name; // Name of the comment (for Verbatim and HTML).
   SmallString<8> Direction;  // Parameter direction (for (T)ParamCommand).
   SmallString<16> ParamName; // Parameter name (for (T)ParamCommand).
   SmallString<16> CloseName; // Closing tag name (for VerbatimBlock).
@@ -70,19 +72,32 @@
   Reference(llvm::StringRef Name) : UnresolvedName(Name) {}
   Reference(SymbolID USR, InfoType IT) : USR(USR), RefType(IT) {}
 
+  bool operator==(const Reference &Other) const {
+    return USR == Other.USR && UnresolvedName == Other.UnresolvedName &&
+           RefType == Other.RefType;
+  }
+  bool operator!=(const Reference &Other) const {
+    return USR != Other.USR || UnresolvedName != Other.UnresolvedName ||
+           RefType != Other.RefType;
+  }
+
   SymbolID USR;                   // Unique identifer for referenced decl
   SmallString<16> UnresolvedName; // Name of unresolved type.
-  InfoType RefType =
-      InfoType::IT_default; // Indicates the type of this Reference (namespace,
-                            // record, function, enum, default).
+  InfoType RefType = InfoType::IT_default; // Indicates the type of this
+                                           // Reference (namespace, record,
+                                           // function, enum, default).
+  Info *Ref;
 };
 
 // A base struct for TypeInfos
 struct TypeInfo {
   TypeInfo() = default;
   TypeInfo(SymbolID &Type, InfoType IT) : Type(Type, IT) {}
   TypeInfo(llvm::StringRef RefName) : Type(RefName) {}
 
+  bool operator==(const TypeInfo &Other) const { return Type == Other.Type; }
+  bool operator!=(const TypeInfo &Other) const { return Type != Other.Type; }
+
   Reference Type; // Referenced type in this info.
 };
 
@@ -94,6 +109,13 @@
   FieldTypeInfo(llvm::StringRef RefName, llvm::StringRef Name)
       : TypeInfo(RefName), Name(Name) {}
 
+  bool operator==(const FieldTypeInfo &Other) const {
+    return TypeInfo::operator==(Other) && Name == Other.Name;
+  }
+  bool operator!=(const FieldTypeInfo &Other) const {
+    return TypeInfo::operator!=(Other) || Name != Other.Name;
+  }
+
   SmallString<16> Name; // Name associated with this info.
 };
 
@@ -105,6 +127,13 @@
   MemberTypeInfo(llvm::StringRef RefName, llvm::StringRef Name)
       : FieldTypeInfo(RefName, Name) {}
 
+  bool operator==(const MemberTypeInfo &Other) const {
+    return FieldTypeInfo::operator==(Other) && Access == Other.Access;
+  }
+  bool operator!=(const MemberTypeInfo &Other) const {
+    return FieldTypeInfo::operator!=(Other) || Access != Other.Access;
+  }
+
   AccessSpecifier Access =
       clang::AccessSpecifier::AS_none; // Access level associated with this
                                        // info (public, protected, private,
@@ -115,49 +144,72 @@
   Location() = default;
   Location(int LineNumber, SmallString<16> Filename)
       : LineNumber(LineNumber), Filename(std::move(Filename)) {}
+  bool operator==(const Location &Other) const {
+    return LineNumber == Other.LineNumber && Filename == Other.Filename;
+  }
 
   int LineNumber;           // Line number of this Location.
   SmallString<32> Filename; // File for this Location.
 };
 
 /// A base struct for Infos.
 struct Info {
   Info() = default;
-  Info(Info &&Other) : Description(std::move(Other.Description)) {}
-  virtual ~Info() = default;
+  Info(InfoType IT) : IT(IT) {}
+  Info(Info &&Other) = default; // Move-only; this also deletes the copy ctor.
+  virtual ~Info() = default;    // Infos are destroyed via unique_ptr<Info>.
+
+  bool merge(Info &&I);
 
   SymbolID USR; // Unique identifier for the decl described by this Info.
-  SmallString<16> Name; // Unqualified name of the decl.
+  InfoType IT = InfoType::IT_default; // InfoType of this particular Info.
+  SmallString<16> Name;               // Unqualified name of the decl.
   llvm::SmallVector<Reference, 4>
       Namespace; // List of parent namespaces for this decl.
   std::vector<CommentInfo> Description; // Comment description of this decl.
 };
 
 // Info for namespaces.
-struct NamespaceInfo : public Info {};
+struct NamespaceInfo : public Info {
+  NamespaceInfo() : Info(InfoType::IT_namespace) {}
+
+  bool merge(NamespaceInfo &&I);
+};
 
 // Info for symbols.
 struct SymbolInfo : public Info {
+  SymbolInfo(InfoType IT) : Info(IT) {}
+
+  bool merge(SymbolInfo &&I);
+
   llvm::Optional<Location> DefLoc;    // Location where this decl is defined.
   llvm::SmallVector<Location, 2> Loc; // Locations where this decl is declared.
 };
 
 // TODO: Expand to allow for documenting templating and default args.
 // Info for functions.
 struct FunctionInfo : public SymbolInfo {
+  FunctionInfo() : SymbolInfo(InfoType::IT_function) {}
+
+  bool merge(FunctionInfo &&I);
+
   bool IsMethod = false; // Indicates whether this function is a class method.
   Reference Parent;      // Reference to the parent class decl for this method.
   TypeInfo ReturnType;   // Info about the return type of this function.
-  llvm::SmallVector<FieldTypeInfo, 4> Params; // List of parameters.
-  AccessSpecifier Access =
-      AccessSpecifier::AS_none; // Access level for this method (public,
-                                // private, protected, none).
+  llvm::SmallVector<FieldTypeInfo, 4> Params;        // List of parameters.
+  AccessSpecifier Access = AccessSpecifier::AS_none; // Access level for this
+                                                     // method (public, private,
+                                                     // protected, none).
 };
 
 // TODO: Expand to allow for documenting templating, inheritance access,
 // friend classes
 // Info for types.
 struct RecordInfo : public SymbolInfo {
+  RecordInfo() : SymbolInfo(InfoType::IT_record) {}
+
+  bool merge(RecordInfo &&I);
+
   TagTypeKind TagType = TagTypeKind::TTK_Struct; // Type of this record (struct,
                                                  // class, union, interface).
   llvm::SmallVector<MemberTypeInfo, 4>
@@ -171,6 +223,10 @@
 // TODO: Expand to allow for documenting templating.
 // Info for types.
 struct EnumInfo : public SymbolInfo {
+  EnumInfo() : SymbolInfo(InfoType::IT_enum) {}
+
+  bool merge(EnumInfo &&I);
+
   bool Scoped =
       false; // Indicates whether this enum is scoped (e.g. enum class).
   llvm::SmallVector<SmallString<16>, 4> Members; // List of enum members.
Index: clang-doc/Representation.cpp
===================================================================
--- /dev/null
+++ clang-doc/Representation.cpp
@@ -0,0 +1,91 @@
+///===-- Representation.cpp - ClangDoc Representation -----------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Representation.h"
+
+namespace clang {
+namespace doc {
+
+template <typename T> void assign(T &L, T &R) {
+  if (L != R)
+    L = R;
+}
+
+template <typename T> void move(T &L, T &&R) {
+  if (L != R)
+    L = std::move(R);
+}
+
+template <>
+void move(llvm::SmallVectorImpl<Reference> &L,
+          llvm::SmallVectorImpl<Reference> &&R) {
+  if (L.empty())
+    L = std::move(R);
+}
+
+template <>
+void move(llvm::Optional<Location> &L, llvm::Optional<Location> &&R) {
+  if (R.hasValue())
+    L = std::move(R);
+}
+
+template <typename T>
+void extend(llvm::SmallVectorImpl<T> &L, llvm::SmallVectorImpl<T> &&R) {
+  std::move(R.begin(), R.end(), std::back_inserter(L));
+}
+
+template <typename T> void extend(std::vector<T> &L, std::vector<T> &&R) {
+  std::move(R.begin(), R.end(), std::back_inserter(L));
+}
+
+bool Info::merge(Info &&Other) {
+  if (IT != Other.IT)
+    return false;
+  assign(USR, Other.USR);
+  assign(Name, Other.Name);
+  move(Namespace, std::move(Other.Namespace));
+  extend(Description, std::move(Other.Description));
+  return true;
+}
+
+bool SymbolInfo::merge(SymbolInfo &&Other) {
+  move(DefLoc, std::move(Other.DefLoc));
+  extend(Loc, std::move(Other.Loc));
+  return Info::merge(std::move(Other));
+}
+
+bool NamespaceInfo::merge(NamespaceInfo &&Other) {
+  return Info::merge(std::move(Other));
+}
+
+bool RecordInfo::merge(RecordInfo &&Other) {
+  assign(TagType, Other.TagType);
+  move(Members, std::move(Other.Members));
+  move(Parents, std::move(Other.Parents));
+  move(VirtualParents, std::move(Other.VirtualParents));
+  return SymbolInfo::merge(std::move(Other));
+}
+
+bool EnumInfo::merge(EnumInfo &&Other) {
+  assign(Scoped, Other.Scoped);
+  move(Members, std::move(Other.Members));
+  return SymbolInfo::merge(std::move(Other));
+}
+
+bool FunctionInfo::merge(FunctionInfo &&Other) {
+  assign(IsMethod, Other.IsMethod);
+  assign(Access, Other.Access);
+  move(ReturnType, std::move(Other.ReturnType));
+  move(Parent, std::move(Other.Parent));
+  move(Params, std::move(Other.Params));
+  return SymbolInfo::merge(std::move(Other));
+}
+
+} // namespace doc
+} // namespace clang
\ No newline at end of file
Index: clang-doc/Reducer.h
===================================================================
--- /dev/null
+++ clang-doc/Reducer.h
@@ -0,0 +1,30 @@
+///===-- Reducer.h - ClangDoc Reducer ---------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_DOC_REDUCER_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_DOC_REDUCER_H
+
+#include "Index.h"
+#include "clang/Tooling/Execution.h"
+#include "clang/Tooling/Tooling.h"
+
+namespace clang {
+namespace doc {
+
+// Combine occurrences of the same info (preferring the left in case of
+// conflict, and collecting all data from both into the left).
+std::unique_ptr<Info> reduceInfos(std::vector<std::unique_ptr<Info>> &Values);
+
+// Reduce an in-memory collection of ToolResults by key.
+InMemoryIndex inMemoryReduceResults(tooling::ToolResults *Results);
+
+} // namespace doc
+} // namespace clang
+
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_DOC_REDUCER_H
Index: clang-doc/Reducer.cpp
===================================================================
--- /dev/null
+++ clang-doc/Reducer.cpp
@@ -0,0 +1,69 @@
+///===-- Reducer.cpp - ClangDoc Reducer -------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Reducer.h"
+#include "BitcodeReader.h"
+#include "Representation.h"
+#include "llvm/Support/raw_ostream.h"
+
+namespace clang {
+namespace doc {
+
+#define REDUCE(INFO, TYPE) /* TYPE: tag every merged entry must carry */       \
+  {                                                                            \
+    std::unique_ptr<Info> Merged = llvm::make_unique<INFO>();                  \
+    INFO *Tmp = static_cast<INFO *>(Merged.get());                             \
+    for (auto &I : Values)                                                     \
+      if (!I || I->IT != TYPE ||                                               \
+          !Tmp->merge(std::move(*static_cast<INFO *>(I.get()))))               \
+        return nullptr;                                                        \
+    return Merged;                                                             \
+  }
+// Merges Values into one info; returns nullptr on empty or mismatched input.
+std::unique_ptr<Info> reduceInfos(std::vector<std::unique_ptr<Info>> &Values) {
+  if (Values.empty() || !Values[0])
+    return nullptr;
+  switch (Values[0]->IT) {
+  case InfoType::IT_namespace:
+    REDUCE(NamespaceInfo, InfoType::IT_namespace)
+  case InfoType::IT_record:
+    REDUCE(RecordInfo, InfoType::IT_record)
+  case InfoType::IT_enum:
+    REDUCE(EnumInfo, InfoType::IT_enum)
+  case InfoType::IT_function:
+    REDUCE(FunctionInfo, InfoType::IT_function)
+  case InfoType::IT_default:
+    break; // Reported below, together with out-of-range tag values.
+  }
+  llvm::errs() << "Unexpected info type in index.\n";
+  return nullptr;
+}
+
+#undef REDUCE
+
+InMemoryIndex inMemoryReduceResults(tooling::ToolResults *Results) {
+  InMemoryIndex DocIndex;
+
+  // Collect values into index by key.
+  Results->forEachResult([&](StringRef Key, StringRef Value) {
+    llvm::BitstreamCursor Stream(Value);
+    ClangDocBitcodeReader Reader(Stream);
+    auto Infos = Reader.readBitcode();
+    for (auto &I : Infos) {
+      DocIndex.insert(Key, std::move(I));
+    }
+  });
+
+  // Merge values with the same key.
+  DocIndex.reduce(reduceInfos);
+  return DocIndex;
+}
+
+} // namespace doc
+} // namespace clang
Index: clang-doc/Index.h
===================================================================
--- /dev/null
+++ clang-doc/Index.h
@@ -0,0 +1,63 @@
+///===-- Index.h - ClangDoc Index -------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines an abstract index for a collection of clang-doc infos, as
+// well as defining an in-memory implementation of that index.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_DOC_INDEX_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_DOC_INDEX_H
+
+#include "Representation.h"
+#include "llvm/ADT/DenseMapInfo.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringMap.h"
+#include <array>
+
+namespace clang {
+namespace doc {
+
+// Abstract class representing an index of mapped info objects.
+class Index {
+public:
+  virtual ~Index() = default;
+
+  // Inserts an info into the index.
+  virtual void insert(StringRef Key, std::unique_ptr<Info> &&I) = 0;
+
+  // Runs a user-defined reduce function on all keys to merge a vector of infos
+  // (of the same key) into a single info for that key.
+  virtual void reduce(llvm::function_ref<std::unique_ptr<Info>(
+                          std::vector<std::unique_ptr<Info>> &Value)>
+                          Callback) = 0;
+};
+
+class InMemoryIndex : public Index {
+public:
+  InMemoryIndex() = default;
+
+  void insert(StringRef Key, std::unique_ptr<Info> &&I) override;
+
+  // Reduce merges all values in the vector, then flushes the vector and pushes
+  // the merged value onto the newly empty vector.
+  void reduce(llvm::function_ref<
+              std::unique_ptr<Info>(std::vector<std::unique_ptr<Info>> &Values)>
+                  Callback) override;
+
+  const llvm::StringMap<SmallString<4096>> dumpInfos() const;
+
+private:
+  llvm::StringMap<std::vector<std::unique_ptr<Info>>> InfoIndex;
+};
+
+} // namespace doc
+} // namespace clang
+
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_DOC_INDEX_H
Index: clang-doc/Index.cpp
===================================================================
--- /dev/null
+++ clang-doc/Index.cpp
@@ -0,0 +1,68 @@
+///===-- Index.cpp - ClangDoc Index -----------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Index.h"
+#include "BitcodeWriter.h"
+#include "Representation.h"
+#include "llvm/Bitcode/BitstreamWriter.h"
+
+namespace clang {
+namespace doc {
+
+const llvm::StringMap<SmallString<4096>> InMemoryIndex::dumpInfos() const {
+  llvm::StringMap<SmallString<4096>> dumpedInfos;
+  for (const auto &InfoIdx : InfoIndex) {
+    llvm::BitstreamWriter Stream(dumpedInfos[InfoIdx.getKey()]);
+    ClangDocBitcodeWriter Writer(Stream);
+    for (const auto &I : InfoIdx.getValue()) {
+      switch (I->IT) {
+      case InfoType::IT_namespace:
+        Writer.emitBlock(*static_cast<clang::doc::NamespaceInfo *>(I.get()));
+        break;
+      case InfoType::IT_record:
+        Writer.emitBlock(*static_cast<clang::doc::RecordInfo *>(I.get()));
+        break;
+      case InfoType::IT_enum:
+        Writer.emitBlock(*static_cast<clang::doc::EnumInfo *>(I.get()));
+        break;
+      case InfoType::IT_function:
+        Writer.emitBlock(*static_cast<clang::doc::FunctionInfo *>(I.get()));
+        break;
+      default:
+        llvm::errs() << "Unexpected info in index.\n";
+        return dumpedInfos;
+      }
+    }
+  }
+  return dumpedInfos;
+}
+
+void InMemoryIndex::insert(StringRef Key, std::unique_ptr<Info> &&I) {
+  auto R = InfoIndex.try_emplace(Key, std::vector<std::unique_ptr<Info>>());
+  R.first->second.emplace_back(std::move(I));
+}
+
+void InMemoryIndex::reduce(llvm::function_ref<std::unique_ptr<Info>(
+                               std::vector<std::unique_ptr<Info>> &Value)>
+                               Callback) {
+  for (auto &Pair : InfoIndex) {
+    if (auto Reduced = Callback(Pair.getValue())) {
+      Pair.getValue().clear();
+      Pair.getValue().emplace_back(std::move(Reduced));
+      continue;
+    }
+    llvm::errs() << "Error reducing infos.\n";
+    // Merge process likely invalidated fields, so clear the infos to be sure.
+    Pair.getValue().clear();
+    return;
+  }
+}
+
+} // namespace doc
+} // namespace clang
\ No newline at end of file
Index: clang-doc/CMakeLists.txt
===================================================================
--- clang-doc/CMakeLists.txt
+++ clang-doc/CMakeLists.txt
@@ -3,9 +3,13 @@
   )
 
 add_clang_library(clangDoc
+  BitcodeReader.cpp
   BitcodeWriter.cpp
   ClangDoc.cpp
+  Index.cpp
   Mapper.cpp
+  Reducer.cpp
+  Representation.cpp
   Serialize.cpp
 
   LINK_LIBS
Index: clang-doc/BitcodeWriter.h
===================================================================
--- clang-doc/BitcodeWriter.h
+++ clang-doc/BitcodeWriter.h
@@ -34,7 +34,7 @@
 static const unsigned VersionNumber = 1;
 
 struct BitCodeConstants {
-  static constexpr unsigned RecordSize = 16U;
+  static constexpr unsigned RecordSize = 32U;
   static constexpr unsigned SignatureBitSize = 8U;
   static constexpr unsigned SubblockIDSize = 4U;
   static constexpr unsigned BoolSize = 1U;
@@ -45,6 +45,8 @@
   static constexpr unsigned ReferenceTypeSize = 8U;
   static constexpr unsigned USRLengthSize = 6U;
   static constexpr unsigned USRBitLengthSize = 8U;
+  static constexpr char Signature[4] = {'D', 'O', 'C', 'S'};
+  static constexpr int USRHashSize = 20;
 };
 
 // New Ids need to be added to both the enum here and the relevant IdNameMap in
@@ -121,13 +123,6 @@
     emitVersionBlock();
   }
 
-#ifndef NDEBUG // Don't want explicit dtor unless needed.
-  ~ClangDocBitcodeWriter() {
-    // Check that the static size is large-enough.
-    assert(Record.capacity() > BitCodeConstants::RecordSize);
-  }
-#endif
-
   // Block emission of different info types.
   void emitBlock(const NamespaceInfo &I);
   void emitBlock(const RecordInfo &I);
Index: clang-doc/BitcodeWriter.cpp
===================================================================
--- clang-doc/BitcodeWriter.cpp
+++ clang-doc/BitcodeWriter.cpp
@@ -228,6 +228,8 @@
 
 // AbbreviationMap
 
+constexpr char BitCodeConstants::Signature[];
+
 void ClangDocBitcodeWriter::AbbreviationMap::add(RecordId RID,
                                                  unsigned AbbrevID) {
   assert(RecordIdNameMap[RID] && "Unknown RecordId.");
@@ -246,7 +248,7 @@
 /// \brief Emits the magic number header to check that its the right format,
 /// in this case, 'DOCS'.
 void ClangDocBitcodeWriter::emitHeader() {
-  for (char C : llvm::StringRef("DOCS"))
+  for (char C : BitCodeConstants::Signature)
     Stream.Emit((unsigned)C, BitCodeConstants::SignatureBitSize);
 }
 
@@ -428,8 +430,7 @@
 
 void ClangDocBitcodeWriter::emitBlock(const CommentInfo &I) {
   StreamSubBlockGuard Block(Stream, BI_COMMENT_BLOCK_ID);
-  for (const auto &L :
-       std::vector<std::pair<llvm::StringRef, RecordId>>{
+  for (const auto &L : std::vector<std::pair<llvm::StringRef, RecordId>>{
            {I.Kind, COMMENT_KIND},
            {I.Text, COMMENT_TEXT},
            {I.Name, COMMENT_NAME},
Index: clang-doc/BitcodeReader.h
===================================================================
--- /dev/null
+++ clang-doc/BitcodeReader.h
@@ -0,0 +1,69 @@
+//===--  BitcodeReader.h - ClangDoc Bitcode Reader --------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a reader for parsing the clang-doc internal
+// representation from LLVM bitcode. The reader takes in a stream of bits and
+// generates the set of infos that it represents.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_DOC_BITCODEREADER_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_DOC_BITCODEREADER_H
+
+#include "BitcodeWriter.h"
+#include "Representation.h"
+#include "clang/AST/AST.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Bitcode/BitstreamReader.h"
+
+namespace clang {
+namespace doc {
+
+// Class to read bitstream into an InfoSet collection
+class ClangDocBitcodeReader {
+public:
+  ClangDocBitcodeReader(llvm::BitstreamCursor &Stream) : Stream(Stream) {}
+
+  // Main entry point, calls readBlock to read each block in the given stream.
+  std::vector<std::unique_ptr<Info>> readBitcode();
+
+private:
+  enum class Cursor { BadBlock = 1, Record, BlockEnd, BlockBegin };
+
+  // Top level parsing
+  bool validateStream();
+  bool readVersion();
+  bool readBlockInfoBlock();
+
+  // Read a block of records into a single Info struct, calls readRecord on each
+  // record found.
+  template <typename T> bool readBlock(unsigned ID, T I);
+
+  // Step through a block of records to find the next data field.
+  template <typename T> bool readSubBlock(unsigned ID, T I);
+
+  // Read record data into the given Info data field, calling the appropriate
+  // parseRecord functions to parse and store the data.
+  template <typename T> bool readRecord(unsigned ID, T I);
+
+  // Helper function to step through blocks to find and dispatch the next record
+  // or block to be read.
+  Cursor skipUntilRecordOrBlock(unsigned &BlockOrRecordID);
+
+  // Helper function to set up the appropriate type of Info.
+  std::unique_ptr<Info> readBlockToInfo(unsigned ID);
+
+  llvm::BitstreamCursor &Stream;
+  Optional<llvm::BitstreamBlockInfo> BlockInfo;
+};
+
+} // namespace doc
+} // namespace clang
+
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_DOC_BITCODEREADER_H
Index: clang-doc/BitcodeReader.cpp
===================================================================
--- /dev/null
+++ clang-doc/BitcodeReader.cpp
@@ -0,0 +1,528 @@
+//===--  BitcodeReader.cpp - ClangDoc Bitcode Reader ------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "BitcodeReader.h"
+#include "llvm/ADT/IndexedMap.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/Support/raw_ostream.h"
+
+namespace clang {
+namespace doc {
+
+SymbolID StringToSymbolUSR(llvm::StringRef StringUSR) {
+  SymbolID USR{}; // Zero-filled; input is clamped below to avoid an overrun.
+  std::string Bytes = fromHex(StringUSR).substr(0, USR.size());
+  std::copy(Bytes.begin(), Bytes.end(), USR.begin());
+  return USR;
+}
+
+using Record = llvm::SmallVector<uint64_t, 1024>;
+
+bool decodeRecord(Record R, llvm::SmallVectorImpl<char> &Field,
+                  llvm::StringRef Blob) {
+  Field.assign(Blob.begin(), Blob.end());
+  return true;
+}
+
+bool decodeRecord(Record R, SymbolID &Field, llvm::StringRef Blob) {
+  // R[0] holds the length of the array stored in R[1..]. Reject records too
+  // short to carry that length plus a full hash before indexing into them.
+  if (R.size() <= BitCodeConstants::USRHashSize ||
+      R[0] != BitCodeConstants::USRHashSize)
+    return false;
+  for (int I = 0, E = R[0]; I < E; ++I)
+    Field[I] = R[I + 1];
+  return true;
+}
+
+bool decodeRecord(Record R, bool &Field, llvm::StringRef Blob) {
+  Field = R[0] != 0;
+  return true;
+}
+
+bool decodeRecord(Record R, int &Field, llvm::StringRef Blob) {
+  if (R[0] > INT_MAX)
+    return false;
+  Field = (int)R[0];
+  return true;
+}
+
+bool decodeRecord(Record R, AccessSpecifier &Field, llvm::StringRef Blob) {
+  switch (R[0]) {
+  case AS_public:
+  case AS_private:
+  case AS_protected:
+  case AS_none:
+    Field = (AccessSpecifier)R[0];
+    return true;
+  default:
+    return false;
+  }
+}
+
+bool decodeRecord(Record R, TagTypeKind &Field, llvm::StringRef Blob) {
+  switch (R[0]) {
+  case TTK_Struct:
+  case TTK_Interface:
+  case TTK_Union:
+  case TTK_Class:
+  case TTK_Enum:
+    Field = (TagTypeKind)R[0];
+    return true;
+  default:
+    return false;
+  }
+}
+
+bool decodeRecord(Record R, llvm::Optional<Location> &Field,
+                  llvm::StringRef Blob) {
+  if (R[0] > INT_MAX)
+    return false;
+  Field.emplace((int)R[0], Blob);
+  return true;
+}
+
+bool decodeRecord(Record R, Reference &Field, llvm::StringRef Blob) {
+  switch (auto IT = static_cast<InfoType>(R[0])) {
+  case InfoType::IT_namespace:
+  case InfoType::IT_record:
+  case InfoType::IT_function:
+  case InfoType::IT_enum:
+    Field = Reference(StringToSymbolUSR(Blob), IT);
+    return true;
+  case InfoType::IT_default:
+    Field = Reference(Blob);
+    return true;
+  }
+  return false;
+}
+
+bool decodeRecord(Record R, llvm::SmallVectorImpl<llvm::SmallString<16>> &Field,
+                  llvm::StringRef Blob) {
+  Field.push_back(Blob);
+  return true;
+}
+
+bool decodeRecord(Record R, llvm::SmallVectorImpl<Location> &Field,
+                  llvm::StringRef Blob) {
+  if (R[0] > INT_MAX)
+    return false;
+  Field.emplace_back((int)R[0], Blob);
+  return true;
+}
+
+bool decodeRecord(Record R, llvm::SmallVectorImpl<Reference> &Field,
+                  llvm::StringRef Blob) {
+  switch (auto IT = static_cast<InfoType>(R[0])) {
+  case InfoType::IT_namespace:
+  case InfoType::IT_record:
+  case InfoType::IT_function:
+  case InfoType::IT_enum:
+    Field.emplace_back(StringToSymbolUSR(Blob), IT);
+    return true;
+  case InfoType::IT_default:
+    Field.emplace_back(Blob);
+    return true;
+  }
+  return false;
+}
+
+bool parseRecord(Record R, unsigned ID, llvm::StringRef Blob,
+                 const unsigned VersionNo) {
+  if (ID == VERSION && R[0] == VersionNo)
+    return true;
+  return false;
+}
+
+bool parseRecord(Record R, unsigned ID, llvm::StringRef Blob,
+                 NamespaceInfo *I) {
+  switch (ID) {
+  case NAMESPACE_USR:
+    return decodeRecord(R, I->USR, Blob);
+  case NAMESPACE_NAME:
+    return decodeRecord(R, I->Name, Blob);
+  case NAMESPACE_NAMESPACE:
+    return decodeRecord(R, I->Namespace, Blob);
+  default:
+    return false;
+  }
+}
+
+bool parseRecord(Record R, unsigned ID, llvm::StringRef Blob, RecordInfo *I) {
+  switch (ID) {
+  case RECORD_USR:
+    return decodeRecord(R, I->USR, Blob);
+  case RECORD_NAME:
+    return decodeRecord(R, I->Name, Blob);
+  case RECORD_NAMESPACE:
+    return decodeRecord(R, I->Namespace, Blob);
+  case RECORD_PARENT:
+    return decodeRecord(R, I->Parents, Blob);
+  case RECORD_VPARENT:
+    return decodeRecord(R, I->VirtualParents, Blob);
+  case RECORD_DEFLOCATION:
+    return decodeRecord(R, I->DefLoc, Blob);
+  case RECORD_LOCATION:
+    return decodeRecord(R, I->Loc, Blob);
+  case RECORD_TAG_TYPE:
+    return decodeRecord(R, I->TagType, Blob);
+  default:
+    return false;
+  }
+}
+
+// Decodes one record into the EnumInfo field selected by the record ID.
+// Returns the result of decodeRecord, or false for an ID not handled here.
+bool parseRecord(Record R, unsigned ID, llvm::StringRef Blob, EnumInfo *I) {
+  switch (ID) {
+  // USR, name and enclosing namespace.
+  case ENUM_USR:
+    return decodeRecord(R, I->USR, Blob);
+  case ENUM_NAME:
+    return decodeRecord(R, I->Name, Blob);
+  case ENUM_NAMESPACE:
+    return decodeRecord(R, I->Namespace, Blob);
+  // Source locations.
+  case ENUM_DEFLOCATION:
+    return decodeRecord(R, I->DefLoc, Blob);
+  case ENUM_LOCATION:
+    return decodeRecord(R, I->Loc, Blob);
+  // Members and the scoped flag.
+  case ENUM_MEMBER:
+    return decodeRecord(R, I->Members, Blob);
+  case ENUM_SCOPED:
+    return decodeRecord(R, I->Scoped, Blob);
+  default:
+    break;
+  }
+  return false;
+}
+
+// Decodes one record into the FunctionInfo field selected by the record ID.
+// Returns the result of decodeRecord, or false for an ID not handled here.
+bool parseRecord(Record R, unsigned ID, llvm::StringRef Blob, FunctionInfo *I) {
+  switch (ID) {
+  // USR, name, enclosing namespace and parent.
+  case FUNCTION_USR:
+    return decodeRecord(R, I->USR, Blob);
+  case FUNCTION_NAME:
+    return decodeRecord(R, I->Name, Blob);
+  case FUNCTION_NAMESPACE:
+    return decodeRecord(R, I->Namespace, Blob);
+  case FUNCTION_PARENT:
+    return decodeRecord(R, I->Parent, Blob);
+  // Source locations.
+  case FUNCTION_DEFLOCATION:
+    return decodeRecord(R, I->DefLoc, Blob);
+  case FUNCTION_LOCATION:
+    return decodeRecord(R, I->Loc, Blob);
+  // Access and the method flag.
+  case FUNCTION_ACCESS:
+    return decodeRecord(R, I->Access, Blob);
+  case FUNCTION_IS_METHOD:
+    return decodeRecord(R, I->IsMethod, Blob);
+  default:
+    break;
+  }
+  return false;
+}
+
+// Decodes a TYPE_REF record into TypeInfo::Type; any other record ID is
+// rejected.
+bool parseRecord(Record R, unsigned ID, llvm::StringRef Blob, TypeInfo *I) {
+  if (ID == TYPE_REF)
+    return decodeRecord(R, I->Type, Blob);
+  return false;
+}
+
+// Decodes one record into the FieldTypeInfo field selected by the record ID.
+// Returns the result of decodeRecord, or false for an ID not handled here.
+bool parseRecord(Record R, unsigned ID, llvm::StringRef Blob,
+                 FieldTypeInfo *I) {
+  if (ID == FIELD_TYPE_REF)
+    return decodeRecord(R, I->Type, Blob);
+  if (ID == FIELD_TYPE_NAME)
+    return decodeRecord(R, I->Name, Blob);
+  return false;
+}
+
+// Decodes one record into the MemberTypeInfo field selected by the record ID.
+// Returns the result of decodeRecord, or false for an ID not handled here.
+bool parseRecord(Record R, unsigned ID, llvm::StringRef Blob,
+                 MemberTypeInfo *I) {
+  if (ID == MEMBER_TYPE_REF)
+    return decodeRecord(R, I->Type, Blob);
+  if (ID == MEMBER_TYPE_NAME)
+    return decodeRecord(R, I->Name, Blob);
+  if (ID == MEMBER_TYPE_ACCESS)
+    return decodeRecord(R, I->Access, Blob);
+  return false;
+}
+
+// Decodes one record into the CommentInfo field selected by the record ID.
+// Returns the result of decodeRecord, or false for an ID not handled here.
+bool parseRecord(Record R, unsigned ID, llvm::StringRef Blob, CommentInfo *I) {
+  switch (ID) {
+  // Kind, text and name.
+  case COMMENT_KIND:
+    return decodeRecord(R, I->Kind, Blob);
+  case COMMENT_TEXT:
+    return decodeRecord(R, I->Text, Blob);
+  case COMMENT_NAME:
+    return decodeRecord(R, I->Name, Blob);
+  // Direction, parameter name and close name.
+  case COMMENT_DIRECTION:
+    return decodeRecord(R, I->Direction, Blob);
+  case COMMENT_PARAMNAME:
+    return decodeRecord(R, I->ParamName, Blob);
+  case COMMENT_CLOSENAME:
+    return decodeRecord(R, I->CloseName, Blob);
+  // List-valued fields: attribute keys, attribute values and arguments.
+  case COMMENT_ATTRKEY:
+    return decodeRecord(R, I->AttrKeys, Blob);
+  case COMMENT_ATTRVAL:
+    return decodeRecord(R, I->AttrValues, Blob);
+  case COMMENT_ARG:
+    return decodeRecord(R, I->Args, Blob);
+  // Flags.
+  case COMMENT_SELFCLOSING:
+    return decodeRecord(R, I->SelfClosing, Blob);
+  case COMMENT_EXPLICIT:
+    return decodeRecord(R, I->Explicit, Blob);
+  default:
+    break;
+  }
+  return false;
+}
+
+// Overload for an unsigned target, which cannot hold a comment: reports the
+// error on llvm::errs() and terminates the process with exit code 1. Never
+// returns.
+CommentInfo *getCommentInfo(const unsigned int I) {
+  auto &Err = llvm::errs();
+  Err << "Cannot have comment subblock.\n";
+  exit(1);
+}
+
+// Overload for a TypeInfo target, which cannot hold a comment: reports the
+// error on llvm::errs() and terminates the process with exit code 1. Never
+// returns.
+CommentInfo *getCommentInfo(TypeInfo *I) {
+  auto &Err = llvm::errs();
+  Err << "Cannot have comment subblock.\n";
+  exit(1);
+}
+
+// Appends a new, default-constructed comment to the info's Description and
+// returns a pointer to it.
+CommentInfo *getCommentInfo(Info *I) {
+  auto &Comments = I->Description;
+  Comments.emplace_back();
+  return &Comments.back();
+}
+
+// Appends a new, heap-allocated child comment to I->Children and returns a
+// non-owning pointer to it; ownership stays with the Children container.
+CommentInfo *getCommentInfo(CommentInfo *I) {
+  auto &Kids = I->Children;
+  Kids.emplace_back(llvm::make_unique<CommentInfo>());
+  return Kids.back().get();
+}
+
+// Convenience overload: forwards the comment held by the unique_ptr to the
+// CommentInfo* overload.
+CommentInfo *getCommentInfo(std::unique_ptr<CommentInfo> &I) {
+  CommentInfo *Held = I.get();
+  return getCommentInfo(Held);
+}
+
+// Primary template, used for any (info, type-info) pairing that has no
+// explicit specialization: reports the error on llvm::errs() and terminates
+// the process with exit code 1.
+template <typename T, typename TTypeInfo>
+void addTypeInfo(T I, TTypeInfo &&TI) {
+  auto &Err = llvm::errs();
+  Err << "Invalid type for info.\n";
+  exit(1);
+}
+
+// Moves a member type into the record's Members list.
+template <> void addTypeInfo(RecordInfo *I, MemberTypeInfo &&T) {
+  auto &Members = I->Members;
+  Members.emplace_back(std::move(T));
+}
+
+// Moves a plain type into the function's ReturnType, replacing whatever was
+// stored there.
+template <> void addTypeInfo(FunctionInfo *I, TypeInfo &&T) {
+  FunctionInfo &Func = *I;
+  Func.ReturnType = std::move(T);
+}
+
+// Moves a field type into the function's Params list.
+template <> void addTypeInfo(FunctionInfo *I, FieldTypeInfo &&T) {
+  auto &Params = I->Params;
+  Params.emplace_back(std::move(T));
+}
+
+// Reads a single record from the stream using ID, then hands the decoded
+// fields, the record code returned by the stream and the blob to the
+// parseRecord overload that matches I. Returns parseRecord's result.
+template <typename T> bool ClangDocBitcodeReader::readRecord(unsigned ID, T I) {
+  llvm::StringRef Blob;
+  Record R;
+  const unsigned RecID = Stream.readRecord(ID, R, &Blob);
+  return parseRecord(R, RecID, Blob, I);
+}
+
+// Read a block of records into a single info.
+//
+// Enters the sub-block ID and consumes its entries until the block ends.
+// Records are handed to readRecord and nested blocks to readSubBlock. Returns
+// true once the end of the block is reached; returns false if the block cannot
+// be entered, the cursor reports a bad block, a record cannot be parsed, or a
+// nested block that could not be read cannot be skipped either.
+template <typename T> bool ClangDocBitcodeReader::readBlock(unsigned ID, T I) {
+  // EnterSubBlock signals failure by returning true.
+  if (Stream.EnterSubBlock(ID))
+    return false;
+
+  while (true) {
+    unsigned BlockOrCode = 0;
+    Cursor Res = skipUntilRecordOrBlock(BlockOrCode);
+
+    switch (Res) {
+    case Cursor::BadBlock:
+      return false;
+    case Cursor::BlockEnd:
+      return true;
+    case Cursor::BlockBegin:
+      if (readSubBlock(BlockOrCode, I))
+        continue;
+      // The nested block was not consumed; step over it. SkipBlock follows the
+      // same convention as EnterSubBlock (true means failure), so only give up
+      // when the skip itself fails.
+      // NOTE(review): if readSubBlock failed part-way through a known block the
+      // cursor is already inside it, so this skip may misread -- confirm.
+      if (Stream.SkipBlock())
+        return false;
+      continue;
+    case Cursor::Record:
+      break;
+    }
+    if (!readRecord(BlockOrCode, I))
+      return false;
+  }
+}
+
+// Reads the nested block ID and attaches its contents to I. Comment blocks are
+// read into the comment obtained from getCommentInfo(I); type blocks are read
+// into a local of the matching type and, on success, moved into I through
+// addTypeInfo. Returns false if the nested block could not be read or its ID
+// is not one of the handled kinds.
+template <typename T>
+bool ClangDocBitcodeReader::readSubBlock(unsigned ID, T I) {
+  // Blocks can only have Comment or TypeInfo subblocks
+  if (ID == BI_COMMENT_BLOCK_ID)
+    return readBlock(ID, getCommentInfo(I));
+
+  if (ID == BI_TYPE_BLOCK_ID) {
+    TypeInfo TI;
+    if (!readBlock(ID, &TI))
+      return false;
+    addTypeInfo(I, std::move(TI));
+    return true;
+  }
+
+  if (ID == BI_FIELD_TYPE_BLOCK_ID) {
+    FieldTypeInfo TI;
+    if (!readBlock(ID, &TI))
+      return false;
+    addTypeInfo(I, std::move(TI));
+    return true;
+  }
+
+  if (ID == BI_MEMBER_TYPE_BLOCK_ID) {
+    MemberTypeInfo TI;
+    if (!readBlock(ID, &TI))
+      return false;
+    addTypeInfo(I, std::move(TI));
+    return true;
+  }
+
+  llvm::errs() << "Invalid subblock type.\n";
+  return false;
+}
+
+// Advances the stream to the next record or block boundary.
+//
+// Abbreviation definitions met on the way are consumed. BlockOrRecordID is
+// reset to 0 and then set to the sub-block ID when Cursor::BlockBegin is
+// returned, or to the abbreviation code of the pending record when
+// Cursor::Record is returned. Cursor::BlockEnd is returned after a well-formed
+// block end. Cursor::BadBlock is returned for a malformed block end, an
+// unabbreviated record, or a stream that ends before any of the above.
+ClangDocBitcodeReader::Cursor
+ClangDocBitcodeReader::skipUntilRecordOrBlock(unsigned &BlockOrRecordID) {
+  BlockOrRecordID = 0;
+
+  while (!Stream.AtEndOfStream()) {
+    const unsigned Code = Stream.ReadCode();
+
+    // Switch on the raw code: application abbreviation codes are not
+    // FixedAbbrevIDs enumerators, so the value is not cast to the enum.
+    switch (Code) {
+    case llvm::bitc::ENTER_SUBBLOCK:
+      BlockOrRecordID = Stream.ReadSubBlockID();
+      return Cursor::BlockBegin;
+    case llvm::bitc::END_BLOCK:
+      // ReadBlockEnd signals failure by returning true.
+      if (Stream.ReadBlockEnd())
+        return Cursor::BadBlock;
+      return Cursor::BlockEnd;
+    case llvm::bitc::DEFINE_ABBREV:
+      Stream.ReadAbbrevRecord();
+      continue;
+    case llvm::bitc::UNABBREV_RECORD:
+      return Cursor::BadBlock;
+    default:
+      BlockOrRecordID = Code;
+      return Cursor::Record;
+    }
+  }
+  // A truncated input runs out of stream before the block is closed. That is
+  // bad data rather than a programming error, so report it as a bad block
+  // instead of marking this point unreachable.
+  llvm::errs() << "Premature stream end.\n";
+  return Cursor::BadBlock;
+}
+
+// Checks that the stream is not already at its end and that it starts with
+// the four bytes of BitCodeConstants::Signature.
+bool ClangDocBitcodeReader::validateStream() {
+  if (Stream.AtEndOfStream())
+    return false;
+
+  // Sniff for the signature, one 8-bit read per byte, stopping at the first
+  // mismatch.
+  for (unsigned Idx = 0; Idx != 4; ++Idx) {
+    if (Stream.Read(8) != BitCodeConstants::Signature[Idx])
+      return false;
+  }
+  return true;
+}
+
+// Reads the block-info block into the BlockInfo member and, if one was
+// produced, registers it with the stream. Returns whether a block info was
+// obtained.
+bool ClangDocBitcodeReader::readBlockInfoBlock() {
+  BlockInfo = Stream.ReadBlockInfoBlock();
+  if (BlockInfo) {
+    Stream.setBlockInfo(&*BlockInfo);
+    return true;
+  }
+  return false;
+}
+
+// Reads the block ID into a newly allocated info of the matching kind.
+// Returns the populated info on success. Prints an error and returns nullptr
+// when ID is not one of the four info block IDs or the block cannot be read.
+std::unique_ptr<Info> ClangDocBitcodeReader::readBlockToInfo(unsigned ID) {
+  std::unique_ptr<Info> I;
+  bool Parsed = false;
+
+  // Each case allocates the concrete info, tags it with its InfoType, and
+  // reads the block through a pointer of the concrete type so that readBlock
+  // is instantiated for that type.
+  switch (ID) {
+  case BI_NAMESPACE_BLOCK_ID:
+    I = llvm::make_unique<NamespaceInfo>();
+    I->IT = InfoType::IT_namespace;
+    Parsed = readBlock(ID, static_cast<NamespaceInfo *>(I.get()));
+    break;
+  case BI_RECORD_BLOCK_ID:
+    I = llvm::make_unique<RecordInfo>();
+    I->IT = InfoType::IT_record;
+    Parsed = readBlock(ID, static_cast<RecordInfo *>(I.get()));
+    break;
+  case BI_ENUM_BLOCK_ID:
+    I = llvm::make_unique<EnumInfo>();
+    I->IT = InfoType::IT_enum;
+    Parsed = readBlock(ID, static_cast<EnumInfo *>(I.get()));
+    break;
+  case BI_FUNCTION_BLOCK_ID:
+    I = llvm::make_unique<FunctionInfo>();
+    I->IT = InfoType::IT_function;
+    Parsed = readBlock(ID, static_cast<FunctionInfo *>(I.get()));
+    break;
+  default:
+    break;
+  }
+
+  if (Parsed)
+    return I;
+  llvm::errs() << "Error reading from block.\n";
+  return nullptr;
+}
+
+// Entry point
+//
+// Validates the stream signature, then walks the top-level blocks. Version
+// and BLOCKINFO blocks are consumed and the walk continues; unknown blocks
+// are skipped. The first info block (namespace, record, enum or function)
+// ends the walk, and the returned vector holds that info if it could be read.
+// Any error (bad signature, an entry that is not a block, a misplaced
+// type/comment block, a block that fails to read or skip) also ends the walk
+// and returns whatever has been collected so far.
+std::vector<std::unique_ptr<Info>> ClangDocBitcodeReader::readBitcode() {
+  std::vector<std::unique_ptr<Info>> Infos;
+  if (!validateStream())
+    return Infos;
+
+  // Read the top level blocks.
+  while (!Stream.AtEndOfStream()) {
+    unsigned Code = Stream.ReadCode();
+    if (Code != llvm::bitc::ENTER_SUBBLOCK)
+      return Infos;
+
+    unsigned ID = Stream.ReadSubBlockID();
+    switch (ID) {
+    // NamedType and Comment blocks should not appear at the top level
+    case BI_TYPE_BLOCK_ID:
+    case BI_FIELD_TYPE_BLOCK_ID:
+    case BI_MEMBER_TYPE_BLOCK_ID:
+    case BI_COMMENT_BLOCK_ID:
+      llvm::errs() << "Invalid top level block.\n";
+      return Infos;
+    case BI_NAMESPACE_BLOCK_ID:
+    case BI_RECORD_BLOCK_ID:
+    case BI_ENUM_BLOCK_ID:
+    case BI_FUNCTION_BLOCK_ID:
+      if (std::unique_ptr<Info> I = readBlockToInfo(ID))
+        Infos.emplace_back(std::move(I));
+      return Infos;
+    case BI_VERSION_BLOCK_ID:
+      if (readBlock(ID, VersionNumber))
+        continue;
+      return Infos;
+    case llvm::bitc::BLOCKINFO_BLOCK_ID:
+      if (readBlockInfoBlock())
+        continue;
+      return Infos;
+    default:
+      // SkipBlock returns true when the block is malformed; stop there rather
+      // than keep decoding from an unknown position.
+      if (Stream.SkipBlock())
+        return Infos;
+      continue;
+    }
+  }
+  return Infos;
+}
+
+} // namespace doc
+} // namespace clang
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to