hokein updated this revision to Diff 453098. hokein added a comment. Update based on the offline discussion -- we'd like to keep all categories of IDENTIFIER (type-name, namespace-name, template-name, class-name), as they are useful in disambiguation, but we eliminate the ambiguities within each category:
- eliminate all the different type rules (class-name, enum-name, typedef-name) and fold them into a unified type-name; this removes the #1 type-name ambiguity and gives us a big performance boost; - remove the namespace-alias rules, as they're less interesting and only marginally useful. Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D130747/new/ https://reviews.llvm.org/D130747 Files: clang-tools-extra/pseudo/lib/cxx/cxx.bnf clang-tools-extra/pseudo/test/glr.cpp Index: clang-tools-extra/pseudo/test/glr.cpp =================================================================== --- clang-tools-extra/pseudo/test/glr.cpp +++ clang-tools-extra/pseudo/test/glr.cpp @@ -12,10 +12,7 @@ // CHECK-NEXT: │ └─; := tok[8] // CHECK-NEXT: └─statement~simple-declaration := decl-specifier-seq init-declarator-list ; // CHECK-NEXT: ├─decl-specifier-seq~simple-type-specifier := <ambiguous> -// CHECK-NEXT: │ ├─simple-type-specifier~type-name := <ambiguous> -// CHECK-NEXT: │ │ ├─type-name~IDENTIFIER := tok[5] -// CHECK-NEXT: │ │ ├─type-name~IDENTIFIER := tok[5] -// CHECK-NEXT: │ │ └─type-name~IDENTIFIER := tok[5] +// CHECK-NEXT: │ ├─simple-type-specifier~IDENTIFIER := tok[5] // CHECK-NEXT: │ └─simple-type-specifier~IDENTIFIER := tok[5] // CHECK-NEXT: ├─init-declarator-list~ptr-declarator := ptr-operator ptr-declarator // CHECK-NEXT: │ ├─ptr-operator~* := tok[6] @@ -23,12 +20,11 @@ // CHECK-NEXT: └─; := tok[8] } -// CHECK: 3 Ambiguous nodes: +// CHECK: 2 Ambiguous nodes: // CHECK-NEXT: 1 simple-type-specifier // CHECK-NEXT: 1 statement -// CHECK-NEXT: 1 type-name // CHECK-EMPTY: // CHECK-NEXT: 0 Opaque nodes: // CHECK-EMPTY: -// CHECK-NEXT: Ambiguity: 0.40 misparses/token +// CHECK-NEXT: Ambiguity: 0.20 misparses/token // CHECK-NEXT: Unparsed: 0.00% Index: clang-tools-extra/pseudo/lib/cxx/cxx.bnf =================================================================== --- clang-tools-extra/pseudo/lib/cxx/cxx.bnf +++ clang-tools-extra/pseudo/lib/cxx/cxx.bnf @@ -34,14 
+34,10 @@ _ := declaration-seq # gram.key -typedef-name := IDENTIFIER -typedef-name := simple-template-id +#! No namespace-alis rules, as they're less interesting and marginal useful. namespace-name := IDENTIFIER -namespace-name := namespace-alias -namespace-alias := IDENTIFIER class-name := IDENTIFIER class-name := simple-template-id -enum-name := IDENTIFIER template-name := IDENTIFIER # gram.basic @@ -391,9 +387,12 @@ builtin-type := FLOAT builtin-type := DOUBLE builtin-type := VOID -type-name := class-name -type-name := enum-name -type-name := typedef-name +#! Unlike C++ standard grammar, we don't distinguish the underlying type (class, +#! enum, typedef) of the IDENTIFIER, as these ambiguities are "local" and don't +#! affect the final parse tree. Eliminating them gives a significant performance +#! boost to the parser. +type-name := IDENTIFIER +type-name := simple-template-id elaborated-type-specifier := class-key nested-name-specifier_opt IDENTIFIER elaborated-type-specifier := class-key simple-template-id elaborated-type-specifier := class-key nested-name-specifier TEMPLATE_opt simple-template-id
Index: clang-tools-extra/pseudo/test/glr.cpp =================================================================== --- clang-tools-extra/pseudo/test/glr.cpp +++ clang-tools-extra/pseudo/test/glr.cpp @@ -12,10 +12,7 @@ // CHECK-NEXT: │ └─; := tok[8] // CHECK-NEXT: └─statement~simple-declaration := decl-specifier-seq init-declarator-list ; // CHECK-NEXT: ├─decl-specifier-seq~simple-type-specifier := <ambiguous> -// CHECK-NEXT: │ ├─simple-type-specifier~type-name := <ambiguous> -// CHECK-NEXT: │ │ ├─type-name~IDENTIFIER := tok[5] -// CHECK-NEXT: │ │ ├─type-name~IDENTIFIER := tok[5] -// CHECK-NEXT: │ │ └─type-name~IDENTIFIER := tok[5] +// CHECK-NEXT: │ ├─simple-type-specifier~IDENTIFIER := tok[5] // CHECK-NEXT: │ └─simple-type-specifier~IDENTIFIER := tok[5] // CHECK-NEXT: ├─init-declarator-list~ptr-declarator := ptr-operator ptr-declarator // CHECK-NEXT: │ ├─ptr-operator~* := tok[6] @@ -23,12 +20,11 @@ // CHECK-NEXT: └─; := tok[8] } -// CHECK: 3 Ambiguous nodes: +// CHECK: 2 Ambiguous nodes: // CHECK-NEXT: 1 simple-type-specifier // CHECK-NEXT: 1 statement -// CHECK-NEXT: 1 type-name // CHECK-EMPTY: // CHECK-NEXT: 0 Opaque nodes: // CHECK-EMPTY: -// CHECK-NEXT: Ambiguity: 0.40 misparses/token +// CHECK-NEXT: Ambiguity: 0.20 misparses/token // CHECK-NEXT: Unparsed: 0.00% Index: clang-tools-extra/pseudo/lib/cxx/cxx.bnf =================================================================== --- clang-tools-extra/pseudo/lib/cxx/cxx.bnf +++ clang-tools-extra/pseudo/lib/cxx/cxx.bnf @@ -34,14 +34,10 @@ _ := declaration-seq # gram.key -typedef-name := IDENTIFIER -typedef-name := simple-template-id +#! No namespace-alis rules, as they're less interesting and marginal useful. 
namespace-name := IDENTIFIER -namespace-name := namespace-alias -namespace-alias := IDENTIFIER class-name := IDENTIFIER class-name := simple-template-id -enum-name := IDENTIFIER template-name := IDENTIFIER # gram.basic @@ -391,9 +387,12 @@ builtin-type := FLOAT builtin-type := DOUBLE builtin-type := VOID -type-name := class-name -type-name := enum-name -type-name := typedef-name +#! Unlike C++ standard grammar, we don't distinguish the underlying type (class, +#! enum, typedef) of the IDENTIFIER, as these ambiguities are "local" and don't +#! affect the final parse tree. Eliminating them gives a significant performance +#! boost to the parser. +type-name := IDENTIFIER +type-name := simple-template-id elaborated-type-specifier := class-key nested-name-specifier_opt IDENTIFIER elaborated-type-specifier := class-key simple-template-id elaborated-type-specifier := class-key nested-name-specifier TEMPLATE_opt simple-template-id
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits