================ @@ -804,6 +806,46 @@ void FormatTokenLexer::handleTableGenMultilineString() { FirstLineText, MultiLineString->OriginalColumn, Style.TabWidth, Encoding); } +void FormatTokenLexer::handleTableGenNumericLikeIdentifier() { + FormatToken *Tok = Tokens.back(); + // TableGen identifiers can begin with digits. Such tokens are lexed as + // numeric_constant now. + if (Tok->isNot(tok::numeric_constant)) + return; + StringRef Text = Tok->TokenText; + // Identifiers cannot begin with + or -. + if (Text.size() < 1 || Text[0] == '+' || Text[0] == '-') + return; + // The following check is based on llvm::TGLexer::LexToken. + if (isdigit(Text[0])) { + size_t I = 0; + char NextChar = (char)0; + // Identifiers in TalbleGen may begin with digits. Skip to first non-digit. + do { + NextChar = Text[I++]; + } while (I < Text.size() && isdigit(NextChar)); + // All the characters are digits. + if (I >= Text.size()) + return; + // Base character. But it does not check the first 0 and that the base is + // the second character. ---------------- hnakamura5 wrote:
Yes to both questions. This is about the TableGen compiler's lexer. As you suspected, this comment may not be precise enough; I will fix it later. For example, `0x1234x` is regarded as an integer because the lexer assumes it is an integer as soon as it has read the `0x1` part. This is the syntax-error example written in the unit test. What I want to note with this comment is that `1x1234x` is also regarded as an integer (and is therefore a syntax error as well). This behavior arises because the lexer does not check whether the character before 'x' is 0 or some other digit. (FYI, `1y1234x` is a valid identifier. Such an ambiguity arises only when the first non-digit character is 'x' or 'b'.) https://github.com/llvm/llvm-project/pull/78571 _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits