================ @@ -0,0 +1,205 @@ +//===-- DILLexer.cpp ------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// This implements the recursive descent parser for the Data Inspection +// Language (DIL), and its helper functions, which will eventually underlie the +// 'frame variable' command. The language that this parser recognizes is +// described in lldb/docs/dil-expr-lang.ebnf +// +//===----------------------------------------------------------------------===// + +#include "lldb/ValueObject/DILLexer.h" +#include "llvm/ADT/StringMap.h" + +namespace lldb_private { + +namespace dil { + +// For fast keyword lookup. More keywords will be added later. +const llvm::StringMap<dil::TokenKind> Keywords = { + {"namespace", dil::TokenKind::kw_namespace}, +}; + +const std::string DILToken::getTokenName(dil::TokenKind kind) { + switch (kind) { + case dil::TokenKind::coloncolon: + return "coloncolon"; + case dil::TokenKind::eof: + return "eof"; + case dil::TokenKind::identifier: + return "identifier"; + case dil::TokenKind::kw_namespace: + return "namespace"; + case dil::TokenKind::l_paren: + return "l_paren"; + case dil::TokenKind::r_paren: + return "r_paren"; + case dil::TokenKind::unknown: + return "unknown"; + default: + return "token_name"; + } +} + +static bool Is_Letter(char c) { + if (('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z')) + return true; + return false; +} + +static bool Is_Digit(char c) { return ('0' <= c && c <= '9'); } + +// A word starts with a letter, underscore, or dollar sign, followed by +// letters ('a'..'z','A'..'Z'), digits ('0'..'9'), and/or underscores. +bool DILLexer::Is_Word(std::string::iterator start, uint32_t &length) { + bool done = false; + bool dollar_start = false; + + // Must not start with a digit. 
+ if (m_cur_pos == m_expr.end() || Is_Digit(*m_cur_pos)) + return false; + + // First character *may* be a '$', for a register name or convenience + // variable. + if (*m_cur_pos == '$') { + dollar_start = true; + ++m_cur_pos; + length++; + } + + // Contains only letters, digits or underscores + for (; m_cur_pos != m_expr.end() && !done; ++m_cur_pos) { + char c = *m_cur_pos; + if (!Is_Letter(c) && !Is_Digit(c) && c != '_') { + done = true; + break; + } else + length++; + } + + if (dollar_start && length > 1) // Must have something besides just '$' + return true; + + if (!dollar_start && length > 0) + return true; + + // Not a valid word, so re-set the lexing position. + m_cur_pos = start; ---------------- labath wrote:
AFAICT, this is the only use of the `start` argument, which makes for a very weird API. Perhaps the function could make a note of the starting position internally, and then return the range it found to the caller (I'd suggest a return type of `iterator_range<string::iterator>`, with the empty range meaning "no word found"). https://github.com/llvm/llvm-project/pull/123521 _______________________________________________ lldb-commits mailing list lldb-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/lldb-commits