[PATCH] Support byte and byte string literals

2021-07-25 Thread Mark Wielaard
A byte literal is a u8 created from an ASCII char or a hex escape,
e.g. b'X'.  A byte string literal is a string created from ASCII or
hex chars.  Bytes are represented as u8 and byte strings as str
(containing only chars with values < 256), but a byte string should
really be a &'static [u8; n].
---
 gcc/rust/backend/rust-compile-expr.h  |  9 -
 gcc/rust/parse/rust-parse-impl.h  |  8 
 gcc/rust/rust-backend.h   |  3 +++
 gcc/rust/rust-gcc.cc  |  9 +
 gcc/rust/typecheck/rust-hir-type-check-expr.h | 19 +++
 .../rust/compile/torture/byte_char_str.rs |  8 
 6 files changed, 55 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/rust/compile/torture/byte_char_str.rs

diff --git a/gcc/rust/backend/rust-compile-expr.h 
b/gcc/rust/backend/rust-compile-expr.h
index dff4712e18e..fa6a53991ac 100644
--- a/gcc/rust/backend/rust-compile-expr.h
+++ b/gcc/rust/backend/rust-compile-expr.h
@@ -278,7 +278,14 @@ public:
}
return;
 
-   case HIR::Literal::STRING: {
+   case HIR::Literal::BYTE: {
+ char c = literal_value->as_string ().c_str ()[0];
+ translated = ctx->get_backend ()->char_constant_expression (c);
+   }
+   return;
+
+  case HIR::Literal::STRING:
+   case HIR::Literal::BYTE_STRING: {
  auto base = ctx->get_backend ()->string_constant_expression (
literal_value->as_string ());
  translated
diff --git a/gcc/rust/parse/rust-parse-impl.h b/gcc/rust/parse/rust-parse-impl.h
index be261715c6c..73600d22d60 100644
--- a/gcc/rust/parse/rust-parse-impl.h
+++ b/gcc/rust/parse/rust-parse-impl.h
@@ -12545,10 +12545,18 @@ Parser::null_denotation 
(const_TokenPtr tok,
   return std::unique_ptr (
new AST::LiteralExpr (tok->get_str (), AST::Literal::STRING,
  tok->get_type_hint (), {}, tok->get_locus ()));
+case BYTE_STRING_LITERAL:
+  return std::unique_ptr (
+   new AST::LiteralExpr (tok->get_str (), AST::Literal::BYTE_STRING,
+ tok->get_type_hint (), {}, tok->get_locus ()));
 case CHAR_LITERAL:
   return std::unique_ptr (
new AST::LiteralExpr (tok->get_str (), AST::Literal::CHAR,
  tok->get_type_hint (), {}, tok->get_locus ()));
+case BYTE_CHAR_LITERAL:
+  return std::unique_ptr (
+   new AST::LiteralExpr (tok->get_str (), AST::Literal::BYTE,
+ tok->get_type_hint (), {}, tok->get_locus ()));
 case TRUE_LITERAL:
   return std::unique_ptr (
new AST::LiteralExpr ("true", AST::Literal::BOOL, tok->get_type_hint (),
diff --git a/gcc/rust/rust-backend.h b/gcc/rust/rust-backend.h
index 35271b60f43..1dd4aba12ca 100644
--- a/gcc/rust/rust-backend.h
+++ b/gcc/rust/rust-backend.h
@@ -331,6 +331,9 @@ public:
   // Return an expression for the string value VAL.
   virtual Bexpression *string_constant_expression (const std::string &val) = 0;
 
+  // Get a char literal
+  virtual Bexpression *char_constant_expression (char c) = 0;
+
   // Get a char literal
   virtual Bexpression *wchar_constant_expression (wchar_t c) = 0;
 
diff --git a/gcc/rust/rust-gcc.cc b/gcc/rust/rust-gcc.cc
index 74a8b5221f1..23a91ad9bcb 100644
--- a/gcc/rust/rust-gcc.cc
+++ b/gcc/rust/rust-gcc.cc
@@ -333,6 +333,8 @@ public:
 
   Bexpression *wchar_constant_expression (wchar_t c);
 
+  Bexpression *char_constant_expression (char c);
+
   Bexpression *boolean_constant_expression (bool val);
 
   Bexpression *real_part_expression (Bexpression *bcomplex, Location);
@@ -1557,6 +1559,13 @@ Gcc_backend::wchar_constant_expression (wchar_t c)
   return this->make_expression (ret);
 }
 
+Bexpression *
+Gcc_backend::char_constant_expression (char c)
+{
+  tree ret = build_int_cst (this->char_type ()->get_tree (), c);
+  return this->make_expression (ret);
+}
+
 // Make a constant boolean expression.
 
 Bexpression *
diff --git a/gcc/rust/typecheck/rust-hir-type-check-expr.h 
b/gcc/rust/typecheck/rust-hir-type-check-expr.h
index 166535acba0..6e5b2312f50 100644
--- a/gcc/rust/typecheck/rust-hir-type-check-expr.h
+++ b/gcc/rust/typecheck/rust-hir-type-check-expr.h
@@ -542,6 +542,12 @@ public:
}
break;
 
+   case HIR::Literal::LitType::BYTE: {
+ auto ok = context->lookup_builtin ("u8", &infered);
+ rust_assert (ok);
+   }
+   break;
+
case HIR::Literal::LitType::STRING: {
  TyTy::BaseType *base = nullptr;
  auto ok = context->lookup_builtin ("str", &base);
@@ -553,6 +559,19 @@ public:
}
break;
 
+   case HIR::Literal::LitType::BYTE_STRING: {
+ /* We just treat this as a string, but it really is an arraytype of
+u8. It isn't in UTF-8, but really just a byte array.  */
+ TyTy::BaseType *base = nullptr;
+ auto ok = context->lookup_builtin ("str", &base);
+ rust_assert (ok);
+
+ infered
+ 

[PATCH] Support RangeFrom ([x..]) and RangeFromTo ([x..y]) in the parser

2021-07-25 Thread Mark Wielaard
Parsing the .. (DOT_DOT) operator to get a range had two
issues. Trying to compile:

  let block = [1,2,3,4,5];
  let _rf = &block[1..];
  let _rt = &block[..3];
  let _rft = &block[2..4];

range.rs:4:23: error: found unexpected token ‘]’ in null denotation
4 |   let _rf = &block[1..];
  |   ^
range.rs:4:24: error: expecting ‘]’ but ‘;’ found
4 |   let _rf = &block[1..];
  |^

Since .. can represent either a range-from or a range-from-to, it may
or may not be followed by an expression. We do have a hack in our
pratt-parser so that it is allowed to return a nullptr. But even in
that case it will already have swallowed the next token. Add another
hack to the pratt-parser so that, if the next token is one that cannot
start an expression and the caller allows a nullptr return, we don't
skip the token and return immediately.

After this patch we can parse the above range expressions, but we
still don't handle them fully:

range.rs:4:20: fatal error: Failed to lower expr: [1..]
4 |   let _rf = &block[1..];
  |^

Ranges are actually syntactic sugar for std::ops::Range[From|To].
---
 gcc/rust/parse/rust-parse-impl.h | 15 +++
 1 file changed, 15 insertions(+)

diff --git a/gcc/rust/parse/rust-parse-impl.h b/gcc/rust/parse/rust-parse-impl.h
index be261715c6c..7b128fff157 100644
--- a/gcc/rust/parse/rust-parse-impl.h
+++ b/gcc/rust/parse/rust-parse-impl.h
@@ -12348,6 +12348,18 @@ Parser::parse_expr (int 
right_binding_power,
ParseRestrictions restrictions)
 {
   const_TokenPtr current_token = lexer.peek_token ();
+  // Special hack because we are allowed to return nullptr, in that case we
+  // don't want to skip the token, since we don't actually parse it. But if
+  // null isn't allowed it indicates an error, and we want to skip past that.
+  // So return early if it is one of the tokens that ends an expression
+  // (or at least cannot start a new expression).
+  if (restrictions.expr_can_be_null)
+{
+  TokenId id = current_token->get_id ();
+  if (id == SEMICOLON || id == RIGHT_PAREN || id == RIGHT_CURLY
+ || id == RIGHT_SQUARE)
+   return nullptr;
+}
   lexer.skip_token ();
 
   // parse null denotation (unary part of expression)
@@ -14028,6 +14040,9 @@ 
Parser::parse_led_range_exclusive_expr (
 {
   // FIXME: this probably parses expressions accidently or whatever
   // try parsing RHS (as tok has already been consumed in parse_expression)
+  // Can be nullptr, in which case it is a RangeFromExpr, otherwise a
+  // RangeFromToExpr.
+  restrictions.expr_can_be_null = true;
   std::unique_ptr right
 = parse_expr (LBP_DOT_DOT, AST::AttrVec (), restrictions);
 
-- 
2.32.0

-- 
Gcc-rust mailing list
Gcc-rust@gcc.gnu.org
https://gcc.gnu.org/mailman/listinfo/gcc-rust