jrtc27 created this revision.
jrtc27 added reviewers: nand, jfb, Bigcheese, rsmith, dexonsmith.
Herald added subscribers: kristof.beyls, arichardson.
jrtc27 requested review of this revision.
Herald added a project: clang.
Herald added a subscriber: cfe-commits.

The Clang interpreter's bytecode uses a packed stream of bytes
representation, but also wants to have some opcodes take pointers as
arguments, which are currently embedded in the bytecode directly.

However, CHERI, and thus Arm's upcoming experimental Morello prototype,
provide spatial memory safety for C/C++ by implementing language-level
(and sub-language-level) pointers as capabilities, which track bounds,
permissions and validity in hardware. This uses tagged memory with a
single tag bit at every capability-aligned address, and so storing
pointers to unaligned addresses results in the tag being stripped,
leading to a tag fault when the pointer is ultimately dereferenced at a
later point.

In order to support a stricter C/C++ implementation like CHERI, we no
longer store pointers directly in the bytecode, instead storing them in
a table and embedding the index in the bytecode.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D97606

Files:
  clang/lib/AST/Interp/ByteCodeEmitter.cpp
  clang/lib/AST/Interp/Disasm.cpp
  clang/lib/AST/Interp/Interp.h
  clang/lib/AST/Interp/Program.cpp
  clang/lib/AST/Interp/Program.h
  clang/lib/AST/Interp/Source.h
  clang/utils/TableGen/ClangOpcodesEmitter.cpp

Index: clang/utils/TableGen/ClangOpcodesEmitter.cpp
===================================================================
--- clang/utils/TableGen/ClangOpcodesEmitter.cpp
+++ clang/utils/TableGen/ClangOpcodesEmitter.cpp
@@ -124,7 +124,7 @@
     for (size_t I = 0, N = Args.size(); I < N; ++I) {
       OS << "  auto V" << I;
       OS << " = ";
-      OS << "PC.read<" << Args[I]->getValueAsString("Name") << ">();\n";
+      OS << "ReadArg<" << Args[I]->getValueAsString("Name") << ">(S, PC);\n";
     }
 
     // Emit a call to the template method and pass arguments.
@@ -162,7 +162,8 @@
     OS << "  OS << \"\\t\"";
 
     for (auto *Arg : R->getValueAsListOfDefs("Args"))
-      OS << " << PC.read<" << Arg->getValueAsString("Name") << ">() << \" \"";
+      OS << " << ReadArg<" << Arg->getValueAsString("Name")
+         << ">(P, PC) << \" \"";
 
     OS << " << \"\\n\";\n";
     OS << "  continue;\n";
Index: clang/lib/AST/Interp/Source.h
===================================================================
--- clang/lib/AST/Interp/Source.h
+++ clang/lib/AST/Interp/Source.h
@@ -44,8 +44,9 @@
   bool operator!=(const CodePtr &RHS) const { return Ptr != RHS.Ptr; }
 
   /// Reads data and advances the pointer.
-  template <typename T> T read() {
-    T Value = ReadHelper<T>(Ptr);
+  template <typename T> std::enable_if_t<!std::is_pointer<T>::value, T> read() {
+    using namespace llvm::support;
+    T Value = endian::read<T, endianness::native, 1>(Ptr);
     Ptr += sizeof(T);
     return Value;
   }
@@ -54,22 +55,6 @@
   /// Constructor used by Function to generate pointers.
   CodePtr(const char *Ptr) : Ptr(Ptr) {}
 
-  /// Helper to decode a value or a pointer.
-  template <typename T>
-  static std::enable_if_t<!std::is_pointer<T>::value, T>
-  ReadHelper(const char *Ptr) {
-    using namespace llvm::support;
-    return endian::read<T, endianness::native, 1>(Ptr);
-  }
-
-  template <typename T>
-  static std::enable_if_t<std::is_pointer<T>::value, T>
-  ReadHelper(const char *Ptr) {
-    using namespace llvm::support;
-    auto Punned = endian::read<uintptr_t, endianness::native, 1>(Ptr);
-    return reinterpret_cast<T>(Punned);
-  }
-
 private:
   friend class Function;
 
Index: clang/lib/AST/Interp/Program.h
===================================================================
--- clang/lib/AST/Interp/Program.h
+++ clang/lib/AST/Interp/Program.h
@@ -44,6 +44,12 @@
 public:
   Program(Context &Ctx) : Ctx(Ctx) {}
 
+  /// Marshals a native pointer to an ID for embedding in bytecode.
+  unsigned getOrCreateNativePointer(const void *Ptr);
+
+  /// Returns the value of a marshalled native pointer.
+  const void *getNativePointer(unsigned Idx);
+
   /// Emits a string literal among global data.
   unsigned createGlobalString(const StringLiteral *S);
 
@@ -143,6 +149,11 @@
   /// Function relocation locations.
   llvm::DenseMap<const FunctionDecl *, std::vector<unsigned>> Relocs;
 
+  /// Native pointers referenced by bytecode.
+  std::vector<const void *> NativePointers;
+  /// Cached native pointer indices.
+  llvm::DenseMap<const void *, unsigned> NativePointerIndices;
+
   /// Custom allocator for global storage.
   using PoolAllocTy = llvm::BumpPtrAllocatorImpl<llvm::MallocAllocator>;
 
Index: clang/lib/AST/Interp/Program.cpp
===================================================================
--- clang/lib/AST/Interp/Program.cpp
+++ clang/lib/AST/Interp/Program.cpp
@@ -18,6 +18,21 @@
 using namespace clang;
 using namespace clang::interp;
 
+unsigned Program::getOrCreateNativePointer(const void *Ptr) {
+  auto It = NativePointerIndices.find(Ptr);
+  if (It != NativePointerIndices.end())
+    return It->second;
+
+  unsigned Idx = NativePointers.size();
+  NativePointers.push_back(Ptr);
+  NativePointerIndices[Ptr] = Idx;
+  return Idx;
+}
+
+const void *Program::getNativePointer(unsigned Idx) {
+  return NativePointers[Idx];
+}
+
 unsigned Program::createGlobalString(const StringLiteral *S) {
   const size_t CharWidth = S->getCharByteWidth();
   const size_t BitWidth = CharWidth * Ctx.getCharBit();
Index: clang/lib/AST/Interp/Interp.h
===================================================================
--- clang/lib/AST/Interp/Interp.h
+++ clang/lib/AST/Interp/Interp.h
@@ -13,8 +13,6 @@
 #ifndef LLVM_CLANG_AST_INTERP_INTERP_H
 #define LLVM_CLANG_AST_INTERP_INTERP_H
 
-#include <limits>
-#include <vector>
 #include "Function.h"
 #include "InterpFrame.h"
 #include "InterpStack.h"
@@ -30,6 +28,9 @@
 #include "llvm/ADT/APFloat.h"
 #include "llvm/ADT/APSInt.h"
 #include "llvm/Support/Endian.h"
+#include <limits>
+#include <type_traits>
+#include <vector>
 
 namespace clang {
 namespace interp {
@@ -948,6 +949,23 @@
   return true;
 }
 
+//===----------------------------------------------------------------------===//
+// Read opcode arguments
+//===----------------------------------------------------------------------===//
+
+template <typename T>
+inline std::enable_if_t<!std::is_pointer<T>::value, T> ReadArg(InterpState &S,
+                                                               CodePtr OpPC) {
+  return OpPC.read<T>();
+}
+
+template <typename T>
+inline std::enable_if_t<std::is_pointer<T>::value, T> ReadArg(InterpState &S,
+                                                              CodePtr OpPC) {
+  uint32_t ID = OpPC.read<uint32_t>();
+  return reinterpret_cast<T>(S.P.getNativePointer(ID));
+}
+
 /// Interpreter entry point.
 bool Interpret(InterpState &S, APValue &Result);
 
Index: clang/lib/AST/Interp/Disasm.cpp
===================================================================
--- clang/lib/AST/Interp/Disasm.cpp
+++ clang/lib/AST/Interp/Disasm.cpp
@@ -21,6 +21,19 @@
 using namespace clang;
 using namespace clang::interp;
 
+template <typename T>
+inline std::enable_if_t<!std::is_pointer<T>::value, T> ReadArg(Program &P,
+                                                               CodePtr OpPC) {
+  return OpPC.read<T>();
+}
+
+template <typename T>
+inline std::enable_if_t<std::is_pointer<T>::value, T> ReadArg(Program &P,
+                                                              CodePtr OpPC) {
+  uint32_t ID = OpPC.read<uint32_t>();
+  return reinterpret_cast<T>(P.getNativePointer(ID));
+}
+
 LLVM_DUMP_METHOD void Function::dump() const { dump(llvm::errs()); }
 
 LLVM_DUMP_METHOD void Function::dump(llvm::raw_ostream &OS) const {
Index: clang/lib/AST/Interp/ByteCodeEmitter.cpp
===================================================================
--- clang/lib/AST/Interp/ByteCodeEmitter.cpp
+++ clang/lib/AST/Interp/ByteCodeEmitter.cpp
@@ -11,6 +11,7 @@
 #include "Opcode.h"
 #include "Program.h"
 #include "clang/AST/DeclCXX.h"
+#include <type_traits>
 
 using namespace clang;
 using namespace clang::interp;
@@ -122,29 +123,48 @@
   return false;
 }
 
+/// Helper to write bytecode and bail out if 32-bit offsets become invalid.
+/// Pointers will be automatically marshalled as 32-bit IDs.
+template <typename T>
+static std::enable_if_t<!std::is_pointer<T>::value, void>
+emit(Program &P, std::vector<char> &Code, const T &Val, bool &Success) {
+  size_t Size = sizeof(Val);
+  if (Code.size() + Size > std::numeric_limits<unsigned>::max()) {
+    Success = false;
+    return;
+  }
+
+  const char *Data = reinterpret_cast<const char *>(&Val);
+  Code.insert(Code.end(), Data, Data + Size);
+}
+
+template <typename T>
+static std::enable_if_t<std::is_pointer<T>::value, void>
+emit(Program &P, std::vector<char> &Code, const T &Val, bool &Success) {
+  size_t Size = sizeof(uint32_t);
+  if (Code.size() + Size > std::numeric_limits<unsigned>::max()) {
+    Success = false;
+    return;
+  }
+
+  uint32_t ID = P.getOrCreateNativePointer(Val);
+  const char *Data = reinterpret_cast<const char *>(&ID);
+  Code.insert(Code.end(), Data, Data + Size);
+}
+
 template <typename... Tys>
 bool ByteCodeEmitter::emitOp(Opcode Op, const Tys &... Args, const SourceInfo &SI) {
   bool Success = true;
 
-  /// Helper to write bytecode and bail out if 32-bit offsets become invalid.
-  auto emit = [this, &Success](const char *Data, size_t Size) {
-    if (Code.size() + Size > std::numeric_limits<unsigned>::max()) {
-      Success = false;
-      return;
-    }
-    Code.insert(Code.end(), Data, Data + Size);
-  };
-
   /// The opcode is followed by arguments. The source info is
   /// attached to the address after the opcode.
-  emit(reinterpret_cast<const char *>(&Op), sizeof(Opcode));
+  emit(P, Code, Op, Success);
   if (SI)
     SrcMap.emplace_back(Code.size(), SI);
 
   /// The initializer list forces the expression to be evaluated
   /// for each argument in the variadic template, in order.
-  (void)std::initializer_list<int>{
-      (emit(reinterpret_cast<const char *>(&Args), sizeof(Args)), 0)...};
+  (void)std::initializer_list<int>{(emit(P, Code, Args, Success), 0)...};
 
   return Success;
 }
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to