[clang] 40080e7 - [Clang interpreter] Avoid storing pointers at unaligned locations

Jessica Clarke via cfe-commits cfe-commits at lists.llvm.org
Wed Jul 28 08:03:59 PDT 2021


Author: Jessica Clarke
Date: 2021-07-28T16:03:41+01:00
New Revision: 40080e7e7f42857c8edac4a53e476a68563f1a98

URL: https://github.com/llvm/llvm-project/commit/40080e7e7f42857c8edac4a53e476a68563f1a98
DIFF: https://github.com/llvm/llvm-project/commit/40080e7e7f42857c8edac4a53e476a68563f1a98.diff

LOG: [Clang interpreter] Avoid storing pointers at unaligned locations

The Clang interpreter's bytecode uses a packed stream of bytes
representation, but also wants to have some opcodes take pointers as
arguments, which are currently embedded in the bytecode directly.

However, CHERI, and thus Arm's upcoming experimental Morello prototype,
provide spatial memory safety for C/C++ by implementing language-level
(and sub-language-level) pointers as capabilities, which track bounds,
permissions and validity in hardware. This uses tagged memory with a
single tag bit at every capability-aligned address, and so storing
pointers to unaligned addresses results in the tag being stripped,
leading to a tag fault when the pointer is ultimately dereferenced at a
later point.

In order to support a stricter C/C++ implementation like CHERI, we no
longer store pointers directly in the bytecode, instead storing them in
a table and embedding the index in the bytecode.

Reviewed By: nand

Differential Revision: https://reviews.llvm.org/D97606

Added: 
    

Modified: 
    clang/lib/AST/Interp/ByteCodeEmitter.cpp
    clang/lib/AST/Interp/Disasm.cpp
    clang/lib/AST/Interp/Interp.h
    clang/lib/AST/Interp/Program.cpp
    clang/lib/AST/Interp/Program.h
    clang/lib/AST/Interp/Source.h
    clang/utils/TableGen/ClangOpcodesEmitter.cpp

Removed: 
    


################################################################################
diff  --git a/clang/lib/AST/Interp/ByteCodeEmitter.cpp b/clang/lib/AST/Interp/ByteCodeEmitter.cpp
index 7a4569820a1d1..a69b23fd613cd 100644
--- a/clang/lib/AST/Interp/ByteCodeEmitter.cpp
+++ b/clang/lib/AST/Interp/ByteCodeEmitter.cpp
@@ -11,6 +11,7 @@
 #include "Opcode.h"
 #include "Program.h"
 #include "clang/AST/DeclCXX.h"
+#include <type_traits>
 
 using namespace clang;
 using namespace clang::interp;
@@ -122,29 +123,48 @@ bool ByteCodeEmitter::bail(const SourceLocation &Loc) {
   return false;
 }
 
+/// Helper to write bytecode and bail out if 32-bit offsets become invalid.
+/// Pointers will be automatically marshalled as 32-bit IDs.
+template <typename T>
+static std::enable_if_t<!std::is_pointer<T>::value, void>
+emit(Program &P, std::vector<char> &Code, const T &Val, bool &Success) {
+  size_t Size = sizeof(Val);
+  if (Code.size() + Size > std::numeric_limits<unsigned>::max()) {
+    Success = false;
+    return;
+  }
+
+  const char *Data = reinterpret_cast<const char *>(&Val);
+  Code.insert(Code.end(), Data, Data + Size);
+}
+
+template <typename T>
+static std::enable_if_t<std::is_pointer<T>::value, void>
+emit(Program &P, std::vector<char> &Code, const T &Val, bool &Success) {
+  size_t Size = sizeof(uint32_t);
+  if (Code.size() + Size > std::numeric_limits<unsigned>::max()) {
+    Success = false;
+    return;
+  }
+
+  uint32_t ID = P.getOrCreateNativePointer(Val);
+  const char *Data = reinterpret_cast<const char *>(&ID);
+  Code.insert(Code.end(), Data, Data + Size);
+}
+
 template <typename... Tys>
 bool ByteCodeEmitter::emitOp(Opcode Op, const Tys &... Args, const SourceInfo &SI) {
   bool Success = true;
 
-  /// Helper to write bytecode and bail out if 32-bit offsets become invalid.
-  auto emit = [this, &Success](const char *Data, size_t Size) {
-    if (Code.size() + Size > std::numeric_limits<unsigned>::max()) {
-      Success = false;
-      return;
-    }
-    Code.insert(Code.end(), Data, Data + Size);
-  };
-
   /// The opcode is followed by arguments. The source info is
   /// attached to the address after the opcode.
-  emit(reinterpret_cast<const char *>(&Op), sizeof(Opcode));
+  emit(P, Code, Op, Success);
   if (SI)
     SrcMap.emplace_back(Code.size(), SI);
 
   /// The initializer list forces the expression to be evaluated
   /// for each argument in the variadic template, in order.
-  (void)std::initializer_list<int>{
-      (emit(reinterpret_cast<const char *>(&Args), sizeof(Args)), 0)...};
+  (void)std::initializer_list<int>{(emit(P, Code, Args, Success), 0)...};
 
   return Success;
 }

diff  --git a/clang/lib/AST/Interp/Disasm.cpp b/clang/lib/AST/Interp/Disasm.cpp
index c1c18f832d4fa..36adbe296b0cc 100644
--- a/clang/lib/AST/Interp/Disasm.cpp
+++ b/clang/lib/AST/Interp/Disasm.cpp
@@ -21,6 +21,19 @@
 using namespace clang;
 using namespace clang::interp;
 
+template <typename T>
+inline std::enable_if_t<!std::is_pointer<T>::value, T> ReadArg(Program &P,
+                                                               CodePtr OpPC) {
+  return OpPC.read<T>();
+}
+
+template <typename T>
+inline std::enable_if_t<std::is_pointer<T>::value, T> ReadArg(Program &P,
+                                                              CodePtr OpPC) {
+  uint32_t ID = OpPC.read<uint32_t>();
+  return reinterpret_cast<T>(P.getNativePointer(ID));
+}
+
 LLVM_DUMP_METHOD void Function::dump() const { dump(llvm::errs()); }
 
 LLVM_DUMP_METHOD void Function::dump(llvm::raw_ostream &OS) const {

diff  --git a/clang/lib/AST/Interp/Interp.h b/clang/lib/AST/Interp/Interp.h
index e2f7bf0dc26a4..d7f9653be2e62 100644
--- a/clang/lib/AST/Interp/Interp.h
+++ b/clang/lib/AST/Interp/Interp.h
@@ -13,8 +13,6 @@
 #ifndef LLVM_CLANG_AST_INTERP_INTERP_H
 #define LLVM_CLANG_AST_INTERP_INTERP_H
 
-#include <limits>
-#include <vector>
 #include "Function.h"
 #include "InterpFrame.h"
 #include "InterpStack.h"
@@ -30,6 +28,9 @@
 #include "llvm/ADT/APFloat.h"
 #include "llvm/ADT/APSInt.h"
 #include "llvm/Support/Endian.h"
+#include <limits>
+#include <type_traits>
+#include <vector>
 
 namespace clang {
 namespace interp {
@@ -949,6 +950,23 @@ inline bool ExpandPtr(InterpState &S, CodePtr OpPC) {
   return true;
 }
 
+//===----------------------------------------------------------------------===//
+// Read opcode arguments
+//===----------------------------------------------------------------------===//
+
+template <typename T>
+inline std::enable_if_t<!std::is_pointer<T>::value, T> ReadArg(InterpState &S,
+                                                               CodePtr OpPC) {
+  return OpPC.read<T>();
+}
+
+template <typename T>
+inline std::enable_if_t<std::is_pointer<T>::value, T> ReadArg(InterpState &S,
+                                                              CodePtr OpPC) {
+  uint32_t ID = OpPC.read<uint32_t>();
+  return reinterpret_cast<T>(S.P.getNativePointer(ID));
+}
+
 /// Interpreter entry point.
 bool Interpret(InterpState &S, APValue &Result);
 

diff  --git a/clang/lib/AST/Interp/Program.cpp b/clang/lib/AST/Interp/Program.cpp
index fcbab0ea8172c..10d42d533bae0 100644
--- a/clang/lib/AST/Interp/Program.cpp
+++ b/clang/lib/AST/Interp/Program.cpp
@@ -18,6 +18,21 @@
 using namespace clang;
 using namespace clang::interp;
 
+unsigned Program::getOrCreateNativePointer(const void *Ptr) {
+  auto It = NativePointerIndices.find(Ptr);
+  if (It != NativePointerIndices.end())
+    return It->second;
+
+  unsigned Idx = NativePointers.size();
+  NativePointers.push_back(Ptr);
+  NativePointerIndices[Ptr] = Idx;
+  return Idx;
+}
+
+const void *Program::getNativePointer(unsigned Idx) {
+  return NativePointers[Idx];
+}
+
 unsigned Program::createGlobalString(const StringLiteral *S) {
   const size_t CharWidth = S->getCharByteWidth();
   const size_t BitWidth = CharWidth * Ctx.getCharBit();

diff  --git a/clang/lib/AST/Interp/Program.h b/clang/lib/AST/Interp/Program.h
index 5f0012db9b3f2..c81ec777a5fe7 100644
--- a/clang/lib/AST/Interp/Program.h
+++ b/clang/lib/AST/Interp/Program.h
@@ -44,6 +44,12 @@ class Program {
 public:
   Program(Context &Ctx) : Ctx(Ctx) {}
 
+  /// Marshals a native pointer to an ID for embedding in bytecode.
+  unsigned getOrCreateNativePointer(const void *Ptr);
+
+  /// Returns the value of a marshalled native pointer.
+  const void *getNativePointer(unsigned Idx);
+
   /// Emits a string literal among global data.
   unsigned createGlobalString(const StringLiteral *S);
 
@@ -143,6 +149,11 @@ class Program {
   /// Function relocation locations.
   llvm::DenseMap<const FunctionDecl *, std::vector<unsigned>> Relocs;
 
+  /// Native pointers referenced by bytecode.
+  std::vector<const void *> NativePointers;
+  /// Cached native pointer indices.
+  llvm::DenseMap<const void *, unsigned> NativePointerIndices;
+
   /// Custom allocator for global storage.
   using PoolAllocTy = llvm::BumpPtrAllocatorImpl<llvm::MallocAllocator>;
 

diff  --git a/clang/lib/AST/Interp/Source.h b/clang/lib/AST/Interp/Source.h
index 19c652b7331a1..6acaf406b47ab 100644
--- a/clang/lib/AST/Interp/Source.h
+++ b/clang/lib/AST/Interp/Source.h
@@ -44,8 +44,9 @@ class CodePtr {
   bool operator!=(const CodePtr &RHS) const { return Ptr != RHS.Ptr; }
 
   /// Reads data and advances the pointer.
-  template <typename T> T read() {
-    T Value = ReadHelper<T>(Ptr);
+  template <typename T> std::enable_if_t<!std::is_pointer<T>::value, T> read() {
+    using namespace llvm::support;
+    T Value = endian::read<T, endianness::native, 1>(Ptr);
     Ptr += sizeof(T);
     return Value;
   }
@@ -54,22 +55,6 @@ class CodePtr {
   /// Constructor used by Function to generate pointers.
   CodePtr(const char *Ptr) : Ptr(Ptr) {}
 
-  /// Helper to decode a value or a pointer.
-  template <typename T>
-  static std::enable_if_t<!std::is_pointer<T>::value, T>
-  ReadHelper(const char *Ptr) {
-    using namespace llvm::support;
-    return endian::read<T, endianness::native, 1>(Ptr);
-  }
-
-  template <typename T>
-  static std::enable_if_t<std::is_pointer<T>::value, T>
-  ReadHelper(const char *Ptr) {
-    using namespace llvm::support;
-    auto Punned = endian::read<uintptr_t, endianness::native, 1>(Ptr);
-    return reinterpret_cast<T>(Punned);
-  }
-
 private:
   friend class Function;
 

diff  --git a/clang/utils/TableGen/ClangOpcodesEmitter.cpp b/clang/utils/TableGen/ClangOpcodesEmitter.cpp
index ffeedcdf0ee27..8081096633d98 100644
--- a/clang/utils/TableGen/ClangOpcodesEmitter.cpp
+++ b/clang/utils/TableGen/ClangOpcodesEmitter.cpp
@@ -124,7 +124,7 @@ void ClangOpcodesEmitter::EmitInterp(raw_ostream &OS, StringRef N, Record *R) {
     for (size_t I = 0, N = Args.size(); I < N; ++I) {
       OS << "  auto V" << I;
       OS << " = ";
-      OS << "PC.read<" << Args[I]->getValueAsString("Name") << ">();\n";
+      OS << "ReadArg<" << Args[I]->getValueAsString("Name") << ">(S, PC);\n";
     }
 
     // Emit a call to the template method and pass arguments.
@@ -161,8 +161,10 @@ void ClangOpcodesEmitter::EmitDisasm(raw_ostream &OS, StringRef N, Record *R) {
     OS << "  PrintName(\"" << ID << "\");\n";
     OS << "  OS << \"\\t\"";
 
-    for (auto *Arg : R->getValueAsListOfDefs("Args"))
-      OS << " << PC.read<" << Arg->getValueAsString("Name") << ">() << \" \"";
+    for (auto *Arg : R->getValueAsListOfDefs("Args")) {
+      OS << " << ReadArg<" << Arg->getValueAsString("Name") << ">(P, PC)";
+      OS << " << \" \"";
+    }
 
     OS << " << \"\\n\";\n";
     OS << "  continue;\n";


        


More information about the cfe-commits mailing list