[clang] [clang][Interp] Support arbitrary precision constants (PR #79747)

Tue Jan 30 10:26:18 PST 2024

Timm =?utf-8?q?Bäder?= <tbaeder at redhat.com>
Message-ID:
In-Reply-To: <llvm.org/llvm/llvm-project/pull/79747 at github.com>


https://github.com/tbaederr updated https://github.com/llvm/llvm-project/pull/79747

>From c45c1c9bbd944744f69e811b2104de01cf82c55b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Timm=20B=C3=A4der?= <tbaeder at redhat.com>
Date: Sun, 28 Jan 2024 09:48:15 +0100
Subject: [PATCH 1/2] [clang][Interp] Support arbitrary precision constants

Add (de)serialization support for them, like we do for Floating values.
---
 clang/lib/AST/Interp/ByteCodeEmitter.cpp | 37 ++++++++++++++++++++++++
 clang/lib/AST/Interp/ByteCodeExprGen.cpp | 11 ++++---
 clang/lib/AST/Interp/Disasm.cpp          | 15 ++++++++++
 clang/lib/AST/Interp/IntegralAP.h        | 30 +++++++++++++++++++
 clang/lib/AST/Interp/Interp.h            | 16 ++++++++++
 clang/lib/AST/Interp/Opcodes.td          |  4 +++
 clang/test/AST/Interp/intap.cpp          | 14 +++++++++
 7 files changed, 123 insertions(+), 4 deletions(-)

diff --git a/clang/lib/AST/Interp/ByteCodeEmitter.cpp b/clang/lib/AST/Interp/ByteCodeEmitter.cpp
index fd2a92d9d3f91..4f7ad51773a65 100644
--- a/clang/lib/AST/Interp/ByteCodeEmitter.cpp
+++ b/clang/lib/AST/Interp/ByteCodeEmitter.cpp
@@ -10,6 +10,7 @@
 #include "ByteCodeGenError.h"
 #include "Context.h"
 #include "Floating.h"
+#include "IntegralAP.h"
 #include "Opcode.h"
 #include "Program.h"
 #include "clang/AST/ASTLambda.h"
@@ -228,6 +229,42 @@ void emit(Program &P, std::vector<std::byte> &Code, const Floating &Val,
   Val.serialize(Code.data() + ValPos);
 }
 
+template <>
+void emit(Program &P, std::vector<std::byte> &Code,
+          const IntegralAP<false> &Val, bool &Success) {
+  size_t Size = Val.bytesToSerialize();
+
+  if (Code.size() + Size > std::numeric_limits<unsigned>::max()) {
+    Success = false;
+    return;
+  }
+
+  // Access must be aligned!
+  size_t ValPos = align(Code.size());
+  Size = align(Size);
+  assert(aligned(ValPos + Size));
+  Code.resize(ValPos + Size);
+  Val.serialize(Code.data() + ValPos);
+}
+
+template <>
+void emit(Program &P, std::vector<std::byte> &Code, const IntegralAP<true> &Val,
+          bool &Success) {
+  size_t Size = Val.bytesToSerialize();
+
+  if (Code.size() + Size > std::numeric_limits<unsigned>::max()) {
+    Success = false;
+    return;
+  }
+
+  // Access must be aligned!
+  size_t ValPos = align(Code.size());
+  Size = align(Size);
+  assert(aligned(ValPos + Size));
+  Code.resize(ValPos + Size);
+  Val.serialize(Code.data() + ValPos);
+}
+
 template <typename... Tys>
 bool ByteCodeEmitter::emitOp(Opcode Op, const Tys &... Args, const SourceInfo &SI) {
   bool Success = true;
diff --git a/clang/lib/AST/Interp/ByteCodeExprGen.cpp b/clang/lib/AST/Interp/ByteCodeExprGen.cpp
index d4501cefb2131..c2110834f64c0 100644
--- a/clang/lib/AST/Interp/ByteCodeExprGen.cpp
+++ b/clang/lib/AST/Interp/ByteCodeExprGen.cpp
@@ -2191,15 +2191,13 @@ bool ByteCodeExprGen<Emitter>::emitConst(T Value, PrimType Ty, const Expr *E) {
     return this->emitConstSint64(Value, E);
   case PT_Uint64:
     return this->emitConstUint64(Value, E);
-  case PT_IntAP:
-  case PT_IntAPS:
-    assert(false);
-    return false;
   case PT_Bool:
     return this->emitConstBool(Value, E);
   case PT_Ptr:
   case PT_FnPtr:
   case PT_Float:
+  case PT_IntAP:
+  case PT_IntAPS:
     llvm_unreachable("Invalid integral type");
     break;
   }
@@ -2215,6 +2213,11 @@ bool ByteCodeExprGen<Emitter>::emitConst(T Value, const Expr *E) {
 template <class Emitter>
 bool ByteCodeExprGen<Emitter>::emitConst(const APSInt &Value, PrimType Ty,
                                          const Expr *E) {
+  if (Ty == PT_IntAPS)
+    return this->emitConstIntAPS(Value, E);
+  else if (Ty == PT_IntAP)
+    return this->emitConstIntAP(Value, E);
+
   if (Value.isSigned())
     return this->emitConst(Value.getSExtValue(), Ty, E);
   return this->emitConst(Value.getZExtValue(), Ty, E);
diff --git a/clang/lib/AST/Interp/Disasm.cpp b/clang/lib/AST/Interp/Disasm.cpp
index d276df8f29262..eba437e05f59d 100644
--- a/clang/lib/AST/Interp/Disasm.cpp
+++ b/clang/lib/AST/Interp/Disasm.cpp
@@ -12,6 +12,7 @@
 
 #include "Floating.h"
 #include "Function.h"
+#include "IntegralAP.h"
 #include "Opcode.h"
 #include "PrimType.h"
 #include "Program.h"
@@ -37,6 +38,20 @@ template <> inline Floating ReadArg<Floating>(Program &P, CodePtr &OpPC) {
   return F;
 }
 
+template <>
+inline IntegralAP<false> ReadArg<IntegralAP<false>>(Program &P, CodePtr &OpPC) {
+  IntegralAP<false> I = IntegralAP<false>::deserialize(*OpPC);
+  OpPC += align(I.bytesToSerialize());
+  return I;
+}
+
+template <>
+inline IntegralAP<true> ReadArg<IntegralAP<true>>(Program &P, CodePtr &OpPC) {
+  IntegralAP<true> I = IntegralAP<true>::deserialize(*OpPC);
+  OpPC += align(I.bytesToSerialize());
+  return I;
+}
+
 LLVM_DUMP_METHOD void Function::dump() const { dump(llvm::errs()); }
 
 LLVM_DUMP_METHOD void Function::dump(llvm::raw_ostream &OS) const {
diff --git a/clang/lib/AST/Interp/IntegralAP.h b/clang/lib/AST/Interp/IntegralAP.h
index 55e29caa1cd74..bab9774288bfa 100644
--- a/clang/lib/AST/Interp/IntegralAP.h
+++ b/clang/lib/AST/Interp/IntegralAP.h
@@ -263,6 +263,31 @@ template <bool Signed> class IntegralAP final {
       *R = IntegralAP(A.V.lshr(ShiftAmount));
   }
 
+  // === Serialization support ===
+  size_t bytesToSerialize() const {
+    // 4 bytes for the BitWidth followed by N bytes for the actual APInt.
+    return sizeof(uint32_t) + (V.getBitWidth() / CHAR_BIT);
+  }
+
+  void serialize(std::byte *Buff) const {
+    assert(V.getBitWidth() < std::numeric_limits<uint8_t>::max());
+    uint32_t BitWidth = V.getBitWidth();
+
+    std::memcpy(Buff, &BitWidth, sizeof(uint32_t));
+    llvm::StoreIntToMemory(V, (uint8_t *)(Buff + sizeof(uint32_t)),
+                           BitWidth / CHAR_BIT);
+  }
+
+  static IntegralAP<Signed> deserialize(const std::byte *Buff) {
+    uint32_t BitWidth;
+    std::memcpy(&BitWidth, Buff, sizeof(uint32_t));
+    IntegralAP<Signed> Val(APInt(BitWidth, 0ull, !Signed));
+
+    llvm::LoadIntFromMemory(Val.V, (const uint8_t *)Buff + sizeof(uint32_t),
+                            BitWidth / CHAR_BIT);
+    return Val;
+  }
+
 private:
   template <template <typename T> class Op>
   static bool CheckAddSubMulUB(const IntegralAP &A, const IntegralAP &B,
@@ -289,6 +314,11 @@ inline llvm::raw_ostream &operator<<(llvm::raw_ostream &OS,
   return OS;
 }
 
+template <bool Signed>
+IntegralAP<Signed> getSwappedBytes(IntegralAP<Signed> F) {
+  return F;
+}
+
 } // namespace interp
 } // namespace clang
 
diff --git a/clang/lib/AST/Interp/Interp.h b/clang/lib/AST/Interp/Interp.h
index 65c54ed9c89b6..ec43481ebe6cf 100644
--- a/clang/lib/AST/Interp/Interp.h
+++ b/clang/lib/AST/Interp/Interp.h
@@ -2041,6 +2041,22 @@ template <> inline Floating ReadArg<Floating>(InterpState &S, CodePtr &OpPC) {
   return F;
 }
 
+template <>
+inline IntegralAP<false> ReadArg<IntegralAP<false>>(InterpState &S,
+                                                    CodePtr &OpPC) {
+  IntegralAP<false> I = IntegralAP<false>::deserialize(*OpPC);
+  OpPC += align(I.bytesToSerialize());
+  return I;
+}
+
+template <>
+inline IntegralAP<true> ReadArg<IntegralAP<true>>(InterpState &S,
+                                                  CodePtr &OpPC) {
+  IntegralAP<true> I = IntegralAP<true>::deserialize(*OpPC);
+  OpPC += align(I.bytesToSerialize());
+  return I;
+}
+
 } // namespace interp
 } // namespace clang
 
diff --git a/clang/lib/AST/Interp/Opcodes.td b/clang/lib/AST/Interp/Opcodes.td
index 24747b6b98c16..b0fafb973dff4 100644
--- a/clang/lib/AST/Interp/Opcodes.td
+++ b/clang/lib/AST/Interp/Opcodes.td
@@ -45,6 +45,8 @@ def ArgUint32 : ArgType { let Name = "uint32_t"; }
 def ArgSint64 : ArgType { let Name = "int64_t"; }
 def ArgUint64 : ArgType { let Name = "uint64_t"; }
 def ArgFloat : ArgType { let Name = "Floating"; }
+def ArgIntAP : ArgType { let Name = "IntegralAP<false>"; }
+def ArgIntAPS : ArgType { let Name = "IntegralAP<true>"; }
 def ArgBool : ArgType { let Name = "bool"; }
 
 def ArgFunction : ArgType { let Name = "const Function *"; }
@@ -244,6 +246,8 @@ def ConstUint32 : ConstOpcode<Uint32, ArgUint32>;
 def ConstSint64 : ConstOpcode<Sint64, ArgSint64>;
 def ConstUint64 : ConstOpcode<Uint64, ArgUint64>;
 def ConstFloat : ConstOpcode<Float, ArgFloat>;
+def constIntAP : ConstOpcode<IntAP, ArgIntAP>;
+def constIntAPS : ConstOpcode<IntAPS, ArgIntAPS>;
 def ConstBool : ConstOpcode<Bool, ArgBool>;
 
 // [] -> [Integer]
diff --git a/clang/test/AST/Interp/intap.cpp b/clang/test/AST/Interp/intap.cpp
index 118dc21b67e87..d444012485691 100644
--- a/clang/test/AST/Interp/intap.cpp
+++ b/clang/test/AST/Interp/intap.cpp
@@ -154,6 +154,20 @@ namespace i128 {
   constexpr uint128_t ui128Zero{};
   static_assert(ui128Zero == 0, "");
 
+
+  enum LargeEnum : signed __int128 {
+    LV = (signed __int128)1 << 127,
+  };
+
+  constexpr LargeEnum F = LV;
+  static_assert(F ==  (signed __int128)1 << 127, "");
+  constexpr LargeEnum getLargeEnum() {
+    return LV;
+  }
+  static_assert(getLargeEnum() ==  (signed __int128)1 << 127, "");
+
+
+
 #if __cplusplus >= 201402L
   template <typename T>
   constexpr T CastFrom(__int128_t A) {

>From 7af3bdf4433eb2c3110b029772d58b513c27dd49 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Timm=20B=C3=A4der?= <tbaeder at redhat.com>
Date: Tue, 30 Jan 2024 19:16:41 +0100
Subject: [PATCH 2/2] Address review feedback

---
 clang/lib/AST/Interp/ByteCodeEmitter.cpp | 42 ++++++++----------------
 clang/lib/AST/Interp/ByteCodeExprGen.cpp |  2 +-
 2 files changed, 14 insertions(+), 30 deletions(-)

diff --git a/clang/lib/AST/Interp/ByteCodeEmitter.cpp b/clang/lib/AST/Interp/ByteCodeEmitter.cpp
index 4f7ad51773a65..409ce21506caa 100644
--- a/clang/lib/AST/Interp/ByteCodeEmitter.cpp
+++ b/clang/lib/AST/Interp/ByteCodeEmitter.cpp
@@ -210,9 +210,11 @@ static void emit(Program &P, std::vector<std::byte> &Code, const T &Val,
   }
 }
 
-template <>
-void emit(Program &P, std::vector<std::byte> &Code, const Floating &Val,
-          bool &Success) {
+/// Emits a serializable value. These usually (potentially) contain
+/// heap-allocated memory and aren't trivially copyable.
+template <typename T>
+static void emitSerialized(std::vector<std::byte> &Code, const T &Val,
+                           bool &Success) {
   size_t Size = Val.bytesToSerialize();
 
   if (Code.size() + Size > std::numeric_limits<unsigned>::max()) {
@@ -229,40 +231,22 @@ void emit(Program &P, std::vector<std::byte> &Code, const Floating &Val,
   Val.serialize(Code.data() + ValPos);
 }
 
+template <>
+void emit(Program &P, std::vector<std::byte> &Code, const Floating &Val,
+          bool &Success) {
+  emitSerialized(Code, Val, Success);
+}
+
 template <>
 void emit(Program &P, std::vector<std::byte> &Code,
           const IntegralAP<false> &Val, bool &Success) {
-  size_t Size = Val.bytesToSerialize();
-
-  if (Code.size() + Size > std::numeric_limits<unsigned>::max()) {
-    Success = false;
-    return;
-  }
-
-  // Access must be aligned!
-  size_t ValPos = align(Code.size());
-  Size = align(Size);
-  assert(aligned(ValPos + Size));
-  Code.resize(ValPos + Size);
-  Val.serialize(Code.data() + ValPos);
+  emitSerialized(Code, Val, Success);
 }
 
 template <>
 void emit(Program &P, std::vector<std::byte> &Code, const IntegralAP<true> &Val,
           bool &Success) {
-  size_t Size = Val.bytesToSerialize();
-
-  if (Code.size() + Size > std::numeric_limits<unsigned>::max()) {
-    Success = false;
-    return;
-  }
-
-  // Access must be aligned!
-  size_t ValPos = align(Code.size());
-  Size = align(Size);
-  assert(aligned(ValPos + Size));
-  Code.resize(ValPos + Size);
-  Val.serialize(Code.data() + ValPos);
+  emitSerialized(Code, Val, Success);
 }
 
 template <typename... Tys>
diff --git a/clang/lib/AST/Interp/ByteCodeExprGen.cpp b/clang/lib/AST/Interp/ByteCodeExprGen.cpp
index c2110834f64c0..bb8086abbd326 100644
--- a/clang/lib/AST/Interp/ByteCodeExprGen.cpp
+++ b/clang/lib/AST/Interp/ByteCodeExprGen.cpp
@@ -2215,7 +2215,7 @@ bool ByteCodeExprGen<Emitter>::emitConst(const APSInt &Value, PrimType Ty,
                                          const Expr *E) {
   if (Ty == PT_IntAPS)
     return this->emitConstIntAPS(Value, E);
-  else if (Ty == PT_IntAP)
+  if (Ty == PT_IntAP)
     return this->emitConstIntAP(Value, E);
 
   if (Value.isSigned())