[llvm] [SandboxIR] Implement ConstantDataSequential and subclasses (PR #133547)

via llvm-commits llvm-commits at lists.llvm.org
Fri Mar 28 17:44:38 PDT 2025


https://github.com/vporpo created https://github.com/llvm/llvm-project/pull/133547

This patch implements sandboxir::ConstantDataSequential mirroring LLVM IR.

>From 1a02d68c2c7e06d73bc096aaca513929edabcafe Mon Sep 17 00:00:00 2001
From: Vasileios Porpodas <vporpodas at google.com>
Date: Fri, 14 Mar 2025 16:27:23 -0700
Subject: [PATCH] [SandboxIR] Implement ConstantDataSequential and subclasses

This patch implements sandboxir::ConstantDataSequential mirroring LLVM IR.
---
 llvm/include/llvm/SandboxIR/Constant.h     | 122 +++++++++++++++++++++
 llvm/include/llvm/SandboxIR/Context.h      |   1 +
 llvm/include/llvm/SandboxIR/Type.h         |   1 +
 llvm/include/llvm/SandboxIR/Values.def     |   2 +
 llvm/lib/SandboxIR/Context.cpp             |   8 ++
 llvm/unittests/SandboxIR/SandboxIRTest.cpp | 101 +++++++++++++++++
 6 files changed, 235 insertions(+)

diff --git a/llvm/include/llvm/SandboxIR/Constant.h b/llvm/include/llvm/SandboxIR/Constant.h
index c4841e0b0dd66..ddd90291c509b 100644
--- a/llvm/include/llvm/SandboxIR/Constant.h
+++ b/llvm/include/llvm/SandboxIR/Constant.h
@@ -486,6 +486,128 @@ class ConstantAggregateZero final : public Constant {
 #endif
 };
 
+/// ConstantDataSequential - A vector or array constant whose element type is a
+/// simple 1/2/4/8-byte integer or half/bfloat/float/double, and whose elements
+/// are just simple data values (i.e. ConstantInt/ConstantFP).  This Constant
+/// node has no operands because it stores all of the elements of the constant
+/// as densely packed data, instead of as Value*'s.
+///
+/// This is the common base class of ConstantDataArray and ConstantDataVector.
+class ConstantDataSequential : public Constant {
+protected:
+  ConstantDataSequential(ClassID ID, llvm::ConstantDataSequential *C,
+                         Context &Ctx)
+      : Constant(ID, C, Ctx) {}
+
+public:
+  /// Return true if a ConstantDataSequential can be formed with a vector or
+  /// array of the specified element type.
+  /// ConstantDataSequential only works with normal float and int types that
+  /// are stored densely in memory, not with things like i42 or x86_f80.
+  static bool isElementTypeCompatible(Type *Ty) {
+    return llvm::ConstantDataSequential::isElementTypeCompatible(Ty->LLVMTy);
+  }
+  /// If this is a sequential container of integers (of any size), return the
+  /// specified element in the low bits of a uint64_t.
+  uint64_t getElementAsInteger(unsigned ElmIdx) const {
+    return cast<llvm::ConstantDataSequential>(Val)->getElementAsInteger(ElmIdx);
+  }
+  /// If this is a sequential container of integers (of any size), return the
+  /// specified element as an APInt.
+  APInt getElementAsAPInt(unsigned ElmIdx) const {
+    return cast<llvm::ConstantDataSequential>(Val)->getElementAsAPInt(ElmIdx);
+  }
+  /// If this is a sequential container of floating point type, return the
+  /// specified element as an APFloat.
+  APFloat getElementAsAPFloat(unsigned ElmIdx) const {
+    return cast<llvm::ConstantDataSequential>(Val)->getElementAsAPFloat(ElmIdx);
+  }
+  /// If this is a sequential container of floats, return the specified element
+  /// as a float.
+  float getElementAsFloat(unsigned ElmIdx) const {
+    return cast<llvm::ConstantDataSequential>(Val)->getElementAsFloat(ElmIdx);
+  }
+  /// If this is a sequential container of doubles, return the specified
+  /// element as a double.
+  double getElementAsDouble(unsigned ElmIdx) const {
+    return cast<llvm::ConstantDataSequential>(Val)->getElementAsDouble(ElmIdx);
+  }
+  /// Return a Constant for a specified index's element.
+  /// Note that this has to compute a new constant to return, so it isn't as
+  /// efficient as getElementAsInteger/Float/Double.
+  Constant *getElementAsConstant(unsigned ElmIdx) const {
+    return Ctx.getOrCreateConstant(
+        cast<llvm::ConstantDataSequential>(Val)->getElementAsConstant(ElmIdx));
+  }
+  /// Return the element type of the array/vector.
+  Type *getElementType() const {
+    return Ctx.getType(
+        cast<llvm::ConstantDataSequential>(Val)->getElementType());
+  }
+  /// Return the number of elements in the array or vector.
+  unsigned getNumElements() const {
+    return cast<llvm::ConstantDataSequential>(Val)->getNumElements();
+  }
+  /// Return the size (in bytes) of each element in the array/vector.
+  /// The size of the elements is known to be a multiple of one byte.
+  uint64_t getElementByteSize() const {
+    return cast<llvm::ConstantDataSequential>(Val)->getElementByteSize();
+  }
+  /// This method returns true if this is an array of \p CharSize integers.
+  bool isString(unsigned CharSize = 8) const {
+    return cast<llvm::ConstantDataSequential>(Val)->isString(CharSize);
+  }
+  /// This method returns true if the array "isString", ends with a null byte,
+  /// and does not contain any other null bytes.
+  bool isCString() const {
+    return cast<llvm::ConstantDataSequential>(Val)->isCString();
+  }
+  /// If this array is isString(), then this method returns the array as a
+  /// StringRef. Otherwise, it asserts out.
+  StringRef getAsString() const {
+    return cast<llvm::ConstantDataSequential>(Val)->getAsString();
+  }
+  /// If this array is isCString(), then this method returns the array (without
+  /// the trailing null byte) as a StringRef. Otherwise, it asserts out.
+  StringRef getAsCString() const {
+    return cast<llvm::ConstantDataSequential>(Val)->getAsCString();
+  }
+  /// Return the raw, underlying, bytes of this data. Note that this is an
+  /// extremely tricky thing to work with, as it exposes the host endianness of
+  /// the data elements.
+  StringRef getRawDataValues() const {
+    return cast<llvm::ConstantDataSequential>(Val)->getRawDataValues();
+  }
+
+  static bool classof(const Value *From) {
+    return From->getSubclassID() == ClassID::ConstantDataArray ||
+           From->getSubclassID() == ClassID::ConstantDataVector;
+  }
+};
+
+class ConstantDataArray final : public ConstantDataSequential {
+  ConstantDataArray(llvm::ConstantDataArray *C, Context &Ctx)
+      : ConstantDataSequential(ClassID::ConstantDataArray, C, Ctx) {}
+  friend class Context;
+
+public:
+  // TODO: Add missing functions and a class-specific classof().
+};
+
+/// A vector constant whose element type is a simple 1/2/4/8-byte integer or
+/// float/double, and whose elements are just simple data values
+/// (i.e. ConstantInt/ConstantFP). This Constant node has no operands because it
+/// stores all of the elements of the constant as densely packed data, instead
+/// of as Value*'s.
+class ConstantDataVector final : public ConstantDataSequential {
+  ConstantDataVector(llvm::ConstantDataVector *C, Context &Ctx)
+      : ConstantDataSequential(ClassID::ConstantDataVector, C, Ctx) {}
+  friend class Context;
+
+public:
+  // TODO: Add missing functions and a class-specific classof().
+};
+
 // TODO: Inherit from ConstantData.
 class ConstantPointerNull final : public Constant {
   ConstantPointerNull(llvm::ConstantPointerNull *C, Context &Ctx)
diff --git a/llvm/include/llvm/SandboxIR/Context.h b/llvm/include/llvm/SandboxIR/Context.h
index 714d1ec78f452..e2b4d0f6baf18 100644
--- a/llvm/include/llvm/SandboxIR/Context.h
+++ b/llvm/include/llvm/SandboxIR/Context.h
@@ -130,6 +130,7 @@ class Context {
   }
   /// Get or create a sandboxir::Constant from an existing LLVM IR \p LLVMC.
   Constant *getOrCreateConstant(llvm::Constant *LLVMC);
+  friend class ConstantDataSequential; // For getOrCreateConstant().
   friend class Utils; // For getMemoryBase
 
   void runEraseInstrCallbacks(Instruction *I);
diff --git a/llvm/include/llvm/SandboxIR/Type.h b/llvm/include/llvm/SandboxIR/Type.h
index ec32284dacd61..5cc1278758cc0 100644
--- a/llvm/include/llvm/SandboxIR/Type.h
+++ b/llvm/include/llvm/SandboxIR/Type.h
@@ -63,6 +63,7 @@ class Type {
   friend class TargetExtType;      // For LLVMTy.
   friend class Module;             // For LLVMTy.
   friend class FPMathOperator;     // For LLVMTy.
+  friend class ConstantDataSequential; // For LLVMTy.
 
   // Friend all instruction classes because `create()` functions use LLVMTy.
 #define DEF_INSTR(ID, OPCODE, CLASS) friend class CLASS;
diff --git a/llvm/include/llvm/SandboxIR/Values.def b/llvm/include/llvm/SandboxIR/Values.def
index f5ead54a08e10..a55abbd20f4c0 100644
--- a/llvm/include/llvm/SandboxIR/Values.def
+++ b/llvm/include/llvm/SandboxIR/Values.def
@@ -28,6 +28,8 @@ DEF_VALUE(Block, BasicBlock)
 DEF_CONST(Constant, Constant)
 DEF_CONST(ConstantInt, ConstantInt)
 DEF_CONST(ConstantFP, ConstantFP)
+DEF_CONST(ConstantDataArray, ConstantDataArray)
+DEF_CONST(ConstantDataVector, ConstantDataVector)
 DEF_CONST(ConstantArray, ConstantArray)
 DEF_CONST(ConstantStruct, ConstantStruct)
 DEF_CONST(ConstantVector, ConstantVector)
diff --git a/llvm/lib/SandboxIR/Context.cpp b/llvm/lib/SandboxIR/Context.cpp
index 21039ce7ed834..fe67f9ef73fb6 100644
--- a/llvm/lib/SandboxIR/Context.cpp
+++ b/llvm/lib/SandboxIR/Context.cpp
@@ -360,6 +360,14 @@ Value *Context::getOrCreateValueInternal(llvm::Value *LLVMV, llvm::User *U) {
       It->second = std::unique_ptr<ConstantVector>(
           new ConstantVector(cast<llvm::ConstantVector>(LLVMC), *this));
       break;
+    case llvm::Value::ConstantDataArrayVal:
+      It->second = std::unique_ptr<ConstantDataArray>(
+          new ConstantDataArray(cast<llvm::ConstantDataArray>(LLVMC), *this));
+      break;
+    case llvm::Value::ConstantDataVectorVal:
+      It->second = std::unique_ptr<ConstantDataVector>(
+          new ConstantDataVector(cast<llvm::ConstantDataVector>(LLVMC), *this));
+      break;
     case llvm::Value::FunctionVal:
       It->second = std::unique_ptr<Function>(
           new Function(cast<llvm::Function>(LLVMC), *this));
diff --git a/llvm/unittests/SandboxIR/SandboxIRTest.cpp b/llvm/unittests/SandboxIR/SandboxIRTest.cpp
index bdc9c2c222ae5..c991de70ec008 100644
--- a/llvm/unittests/SandboxIR/SandboxIRTest.cpp
+++ b/llvm/unittests/SandboxIR/SandboxIRTest.cpp
@@ -607,6 +607,107 @@ define void @foo(ptr %ptr, {i32, i8} %v1, <2 x i8> %v2) {
   EXPECT_EQ(NewVectorCAZ->getElementCount(), ElementCount::getFixed(4));
 }
 
+// Tests ConstantDataSequential, ConstantDataArray and ConstantDataVector.
+TEST_F(SandboxIRTest, ConstantDataSequential) {
+  parseIR(C, R"IR(
+define void @foo() {
+  %array = extractvalue [2 x i8] [i8 0, i8 1], 0
+  %vector = extractelement <2 x i8> <i8 0, i8 1>, i32 0
+  %farray = extractvalue [2 x float] [float 0.0, float 1.0], 0
+  %fvector = extractelement <2 x double> <double 0.0, double 1.0>, i32 0
+  %string = extractvalue [6 x i8] [i8 72, i8 69, i8 76, i8 76, i8 79, i8 0], 0
+  ret void
+}
+)IR");
+  Function &LLVMF = *M->getFunction("foo");
+  sandboxir::Context Ctx(C);
+
+  auto &F = *Ctx.createFunction(&LLVMF);
+  auto &BB = *F.begin();
+  auto It = BB.begin();
+  auto *I0 = &*It++;
+  auto *I1 = &*It++;
+  auto *I2 = &*It++;
+  auto *I3 = &*It++;
+  auto *I4 = &*It++;
+  auto *Array = cast<sandboxir::ConstantDataArray>(I0->getOperand(0));
+  EXPECT_TRUE(isa<sandboxir::ConstantDataSequential>(Array));
+  auto *Vector = cast<sandboxir::ConstantDataVector>(I1->getOperand(0));
+  EXPECT_TRUE(isa<sandboxir::ConstantDataVector>(Vector));
+  auto *FArray = cast<sandboxir::ConstantDataArray>(I2->getOperand(0));
+  EXPECT_TRUE(isa<sandboxir::ConstantDataSequential>(FArray));
+  auto *FVector = cast<sandboxir::ConstantDataVector>(I3->getOperand(0));
+  EXPECT_TRUE(isa<sandboxir::ConstantDataVector>(FVector));
+  auto *String = cast<sandboxir::ConstantDataArray>(I4->getOperand(0));
+  EXPECT_TRUE(isa<sandboxir::ConstantDataArray>(String));
+
+  auto *Zero8 = sandboxir::ConstantInt::get(sandboxir::Type::getInt8Ty(Ctx), 0);
+  auto *One8 = sandboxir::ConstantInt::get(sandboxir::Type::getInt8Ty(Ctx), 1);
+
+  // Check isElementTypeCompatible().
+  for (llvm::Type *LLVMTy :
+       {llvm::Type::getIntNTy(C, 42), llvm::Type::getInt8Ty(C)})
+    EXPECT_EQ(llvm::ConstantDataSequential::isElementTypeCompatible(LLVMTy),
+              sandboxir::ConstantDataSequential::isElementTypeCompatible(
+                  Ctx.getType(LLVMTy)));
+  // Check getElementAsInteger().
+  EXPECT_EQ(Array->getElementAsInteger(0), 0u);
+  EXPECT_EQ(Array->getElementAsInteger(1), 1u);
+  EXPECT_EQ(Vector->getElementAsInteger(0), 0u);
+  EXPECT_EQ(Vector->getElementAsInteger(1), 1u);
+  // Check getElementAsAPInt().
+  EXPECT_EQ(Array->getElementAsAPInt(0), 0u);
+  EXPECT_EQ(Array->getElementAsAPInt(1), 1u);
+  EXPECT_EQ(Vector->getElementAsAPInt(0), 0u);
+  EXPECT_EQ(Vector->getElementAsAPInt(1), 1u);
+  // Check getElementAsFloat().
+  EXPECT_EQ(FArray->getElementAsFloat(0), 0.0);
+  EXPECT_EQ(FArray->getElementAsFloat(1), 1.0);
+  // Check getElementAsDouble().
+  EXPECT_EQ(FVector->getElementAsDouble(0), 0.0);
+  EXPECT_EQ(FVector->getElementAsDouble(1), 1.0);
+  // Check getElementAsConstant().
+  EXPECT_EQ(Array->getElementAsConstant(0), Zero8);
+  EXPECT_EQ(Array->getElementAsConstant(1), One8);
+  EXPECT_EQ(Vector->getElementAsConstant(0), Zero8);
+  EXPECT_EQ(Vector->getElementAsConstant(1), One8);
+  // Check getElementType().
+  EXPECT_EQ(Array->getElementType(), sandboxir::Type::getInt8Ty(Ctx));
+  EXPECT_EQ(Vector->getElementType(), sandboxir::Type::getInt8Ty(Ctx));
+  EXPECT_EQ(FArray->getElementType(), sandboxir::Type::getFloatTy(Ctx));
+  EXPECT_EQ(FVector->getElementType(), sandboxir::Type::getDoubleTy(Ctx));
+  // Check getNumElements().
+  EXPECT_EQ(Array->getNumElements(), 2u);
+  EXPECT_EQ(Vector->getNumElements(), 2u);
+  EXPECT_EQ(FArray->getNumElements(), 2u);
+  EXPECT_EQ(FVector->getNumElements(), 2u);
+  // Check getElementByteSize().
+  EXPECT_EQ(Array->getElementByteSize(), 1u);
+  EXPECT_EQ(Vector->getElementByteSize(), 1u);
+  EXPECT_EQ(FArray->getElementByteSize(), 4u);
+  EXPECT_EQ(FVector->getElementByteSize(), 8u);
+  // Check isString().
+  EXPECT_EQ(Array->isString(), true);
+  EXPECT_EQ(Vector->isString(), false);
+  EXPECT_EQ(FArray->isString(), false);
+  EXPECT_EQ(FVector->isString(), false);
+  EXPECT_EQ(String->isString(), true);
+  // Check isCString().
+  EXPECT_EQ(Array->isCString(), false);
+  EXPECT_EQ(Vector->isCString(), false);
+  EXPECT_EQ(FArray->isCString(), false);
+  EXPECT_EQ(FVector->isCString(), false);
+  EXPECT_EQ(String->isCString(), true);
+  // Check getAsString().
+  char Data[] = {'H', 'E', 'L', 'L', 'O', '\0'};
+  StringRef HelloWithNull(Data, 6);
+  EXPECT_EQ(String->getAsString(), HelloWithNull);
+  // Check getAsCString().
+  EXPECT_EQ(String->getAsCString(), "HELLO");
+  // Check getRawDataValues().
+  EXPECT_EQ(String->getRawDataValues(), HelloWithNull);
+}
+
 TEST_F(SandboxIRTest, ConstantPointerNull) {
   parseIR(C, R"IR(
 define ptr @foo() {



More information about the llvm-commits mailing list