[Mlir-commits] [mlir] 0b168a4 - [mlir][LLVMIR] Use a new way to verify GEPOp indices

Min-Yih Hsu llvmlistbot at llvm.org
Tue May 17 10:30:06 PDT 2022


Author: Min-Yih Hsu
Date: 2022-05-17T10:28:44-07:00
New Revision: 0b168a49bf584ddd9aae4be8b1907aee8ed65615

URL: https://github.com/llvm/llvm-project/commit/0b168a49bf584ddd9aae4be8b1907aee8ed65615
DIFF: https://github.com/llvm/llvm-project/commit/0b168a49bf584ddd9aae4be8b1907aee8ed65615.diff

LOG: [mlir][LLVMIR] Use a new way to verify GEPOp indices

Previously, GEPOp relies on `findKnownStructIndices` to check if a GEP
index should be static. The truth is, `findKnownStructIndices` can only
tell you a GEP index _might_ be indexing into a struct (which should use
a static GEP index). But GEPOp::build and GEPOp::verify are falsely
taking this information as a certain answer, which creates many false
alarms like the one depicted in
`test/Target/LLVMIR/Import/dynamic-gep-index.ll`.

The solution presented here adopts a new verification scheme: When we're
recursively checking the child element types of a struct type, instead
of checking every child types, we only check the one dictated by the
(static) GEP index value. We also combine "refinement" logics --
refine/promote struct index mlir::Value into constants -- into the very
verification process since they have lots of logics in common. The
resulting code is more concise and less brittle.

We also hide GEPOp::findKnownStructIndices since most of the
aforementioned logics are already encapsulated within GEPOp::build and
GEPOp::verify, we found little reason for findKnownStructIndices (or the
new findStructIndices) to be public.

Differential Revision: https://reviews.llvm.org/D124935

Added: 
    mlir/test/Dialect/LLVMIR/dynamic-gep-index.mlir
    mlir/test/Target/LLVMIR/Import/dynamic-gep-index.ll

Modified: 
    mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td
    mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp
    mlir/lib/Target/LLVMIR/ConvertFromLLVMIR.cpp

Removed: 
    


################################################################################
diff  --git a/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td b/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td
index 8f4669f116632..9824a3710709e 100644
--- a/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td
+++ b/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td
@@ -334,16 +334,6 @@ def LLVM_GEPOp : LLVM_Op<"getelementptr", [NoSideEffect]> {
 
     /// Returns the type pointed to by the pointer argument of this GEP.
     Type getSourceElementType();
-
-    /// Populates `indices` with positions of GEP indices that correspond to
-    /// LLVMStructTypes potentially nested in the given `sourceElementType`,
-    /// which is the type pointed to by the pointer argument of a GEP. If
-    /// `structSizes` is provided, it is populated with sizes of the indexed
-    /// structs for bounds verification purposes.
-    static void findKnownStructIndices(
-        Type sourceElementType, SmallVectorImpl<unsigned> &indices,
-        SmallVectorImpl<unsigned> *structSizes = nullptr);
-
   }];
   let hasFolder = 1;
   let hasVerifier = 1;

diff  --git a/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp b/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp
index ee8cfc63c0008..8a3a9b13130f6 100644
--- a/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp
+++ b/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp
@@ -29,6 +29,7 @@
 #include "llvm/IR/Attributes.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/Type.h"
+#include "llvm/Support/Error.h"
 #include "llvm/Support/Mutex.h"
 #include "llvm/Support/SourceMgr.h"
 
@@ -398,55 +399,139 @@ SuccessorOperands SwitchOp::getSuccessorOperands(unsigned index) {
 
 constexpr int GEPOp::kDynamicIndex;
 
-/// Populates `indices` with positions of GEP indices that would correspond to
-/// LLVMStructTypes potentially nested in the given type. The type currently
-/// visited gets `currentIndex` and LLVM container types are visited
-/// recursively. The recursion is bounded and takes care of recursive types by
-/// means of the `visited` set.
-static void recordStructIndices(Type type, unsigned currentIndex,
-                                SmallVectorImpl<unsigned> &indices,
-                                SmallVectorImpl<unsigned> *structSizes,
-                                SmallPtrSet<Type, 4> &visited) {
-  if (visited.contains(type))
-    return;
+namespace {
+/// Base class for llvm::Error related to GEP index.
+class GEPIndexError : public llvm::ErrorInfo<GEPIndexError> {
+protected:
+  unsigned indexPos;
+
+public:
+  static char ID;
+
+  std::error_code convertToErrorCode() const override {
+    return llvm::inconvertibleErrorCode();
+  }
+
+  explicit GEPIndexError(unsigned pos) : indexPos(pos) {}
+};
+
+/// llvm::Error for out-of-bound GEP index.
+struct GEPIndexOutOfBoundError
+    : public llvm::ErrorInfo<GEPIndexOutOfBoundError, GEPIndexError> {
+  static char ID;
+
+  using ErrorInfo::ErrorInfo;
+
+  void log(llvm::raw_ostream &os) const override {
+    os << "index " << indexPos << " indexing a struct is out of bounds";
+  }
+};
 
-  visited.insert(type);
+/// llvm::Error for non-static GEP index indexing a struct.
+struct GEPStaticIndexError
+    : public llvm::ErrorInfo<GEPStaticIndexError, GEPIndexError> {
+  static char ID;
 
-  llvm::TypeSwitch<Type>(type)
-      .Case<LLVMStructType>([&](LLVMStructType structType) {
-        indices.push_back(currentIndex);
-        if (structSizes)
-          structSizes->push_back(structType.getBody().size());
-        for (Type elementType : structType.getBody())
-          recordStructIndices(elementType, currentIndex + 1, indices,
-                              structSizes, visited);
+  using ErrorInfo::ErrorInfo;
+
+  void log(llvm::raw_ostream &os) const override {
+    os << "expected index " << indexPos << " indexing a struct "
+       << "to be constant";
+  }
+};
+} // end anonymous namespace
+
+char GEPIndexError::ID = 0;
+char GEPIndexOutOfBoundError::ID = 0;
+char GEPStaticIndexError::ID = 0;
+
+/// For the given `structIndices` and `indices`, check if they're complied
+/// with `baseGEPType`, especially check against LLVMStructTypes nested within,
+/// and refine/promote struct index from `indices` to `updatedStructIndices`
+/// if the latter argument is not null.
+static llvm::Error
+recordStructIndices(Type baseGEPType, unsigned indexPos,
+                    ArrayRef<int32_t> structIndices, ValueRange indices,
+                    SmallVectorImpl<int32_t> *updatedStructIndices,
+                    SmallVectorImpl<Value> *remainingIndices) {
+  if (indexPos >= structIndices.size())
+    // Stop searching
+    return llvm::Error::success();
+
+  int32_t gepIndex = structIndices[indexPos];
+  bool isStaticIndex = gepIndex != GEPOp::kDynamicIndex;
+
+  unsigned dynamicIndexPos = indexPos;
+  if (!isStaticIndex)
+    dynamicIndexPos = llvm::count(structIndices.take_front(indexPos + 1),
+                                  LLVM::GEPOp::kDynamicIndex) - 1;
+
+  return llvm::TypeSwitch<Type, llvm::Error>(baseGEPType)
+      .Case<LLVMStructType>([&](LLVMStructType structType) -> llvm::Error {
+        // We don't always want to refine the index (e.g. when performing
+        // verification), so we only refine when updatedStructIndices is not
+        // null.
+        if (!isStaticIndex && updatedStructIndices) {
+          // Try to refine.
+          APInt staticIndexValue;
+          isStaticIndex = matchPattern(indices[dynamicIndexPos],
+                                       m_ConstantInt(&staticIndexValue));
+          if (isStaticIndex) {
+            assert(staticIndexValue.getBitWidth() <= 64 &&
+                   llvm::isInt<32>(staticIndexValue.getLimitedValue()) &&
+                   "struct index can't fit within int32_t");
+            gepIndex = static_cast<int32_t>(staticIndexValue.getSExtValue());
+          }
+        }
+        if (!isStaticIndex)
+          return llvm::make_error<GEPStaticIndexError>(indexPos);
+
+        ArrayRef<Type> elementTypes = structType.getBody();
+        if (gepIndex < 0 ||
+            static_cast<size_t>(gepIndex) >= elementTypes.size())
+          return llvm::make_error<GEPIndexOutOfBoundError>(indexPos);
+
+        if (updatedStructIndices)
+          (*updatedStructIndices)[indexPos] = gepIndex;
+
+        // Instead of recusively going into every children types, we only
+        // dive into the one indexed by gepIndex.
+        return recordStructIndices(elementTypes[gepIndex], indexPos + 1,
+                                   structIndices, indices, updatedStructIndices,
+                                   remainingIndices);
       })
       .Case<VectorType, LLVMScalableVectorType, LLVMFixedVectorType,
-            LLVMArrayType>([&](auto containerType) {
-        recordStructIndices(containerType.getElementType(), currentIndex + 1,
-                            indices, structSizes, visited);
-      });
+            LLVMArrayType>([&](auto containerType) -> llvm::Error {
+        // Currently we don't refine non-struct index even if it's static.
+        if (remainingIndices)
+          remainingIndices->push_back(indices[dynamicIndexPos]);
+        return recordStructIndices(containerType.getElementType(), indexPos + 1,
+                                   structIndices, indices, updatedStructIndices,
+                                   remainingIndices);
+      })
+      .Default(
+          [](auto otherType) -> llvm::Error { return llvm::Error::success(); });
 }
 
-/// Populates `indices` with positions of GEP indices that correspond to
-/// LLVMStructTypes potentially nested in the given `baseGEPType`, which must
-/// be either an LLVMPointer type or a vector thereof. If `structSizes` is
-/// provided, it is populated with sizes of the indexed structs for bounds
-/// verification purposes.
-void GEPOp::findKnownStructIndices(Type sourceElementType,
-                                   SmallVectorImpl<unsigned> &indices,
-                                   SmallVectorImpl<unsigned> *structSizes) {
-  SmallPtrSet<Type, 4> visited;
-  recordStructIndices(sourceElementType, /*currentIndex=*/1, indices,
-                      structSizes, visited);
+/// Driver function around `recordStructIndices`. Note that we always check
+/// from the second GEP index since the first one is always dynamic.
+static llvm::Error
+findStructIndices(Type baseGEPType, ArrayRef<int32_t> structIndices,
+                  ValueRange indices,
+                  SmallVectorImpl<int32_t> *updatedStructIndices = nullptr,
+                  SmallVectorImpl<Value> *remainingIndices = nullptr) {
+  if (remainingIndices)
+    // The first GEP index is always dynamic.
+    remainingIndices->push_back(indices[0]);
+  return recordStructIndices(baseGEPType, /*indexPos=*/1, structIndices,
+                             indices, updatedStructIndices, remainingIndices);
 }
 
 void GEPOp::build(OpBuilder &builder, OperationState &result, Type resultType,
                   Value basePtr, ValueRange operands,
                   ArrayRef<NamedAttribute> attributes) {
   build(builder, result, resultType, basePtr, operands,
-        SmallVector<int32_t>(operands.size(), LLVM::GEPOp::kDynamicIndex),
-        attributes);
+        SmallVector<int32_t>(operands.size(), kDynamicIndex), attributes);
 }
 
 /// Returns the elemental type of any LLVM-compatible vector type or self.
@@ -480,44 +565,10 @@ void GEPOp::build(OpBuilder &builder, OperationState &result, Type resultType,
   SmallVector<Value> remainingIndices;
   SmallVector<int32_t> updatedStructIndices(structIndices.begin(),
                                             structIndices.end());
-  SmallVector<unsigned> structRelatedPositions;
-  findKnownStructIndices(elementType, structRelatedPositions);
-
-  SmallVector<unsigned> operandsToErase;
-  for (unsigned pos : structRelatedPositions) {
-    // GEP may not be indexing as deep as some structs are located.
-    if (pos >= structIndices.size())
-      continue;
-
-    // If the index is already static, it's fine.
-    if (structIndices[pos] != kDynamicIndex)
-      continue;
-
-    // Find the corresponding operand.
-    unsigned operandPos =
-        std::count(structIndices.begin(), std::next(structIndices.begin(), pos),
-                   kDynamicIndex);
-
-    // Extract the constant value from the operand and put it into the attribute
-    // instead.
-    APInt staticIndexValue;
-    bool matched =
-        matchPattern(indices[operandPos], m_ConstantInt(&staticIndexValue));
-    (void)matched;
-    assert(matched && "index into a struct must be a constant");
-    assert(staticIndexValue.sge(APInt::getSignedMinValue(/*numBits=*/32)) &&
-           "struct index underflows 32-bit integer");
-    assert(staticIndexValue.sle(APInt::getSignedMaxValue(/*numBits=*/32)) &&
-           "struct index overflows 32-bit integer");
-    auto staticIndex = static_cast<int32_t>(staticIndexValue.getSExtValue());
-    updatedStructIndices[pos] = staticIndex;
-    operandsToErase.push_back(operandPos);
-  }
-
-  for (unsigned i = 0, e = indices.size(); i < e; ++i) {
-    if (!llvm::is_contained(operandsToErase, i))
-      remainingIndices.push_back(indices[i]);
-  }
+  if (llvm::Error err =
+          findStructIndices(elementType, structIndices, indices,
+                            &updatedStructIndices, &remainingIndices))
+    llvm::report_fatal_error(StringRef(llvm::toString(std::move(err))));
 
   assert(remainingIndices.size() == static_cast<size_t>(llvm::count(
                                         updatedStructIndices, kDynamicIndex)) &&
@@ -582,24 +633,16 @@ LogicalResult LLVM::GEPOp::verify() {
           getElemType())))
     return failure();
 
-  SmallVector<unsigned> indices;
-  SmallVector<unsigned> structSizes;
-  findKnownStructIndices(getSourceElementType(), indices, &structSizes);
-  DenseIntElementsAttr structIndices = getStructIndices();
-  for (unsigned i : llvm::seq<unsigned>(0, indices.size())) {
-    unsigned index = indices[i];
-    // GEP may not be indexing as deep as some structs nested in the type.
-    if (index >= structIndices.getNumElements())
-      continue;
-
-    int32_t staticIndex = structIndices.getValues<int32_t>()[index];
-    if (staticIndex == LLVM::GEPOp::kDynamicIndex)
-      return emitOpError() << "expected index " << index
-                           << " indexing a struct to be constant";
-    if (staticIndex < 0 || static_cast<unsigned>(staticIndex) >= structSizes[i])
-      return emitOpError() << "index " << index
-                           << " indexing a struct is out of bounds";
-  }
+  auto structIndexRange = getStructIndices().getValues<int32_t>();
+  // structIndexRange is a kind of iterator, which cannot be converted
+  // to ArrayRef directly.
+  SmallVector<int32_t> structIndices(structIndexRange.size());
+  for (unsigned i : llvm::seq<unsigned>(0, structIndexRange.size()))
+    structIndices[i] = structIndexRange[i];
+  if (llvm::Error err = findStructIndices(getSourceElementType(), structIndices,
+                                          getIndices()))
+    return emitOpError() << llvm::toString(std::move(err));
+
   return success();
 }
 

diff  --git a/mlir/lib/Target/LLVMIR/ConvertFromLLVMIR.cpp b/mlir/lib/Target/LLVMIR/ConvertFromLLVMIR.cpp
index cc10d0bccfd2b..840da68dafbbd 100644
--- a/mlir/lib/Target/LLVMIR/ConvertFromLLVMIR.cpp
+++ b/mlir/lib/Target/LLVMIR/ConvertFromLLVMIR.cpp
@@ -1003,33 +1003,26 @@ LogicalResult Importer::processInstruction(llvm::Instruction *inst) {
     // FIXME: Support inbounds GEPs.
     llvm::GetElementPtrInst *gep = cast<llvm::GetElementPtrInst>(inst);
     Value basePtr = processValue(gep->getOperand(0));
-    SmallVector<int32_t> staticIndices;
-    SmallVector<Value> dynamicIndices;
     Type sourceElementType = processType(gep->getSourceElementType());
-    SmallVector<unsigned> staticIndexPositions;
-    GEPOp::findKnownStructIndices(sourceElementType, staticIndexPositions);
-
-    for (const auto &en :
-         llvm::enumerate(llvm::drop_begin(gep->operand_values()))) {
-      llvm::Value *operand = en.value();
-      if (llvm::find(staticIndexPositions, en.index()) ==
-          staticIndexPositions.end()) {
-        staticIndices.push_back(GEPOp::kDynamicIndex);
-        dynamicIndices.push_back(processValue(operand));
-        if (!dynamicIndices.back())
-          return failure();
-      } else {
-        auto *constantInt = cast<llvm::ConstantInt>(operand);
-        staticIndices.push_back(
-            static_cast<int32_t>(constantInt->getValue().getZExtValue()));
-      }
+
+    SmallVector<Value> indices;
+    for (llvm::Value *operand : llvm::drop_begin(gep->operand_values())) {
+      indices.push_back(processValue(operand));
+      if (!indices.back())
+        return failure();
     }
+    // Treat every indices as dynamic since GEPOp::build will refine those
+    // indices into static attributes later. One small downside of this
+    // approach is that many unused `llvm.mlir.constant` would be emitted
+    // at first place.
+    SmallVector<int32_t> structIndices(indices.size(),
+                                       LLVM::GEPOp::kDynamicIndex);
 
     Type type = processType(inst->getType());
     if (!type)
       return failure();
     instMap[inst] = b.create<GEPOp>(loc, type, sourceElementType, basePtr,
-                                    dynamicIndices, staticIndices);
+                                    indices, structIndices);
     return success();
   }
   case llvm::Instruction::InsertValue: {

diff  --git a/mlir/test/Dialect/LLVMIR/dynamic-gep-index.mlir b/mlir/test/Dialect/LLVMIR/dynamic-gep-index.mlir
new file mode 100644
index 0000000000000..52282bc586fbd
--- /dev/null
+++ b/mlir/test/Dialect/LLVMIR/dynamic-gep-index.mlir
@@ -0,0 +1,12 @@
+// RUN: mlir-opt %s | FileCheck %s
+
+module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<i1, dense<8> : vector<2xi32>>, #dlti.dl_entry<i8, dense<8> : vector<2xi32>>, #dlti.dl_entry<i16, dense<16> : vector<2xi32>>, #dlti.dl_entry<i32, dense<32> : vector<2xi32>>, #dlti.dl_entry<i64, dense<[32, 64]> : vector<2xi32>>, #dlti.dl_entry<f16, dense<16> : vector<2xi32>>, #dlti.dl_entry<f64, dense<64> : vector<2xi32>>, #dlti.dl_entry<f128, dense<128> : vector<2xi32>>>} {
+  // CHECK: llvm.func @foo(%[[ARG0:.+]]: !llvm.ptr<struct<"my_struct", {{.+}}>>, %[[ARG1:.+]]: i32)
+  llvm.func @foo(%arg0: !llvm.ptr<struct<"my_struct", (struct<"sub_struct", (i32, i8)>, array<4 x i32>)>>, %arg1: i32) {
+    // CHECK: %[[C0:.+]] = llvm.mlir.constant(0 : i32)
+    %0 = llvm.mlir.constant(0 : i32) : i32
+    // CHECK: llvm.getelementptr %[[ARG0]][%[[C0]], 1, %[[ARG1]]]
+    %1 = "llvm.getelementptr"(%arg0, %0, %arg1) {structIndices = dense<[-2147483648, 1, -2147483648]> : tensor<3xi32>} : (!llvm.ptr<struct<"my_struct", (struct<"sub_struct", (i32, i8)>, array<4 x i32>)>>, i32, i32) -> !llvm.ptr<i32>
+    llvm.return
+  }
+}

diff  --git a/mlir/test/Target/LLVMIR/Import/dynamic-gep-index.ll b/mlir/test/Target/LLVMIR/Import/dynamic-gep-index.ll
new file mode 100644
index 0000000000000..df072e90132f7
--- /dev/null
+++ b/mlir/test/Target/LLVMIR/Import/dynamic-gep-index.ll
@@ -0,0 +1,12 @@
+; RUN: mlir-translate --import-llvm %s | FileCheck %s
+
+%sub_struct = type { i32, i8 }
+%my_struct = type { %sub_struct, [4 x i32] }
+
+; CHECK: llvm.func @foo(%[[ARG0:.+]]: !llvm.ptr<struct<"my_struct", {{.+}}>>, %[[ARG1:.+]]: i32)
+define void @foo(%my_struct* %arg, i32 %idx) {
+  ; CHECK: %[[C0:.+]] = llvm.mlir.constant(0 : i32)
+  ; CHECK: llvm.getelementptr %[[ARG0]][%[[C0]], 1, %[[ARG1]]]
+  %1 = getelementptr %my_struct, %my_struct* %arg, i32 0, i32 1, i32 %idx
+  ret void
+}


        


More information about the Mlir-commits mailing list