[Mlir-commits] [mlir] 0b168a4 - [mlir][LLVMIR] Use a new way to verify GEPOp indices
Min-Yih Hsu
llvmlistbot at llvm.org
Tue May 17 10:30:06 PDT 2022
Author: Min-Yih Hsu
Date: 2022-05-17T10:28:44-07:00
New Revision: 0b168a49bf584ddd9aae4be8b1907aee8ed65615
URL: https://github.com/llvm/llvm-project/commit/0b168a49bf584ddd9aae4be8b1907aee8ed65615
DIFF: https://github.com/llvm/llvm-project/commit/0b168a49bf584ddd9aae4be8b1907aee8ed65615.diff
LOG: [mlir][LLVMIR] Use a new way to verify GEPOp indices
Previously, GEPOp relies on `findKnownStructIndices` to check if a GEP
index should be static. The truth is, `findKnownStructIndices` can only
tell you a GEP index _might_ be indexing into a struct (which should use
a static GEP index). But GEPOp::build and GEPOp::verify are falsely
taking this information as a certain answer, which creates many false
alarms like the one depicted in
`test/Target/LLVMIR/Import/dynamic-gep-index.ll`.
The solution presented here adopts a new verification scheme: When we're
recursively checking the child element types of a struct type, instead
of checking every child types, we only check the one dictated by the
(static) GEP index value. We also combine "refinement" logics --
refine/promote struct index mlir::Value into constants -- into the very
verification process since they have lots of logics in common. The
resulting code is more concise and less brittle.
We also hide GEPOp::findKnownStructIndices since most of the
aforementioned logics are already encapsulated within GEPOp::build and
GEPOp::verify, we found little reason for findKnownStructIndices (or the
new findStructIndices) to be public.
Differential Revision: https://reviews.llvm.org/D124935
Added:
mlir/test/Dialect/LLVMIR/dynamic-gep-index.mlir
mlir/test/Target/LLVMIR/Import/dynamic-gep-index.ll
Modified:
mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td
mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp
mlir/lib/Target/LLVMIR/ConvertFromLLVMIR.cpp
Removed:
################################################################################
diff --git a/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td b/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td
index 8f4669f116632..9824a3710709e 100644
--- a/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td
+++ b/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td
@@ -334,16 +334,6 @@ def LLVM_GEPOp : LLVM_Op<"getelementptr", [NoSideEffect]> {
/// Returns the type pointed to by the pointer argument of this GEP.
Type getSourceElementType();
-
- /// Populates `indices` with positions of GEP indices that correspond to
- /// LLVMStructTypes potentially nested in the given `sourceElementType`,
- /// which is the type pointed to by the pointer argument of a GEP. If
- /// `structSizes` is provided, it is populated with sizes of the indexed
- /// structs for bounds verification purposes.
- static void findKnownStructIndices(
- Type sourceElementType, SmallVectorImpl<unsigned> &indices,
- SmallVectorImpl<unsigned> *structSizes = nullptr);
-
}];
let hasFolder = 1;
let hasVerifier = 1;
diff --git a/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp b/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp
index ee8cfc63c0008..8a3a9b13130f6 100644
--- a/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp
+++ b/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp
@@ -29,6 +29,7 @@
#include "llvm/IR/Attributes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Type.h"
+#include "llvm/Support/Error.h"
#include "llvm/Support/Mutex.h"
#include "llvm/Support/SourceMgr.h"
@@ -398,55 +399,139 @@ SuccessorOperands SwitchOp::getSuccessorOperands(unsigned index) {
constexpr int GEPOp::kDynamicIndex;
-/// Populates `indices` with positions of GEP indices that would correspond to
-/// LLVMStructTypes potentially nested in the given type. The type currently
-/// visited gets `currentIndex` and LLVM container types are visited
-/// recursively. The recursion is bounded and takes care of recursive types by
-/// means of the `visited` set.
-static void recordStructIndices(Type type, unsigned currentIndex,
- SmallVectorImpl<unsigned> &indices,
- SmallVectorImpl<unsigned> *structSizes,
- SmallPtrSet<Type, 4> &visited) {
- if (visited.contains(type))
- return;
+namespace {
+/// Base class for llvm::Error related to GEP index.
+class GEPIndexError : public llvm::ErrorInfo<GEPIndexError> {
+protected:
+ unsigned indexPos;
+
+public:
+ static char ID;
+
+ std::error_code convertToErrorCode() const override {
+ return llvm::inconvertibleErrorCode();
+ }
+
+ explicit GEPIndexError(unsigned pos) : indexPos(pos) {}
+};
+
+/// llvm::Error for out-of-bound GEP index.
+struct GEPIndexOutOfBoundError
+ : public llvm::ErrorInfo<GEPIndexOutOfBoundError, GEPIndexError> {
+ static char ID;
+
+ using ErrorInfo::ErrorInfo;
+
+ void log(llvm::raw_ostream &os) const override {
+ os << "index " << indexPos << " indexing a struct is out of bounds";
+ }
+};
- visited.insert(type);
+/// llvm::Error for non-static GEP index indexing a struct.
+struct GEPStaticIndexError
+ : public llvm::ErrorInfo<GEPStaticIndexError, GEPIndexError> {
+ static char ID;
- llvm::TypeSwitch<Type>(type)
- .Case<LLVMStructType>([&](LLVMStructType structType) {
- indices.push_back(currentIndex);
- if (structSizes)
- structSizes->push_back(structType.getBody().size());
- for (Type elementType : structType.getBody())
- recordStructIndices(elementType, currentIndex + 1, indices,
- structSizes, visited);
+ using ErrorInfo::ErrorInfo;
+
+ void log(llvm::raw_ostream &os) const override {
+ os << "expected index " << indexPos << " indexing a struct "
+ << "to be constant";
+ }
+};
+} // end anonymous namespace
+
+char GEPIndexError::ID = 0;
+char GEPIndexOutOfBoundError::ID = 0;
+char GEPStaticIndexError::ID = 0;
+
+/// For the given `structIndices` and `indices`, check if they're complied
+/// with `baseGEPType`, especially check against LLVMStructTypes nested within,
+/// and refine/promote struct index from `indices` to `updatedStructIndices`
+/// if the latter argument is not null.
+static llvm::Error
+recordStructIndices(Type baseGEPType, unsigned indexPos,
+ ArrayRef<int32_t> structIndices, ValueRange indices,
+ SmallVectorImpl<int32_t> *updatedStructIndices,
+ SmallVectorImpl<Value> *remainingIndices) {
+ if (indexPos >= structIndices.size())
+ // Stop searching
+ return llvm::Error::success();
+
+ int32_t gepIndex = structIndices[indexPos];
+ bool isStaticIndex = gepIndex != GEPOp::kDynamicIndex;
+
+ unsigned dynamicIndexPos = indexPos;
+ if (!isStaticIndex)
+ dynamicIndexPos = llvm::count(structIndices.take_front(indexPos + 1),
+ LLVM::GEPOp::kDynamicIndex) - 1;
+
+ return llvm::TypeSwitch<Type, llvm::Error>(baseGEPType)
+ .Case<LLVMStructType>([&](LLVMStructType structType) -> llvm::Error {
+ // We don't always want to refine the index (e.g. when performing
+ // verification), so we only refine when updatedStructIndices is not
+ // null.
+ if (!isStaticIndex && updatedStructIndices) {
+ // Try to refine.
+ APInt staticIndexValue;
+ isStaticIndex = matchPattern(indices[dynamicIndexPos],
+ m_ConstantInt(&staticIndexValue));
+ if (isStaticIndex) {
+ assert(staticIndexValue.getBitWidth() <= 64 &&
+ llvm::isInt<32>(staticIndexValue.getLimitedValue()) &&
+ "struct index can't fit within int32_t");
+ gepIndex = static_cast<int32_t>(staticIndexValue.getSExtValue());
+ }
+ }
+ if (!isStaticIndex)
+ return llvm::make_error<GEPStaticIndexError>(indexPos);
+
+ ArrayRef<Type> elementTypes = structType.getBody();
+ if (gepIndex < 0 ||
+ static_cast<size_t>(gepIndex) >= elementTypes.size())
+ return llvm::make_error<GEPIndexOutOfBoundError>(indexPos);
+
+ if (updatedStructIndices)
+ (*updatedStructIndices)[indexPos] = gepIndex;
+
+ // Instead of recusively going into every children types, we only
+ // dive into the one indexed by gepIndex.
+ return recordStructIndices(elementTypes[gepIndex], indexPos + 1,
+ structIndices, indices, updatedStructIndices,
+ remainingIndices);
})
.Case<VectorType, LLVMScalableVectorType, LLVMFixedVectorType,
- LLVMArrayType>([&](auto containerType) {
- recordStructIndices(containerType.getElementType(), currentIndex + 1,
- indices, structSizes, visited);
- });
+ LLVMArrayType>([&](auto containerType) -> llvm::Error {
+ // Currently we don't refine non-struct index even if it's static.
+ if (remainingIndices)
+ remainingIndices->push_back(indices[dynamicIndexPos]);
+ return recordStructIndices(containerType.getElementType(), indexPos + 1,
+ structIndices, indices, updatedStructIndices,
+ remainingIndices);
+ })
+ .Default(
+ [](auto otherType) -> llvm::Error { return llvm::Error::success(); });
}
-/// Populates `indices` with positions of GEP indices that correspond to
-/// LLVMStructTypes potentially nested in the given `baseGEPType`, which must
-/// be either an LLVMPointer type or a vector thereof. If `structSizes` is
-/// provided, it is populated with sizes of the indexed structs for bounds
-/// verification purposes.
-void GEPOp::findKnownStructIndices(Type sourceElementType,
- SmallVectorImpl<unsigned> &indices,
- SmallVectorImpl<unsigned> *structSizes) {
- SmallPtrSet<Type, 4> visited;
- recordStructIndices(sourceElementType, /*currentIndex=*/1, indices,
- structSizes, visited);
+/// Driver function around `recordStructIndices`. Note that we always check
+/// from the second GEP index since the first one is always dynamic.
+static llvm::Error
+findStructIndices(Type baseGEPType, ArrayRef<int32_t> structIndices,
+ ValueRange indices,
+ SmallVectorImpl<int32_t> *updatedStructIndices = nullptr,
+ SmallVectorImpl<Value> *remainingIndices = nullptr) {
+ if (remainingIndices)
+ // The first GEP index is always dynamic.
+ remainingIndices->push_back(indices[0]);
+ return recordStructIndices(baseGEPType, /*indexPos=*/1, structIndices,
+ indices, updatedStructIndices, remainingIndices);
}
void GEPOp::build(OpBuilder &builder, OperationState &result, Type resultType,
Value basePtr, ValueRange operands,
ArrayRef<NamedAttribute> attributes) {
build(builder, result, resultType, basePtr, operands,
- SmallVector<int32_t>(operands.size(), LLVM::GEPOp::kDynamicIndex),
- attributes);
+ SmallVector<int32_t>(operands.size(), kDynamicIndex), attributes);
}
/// Returns the elemental type of any LLVM-compatible vector type or self.
@@ -480,44 +565,10 @@ void GEPOp::build(OpBuilder &builder, OperationState &result, Type resultType,
SmallVector<Value> remainingIndices;
SmallVector<int32_t> updatedStructIndices(structIndices.begin(),
structIndices.end());
- SmallVector<unsigned> structRelatedPositions;
- findKnownStructIndices(elementType, structRelatedPositions);
-
- SmallVector<unsigned> operandsToErase;
- for (unsigned pos : structRelatedPositions) {
- // GEP may not be indexing as deep as some structs are located.
- if (pos >= structIndices.size())
- continue;
-
- // If the index is already static, it's fine.
- if (structIndices[pos] != kDynamicIndex)
- continue;
-
- // Find the corresponding operand.
- unsigned operandPos =
- std::count(structIndices.begin(), std::next(structIndices.begin(), pos),
- kDynamicIndex);
-
- // Extract the constant value from the operand and put it into the attribute
- // instead.
- APInt staticIndexValue;
- bool matched =
- matchPattern(indices[operandPos], m_ConstantInt(&staticIndexValue));
- (void)matched;
- assert(matched && "index into a struct must be a constant");
- assert(staticIndexValue.sge(APInt::getSignedMinValue(/*numBits=*/32)) &&
- "struct index underflows 32-bit integer");
- assert(staticIndexValue.sle(APInt::getSignedMaxValue(/*numBits=*/32)) &&
- "struct index overflows 32-bit integer");
- auto staticIndex = static_cast<int32_t>(staticIndexValue.getSExtValue());
- updatedStructIndices[pos] = staticIndex;
- operandsToErase.push_back(operandPos);
- }
-
- for (unsigned i = 0, e = indices.size(); i < e; ++i) {
- if (!llvm::is_contained(operandsToErase, i))
- remainingIndices.push_back(indices[i]);
- }
+ if (llvm::Error err =
+ findStructIndices(elementType, structIndices, indices,
+ &updatedStructIndices, &remainingIndices))
+ llvm::report_fatal_error(StringRef(llvm::toString(std::move(err))));
assert(remainingIndices.size() == static_cast<size_t>(llvm::count(
updatedStructIndices, kDynamicIndex)) &&
@@ -582,24 +633,16 @@ LogicalResult LLVM::GEPOp::verify() {
getElemType())))
return failure();
- SmallVector<unsigned> indices;
- SmallVector<unsigned> structSizes;
- findKnownStructIndices(getSourceElementType(), indices, &structSizes);
- DenseIntElementsAttr structIndices = getStructIndices();
- for (unsigned i : llvm::seq<unsigned>(0, indices.size())) {
- unsigned index = indices[i];
- // GEP may not be indexing as deep as some structs nested in the type.
- if (index >= structIndices.getNumElements())
- continue;
-
- int32_t staticIndex = structIndices.getValues<int32_t>()[index];
- if (staticIndex == LLVM::GEPOp::kDynamicIndex)
- return emitOpError() << "expected index " << index
- << " indexing a struct to be constant";
- if (staticIndex < 0 || static_cast<unsigned>(staticIndex) >= structSizes[i])
- return emitOpError() << "index " << index
- << " indexing a struct is out of bounds";
- }
+ auto structIndexRange = getStructIndices().getValues<int32_t>();
+ // structIndexRange is a kind of iterator, which cannot be converted
+ // to ArrayRef directly.
+ SmallVector<int32_t> structIndices(structIndexRange.size());
+ for (unsigned i : llvm::seq<unsigned>(0, structIndexRange.size()))
+ structIndices[i] = structIndexRange[i];
+ if (llvm::Error err = findStructIndices(getSourceElementType(), structIndices,
+ getIndices()))
+ return emitOpError() << llvm::toString(std::move(err));
+
return success();
}
diff --git a/mlir/lib/Target/LLVMIR/ConvertFromLLVMIR.cpp b/mlir/lib/Target/LLVMIR/ConvertFromLLVMIR.cpp
index cc10d0bccfd2b..840da68dafbbd 100644
--- a/mlir/lib/Target/LLVMIR/ConvertFromLLVMIR.cpp
+++ b/mlir/lib/Target/LLVMIR/ConvertFromLLVMIR.cpp
@@ -1003,33 +1003,26 @@ LogicalResult Importer::processInstruction(llvm::Instruction *inst) {
// FIXME: Support inbounds GEPs.
llvm::GetElementPtrInst *gep = cast<llvm::GetElementPtrInst>(inst);
Value basePtr = processValue(gep->getOperand(0));
- SmallVector<int32_t> staticIndices;
- SmallVector<Value> dynamicIndices;
Type sourceElementType = processType(gep->getSourceElementType());
- SmallVector<unsigned> staticIndexPositions;
- GEPOp::findKnownStructIndices(sourceElementType, staticIndexPositions);
-
- for (const auto &en :
- llvm::enumerate(llvm::drop_begin(gep->operand_values()))) {
- llvm::Value *operand = en.value();
- if (llvm::find(staticIndexPositions, en.index()) ==
- staticIndexPositions.end()) {
- staticIndices.push_back(GEPOp::kDynamicIndex);
- dynamicIndices.push_back(processValue(operand));
- if (!dynamicIndices.back())
- return failure();
- } else {
- auto *constantInt = cast<llvm::ConstantInt>(operand);
- staticIndices.push_back(
- static_cast<int32_t>(constantInt->getValue().getZExtValue()));
- }
+
+ SmallVector<Value> indices;
+ for (llvm::Value *operand : llvm::drop_begin(gep->operand_values())) {
+ indices.push_back(processValue(operand));
+ if (!indices.back())
+ return failure();
}
+ // Treat every indices as dynamic since GEPOp::build will refine those
+ // indices into static attributes later. One small downside of this
+ // approach is that many unused `llvm.mlir.constant` would be emitted
+ // at first place.
+ SmallVector<int32_t> structIndices(indices.size(),
+ LLVM::GEPOp::kDynamicIndex);
Type type = processType(inst->getType());
if (!type)
return failure();
instMap[inst] = b.create<GEPOp>(loc, type, sourceElementType, basePtr,
- dynamicIndices, staticIndices);
+ indices, structIndices);
return success();
}
case llvm::Instruction::InsertValue: {
diff --git a/mlir/test/Dialect/LLVMIR/dynamic-gep-index.mlir b/mlir/test/Dialect/LLVMIR/dynamic-gep-index.mlir
new file mode 100644
index 0000000000000..52282bc586fbd
--- /dev/null
+++ b/mlir/test/Dialect/LLVMIR/dynamic-gep-index.mlir
@@ -0,0 +1,12 @@
+// RUN: mlir-opt %s | FileCheck %s
+
+module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<i1, dense<8> : vector<2xi32>>, #dlti.dl_entry<i8, dense<8> : vector<2xi32>>, #dlti.dl_entry<i16, dense<16> : vector<2xi32>>, #dlti.dl_entry<i32, dense<32> : vector<2xi32>>, #dlti.dl_entry<i64, dense<[32, 64]> : vector<2xi32>>, #dlti.dl_entry<f16, dense<16> : vector<2xi32>>, #dlti.dl_entry<f64, dense<64> : vector<2xi32>>, #dlti.dl_entry<f128, dense<128> : vector<2xi32>>>} {
+ // CHECK: llvm.func @foo(%[[ARG0:.+]]: !llvm.ptr<struct<"my_struct", {{.+}}>>, %[[ARG1:.+]]: i32)
+ llvm.func @foo(%arg0: !llvm.ptr<struct<"my_struct", (struct<"sub_struct", (i32, i8)>, array<4 x i32>)>>, %arg1: i32) {
+ // CHECK: %[[C0:.+]] = llvm.mlir.constant(0 : i32)
+ %0 = llvm.mlir.constant(0 : i32) : i32
+ // CHECK: llvm.getelementptr %[[ARG0]][%[[C0]], 1, %[[ARG1]]]
+ %1 = "llvm.getelementptr"(%arg0, %0, %arg1) {structIndices = dense<[-2147483648, 1, -2147483648]> : tensor<3xi32>} : (!llvm.ptr<struct<"my_struct", (struct<"sub_struct", (i32, i8)>, array<4 x i32>)>>, i32, i32) -> !llvm.ptr<i32>
+ llvm.return
+ }
+}
diff --git a/mlir/test/Target/LLVMIR/Import/dynamic-gep-index.ll b/mlir/test/Target/LLVMIR/Import/dynamic-gep-index.ll
new file mode 100644
index 0000000000000..df072e90132f7
--- /dev/null
+++ b/mlir/test/Target/LLVMIR/Import/dynamic-gep-index.ll
@@ -0,0 +1,12 @@
+; RUN: mlir-translate --import-llvm %s | FileCheck %s
+
+%sub_struct = type { i32, i8 }
+%my_struct = type { %sub_struct, [4 x i32] }
+
+; CHECK: llvm.func @foo(%[[ARG0:.+]]: !llvm.ptr<struct<"my_struct", {{.+}}>>, %[[ARG1:.+]]: i32)
+define void @foo(%my_struct* %arg, i32 %idx) {
+ ; CHECK: %[[C0:.+]] = llvm.mlir.constant(0 : i32)
+ ; CHECK: llvm.getelementptr %[[ARG0]][%[[C0]], 1, %[[ARG1]]]
+ %1 = getelementptr %my_struct, %my_struct* %arg, i32 0, i32 1, i32 %idx
+ ret void
+}
More information about the Mlir-commits
mailing list