[Mlir-commits] [mlir] 2171f34 - [MLIR][LLVM] Remove the type consistency pass (#93283)

llvmlistbot at llvm.org
Fri May 24 04:22:34 PDT 2024


Author: Christian Ulmann
Date: 2024-05-24T13:22:30+02:00
New Revision: 2171f34121759a1495a6de840bcb92982d1b7a64

URL: https://github.com/llvm/llvm-project/commit/2171f34121759a1495a6de840bcb92982d1b7a64
DIFF: https://github.com/llvm/llvm-project/commit/2171f34121759a1495a6de840bcb92982d1b7a64.diff

LOG: [MLIR][LLVM] Remove the type consistency pass (#93283)

This commit removes the LLVM dialect's type consistency pass. This pass
was originally introduced to make type information on memory operations
consistent. The main beneficiaries of this consistency were Mem2Reg and
SROA, which have since been improved to no longer require consistent
type information.

Apart from providing functionality that is no longer required, the pass
had some fundamental flaws that led to issues (both are illustrated in
the sketch after this list):
* It introduced trivial GEPs (with only zero indices) that could
immediately be folded away again.
* Aggressively splitting stores led to substantial performance
regressions in some cases. Subsequent memory coalescing was not able to
recover the original wide stores, because the split stores rely on
non-trivial bit-fiddling.
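The sketch below, adapted from the removed type-consistency.mlir test,
shows both flaws on a !llvm.struct<(i32, i32)>. The value names (%ptr,
%arg, %alloca, %c32) are illustrative placeholders and the constant
definitions are elided:

    // Flaw 1: a trivial GEP. Indices [0, 0] offset %ptr by zero bytes,
    // so this GEP computes the same address as %ptr and folds away.
    %gep = llvm.getelementptr %ptr[0, 0] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<(i32, i32)>

    // Flaw 2: a single wide store...
    llvm.store %arg, %alloca : i64, !llvm.ptr
    // ...is split into per-field stores via shift-and-truncate
    // bit-fiddling (%c32 is an llvm.mlir.constant holding 32 : i64):
    %low = llvm.trunc %arg : i64 to i32
    llvm.store %low, %alloca : i32, !llvm.ptr
    %shr = llvm.lshr %arg, %c32 : i64
    %high = llvm.trunc %shr : i64 to i32
    %field = llvm.getelementptr %alloca[0, 1] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<(i32, i32)>
    llvm.store %high, %field : i32, !llvm.ptr

Recovering the original i64 store from this expanded form requires
reasoning about the shifts and truncations, which is why later
coalescing passes were unable to undo the split.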

Added: 
    

Modified: 
    mlir/include/mlir/Dialect/LLVMIR/Transforms/Passes.h
    mlir/include/mlir/Dialect/LLVMIR/Transforms/Passes.td
    mlir/lib/Dialect/LLVMIR/Transforms/CMakeLists.txt

Removed: 
    mlir/include/mlir/Dialect/LLVMIR/Transforms/TypeConsistency.h
    mlir/lib/Dialect/LLVMIR/Transforms/TypeConsistency.cpp
    mlir/test/Dialect/LLVMIR/type-consistency.mlir


################################################################################
diff --git a/mlir/include/mlir/Dialect/LLVMIR/Transforms/Passes.h b/mlir/include/mlir/Dialect/LLVMIR/Transforms/Passes.h
index 13e10b29c0743..a7bf8796c0277 100644
--- a/mlir/include/mlir/Dialect/LLVMIR/Transforms/Passes.h
+++ b/mlir/include/mlir/Dialect/LLVMIR/Transforms/Passes.h
@@ -13,7 +13,6 @@
 #include "mlir/Dialect/LLVMIR/Transforms/LegalizeForExport.h"
 #include "mlir/Dialect/LLVMIR/Transforms/OptimizeForNVVM.h"
 #include "mlir/Dialect/LLVMIR/Transforms/RequestCWrappers.h"
-#include "mlir/Dialect/LLVMIR/Transforms/TypeConsistency.h"
 #include "mlir/Pass/Pass.h"
 
 namespace mlir {

diff --git a/mlir/include/mlir/Dialect/LLVMIR/Transforms/Passes.td b/mlir/include/mlir/Dialect/LLVMIR/Transforms/Passes.td
index 0242cfd9abb7d..11d1b94110714 100644
--- a/mlir/include/mlir/Dialect/LLVMIR/Transforms/Passes.td
+++ b/mlir/include/mlir/Dialect/LLVMIR/Transforms/Passes.td
@@ -43,24 +43,6 @@ def LLVMRequestCWrappers
   let constructor = "::mlir::LLVM::createRequestCWrappersPass()";
 }
 
-def LLVMTypeConsistency
-    : Pass<"llvm-type-consistency", "::mlir::LLVM::LLVMFuncOp"> {
-  let summary = "Rewrites to improve type consistency";
-  let description = [{
-    Set of rewrites to improve the coherency of types within an LLVM dialect
-    program. This will adjust operations operating on pointers so they interpret
-    their associated pointee type as consistently as possible.
-  }];
-  let constructor = "::mlir::LLVM::createTypeConsistencyPass()";
-
-  let options = [
-    Option<"maxVectorSplitSize", "max-vector-split-size", "unsigned",
-           /*default=*/"512",
-           "Maximum size in bits of a vector value in a load or store operation"
-           " operating on multiple elements that should still be split">,
-  ];
-}
-
 def NVVMOptimizeForTarget : Pass<"llvm-optimize-for-nvvm-target"> {
   let summary = "Optimize NVVM IR";
   let constructor = "::mlir::NVVM::createOptimizeForTargetPass()";

diff --git a/mlir/include/mlir/Dialect/LLVMIR/Transforms/TypeConsistency.h b/mlir/include/mlir/Dialect/LLVMIR/Transforms/TypeConsistency.h
deleted file mode 100644
index a4bb380b99b86..0000000000000
--- a/mlir/include/mlir/Dialect/LLVMIR/Transforms/TypeConsistency.h
+++ /dev/null
@@ -1,73 +0,0 @@
-//===- TypeConsistency.h - Rewrites to improve type consistency -----------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// Set of rewrites to improve the coherency of types within an LLVM dialect
-// program. This will adjust operations around a given pointer so they interpret
-// its pointee type as consistently as possible.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef MLIR_DIALECT_LLVMIR_TRANSFORMS_TYPECONSISTENCY_H
-#define MLIR_DIALECT_LLVMIR_TRANSFORMS_TYPECONSISTENCY_H
-
-#include "mlir/Dialect/LLVMIR/LLVMDialect.h"
-#include "mlir/IR/PatternMatch.h"
-#include "mlir/Pass/Pass.h"
-
-namespace mlir {
-namespace LLVM {
-
-#define GEN_PASS_DECL_LLVMTYPECONSISTENCY
-#include "mlir/Dialect/LLVMIR/Transforms/Passes.h.inc"
-
-/// Creates a pass that adjusts operations operating on pointers so they
-/// interpret pointee types as consistently as possible.
-std::unique_ptr<Pass> createTypeConsistencyPass();
-
-/// Canonicalizes GEPs of which the base type and the pointer's type hint do not
-/// match. This is done by replacing the original GEP into a GEP with the type
-/// hint as a base type when an element of the hinted type aligns with the
-/// original GEP.
-class CanonicalizeAlignedGep : public OpRewritePattern<GEPOp> {
-public:
-  using OpRewritePattern::OpRewritePattern;
-
-  LogicalResult matchAndRewrite(GEPOp gep,
-                                PatternRewriter &rewriter) const override;
-};
-
-/// Splits stores which write into multiple adjacent elements of an aggregate
-/// through a pointer. Currently, integers and vector are split and stores
-/// are generated for every element being stored to in a type-consistent manner.
-/// This is done on a best-effort basis.
-class SplitStores : public OpRewritePattern<StoreOp> {
-  unsigned maxVectorSplitSize;
-
-public:
-  SplitStores(MLIRContext *context, unsigned maxVectorSplitSize)
-      : OpRewritePattern(context), maxVectorSplitSize(maxVectorSplitSize) {}
-
-  LogicalResult matchAndRewrite(StoreOp store,
-                                PatternRewriter &rewrite) const override;
-};
-
-/// Splits GEPs with more than two indices into multiple GEPs with exactly
-/// two indices. The created GEPs are then guaranteed to index into only
-/// one aggregate at a time.
-class SplitGEP : public OpRewritePattern<GEPOp> {
-public:
-  using OpRewritePattern::OpRewritePattern;
-
-  LogicalResult matchAndRewrite(GEPOp gepOp,
-                                PatternRewriter &rewriter) const override;
-};
-
-} // namespace LLVM
-} // namespace mlir
-
-#endif // MLIR_DIALECT_LLVMIR_TRANSFORMS_TYPECONSISTENCY_H

diff --git a/mlir/lib/Dialect/LLVMIR/Transforms/CMakeLists.txt b/mlir/lib/Dialect/LLVMIR/Transforms/CMakeLists.txt
index c80494a440116..728885fcbeaf3 100644
--- a/mlir/lib/Dialect/LLVMIR/Transforms/CMakeLists.txt
+++ b/mlir/lib/Dialect/LLVMIR/Transforms/CMakeLists.txt
@@ -6,7 +6,6 @@ add_mlir_dialect_library(MLIRLLVMIRTransforms
   LegalizeForExport.cpp
   OptimizeForNVVM.cpp
   RequestCWrappers.cpp
-  TypeConsistency.cpp
 
   DEPENDS
   MLIRLLVMPassIncGen

diff --git a/mlir/lib/Dialect/LLVMIR/Transforms/TypeConsistency.cpp b/mlir/lib/Dialect/LLVMIR/Transforms/TypeConsistency.cpp
deleted file mode 100644
index 0a372ad0c52fc..0000000000000
--- a/mlir/lib/Dialect/LLVMIR/Transforms/TypeConsistency.cpp
+++ /dev/null
@@ -1,575 +0,0 @@
-//===- TypeConsistency.cpp - Rewrites to improve type consistency ---------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include "mlir/Dialect/LLVMIR/Transforms/TypeConsistency.h"
-#include "mlir/Transforms/GreedyPatternRewriteDriver.h"
-#include "llvm/ADT/TypeSwitch.h"
-
-namespace mlir {
-namespace LLVM {
-#define GEN_PASS_DEF_LLVMTYPECONSISTENCY
-#include "mlir/Dialect/LLVMIR/Transforms/Passes.h.inc"
-} // namespace LLVM
-} // namespace mlir
-
-using namespace mlir;
-using namespace LLVM;
-
-//===----------------------------------------------------------------------===//
-// Utils
-//===----------------------------------------------------------------------===//
-
-/// Checks that a pointer value has a pointee type hint consistent with the
-/// expected type. Returns the type it actually hints to if it differs, or
-/// nullptr if the type is consistent or impossible to analyze.
-static Type isElementTypeInconsistent(Value addr, Type expectedType) {
-  auto defOp = dyn_cast_or_null<GetResultPtrElementType>(addr.getDefiningOp());
-  if (!defOp)
-    return nullptr;
-
-  Type elemType = defOp.getResultPtrElementType();
-  if (!elemType)
-    return nullptr;
-
-  if (elemType == expectedType)
-    return nullptr;
-
-  return elemType;
-}
-
-//===----------------------------------------------------------------------===//
-// CanonicalizeAlignedGep
-//===----------------------------------------------------------------------===//
-
-/// Returns the amount of bytes the provided GEP elements will offset the
-/// pointer by. Returns nullopt if the offset could not be computed.
-static std::optional<uint64_t> gepToByteOffset(DataLayout &layout, GEPOp gep) {
-
-  SmallVector<uint32_t> indices;
-  // Ensures all indices are static and fetches them.
-  for (auto index : gep.getIndices()) {
-    IntegerAttr indexInt = llvm::dyn_cast_if_present<IntegerAttr>(index);
-    if (!indexInt)
-      return std::nullopt;
-    int32_t gepIndex = indexInt.getInt();
-    if (gepIndex < 0)
-      return std::nullopt;
-    indices.push_back(static_cast<uint32_t>(gepIndex));
-  }
-
-  uint64_t offset = indices[0] * layout.getTypeSize(gep.getElemType());
-
-  Type currentType = gep.getElemType();
-  for (uint32_t index : llvm::drop_begin(indices)) {
-    bool shouldCancel =
-        TypeSwitch<Type, bool>(currentType)
-            .Case([&](LLVMArrayType arrayType) {
-              if (arrayType.getNumElements() <= index)
-                return true;
-              offset += index * layout.getTypeSize(arrayType.getElementType());
-              currentType = arrayType.getElementType();
-              return false;
-            })
-            .Case([&](LLVMStructType structType) {
-              ArrayRef<Type> body = structType.getBody();
-              if (body.size() <= index)
-                return true;
-              for (uint32_t i = 0; i < index; i++) {
-                if (!structType.isPacked())
-                  offset = llvm::alignTo(offset,
-                                         layout.getTypeABIAlignment(body[i]));
-                offset += layout.getTypeSize(body[i]);
-              }
-              currentType = body[index];
-              return false;
-            })
-            .Default([](Type) { return true; });
-
-    if (shouldCancel)
-      return std::nullopt;
-  }
-
-  return offset;
-}
-
-/// Fills in `equivalentIndicesOut` with GEP indices that would be equivalent to
-/// offsetting a pointer by `offset` bytes, assuming the GEP has `base` as base
-/// type.
-static LogicalResult
-findIndicesForOffset(DataLayout &layout, Type base, uint64_t offset,
-                     SmallVectorImpl<GEPArg> &equivalentIndicesOut) {
-
-  uint64_t baseSize = layout.getTypeSize(base);
-  uint64_t rootIndex = offset / baseSize;
-  if (rootIndex > std::numeric_limits<uint32_t>::max())
-    return failure();
-  equivalentIndicesOut.push_back(rootIndex);
-
-  uint64_t distanceToStart = rootIndex * baseSize;
-
-#ifndef NDEBUG
-  auto isWithinCurrentType = [&](Type currentType) {
-    return offset < distanceToStart + layout.getTypeSize(currentType);
-  };
-#endif
-
-  Type currentType = base;
-  while (distanceToStart < offset) {
-    // While an index that does not perfectly align with offset has not been
-    // reached...
-
-    assert(isWithinCurrentType(currentType));
-
-    bool shouldCancel =
-        TypeSwitch<Type, bool>(currentType)
-            .Case([&](LLVMArrayType arrayType) {
-              // Find which element of the array contains the offset.
-              uint64_t elemSize =
-                  layout.getTypeSize(arrayType.getElementType());
-              uint64_t index = (offset - distanceToStart) / elemSize;
-              equivalentIndicesOut.push_back(index);
-              distanceToStart += index * elemSize;
-
-              // Then, try to find where in the element the offset is. If the
-              // offset is exactly the beginning of the element, the loop is
-              // complete.
-              currentType = arrayType.getElementType();
-
-              // Only continue if the element in question can be indexed using
-              // an i32.
-              return index > std::numeric_limits<uint32_t>::max();
-            })
-            .Case([&](LLVMStructType structType) {
-              ArrayRef<Type> body = structType.getBody();
-              uint32_t index = 0;
-
-              // Walk over the elements of the struct to find in which of them
-              // the offset is.
-              for (Type elem : body) {
-                uint64_t elemSize = layout.getTypeSize(elem);
-                if (!structType.isPacked()) {
-                  distanceToStart = llvm::alignTo(
-                      distanceToStart, layout.getTypeABIAlignment(elem));
-                  // If the offset is in padding, cancel the rewrite.
-                  if (offset < distanceToStart)
-                    return true;
-                }
-
-                if (offset < distanceToStart + elemSize) {
-                  // The offset is within this element, stop iterating the
-                  // struct and look within the current element.
-                  equivalentIndicesOut.push_back(index);
-                  currentType = elem;
-                  return false;
-                }
-
-                // The offset is not within this element, continue walking over
-                // the struct.
-                distanceToStart += elemSize;
-                index++;
-              }
-
-              // The offset was supposed to be within this struct but is not.
-              // This can happen if the offset points into final padding.
-              // Anyway, nothing can be done.
-              return true;
-            })
-            .Default([](Type) {
-              // If the offset is within a type that cannot be split, no indices
-              // will yield this offset. This can happen if the offset is not
-              // perfectly aligned with a leaf type.
-              // TODO: support vectors.
-              return true;
-            });
-
-    if (shouldCancel)
-      return failure();
-  }
-
-  return success();
-}
-
-/// Returns the consistent type for the GEP if the GEP is not type-consistent.
-/// Returns failure if the GEP is already consistent.
-static FailureOr<Type> getRequiredConsistentGEPType(GEPOp gep) {
-  // GEP of typed pointers are not supported.
-  if (!gep.getElemType())
-    return failure();
-
-  std::optional<Type> maybeBaseType = gep.getElemType();
-  if (!maybeBaseType)
-    return failure();
-  Type baseType = *maybeBaseType;
-
-  Type typeHint = isElementTypeInconsistent(gep.getBase(), baseType);
-  if (!typeHint)
-    return failure();
-  return typeHint;
-}
-
-LogicalResult
-CanonicalizeAlignedGep::matchAndRewrite(GEPOp gep,
-                                        PatternRewriter &rewriter) const {
-  FailureOr<Type> typeHint = getRequiredConsistentGEPType(gep);
-  if (failed(typeHint)) {
-    // GEP is already canonical, nothing to do here.
-    return failure();
-  }
-
-  DataLayout layout = DataLayout::closest(gep);
-  std::optional<uint64_t> desiredOffset = gepToByteOffset(layout, gep);
-  if (!desiredOffset)
-    return failure();
-
-  SmallVector<GEPArg> newIndices;
-  if (failed(
-          findIndicesForOffset(layout, *typeHint, *desiredOffset, newIndices)))
-    return failure();
-
-  rewriter.replaceOpWithNewOp<GEPOp>(
-      gep, LLVM::LLVMPointerType::get(getContext()), *typeHint, gep.getBase(),
-      newIndices, gep.getInbounds());
-
-  return success();
-}
-
-namespace {
-/// Class abstracting over both array and struct types, turning each into ranges
-/// of their sub-types.
-class DestructurableTypeRange
-    : public llvm::indexed_accessor_range<DestructurableTypeRange,
-                                          DestructurableTypeInterface, Type,
-                                          Type *, Type> {
-
-  using Base = llvm::indexed_accessor_range<
-      DestructurableTypeRange, DestructurableTypeInterface, Type, Type *, Type>;
-
-public:
-  using Base::Base;
-
-  /// Constructs a DestructurableTypeRange from either a LLVMStructType or
-  /// LLVMArrayType.
-  explicit DestructurableTypeRange(DestructurableTypeInterface base)
-      : Base(base, 0, [&]() -> ptrdiff_t {
-          return TypeSwitch<DestructurableTypeInterface, ptrdiff_t>(base)
-              .Case([](LLVMStructType structType) {
-                return structType.getBody().size();
-              })
-              .Case([](LLVMArrayType arrayType) {
-                return arrayType.getNumElements();
-              })
-              .Default([](auto) -> ptrdiff_t {
-                llvm_unreachable(
-                    "Only LLVMStructType or LLVMArrayType supported");
-              });
-        }()) {}
-
-  /// Returns true if this is a range over a packed struct.
-  bool isPacked() const {
-    if (auto structType = dyn_cast<LLVMStructType>(getBase()))
-      return structType.isPacked();
-    return false;
-  }
-
-private:
-  static Type dereference(DestructurableTypeInterface base, ptrdiff_t index) {
-    // i32 chosen because the implementations of ArrayType and StructType
-    // specifically expect it to be 32 bit. They will fail otherwise.
-    Type result = base.getTypeAtIndex(
-        IntegerAttr::get(IntegerType::get(base.getContext(), 32), index));
-    assert(result && "Should always succeed");
-    return result;
-  }
-
-  friend Base;
-};
-} // namespace
-
-/// Returns the list of elements of `destructurableType` that are written to by
-/// a store operation writing `storeSize` bytes at `storeOffset`.
-/// `storeOffset` is required to cleanly point to an immediate element within
-/// the type. If the write operation were to write to any padding, write beyond
-/// the aggregate or partially write to a non-aggregate, failure is returned.
-static FailureOr<DestructurableTypeRange>
-getWrittenToFields(const DataLayout &dataLayout,
-                   DestructurableTypeInterface destructurableType,
-                   unsigned storeSize, unsigned storeOffset) {
-  DestructurableTypeRange destructurableTypeRange(destructurableType);
-
-  unsigned currentOffset = 0;
-  for (; !destructurableTypeRange.empty();
-       destructurableTypeRange = destructurableTypeRange.drop_front()) {
-    Type type = destructurableTypeRange.front();
-    if (!destructurableTypeRange.isPacked()) {
-      unsigned alignment = dataLayout.getTypeABIAlignment(type);
-      currentOffset = llvm::alignTo(currentOffset, alignment);
-    }
-
-    // currentOffset is guaranteed to be equal to offset since offset is either
-    // 0 or stems from a type-consistent GEP indexing into just a single
-    // aggregate.
-    if (currentOffset == storeOffset)
-      break;
-
-    assert(currentOffset < storeOffset &&
-           "storeOffset should cleanly point into an immediate field");
-
-    currentOffset += dataLayout.getTypeSize(type);
-  }
-
-  size_t exclusiveEnd = 0;
-  for (; exclusiveEnd < destructurableTypeRange.size() && storeSize > 0;
-       exclusiveEnd++) {
-    if (!destructurableTypeRange.isPacked()) {
-      unsigned alignment =
-          dataLayout.getTypeABIAlignment(destructurableTypeRange[exclusiveEnd]);
-      // No padding allowed in between fields at this point in time.
-      if (!llvm::isAligned(llvm::Align(alignment), currentOffset))
-        return failure();
-    }
-
-    unsigned fieldSize =
-        dataLayout.getTypeSize(destructurableTypeRange[exclusiveEnd]);
-    if (fieldSize > storeSize) {
-      // Partial writes into an aggregate are okay since subsequent pattern
-      // applications can further split these up into writes into the
-      // sub-elements.
-      auto subAggregate = dyn_cast<DestructurableTypeInterface>(
-          destructurableTypeRange[exclusiveEnd]);
-      if (!subAggregate)
-        return failure();
-
-      // Avoid splitting redundantly by making sure the store into the
-      // aggregate can actually be split.
-      if (failed(getWrittenToFields(dataLayout, subAggregate, storeSize,
-                                    /*storeOffset=*/0)))
-        return failure();
-
-      return destructurableTypeRange.take_front(exclusiveEnd + 1);
-    }
-    currentOffset += fieldSize;
-    storeSize -= fieldSize;
-  }
-
-  // If the storeSize is not 0 at this point we are writing past the aggregate
-  // as a whole. Abort.
-  if (storeSize > 0)
-    return failure();
-  return destructurableTypeRange.take_front(exclusiveEnd);
-}
-
-/// Splits a store of the vector `value` into `address` at `storeOffset` into
-/// multiple stores of each element with the goal of each generated store
-/// becoming type-consistent through subsequent pattern applications.
-static void splitVectorStore(const DataLayout &dataLayout, Location loc,
-                             RewriterBase &rewriter, Value address,
-                             TypedValue<VectorType> value,
-                             unsigned storeOffset) {
-  VectorType vectorType = value.getType();
-  unsigned elementSize = dataLayout.getTypeSize(vectorType.getElementType());
-
-  // Extract every element in the vector and store it in the given address.
-  for (size_t index : llvm::seq<size_t>(0, vectorType.getNumElements())) {
-    auto pos =
-        rewriter.create<ConstantOp>(loc, rewriter.getI32IntegerAttr(index));
-    auto extractOp = rewriter.create<ExtractElementOp>(loc, value, pos);
-
-    // For convenience, we do indexing by calculating the final byte offset.
-    // Other patterns will turn this into a type-consistent GEP.
-    auto gepOp = rewriter.create<GEPOp>(
-        loc, address.getType(), rewriter.getI8Type(), address,
-        ArrayRef<GEPArg>{
-            static_cast<int32_t>(storeOffset + index * elementSize)});
-
-    rewriter.create<StoreOp>(loc, extractOp, gepOp);
-  }
-}
-
-/// Splits a store of the integer `value` into `address` at `storeOffset` into
-/// multiple stores to each 'writtenToFields', making each store operation
-/// type-consistent.
-static void splitIntegerStore(const DataLayout &dataLayout, Location loc,
-                              RewriterBase &rewriter, Value address,
-                              Value value, unsigned storeSize,
-                              unsigned storeOffset,
-                              DestructurableTypeRange writtenToFields) {
-  unsigned currentOffset = storeOffset;
-  for (Type type : writtenToFields) {
-    unsigned fieldSize = dataLayout.getTypeSize(type);
-
-    // Extract the data out of the integer by first shifting right and then
-    // truncating it.
-    auto pos = rewriter.create<ConstantOp>(
-        loc, rewriter.getIntegerAttr(value.getType(),
-                                     (currentOffset - storeOffset) * 8));
-
-    auto shrOp = rewriter.create<LShrOp>(loc, value, pos);
-
-    // If we are doing a partial write into a direct field the remaining
-    // `storeSize` will be less than the size of the field. We have to truncate
-    // to the `storeSize` to avoid creating a store that wasn't in the original
-    // code.
-    IntegerType fieldIntType =
-        rewriter.getIntegerType(std::min(fieldSize, storeSize) * 8);
-    Value valueToStore = rewriter.create<TruncOp>(loc, fieldIntType, shrOp);
-
-    // We create an `i8` indexed GEP here as that is the easiest (offset is
-    // already known). Other patterns turn this into a type-consistent GEP.
-    auto gepOp = rewriter.create<GEPOp>(
-        loc, address.getType(), rewriter.getI8Type(), address,
-        ArrayRef<GEPArg>{static_cast<int32_t>(currentOffset)});
-    rewriter.create<StoreOp>(loc, valueToStore, gepOp);
-
-    // No need to care about padding here since we already checked previously
-    // that no padding exists in this range.
-    currentOffset += fieldSize;
-    storeSize -= fieldSize;
-  }
-}
-
-LogicalResult SplitStores::matchAndRewrite(StoreOp store,
-                                           PatternRewriter &rewriter) const {
-  Type sourceType = store.getValue().getType();
-  if (!isa<IntegerType, VectorType>(sourceType)) {
-    // We currently only support integer and vector sources.
-    return failure();
-  }
-
-  Type typeHint = isElementTypeInconsistent(store.getAddr(), sourceType);
-  if (!typeHint) {
-    // Nothing to do, since it is already consistent.
-    return failure();
-  }
-
-  auto dataLayout = DataLayout::closest(store);
-
-  unsigned storeSize = dataLayout.getTypeSize(sourceType);
-  unsigned offset = 0;
-  Value address = store.getAddr();
-  if (auto gepOp = address.getDefiningOp<GEPOp>()) {
-    // Currently only handle canonical GEPs with exactly two indices,
-    // indexing a single aggregate deep.
-    // If the GEP is not canonical we have to fail, otherwise we would not
-    // create type-consistent IR.
-    if (gepOp.getIndices().size() != 2 ||
-        succeeded(getRequiredConsistentGEPType(gepOp)))
-      return failure();
-
-    // If the size of the element indexed by the GEP is smaller than the store
-    // size, it is pointing into the middle of an aggregate with the store
-    // storing into multiple adjacent elements. Destructure into the base
-    // address of the aggregate with a store offset.
-    if (storeSize > dataLayout.getTypeSize(gepOp.getResultPtrElementType())) {
-      std::optional<uint64_t> byteOffset = gepToByteOffset(dataLayout, gepOp);
-      if (!byteOffset)
-        return failure();
-
-      offset = *byteOffset;
-      typeHint = gepOp.getElemType();
-      address = gepOp.getBase();
-    }
-  }
-
-  auto destructurableType = dyn_cast<DestructurableTypeInterface>(typeHint);
-  if (!destructurableType)
-    return failure();
-
-  FailureOr<DestructurableTypeRange> writtenToElements =
-      getWrittenToFields(dataLayout, destructurableType, storeSize, offset);
-  if (failed(writtenToElements))
-    return failure();
-
-  if (writtenToElements->size() <= 1) {
-    // Other patterns should take care of this case, we are only interested in
-    // splitting element stores.
-    return failure();
-  }
-
-  if (isa<IntegerType>(sourceType)) {
-    splitIntegerStore(dataLayout, store.getLoc(), rewriter, address,
-                      store.getValue(), storeSize, offset, *writtenToElements);
-    rewriter.eraseOp(store);
-    return success();
-  }
-
-  // Add a reasonable bound to not split very large vectors that would end up
-  // generating lots of code.
-  if (dataLayout.getTypeSizeInBits(sourceType) > maxVectorSplitSize)
-    return failure();
-
-  // Vector types are simply split into its elements and new stores generated
-  // with those. Subsequent pattern applications will split these stores further
-  // if required.
-  splitVectorStore(dataLayout, store.getLoc(), rewriter, address,
-                   cast<TypedValue<VectorType>>(store.getValue()), offset);
-  rewriter.eraseOp(store);
-  return success();
-}
-
-LogicalResult SplitGEP::matchAndRewrite(GEPOp gepOp,
-                                        PatternRewriter &rewriter) const {
-  FailureOr<Type> typeHint = getRequiredConsistentGEPType(gepOp);
-  if (succeeded(typeHint) || gepOp.getIndices().size() <= 2) {
-    // GEP is not canonical or a single aggregate deep, nothing to do here.
-    return failure();
-  }
-
-  auto indexToGEPArg =
-      [](GEPIndicesAdaptor<ValueRange>::value_type index) -> GEPArg {
-    if (auto integerAttr = dyn_cast<IntegerAttr>(index))
-      return integerAttr.getValue().getSExtValue();
-    return cast<Value>(index);
-  };
-
-  GEPIndicesAdaptor<ValueRange> indices = gepOp.getIndices();
-
-  auto splitIter = std::next(indices.begin(), 2);
-
-  // Split off the first GEP using the first two indices.
-  auto subGepOp = rewriter.create<GEPOp>(
-      gepOp.getLoc(), gepOp.getType(), gepOp.getElemType(), gepOp.getBase(),
-      llvm::map_to_vector(llvm::make_range(indices.begin(), splitIter),
-                          indexToGEPArg),
-      gepOp.getInbounds());
-
-  // The second GEP indexes on the result pointer element type of the previous
-  // with all the remaining indices and a zero upfront. If this GEP has more
-  // than two indices remaining it'll be further split in subsequent pattern
-  // applications.
-  SmallVector<GEPArg> newIndices = {0};
-  llvm::transform(llvm::make_range(splitIter, indices.end()),
-                  std::back_inserter(newIndices), indexToGEPArg);
-  rewriter.replaceOpWithNewOp<GEPOp>(gepOp, gepOp.getType(),
-                                     subGepOp.getResultPtrElementType(),
-                                     subGepOp, newIndices, gepOp.getInbounds());
-  return success();
-}
-
-//===----------------------------------------------------------------------===//
-// Type consistency pass
-//===----------------------------------------------------------------------===//
-
-namespace {
-struct LLVMTypeConsistencyPass
-    : public LLVM::impl::LLVMTypeConsistencyBase<LLVMTypeConsistencyPass> {
-  void runOnOperation() override {
-    RewritePatternSet rewritePatterns(&getContext());
-    rewritePatterns.add<CanonicalizeAlignedGep>(&getContext());
-    rewritePatterns.add<SplitStores>(&getContext(), maxVectorSplitSize);
-    rewritePatterns.add<SplitGEP>(&getContext());
-    FrozenRewritePatternSet frozen(std::move(rewritePatterns));
-
-    if (failed(applyPatternsAndFoldGreedily(getOperation(), frozen)))
-      signalPassFailure();
-  }
-};
-} // namespace
-
-std::unique_ptr<Pass> LLVM::createTypeConsistencyPass() {
-  return std::make_unique<LLVMTypeConsistencyPass>();
-}

diff --git a/mlir/test/Dialect/LLVMIR/type-consistency.mlir b/mlir/test/Dialect/LLVMIR/type-consistency.mlir
deleted file mode 100644
index c9c1355d16df9..0000000000000
--- a/mlir/test/Dialect/LLVMIR/type-consistency.mlir
+++ /dev/null
@@ -1,533 +0,0 @@
-// RUN: mlir-opt %s --pass-pipeline="builtin.module(llvm.func(llvm-type-consistency))" --split-input-file | FileCheck %s
-
-// CHECK-LABEL: llvm.func @same_address
-llvm.func @same_address(%arg: i32) {
-  %0 = llvm.mlir.constant(1 : i32) : i32
-  // CHECK: %[[ALLOCA:.*]] = llvm.alloca %{{.*}} x !llvm.struct<"foo", (i32, i32, i32)>
-  %1 = llvm.alloca %0 x !llvm.struct<"foo", (i32, i32, i32)> : (i32) -> !llvm.ptr
-  // CHECK: = llvm.getelementptr %[[ALLOCA]][0, 2] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<"foo", (i32, i32, i32)>
-  %7 = llvm.getelementptr %1[8] : (!llvm.ptr) -> !llvm.ptr, i8
-  llvm.store %arg, %7 : i32, !llvm.ptr
-  llvm.return
-}
-
-// -----
-
-// CHECK-LABEL: llvm.func @same_address_keep_inbounds
-llvm.func @same_address_keep_inbounds(%arg: i32) {
-  %0 = llvm.mlir.constant(1 : i32) : i32
-  // CHECK: %[[ALLOCA:.*]] = llvm.alloca %{{.*}} x !llvm.struct<"foo", (i32, i32, i32)>
-  %1 = llvm.alloca %0 x !llvm.struct<"foo", (i32, i32, i32)> : (i32) -> !llvm.ptr
-  // CHECK: = llvm.getelementptr inbounds %[[ALLOCA]][0, 2] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<"foo", (i32, i32, i32)>
-  %7 = llvm.getelementptr inbounds %1[8] : (!llvm.ptr) -> !llvm.ptr, i8
-  llvm.store %arg, %7 : i32, !llvm.ptr
-  llvm.return
-}
-
-// -----
-
-// CHECK-LABEL: llvm.func @index_in_final_padding
-llvm.func @index_in_final_padding(%arg: i32) {
-  %0 = llvm.mlir.constant(1 : i32) : i32
-  // CHECK: %[[ALLOCA:.*]] = llvm.alloca %{{.*}} x !llvm.struct<"foo", (i32, i8)>
-  %1 = llvm.alloca %0 x !llvm.struct<"foo", (i32, i8)> : (i32) -> !llvm.ptr
-  // CHECK: = llvm.getelementptr %[[ALLOCA]][7] : (!llvm.ptr) -> !llvm.ptr, i8
-  %7 = llvm.getelementptr %1[7] : (!llvm.ptr) -> !llvm.ptr, i8
-  llvm.store %arg, %7 : i32, !llvm.ptr
-  llvm.return
-}
-
-// -----
-
-// CHECK-LABEL: llvm.func @index_out_of_bounds
-llvm.func @index_out_of_bounds(%arg: i32) {
-  %0 = llvm.mlir.constant(1 : i32) : i32
-  // CHECK: %[[ALLOCA:.*]] = llvm.alloca %{{.*}} x !llvm.struct<"foo", (i32, i32)>
-  %1 = llvm.alloca %0 x !llvm.struct<"foo", (i32, i32)> : (i32) -> !llvm.ptr
-  // CHECK: = llvm.getelementptr %[[ALLOCA]][9] : (!llvm.ptr) -> !llvm.ptr, i8
-  %7 = llvm.getelementptr %1[9] : (!llvm.ptr) -> !llvm.ptr, i8
-  llvm.store %arg, %7 : i32, !llvm.ptr
-  llvm.return
-}
-
-// -----
-
-// CHECK-LABEL: llvm.func @index_in_padding
-llvm.func @index_in_padding(%arg: i16) {
-  %0 = llvm.mlir.constant(1 : i32) : i32
-  // CHECK: %[[ALLOCA:.*]] = llvm.alloca %{{.*}} x !llvm.struct<"foo", (i16, i32)>
-  %1 = llvm.alloca %0 x !llvm.struct<"foo", (i16, i32)> : (i32) -> !llvm.ptr
-  // CHECK: = llvm.getelementptr %[[ALLOCA]][2] : (!llvm.ptr) -> !llvm.ptr, i8
-  %7 = llvm.getelementptr %1[2] : (!llvm.ptr) -> !llvm.ptr, i8
-  llvm.store %arg, %7 : i16, !llvm.ptr
-  llvm.return
-}
-
-// -----
-
-// CHECK-LABEL: llvm.func @index_not_in_padding_because_packed
-llvm.func @index_not_in_padding_because_packed(%arg: i16) {
-  %0 = llvm.mlir.constant(1 : i32) : i32
-  // CHECK: %[[ALLOCA:.*]] = llvm.alloca %{{.*}} x !llvm.struct<"foo", packed (i16, i32)>
-  %1 = llvm.alloca %0 x !llvm.struct<"foo", packed (i16, i32)> : (i32) -> !llvm.ptr
-  // CHECK: = llvm.getelementptr %[[ALLOCA]][0, 1] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<"foo", packed (i16, i32)>
-  %7 = llvm.getelementptr %1[2] : (!llvm.ptr) -> !llvm.ptr, i8
-  llvm.store %arg, %7 : i16, !llvm.ptr
-  llvm.return
-}
-
-// -----
-
-// CHECK-LABEL: llvm.func @no_crash_on_negative_gep_index
-llvm.func @no_crash_on_negative_gep_index() {
-  %0 = llvm.mlir.constant(1.000000e+00 : f16) : f16
-  %1 = llvm.mlir.constant(1 : i32) : i32
-  // CHECK: %[[ALLOCA:.*]] = llvm.alloca %{{.*}} x !llvm.struct<"foo", (i32, i32, i32)>
-  %2 = llvm.alloca %1 x !llvm.struct<"foo", (i32, i32, i32)> : (i32) -> !llvm.ptr
-  // CHECK: llvm.getelementptr %[[ALLOCA]][-1] : (!llvm.ptr) -> !llvm.ptr, f32
-  %3 = llvm.getelementptr %2[-1] : (!llvm.ptr) -> !llvm.ptr, f32
-  llvm.store %0, %3 : f16, !llvm.ptr
-  llvm.return
-}
-
-// -----
-
-// CHECK-LABEL: llvm.func @coalesced_store_ints
-// CHECK-SAME: %[[ARG:.*]]: i64
-llvm.func @coalesced_store_ints(%arg: i64) {
-  // CHECK-DAG: %[[CST0:.*]] = llvm.mlir.constant(0 : i64) : i64
-  // CHECK-DAG: %[[CST32:.*]] = llvm.mlir.constant(32 : i64) : i64
-
-  %0 = llvm.mlir.constant(1 : i32) : i32
-  // CHECK: %[[ALLOCA:.*]] = llvm.alloca %{{.*}} x !llvm.struct<"foo", (i32, i32)>
-  %1 = llvm.alloca %0 x !llvm.struct<"foo", (i32, i32)> : (i32) -> !llvm.ptr
-
-  // CHECK: %[[SHR:.*]] = llvm.lshr %[[ARG]], %[[CST0]]
-  // CHECK: %[[TRUNC:.*]] = llvm.trunc %[[SHR]] : i64 to i32
-  // CHECK: llvm.store %[[TRUNC]], %[[ALLOCA]]
-  // CHECK: %[[SHR:.*]] = llvm.lshr %[[ARG]], %[[CST32]] : i64
-  // CHECK: %[[TRUNC:.*]] = llvm.trunc %[[SHR]] : i64 to i32
-  // CHECK: %[[GEP:.*]] = llvm.getelementptr %[[ALLOCA]][0, 1] : (!llvm.ptr)  -> !llvm.ptr, !llvm.struct<"foo", (i32, i32)>
-  // CHECK: llvm.store %[[TRUNC]], %[[GEP]]
-  llvm.store %arg, %1 : i64, !llvm.ptr
-  // CHECK-NOT: llvm.store %[[ARG]], %[[ALLOCA]]
-  llvm.return
-}
-
-// -----
-
-// CHECK-LABEL: llvm.func @coalesced_store_ints_offset
-// CHECK-SAME: %[[ARG:.*]]: i64
-llvm.func @coalesced_store_ints_offset(%arg: i64) {
-  // CHECK-DAG: %[[CST0:.*]] = llvm.mlir.constant(0 : i64) : i64
-  // CHECK-DAG: %[[CST32:.*]] = llvm.mlir.constant(32 : i64) : i64
-  %0 = llvm.mlir.constant(1 : i32) : i32
-  // CHECK: %[[ALLOCA:.*]] = llvm.alloca %{{.*}} x !llvm.struct<"foo", (i64, i32, i32)>
-  %1 = llvm.alloca %0 x !llvm.struct<"foo", (i64, i32, i32)> : (i32) -> !llvm.ptr
-  %3 = llvm.getelementptr %1[0, 1] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<"foo", (i64, i32, i32)>
-
-  // CHECK: %[[SHR:.*]] = llvm.lshr %[[ARG]], %[[CST0]]
-  // CHECK: %[[TRUNC:.*]] = llvm.trunc %[[SHR]] : i64 to i32
-  // CHECK: %[[GEP:.*]] = llvm.getelementptr %[[ALLOCA]][0, 1] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<"foo", (i64, i32, i32)>
-  // CHECK: llvm.store %[[TRUNC]], %[[GEP]]
-  // CHECK: %[[SHR:.*]] = llvm.lshr %[[ARG]], %[[CST32]] : i64
-  // CHECK: %[[TRUNC:.*]] = llvm.trunc %[[SHR]] : i64 to i32
-  // CHECK: %[[GEP:.*]] = llvm.getelementptr %[[ALLOCA]][0, 2] : (!llvm.ptr)  -> !llvm.ptr, !llvm.struct<"foo", (i64, i32, i32)>
-  // CHECK: llvm.store %[[TRUNC]], %[[GEP]]
-  llvm.store %arg, %3 : i64, !llvm.ptr
-  // CHECK-NOT: llvm.store %[[ARG]], %[[ALLOCA]]
-  llvm.return
-}
-
-// -----
-
-// CHECK-LABEL: llvm.func @coalesced_store_floats
-// CHECK-SAME: %[[ARG:.*]]: i64
-llvm.func @coalesced_store_floats(%arg: i64) {
-  // CHECK-DAG: %[[CST0:.*]] = llvm.mlir.constant(0 : i64) : i64
-  // CHECK-DAG: %[[CST32:.*]] = llvm.mlir.constant(32 : i64) : i64
-  %0 = llvm.mlir.constant(1 : i32) : i32
-
-  // CHECK: %[[ALLOCA:.*]] = llvm.alloca %{{.*}} x !llvm.struct<"foo", (f32, f32)>
-  %1 = llvm.alloca %0 x !llvm.struct<"foo", (f32, f32)> : (i32) -> !llvm.ptr
-
-  // CHECK: %[[SHR:.*]] = llvm.lshr %[[ARG]], %[[CST0]]
-  // CHECK: %[[TRUNC:.*]] = llvm.trunc %[[SHR]] : i64 to i32
-  // CHECK: llvm.store %[[TRUNC]], %[[ALLOCA]]
-  // CHECK: %[[SHR:.*]] = llvm.lshr %[[ARG]], %[[CST32]] : i64
-  // CHECK: %[[TRUNC:.*]] = llvm.trunc %[[SHR]] : i64 to i32
-  // CHECK: %[[GEP:.*]] = llvm.getelementptr %[[ALLOCA]][0, 1] : (!llvm.ptr)  -> !llvm.ptr, !llvm.struct<"foo", (f32, f32)>
-  // CHECK: llvm.store %[[TRUNC]], %[[GEP]]
-  llvm.store %arg, %1 : i64, !llvm.ptr
-  // CHECK-NOT: llvm.store %[[ARG]], %[[ALLOCA]]
-  llvm.return
-}
-
-// -----
-
-// Padding test purposefully not modified.
-
-// CHECK-LABEL: llvm.func @coalesced_store_padding_inbetween
-// CHECK-SAME: %[[ARG:.*]]: i64
-llvm.func @coalesced_store_padding_inbetween(%arg: i64) {
-  %0 = llvm.mlir.constant(1 : i32) : i32
-
-  // CHECK: %[[ALLOCA:.*]] = llvm.alloca %{{.*}} x !llvm.struct<"foo", (i16, i32)>
-  %1 = llvm.alloca %0 x !llvm.struct<"foo", (i16, i32)> : (i32) -> !llvm.ptr
-  // CHECK: llvm.store %[[ARG]], %[[ALLOCA]]
-  llvm.store %arg, %1 : i64, !llvm.ptr
-  llvm.return
-}
-
-// -----
-
-// Padding test purposefully not modified.
-
-// CHECK-LABEL: llvm.func @coalesced_store_padding_end
-// CHECK-SAME: %[[ARG:.*]]: i64
-llvm.func @coalesced_store_padding_end(%arg: i64) {
-  %0 = llvm.mlir.constant(1 : i32) : i32
-
-  // CHECK: %[[ALLOCA:.*]] = llvm.alloca %{{.*}} x !llvm.struct<"foo", (i32, i16)>
-  %1 = llvm.alloca %0 x !llvm.struct<"foo", (i32, i16)> : (i32) -> !llvm.ptr
-  // CHECK: llvm.store %[[ARG]], %[[ALLOCA]]
-  llvm.store %arg, %1 : i64, !llvm.ptr
-  llvm.return
-}
-
-// -----
-
-// CHECK-LABEL: llvm.func @coalesced_store_past_end
-// CHECK-SAME: %[[ARG:.*]]: i64
-llvm.func @coalesced_store_past_end(%arg: i64) {
-  %0 = llvm.mlir.constant(1 : i32) : i32
-
-  // CHECK: %[[ALLOCA:.*]] = llvm.alloca %{{.*}} x !llvm.struct<"foo", (i32)>
-  %1 = llvm.alloca %0 x !llvm.struct<"foo", (i32)> : (i32) -> !llvm.ptr
-  // CHECK: llvm.store %[[ARG]], %[[ALLOCA]]
-  llvm.store %arg, %1 : i64, !llvm.ptr
-  llvm.return
-}
-
-// -----
-
-// CHECK-LABEL: llvm.func @coalesced_store_packed_struct
-// CHECK-SAME: %[[ARG:.*]]: i64
-llvm.func @coalesced_store_packed_struct(%arg: i64) {
-  %0 = llvm.mlir.constant(1 : i32) : i32
-  // CHECK-DAG: %[[CST0:.*]] = llvm.mlir.constant(0 : i64) : i64
-  // CHECK-DAG: %[[CST16:.*]] = llvm.mlir.constant(16 : i64) : i64
-  // CHECK-DAG: %[[CST48:.*]] = llvm.mlir.constant(48 : i64) : i64
-
-  // CHECK: %[[ALLOCA:.*]] = llvm.alloca %{{.*}} x !llvm.struct<"foo", packed (i16, i32, i16)>
-  %1 = llvm.alloca %0 x !llvm.struct<"foo", packed (i16, i32, i16)> : (i32) -> !llvm.ptr
-  // CHECK: %[[SHR:.*]] = llvm.lshr %[[ARG]], %[[CST0]]
-  // CHECK: %[[TRUNC:.*]] = llvm.trunc %[[SHR]] : i64 to i16
-  // CHECK: llvm.store %[[TRUNC]], %[[ALLOCA]]
-  // CHECK: %[[SHR:.*]] = llvm.lshr %[[ARG]], %[[CST16]]
-  // CHECK: %[[TRUNC:.*]] = llvm.trunc %[[SHR]] : i64 to i32
-  // CHECK: %[[GEP:.*]] = llvm.getelementptr %[[ALLOCA]][0, 1] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<"foo", packed (i16, i32, i16)>
-  // CHECK: llvm.store %[[TRUNC]], %[[GEP]]
-  // CHECK: %[[SHR:.*]] = llvm.lshr %[[ARG]], %[[CST48]]
-  // CHECK: %[[TRUNC:.*]] = llvm.trunc %[[SHR]] : i64 to i16
-  // CHECK: %[[GEP:.*]] = llvm.getelementptr %[[ALLOCA]][0, 2] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<"foo", packed (i16, i32, i16)>
-  // CHECK: llvm.store %[[TRUNC]], %[[GEP]]
-  llvm.store %arg, %1 : i64, !llvm.ptr
-  // CHECK-NOT: llvm.store %[[ARG]], %[[ALLOCA]]
-  llvm.return
-}
-
-// -----
-
-// CHECK-LABEL: llvm.func @vector_write_split
-// CHECK-SAME: %[[ARG:.*]]: vector<4xi32>
-llvm.func @vector_write_split(%arg: vector<4xi32>) {
-  // CHECK-DAG: %[[CST0:.*]] = llvm.mlir.constant(0 : i32) : i32
-  // CHECK-DAG: %[[CST1:.*]] = llvm.mlir.constant(1 : i32) : i32
-  // CHECK-DAG: %[[CST2:.*]] = llvm.mlir.constant(2 : i32) : i32
-  // CHECK-DAG: %[[CST3:.*]] = llvm.mlir.constant(3 : i32) : i32
-  %0 = llvm.mlir.constant(1 : i32) : i32
-  // CHECK: %[[ALLOCA:.*]] = llvm.alloca %{{.*}} x !llvm.struct<"foo", (i32, i32, i32, i32)>
-  %1 = llvm.alloca %0 x !llvm.struct<"foo", (i32, i32, i32, i32)> : (i32) -> !llvm.ptr
-
-  // CHECK: %[[EXTRACT:.*]] = llvm.extractelement %[[ARG]][%[[CST0]] : i32] : vector<4xi32>
-  // CHECK: llvm.store %[[EXTRACT]], %[[ALLOCA]] : i32, !llvm.ptr
-
-  // CHECK: %[[EXTRACT:.*]] = llvm.extractelement %[[ARG]][%[[CST1]] : i32] : vector<4xi32>
-  // CHECK: %[[GEP:.*]] = llvm.getelementptr %[[ALLOCA]][0, 1] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<"foo", (i32, i32, i32, i32)>
-  // CHECK: llvm.store %[[EXTRACT]], %[[GEP]] : i32, !llvm.ptr
-
-  // CHECK: %[[EXTRACT:.*]] = llvm.extractelement %[[ARG]][%[[CST2]] : i32] : vector<4xi32>
-  // CHECK: %[[GEP:.*]] = llvm.getelementptr %[[ALLOCA]][0, 2] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<"foo", (i32, i32, i32, i32)>
-  // CHECK: llvm.store %[[EXTRACT]], %[[GEP]] : i32, !llvm.ptr
-
-  // CHECK: %[[EXTRACT:.*]] = llvm.extractelement %[[ARG]][%[[CST3]] : i32] : vector<4xi32>
-  // CHECK: %[[GEP:.*]] = llvm.getelementptr %[[ALLOCA]][0, 3] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<"foo", (i32, i32, i32, i32)>
-  // CHECK: llvm.store %[[EXTRACT]], %[[GEP]] : i32, !llvm.ptr
-
-  llvm.store %arg, %1 : vector<4xi32>, !llvm.ptr
-  // CHECK-NOT: llvm.store %[[ARG]], %[[ALLOCA]]
-  llvm.return
-}
-
-// -----
-
-// CHECK-LABEL: llvm.func @vector_write_split_offset
-// CHECK-SAME: %[[ARG:.*]]: vector<4xi32>
-llvm.func @vector_write_split_offset(%arg: vector<4xi32>) {
-  // CHECK-DAG: %[[CST0:.*]] = llvm.mlir.constant(0 : i32) : i32
-  // CHECK-DAG: %[[CST1:.*]] = llvm.mlir.constant(1 : i32) : i32
-  // CHECK-DAG: %[[CST2:.*]] = llvm.mlir.constant(2 : i32) : i32
-  // CHECK-DAG: %[[CST3:.*]] = llvm.mlir.constant(3 : i32) : i32
-  %0 = llvm.mlir.constant(1 : i32) : i32
-  // CHECK: %[[ALLOCA:.*]] = llvm.alloca %{{.*}} x !llvm.struct<"foo", (i64, i32, i32, i32, i32)>
-  %1 = llvm.alloca %0 x !llvm.struct<"foo", (i64, i32, i32, i32, i32)> : (i32) -> !llvm.ptr
-  %2 = llvm.getelementptr %1[0, 1] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<"foo", (i64, i32, i32, i32, i32)>
-
-  // CHECK: %[[EXTRACT:.*]] = llvm.extractelement %[[ARG]][%[[CST0]] : i32] : vector<4xi32>
-  // CHECK: %[[GEP:.*]] = llvm.getelementptr %[[ALLOCA]][0, 1] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<"foo", (i64, i32, i32, i32, i32)>
-  // CHECK: llvm.store %[[EXTRACT]], %[[GEP]] : i32, !llvm.ptr
-
-  // CHECK: %[[EXTRACT:.*]] = llvm.extractelement %[[ARG]][%[[CST1]] : i32] : vector<4xi32>
-  // CHECK: %[[GEP:.*]] = llvm.getelementptr %[[ALLOCA]][0, 2] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<"foo", (i64, i32, i32, i32, i32)>
-  // CHECK: llvm.store %[[EXTRACT]], %[[GEP]] : i32, !llvm.ptr
-
-  // CHECK: %[[EXTRACT:.*]] = llvm.extractelement %[[ARG]][%[[CST2]] : i32] : vector<4xi32>
-  // CHECK: %[[GEP:.*]] = llvm.getelementptr %[[ALLOCA]][0, 3] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<"foo", (i64, i32, i32, i32, i32)>
-  // CHECK: llvm.store %[[EXTRACT]], %[[GEP]] : i32, !llvm.ptr
-
-  // CHECK: %[[EXTRACT:.*]] = llvm.extractelement %[[ARG]][%[[CST3]] : i32] : vector<4xi32>
-  // CHECK: %[[GEP:.*]] = llvm.getelementptr %[[ALLOCA]][0, 4] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<"foo", (i64, i32, i32, i32, i32)>
-  // CHECK: llvm.store %[[EXTRACT]], %[[GEP]] : i32, !llvm.ptr
-
-  llvm.store %arg, %2 : vector<4xi32>, !llvm.ptr
-  // CHECK-NOT: llvm.store %[[ARG]], %[[ALLOCA]]
-  llvm.return
-}
-
-// -----
-
-// Small test that a split vector store will be further optimized (e.g. into
-// split integer stores to structs as shown here)
-
-// CHECK-LABEL: llvm.func @vector_write_split_struct
-// CHECK-SAME: %[[ARG:.*]]: vector<2xi64>
-llvm.func @vector_write_split_struct(%arg: vector<2xi64>) {
-  %0 = llvm.mlir.constant(1 : i32) : i32
-  // CHECK: %[[ALLOCA:.*]] = llvm.alloca %{{.*}} x !llvm.struct<"foo", (i32, i32, i32, i32)>
-  %1 = llvm.alloca %0 x !llvm.struct<"foo", (i32, i32, i32, i32)> : (i32) -> !llvm.ptr
-
-  // CHECK-COUNT-4: llvm.store %{{.*}}, %{{.*}} : i32, !llvm.ptr
-
-  llvm.store %arg, %1 : vector<2xi64>, !llvm.ptr
-  // CHECK-NOT: llvm.store %[[ARG]], %[[ALLOCA]]
-  llvm.return
-}
-
-// -----
-
-// CHECK-LABEL: llvm.func @gep_split
-// CHECK-SAME: %[[ARG:.*]]: i64
-llvm.func @gep_split(%arg: i64) {
-  %0 = llvm.mlir.constant(1 : i32) : i32
-  // CHECK: %[[ALLOCA:.*]] = llvm.alloca %{{.*}} x !llvm.array<2 x struct<"foo", (i64)>>
-  %1 = llvm.alloca %0 x !llvm.array<2 x struct<"foo", (i64)>> : (i32) -> !llvm.ptr
-  %3 = llvm.getelementptr %1[0, 1, 0] : (!llvm.ptr) -> !llvm.ptr, !llvm.array<2 x struct<"foo", (i64)>>
-  // CHECK: %[[TOP_GEP:.*]] = llvm.getelementptr %[[ALLOCA]][0, 1] : (!llvm.ptr) -> !llvm.ptr, !llvm.array<2 x struct<"foo", (i64)>>
-  // CHECK: %[[GEP:.*]] = llvm.getelementptr %[[TOP_GEP]][0, 0] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<"foo", (i64)>
-  // CHECK: llvm.store %[[ARG]], %[[GEP]]
-  llvm.store %arg, %3 : i64, !llvm.ptr
-  // CHECK-NOT: llvm.store %[[ARG]], %[[ALLOCA]]
-  llvm.return
-}
-
-// -----
-
-// CHECK-LABEL: llvm.func @coalesced_store_ints_subaggregate
-// CHECK-SAME: %[[ARG:.*]]: i64
-llvm.func @coalesced_store_ints_subaggregate(%arg: i64) {
-  // CHECK-DAG: %[[CST0:.*]] = llvm.mlir.constant(0 : i64) : i64
-  // CHECK-DAG: %[[CST32:.*]] = llvm.mlir.constant(32 : i64) : i64
-  %0 = llvm.mlir.constant(1 : i32) : i32
-  // CHECK: %[[ALLOCA:.*]] = llvm.alloca %{{.*}} x !llvm.struct<"foo", (i64, struct<(i32, i32)>)>
-  %1 = llvm.alloca %0 x !llvm.struct<"foo", (i64, struct<(i32, i32)>)> : (i32) -> !llvm.ptr
-  %3 = llvm.getelementptr %1[0, 1, 0] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<"foo", (i64, struct<(i32, i32)>)>
-
-  // CHECK: %[[TOP_GEP:.*]] = llvm.getelementptr %[[ALLOCA]][0, 1] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<"foo", (i64, struct<(i32, i32)>)>
-  // CHECK: %[[SHR:.*]] = llvm.lshr %[[ARG]], %[[CST0]]
-  // CHECK: %[[TRUNC:.*]] = llvm.trunc %[[SHR]] : i64 to i32
-  // CHECK: llvm.store %[[TRUNC]], %[[TOP_GEP]]
-  // CHECK: %[[SHR:.*]] = llvm.lshr %[[ARG]], %[[CST32]] : i64
-  // CHECK: %[[TRUNC:.*]] = llvm.trunc %[[SHR]] : i64 to i32
-  // CHECK: %[[GEP:.*]] = llvm.getelementptr %[[TOP_GEP]][0, 1] : (!llvm.ptr)  -> !llvm.ptr, !llvm.struct<(i32, i32)>
-  // CHECK: llvm.store %[[TRUNC]], %[[GEP]]
-  llvm.store %arg, %3 : i64, !llvm.ptr
-  // CHECK-NOT: llvm.store %[[ARG]], %[[ALLOCA]]
-  llvm.return
-}
-
-// -----
-
-// CHECK-LABEL: llvm.func @gep_result_ptr_type_dynamic
-// CHECK-SAME: %[[ARG:.*]]: i64
-llvm.func @gep_result_ptr_type_dynamic(%arg: i64) {
-  %0 = llvm.mlir.constant(1 : i32) : i32
-  // CHECK: %[[ALLOCA:.*]] = llvm.alloca %{{.*}} x !llvm.array<2 x struct<"foo", (i64)>>
-  %1 = llvm.alloca %0 x !llvm.array<2 x struct<"foo", (i64)>> : (i32) -> !llvm.ptr
-  %3 = llvm.getelementptr %1[0, %arg, 0] : (!llvm.ptr, i64) -> !llvm.ptr, !llvm.array<2 x struct<"foo", (i64)>>
-  // CHECK: %[[TOP_GEP:.*]] = llvm.getelementptr %[[ALLOCA]][0, %[[ARG]]] : (!llvm.ptr, i64) -> !llvm.ptr, !llvm.array<2 x struct<"foo", (i64)>>
-  // CHECK: %[[GEP:.*]] = llvm.getelementptr %[[TOP_GEP]][0, 0] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<"foo", (i64)>
-  // CHECK: llvm.store %[[ARG]], %[[GEP]]
-  llvm.store %arg, %3 : i64, !llvm.ptr
-  // CHECK-NOT: llvm.store %[[ARG]], %[[ALLOCA]]
-  llvm.return
-}
-
-// -----
-
-// CHECK-LABEL: llvm.func @overlapping_int_aggregate_store
-// CHECK-SAME: %[[ARG:.*]]: i64
-llvm.func @overlapping_int_aggregate_store(%arg: i64) {
-  // CHECK-DAG: %[[CST0:.*]] = llvm.mlir.constant(0 : i64) : i64
-  // CHECK-DAG: %[[CST16:.*]] = llvm.mlir.constant(16 : i64) : i64
-
-  %0 = llvm.mlir.constant(1 : i32) : i32
-  // CHECK: %[[ALLOCA:.*]] = llvm.alloca %{{.*}} x !llvm.struct<"foo", (i16, struct<(i16, i16, i16)>)>
-  %1 = llvm.alloca %0 x !llvm.struct<"foo", (i16, struct<(i16, i16, i16)>)> : (i32) -> !llvm.ptr
-
-  // CHECK: %[[SHR:.*]] = llvm.lshr %[[ARG]], %[[CST0]]
-  // CHECK: %[[TRUNC:.*]] = llvm.trunc %[[SHR]] : i64 to i16
-  // CHECK: llvm.store %[[TRUNC]], %[[ALLOCA]]
-
-  // CHECK: %[[SHR:.*]] = llvm.lshr %[[ARG]], %[[CST16]] : i64
-  // CHECK: [[TRUNC:.*]] = llvm.trunc %[[SHR]] : i64 to i48
-  // CHECK: %[[TOP_GEP:.*]] = llvm.getelementptr %[[ALLOCA]][0, 1] : (!llvm.ptr)  -> !llvm.ptr, !llvm.struct<"foo", (i16, struct<(i16, i16, i16)>)>
-
-  // Normal integer splitting of [[TRUNC]] follows:
-
-  // CHECK: llvm.store %{{.*}}, %[[TOP_GEP]]
-  // CHECK: %[[GEP:.*]] = llvm.getelementptr %[[TOP_GEP]][0, 1] : (!llvm.ptr)  -> !llvm.ptr, !llvm.struct<(i16, i16, i16)>
-  // CHECK: llvm.store %{{.*}}, %[[GEP]]
-  // CHECK: %[[GEP:.*]] = llvm.getelementptr %[[TOP_GEP]][0, 2] : (!llvm.ptr)  -> !llvm.ptr, !llvm.struct<(i16, i16, i16)>
-  // CHECK: llvm.store %{{.*}}, %[[GEP]]
-
-  llvm.store %arg, %1 : i64, !llvm.ptr
-  // CHECK-NOT: llvm.store %[[ARG]], %[[ALLOCA]]
-  llvm.return
-}
-
-// -----
-
-// CHECK-LABEL: llvm.func @overlapping_vector_aggregate_store
-// CHECK-SAME: %[[ARG:.*]]: vector<4xi16>
-llvm.func @overlapping_vector_aggregate_store(%arg: vector<4 x i16>) {
-  // CHECK-DAG: %[[CST0:.*]] = llvm.mlir.constant(0 : i32) : i32
-  // CHECK-DAG: %[[CST1:.*]] = llvm.mlir.constant(1 : i32) : i32
-  // CHECK-DAG: %[[CST2:.*]] = llvm.mlir.constant(2 : i32) : i32
-  // CHECK-DAG: %[[CST3:.*]] = llvm.mlir.constant(3 : i32) : i32
-
-  %0 = llvm.mlir.constant(1 : i32) : i32
-  // CHECK: %[[ALLOCA:.*]] = llvm.alloca %{{.*}} x !llvm.struct<"foo", (i16, struct<(i16, i16, i16)>)>
-  %1 = llvm.alloca %0 x !llvm.struct<"foo", (i16, struct<(i16, i16, i16)>)> : (i32) -> !llvm.ptr
-
-  // CHECK: %[[EXTRACT:.*]] = llvm.extractelement %[[ARG]][%[[CST0]] : i32]
-  // CHECK: llvm.store %[[EXTRACT]], %[[ALLOCA]]
-
-  // CHECK: %[[EXTRACT:.*]] = llvm.extractelement %[[ARG]][%[[CST1]] : i32]
-  // CHECK: %[[GEP0:.*]] = llvm.getelementptr %[[ALLOCA]][0, 1] : (!llvm.ptr)  -> !llvm.ptr, !llvm.struct<"foo", (i16, struct<(i16, i16, i16)>)>
-  // CHECK: llvm.store %[[EXTRACT]], %[[GEP0]]
-
-  // CHECK: %[[EXTRACT:.*]] = llvm.extractelement %[[ARG]][%[[CST2]] : i32]
-  // CHECK: %[[GEP0:.*]] = llvm.getelementptr %[[ALLOCA]][0, 1] : (!llvm.ptr)  -> !llvm.ptr, !llvm.struct<"foo", (i16, struct<(i16, i16, i16)>)>
-  // CHECK: %[[GEP1:.*]] = llvm.getelementptr %[[GEP0]][0, 1] : (!llvm.ptr)  -> !llvm.ptr, !llvm.struct<(i16, i16, i16)>
-  // CHECK: llvm.store %[[EXTRACT]], %[[GEP1]]
-
-  // CHECK: %[[EXTRACT:.*]] = llvm.extractelement %[[ARG]][%[[CST3]] : i32]
-  // CHECK: %[[GEP0:.*]] = llvm.getelementptr %[[ALLOCA]][0, 1] : (!llvm.ptr)  -> !llvm.ptr, !llvm.struct<"foo", (i16, struct<(i16, i16, i16)>)>
-  // CHECK: %[[GEP1:.*]] = llvm.getelementptr %[[GEP0]][0, 2] : (!llvm.ptr)  -> !llvm.ptr, !llvm.struct<(i16, i16, i16)>
-  // CHECK: llvm.store %[[EXTRACT]], %[[GEP1]]
-
-  llvm.store %arg, %1 : vector<4 x i16>, !llvm.ptr
-  // CHECK-NOT: llvm.store %[[ARG]], %[[ALLOCA]]
-  llvm.return
-}
-
-// -----
-
-// CHECK-LABEL: llvm.func @partially_overlapping_aggregate_store
-// CHECK-SAME: %[[ARG:.*]]: i64
-llvm.func @partially_overlapping_aggregate_store(%arg: i64) {
-  // CHECK-DAG: %[[CST0:.*]] = llvm.mlir.constant(0 : i64) : i64
-  // CHECK-DAG: %[[CST16:.*]] = llvm.mlir.constant(16 : i64) : i64
-
-  %0 = llvm.mlir.constant(1 : i32) : i32
-  // CHECK: %[[ALLOCA:.*]] = llvm.alloca %{{.*}} x !llvm.struct<"foo", (i16, struct<(i16, i16, i16, i16)>)>
-  %1 = llvm.alloca %0 x !llvm.struct<"foo", (i16, struct<(i16, i16, i16, i16)>)> : (i32) -> !llvm.ptr
-
-  // CHECK: %[[SHR:.*]] = llvm.lshr %[[ARG]], %[[CST0]]
-  // CHECK: %[[TRUNC:.*]] = llvm.trunc %[[SHR]] : i64 to i16
-  // CHECK: llvm.store %[[TRUNC]], %[[ALLOCA]]
-
-  // CHECK: %[[SHR:.*]] = llvm.lshr %[[ARG]], %[[CST16]] : i64
-  // CHECK: [[TRUNC:.*]] = llvm.trunc %[[SHR]] : i64 to i48
-  // CHECK: %[[TOP_GEP:.*]] = llvm.getelementptr %[[ALLOCA]][0, 1] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<"foo", (i16, struct<(i16, i16, i16, i16)>)>
-
-  // Normal integer splitting of [[TRUNC]] follows:
-
-  // CHECK: llvm.store %{{.*}}, %[[TOP_GEP]]
-  // CHECK: %[[GEP:.*]] = llvm.getelementptr %[[TOP_GEP]][0, 1] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<(i16, i16, i16, i16)>
-  // CHECK: llvm.store %{{.*}}, %[[GEP]]
-  // CHECK: %[[GEP:.*]] = llvm.getelementptr %[[TOP_GEP]][0, 2] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<(i16, i16, i16, i16)>
-  // CHECK: llvm.store %{{.*}}, %[[GEP]]
-
-  // It is important that there are no more stores at this point.
-  // Specifically a store into the fourth field of %[[TOP_GEP]] would
-  // incorrectly change the semantics of the code.
-  // CHECK-NOT: llvm.store %{{.*}}, %{{.*}}
-
-  llvm.store %arg, %1 : i64, !llvm.ptr
-
-  llvm.return
-}
-
-// -----
-
-// Here a split is undesirable since the store does a partial store into the field.
-
-// CHECK-LABEL: llvm.func @undesirable_overlapping_aggregate_store
-// CHECK-SAME: %[[ARG:.*]]: i64
-llvm.func @undesirable_overlapping_aggregate_store(%arg: i64) {
-  %0 = llvm.mlir.constant(1 : i32) : i32
-  // CHECK: %[[ALLOCA:.*]] = llvm.alloca %{{.*}} x !llvm.struct<"foo", (i32, i32, struct<(i64, i16, i16, i16)>)>
-  %1 = llvm.alloca %0 x !llvm.struct<"foo", (i32, i32, struct<(i64, i16, i16, i16)>)> : (i32) -> !llvm.ptr
-  // CHECK: %[[GEP:.*]] = llvm.getelementptr %[[ALLOCA]][0, 1] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<"foo", (i32, i32, struct<(i64, i16, i16, i16)>)>
-  %2 = llvm.getelementptr %1[0, 1] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<"foo", (i32, i32, struct<(i64, i16, i16, i16)>)>
-  // CHECK: llvm.store %[[ARG]], %[[GEP]]
-  llvm.store %arg, %2 : i64, !llvm.ptr
-
-  llvm.return
-}
-
-// -----
-
-// CHECK-LABEL: llvm.func @coalesced_store_ints_array
-// CHECK-SAME: %[[ARG:.*]]: i64
-llvm.func @coalesced_store_ints_array(%arg: i64) {
-  // CHECK-DAG: %[[CST0:.*]] = llvm.mlir.constant(0 : i64) : i64
-  // CHECK-DAG: %[[CST32:.*]] = llvm.mlir.constant(32 : i64) : i64
-
-  %0 = llvm.mlir.constant(1 : i32) : i32
-  // CHECK: %[[ALLOCA:.*]] = llvm.alloca %{{.*}} x !llvm.array<2 x i32>
-  %1 = llvm.alloca %0 x !llvm.array<2 x i32> : (i32) -> !llvm.ptr
-
-  // CHECK: %[[SHR:.*]] = llvm.lshr %[[ARG]], %[[CST0]]
-  // CHECK: %[[TRUNC:.*]] = llvm.trunc %[[SHR]] : i64 to i32
-  // CHECK: llvm.store %[[TRUNC]], %[[ALLOCA]]
-  // CHECK: %[[SHR:.*]] = llvm.lshr %[[ARG]], %[[CST32]] : i64
-  // CHECK: %[[TRUNC:.*]] = llvm.trunc %[[SHR]] : i64 to i32
-  // CHECK: %[[GEP:.*]] = llvm.getelementptr %[[ALLOCA]][0, 1] : (!llvm.ptr)  -> !llvm.ptr, !llvm.array<2 x i32>
-  // CHECK: llvm.store %[[TRUNC]], %[[GEP]]
-  llvm.store %arg, %1 : i64, !llvm.ptr
-  // CHECK-NOT: llvm.store %[[ARG]], %[[ALLOCA]]
-  llvm.return
-}



