[Mlir-commits] [mlir] ba87f99 - [mlir] make vector to llvm conversion truly partial
Alex Zinenko
llvmlistbot at llvm.org
Thu Feb 4 02:33:34 PST 2021
Author: Alex Zinenko
Date: 2021-02-04T11:33:24+01:00
New Revision: ba87f99168c93461b28a4aa2d05e238ff774d57a
URL: https://github.com/llvm/llvm-project/commit/ba87f99168c93461b28a4aa2d05e238ff774d57a
DIFF: https://github.com/llvm/llvm-project/commit/ba87f99168c93461b28a4aa2d05e238ff774d57a.diff
LOG: [mlir] make vector to llvm conversion truly partial
Historically, the Vector to LLVM dialect conversion subsumed the Standard to
LLVM dialect conversion patterns. This was necessary because the conversion
infrastructure did not have sufficient support for reconciling type
conversions. This support is now available. Only keep the patterns related to
the Vector dialect in the Vector to LLVM conversion and require type cast
operations to be inserted if necessary. These casts will be removed by
following conversions if possible. Update integration tests to also run the
Standard to LLVM conversion.
There is a significant amount of test churn, which is due to (a) unnecessarily
strict tests in VectorToLLVM and (b) many patterns actually targeting Standard
dialect ops instead of LLVM dialect ops, leading to tests actually exercising a
Vector->Standard->LLVM conversion. This churn is a good illustration of the
reason to make the conversion partial: now the tests only check the code in the
Vector to LLVM conversion and will not be randomly broken by changes in
Standard to LLVM conversion.
Arguably, it may be possible to extract Vector to Standard patterns into a
separate pass, but given the ongoing splitting of the Standard dialect, such a
pass will be short-lived and will require further refactoring.
Depends On D95626
Reviewed By: nicolasvasilache, aartbik
Differential Revision: https://reviews.llvm.org/D95685
Added:
Modified:
mlir/integration_test/Dialect/Linalg/CPU/benchmark_matmul.mlir
mlir/integration_test/Dialect/Linalg/CPU/benchmark_matmul_column_major.mlir
mlir/integration_test/Dialect/Linalg/CPU/benchmark_matmul_column_major_as_row_major.mlir
mlir/integration_test/Dialect/Linalg/CPU/benchmark_matmul_i8_i8_i32.mlir
mlir/integration_test/Dialect/Standard/CPU/test-ceil-floor-pos-neg.mlir
mlir/integration_test/Dialect/Vector/CPU/test-transfer-read-2d.mlir
mlir/integration_test/Dialect/Vector/CPU/test-transfer-to-loops.mlir
mlir/integration_test/Dialect/Vector/CPU/test-vector-distribute.mlir
mlir/lib/Conversion/ArmSVEToLLVM/ArmSVEToLLVM.cpp
mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVM.cpp
mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVMPass.cpp
mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp
mlir/test/Conversion/ArmSVEToLLVM/convert-to-llvm.mlir
mlir/test/Conversion/VectorToLLVM/vector-mask-to-llvm.mlir
mlir/test/Conversion/VectorToLLVM/vector-reduction-to-llvm.mlir
mlir/test/Conversion/VectorToLLVM/vector-to-llvm.mlir
mlir/test/Dialect/LLVMIR/dialect-cast.mlir
mlir/test/Target/vector-to-llvm-ir.mlir
Removed:
################################################################################
diff --git a/mlir/integration_test/Dialect/Linalg/CPU/benchmark_matmul.mlir b/mlir/integration_test/Dialect/Linalg/CPU/benchmark_matmul.mlir
index 5e577d778210..62c5ee2b2c47 100644
--- a/mlir/integration_test/Dialect/Linalg/CPU/benchmark_matmul.mlir
+++ b/mlir/integration_test/Dialect/Linalg/CPU/benchmark_matmul.mlir
@@ -5,7 +5,7 @@
// RUN: mlir-opt -test-linalg-codegen-strategy="anchor-op=linalg.copy register-tile-sizes=4,32 vectorize" | \
// RUN: mlir-opt -canonicalize -convert-vector-to-scf -lower-affine -convert-linalg-to-loops | \
-// RUN: mlir-opt -canonicalize -convert-scf-to-std -convert-vector-to-llvm | \
+// RUN: mlir-opt -canonicalize -convert-scf-to-std -convert-vector-to-llvm -convert-std-to-llvm | \
// RUN: mlir-cpu-runner -O3 -e main -entry-point-result=void \
// Activate to dump assembly
// R_UN: -dump-object-file -object-filename=/tmp/a.o \
diff --git a/mlir/integration_test/Dialect/Linalg/CPU/benchmark_matmul_column_major.mlir b/mlir/integration_test/Dialect/Linalg/CPU/benchmark_matmul_column_major.mlir
index de4e51bd8c0e..4bd0a64cb4b2 100644
--- a/mlir/integration_test/Dialect/Linalg/CPU/benchmark_matmul_column_major.mlir
+++ b/mlir/integration_test/Dialect/Linalg/CPU/benchmark_matmul_column_major.mlir
@@ -7,7 +7,7 @@
// RUN: mlir-opt -test-linalg-codegen-strategy="anchor-op=linalg.copy register-tile-sizes=4,16 vectorize" | \
// RUN: mlir-opt -canonicalize -convert-vector-to-scf -lower-affine -convert-linalg-to-loops | \
-// RUN: mlir-opt -canonicalize -convert-scf-to-std -convert-vector-to-llvm | \
+// RUN: mlir-opt -canonicalize -convert-scf-to-std -convert-vector-to-llvm -convert-std-to-llvm | \
// RUN: mlir-cpu-runner -O3 -e main -entry-point-result=void \
// Activate to dump assembly
// R_UN: -dump-object-file -object-filename=/tmp/a.o \
diff --git a/mlir/integration_test/Dialect/Linalg/CPU/benchmark_matmul_column_major_as_row_major.mlir b/mlir/integration_test/Dialect/Linalg/CPU/benchmark_matmul_column_major_as_row_major.mlir
index 95fc57506c43..e32d898e6c6d 100644
--- a/mlir/integration_test/Dialect/Linalg/CPU/benchmark_matmul_column_major_as_row_major.mlir
+++ b/mlir/integration_test/Dialect/Linalg/CPU/benchmark_matmul_column_major_as_row_major.mlir
@@ -8,7 +8,7 @@
// R_UN: mlir-opt -test-linalg-codegen-strategy="anchor-op=linalg.copy register-tile-sizes=4,16 vectorize" | \
// RUN: mlir-opt -canonicalize -convert-vector-to-scf -lower-affine -convert-linalg-to-loops | \
-// RUN: mlir-opt -canonicalize -convert-scf-to-std -convert-vector-to-llvm | \
+// RUN: mlir-opt -canonicalize -convert-scf-to-std -convert-vector-to-llvm -convert-std-to-llvm | \
// RUN: mlir-cpu-runner -O3 -e main -entry-point-result=void \
// Activate to dump assembly
// R_UN: -dump-object-file -object-filename=/tmp/a.o \
diff --git a/mlir/integration_test/Dialect/Linalg/CPU/benchmark_matmul_i8_i8_i32.mlir b/mlir/integration_test/Dialect/Linalg/CPU/benchmark_matmul_i8_i8_i32.mlir
index abfb14739e25..32c977fbbe44 100644
--- a/mlir/integration_test/Dialect/Linalg/CPU/benchmark_matmul_i8_i8_i32.mlir
+++ b/mlir/integration_test/Dialect/Linalg/CPU/benchmark_matmul_i8_i8_i32.mlir
@@ -5,7 +5,7 @@
// RUN: mlir-opt -test-linalg-codegen-strategy="anchor-op=linalg.copy register-tile-sizes=4,32 vectorize" | \
// RUN: mlir-opt -canonicalize -convert-vector-to-scf -lower-affine -convert-linalg-to-loops | \
-// RUN: mlir-opt -canonicalize -convert-scf-to-std -convert-vector-to-llvm -mlir-disable-threading | \
+// RUN: mlir-opt -canonicalize -convert-scf-to-std -convert-vector-to-llvm -convert-std-to-llvm -mlir-disable-threading | \
// RUN: mlir-cpu-runner -O3 -e main -entry-point-result=void \
// Activate to dump assembly
// R_UN: -dump-object-file -object-filename=/tmp/a.o \
diff --git a/mlir/integration_test/Dialect/Standard/CPU/test-ceil-floor-pos-neg.mlir b/mlir/integration_test/Dialect/Standard/CPU/test-ceil-floor-pos-neg.mlir
index a982baa646c3..a7c06c7c8391 100644
--- a/mlir/integration_test/Dialect/Standard/CPU/test-ceil-floor-pos-neg.mlir
+++ b/mlir/integration_test/Dialect/Standard/CPU/test-ceil-floor-pos-neg.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -convert-vector-to-scf -lower-affine -convert-scf-to-std -std-expand -convert-vector-to-llvm | \
+// RUN: mlir-opt %s -convert-vector-to-scf -lower-affine -convert-scf-to-std -std-expand -convert-vector-to-llvm -convert-std-to-llvm | \
// RUN: mlir-cpu-runner -e entry -entry-point-result=void \
// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \
// RUN: FileCheck %s
diff --git a/mlir/integration_test/Dialect/Vector/CPU/test-transfer-read-2d.mlir b/mlir/integration_test/Dialect/Vector/CPU/test-transfer-read-2d.mlir
index b6146137f4e1..3d259661d081 100644
--- a/mlir/integration_test/Dialect/Vector/CPU/test-transfer-read-2d.mlir
+++ b/mlir/integration_test/Dialect/Vector/CPU/test-transfer-read-2d.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -convert-vector-to-scf -lower-affine -convert-scf-to-std -convert-vector-to-llvm | \
+// RUN: mlir-opt %s -convert-vector-to-scf -lower-affine -convert-scf-to-std -convert-vector-to-llvm -convert-std-to-llvm | \
// RUN: mlir-cpu-runner -e entry -entry-point-result=void \
// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \
// RUN: FileCheck %s
diff --git a/mlir/integration_test/Dialect/Vector/CPU/test-transfer-to-loops.mlir b/mlir/integration_test/Dialect/Vector/CPU/test-transfer-to-loops.mlir
index b8f4045495ff..15d043f11e77 100644
--- a/mlir/integration_test/Dialect/Vector/CPU/test-transfer-to-loops.mlir
+++ b/mlir/integration_test/Dialect/Vector/CPU/test-transfer-to-loops.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -convert-vector-to-scf -lower-affine -convert-scf-to-std -convert-vector-to-llvm | \
+// RUN: mlir-opt %s -convert-vector-to-scf -lower-affine -convert-scf-to-std -convert-vector-to-llvm -convert-std-to-llvm | \
// RUN: mlir-cpu-runner -e main -entry-point-result=void \
// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext,%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \
// RUN: FileCheck %s
diff --git a/mlir/integration_test/Dialect/Vector/CPU/test-vector-distribute.mlir b/mlir/integration_test/Dialect/Vector/CPU/test-vector-distribute.mlir
index fb4b1029c790..a6c053a5b014 100644
--- a/mlir/integration_test/Dialect/Vector/CPU/test-vector-distribute.mlir
+++ b/mlir/integration_test/Dialect/Vector/CPU/test-vector-distribute.mlir
@@ -1,11 +1,11 @@
// RUN: mlir-opt %s -test-vector-to-forloop -convert-vector-to-scf \
-// RUN: -lower-affine -convert-scf-to-std -convert-vector-to-llvm | \
+// RUN: -lower-affine -convert-scf-to-std -convert-vector-to-llvm -convert-std-to-llvm | \
// RUN: mlir-cpu-runner -e main -entry-point-result=void \
// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext | \
// RUN: FileCheck %s
// RUN: mlir-opt %s -convert-vector-to-scf -lower-affine \
-// RUN: -convert-scf-to-std -convert-vector-to-llvm | mlir-cpu-runner -e main \
+// RUN: -convert-scf-to-std -convert-vector-to-llvm -convert-std-to-llvm | mlir-cpu-runner -e main \
// RUN: -entry-point-result=void \
// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext | \
// RUN: FileCheck %s
diff --git a/mlir/lib/Conversion/ArmSVEToLLVM/ArmSVEToLLVM.cpp b/mlir/lib/Conversion/ArmSVEToLLVM/ArmSVEToLLVM.cpp
index 8a5790352263..1d95f73327fd 100644
--- a/mlir/lib/Conversion/ArmSVEToLLVM/ArmSVEToLLVM.cpp
+++ b/mlir/lib/Conversion/ArmSVEToLLVM/ArmSVEToLLVM.cpp
@@ -58,13 +58,56 @@ convertScalableVectorTypeToLLVM(ScalableVectorType svType,
return sVectorType;
}
+template <typename OpTy>
+class ForwardOperands : public OpConversionPattern<OpTy> {
+ using OpConversionPattern<OpTy>::OpConversionPattern;
+
+ LogicalResult
+ matchAndRewrite(OpTy op, ArrayRef<Value> operands,
+ ConversionPatternRewriter &rewriter) const final {
+ if (ValueRange(operands).getTypes() == op->getOperands().getTypes())
+ return rewriter.notifyMatchFailure(op, "operand types already match");
+
+ rewriter.updateRootInPlace(op, [&]() { op->setOperands(operands); });
+ return success();
+ }
+};
+
+class ReturnOpTypeConversion : public OpConversionPattern<ReturnOp> {
+public:
+ using OpConversionPattern<ReturnOp>::OpConversionPattern;
+
+ LogicalResult
+ matchAndRewrite(ReturnOp op, ArrayRef<Value> operands,
+ ConversionPatternRewriter &rewriter) const final {
+ rewriter.updateRootInPlace(op, [&]() { op->setOperands(operands); });
+ return success();
+ }
+};
+
+static Optional<Value> addUnrealizedCast(OpBuilder &builder,
+ ScalableVectorType svType,
+ ValueRange inputs, Location loc) {
+ if (inputs.size() != 1 ||
+ !inputs[0].getType().isa<LLVM::LLVMScalableVectorType>())
+ return Value();
+ return builder.create<UnrealizedConversionCastOp>(loc, svType, inputs)
+ .getResult(0);
+}
+
/// Populate the given list with patterns that convert from ArmSVE to LLVM.
void mlir::populateArmSVEToLLVMConversionPatterns(
LLVMTypeConverter &converter, OwningRewritePatternList &patterns) {
converter.addConversion([&converter](ScalableVectorType svType) {
return convertScalableVectorTypeToLLVM(svType, converter);
});
+ converter.addSourceMaterialization(addUnrealizedCast);
+
// clang-format off
+ patterns.insert<ForwardOperands<CallOp>,
+ ForwardOperands<CallIndirectOp>,
+ ForwardOperands<ReturnOp>>(converter,
+ &converter.getContext());
patterns.insert<SdotOpLowering,
SmmlaOpLowering,
UdotOpLowering,
diff --git a/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVM.cpp b/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVM.cpp
index 7686e7752cfc..9e88250e2cab 100644
--- a/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVM.cpp
+++ b/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVM.cpp
@@ -102,6 +102,27 @@ static SmallVector<int64_t, 4> getI64SubArray(ArrayAttr arrayAttr,
return res;
}
+static Value createCastToIndexLike(ConversionPatternRewriter &rewriter,
+ Location loc, Type targetType, Value value) {
+ if (targetType == value.getType())
+ return value;
+
+ bool targetIsIndex = targetType.isIndex();
+ bool valueIsIndex = value.getType().isIndex();
+ if (targetIsIndex ^ valueIsIndex)
+ return rewriter.create<IndexCastOp>(loc, targetType, value);
+
+ auto targetIntegerType = targetType.dyn_cast<IntegerType>();
+ auto valueIntegerType = value.getType().dyn_cast<IntegerType>();
+ assert(targetIntegerType && valueIntegerType &&
+ "unexpected cast between types other than integers and index");
+ assert(targetIntegerType.getSignedness() == valueIntegerType.getSignedness());
+
+ if (targetIntegerType.getWidth() > valueIntegerType.getWidth())
+ return rewriter.create<SignExtendIOp>(loc, targetIntegerType, value);
+ return rewriter.create<TruncateIOp>(loc, targetIntegerType, value);
+}
+
// Helper that returns a vector comparison that constructs a mask:
// mask = [0,1,..,n-1] + [o,o,..,o] < [b,b,..,b]
//
@@ -131,12 +152,12 @@ static Value buildVectorComparison(ConversionPatternRewriter &rewriter,
}
// Add in an offset if requested.
if (off) {
- Value o = rewriter.create<IndexCastOp>(loc, idxType, *off);
+ Value o = createCastToIndexLike(rewriter, loc, idxType, *off);
Value ov = rewriter.create<SplatOp>(loc, indices.getType(), o);
indices = rewriter.create<AddIOp>(loc, ov, indices);
}
// Construct the vector comparison.
- Value bound = rewriter.create<IndexCastOp>(loc, idxType, b);
+ Value bound = createCastToIndexLike(rewriter, loc, idxType, b);
Value bounds = rewriter.create<SplatOp>(loc, indices.getType(), bound);
return rewriter.create<CmpIOp>(loc, CmpIPredicate::slt, indices, bounds);
}
@@ -216,10 +237,8 @@ replaceTransferOpWithMasked(ConversionPatternRewriter &rewriter,
LLVMTypeConverter &typeConverter, Location loc,
TransferReadOp xferOp, ArrayRef<Value> operands,
Value dataPtr, Value mask) {
- auto toLLVMTy = [&](Type t) { return typeConverter.convertType(t); };
VectorType fillType = xferOp.getVectorType();
Value fill = rewriter.create<SplatOp>(loc, fillType, xferOp.padding());
- fill = rewriter.create<LLVM::DialectCastOp>(loc, toLLVMTy(fillType), fill);
Type vecTy = typeConverter.convertType(xferOp.getVectorType());
if (!vecTy)
diff --git a/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVMPass.cpp b/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVMPass.cpp
index af6ce6a0a68c..910524041bed 100644
--- a/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVMPass.cpp
+++ b/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVMPass.cpp
@@ -22,6 +22,7 @@
#include "mlir/Dialect/LLVMIR/LLVMArmNeonDialect.h"
#include "mlir/Dialect/LLVMIR/LLVMArmSVEDialect.h"
#include "mlir/Dialect/LLVMIR/LLVMDialect.h"
+#include "mlir/Dialect/StandardOps/IR/Ops.h"
#include "mlir/Dialect/Vector/VectorOps.h"
#include "mlir/Transforms/GreedyPatternRewriteDriver.h"
@@ -70,10 +71,12 @@ void LowerVectorToLLVMPass::runOnOperation() {
populateVectorToLLVMConversionPatterns(
converter, patterns, reassociateFPReductions, enableIndexOptimizations);
populateVectorToLLVMMatrixConversionPatterns(converter, patterns);
- populateStdToLLVMConversionPatterns(converter, patterns);
// Architecture specific augmentations.
LLVMConversionTarget target(getContext());
+ target.addLegalOp<LLVM::DialectCastOp>();
+ target.addLegalDialect<StandardOpsDialect>();
+ target.addLegalOp<UnrealizedConversionCastOp>();
if (enableArmNeon) {
target.addLegalDialect<LLVM::LLVMArmNeonDialect>();
target.addIllegalDialect<arm_neon::ArmNeonDialect>();
@@ -82,6 +85,23 @@ void LowerVectorToLLVMPass::runOnOperation() {
if (enableArmSVE) {
target.addLegalDialect<LLVM::LLVMArmSVEDialect>();
target.addIllegalDialect<arm_sve::ArmSVEDialect>();
+ auto hasScalableVectorType = [](TypeRange types) {
+ for (Type type : types)
+ if (type.isa<arm_sve::ScalableVectorType>())
+ return true;
+ return false;
+ };
+ // Remove any ArmSVE-specific types from function signatures and results.
+ populateFuncOpTypeConversionPattern(patterns, &getContext(), converter);
+ target.addDynamicallyLegalOp<FuncOp>([hasScalableVectorType](FuncOp op) {
+ return !hasScalableVectorType(op.getType().getInputs()) &&
+ !hasScalableVectorType(op.getType().getResults());
+ });
+ target.addDynamicallyLegalOp<CallOp, CallIndirectOp, ReturnOp>(
+ [hasScalableVectorType](Operation *op) {
+ return !hasScalableVectorType(op->getOperandTypes()) &&
+ !hasScalableVectorType(op->getResultTypes());
+ });
populateArmSVEToLLVMConversionPatterns(converter, patterns);
}
if (enableAVX512) {
diff --git a/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp b/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp
index adf7ff7b74f5..8a3d2ce61055 100644
--- a/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp
+++ b/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp
@@ -1274,10 +1274,40 @@ static LogicalResult verifyCast(DialectCastOp op, Type llvmType, Type type,
return op.emitOpError("invalid cast between index and non-integer type");
}
+ if (type.isa<IntegerType>()) {
+ auto llvmIntegerType = llvmType.dyn_cast<IntegerType>();
+ if (!llvmIntegerType)
+ return op->emitOpError("invalid cast between integer and non-integer");
+ if (llvmIntegerType.getWidth() != type.getIntOrFloatBitWidth())
+ return op.emitOpError("invalid cast changing integer width");
+ return success();
+ }
+
// Vectors are compatible if they are 1D non-scalable, and their element types
- // are compatible.
- if (auto vectorType = type.dyn_cast<VectorType>())
- return op.emitOpError("vector types should not be casted");
+ // are compatible. nD vectors are compatible with (n-1)D arrays containing 1D
+ // vector.
+ if (auto vectorType = type.dyn_cast<VectorType>()) {
+ if (vectorType == llvmType && !isElement)
+ return op.emitOpError("vector types should not be casted");
+
+ if (vectorType.getRank() == 1) {
+ auto llvmVectorType = llvmType.dyn_cast<VectorType>();
+ if (!llvmVectorType || llvmVectorType.getRank() != 1)
+ return op.emitOpError("invalid cast for vector types");
+
+ return verifyCast(op, llvmVectorType.getElementType(),
+ vectorType.getElementType(), /*isElement=*/true);
+ }
+
+ auto arrayType = llvmType.dyn_cast<LLVM::LLVMArrayType>();
+ if (!arrayType ||
+ arrayType.getNumElements() != vectorType.getShape().front())
+ return op.emitOpError("invalid cast for vector, expected array");
+ return verifyCast(op, arrayType.getElementType(),
+ VectorType::get(vectorType.getShape().drop_front(),
+ vectorType.getElementType()),
+ /*isElement=*/true);
+ }
if (auto memrefType = type.dyn_cast<MemRefType>()) {
// Bare pointer convention: statically-shaped memref is compatible with an
diff --git a/mlir/test/Conversion/ArmSVEToLLVM/convert-to-llvm.mlir b/mlir/test/Conversion/ArmSVEToLLVM/convert-to-llvm.mlir
index 5f218c9f421a..f05b37644fc7 100644
--- a/mlir/test/Conversion/ArmSVEToLLVM/convert-to-llvm.mlir
+++ b/mlir/test/Conversion/ArmSVEToLLVM/convert-to-llvm.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -convert-vector-to-llvm="enable-arm-sve" | mlir-opt | FileCheck %s
+// RUN: mlir-opt %s -convert-vector-to-llvm="enable-arm-sve" -convert-std-to-llvm | mlir-opt | FileCheck %s
func @arm_sve_sdot(%a: !arm_sve.vector<16xi8>,
%b: !arm_sve.vector<16xi8>,
diff --git a/mlir/test/Conversion/VectorToLLVM/vector-mask-to-llvm.mlir b/mlir/test/Conversion/VectorToLLVM/vector-mask-to-llvm.mlir
index 85e19da84013..249f8c09e599 100644
--- a/mlir/test/Conversion/VectorToLLVM/vector-mask-to-llvm.mlir
+++ b/mlir/test/Conversion/VectorToLLVM/vector-mask-to-llvm.mlir
@@ -1,45 +1,41 @@
// RUN: mlir-opt %s --convert-vector-to-llvm='enable-index-optimizations=1' | FileCheck %s --check-prefix=CMP32
// RUN: mlir-opt %s --convert-vector-to-llvm='enable-index-optimizations=0' | FileCheck %s --check-prefix=CMP64
-// CMP32-LABEL: llvm.func @genbool_var_1d(
-// CMP32-SAME: %[[A:.*]]: i64)
-// CMP32: %[[T0:.*]] = llvm.mlir.constant(dense<[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]> : vector<11xi32>) : vector<11xi32>
-// CMP32: %[[T1:.*]] = llvm.trunc %[[A]] : i64 to i32
-// CMP32: %[[T2:.*]] = llvm.mlir.undef : vector<11xi32>
-// CMP32: %[[T3:.*]] = llvm.mlir.constant(0 : i32) : i32
-// CMP32: %[[T4:.*]] = llvm.insertelement %[[T1]], %[[T2]][%[[T3]] : i32] : vector<11xi32>
-// CMP32: %[[T5:.*]] = llvm.shufflevector %[[T4]], %[[T2]] [0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32] : vector<11xi32>, vector<11xi32>
-// CMP32: %[[T6:.*]] = llvm.icmp "slt" %[[T0]], %[[T5]] : vector<11xi32>
-// CMP32: llvm.return %[[T6]] : vector<11xi1>
+// CMP32-LABEL: @genbool_var_1d(
+// CMP32-SAME: %[[ARG:.*]]: index)
+// CMP32: %[[A:.*]] = llvm.mlir.cast %[[ARG]] : index to i64
+// CMP32: %[[T0:.*]] = constant dense<[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]> : vector<11xi32>
+// CMP32: %[[T1:.*]] = trunci %[[A]] : i64 to i32
+// CMP32: %[[T2:.*]] = splat %[[T1]] : vector<11xi32>
+// CMP32: %[[T3:.*]] = cmpi slt, %[[T0]], %[[T2]] : vector<11xi32>
+// CMP32: return %[[T3]] : vector<11xi1>
-// CMP64-LABEL: llvm.func @genbool_var_1d(
-// CMP64-SAME: %[[A:.*]]: i64)
-// CMP64: %[[T0:.*]] = llvm.mlir.constant(dense<[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]> : vector<11xi64>) : vector<11xi64>
-// CMP64: %[[T1:.*]] = llvm.mlir.undef : vector<11xi64>
-// CMP64: %[[T2:.*]] = llvm.mlir.constant(0 : i32) : i32
-// CMP64: %[[T3:.*]] = llvm.insertelement %[[A]], %[[T1]][%[[T2]] : i32] : vector<11xi64>
-// CMP64: %[[T4:.*]] = llvm.shufflevector %[[T3]], %[[T1]] [0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32] : vector<11xi64>, vector<11xi64>
-// CMP64: %[[T5:.*]] = llvm.icmp "slt" %[[T0]], %[[T4]] : vector<11xi64>
-// CMP64: llvm.return %[[T5]] : vector<11xi1>
+// CMP64-LABEL: @genbool_var_1d(
+// CMP64-SAME: %[[ARG:.*]]: index)
+// CMP64: %[[A:.*]] = llvm.mlir.cast %[[ARG]] : index to i64
+// CMP64: %[[T0:.*]] = constant dense<[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]> : vector<11xi64>
+// CMP64: %[[T1:.*]] = splat %[[A]] : vector<11xi64>
+// CMP64: %[[T2:.*]] = cmpi slt, %[[T0]], %[[T1]] : vector<11xi64>
+// CMP64: return %[[T2]] : vector<11xi1>
func @genbool_var_1d(%arg0: index) -> vector<11xi1> {
%0 = vector.create_mask %arg0 : vector<11xi1>
return %0 : vector<11xi1>
}
-// CMP32-LABEL: llvm.func @transfer_read_1d
-// CMP32: %[[C:.*]] = llvm.mlir.constant(dense<[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]> : vector<16xi32>) : vector<16xi32>
-// CMP32: %[[A:.*]] = llvm.add %{{.*}}, %[[C]] : vector<16xi32>
-// CMP32: %[[M:.*]] = llvm.icmp "slt" %[[A]], %{{.*}} : vector<16xi32>
+// CMP32-LABEL: @transfer_read_1d
+// CMP32: %[[C:.*]] = constant dense<[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]> : vector<16xi32>
+// CMP32: %[[A:.*]] = addi %{{.*}}, %[[C]] : vector<16xi32>
+// CMP32: %[[M:.*]] = cmpi slt, %[[A]], %{{.*}} : vector<16xi32>
// CMP32: %[[L:.*]] = llvm.intr.masked.load %{{.*}}, %[[M]], %{{.*}}
-// CMP32: llvm.return %[[L]] : vector<16xf32>
+// CMP32: return %[[L]] : vector<16xf32>
-// CMP64-LABEL: llvm.func @transfer_read_1d
-// CMP64: %[[C:.*]] = llvm.mlir.constant(dense<[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]> : vector<16xi64>) : vector<16xi64>
-// CMP64: %[[A:.*]] = llvm.add %{{.*}}, %[[C]] : vector<16xi64>
-// CMP64: %[[M:.*]] = llvm.icmp "slt" %[[A]], %{{.*}} : vector<16xi64>
+// CMP64-LABEL: @transfer_read_1d
+// CMP64: %[[C:.*]] = constant dense<[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]> : vector<16xi64>
+// CMP64: %[[A:.*]] = addi %{{.*}}, %[[C]] : vector<16xi64>
+// CMP64: %[[M:.*]] = cmpi slt, %[[A]], %{{.*}} : vector<16xi64>
// CMP64: %[[L:.*]] = llvm.intr.masked.load %{{.*}}, %[[M]], %{{.*}}
-// CMP64: llvm.return %[[L]] : vector<16xf32>
+// CMP64: return %[[L]] : vector<16xf32>
func @transfer_read_1d(%A : memref<?xf32>, %i: index) -> vector<16xf32> {
%d = constant -1.0: f32
diff --git a/mlir/test/Conversion/VectorToLLVM/vector-reduction-to-llvm.mlir b/mlir/test/Conversion/VectorToLLVM/vector-reduction-to-llvm.mlir
index 71d091413b2e..b2df9745070e 100644
--- a/mlir/test/Conversion/VectorToLLVM/vector-reduction-to-llvm.mlir
+++ b/mlir/test/Conversion/VectorToLLVM/vector-reduction-to-llvm.mlir
@@ -2,19 +2,19 @@
// RUN: mlir-opt %s -convert-vector-to-llvm='reassociate-fp-reductions' | FileCheck %s --check-prefix=REASSOC
//
-// CHECK-LABEL: llvm.func @reduce_add_f32(
+// CHECK-LABEL: @reduce_add_f32(
// CHECK-SAME: %[[A:.*]]: vector<16xf32>)
// CHECK: %[[C:.*]] = llvm.mlir.constant(0.000000e+00 : f32) : f32
// CHECK: %[[V:.*]] = "llvm.intr.vector.reduce.fadd"(%[[C]], %[[A]])
// CHECK-SAME: {reassoc = false} : (f32, vector<16xf32>) -> f32
-// CHECK: llvm.return %[[V]] : f32
+// CHECK: return %[[V]] : f32
//
-// REASSOC-LABEL: llvm.func @reduce_add_f32(
+// REASSOC-LABEL: @reduce_add_f32(
// REASSOC-SAME: %[[A:.*]]: vector<16xf32>)
// REASSOC: %[[C:.*]] = llvm.mlir.constant(0.000000e+00 : f32) : f32
// REASSOC: %[[V:.*]] = "llvm.intr.vector.reduce.fadd"(%[[C]], %[[A]])
// REASSOC-SAME: {reassoc = true} : (f32, vector<16xf32>) -> f32
-// REASSOC: llvm.return %[[V]] : f32
+// REASSOC: return %[[V]] : f32
//
func @reduce_add_f32(%arg0: vector<16xf32>) -> f32 {
%0 = vector.reduction "add", %arg0 : vector<16xf32> into f32
@@ -22,19 +22,19 @@ func @reduce_add_f32(%arg0: vector<16xf32>) -> f32 {
}
//
-// CHECK-LABEL: llvm.func @reduce_mul_f32(
+// CHECK-LABEL: @reduce_mul_f32(
// CHECK-SAME: %[[A:.*]]: vector<16xf32>)
// CHECK: %[[C:.*]] = llvm.mlir.constant(1.000000e+00 : f32) : f32
// CHECK: %[[V:.*]] = "llvm.intr.vector.reduce.fmul"(%[[C]], %[[A]])
// CHECK-SAME: {reassoc = false} : (f32, vector<16xf32>) -> f32
-// CHECK: llvm.return %[[V]] : f32
+// CHECK: return %[[V]] : f32
//
-// REASSOC-LABEL: llvm.func @reduce_mul_f32(
+// REASSOC-LABEL: @reduce_mul_f32(
// REASSOC-SAME: %[[A:.*]]: vector<16xf32>)
// REASSOC: %[[C:.*]] = llvm.mlir.constant(1.000000e+00 : f32) : f32
// REASSOC: %[[V:.*]] = "llvm.intr.vector.reduce.fmul"(%[[C]], %[[A]])
// REASSOC-SAME: {reassoc = true} : (f32, vector<16xf32>) -> f32
-// REASSOC: llvm.return %[[V]] : f32
+// REASSOC: return %[[V]] : f32
//
func @reduce_mul_f32(%arg0: vector<16xf32>) -> f32 {
%0 = vector.reduction "mul", %arg0 : vector<16xf32> into f32
diff --git a/mlir/test/Conversion/VectorToLLVM/vector-to-llvm.mlir b/mlir/test/Conversion/VectorToLLVM/vector-to-llvm.mlir
index 1881c84ce1c9..facc91cf03d0 100644
--- a/mlir/test/Conversion/VectorToLLVM/vector-to-llvm.mlir
+++ b/mlir/test/Conversion/VectorToLLVM/vector-to-llvm.mlir
@@ -6,7 +6,7 @@ func @bitcast_f32_to_i32_vector(%input: vector<16xf32>) -> vector<16xi32> {
return %0 : vector<16xi32>
}
-// CHECK-LABEL: llvm.func @bitcast_f32_to_i32_vector(
+// CHECK-LABEL: @bitcast_f32_to_i32_vector
// CHECK-SAME: %[[input:.*]]: vector<16xf32>
// CHECK: llvm.bitcast %[[input]] : vector<16xf32> to vector<16xi32>
@@ -17,7 +17,7 @@ func @bitcast_i8_to_f32_vector(%input: vector<64xi8>) -> vector<16xf32> {
return %0 : vector<16xf32>
}
-// CHECK-LABEL: llvm.func @bitcast_i8_to_f32_vector(
+// CHECK-LABEL: @bitcast_i8_to_f32_vector
// CHECK-SAME: %[[input:.*]]: vector<64xi8>
// CHECK: llvm.bitcast %[[input]] : vector<64xi8> to vector<16xf32>
@@ -27,13 +27,10 @@ func @broadcast_vec1d_from_scalar(%arg0: f32) -> vector<2xf32> {
%0 = vector.broadcast %arg0 : f32 to vector<2xf32>
return %0 : vector<2xf32>
}
-// CHECK-LABEL: llvm.func @broadcast_vec1d_from_scalar(
+// CHECK-LABEL: @broadcast_vec1d_from_scalar
// CHECK-SAME: %[[A:.*]]: f32)
-// CHECK: %[[T0:.*]] = llvm.mlir.undef : vector<2xf32>
-// CHECK: %[[T1:.*]] = llvm.mlir.constant(0 : i32) : i32
-// CHECK: %[[T2:.*]] = llvm.insertelement %[[A]], %[[T0]][%[[T1]] : i32] : vector<2xf32>
-// CHECK: %[[T3:.*]] = llvm.shufflevector %[[T2]], %[[T0]] [0 : i32, 0 : i32] : vector<2xf32>, vector<2xf32>
-// CHECK: llvm.return %[[T3]] : vector<2xf32>
+// CHECK: %[[T0:.*]] = splat %[[A]] : vector<2xf32>
+// CHECK: return %[[T0]] : vector<2xf32>
// -----
@@ -41,16 +38,10 @@ func @broadcast_vec2d_from_scalar(%arg0: f32) -> vector<2x3xf32> {
%0 = vector.broadcast %arg0 : f32 to vector<2x3xf32>
return %0 : vector<2x3xf32>
}
-// CHECK-LABEL: llvm.func @broadcast_vec2d_from_scalar(
+// CHECK-LABEL: @broadcast_vec2d_from_scalar(
// CHECK-SAME: %[[A:.*]]: f32)
-// CHECK: %[[T0:.*]] = llvm.mlir.undef : !llvm.array<2 x vector<3xf32>>
-// CHECK: %[[T1:.*]] = llvm.mlir.undef : vector<3xf32>
-// CHECK: %[[T2:.*]] = llvm.mlir.constant(0 : i32) : i32
-// CHECK: %[[T3:.*]] = llvm.insertelement %[[A]], %[[T1]][%[[T2]] : i32] : vector<3xf32>
-// CHECK: %[[T4:.*]] = llvm.shufflevector %[[T3]], %[[T3]] [0 : i32, 0 : i32, 0 : i32] : vector<3xf32>, vector<3xf32>
-// CHECK: %[[T5:.*]] = llvm.insertvalue %[[T4]], %[[T0]][0] : !llvm.array<2 x vector<3xf32>>
-// CHECK: %[[T6:.*]] = llvm.insertvalue %[[T4]], %[[T5]][1] : !llvm.array<2 x vector<3xf32>>
-// CHECK: llvm.return %[[T6]] : !llvm.array<2 x vector<3xf32>>
+// CHECK: %[[T0:.*]] = splat %[[A]] : vector<2x3xf32>
+// CHECK: return %[[T0]] : vector<2x3xf32>
// -----
@@ -58,20 +49,10 @@ func @broadcast_vec3d_from_scalar(%arg0: f32) -> vector<2x3x4xf32> {
%0 = vector.broadcast %arg0 : f32 to vector<2x3x4xf32>
return %0 : vector<2x3x4xf32>
}
-// CHECK-LABEL: llvm.func @broadcast_vec3d_from_scalar(
+// CHECK-LABEL: @broadcast_vec3d_from_scalar(
// CHECK-SAME: %[[A:.*]]: f32)
-// CHECK: %[[T0:.*]] = llvm.mlir.undef : !llvm.array<2 x array<3 x vector<4xf32>>>
-// CHECK: %[[T1:.*]] = llvm.mlir.undef : vector<4xf32>
-// CHECK: %[[T2:.*]] = llvm.mlir.constant(0 : i32) : i32
-// CHECK: %[[T3:.*]] = llvm.insertelement %[[A]], %[[T1]][%[[T2]] : i32] : vector<4xf32>
-// CHECK: %[[T4:.*]] = llvm.shufflevector %[[T3]], %[[T3]] [0 : i32, 0 : i32, 0 : i32, 0 : i32] : vector<4xf32>, vector<4xf32>
-// CHECK: %[[T5:.*]] = llvm.insertvalue %[[T4]], %[[T0]][0, 0] : !llvm.array<2 x array<3 x vector<4xf32>>>
-// CHECK: %[[T6:.*]] = llvm.insertvalue %[[T4]], %[[T5]][0, 1] : !llvm.array<2 x array<3 x vector<4xf32>>>
-// CHECK: %[[T7:.*]] = llvm.insertvalue %[[T4]], %[[T6]][0, 2] : !llvm.array<2 x array<3 x vector<4xf32>>>
-// CHECK: %[[T8:.*]] = llvm.insertvalue %[[T4]], %[[T7]][1, 0] : !llvm.array<2 x array<3 x vector<4xf32>>>
-// CHECK: %[[T9:.*]] = llvm.insertvalue %[[T4]], %[[T8]][1, 1] : !llvm.array<2 x array<3 x vector<4xf32>>>
-// CHECK: %[[T10:.*]] = llvm.insertvalue %[[T4]], %[[T9]][1, 2] : !llvm.array<2 x array<3 x vector<4xf32>>>
-// CHECK: llvm.return %[[T10]] : !llvm.array<2 x array<3 x vector<4xf32>>>
+// CHECK: %[[T0:.*]] = splat %[[A]] : vector<2x3x4xf32>
+// CHECK: return %[[T0]] : vector<2x3x4xf32>
// -----
@@ -79,9 +60,9 @@ func @broadcast_vec1d_from_vec1d(%arg0: vector<2xf32>) -> vector<2xf32> {
%0 = vector.broadcast %arg0 : vector<2xf32> to vector<2xf32>
return %0 : vector<2xf32>
}
-// CHECK-LABEL: llvm.func @broadcast_vec1d_from_vec1d(
+// CHECK-LABEL: @broadcast_vec1d_from_vec1d(
// CHECK-SAME: %[[A:.*]]: vector<2xf32>)
-// CHECK: llvm.return %[[A]] : vector<2xf32>
+// CHECK: return %[[A]] : vector<2xf32>
// -----
@@ -89,13 +70,15 @@ func @broadcast_vec2d_from_vec1d(%arg0: vector<2xf32>) -> vector<3x2xf32> {
%0 = vector.broadcast %arg0 : vector<2xf32> to vector<3x2xf32>
return %0 : vector<3x2xf32>
}
-// CHECK-LABEL: llvm.func @broadcast_vec2d_from_vec1d(
+// CHECK-LABEL: @broadcast_vec2d_from_vec1d(
// CHECK-SAME: %[[A:.*]]: vector<2xf32>)
-// CHECK: %[[T0:.*]] = llvm.mlir.constant(dense<0.000000e+00> : vector<3x2xf32>) : !llvm.array<3 x vector<2xf32>>
-// CHECK: %[[T1:.*]] = llvm.insertvalue %[[A]], %[[T0]][0] : !llvm.array<3 x vector<2xf32>>
-// CHECK: %[[T2:.*]] = llvm.insertvalue %[[A]], %[[T1]][1] : !llvm.array<3 x vector<2xf32>>
-// CHECK: %[[T3:.*]] = llvm.insertvalue %[[A]], %[[T2]][2] : !llvm.array<3 x vector<2xf32>>
-// CHECK: llvm.return %[[T3]] : !llvm.array<3 x vector<2xf32>>
+// CHECK: %[[T0:.*]] = constant dense<0.000000e+00> : vector<3x2xf32>
+// CHECK: %[[T1:.*]] = llvm.mlir.cast %[[T0]] : vector<3x2xf32> to !llvm.array<3 x vector<2xf32>>
+// CHECK: %[[T2:.*]] = llvm.insertvalue %[[A]], %[[T1]][0] : !llvm.array<3 x vector<2xf32>>
+// CHECK: %[[T3:.*]] = llvm.insertvalue %[[A]], %[[T2]][1] : !llvm.array<3 x vector<2xf32>>
+// CHECK: %[[T4:.*]] = llvm.insertvalue %[[A]], %[[T3]][2] : !llvm.array<3 x vector<2xf32>>
+// CHECK: %[[T5:.*]] = llvm.mlir.cast %[[T4]] : !llvm.array<3 x vector<2xf32>> to vector<3x2xf32>
+// CHECK: return %[[T5]] : vector<3x2xf32>
// -----
@@ -103,18 +86,24 @@ func @broadcast_vec3d_from_vec1d(%arg0: vector<2xf32>) -> vector<4x3x2xf32> {
%0 = vector.broadcast %arg0 : vector<2xf32> to vector<4x3x2xf32>
return %0 : vector<4x3x2xf32>
}
-// CHECK-LABEL: llvm.func @broadcast_vec3d_from_vec1d(
+// CHECK-LABEL: @broadcast_vec3d_from_vec1d(
// CHECK-SAME: %[[A:.*]]: vector<2xf32>)
-// CHECK: %[[T0:.*]] = llvm.mlir.constant(dense<0.000000e+00> : vector<3x2xf32>) : !llvm.array<3 x vector<2xf32>>
-// CHECK: %[[T1:.*]] = llvm.mlir.constant(dense<0.000000e+00> : vector<4x3x2xf32>) : !llvm.array<4 x array<3 x vector<2xf32>>>
-// CHECK: %[[T2:.*]] = llvm.insertvalue %[[A]], %[[T0]][0] : !llvm.array<3 x vector<2xf32>>
-// CHECK: %[[T3:.*]] = llvm.insertvalue %[[A]], %[[T2]][1] : !llvm.array<3 x vector<2xf32>>
-// CHECK: %[[T4:.*]] = llvm.insertvalue %[[A]], %[[T3]][2] : !llvm.array<3 x vector<2xf32>>
-// CHECK: %[[T5:.*]] = llvm.insertvalue %[[T4]], %[[T1]][0] : !llvm.array<4 x array<3 x vector<2xf32>>>
-// CHECK: %[[T6:.*]] = llvm.insertvalue %[[T4]], %[[T5]][1] : !llvm.array<4 x array<3 x vector<2xf32>>>
-// CHECK: %[[T7:.*]] = llvm.insertvalue %[[T4]], %[[T6]][2] : !llvm.array<4 x array<3 x vector<2xf32>>>
-// CHECK: %[[T8:.*]] = llvm.insertvalue %[[T4]], %[[T7]][3] : !llvm.array<4 x array<3 x vector<2xf32>>>
-// CHECK: llvm.return %[[T8]] : !llvm.array<4 x array<3 x vector<2xf32>>>
+// CHECK: %[[T0:.*]] = constant dense<0.000000e+00> : vector<3x2xf32>
+// CHECK: %[[T1:.*]] = constant dense<0.000000e+00> : vector<4x3x2xf32>
+
+// CHECK: %[[T2:.*]] = llvm.mlir.cast %[[T0]] : vector<3x2xf32> to !llvm.array<3 x vector<2xf32>>
+// CHECK: %[[T3:.*]] = llvm.insertvalue %[[A]], %[[T2]][0] : !llvm.array<3 x vector<2xf32>>
+// CHECK: %[[T4:.*]] = llvm.insertvalue %[[A]], %[[T3]][1] : !llvm.array<3 x vector<2xf32>>
+// CHECK: %[[T5:.*]] = llvm.insertvalue %[[A]], %[[T4]][2] : !llvm.array<3 x vector<2xf32>>
+
+// CHECK: %[[T6:.*]] = llvm.mlir.cast %[[T1]] : vector<4x3x2xf32> to !llvm.array<4 x array<3 x vector<2xf32>>>
+// CHECK: %[[T7:.*]] = llvm.insertvalue %[[T5]], %[[T6]][0] : !llvm.array<4 x array<3 x vector<2xf32>>>
+// CHECK: %[[T8:.*]] = llvm.insertvalue %[[T5]], %[[T7]][1] : !llvm.array<4 x array<3 x vector<2xf32>>>
+// CHECK: %[[T9:.*]] = llvm.insertvalue %[[T5]], %[[T8]][2] : !llvm.array<4 x array<3 x vector<2xf32>>>
+// CHECK: %[[T10:.*]] = llvm.insertvalue %[[T5]], %[[T9]][3] : !llvm.array<4 x array<3 x vector<2xf32>>>
+
+// CHECK: %[[T11:.*]] = llvm.mlir.cast %[[T10]] : !llvm.array<4 x array<3 x vector<2xf32>>> to vector<4x3x2xf32>
+// CHECK: return %[[T11]] : vector<4x3x2xf32>
// -----
@@ -122,14 +111,21 @@ func @broadcast_vec3d_from_vec2d(%arg0: vector<3x2xf32>) -> vector<4x3x2xf32> {
%0 = vector.broadcast %arg0 : vector<3x2xf32> to vector<4x3x2xf32>
return %0 : vector<4x3x2xf32>
}
-// CHECK-LABEL: llvm.func @broadcast_vec3d_from_vec2d(
-// CHECK-SAME: %[[A:.*]]: !llvm.array<3 x vector<2xf32>>)
-// CHECK: %[[T0:.*]] = llvm.mlir.constant(dense<0.000000e+00> : vector<4x3x2xf32>) : !llvm.array<4 x array<3 x vector<2xf32>>>
-// CHECK: %[[T1:.*]] = llvm.insertvalue %[[A]], %[[T0]][0] : !llvm.array<4 x array<3 x vector<2xf32>>>
-// CHECK: %[[T2:.*]] = llvm.insertvalue %[[A]], %[[T1]][1] : !llvm.array<4 x array<3 x vector<2xf32>>>
-// CHECK: %[[T3:.*]] = llvm.insertvalue %[[A]], %[[T2]][2] : !llvm.array<4 x array<3 x vector<2xf32>>>
-// CHECK: %[[T4:.*]] = llvm.insertvalue %[[A]], %[[T3]][3] : !llvm.array<4 x array<3 x vector<2xf32>>>
-// CHECK: llvm.return %[[T4]] : !llvm.array<4 x array<3 x vector<2xf32>>>
+// CHECK-LABEL: @broadcast_vec3d_from_vec2d(
+// CHECK-SAME: %[[A:.*]]: vector<3x2xf32>)
+// CHECK: %[[T0:.*]] = constant dense<0.000000e+00> : vector<4x3x2xf32>
+// CHECK: %[[T1:.*]] = llvm.mlir.cast %[[A]] : vector<3x2xf32> to !llvm.array<3 x vector<2xf32>>
+// CHECK: %[[T2:.*]] = llvm.mlir.cast %[[T0]] : vector<4x3x2xf32> to !llvm.array<4 x array<3 x vector<2xf32>>>
+// CHECK: %[[T3:.*]] = llvm.insertvalue %[[T1]], %[[T2]][0] : !llvm.array<4 x array<3 x vector<2xf32>>>
+// CHECK: %[[T4:.*]] = llvm.mlir.cast %[[A]] : vector<3x2xf32> to !llvm.array<3 x vector<2xf32>>
+// CHECK: %[[T5:.*]] = llvm.insertvalue %[[T4]], %[[T3]][1] : !llvm.array<4 x array<3 x vector<2xf32>>>
+// CHECK: %[[T6:.*]] = llvm.mlir.cast %[[A]] : vector<3x2xf32> to !llvm.array<3 x vector<2xf32>>
+// CHECK: %[[T7:.*]] = llvm.insertvalue %[[T6]], %[[T5]][2] : !llvm.array<4 x array<3 x vector<2xf32>>>
+// CHECK: %[[T8:.*]] = llvm.mlir.cast %[[A]] : vector<3x2xf32> to !llvm.array<3 x vector<2xf32>>
+// CHECK: %[[T9:.*]] = llvm.insertvalue %[[T8]], %[[T7]][3] : !llvm.array<4 x array<3 x vector<2xf32>>>
+// CHECK: %[[T10:.*]] = llvm.mlir.cast %[[T9]] : !llvm.array<4 x array<3 x vector<2xf32>>> to vector<4x3x2xf32>
+// CHECK: return %[[T10]] : vector<4x3x2xf32>
+
// -----
@@ -137,15 +133,12 @@ func @broadcast_stretch(%arg0: vector<1xf32>) -> vector<4xf32> {
%0 = vector.broadcast %arg0 : vector<1xf32> to vector<4xf32>
return %0 : vector<4xf32>
}
-// CHECK-LABEL: llvm.func @broadcast_stretch(
+// CHECK-LABEL: @broadcast_stretch(
// CHECK-SAME: %[[A:.*]]: vector<1xf32>)
-// CHECK: %[[T0:.*]] = llvm.mlir.constant(0 : i64) : i64
-// CHECK: %[[T1:.*]] = llvm.extractelement %[[A]][%[[T0]] : i64] : vector<1xf32>
-// CHECK: %[[T2:.*]] = llvm.mlir.undef : vector<4xf32>
-// CHECK: %[[T3:.*]] = llvm.mlir.constant(0 : i32) : i32
-// CHECK: %[[T4:.*]] = llvm.insertelement %[[T1]], %[[T2]][%3 : i32] : vector<4xf32>
-// CHECK: %[[T5:.*]] = llvm.shufflevector %[[T4]], %[[T2]] [0 : i32, 0 : i32, 0 : i32, 0 : i32] : vector<4xf32>, vector<4xf32>
-// CHECK: llvm.return %[[T5]] : vector<4xf32>
+// CHECK: %[[T1:.*]] = llvm.mlir.constant(0 : i64) : i64
+// CHECK: %[[T2:.*]] = llvm.extractelement %[[A]]{{\[}}%[[T1]] : i64] : vector<1xf32>
+// CHECK: %[[T3:.*]] = splat %[[T2]] : vector<4xf32>
+// CHECK: return %[[T3]] : vector<4xf32>
// -----
@@ -153,14 +146,17 @@ func @broadcast_stretch_at_start(%arg0: vector<1x4xf32>) -> vector<3x4xf32> {
%0 = vector.broadcast %arg0 : vector<1x4xf32> to vector<3x4xf32>
return %0 : vector<3x4xf32>
}
-// CHECK-LABEL: llvm.func @broadcast_stretch_at_start(
-// CHECK-SAME: %[[A:.*]]: !llvm.array<1 x vector<4xf32>>)
-// CHECK: %[[T0:.*]] = llvm.mlir.constant(dense<0.000000e+00> : vector<3x4xf32>) : !llvm.array<3 x vector<4xf32>>
-// CHECK: %[[T1:.*]] = llvm.extractvalue %[[A]][0] : !llvm.array<1 x vector<4xf32>>
-// CHECK: %[[T2:.*]] = llvm.insertvalue %[[T1]], %[[T0]][0] : !llvm.array<3 x vector<4xf32>>
-// CHECK: %[[T3:.*]] = llvm.insertvalue %[[T1]], %[[T2]][1] : !llvm.array<3 x vector<4xf32>>
-// CHECK: %[[T4:.*]] = llvm.insertvalue %[[T1]], %[[T3]][2] : !llvm.array<3 x vector<4xf32>>
-// CHECK: llvm.return %[[T4]] : !llvm.array<3 x vector<4xf32>>
+// CHECK-LABEL: @broadcast_stretch_at_start(
+// CHECK-SAME: %[[A:.*]]: vector<1x4xf32>)
+// CHECK: %[[T1:.*]] = constant dense<0.000000e+00> : vector<3x4xf32>
+// CHECK: %[[T2:.*]] = llvm.mlir.cast %[[A]] : vector<1x4xf32> to !llvm.array<1 x vector<4xf32>>
+// CHECK: %[[T3:.*]] = llvm.extractvalue %[[T2]][0] : !llvm.array<1 x vector<4xf32>>
+// CHECK: %[[T4:.*]] = llvm.mlir.cast %[[T1]] : vector<3x4xf32> to !llvm.array<3 x vector<4xf32>>
+// CHECK: %[[T5:.*]] = llvm.insertvalue %[[T3]], %[[T4]][0] : !llvm.array<3 x vector<4xf32>>
+// CHECK: %[[T6:.*]] = llvm.insertvalue %[[T3]], %[[T5]][1] : !llvm.array<3 x vector<4xf32>>
+// CHECK: %[[T7:.*]] = llvm.insertvalue %[[T3]], %[[T6]][2] : !llvm.array<3 x vector<4xf32>>
+// CHECK: %[[T8:.*]] = llvm.mlir.cast %[[T7]] : !llvm.array<3 x vector<4xf32>> to vector<3x4xf32>
+// CHECK: return %[[T8]] : vector<3x4xf32>
// -----
@@ -168,42 +164,36 @@ func @broadcast_stretch_at_end(%arg0: vector<4x1xf32>) -> vector<4x3xf32> {
%0 = vector.broadcast %arg0 : vector<4x1xf32> to vector<4x3xf32>
return %0 : vector<4x3xf32>
}
-// CHECK-LABEL: llvm.func @broadcast_stretch_at_end(
-// CHECK-SAME: %[[A:.*]]: !llvm.array<4 x vector<1xf32>>)
-// CHECK: %[[T0:.*]] = llvm.mlir.constant(dense<0.000000e+00> : vector<4x3xf32>) : !llvm.array<4 x vector<3xf32>>
-// CHECK: %[[T1:.*]] = llvm.extractvalue %[[A]][0] : !llvm.array<4 x vector<1xf32>>
-// CHECK: %[[T2:.*]] = llvm.mlir.constant(0 : i64) : i64
-// CHECK: %[[T3:.*]] = llvm.extractelement %[[T1]][%[[T2]] : i64] : vector<1xf32>
-// CHECK: %[[T4:.*]] = llvm.mlir.undef : vector<3xf32>
-// CHECK: %[[T5:.*]] = llvm.mlir.constant(0 : i32) : i32
-// CHECK: %[[T6:.*]] = llvm.insertelement %[[T3]], %[[T4]][%[[T5]] : i32] : vector<3xf32>
-// CHECK: %[[T7:.*]] = llvm.shufflevector %[[T6]], %[[T4]] [0 : i32, 0 : i32, 0 : i32] : vector<3xf32>, vector<3xf32>
-// CHECK: %[[T8:.*]] = llvm.insertvalue %[[T7]], %[[T0]][0] : !llvm.array<4 x vector<3xf32>>
-// CHECK: %[[T9:.*]] = llvm.extractvalue %[[A]][1] : !llvm.array<4 x vector<1xf32>>
-// CHECK: %[[T10:.*]] = llvm.mlir.constant(0 : i64) : i64
-// CHECK: %[[T11:.*]] = llvm.extractelement %[[T9]][%[[T10]] : i64] : vector<1xf32>
-// CHECK: %[[T12:.*]] = llvm.mlir.undef : vector<3xf32>
-// CHECK: %[[T13:.*]] = llvm.mlir.constant(0 : i32) : i32
-// CHECK: %[[T14:.*]] = llvm.insertelement %[[T11]], %[[T12]][%[[T13]] : i32] : vector<3xf32>
-// CHECK: %[[T15:.*]] = llvm.shufflevector %[[T14]], %[[T12]] [0 : i32, 0 : i32, 0 : i32] : vector<3xf32>, vector<3xf32>
-// CHECK: %[[T16:.*]] = llvm.insertvalue %[[T15]], %[[T8]][1] : !llvm.array<4 x vector<3xf32>>
-// CHECK: %[[T17:.*]] = llvm.extractvalue %[[A]][2] : !llvm.array<4 x vector<1xf32>>
-// CHECK: %[[T18:.*]] = llvm.mlir.constant(0 : i64) : i64
-// CHECK: %[[T19:.*]] = llvm.extractelement %[[T17]][%[[T18]] : i64] : vector<1xf32>
-// CHECK: %[[T20:.*]] = llvm.mlir.undef : vector<3xf32>
-// CHECK: %[[T21:.*]] = llvm.mlir.constant(0 : i32) : i32
-// CHECK: %[[T22:.*]] = llvm.insertelement %[[T19]], %[[T20]][%[[T21]] : i32] : vector<3xf32>
-// CHECK: %[[T23:.*]] = llvm.shufflevector %[[T22]], %[[T20]] [0 : i32, 0 : i32, 0 : i32] : vector<3xf32>, vector<3xf32>
-// CHECK: %[[T24:.*]] = llvm.insertvalue %[[T23]], %[[T16]][2] : !llvm.array<4 x vector<3xf32>>
-// CHECK: %[[T25:.*]] = llvm.extractvalue %[[A]][3] : !llvm.array<4 x vector<1xf32>>
-// CHECK: %[[T26:.*]] = llvm.mlir.constant(0 : i64) : i64
-// CHECK: %[[T27:.*]] = llvm.extractelement %[[T25]][%[[T26]] : i64] : vector<1xf32>
-// CHECK: %[[T28:.*]] = llvm.mlir.undef : vector<3xf32>
-// CHECK: %[[T29:.*]] = llvm.mlir.constant(0 : i32) : i32
-// CHECK: %[[T30:.*]] = llvm.insertelement %[[T27]], %[[T28]][%[[T29]] : i32] : vector<3xf32>
-// CHECK: %[[T31:.*]] = llvm.shufflevector %[[T30]], %[[T28]] [0 : i32, 0 : i32, 0 : i32] : vector<3xf32>, vector<3xf32>
-// CHECK: %[[T32:.*]] = llvm.insertvalue %[[T31]], %[[T24]][3] : !llvm.array<4 x vector<3xf32>>
-// CHECK: llvm.return %[[T32]] : !llvm.array<4 x vector<3xf32>>
+// CHECK-LABEL: @broadcast_stretch_at_end(
+// CHECK-SAME: %[[A:.*]]: vector<4x1xf32>)
+// CHECK: %[[T1:.*]] = constant dense<0.000000e+00> : vector<4x3xf32>
+// CHECK: %[[T2:.*]] = llvm.mlir.cast %[[A]] : vector<4x1xf32> to !llvm.array<4 x vector<1xf32>>
+// CHECK: %[[T3:.*]] = llvm.extractvalue %[[T2]][0] : !llvm.array<4 x vector<1xf32>>
+// CHECK: %[[T4:.*]] = llvm.mlir.constant(0 : i64) : i64
+// CHECK: %[[T5:.*]] = llvm.extractelement %[[T3]]{{\[}}%[[T4]] : i64] : vector<1xf32>
+// CHECK: %[[T6:.*]] = splat %[[T5]] : vector<3xf32>
+// CHECK: %[[T7:.*]] = llvm.mlir.cast %[[T1]] : vector<4x3xf32> to !llvm.array<4 x vector<3xf32>>
+// CHECK: %[[T8:.*]] = llvm.insertvalue %[[T6]], %[[T7]][0] : !llvm.array<4 x vector<3xf32>>
+// CHECK: %[[T9:.*]] = llvm.mlir.cast %[[A]] : vector<4x1xf32> to !llvm.array<4 x vector<1xf32>>
+// CHECK: %[[T10:.*]] = llvm.extractvalue %[[T9]][1] : !llvm.array<4 x vector<1xf32>>
+// CHECK: %[[T11:.*]] = llvm.mlir.constant(0 : i64) : i64
+// CHECK: %[[T12:.*]] = llvm.extractelement %[[T10]]{{\[}}%[[T11]] : i64] : vector<1xf32>
+// CHECK: %[[T13:.*]] = splat %[[T12]] : vector<3xf32>
+// CHECK: %[[T14:.*]] = llvm.insertvalue %[[T13]], %[[T8]][1] : !llvm.array<4 x vector<3xf32>>
+// CHECK: %[[T15:.*]] = llvm.mlir.cast %[[A]] : vector<4x1xf32> to !llvm.array<4 x vector<1xf32>>
+// CHECK: %[[T16:.*]] = llvm.extractvalue %[[T15]][2] : !llvm.array<4 x vector<1xf32>>
+// CHECK: %[[T17:.*]] = llvm.mlir.constant(0 : i64) : i64
+// CHECK: %[[T18:.*]] = llvm.extractelement %[[T16]]{{\[}}%[[T17]] : i64] : vector<1xf32>
+// CHECK: %[[T19:.*]] = splat %[[T18]] : vector<3xf32>
+// CHECK: %[[T20:.*]] = llvm.insertvalue %[[T19]], %[[T14]][2] : !llvm.array<4 x vector<3xf32>>
+// CHECK: %[[T21:.*]] = llvm.mlir.cast %[[A]] : vector<4x1xf32> to !llvm.array<4 x vector<1xf32>>
+// CHECK: %[[T22:.*]] = llvm.extractvalue %[[T21]][3] : !llvm.array<4 x vector<1xf32>>
+// CHECK: %[[T23:.*]] = llvm.mlir.constant(0 : i64) : i64
+// CHECK: %[[T24:.*]] = llvm.extractelement %[[T22]]{{\[}}%[[T23]] : i64] : vector<1xf32>
+// CHECK: %[[T25:.*]] = splat %[[T24]] : vector<3xf32>
+// CHECK: %[[T26:.*]] = llvm.insertvalue %[[T25]], %[[T20]][3] : !llvm.array<4 x vector<3xf32>>
+// CHECK: %[[T27:.*]] = llvm.mlir.cast %[[T26]] : !llvm.array<4 x vector<3xf32>> to vector<4x3xf32>
+// CHECK: return %[[T27]] : vector<4x3xf32>
// -----
@@ -211,31 +201,41 @@ func @broadcast_stretch_in_middle(%arg0: vector<4x1x2xf32>) -> vector<4x3x2xf32>
%0 = vector.broadcast %arg0 : vector<4x1x2xf32> to vector<4x3x2xf32>
return %0 : vector<4x3x2xf32>
}
-// CHECK-LABEL: llvm.func @broadcast_stretch_in_middle(
-// CHECK-SAME: %[[A:.*]]: !llvm.array<4 x array<1 x vector<2xf32>>>)
-// CHECK: %[[T0:.*]] = llvm.mlir.constant(dense<0.000000e+00> : vector<4x3x2xf32>) : !llvm.array<4 x array<3 x vector<2xf32>>>
-// CHECK: %[[T1:.*]] = llvm.mlir.constant(dense<0.000000e+00> : vector<3x2xf32>) : !llvm.array<3 x vector<2xf32>>
-// CHECK: %[[T2:.*]] = llvm.extractvalue %[[A]][0, 0] : !llvm.array<4 x array<1 x vector<2xf32>>>
-// CHECK: %[[T4:.*]] = llvm.insertvalue %[[T2]], %[[T1]][0] : !llvm.array<3 x vector<2xf32>>
-// CHECK: %[[T5:.*]] = llvm.insertvalue %[[T2]], %[[T4]][1] : !llvm.array<3 x vector<2xf32>>
-// CHECK: %[[T6:.*]] = llvm.insertvalue %[[T2]], %[[T5]][2] : !llvm.array<3 x vector<2xf32>>
-// CHECK: %[[T7:.*]] = llvm.insertvalue %[[T6]], %[[T0]][0] : !llvm.array<4 x array<3 x vector<2xf32>>>
-// CHECK: %[[T8:.*]] = llvm.extractvalue %[[A]][1, 0] : !llvm.array<4 x array<1 x vector<2xf32>>>
-// CHECK: %[[T10:.*]] = llvm.insertvalue %[[T8]], %[[T1]][0] : !llvm.array<3 x vector<2xf32>>
-// CHECK: %[[T11:.*]] = llvm.insertvalue %[[T8]], %[[T10]][1] : !llvm.array<3 x vector<2xf32>>
-// CHECK: %[[T12:.*]] = llvm.insertvalue %[[T8]], %[[T11]][2] : !llvm.array<3 x vector<2xf32>>
-// CHECK: %[[T13:.*]] = llvm.insertvalue %[[T12]], %[[T7]][1] : !llvm.array<4 x array<3 x vector<2xf32>>>
-// CHECK: %[[T14:.*]] = llvm.extractvalue %[[A]][2, 0] : !llvm.array<4 x array<1 x vector<2xf32>>>
-// CHECK: %[[T16:.*]] = llvm.insertvalue %[[T14]], %[[T1]][0] : !llvm.array<3 x vector<2xf32>>
-// CHECK: %[[T17:.*]] = llvm.insertvalue %[[T14]], %[[T16]][1] : !llvm.array<3 x vector<2xf32>>
-// CHECK: %[[T18:.*]] = llvm.insertvalue %[[T14]], %[[T17]][2] : !llvm.array<3 x vector<2xf32>>
-// CHECK: %[[T19:.*]] = llvm.insertvalue %[[T18]], %[[T13]][2] : !llvm.array<4 x array<3 x vector<2xf32>>>
-// CHECK: %[[T20:.*]] = llvm.extractvalue %[[A]][3, 0] : !llvm.array<4 x array<1 x vector<2xf32>>>
-// CHECK: %[[T22:.*]] = llvm.insertvalue %[[T20]], %[[T1]][0] : !llvm.array<3 x vector<2xf32>>
-// CHECK: %[[T23:.*]] = llvm.insertvalue %[[T20]], %[[T22]][1] : !llvm.array<3 x vector<2xf32>>
-// CHECK: %[[T24:.*]] = llvm.insertvalue %[[T20]], %[[T23]][2] : !llvm.array<3 x vector<2xf32>>
-// CHECK: %[[T25:.*]] = llvm.insertvalue %[[T24]], %[[T19]][3] : !llvm.array<4 x array<3 x vector<2xf32>>>
-// CHECK: llvm.return %[[T25]] : !llvm.array<4 x array<3 x vector<2xf32>>>
+// CHECK-LABEL: @broadcast_stretch_in_middle(
+// CHECK-SAME: %[[A:.*]]: vector<4x1x2xf32>) -> vector<4x3x2xf32> {
+// CHECK: %[[T1:.*]] = constant dense<0.000000e+00> : vector<4x3x2xf32>
+// CHECK: %[[T2:.*]] = constant dense<0.000000e+00> : vector<3x2xf32>
+// CHECK: %[[T3:.*]] = llvm.mlir.cast %[[A]] : vector<4x1x2xf32> to !llvm.array<4 x array<1 x vector<2xf32>>>
+// CHECK: %[[T4:.*]] = llvm.extractvalue %[[T3]][0, 0] : !llvm.array<4 x array<1 x vector<2xf32>>>
+// CHECK: %[[T5:.*]] = llvm.mlir.cast %[[T2]] : vector<3x2xf32> to !llvm.array<3 x vector<2xf32>>
+// CHECK: %[[T6:.*]] = llvm.insertvalue %[[T4]], %[[T5]][0] : !llvm.array<3 x vector<2xf32>>
+// CHECK: %[[T7:.*]] = llvm.insertvalue %[[T4]], %[[T6]][1] : !llvm.array<3 x vector<2xf32>>
+// CHECK: %[[T8:.*]] = llvm.insertvalue %[[T4]], %[[T7]][2] : !llvm.array<3 x vector<2xf32>>
+// CHECK: %[[T9:.*]] = llvm.mlir.cast %[[T1]] : vector<4x3x2xf32> to !llvm.array<4 x array<3 x vector<2xf32>>>
+// CHECK: %[[T10:.*]] = llvm.insertvalue %[[T8]], %[[T9]][0] : !llvm.array<4 x array<3 x vector<2xf32>>>
+// CHECK: %[[T11:.*]] = llvm.mlir.cast %[[A]] : vector<4x1x2xf32> to !llvm.array<4 x array<1 x vector<2xf32>>>
+// CHECK: %[[T12:.*]] = llvm.extractvalue %[[T11]][1, 0] : !llvm.array<4 x array<1 x vector<2xf32>>>
+// CHECK: %[[T13:.*]] = llvm.mlir.cast %[[T2]] : vector<3x2xf32> to !llvm.array<3 x vector<2xf32>>
+// CHECK: %[[T14:.*]] = llvm.insertvalue %[[T12]], %[[T13]][0] : !llvm.array<3 x vector<2xf32>>
+// CHECK: %[[T15:.*]] = llvm.insertvalue %[[T12]], %[[T14]][1] : !llvm.array<3 x vector<2xf32>>
+// CHECK: %[[T16:.*]] = llvm.insertvalue %[[T12]], %[[T15]][2] : !llvm.array<3 x vector<2xf32>>
+// CHECK: %[[T17:.*]] = llvm.insertvalue %[[T16]], %[[T10]][1] : !llvm.array<4 x array<3 x vector<2xf32>>>
+// CHECK: %[[T18:.*]] = llvm.mlir.cast %[[A]] : vector<4x1x2xf32> to !llvm.array<4 x array<1 x vector<2xf32>>>
+// CHECK: %[[T19:.*]] = llvm.extractvalue %[[T18]][2, 0] : !llvm.array<4 x array<1 x vector<2xf32>>>
+// CHECK: %[[T20:.*]] = llvm.mlir.cast %[[T2]] : vector<3x2xf32> to !llvm.array<3 x vector<2xf32>>
+// CHECK: %[[T21:.*]] = llvm.insertvalue %[[T19]], %[[T20]][0] : !llvm.array<3 x vector<2xf32>>
+// CHECK: %[[T22:.*]] = llvm.insertvalue %[[T19]], %[[T21]][1] : !llvm.array<3 x vector<2xf32>>
+// CHECK: %[[T23:.*]] = llvm.insertvalue %[[T19]], %[[T22]][2] : !llvm.array<3 x vector<2xf32>>
+// CHECK: %[[T24:.*]] = llvm.insertvalue %[[T23]], %[[T17]][2] : !llvm.array<4 x array<3 x vector<2xf32>>>
+// CHECK: %[[T25:.*]] = llvm.mlir.cast %[[A]] : vector<4x1x2xf32> to !llvm.array<4 x array<1 x vector<2xf32>>>
+// CHECK: %[[T26:.*]] = llvm.extractvalue %[[T25]][3, 0] : !llvm.array<4 x array<1 x vector<2xf32>>>
+// CHECK: %[[T27:.*]] = llvm.mlir.cast %[[T2]] : vector<3x2xf32> to !llvm.array<3 x vector<2xf32>>
+// CHECK: %[[T28:.*]] = llvm.insertvalue %[[T26]], %[[T27]][0] : !llvm.array<3 x vector<2xf32>>
+// CHECK: %[[T29:.*]] = llvm.insertvalue %[[T26]], %[[T28]][1] : !llvm.array<3 x vector<2xf32>>
+// CHECK: %[[T30:.*]] = llvm.insertvalue %[[T26]], %[[T29]][2] : !llvm.array<3 x vector<2xf32>>
+// CHECK: %[[T31:.*]] = llvm.insertvalue %[[T30]], %[[T24]][3] : !llvm.array<4 x array<3 x vector<2xf32>>>
+// CHECK: %[[T32:.*]] = llvm.mlir.cast %[[T31]] : !llvm.array<4 x array<3 x vector<2xf32>>> to vector<4x3x2xf32>
+// CHECK: return %[[T32]] : vector<4x3x2xf32>
// -----
@@ -243,27 +243,23 @@ func @outerproduct(%arg0: vector<2xf32>, %arg1: vector<3xf32>) -> vector<2x3xf32
%2 = vector.outerproduct %arg0, %arg1 : vector<2xf32>, vector<3xf32>
return %2 : vector<2x3xf32>
}
-// CHECK-LABEL: llvm.func @outerproduct(
-// CHECK-SAME: %[[A:.*]]: vector<2xf32>,
-// CHECK-SAME: %[[B:.*]]: vector<3xf32>)
-// CHECK: %[[T0:.*]] = llvm.mlir.constant(dense<0.000000e+00> : vector<2x3xf32>)
-// CHECK: %[[T1:.*]] = llvm.mlir.constant(0 : i64) : i64
-// CHECK: %[[T2:.*]] = llvm.extractelement %[[A]][%[[T1]] : i64] : vector<2xf32>
-// CHECK: %[[T3:.*]] = llvm.mlir.undef : vector<3xf32>
-// CHECK: %[[T4:.*]] = llvm.mlir.constant(0 : i32) : i32
-// CHECK: %[[T5:.*]] = llvm.insertelement %[[T2]], %[[T3]][%4 : i32] : vector<3xf32>
-// CHECK: %[[T6:.*]] = llvm.shufflevector %[[T5]], %[[T3]] [0 : i32, 0 : i32, 0 : i32] : vector<3xf32>, vector<3xf32>
-// CHECK: %[[T7:.*]] = llvm.fmul %[[T6]], %[[B]] : vector<3xf32>
-// CHECK: %[[T8:.*]] = llvm.insertvalue %[[T7]], %[[T0]][0] : !llvm.array<2 x vector<3xf32>>
-// CHECK: %[[T9:.*]] = llvm.mlir.constant(1 : i64) : i64
-// CHECK: %[[T10:.*]] = llvm.extractelement %[[A]][%9 : i64] : vector<2xf32>
-// CHECK: %[[T11:.*]] = llvm.mlir.undef : vector<3xf32>
-// CHECK: %[[T12:.*]] = llvm.mlir.constant(0 : i32) : i32
-// CHECK: %[[T13:.*]] = llvm.insertelement %[[T10]], %[[T11]][%12 : i32] : vector<3xf32>
-// CHECK: %[[T14:.*]] = llvm.shufflevector %[[T13]], %[[T11]] [0 : i32, 0 : i32, 0 : i32] : vector<3xf32>, vector<3xf32>
-// CHECK: %[[T15:.*]] = llvm.fmul %[[T14]], %[[B]] : vector<3xf32>
-// CHECK: %[[T16:.*]] = llvm.insertvalue %[[T15]], %[[T8]][1] : !llvm.array<2 x vector<3xf32>>
-// CHECK: llvm.return %[[T16]] : !llvm.array<2 x vector<3xf32>>
+// CHECK-LABEL: @outerproduct(
+// CHECK-SAME: %[[A:.*]]: vector<2xf32>,
+// CHECK-SAME: %[[B:.*]]: vector<3xf32>)
+// CHECK: %[[T2:.*]] = constant dense<0.000000e+00> : vector<2x3xf32>
+// CHECK: %[[T3:.*]] = llvm.mlir.constant(0 : i64) : i64
+// CHECK: %[[T4:.*]] = llvm.extractelement %[[A]]{{\[}}%[[T3]] : i64] : vector<2xf32>
+// CHECK: %[[T5:.*]] = splat %[[T4]] : vector<3xf32>
+// CHECK: %[[T6:.*]] = mulf %[[T5]], %[[B]] : vector<3xf32>
+// CHECK: %[[T7:.*]] = llvm.mlir.cast %[[T2]] : vector<2x3xf32> to !llvm.array<2 x vector<3xf32>>
+// CHECK: %[[T8:.*]] = llvm.insertvalue %[[T6]], %[[T7]][0] : !llvm.array<2 x vector<3xf32>>
+// CHECK: %[[T9:.*]] = llvm.mlir.constant(1 : i64) : i64
+// CHECK: %[[T10:.*]] = llvm.extractelement %[[A]]{{\[}}%[[T9]] : i64] : vector<2xf32>
+// CHECK: %[[T11:.*]] = splat %[[T10]] : vector<3xf32>
+// CHECK: %[[T12:.*]] = mulf %[[T11]], %[[B]] : vector<3xf32>
+// CHECK: %[[T13:.*]] = llvm.insertvalue %[[T12]], %[[T8]][1] : !llvm.array<2 x vector<3xf32>>
+// CHECK: %[[T14:.*]] = llvm.mlir.cast %[[T13]] : !llvm.array<2 x vector<3xf32>> to vector<2x3xf32>
+// CHECK: return %[[T14]] : vector<2x3xf32>
// -----
@@ -271,30 +267,28 @@ func @outerproduct_add(%arg0: vector<2xf32>, %arg1: vector<3xf32>, %arg2: vector
%2 = vector.outerproduct %arg0, %arg1, %arg2 : vector<2xf32>, vector<3xf32>
return %2 : vector<2x3xf32>
}
-// CHECK-LABEL: llvm.func @outerproduct_add(
-// CHECK-SAME: %[[A:.*]]: vector<2xf32>,
-// CHECK-SAME: %[[B:.*]]: vector<3xf32>,
-// CHECK-SAME: %[[C:.*]]: !llvm.array<2 x vector<3xf32>>)
-// CHECK: %[[T0:.*]] = llvm.mlir.constant(dense<0.000000e+00> : vector<2x3xf32>)
-// CHECK: %[[T1:.*]] = llvm.mlir.constant(0 : i64) : i64
-// CHECK: %[[T2:.*]] = llvm.extractelement %[[A]][%[[T1]] : i64] : vector<2xf32>
-// CHECK: %[[T3:.*]] = llvm.mlir.undef : vector<3xf32>
-// CHECK: %[[T4:.*]] = llvm.mlir.constant(0 : i32) : i32
-// CHECK: %[[T5:.*]] = llvm.insertelement %[[T2]], %[[T3]][%[[T4]] : i32] : vector<3xf32>
-// CHECK: %[[T6:.*]] = llvm.shufflevector %[[T5]], %[[T3]] [0 : i32, 0 : i32, 0 : i32] : vector<3xf32>, vector<3xf32>
-// CHECK: %[[T7:.*]] = llvm.extractvalue %[[C]][0] : !llvm.array<2 x vector<3xf32>>
-// CHECK: %[[T8:.*]] = "llvm.intr.fmuladd"(%[[T6]], %[[B]], %[[T7]]) : (vector<3xf32>, vector<3xf32>, vector<3xf32>)
-// CHECK: %[[T9:.*]] = llvm.insertvalue %[[T8]], %[[T0]][0] : !llvm.array<2 x vector<3xf32>>
-// CHECK: %[[T10:.*]] = llvm.mlir.constant(1 : i64) : i64
-// CHECK: %[[T11:.*]] = llvm.extractelement %[[A]][%[[T10]] : i64] : vector<2xf32>
-// CHECK: %[[T12:.*]] = llvm.mlir.undef : vector<3xf32>
-// CHECK: %[[T13:.*]] = llvm.mlir.constant(0 : i32) : i32
-// CHECK: %[[T14:.*]] = llvm.insertelement %[[T11]], %[[T12]][%[[T13]] : i32] : vector<3xf32>
-// CHECK: %[[T15:.*]] = llvm.shufflevector %[[T14]], %[[T12]] [0 : i32, 0 : i32, 0 : i32] : vector<3xf32>, vector<3xf32>
-// CHECK: %[[T16:.*]] = llvm.extractvalue %[[C]][1] : !llvm.array<2 x vector<3xf32>>
-// CHECK: %[[T17:.*]] = "llvm.intr.fmuladd"(%[[T15]], %[[B]], %[[T16]]) : (vector<3xf32>, vector<3xf32>, vector<3xf32>)
-// CHECK: %[[T18:.*]] = llvm.insertvalue %[[T17]], %[[T9]][1] : !llvm.array<2 x vector<3xf32>>
-// CHECK: llvm.return %[[T18]] : !llvm.array<2 x vector<3xf32>>
+// CHECK-LABEL: @outerproduct_add(
+// CHECK-SAME: %[[A:.*]]: vector<2xf32>,
+// CHECK-SAME: %[[B:.*]]: vector<3xf32>,
+// CHECK-SAME: %[[C:.*]]: vector<2x3xf32>) -> vector<2x3xf32>
+// CHECK: %[[T3:.*]] = constant dense<0.000000e+00> : vector<2x3xf32>
+// CHECK: %[[T4:.*]] = llvm.mlir.constant(0 : i64) : i64
+// CHECK: %[[T5:.*]] = llvm.extractelement %[[A]]{{\[}}%[[T4]] : i64] : vector<2xf32>
+// CHECK: %[[T6:.*]] = splat %[[T5]] : vector<3xf32>
+// CHECK: %[[T7:.*]] = llvm.mlir.cast %[[C]] : vector<2x3xf32> to !llvm.array<2 x vector<3xf32>>
+// CHECK: %[[T8:.*]] = llvm.extractvalue %[[T7]][0] : !llvm.array<2 x vector<3xf32>>
+// CHECK: %[[T9:.*]] = "llvm.intr.fmuladd"(%[[T6]], %[[B]], %[[T8]]) : (vector<3xf32>, vector<3xf32>, vector<3xf32>) -> vector<3xf32>
+// CHECK: %[[T10:.*]] = llvm.mlir.cast %[[T3]] : vector<2x3xf32> to !llvm.array<2 x vector<3xf32>>
+// CHECK: %[[T11:.*]] = llvm.insertvalue %[[T9]], %[[T10]][0] : !llvm.array<2 x vector<3xf32>>
+// CHECK: %[[T12:.*]] = llvm.mlir.constant(1 : i64) : i64
+// CHECK: %[[T13:.*]] = llvm.extractelement %[[A]]{{\[}}%[[T12]] : i64] : vector<2xf32>
+// CHECK: %[[T14:.*]] = splat %[[T13]] : vector<3xf32>
+// CHECK: %[[T15:.*]] = llvm.mlir.cast %[[C]] : vector<2x3xf32> to !llvm.array<2 x vector<3xf32>>
+// CHECK: %[[T16:.*]] = llvm.extractvalue %[[T15]][1] : !llvm.array<2 x vector<3xf32>>
+// CHECK: %[[T17:.*]] = "llvm.intr.fmuladd"(%[[T14]], %[[B]], %[[T16]]) : (vector<3xf32>, vector<3xf32>, vector<3xf32>) -> vector<3xf32>
+// CHECK: %[[T18:.*]] = llvm.insertvalue %[[T17]], %[[T11]][1] : !llvm.array<2 x vector<3xf32>>
+// CHECK: %[[T19:.*]] = llvm.mlir.cast %[[T18]] : !llvm.array<2 x vector<3xf32>> to vector<2x3xf32>
+// CHECK: return %[[T19]] : vector<2x3xf32>
// -----
@@ -302,11 +296,11 @@ func @shuffle_1D_direct(%arg0: vector<2xf32>, %arg1: vector<2xf32>) -> vector<2x
%1 = vector.shuffle %arg0, %arg1 [0, 1] : vector<2xf32>, vector<2xf32>
return %1 : vector<2xf32>
}
-// CHECK-LABEL: llvm.func @shuffle_1D_direct(
+// CHECK-LABEL: @shuffle_1D_direct(
// CHECK-SAME: %[[A:.*]]: vector<2xf32>,
// CHECK-SAME: %[[B:.*]]: vector<2xf32>)
// CHECK: %[[s:.*]] = llvm.shufflevector %[[A]], %[[B]] [0, 1] : vector<2xf32>, vector<2xf32>
-// CHECK: llvm.return %[[s]] : vector<2xf32>
+// CHECK: return %[[s]] : vector<2xf32>
// -----
@@ -314,7 +308,7 @@ func @shuffle_1D(%arg0: vector<2xf32>, %arg1: vector<3xf32>) -> vector<5xf32> {
%1 = vector.shuffle %arg0, %arg1 [4, 3, 2, 1, 0] : vector<2xf32>, vector<3xf32>
return %1 : vector<5xf32>
}
-// CHECK-LABEL: llvm.func @shuffle_1D(
+// CHECK-LABEL: @shuffle_1D(
// CHECK-SAME: %[[A:.*]]: vector<2xf32>,
// CHECK-SAME: %[[B:.*]]: vector<3xf32>)
// CHECK: %[[u0:.*]] = llvm.mlir.undef : vector<5xf32>
@@ -338,7 +332,7 @@ func @shuffle_1D(%arg0: vector<2xf32>, %arg1: vector<3xf32>) -> vector<5xf32> {
// CHECK: %[[e5:.*]] = llvm.extractelement %[[A]][%[[c0]] : i64] : vector<2xf32>
// CHECK: %[[c4:.*]] = llvm.mlir.constant(4 : index) : i64
// CHECK: %[[i5:.*]] = llvm.insertelement %[[e5]], %[[i4]][%[[c4]] : i64] : vector<5xf32>
-// CHECK: llvm.return %[[i5]] : vector<5xf32>
+// CHECK: return %[[i5]] : vector<5xf32>
// -----
@@ -346,17 +340,20 @@ func @shuffle_2D(%a: vector<1x4xf32>, %b: vector<2x4xf32>) -> vector<3x4xf32> {
%1 = vector.shuffle %a, %b[1, 0, 2] : vector<1x4xf32>, vector<2x4xf32>
return %1 : vector<3x4xf32>
}
-// CHECK-LABEL: llvm.func @shuffle_2D(
-// CHECK-SAME: %[[A:.*]]: !llvm.array<1 x vector<4xf32>>,
-// CHECK-SAME: %[[B:.*]]: !llvm.array<2 x vector<4xf32>>)
+// CHECK-LABEL: @shuffle_2D(
+// CHECK-SAME: %[[A:.*]]: vector<1x4xf32>,
+// CHECK-SAME: %[[B:.*]]: vector<2x4xf32>)
+// CHECK: %[[VAL_0:.*]] = llvm.mlir.cast %[[A]] : vector<1x4xf32> to !llvm.array<1 x vector<4xf32>>
+// CHECK: %[[VAL_1:.*]] = llvm.mlir.cast %[[B]] : vector<2x4xf32> to !llvm.array<2 x vector<4xf32>>
// CHECK: %[[u0:.*]] = llvm.mlir.undef : !llvm.array<3 x vector<4xf32>>
-// CHECK: %[[e1:.*]] = llvm.extractvalue %[[B]][0] : !llvm.array<2 x vector<4xf32>>
+// CHECK: %[[e1:.*]] = llvm.extractvalue %[[VAL_1]][0] : !llvm.array<2 x vector<4xf32>>
// CHECK: %[[i1:.*]] = llvm.insertvalue %[[e1]], %[[u0]][0] : !llvm.array<3 x vector<4xf32>>
-// CHECK: %[[e2:.*]] = llvm.extractvalue %[[A]][0] : !llvm.array<1 x vector<4xf32>>
+// CHECK: %[[e2:.*]] = llvm.extractvalue %[[VAL_0]][0] : !llvm.array<1 x vector<4xf32>>
// CHECK: %[[i2:.*]] = llvm.insertvalue %[[e2]], %[[i1]][1] : !llvm.array<3 x vector<4xf32>>
-// CHECK: %[[e3:.*]] = llvm.extractvalue %[[B]][1] : !llvm.array<2 x vector<4xf32>>
+// CHECK: %[[e3:.*]] = llvm.extractvalue %[[VAL_1]][1] : !llvm.array<2 x vector<4xf32>>
// CHECK: %[[i3:.*]] = llvm.insertvalue %[[e3]], %[[i2]][2] : !llvm.array<3 x vector<4xf32>>
-// CHECK: llvm.return %[[i3]] : !llvm.array<3 x vector<4xf32>>
+// CHECK: %[[VAL_3:.*]] = llvm.mlir.cast %[[i3]] : !llvm.array<3 x vector<4xf32>> to vector<3x4xf32>
+// CHECK: return %[[VAL_3]] : vector<3x4xf32>
// -----
@@ -365,11 +362,11 @@ func @extract_element(%arg0: vector<16xf32>) -> f32 {
%1 = vector.extractelement %arg0[%0 : i32]: vector<16xf32>
return %1 : f32
}
-// CHECK-LABEL: llvm.func @extract_element(
+// CHECK-LABEL: @extract_element(
// CHECK-SAME: %[[A:.*]]: vector<16xf32>)
-// CHECK: %[[c:.*]] = llvm.mlir.constant(15 : i32) : i32
+// CHECK: %[[c:.*]] = constant 15 : i32
// CHECK: %[[x:.*]] = llvm.extractelement %[[A]][%[[c]] : i32] : vector<16xf32>
-// CHECK: llvm.return %[[x]] : f32
+// CHECK: return %[[x]] : f32
// -----
@@ -377,10 +374,10 @@ func @extract_element_from_vec_1d(%arg0: vector<16xf32>) -> f32 {
%0 = vector.extract %arg0[15]: vector<16xf32>
return %0 : f32
}
-// CHECK-LABEL: llvm.func @extract_element_from_vec_1d
+// CHECK-LABEL: @extract_element_from_vec_1d
// CHECK: llvm.mlir.constant(15 : i64) : i64
// CHECK: llvm.extractelement {{.*}}[{{.*}} : i64] : vector<16xf32>
-// CHECK: llvm.return {{.*}} : f32
+// CHECK: return {{.*}} : f32
// -----
@@ -388,9 +385,9 @@ func @extract_vec_2d_from_vec_3d(%arg0: vector<4x3x16xf32>) -> vector<3x16xf32>
%0 = vector.extract %arg0[0]: vector<4x3x16xf32>
return %0 : vector<3x16xf32>
}
-// CHECK-LABEL: llvm.func @extract_vec_2d_from_vec_3d
+// CHECK-LABEL: @extract_vec_2d_from_vec_3d
// CHECK: llvm.extractvalue {{.*}}[0] : !llvm.array<4 x array<3 x vector<16xf32>>>
-// CHECK: llvm.return {{.*}} : !llvm.array<3 x vector<16xf32>>
+// CHECK: return {{.*}} : vector<3x16xf32>
// -----
@@ -398,9 +395,9 @@ func @extract_vec_1d_from_vec_3d(%arg0: vector<4x3x16xf32>) -> vector<16xf32> {
%0 = vector.extract %arg0[0, 0]: vector<4x3x16xf32>
return %0 : vector<16xf32>
}
-// CHECK-LABEL: llvm.func @extract_vec_1d_from_vec_3d
+// CHECK-LABEL: @extract_vec_1d_from_vec_3d
// CHECK: llvm.extractvalue {{.*}}[0, 0] : !llvm.array<4 x array<3 x vector<16xf32>>>
-// CHECK: llvm.return {{.*}} : vector<16xf32>
+// CHECK: return {{.*}} : vector<16xf32>
// -----
@@ -408,11 +405,11 @@ func @extract_element_from_vec_3d(%arg0: vector<4x3x16xf32>) -> f32 {
%0 = vector.extract %arg0[0, 0, 0]: vector<4x3x16xf32>
return %0 : f32
}
-// CHECK-LABEL: llvm.func @extract_element_from_vec_3d
+// CHECK-LABEL: @extract_element_from_vec_3d
// CHECK: llvm.extractvalue {{.*}}[0, 0] : !llvm.array<4 x array<3 x vector<16xf32>>>
// CHECK: llvm.mlir.constant(0 : i64) : i64
// CHECK: llvm.extractelement {{.*}}[{{.*}} : i64] : vector<16xf32>
-// CHECK: llvm.return {{.*}} : f32
+// CHECK: return {{.*}} : f32
// -----
@@ -421,12 +418,12 @@ func @insert_element(%arg0: f32, %arg1: vector<4xf32>) -> vector<4xf32> {
%1 = vector.insertelement %arg0, %arg1[%0 : i32] : vector<4xf32>
return %1 : vector<4xf32>
}
-// CHECK-LABEL: llvm.func @insert_element(
+// CHECK-LABEL: @insert_element(
// CHECK-SAME: %[[A:.*]]: f32,
// CHECK-SAME: %[[B:.*]]: vector<4xf32>)
-// CHECK: %[[c:.*]] = llvm.mlir.constant(3 : i32) : i32
+// CHECK: %[[c:.*]] = constant 3 : i32
// CHECK: %[[x:.*]] = llvm.insertelement %[[A]], %[[B]][%[[c]] : i32] : vector<4xf32>
-// CHECK: llvm.return %[[x]] : vector<4xf32>
+// CHECK: return %[[x]] : vector<4xf32>
// -----
@@ -434,10 +431,10 @@ func @insert_element_into_vec_1d(%arg0: f32, %arg1: vector<4xf32>) -> vector<4xf
%0 = vector.insert %arg0, %arg1[3] : f32 into vector<4xf32>
return %0 : vector<4xf32>
}
-// CHECK-LABEL: llvm.func @insert_element_into_vec_1d
+// CHECK-LABEL: @insert_element_into_vec_1d
// CHECK: llvm.mlir.constant(3 : i64) : i64
// CHECK: llvm.insertelement {{.*}}, {{.*}}[{{.*}} : i64] : vector<4xf32>
-// CHECK: llvm.return {{.*}} : vector<4xf32>
+// CHECK: return {{.*}} : vector<4xf32>
// -----
@@ -445,9 +442,9 @@ func @insert_vec_2d_into_vec_3d(%arg0: vector<8x16xf32>, %arg1: vector<4x8x16xf3
%0 = vector.insert %arg0, %arg1[3] : vector<8x16xf32> into vector<4x8x16xf32>
return %0 : vector<4x8x16xf32>
}
-// CHECK-LABEL: llvm.func @insert_vec_2d_into_vec_3d
+// CHECK-LABEL: @insert_vec_2d_into_vec_3d
// CHECK: llvm.insertvalue {{.*}}, {{.*}}[3] : !llvm.array<4 x array<8 x vector<16xf32>>>
-// CHECK: llvm.return {{.*}} : !llvm.array<4 x array<8 x vector<16xf32>>>
+// CHECK: return {{.*}} : vector<4x8x16xf32>
// -----
@@ -455,9 +452,9 @@ func @insert_vec_1d_into_vec_3d(%arg0: vector<16xf32>, %arg1: vector<4x8x16xf32>
%0 = vector.insert %arg0, %arg1[3, 7] : vector<16xf32> into vector<4x8x16xf32>
return %0 : vector<4x8x16xf32>
}
-// CHECK-LABEL: llvm.func @insert_vec_1d_into_vec_3d
+// CHECK-LABEL: @insert_vec_1d_into_vec_3d
// CHECK: llvm.insertvalue {{.*}}, {{.*}}[3, 7] : !llvm.array<4 x array<8 x vector<16xf32>>>
-// CHECK: llvm.return {{.*}} : !llvm.array<4 x array<8 x vector<16xf32>>>
+// CHECK: return {{.*}} : vector<4x8x16xf32>
// -----
@@ -465,12 +462,12 @@ func @insert_element_into_vec_3d(%arg0: f32, %arg1: vector<4x8x16xf32>) -> vecto
%0 = vector.insert %arg0, %arg1[3, 7, 15] : f32 into vector<4x8x16xf32>
return %0 : vector<4x8x16xf32>
}
-// CHECK-LABEL: llvm.func @insert_element_into_vec_3d
+// CHECK-LABEL: @insert_element_into_vec_3d
// CHECK: llvm.extractvalue {{.*}}[3, 7] : !llvm.array<4 x array<8 x vector<16xf32>>>
// CHECK: llvm.mlir.constant(15 : i64) : i64
// CHECK: llvm.insertelement {{.*}}, {{.*}}[{{.*}} : i64] : vector<16xf32>
// CHECK: llvm.insertvalue {{.*}}, {{.*}}[3, 7] : !llvm.array<4 x array<8 x vector<16xf32>>>
-// CHECK: llvm.return {{.*}} : !llvm.array<4 x array<8 x vector<16xf32>>>
+// CHECK: return {{.*}} : vector<4x8x16xf32>
// -----
@@ -478,7 +475,7 @@ func @vector_type_cast(%arg0: memref<8x8x8xf32>) -> memref<vector<8x8x8xf32>> {
%0 = vector.type_cast %arg0: memref<8x8x8xf32> to memref<vector<8x8x8xf32>>
return %0 : memref<vector<8x8x8xf32>>
}
-// CHECK-LABEL: llvm.func @vector_type_cast
+// CHECK-LABEL: @vector_type_cast
// CHECK: llvm.mlir.undef : !llvm.struct<(ptr<array<8 x array<8 x vector<8xf32>>>>, ptr<array<8 x array<8 x vector<8xf32>>>>, i64)>
// CHECK: %[[allocated:.*]] = llvm.extractvalue {{.*}}[0] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<3 x i64>, array<3 x i64>)>
// CHECK: %[[allocatedBit:.*]] = llvm.bitcast %[[allocated]] : !llvm.ptr<f32> to !llvm.ptr<array<8 x array<8 x vector<8xf32>>>>
@@ -495,7 +492,7 @@ func @vector_type_cast_non_zero_addrspace(%arg0: memref<8x8x8xf32, 3>) -> memref
%0 = vector.type_cast %arg0: memref<8x8x8xf32, 3> to memref<vector<8x8x8xf32>, 3>
return %0 : memref<vector<8x8x8xf32>, 3>
}
-// CHECK-LABEL: llvm.func @vector_type_cast_non_zero_addrspace
+// CHECK-LABEL: @vector_type_cast_non_zero_addrspace
// CHECK: llvm.mlir.undef : !llvm.struct<(ptr<array<8 x array<8 x vector<8xf32>>>, 3>, ptr<array<8 x array<8 x vector<8xf32>>>, 3>, i64)>
// CHECK: %[[allocated:.*]] = llvm.extractvalue {{.*}}[0] : !llvm.struct<(ptr<f32, 3>, ptr<f32, 3>, i64, array<3 x i64>, array<3 x i64>)>
// CHECK: %[[allocatedBit:.*]] = llvm.bitcast %[[allocated]] : !llvm.ptr<f32, 3> to !llvm.ptr<array<8 x array<8 x vector<8xf32>>>, 3>
@@ -515,9 +512,9 @@ func @vector_print_scalar_i1(%arg0: i1) {
//
// Type "boolean" always uses zero extension.
//
-// CHECK-LABEL: llvm.func @vector_print_scalar_i1(
+// CHECK-LABEL: @vector_print_scalar_i1(
// CHECK-SAME: %[[A:.*]]: i1)
-// CHECK: %[[S:.*]] = llvm.zext %[[A]] : i1 to i64
+// CHECK: %[[S:.*]] = zexti %[[A]] : i1 to i64
// CHECK: llvm.call @printI64(%[[S]]) : (i64) -> ()
// CHECK: llvm.call @printNewline() : () -> ()
@@ -527,9 +524,9 @@ func @vector_print_scalar_i4(%arg0: i4) {
vector.print %arg0 : i4
return
}
-// CHECK-LABEL: llvm.func @vector_print_scalar_i4(
+// CHECK-LABEL: @vector_print_scalar_i4(
// CHECK-SAME: %[[A:.*]]: i4)
-// CHECK: %[[S:.*]] = llvm.sext %[[A]] : i4 to i64
+// CHECK: %[[S:.*]] = sexti %[[A]] : i4 to i64
// CHECK: llvm.call @printI64(%[[S]]) : (i64) -> ()
// CHECK: llvm.call @printNewline() : () -> ()
@@ -539,9 +536,10 @@ func @vector_print_scalar_si4(%arg0: si4) {
vector.print %arg0 : si4
return
}
-// CHECK-LABEL: llvm.func @vector_print_scalar_si4(
-// CHECK-SAME: %[[A:.*]]: i4)
-// CHECK: %[[S:.*]] = llvm.sext %[[A]] : i4 to i64
+// CHECK-LABEL: @vector_print_scalar_si4(
+// CHECK-SAME: %[[A:.*]]: si4)
+// CHECK: %[[C:.*]] = llvm.mlir.cast %[[A]] : si4 to i4
+// CHECK: %[[S:.*]] = sexti %[[C]] : i4 to i64
// CHECK: llvm.call @printI64(%[[S]]) : (i64) -> ()
// CHECK: llvm.call @printNewline() : () -> ()
@@ -551,9 +549,10 @@ func @vector_print_scalar_ui4(%arg0: ui4) {
vector.print %arg0 : ui4
return
}
-// CHECK-LABEL: llvm.func @vector_print_scalar_ui4(
-// CHECK-SAME: %[[A:.*]]: i4)
-// CHECK: %[[S:.*]] = llvm.zext %[[A]] : i4 to i64
+// CHECK-LABEL: @vector_print_scalar_ui4(
+// CHECK-SAME: %[[A:.*]]: ui4)
+// CHECK: %[[C:.*]] = llvm.mlir.cast %[[A]] : ui4 to i4
+// CHECK: %[[S:.*]] = zexti %[[C]] : i4 to i64
// CHECK: llvm.call @printU64(%[[S]]) : (i64) -> ()
// CHECK: llvm.call @printNewline() : () -> ()
@@ -563,9 +562,9 @@ func @vector_print_scalar_i32(%arg0: i32) {
vector.print %arg0 : i32
return
}
-// CHECK-LABEL: llvm.func @vector_print_scalar_i32(
+// CHECK-LABEL: @vector_print_scalar_i32(
// CHECK-SAME: %[[A:.*]]: i32)
-// CHECK: %[[S:.*]] = llvm.sext %[[A]] : i32 to i64
+// CHECK: %[[S:.*]] = sexti %[[A]] : i32 to i64
// CHECK: llvm.call @printI64(%[[S]]) : (i64) -> ()
// CHECK: llvm.call @printNewline() : () -> ()
@@ -575,9 +574,10 @@ func @vector_print_scalar_ui32(%arg0: ui32) {
vector.print %arg0 : ui32
return
}
-// CHECK-LABEL: llvm.func @vector_print_scalar_ui32(
-// CHECK-SAME: %[[A:.*]]: i32)
-// CHECK: %[[S:.*]] = llvm.zext %[[A]] : i32 to i64
+// CHECK-LABEL: @vector_print_scalar_ui32(
+// CHECK-SAME: %[[A:.*]]: ui32)
+// CHECK: %[[C:.*]] = llvm.mlir.cast %[[A]] : ui32 to i32
+// CHECK: %[[S:.*]] = zexti %[[C]] : i32 to i64
// CHECK: llvm.call @printU64(%[[S]]) : (i64) -> ()
// -----
@@ -586,9 +586,9 @@ func @vector_print_scalar_i40(%arg0: i40) {
vector.print %arg0 : i40
return
}
-// CHECK-LABEL: llvm.func @vector_print_scalar_i40(
+// CHECK-LABEL: @vector_print_scalar_i40(
// CHECK-SAME: %[[A:.*]]: i40)
-// CHECK: %[[S:.*]] = llvm.sext %[[A]] : i40 to i64
+// CHECK: %[[S:.*]] = sexti %[[A]] : i40 to i64
// CHECK: llvm.call @printI64(%[[S]]) : (i64) -> ()
// CHECK: llvm.call @printNewline() : () -> ()
@@ -598,9 +598,10 @@ func @vector_print_scalar_si40(%arg0: si40) {
vector.print %arg0 : si40
return
}
-// CHECK-LABEL: llvm.func @vector_print_scalar_si40(
-// CHECK-SAME: %[[A:.*]]: i40)
-// CHECK: %[[S:.*]] = llvm.sext %[[A]] : i40 to i64
+// CHECK-LABEL: @vector_print_scalar_si40(
+// CHECK-SAME: %[[A:.*]]: si40)
+// CHECK: %[[C:.*]] = llvm.mlir.cast %[[A]] : si40 to i40
+// CHECK: %[[S:.*]] = sexti %[[C]] : i40 to i64
// CHECK: llvm.call @printI64(%[[S]]) : (i64) -> ()
// CHECK: llvm.call @printNewline() : () -> ()
@@ -610,9 +611,10 @@ func @vector_print_scalar_ui40(%arg0: ui40) {
vector.print %arg0 : ui40
return
}
-// CHECK-LABEL: llvm.func @vector_print_scalar_ui40(
-// CHECK-SAME: %[[A:.*]]: i40)
-// CHECK: %[[S:.*]] = llvm.zext %[[A]] : i40 to i64
+// CHECK-LABEL: @vector_print_scalar_ui40(
+// CHECK-SAME: %[[A:.*]]: ui40)
+// CHECK: %[[C:.*]] = llvm.mlir.cast %[[A]] : ui40 to i40
+// CHECK: %[[S:.*]] = zexti %[[C]] : i40 to i64
// CHECK: llvm.call @printU64(%[[S]]) : (i64) -> ()
// CHECK: llvm.call @printNewline() : () -> ()
@@ -622,7 +624,7 @@ func @vector_print_scalar_i64(%arg0: i64) {
vector.print %arg0 : i64
return
}
-// CHECK-LABEL: llvm.func @vector_print_scalar_i64(
+// CHECK-LABEL: @vector_print_scalar_i64(
// CHECK-SAME: %[[A:.*]]: i64)
// CHECK: llvm.call @printI64(%[[A]]) : (i64) -> ()
// CHECK: llvm.call @printNewline() : () -> ()
@@ -633,9 +635,10 @@ func @vector_print_scalar_ui64(%arg0: ui64) {
vector.print %arg0 : ui64
return
}
-// CHECK-LABEL: llvm.func @vector_print_scalar_ui64(
-// CHECK-SAME: %[[A:.*]]: i64)
-// CHECK: llvm.call @printU64(%[[A]]) : (i64) -> ()
+// CHECK-LABEL: @vector_print_scalar_ui64(
+// CHECK-SAME: %[[A:.*]]: ui64)
+// CHECK: %[[C:.*]] = llvm.mlir.cast %[[A]] : ui64 to i64
+// CHECK: llvm.call @printU64(%[[C]]) : (i64) -> ()
// CHECK: llvm.call @printNewline() : () -> ()
// -----
@@ -644,9 +647,10 @@ func @vector_print_scalar_index(%arg0: index) {
vector.print %arg0 : index
return
}
-// CHECK-LABEL: llvm.func @vector_print_scalar_index(
-// CHECK-SAME: %[[A:.*]]: i64)
-// CHECK: llvm.call @printU64(%[[A]]) : (i64) -> ()
+// CHECK-LABEL: @vector_print_scalar_index(
+// CHECK-SAME: %[[A:.*]]: index)
+// CHECK: %[[C:.*]] = llvm.mlir.cast %[[A]] : index to i64
+// CHECK: llvm.call @printU64(%[[C]]) : (i64) -> ()
// CHECK: llvm.call @printNewline() : () -> ()
// -----
@@ -655,7 +659,7 @@ func @vector_print_scalar_f32(%arg0: f32) {
vector.print %arg0 : f32
return
}
-// CHECK-LABEL: llvm.func @vector_print_scalar_f32(
+// CHECK-LABEL: @vector_print_scalar_f32(
// CHECK-SAME: %[[A:.*]]: f32)
// CHECK: llvm.call @printF32(%[[A]]) : (f32) -> ()
// CHECK: llvm.call @printNewline() : () -> ()
@@ -666,7 +670,7 @@ func @vector_print_scalar_f64(%arg0: f64) {
vector.print %arg0 : f64
return
}
-// CHECK-LABEL: llvm.func @vector_print_scalar_f64(
+// CHECK-LABEL: @vector_print_scalar_f64(
// CHECK-SAME: %[[A:.*]]: f64)
// CHECK: llvm.call @printF64(%[[A]]) : (f64) -> ()
// CHECK: llvm.call @printNewline() : () -> ()
@@ -677,10 +681,11 @@ func @vector_print_vector(%arg0: vector<2x2xf32>) {
vector.print %arg0 : vector<2x2xf32>
return
}
-// CHECK-LABEL: llvm.func @vector_print_vector(
-// CHECK-SAME: %[[A:.*]]: !llvm.array<2 x vector<2xf32>>)
+// CHECK-LABEL: @vector_print_vector(
+// CHECK-SAME: %[[A:.*]]: vector<2x2xf32>)
+// CHECK: %[[VAL_1:.*]] = llvm.mlir.cast %[[A]] : vector<2x2xf32> to !llvm.array<2 x vector<2xf32>>
// CHECK: llvm.call @printOpen() : () -> ()
-// CHECK: %[[x0:.*]] = llvm.extractvalue %[[A]][0] : !llvm.array<2 x vector<2xf32>>
+// CHECK: %[[x0:.*]] = llvm.extractvalue %[[VAL_1]][0] : !llvm.array<2 x vector<2xf32>>
// CHECK: llvm.call @printOpen() : () -> ()
// CHECK: %[[x1:.*]] = llvm.mlir.constant(0 : index) : i64
// CHECK: %[[x2:.*]] = llvm.extractelement %[[x0]][%[[x1]] : i64] : vector<2xf32>
@@ -691,7 +696,7 @@ func @vector_print_vector(%arg0: vector<2x2xf32>) {
// CHECK: llvm.call @printF32(%[[x4]]) : (f32) -> ()
// CHECK: llvm.call @printClose() : () -> ()
// CHECK: llvm.call @printComma() : () -> ()
-// CHECK: %[[x5:.*]] = llvm.extractvalue %[[A]][1] : !llvm.array<2 x vector<2xf32>>
+// CHECK: %[[x5:.*]] = llvm.extractvalue %[[VAL_1]][1] : !llvm.array<2 x vector<2xf32>>
// CHECK: llvm.call @printOpen() : () -> ()
// CHECK: %[[x6:.*]] = llvm.mlir.constant(0 : index) : i64
// CHECK: %[[x7:.*]] = llvm.extractelement %[[x5]][%[[x6]] : i64] : vector<2xf32>
@@ -710,10 +715,10 @@ func @extract_strided_slice1(%arg0: vector<4xf32>) -> vector<2xf32> {
%0 = vector.extract_strided_slice %arg0 {offsets = [2], sizes = [2], strides = [1]} : vector<4xf32> to vector<2xf32>
return %0 : vector<2xf32>
}
-// CHECK-LABEL: llvm.func @extract_strided_slice1(
+// CHECK-LABEL: @extract_strided_slice1(
// CHECK-SAME: %[[A:.*]]: vector<4xf32>)
// CHECK: %[[T0:.*]] = llvm.shufflevector %[[A]], %[[A]] [2, 3] : vector<4xf32>, vector<4xf32>
-// CHECK: llvm.return %[[T0]] : vector<2xf32>
+// CHECK: return %[[T0]] : vector<2xf32>
// -----
@@ -721,14 +726,16 @@ func @extract_strided_slice2(%arg0: vector<4x8xf32>) -> vector<2x8xf32> {
%0 = vector.extract_strided_slice %arg0 {offsets = [2], sizes = [2], strides = [1]} : vector<4x8xf32> to vector<2x8xf32>
return %0 : vector<2x8xf32>
}
-// CHECK-LABEL: llvm.func @extract_strided_slice2(
-// CHECK-SAME: %[[A:.*]]: !llvm.array<4 x vector<8xf32>>)
+// CHECK-LABEL: @extract_strided_slice2(
+// CHECK-SAME: %[[ARG:.*]]: vector<4x8xf32>)
+// CHECK: %[[A:.*]] = llvm.mlir.cast %[[ARG]] : vector<4x8xf32> to !llvm.array<4 x vector<8xf32>>
// CHECK: %[[T0:.*]] = llvm.mlir.undef : !llvm.array<2 x vector<8xf32>>
// CHECK: %[[T1:.*]] = llvm.extractvalue %[[A]][2] : !llvm.array<4 x vector<8xf32>>
// CHECK: %[[T2:.*]] = llvm.insertvalue %[[T1]], %[[T0]][0] : !llvm.array<2 x vector<8xf32>>
// CHECK: %[[T3:.*]] = llvm.extractvalue %[[A]][3] : !llvm.array<4 x vector<8xf32>>
// CHECK: %[[T4:.*]] = llvm.insertvalue %[[T3]], %[[T2]][1] : !llvm.array<2 x vector<8xf32>>
-// CHECK: llvm.return %[[T4]] : !llvm.array<2 x vector<8xf32>>
+// CHECK: %[[T5:.*]] = llvm.mlir.cast %[[T4]] : !llvm.array<2 x vector<8xf32>> to vector<2x8xf32>
+// CHECK: return %[[T5]]
// -----
@@ -736,16 +743,21 @@ func @extract_strided_slice3(%arg0: vector<4x8xf32>) -> vector<2x2xf32> {
%0 = vector.extract_strided_slice %arg0 {offsets = [2, 2], sizes = [2, 2], strides = [1, 1]} : vector<4x8xf32> to vector<2x2xf32>
return %0 : vector<2x2xf32>
}
-// CHECK-LABEL: llvm.func @extract_strided_slice3(
-// CHECK-SAME: %[[A:.*]]: !llvm.array<4 x vector<8xf32>>)
-// CHECK: %[[T1:.*]] = llvm.mlir.constant(dense<0.000000e+00> : vector<2x2xf32>) : !llvm.array<2 x vector<2xf32>>
+// CHECK-LABEL: @extract_strided_slice3(
+// CHECK-SAME: %[[ARG:.*]]: vector<4x8xf32>)
+// CHECK: %[[VAL_1:.*]] = constant 0.000000e+00 : f32
+// CHECK: %[[VAL_2:.*]] = splat %[[VAL_1]] : vector<2x2xf32>
+// CHECK: %[[A:.*]] = llvm.mlir.cast %[[ARG]] : vector<4x8xf32> to !llvm.array<4 x vector<8xf32>>
// CHECK: %[[T2:.*]] = llvm.extractvalue %[[A]][2] : !llvm.array<4 x vector<8xf32>>
// CHECK: %[[T3:.*]] = llvm.shufflevector %[[T2]], %[[T2]] [2, 3] : vector<8xf32>, vector<8xf32>
-// CHECK: %[[T4:.*]] = llvm.insertvalue %[[T3]], %[[T1]][0] : !llvm.array<2 x vector<2xf32>>
+// CHECK: %[[VAL_6:.*]] = llvm.mlir.cast %[[VAL_2]] : vector<2x2xf32> to !llvm.array<2 x vector<2xf32>>
+// CHECK: %[[T4:.*]] = llvm.insertvalue %[[T3]], %[[VAL_6]][0] : !llvm.array<2 x vector<2xf32>>
+// CHECK: %[[A:.*]] = llvm.mlir.cast %[[ARG]] : vector<4x8xf32> to !llvm.array<4 x vector<8xf32>>
// CHECK: %[[T5:.*]] = llvm.extractvalue %[[A]][3] : !llvm.array<4 x vector<8xf32>>
// CHECK: %[[T6:.*]] = llvm.shufflevector %[[T5]], %[[T5]] [2, 3] : vector<8xf32>, vector<8xf32>
// CHECK: %[[T7:.*]] = llvm.insertvalue %[[T6]], %[[T4]][1] : !llvm.array<2 x vector<2xf32>>
-// CHECK: llvm.return %[[T7]] : !llvm.array<2 x vector<2xf32>>
+// CHECK: %[[VAL_12:.*]] = llvm.mlir.cast %[[T7]] : !llvm.array<2 x vector<2xf32>> to vector<2x2xf32>
+// CHECK: return %[[VAL_12]] : vector<2x2xf32>
// -----
@@ -753,9 +765,9 @@ func @insert_strided_slice1(%b: vector<4x4xf32>, %c: vector<4x4x4xf32>) -> vecto
%0 = vector.insert_strided_slice %b, %c {offsets = [2, 0, 0], strides = [1, 1]} : vector<4x4xf32> into vector<4x4x4xf32>
return %0 : vector<4x4x4xf32>
}
-// CHECK-LABEL: llvm.func @insert_strided_slice1
+// CHECK-LABEL: @insert_strided_slice1
// CHECK: llvm.extractvalue {{.*}}[2] : !llvm.array<4 x array<4 x vector<4xf32>>>
-// CHECK-NEXT: llvm.insertvalue {{.*}}, {{.*}}[2] : !llvm.array<4 x array<4 x vector<4xf32>>>
+// CHECK: llvm.insertvalue {{.*}}, {{.*}}[2] : !llvm.array<4 x array<4 x vector<4xf32>>>
// -----
@@ -763,35 +775,46 @@ func @insert_strided_slice2(%a: vector<2x2xf32>, %b: vector<4x4xf32>) -> vector<
%0 = vector.insert_strided_slice %a, %b {offsets = [2, 2], strides = [1, 1]} : vector<2x2xf32> into vector<4x4xf32>
return %0 : vector<4x4xf32>
}
-// CHECK-LABEL: llvm.func @insert_strided_slice2
+// CHECK-LABEL: @insert_strided_slice2
//
// Subvector vector<2xf32> @0 into vector<4xf32> @2
// CHECK: llvm.extractvalue {{.*}}[0] : !llvm.array<2 x vector<2xf32>>
+// CHECK-NEXT: llvm.mlir.cast %{{.*}} : vector<4x4xf32> to !llvm.array<4 x vector<4xf32>>
// CHECK-NEXT: llvm.extractvalue {{.*}}[2] : !llvm.array<4 x vector<4xf32>>
// Element @0 -> element @2
-// CHECK-NEXT: llvm.mlir.constant(0 : index) : i64
+// CHECK-NEXT: constant 0 : index
+// CHECK-NEXT: llvm.mlir.cast %{{.*}} : index to i64
// CHECK-NEXT: llvm.extractelement {{.*}}[{{.*}} : i64] : vector<2xf32>
-// CHECK-NEXT: llvm.mlir.constant(2 : index) : i64
+// CHECK-NEXT: constant 2 : index
+// CHECK-NEXT: llvm.mlir.cast %{{.*}} : index to i64
// CHECK-NEXT: llvm.insertelement {{.*}}, {{.*}}[{{.*}} : i64] : vector<4xf32>
// Element @1 -> element @3
-// CHECK-NEXT: llvm.mlir.constant(1 : index) : i64
+// CHECK-NEXT: constant 1 : index
+// CHECK-NEXT: llvm.mlir.cast %{{.*}} : index to i64
// CHECK-NEXT: llvm.extractelement {{.*}}[{{.*}} : i64] : vector<2xf32>
-// CHECK-NEXT: llvm.mlir.constant(3 : index) : i64
+// CHECK-NEXT: constant 3 : index
+// CHECK-NEXT: llvm.mlir.cast %{{.*}} : index to i64
// CHECK-NEXT: llvm.insertelement {{.*}}, {{.*}}[{{.*}} : i64] : vector<4xf32>
+// CHECK-NEXT: llvm.mlir.cast %{{.*}} : vector<4x4xf32> to !llvm.array<4 x vector<4xf32>>
// CHECK-NEXT: llvm.insertvalue {{.*}}, {{.*}}[2] : !llvm.array<4 x vector<4xf32>>
//
// Subvector vector<2xf32> @1 into vector<4xf32> @3
// CHECK: llvm.extractvalue {{.*}}[1] : !llvm.array<2 x vector<2xf32>>
+// CHECK-NEXT: llvm.mlir.cast %{{.*}} : vector<4x4xf32> to !llvm.array<4 x vector<4xf32>>
// CHECK-NEXT: llvm.extractvalue {{.*}}[3] : !llvm.array<4 x vector<4xf32>>
// Element @0 -> element @2
-// CHECK-NEXT: llvm.mlir.constant(0 : index) : i64
+// CHECK-NEXT: constant 0 : index
+// CHECK-NEXT: llvm.mlir.cast %{{.*}} : index to i64
// CHECK-NEXT: llvm.extractelement {{.*}}[{{.*}} : i64] : vector<2xf32>
-// CHECK-NEXT: llvm.mlir.constant(2 : index) : i64
+// CHECK-NEXT: constant 2 : index
+// CHECK-NEXT: llvm.mlir.cast %{{.*}} : index to i64
// CHECK-NEXT: llvm.insertelement {{.*}}, {{.*}}[{{.*}} : i64] : vector<4xf32>
// Element @1 -> element @3
-// CHECK-NEXT: llvm.mlir.constant(1 : index) : i64
+// CHECK-NEXT: constant 1 : index
+// CHECK-NEXT: llvm.mlir.cast %{{.*}} : index to i64
// CHECK-NEXT: llvm.extractelement {{.*}}[{{.*}} : i64] : vector<2xf32>
-// CHECK-NEXT: llvm.mlir.constant(3 : index) : i64
+// CHECK-NEXT: constant 3 : index
+// CHECK-NEXT: llvm.mlir.cast %{{.*}} : index to i64
// CHECK-NEXT: llvm.insertelement {{.*}}, {{.*}}[{{.*}} : i64] : vector<4xf32>
// CHECK-NEXT: llvm.insertvalue {{.*}}, {{.*}}[3] : !llvm.array<4 x vector<4xf32>>
@@ -802,50 +825,73 @@ func @insert_strided_slice3(%arg0: vector<2x4xf32>, %arg1: vector<16x4x8xf32>) -
vector<2x4xf32> into vector<16x4x8xf32>
return %0 : vector<16x4x8xf32>
}
-// CHECK-LABEL: llvm.func @insert_strided_slice3(
-// CHECK-SAME: %[[A:.*]]: !llvm.array<2 x vector<4xf32>>,
-// CHECK-SAME: %[[B:.*]]: !llvm.array<16 x array<4 x vector<8xf32>>>)
-// CHECK: %[[s0:.*]] = llvm.extractvalue %[[B]][0] : !llvm.array<16 x array<4 x vector<8xf32>>>
-// CHECK: %[[s1:.*]] = llvm.extractvalue %[[A]][0] : !llvm.array<2 x vector<4xf32>>
-// CHECK: %[[s2:.*]] = llvm.extractvalue %[[B]][0, 0] : !llvm.array<16 x array<4 x vector<8xf32>>>
-// CHECK: %[[s3:.*]] = llvm.mlir.constant(0 : index) : i64
-// CHECK: %[[s4:.*]] = llvm.extractelement %[[s1]][%[[s3]] : i64] : vector<4xf32>
-// CHECK: %[[s5:.*]] = llvm.mlir.constant(2 : index) : i64
-// CHECK: %[[s6:.*]] = llvm.insertelement %[[s4]], %[[s2]][%[[s5]] : i64] : vector<8xf32>
-// CHECK: %[[s7:.*]] = llvm.mlir.constant(1 : index) : i64
-// CHECK: %[[s8:.*]] = llvm.extractelement %[[s1]][%[[s7]] : i64] : vector<4xf32>
-// CHECK: %[[s9:.*]] = llvm.mlir.constant(3 : index) : i64
-// CHECK: %[[s10:.*]] = llvm.insertelement %[[s8]], %[[s6]][%[[s9]] : i64] : vector<8xf32>
-// CHECK: %[[s11:.*]] = llvm.mlir.constant(2 : index) : i64
-// CHECK: %[[s12:.*]] = llvm.extractelement %[[s1]][%[[s11]] : i64] : vector<4xf32>
-// CHECK: %[[s13:.*]] = llvm.mlir.constant(4 : index) : i64
-// CHECK: %[[s14:.*]] = llvm.insertelement %[[s12]], %[[s10]][%[[s13]] : i64] : vector<8xf32>
-// CHECK: %[[s15:.*]] = llvm.mlir.constant(3 : index) : i64
-// CHECK: %[[s16:.*]] = llvm.extractelement %[[s1]][%[[s15]] : i64] : vector<4xf32>
-// CHECK: %[[s17:.*]] = llvm.mlir.constant(5 : index) : i64
-// CHECK: %[[s18:.*]] = llvm.insertelement %[[s16]], %[[s14]][%[[s17]] : i64] : vector<8xf32>
-// CHECK: %[[s19:.*]] = llvm.insertvalue %[[s18]], %[[s0]][0] : !llvm.array<4 x vector<8xf32>>
-// CHECK: %[[s20:.*]] = llvm.extractvalue %[[A]][1] : !llvm.array<2 x vector<4xf32>>
-// CHECK: %[[s21:.*]] = llvm.extractvalue %[[B]][0, 1] : !llvm.array<16 x array<4 x vector<8xf32>>>
-// CHECK: %[[s22:.*]] = llvm.mlir.constant(0 : index) : i64
-// CHECK: %[[s23:.*]] = llvm.extractelement %[[s20]][%[[s22]] : i64] : vector<4xf32>
-// CHECK: %[[s24:.*]] = llvm.mlir.constant(2 : index) : i64
-// CHECK: %[[s25:.*]] = llvm.insertelement %[[s23]], %[[s21]][%[[s24]] : i64] : vector<8xf32>
-// CHECK: %[[s26:.*]] = llvm.mlir.constant(1 : index) : i64
-// CHECK: %[[s27:.*]] = llvm.extractelement %[[s20]][%[[s26]] : i64] : vector<4xf32>
-// CHECK: %[[s28:.*]] = llvm.mlir.constant(3 : index) : i64
-// CHECK: %[[s29:.*]] = llvm.insertelement %[[s27]], %[[s25]][%[[s28]] : i64] : vector<8xf32>
-// CHECK: %[[s30:.*]] = llvm.mlir.constant(2 : index) : i64
-// CHECK: %[[s31:.*]] = llvm.extractelement %[[s20]][%[[s30]] : i64] : vector<4xf32>
-// CHECK: %[[s32:.*]] = llvm.mlir.constant(4 : index) : i64
-// CHECK: %[[s33:.*]] = llvm.insertelement %[[s31]], %[[s29]][%[[s32]] : i64] : vector<8xf32>
-// CHECK: %[[s34:.*]] = llvm.mlir.constant(3 : index) : i64
-// CHECK: %[[s35:.*]] = llvm.extractelement %[[s20]][%[[s34]] : i64] : vector<4xf32>
-// CHECK: %[[s36:.*]] = llvm.mlir.constant(5 : index) : i64
-// CHECK: %[[s37:.*]] = llvm.insertelement %[[s35]], %[[s33]][%[[s36]] : i64] : vector<8xf32>
-// CHECK: %[[s38:.*]] = llvm.insertvalue %[[s37]], %[[s19]][1] : !llvm.array<4 x vector<8xf32>>
-// CHECK: %[[s39:.*]] = llvm.insertvalue %[[s38]], %[[B]][0] : !llvm.array<16 x array<4 x vector<8xf32>>>
-// CHECK: llvm.return %[[s39]] : !llvm.array<16 x array<4 x vector<8xf32>>>
+// CHECK-LABEL: @insert_strided_slice3(
+// CHECK-SAME: %[[A:.*]]: vector<2x4xf32>,
+// CHECK-SAME: %[[B:.*]]: vector<16x4x8xf32>)
+// CHECK: %[[s2:.*]] = llvm.mlir.cast %[[B]] : vector<16x4x8xf32> to !llvm.array<16 x array<4 x vector<8xf32>>>
+// CHECK: %[[s3:.*]] = llvm.extractvalue %[[s2]][0] : !llvm.array<16 x array<4 x vector<8xf32>>>
+// CHECK: %[[s4:.*]] = llvm.mlir.cast %[[A]] : vector<2x4xf32> to !llvm.array<2 x vector<4xf32>>
+// CHECK: %[[s5:.*]] = llvm.extractvalue %[[s4]][0] : !llvm.array<2 x vector<4xf32>>
+// CHECK: %[[s6:.*]] = llvm.mlir.cast %[[B]] : vector<16x4x8xf32> to !llvm.array<16 x array<4 x vector<8xf32>>>
+// CHECK: %[[s7:.*]] = llvm.extractvalue %[[s6]][0, 0] : !llvm.array<16 x array<4 x vector<8xf32>>>
+// CHECK: %[[s8:.*]] = constant 0 : index
+// CHECK: %[[s9:.*]] = llvm.mlir.cast %[[s8]] : index to i64
+// CHECK: %[[s10:.*]] = llvm.extractelement %[[s5]]{{\[}}%[[s9]] : i64] : vector<4xf32>
+// CHECK: %[[s11:.*]] = constant 2 : index
+// CHECK: %[[s12:.*]] = llvm.mlir.cast %[[s11]] : index to i64
+// CHECK: %[[s13:.*]] = llvm.insertelement %[[s10]], %[[s7]]{{\[}}%[[s12]] : i64] : vector<8xf32>
+// CHECK: %[[s14:.*]] = constant 1 : index
+// CHECK: %[[s15:.*]] = llvm.mlir.cast %[[s14]] : index to i64
+// CHECK: %[[s16:.*]] = llvm.extractelement %[[s5]]{{\[}}%[[s15]] : i64] : vector<4xf32>
+// CHECK: %[[s17:.*]] = constant 3 : index
+// CHECK: %[[s18:.*]] = llvm.mlir.cast %[[s17]] : index to i64
+// CHECK: %[[s19:.*]] = llvm.insertelement %[[s16]], %[[s13]]{{\[}}%[[s18]] : i64] : vector<8xf32>
+// CHECK: %[[s20:.*]] = constant 2 : index
+// CHECK: %[[s21:.*]] = llvm.mlir.cast %[[s20]] : index to i64
+// CHECK: %[[s22:.*]] = llvm.extractelement %[[s5]]{{\[}}%[[s21]] : i64] : vector<4xf32>
+// CHECK: %[[s23:.*]] = constant 4 : index
+// CHECK: %[[s24:.*]] = llvm.mlir.cast %[[s23]] : index to i64
+// CHECK: %[[s25:.*]] = llvm.insertelement %[[s22]], %[[s19]]{{\[}}%[[s24]] : i64] : vector<8xf32>
+// CHECK: %[[s26:.*]] = constant 3 : index
+// CHECK: %[[s27:.*]] = llvm.mlir.cast %[[s26]] : index to i64
+// CHECK: %[[s28:.*]] = llvm.extractelement %[[s5]]{{\[}}%[[s27]] : i64] : vector<4xf32>
+// CHECK: %[[s29:.*]] = constant 5 : index
+// CHECK: %[[s30:.*]] = llvm.mlir.cast %[[s29]] : index to i64
+// CHECK: %[[s31:.*]] = llvm.insertelement %[[s28]], %[[s25]]{{\[}}%[[s30]] : i64] : vector<8xf32>
+// CHECK: %[[s32:.*]] = llvm.insertvalue %[[s31]], %[[s3]][0] : !llvm.array<4 x vector<8xf32>>
+// CHECK: %[[s33:.*]] = llvm.mlir.cast %[[A]] : vector<2x4xf32> to !llvm.array<2 x vector<4xf32>>
+// CHECK: %[[s34:.*]] = llvm.extractvalue %[[s33]][1] : !llvm.array<2 x vector<4xf32>>
+// CHECK: %[[s35:.*]] = llvm.mlir.cast %[[B]] : vector<16x4x8xf32> to !llvm.array<16 x array<4 x vector<8xf32>>>
+// CHECK: %[[s36:.*]] = llvm.extractvalue %[[s35]][0, 1] : !llvm.array<16 x array<4 x vector<8xf32>>>
+// CHECK: %[[s37:.*]] = constant 0 : index
+// CHECK: %[[s38:.*]] = llvm.mlir.cast %[[s37]] : index to i64
+// CHECK: %[[s39:.*]] = llvm.extractelement %[[s34]]{{\[}}%[[s38]] : i64] : vector<4xf32>
+// CHECK: %[[s40:.*]] = constant 2 : index
+// CHECK: %[[s41:.*]] = llvm.mlir.cast %[[s40]] : index to i64
+// CHECK: %[[s42:.*]] = llvm.insertelement %[[s39]], %[[s36]]{{\[}}%[[s41]] : i64] : vector<8xf32>
+// CHECK: %[[s43:.*]] = constant 1 : index
+// CHECK: %[[s44:.*]] = llvm.mlir.cast %[[s43]] : index to i64
+// CHECK: %[[s45:.*]] = llvm.extractelement %[[s34]]{{\[}}%[[s44]] : i64] : vector<4xf32>
+// CHECK: %[[s46:.*]] = constant 3 : index
+// CHECK: %[[s47:.*]] = llvm.mlir.cast %[[s46]] : index to i64
+// CHECK: %[[s48:.*]] = llvm.insertelement %[[s45]], %[[s42]]{{\[}}%[[s47]] : i64] : vector<8xf32>
+// CHECK: %[[s49:.*]] = constant 2 : index
+// CHECK: %[[s50:.*]] = llvm.mlir.cast %[[s49]] : index to i64
+// CHECK: %[[s51:.*]] = llvm.extractelement %[[s34]]{{\[}}%[[s50]] : i64] : vector<4xf32>
+// CHECK: %[[s52:.*]] = constant 4 : index
+// CHECK: %[[s53:.*]] = llvm.mlir.cast %[[s52]] : index to i64
+// CHECK: %[[s54:.*]] = llvm.insertelement %[[s51]], %[[s48]]{{\[}}%[[s53]] : i64] : vector<8xf32>
+// CHECK: %[[s55:.*]] = constant 3 : index
+// CHECK: %[[s56:.*]] = llvm.mlir.cast %[[s55]] : index to i64
+// CHECK: %[[s57:.*]] = llvm.extractelement %[[s34]]{{\[}}%[[s56]] : i64] : vector<4xf32>
+// CHECK: %[[s58:.*]] = constant 5 : index
+// CHECK: %[[s59:.*]] = llvm.mlir.cast %[[s58]] : index to i64
+// CHECK: %[[s60:.*]] = llvm.insertelement %[[s57]], %[[s54]]{{\[}}%[[s59]] : i64] : vector<8xf32>
+// CHECK: %[[s61:.*]] = llvm.insertvalue %[[s60]], %[[s32]][1] : !llvm.array<4 x vector<8xf32>>
+// CHECK: %[[s62:.*]] = llvm.mlir.cast %[[B]] : vector<16x4x8xf32> to !llvm.array<16 x array<4 x vector<8xf32>>>
+// CHECK: %[[s63:.*]] = llvm.insertvalue %[[s61]], %[[s62]][0] : !llvm.array<16 x array<4 x vector<8xf32>>>
+// CHECK: %[[s64:.*]] = llvm.mlir.cast %[[s63]] : !llvm.array<16 x array<4 x vector<8xf32>>> to vector<16x4x8xf32>
+// CHECK: return %[[s64]] : vector<16x4x8xf32>
// -----
@@ -855,33 +901,43 @@ func @extract_strides(%arg0: vector<3x3xf32>) -> vector<1x1xf32> {
%1 = vector.tuple_get %0, 3 : tuple<vector<2x2xf32>, vector<2x1xf32>, vector<1x2xf32>, vector<1x1xf32>>
return %1 : vector<1x1xf32>
}
-// CHECK-LABEL: llvm.func @extract_strides(
-// CHECK-SAME: %[[A:.*]]: !llvm.array<3 x vector<3xf32>>)
-// CHECK: %[[T1:.*]] = llvm.mlir.constant(dense<0.000000e+00> : vector<1x1xf32>) : !llvm.array<1 x vector<1xf32>>
+// CHECK-LABEL: @extract_strides(
+// CHECK-SAME: %[[ARG:.*]]: vector<3x3xf32>)
+// CHECK: %[[VAL_1:.*]] = constant 0.000000e+00 : f32
+// CHECK: %[[VAL_2:.*]] = splat %[[VAL_1]] : vector<1x1xf32>
+// CHECK: %[[A:.*]] = llvm.mlir.cast %[[ARG]] : vector<3x3xf32> to !llvm.array<3 x vector<3xf32>>
// CHECK: %[[T2:.*]] = llvm.extractvalue %[[A]][2] : !llvm.array<3 x vector<3xf32>>
// CHECK: %[[T3:.*]] = llvm.shufflevector %[[T2]], %[[T2]] [2] : vector<3xf32>, vector<3xf32>
-// CHECK: %[[T4:.*]] = llvm.insertvalue %[[T3]], %[[T1]][0] : !llvm.array<1 x vector<1xf32>>
-// CHECK: llvm.return %[[T4]] : !llvm.array<1 x vector<1xf32>>
+// CHECK: %[[VAL_6:.*]] = llvm.mlir.cast %[[VAL_2]] : vector<1x1xf32> to !llvm.array<1 x vector<1xf32>>
+// CHECK: %[[T4:.*]] = llvm.insertvalue %[[T3]], %[[VAL_6]][0] : !llvm.array<1 x vector<1xf32>>
+// CHECK: %[[VAL_8:.*]] = llvm.mlir.cast %[[T4]] : !llvm.array<1 x vector<1xf32>> to vector<1x1xf32>
+// CHECK: return %[[VAL_8]] : vector<1x1xf32>
-// CHECK-LABEL: llvm.func @vector_fma(
-// CHECK-SAME: %[[A:.*]]: vector<8xf32>, %[[B:.*]]: !llvm.array<2 x vector<4xf32>>)
-// CHECK-SAME: -> !llvm.struct<(vector<8xf32>, array<2 x vector<4xf32>>)> {
// -----
func @vector_fma(%a: vector<8xf32>, %b: vector<2x4xf32>) -> (vector<8xf32>, vector<2x4xf32>) {
- // CHECK: "llvm.intr.fmuladd"(%[[A]], %[[A]], %[[A]]) :
- // CHECK-SAME: (vector<8xf32>, vector<8xf32>, vector<8xf32>) -> vector<8xf32>
+ // CHECK-LABEL: @vector_fma
+ // CHECK-SAME: %[[A:.*]]: vector<8xf32>
+ // CHECK-SAME: %[[B:.*]]: vector<2x4xf32>
+ // CHECK: "llvm.intr.fmuladd"
+ // CHECK-SAME: (vector<8xf32>, vector<8xf32>, vector<8xf32>) -> vector<8xf32>
%0 = vector.fma %a, %a, %a : vector<8xf32>
- // CHECK: %[[b00:.*]] = llvm.extractvalue %[[B]][0] : !llvm.array<2 x vector<4xf32>>
- // CHECK: %[[b01:.*]] = llvm.extractvalue %[[B]][0] : !llvm.array<2 x vector<4xf32>>
- // CHECK: %[[b02:.*]] = llvm.extractvalue %[[B]][0] : !llvm.array<2 x vector<4xf32>>
+ // CHECK: %[[BL:.*]] = llvm.mlir.cast %[[B]] : vector<2x4xf32> to !llvm.array<2 x vector<4xf32>>
+ // CHECK: %[[b00:.*]] = llvm.extractvalue %[[BL]][0] : !llvm.array<2 x vector<4xf32>>
+ // CHECK: %[[BL:.*]] = llvm.mlir.cast %[[B]] : vector<2x4xf32> to !llvm.array<2 x vector<4xf32>>
+ // CHECK: %[[b01:.*]] = llvm.extractvalue %[[BL]][0] : !llvm.array<2 x vector<4xf32>>
+ // CHECK: %[[BL:.*]] = llvm.mlir.cast %[[B]] : vector<2x4xf32> to !llvm.array<2 x vector<4xf32>>
+ // CHECK: %[[b02:.*]] = llvm.extractvalue %[[BL]][0] : !llvm.array<2 x vector<4xf32>>
// CHECK: %[[B0:.*]] = "llvm.intr.fmuladd"(%[[b00]], %[[b01]], %[[b02]]) :
// CHECK-SAME: (vector<4xf32>, vector<4xf32>, vector<4xf32>) -> vector<4xf32>
// CHECK: llvm.insertvalue %[[B0]], {{.*}}[0] : !llvm.array<2 x vector<4xf32>>
- // CHECK: %[[b10:.*]] = llvm.extractvalue %[[B]][1] : !llvm.array<2 x vector<4xf32>>
- // CHECK: %[[b11:.*]] = llvm.extractvalue %[[B]][1] : !llvm.array<2 x vector<4xf32>>
- // CHECK: %[[b12:.*]] = llvm.extractvalue %[[B]][1] : !llvm.array<2 x vector<4xf32>>
+ // CHECK: %[[BL:.*]] = llvm.mlir.cast %[[B]] : vector<2x4xf32> to !llvm.array<2 x vector<4xf32>>
+ // CHECK: %[[b10:.*]] = llvm.extractvalue %[[BL]][1] : !llvm.array<2 x vector<4xf32>>
+ // CHECK: %[[BL:.*]] = llvm.mlir.cast %[[B]] : vector<2x4xf32> to !llvm.array<2 x vector<4xf32>>
+ // CHECK: %[[b11:.*]] = llvm.extractvalue %[[BL]][1] : !llvm.array<2 x vector<4xf32>>
+ // CHECK: %[[BL:.*]] = llvm.mlir.cast %[[B]] : vector<2x4xf32> to !llvm.array<2 x vector<4xf32>>
+ // CHECK: %[[b12:.*]] = llvm.extractvalue %[[BL]][1] : !llvm.array<2 x vector<4xf32>>
// CHECK: %[[B1:.*]] = "llvm.intr.fmuladd"(%[[b10]], %[[b11]], %[[b12]]) :
// CHECK-SAME: (vector<4xf32>, vector<4xf32>, vector<4xf32>) -> vector<4xf32>
// CHECK: llvm.insertvalue %[[B1]], {{.*}}[1] : !llvm.array<2 x vector<4xf32>>
@@ -896,12 +952,12 @@ func @reduce_f16(%arg0: vector<16xf16>) -> f16 {
%0 = vector.reduction "add", %arg0 : vector<16xf16> into f16
return %0 : f16
}
-// CHECK-LABEL: llvm.func @reduce_f16(
+// CHECK-LABEL: @reduce_f16(
// CHECK-SAME: %[[A:.*]]: vector<16xf16>)
// CHECK: %[[C:.*]] = llvm.mlir.constant(0.000000e+00 : f16) : f16
// CHECK: %[[V:.*]] = "llvm.intr.vector.reduce.fadd"(%[[C]], %[[A]])
// CHECK-SAME: {reassoc = false} : (f16, vector<16xf16>) -> f16
-// CHECK: llvm.return %[[V]] : f16
+// CHECK: return %[[V]] : f16
// -----
@@ -909,12 +965,12 @@ func @reduce_f32(%arg0: vector<16xf32>) -> f32 {
%0 = vector.reduction "add", %arg0 : vector<16xf32> into f32
return %0 : f32
}
-// CHECK-LABEL: llvm.func @reduce_f32(
+// CHECK-LABEL: @reduce_f32(
// CHECK-SAME: %[[A:.*]]: vector<16xf32>)
// CHECK: %[[C:.*]] = llvm.mlir.constant(0.000000e+00 : f32) : f32
// CHECK: %[[V:.*]] = "llvm.intr.vector.reduce.fadd"(%[[C]], %[[A]])
// CHECK-SAME: {reassoc = false} : (f32, vector<16xf32>) -> f32
-// CHECK: llvm.return %[[V]] : f32
+// CHECK: return %[[V]] : f32
// -----
@@ -922,12 +978,12 @@ func @reduce_f64(%arg0: vector<16xf64>) -> f64 {
%0 = vector.reduction "add", %arg0 : vector<16xf64> into f64
return %0 : f64
}
-// CHECK-LABEL: llvm.func @reduce_f64(
+// CHECK-LABEL: @reduce_f64(
// CHECK-SAME: %[[A:.*]]: vector<16xf64>)
// CHECK: %[[C:.*]] = llvm.mlir.constant(0.000000e+00 : f64) : f64
// CHECK: %[[V:.*]] = "llvm.intr.vector.reduce.fadd"(%[[C]], %[[A]])
// CHECK-SAME: {reassoc = false} : (f64, vector<16xf64>) -> f64
-// CHECK: llvm.return %[[V]] : f64
+// CHECK: return %[[V]] : f64
// -----
@@ -935,10 +991,10 @@ func @reduce_i8(%arg0: vector<16xi8>) -> i8 {
%0 = vector.reduction "add", %arg0 : vector<16xi8> into i8
return %0 : i8
}
-// CHECK-LABEL: llvm.func @reduce_i8(
+// CHECK-LABEL: @reduce_i8(
// CHECK-SAME: %[[A:.*]]: vector<16xi8>)
// CHECK: %[[V:.*]] = "llvm.intr.vector.reduce.add"(%[[A]])
-// CHECK: llvm.return %[[V]] : i8
+// CHECK: return %[[V]] : i8
// -----
@@ -946,10 +1002,10 @@ func @reduce_i32(%arg0: vector<16xi32>) -> i32 {
%0 = vector.reduction "add", %arg0 : vector<16xi32> into i32
return %0 : i32
}
-// CHECK-LABEL: llvm.func @reduce_i32(
+// CHECK-LABEL: @reduce_i32(
// CHECK-SAME: %[[A:.*]]: vector<16xi32>)
// CHECK: %[[V:.*]] = "llvm.intr.vector.reduce.add"(%[[A]])
-// CHECK: llvm.return %[[V]] : i32
+// CHECK: return %[[V]] : i32
// -----
@@ -957,10 +1013,10 @@ func @reduce_i64(%arg0: vector<16xi64>) -> i64 {
%0 = vector.reduction "add", %arg0 : vector<16xi64> into i64
return %0 : i64
}
-// CHECK-LABEL: llvm.func @reduce_i64(
+// CHECK-LABEL: @reduce_i64(
// CHECK-SAME: %[[A:.*]]: vector<16xi64>)
// CHECK: %[[V:.*]] = "llvm.intr.vector.reduce.add"(%[[A]])
-// CHECK: llvm.return %[[V]] : i64
+// CHECK: return %[[V]] : i64
// 4x16 16x3 4x3
@@ -972,7 +1028,7 @@ func @matrix_ops(%A: vector<64xf64>, %B: vector<48xf64>) -> vector<12xf64> {
(vector<64xf64>, vector<48xf64>) -> vector<12xf64>
return %C: vector<12xf64>
}
-// CHECK-LABEL: llvm.func @matrix_ops
+// CHECK-LABEL: @matrix_ops
// CHECK: llvm.intr.matrix.multiply %{{.*}}, %{{.*}} {
// CHECK-SAME: lhs_columns = 16 : i32, lhs_rows = 4 : i32, rhs_columns = 3 : i32
// CHECK-SAME: } : (vector<64xf64>, vector<48xf64>) -> vector<12xf64>
@@ -990,53 +1046,35 @@ func @transfer_read_1d(%A : memref<?xf32>, %base: index) -> vector<17xf32> {
return %f: vector<17xf32>
}
// CHECK-LABEL: func @transfer_read_1d
-// CHECK-SAME: %[[BASE:[a-zA-Z0-9]*]]: i64) -> vector<17xf32>
+// CHECK-SAME: %[[BASE:[a-zA-Z0-9]*]]: index) -> vector<17xf32>
+// CHECK: %[[c7:.*]] = constant 7.0
//
// 1. Bitcast to vector form.
// CHECK: %[[gep:.*]] = llvm.getelementptr {{.*}} :
// CHECK-SAME: (!llvm.ptr<f32>, i64) -> !llvm.ptr<f32>
// CHECK: %[[vecPtr:.*]] = llvm.bitcast %[[gep]] :
// CHECK-SAME: !llvm.ptr<f32> to !llvm.ptr<vector<17xf32>>
-// CHECK: %[[DIM:.*]] = llvm.extractvalue %{{.*}}[3, 0] :
-// CHECK-SAME: !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<1 x i64>, array<1 x i64>)>
+// CHECK: %[[C0:.*]] = constant 0 : index
+// CHECK: %[[DIM:.*]] = dim %{{.*}}, %[[C0]] : memref<?xf32>
//
// 2. Create a vector with linear indices [ 0 .. vector_length - 1 ].
-// CHECK: %[[linearIndex:.*]] = llvm.mlir.constant(dense
+// CHECK: %[[linearIndex:.*]] = constant dense
// CHECK-SAME: <[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]> :
-// CHECK-SAME: vector<17xi32>) : vector<17xi32>
+// CHECK-SAME: vector<17xi32>
//
// 3. Create offsetVector = [ offset + 0 .. offset + vector_length - 1 ].
-// CHECK: %[[otrunc:.*]] = llvm.trunc %[[BASE]] : i64 to i32
-// CHECK: %[[offsetVec:.*]] = llvm.mlir.undef : vector<17xi32>
-// CHECK: %[[c0:.*]] = llvm.mlir.constant(0 : i32) : i32
-// CHECK: %[[offsetVec2:.*]] = llvm.insertelement %[[otrunc]], %[[offsetVec]][%[[c0]] :
-// CHECK-SAME: i32] : vector<17xi32>
-// CHECK: %[[offsetVec3:.*]] = llvm.shufflevector %[[offsetVec2]], %{{.*}} [
-// CHECK-SAME: 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32,
-// CHECK-SAME: 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32,
-// CHECK-SAME: 0 : i32, 0 : i32, 0 : i32] :
-// CHECK-SAME: vector<17xi32>, vector<17xi32>
-// CHECK: %[[offsetVec4:.*]] = llvm.add %[[offsetVec3]], %[[linearIndex]] :
-// CHECK-SAME: vector<17xi32>
+// CHECK: %[[otrunc:.*]] = index_cast %[[BASE]] : index to i32
+// CHECK: %[[offsetVec:.*]] = splat %[[otrunc]] : vector<17xi32>
+// CHECK: %[[offsetVec2:.*]] = addi %[[offsetVec]], %[[linearIndex]] : vector<17xi32>
//
// 4. Let dim the memref dimension, compute the vector comparison mask:
// [ offset + 0 .. offset + vector_length - 1 ] < [ dim .. dim ]
-// CHECK: %[[dtrunc:.*]] = llvm.trunc %[[DIM]] : i64 to i32
-// CHECK: %[[dimVec:.*]] = llvm.mlir.undef : vector<17xi32>
-// CHECK: %[[c01:.*]] = llvm.mlir.constant(0 : i32) : i32
-// CHECK: %[[dimVec2:.*]] = llvm.insertelement %[[dtrunc]], %[[dimVec]][%[[c01]] :
-// CHECK-SAME: i32] : vector<17xi32>
-// CHECK: %[[dimVec3:.*]] = llvm.shufflevector %[[dimVec2]], %{{.*}} [
-// CHECK-SAME: 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32,
-// CHECK-SAME: 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32,
-// CHECK-SAME: 0 : i32, 0 : i32, 0 : i32] :
-// CHECK-SAME: vector<17xi32>, vector<17xi32>
-// CHECK: %[[mask:.*]] = llvm.icmp "slt" %[[offsetVec4]], %[[dimVec3]] :
-// CHECK-SAME: vector<17xi32>
+// CHECK: %[[dtrunc:.*]] = index_cast %[[DIM]] : index to i32
+// CHECK: %[[dimVec:.*]] = splat %[[dtrunc]] : vector<17xi32>
+// CHECK: %[[mask:.*]] = cmpi slt, %[[offsetVec2]], %[[dimVec]] : vector<17xi32>
//
// 5. Rewrite as a masked read.
-// CHECK: %[[PASS_THROUGH:.*]] = llvm.mlir.constant(dense<7.000000e+00> :
-// CHECK-SAME: vector<17xf32>) : vector<17xf32>
+// CHECK: %[[PASS_THROUGH:.*]] = splat %[[c7]] : vector<17xf32>
// CHECK: %[[loaded:.*]] = llvm.intr.masked.load %[[vecPtr]], %[[mask]],
// CHECK-SAME: %[[PASS_THROUGH]] {alignment = 4 : i32} :
// CHECK-SAME: (!llvm.ptr<vector<17xf32>>, vector<17xi1>, vector<17xf32>) -> vector<17xf32>
@@ -1049,24 +1087,18 @@ func @transfer_read_1d(%A : memref<?xf32>, %base: index) -> vector<17xf32> {
// CHECK-SAME: !llvm.ptr<f32> to !llvm.ptr<vector<17xf32>>
//
// 2. Create a vector with linear indices [ 0 .. vector_length - 1 ].
-// CHECK: %[[linearIndex_b:.*]] = llvm.mlir.constant(dense
+// CHECK: %[[linearIndex_b:.*]] = constant dense
// CHECK-SAME: <[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]> :
-// CHECK-SAME: vector<17xi32>) : vector<17xi32>
+// CHECK-SAME: vector<17xi32>
//
// 3. Create offsetVector = [ offset + 0 .. offset + vector_length - 1 ].
-// CHECK: llvm.shufflevector {{.*}} [0 : i32, 0 : i32, 0 : i32, 0 : i32,
-// CHECK-SAME: 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32,
-// CHECK-SAME: 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32] :
-// CHECK-SAME: vector<17xi32>, vector<17xi32>
-// CHECK: llvm.add
+// CHECK: splat %{{.*}} : vector<17xi32>
+// CHECK: addi
//
// 4. Let dim the memref dimension, compute the vector comparison mask:
// [ offset + 0 .. offset + vector_length - 1 ] < [ dim .. dim ]
-// CHECK: llvm.shufflevector {{.*}} [0 : i32, 0 : i32, 0 : i32, 0 : i32,
-// CHECK-SAME: 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32,
-// CHECK-SAME: 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32] :
-// CHECK-SAME: vector<17xi32>, vector<17xi32>
-// CHECK: %[[mask_b:.*]] = llvm.icmp "slt" {{.*}} : vector<17xi32>
+// CHECK: splat %{{.*}} : vector<17xi32>
+// CHECK: %[[mask_b:.*]] = cmpi slt, {{.*}} : vector<17xi32>
//
// 5. Rewrite as a masked write.
// CHECK: llvm.intr.masked.store %[[loaded]], %[[vecPtr_b]], %[[mask_b]]
@@ -1083,34 +1115,18 @@ func @transfer_read_2d_to_1d(%A : memref<?x?xf32>, %base0: index, %base1: index)
return %f: vector<17xf32>
}
// CHECK-LABEL: func @transfer_read_2d_to_1d
-// CHECK-SAME: %[[BASE_0:[a-zA-Z0-9]*]]: i64, %[[BASE_1:[a-zA-Z0-9]*]]: i64) -> vector<17xf32>
-// CHECK: %[[DIM:.*]] = llvm.extractvalue %{{.*}}[3, 1] :
-// CHECK-SAME: !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
+// CHECK-SAME: %[[BASE_0:[a-zA-Z0-9]*]]: index, %[[BASE_1:[a-zA-Z0-9]*]]: index) -> vector<17xf32>
+// CHECK: %[[c1:.*]] = constant 1 : index
+// CHECK: %[[DIM:.*]] = dim %{{.*}}, %[[c1]] : memref<?x?xf32>
//
// Create offsetVector = [ offset + 0 .. offset + vector_length - 1 ].
-// CHECK: %[[trunc:.*]] = llvm.trunc %[[BASE_1]] : i64 to i32
-// CHECK: %[[offsetVec:.*]] = llvm.mlir.undef : vector<17xi32>
-// CHECK: %[[c0:.*]] = llvm.mlir.constant(0 : i32) : i32
-// CHECK: %[[offsetVec2:.*]] = llvm.insertelement %[[trunc]], %[[offsetVec]][%[[c0]] :
-// CHECK-SAME: i32] : vector<17xi32>
-// CHECK: %[[offsetVec3:.*]] = llvm.shufflevector %[[offsetVec2]], %{{.*}} [
-// CHECK-SAME: 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32,
-// CHECK-SAME: 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32,
-// CHECK-SAME: 0 : i32, 0 : i32, 0 : i32] :
-// CHECK-SAME: vector<17xi32>, vector<17xi32>
+// CHECK: %[[trunc:.*]] = index_cast %[[BASE_1]] : index to i32
+// CHECK: %[[offsetVec:.*]] = splat %[[trunc]] : vector<17xi32>
//
// Let dim the memref dimension, compute the vector comparison mask:
// [ offset + 0 .. offset + vector_length - 1 ] < [ dim .. dim ]
-// CHECK: %[[dimtrunc:.*]] = llvm.trunc %[[DIM]] : i64 to i32
-// CHECK: %[[dimVec:.*]] = llvm.mlir.undef : vector<17xi32>
-// CHECK: %[[c01:.*]] = llvm.mlir.constant(0 : i32) : i32
-// CHECK: %[[dimVec2:.*]] = llvm.insertelement %[[dimtrunc]], %[[dimVec]][%[[c01]] :
-// CHECK-SAME: i32] : vector<17xi32>
-// CHECK: %[[dimVec3:.*]] = llvm.shufflevector %[[dimVec2]], %{{.*}} [
-// CHECK-SAME: 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32,
-// CHECK-SAME: 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32,
-// CHECK-SAME: 0 : i32, 0 : i32, 0 : i32] :
-// CHECK-SAME: vector<17xi32>, vector<17xi32>
+// CHECK: %[[dimtrunc:.*]] = index_cast %[[DIM]] : index to i32
+// CHECK: splat %[[dimtrunc]] : vector<17xi32>
// -----
@@ -1125,7 +1141,7 @@ func @transfer_read_1d_non_zero_addrspace(%A : memref<?xf32, 3>, %base: index) -
return %f: vector<17xf32>
}
// CHECK-LABEL: func @transfer_read_1d_non_zero_addrspace
-// CHECK-SAME: %[[BASE:[a-zA-Z0-9]*]]: i64) -> vector<17xf32>
+// CHECK-SAME: %[[BASE:[a-zA-Z0-9]*]]: index) -> vector<17xf32>
//
// 1. Check address space for GEP is correct.
// CHECK: %[[gep:.*]] = llvm.getelementptr {{.*}} :
@@ -1134,8 +1150,8 @@ func @transfer_read_1d_non_zero_addrspace(%A : memref<?xf32, 3>, %base: index) -
// CHECK-SAME: !llvm.ptr<f32, 3> to !llvm.ptr<vector<17xf32>>
//
// 2. Check address space of the memref is correct.
-// CHECK: %[[DIM:.*]] = llvm.extractvalue %{{.*}}[3, 0] :
-// CHECK-SAME: !llvm.struct<(ptr<f32, 3>, ptr<f32, 3>, i64, array<1 x i64>, array<1 x i64>)>
+// CHECK: %[[c0:.*]] = constant 0 : index
+// CHECK: %[[DIM:.*]] = dim %{{.*}}, %[[c0]] : memref<?xf32, 3>
//
// 3. Check address space for GEP is correct.
// CHECK: %[[gep_b:.*]] = llvm.getelementptr {{.*}} :
@@ -1152,7 +1168,7 @@ func @transfer_read_1d_not_masked(%A : memref<?xf32>, %base: index) -> vector<17
return %f: vector<17xf32>
}
// CHECK-LABEL: func @transfer_read_1d_not_masked
-// CHECK-SAME: %[[BASE:[a-zA-Z0-9]*]]: i64) -> vector<17xf32>
+// CHECK-SAME: %[[BASE:[a-zA-Z0-9]*]]: index) -> vector<17xf32>
//
// 1. Bitcast to vector form.
// CHECK: %[[gep:.*]] = llvm.getelementptr {{.*}} :
@@ -1172,7 +1188,7 @@ func @transfer_read_1d_cast(%A : memref<?xi32>, %base: index) -> vector<12xi8> {
return %v: vector<12xi8>
}
// CHECK-LABEL: func @transfer_read_1d_cast
-// CHECK-SAME: %[[BASE:[a-zA-Z0-9]*]]: i64) -> vector<12xi8>
+// CHECK-SAME: %[[BASE:[a-zA-Z0-9]*]]: index) -> vector<12xi8>
//
// 1. Bitcast to vector form.
// CHECK: %[[gep:.*]] = llvm.getelementptr {{.*}} :
@@ -1190,8 +1206,8 @@ func @genbool_1d() -> vector<8xi1> {
return %0 : vector<8xi1>
}
// CHECK-LABEL: func @genbool_1d
-// CHECK: %[[C1:.*]] = llvm.mlir.constant(dense<[true, true, true, true, false, false, false, false]> : vector<8xi1>) : vector<8xi1>
-// CHECK: llvm.return %[[C1]] : vector<8xi1>
+// CHECK: %[[VAL_0:.*]] = constant dense<[true, true, true, true, false, false, false, false]> : vector<8xi1>
+// CHECK: return %[[VAL_0]] : vector<8xi1>
// -----
@@ -1201,11 +1217,13 @@ func @genbool_2d() -> vector<4x4xi1> {
}
// CHECK-LABEL: func @genbool_2d
-// CHECK: %[[C1:.*]] = llvm.mlir.constant(dense<[true, true, false, false]> : vector<4xi1>) : vector<4xi1>
-// CHECK: %[[C2:.*]] = llvm.mlir.constant(dense<false> : vector<4x4xi1>) : !llvm.array<4 x vector<4xi1>>
-// CHECK: %[[T0:.*]] = llvm.insertvalue %[[C1]], %[[C2]][0] : !llvm.array<4 x vector<4xi1>>
-// CHECK: %[[T1:.*]] = llvm.insertvalue %[[C1]], %[[T0]][1] : !llvm.array<4 x vector<4xi1>>
-// CHECK: llvm.return %[[T1]] : !llvm.array<4 x vector<4xi1>>
+// CHECK: %[[VAL_0:.*]] = constant dense<[true, true, false, false]> : vector<4xi1>
+// CHECK: %[[VAL_1:.*]] = constant dense<false> : vector<4x4xi1>
+// CHECK: %[[VAL_2:.*]] = llvm.mlir.cast %[[VAL_1]] : vector<4x4xi1> to !llvm.array<4 x vector<4xi1>>
+// CHECK: %[[VAL_3:.*]] = llvm.insertvalue %[[VAL_0]], %[[VAL_2]][0] : !llvm.array<4 x vector<4xi1>>
+// CHECK: %[[VAL_4:.*]] = llvm.insertvalue %[[VAL_0]], %[[VAL_3]][1] : !llvm.array<4 x vector<4xi1>>
+// CHECK: %[[VAL_5:.*]] = llvm.mlir.cast %[[VAL_4]] : !llvm.array<4 x vector<4xi1>> to vector<4x4xi1>
+// CHECK: return %[[VAL_5]] : vector<4x4xi1>
// -----
@@ -1220,7 +1238,7 @@ func @flat_transpose(%arg0: vector<16xf32>) -> vector<16xf32> {
// CHECK: %[[T:.*]] = llvm.intr.matrix.transpose %[[A]]
// CHECK-SAME: {columns = 4 : i32, rows = 4 : i32} :
// CHECK-SAME: vector<16xf32> into vector<16xf32>
-// CHECK: llvm.return %[[T]] : vector<16xf32>
+// CHECK: return %[[T]] : vector<16xf32>
// -----
@@ -1231,11 +1249,12 @@ func @masked_load_op(%arg0: memref<?xf32>, %arg1: vector<16xi1>, %arg2: vector<1
}
// CHECK-LABEL: func @masked_load_op
-// CHECK: %[[C:.*]] = llvm.mlir.constant(0 : index) : i64
+// CHECK: %[[CO:.*]] = constant 0 : index
+// CHECK: %[[C:.*]] = llvm.mlir.cast %[[CO]] : index to i64
// CHECK: %[[P:.*]] = llvm.getelementptr %{{.*}}[%[[C]]] : (!llvm.ptr<f32>, i64) -> !llvm.ptr<f32>
// CHECK: %[[B:.*]] = llvm.bitcast %[[P]] : !llvm.ptr<f32> to !llvm.ptr<vector<16xf32>>
// CHECK: %[[L:.*]] = llvm.intr.masked.load %[[B]], %{{.*}}, %{{.*}} {alignment = 4 : i32} : (!llvm.ptr<vector<16xf32>>, vector<16xi1>, vector<16xf32>) -> vector<16xf32>
-// CHECK: llvm.return %[[L]] : vector<16xf32>
+// CHECK: return %[[L]] : vector<16xf32>
// -----
@@ -1246,11 +1265,11 @@ func @masked_store_op(%arg0: memref<?xf32>, %arg1: vector<16xi1>, %arg2: vector<
}
// CHECK-LABEL: func @masked_store_op
-// CHECK: %[[C:.*]] = llvm.mlir.constant(0 : index) : i64
+// CHECK: %[[CO:.*]] = constant 0 : index
+// CHECK: %[[C:.*]] = llvm.mlir.cast %[[CO]] : index to i64
// CHECK: %[[P:.*]] = llvm.getelementptr %{{.*}}[%[[C]]] : (!llvm.ptr<f32>, i64) -> !llvm.ptr<f32>
// CHECK: %[[B:.*]] = llvm.bitcast %[[P]] : !llvm.ptr<f32> to !llvm.ptr<vector<16xf32>>
// CHECK: llvm.intr.masked.store %{{.*}}, %[[B]], %{{.*}} {alignment = 4 : i32} : vector<16xf32>, vector<16xi1> into !llvm.ptr<vector<16xf32>>
-// CHECK: llvm.return
// -----
@@ -1262,7 +1281,7 @@ func @gather_op(%arg0: memref<?xf32>, %arg1: vector<3xi32>, %arg2: vector<3xi1>,
// CHECK-LABEL: func @gather_op
// CHECK: %[[P:.*]] = llvm.getelementptr {{.*}}[%{{.*}}] : (!llvm.ptr<f32>, vector<3xi32>) -> !llvm.vec<3 x ptr<f32>>
// CHECK: %[[G:.*]] = llvm.intr.masked.gather %[[P]], %{{.*}}, %{{.*}} {alignment = 4 : i32} : (!llvm.vec<3 x ptr<f32>>, vector<3xi1>, vector<3xf32>) -> vector<3xf32>
-// CHECK: llvm.return %[[G]] : vector<3xf32>
+// CHECK: return %[[G]] : vector<3xf32>
// -----
@@ -1274,7 +1293,6 @@ func @scatter_op(%arg0: memref<?xf32>, %arg1: vector<3xi32>, %arg2: vector<3xi1>
// CHECK-LABEL: func @scatter_op
// CHECK: %[[P:.*]] = llvm.getelementptr {{.*}}[%{{.*}}] : (!llvm.ptr<f32>, vector<3xi32>) -> !llvm.vec<3 x ptr<f32>>
// CHECK: llvm.intr.masked.scatter %{{.*}}, %[[P]], %{{.*}} {alignment = 4 : i32} : vector<3xf32>, vector<3xi1> into !llvm.vec<3 x ptr<f32>>
-// CHECK: llvm.return
// -----
@@ -1285,10 +1303,11 @@ func @expand_load_op(%arg0: memref<?xf32>, %arg1: vector<11xi1>, %arg2: vector<1
}
// CHECK-LABEL: func @expand_load_op
-// CHECK: %[[C:.*]] = llvm.mlir.constant(0 : index) : i64
+// CHECK: %[[CO:.*]] = constant 0 : index
+// CHECK: %[[C:.*]] = llvm.mlir.cast %[[CO]] : index to i64
// CHECK: %[[P:.*]] = llvm.getelementptr %{{.*}}[%[[C]]] : (!llvm.ptr<f32>, i64) -> !llvm.ptr<f32>
// CHECK: %[[E:.*]] = "llvm.intr.masked.expandload"(%[[P]], %{{.*}}, %{{.*}}) : (!llvm.ptr<f32>, vector<11xi1>, vector<11xf32>) -> vector<11xf32>
-// CHECK: llvm.return %[[E]] : vector<11xf32>
+// CHECK: return %[[E]] : vector<11xf32>
// -----
@@ -1299,7 +1318,7 @@ func @compress_store_op(%arg0: memref<?xf32>, %arg1: vector<11xi1>, %arg2: vecto
}
// CHECK-LABEL: func @compress_store_op
-// CHECK: %[[C:.*]] = llvm.mlir.constant(0 : index) : i64
+// CHECK: %[[CO:.*]] = constant 0 : index
+// CHECK: %[[C:.*]] = llvm.mlir.cast %[[CO]] : index to i64
// CHECK: %[[P:.*]] = llvm.getelementptr %{{.*}}[%[[C]]] : (!llvm.ptr<f32>, i64) -> !llvm.ptr<f32>
// CHECK: "llvm.intr.masked.compressstore"(%{{.*}}, %[[P]], %{{.*}}) : (vector<11xf32>, !llvm.ptr<f32>, vector<11xi1>) -> ()
-// CHECK: llvm.return
diff --git a/mlir/test/Dialect/LLVMIR/dialect-cast.mlir b/mlir/test/Dialect/LLVMIR/dialect-cast.mlir
index b72141dc4142..e3e2d96b6c28 100644
--- a/mlir/test/Dialect/LLVMIR/dialect-cast.mlir
+++ b/mlir/test/Dialect/LLVMIR/dialect-cast.mlir
@@ -3,12 +3,13 @@
// These are the supported cases, just make sure they don't trigger errors, op
// syntax is tested elsewhere.
-func @mlir_dialect_cast(%0: index, %1: i32, %2: bf16, %3: f16, %4: f32, %5: f64,
+func @mlir_dialect_cast(%0: index, %1: vector<2x2x2xf32>,
%6: vector<42xf32>, %7: memref<42xf32>,
%8: memref<?xf32>, %9: memref<f32>,
%10: memref<*xf32>) {
llvm.mlir.cast %0 : index to i64
llvm.mlir.cast %0 : index to i32
+ llvm.mlir.cast %1 : vector<2x2x2xf32> to !llvm.array<2 x array<2 x vector<2xf32>>>
llvm.mlir.cast %7 : memref<42xf32> to !llvm.ptr<f32>
llvm.mlir.cast %7 : memref<42xf32> to !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<1xi64>, array<1xi64>)>
llvm.mlir.cast %8 : memref<?xf32> to !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<1xi64>, array<1xi64>)>
@@ -65,19 +66,33 @@ func @mlir_dialect_cast_f64(%0 : f64) {
// -----
func @mlir_dialect_cast_integer_non_integer(%0 : i16) {
- // expected-error@+1 {{unsupported cast}}
+ // expected-error@+1 {{invalid cast between integer and non-integer}}
llvm.mlir.cast %0 : i16 to f16
}
// -----
func @mlir_dialect_cast_scalable_vector(%0 : vector<2xf32>) {
- // expected-error@+1 {{vector types should not be casted}}
+ // expected-error@+1 {{invalid cast for vector types}}
llvm.mlir.cast %0 : vector<2xf32> to !llvm.vec<?x2xf32>
}
// -----
+func @mlir_dialect_cast_vector_to_self(%0 : vector<2xf32>) {
+ // expected-error@+1 {{vector types should not be casted}}
+ llvm.mlir.cast %0 : vector<2xf32> to vector<2xf32>
+}
+
+// -----
+
+func @mlir_dialect_cast_nd_vector(%0 : vector<2x2xf32>) {
+ // expected-error@+1 {{invalid cast for vector, expected array}}
+ llvm.mlir.cast %0 : vector<2x2xf32> to !llvm.struct<()>
+}
+
+// -----
+
func @mlir_dialect_cast_dynamic_memref_bare_ptr(%0 : memref<?xf32>) {
// expected-error@+1 {{unexpected bare pointer for dynamically shaped memref}}
llvm.mlir.cast %0 : memref<?xf32> to !llvm.ptr<f32>
diff --git a/mlir/test/Target/vector-to-llvm-ir.mlir b/mlir/test/Target/vector-to-llvm-ir.mlir
index 4ede6ca2a5df..271d97a2a170 100644
--- a/mlir/test/Target/vector-to-llvm-ir.mlir
+++ b/mlir/test/Target/vector-to-llvm-ir.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -convert-vector-to-llvm | mlir-translate -mlir-to-llvmir | FileCheck %s
+// RUN: mlir-opt %s -convert-vector-to-llvm -convert-std-to-llvm | mlir-translate -mlir-to-llvmir | FileCheck %s
func @genbool_1d() -> vector<8xi1> {
%0 = vector.constant_mask [4] : vector<8xi1>
More information about the Mlir-commits
mailing list