[Mlir-commits] [mlir] 2fda620 - [mlir][sparse][vectorization] implement "index" vectorization
Author: Aart Bik
Date: 2022-11-30T11:40:05-08:00
New Revision: 2fda6207118d1d1c19e3b66f615f332ffc2792d0
URL: https://github.com/llvm/llvm-project/commit/2fda6207118d1d1c19e3b66f615f332ffc2792d0
DIFF: https://github.com/llvm/llvm-project/commit/2fda6207118d1d1c19e3b66f615f332ffc2792d0.diff
LOG: [mlir][sparse][vectorization] implement "index" vectorization
This adds the capability to vectorize computations like a[i] = i.
It also generalizes the supported unary and binary ops and
adds a test for each to ensure that actual SIMD code can be generated.
Reviewed By: bixia
Differential Revision: https://reviews.llvm.org/D138956
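
To illustrate the new capability, here is a minimal, standalone MLIR sketch
(not part of the patch; the function name and the fixed vector length of 8 are
chosen purely for illustration) of the vector form a loop index expands to:
the induction variable is broadcast and a constant step vector is added,
yielding [i, i+1, ..., i+7]. On the scalable-vector path the patch uses
LLVM::StepVectorOp instead of the dense constant; the tests added below
contain the autogenerated checks for both the conjunctive and disjunctive
cases.

  func.func @expand_index(%iv: index) -> vector<8xi64> {
    // Step vector [0, 1, ..., 7] as a dense constant (fixed vector length).
    %step = arith.constant dense<[0, 1, 2, 3, 4, 5, 6, 7]> : vector<8xindex>
    // Broadcast the scalar induction variable i into <i, i, ..., i>.
    %bcst = vector.broadcast %iv : index to vector<8xindex>
    // Add the step to obtain <i, i+1, ..., i+7>.
    %vidx = arith.addi %bcst, %step : vector<8xindex>
    // Cast to the integer element type used by the surrounding computation.
    %vii = arith.index_cast %vidx : vector<8xindex> to vector<8xi64>
    return %vii : vector<8xi64>
  }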
Added:
mlir/test/Dialect/SparseTensor/sparse_vector_index.mlir
mlir/test/Dialect/SparseTensor/sparse_vector_ops.mlir
Modified:
mlir/lib/Dialect/SparseTensor/Transforms/SparseVectorization.cpp
Removed:
################################################################################
diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/SparseVectorization.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/SparseVectorization.cpp
index 028a471f41c8d..7d6ac51dab1b7 100644
--- a/mlir/lib/Dialect/SparseTensor/Transforms/SparseVectorization.cpp
+++ b/mlir/lib/Dialect/SparseTensor/Transforms/SparseVectorization.cpp
@@ -216,7 +216,8 @@ static Value genVectorReducInit(PatternRewriter &rewriter, Location loc,
/// The first call (!codegen) does the analysis. Then, on success, the second
/// call (codegen) yields the proper vector form in the output parameter
/// vector 'idxs'. This mechanism ensures that analysis and rewriting code
-/// stay in sync.
+/// stay in sync. Note that the analysis part is simple because the sparse
+/// compiler only generates relatively simple subscript expressions.
///
/// See https://llvm.org/docs/GetElementPtr.html for some background on
/// the complications described below.
@@ -234,7 +235,7 @@ static bool vectorizeSubscripts(PatternRewriter &rewriter, scf::ForOp forOp,
VL vl, ValueRange subs, bool codegen,
Value vmask, SmallVectorImpl<Value> &idxs) {
for (auto sub : subs) {
- // Invariant indices simply pass through.
+ // Invariant/loop indices simply pass through.
if (sub.dyn_cast<BlockArgument>() ||
sub.getDefiningOp()->getBlock() != &forOp.getRegion().front()) {
if (codegen)
@@ -293,6 +294,15 @@ static bool vectorizeSubscripts(PatternRewriter &rewriter, scf::ForOp forOp,
return true; \
}
+#define TYPEDUNAOP(xxx) \
+ if (auto x = dyn_cast<xxx>(def)) { \
+ if (codegen) { \
+ VectorType vtp = vectorType(vl, x.getType()); \
+ vexp = rewriter.create<xxx>(loc, vtp, vx); \
+ } \
+ return true; \
+ }
+
#define BINOP(xxx) \
if (isa<xxx>(def)) { \
if (codegen) \
@@ -303,27 +313,60 @@ static bool vectorizeSubscripts(PatternRewriter &rewriter, scf::ForOp forOp,
/// This method is called twice to analyze and rewrite the given expression.
/// The first call (!codegen) does the analysis. Then, on success, the second
/// call (codegen) yields the proper vector form in the output parameter 'vexp'.
-/// This mechanism ensures that analysis and rewriting code stay in sync.
+/// This mechanism ensures that analysis and rewriting code stay in sync. Note
+/// that the analysis part is simple because the sparse compiler only generates
+/// relatively simple expressions inside the for-loops.
static bool vectorizeExpr(PatternRewriter &rewriter, scf::ForOp forOp, VL vl,
Value exp, bool codegen, Value vmask, Value &vexp) {
- // A block argument in invariant.
+ Location loc = forOp.getLoc();
+ // Reject unsupported types.
+ if (!VectorType::isValidElementType(exp.getType()))
+ return false;
+ // A block argument is invariant/reduction/index.
if (auto arg = exp.dyn_cast<BlockArgument>()) {
- if (codegen)
- vexp = genVectorInvariantValue(rewriter, vl, exp);
- return true;
+ if (arg == forOp.getInductionVar()) {
+ // We encountered a single, innermost index inside the computation,
+ // such as a[i] = i, which must convert to [i, i+1, ...].
+ if (codegen) {
+ VectorType vtp = vectorType(vl, arg.getType());
+ Value veci = rewriter.create<vector::BroadcastOp>(loc, vtp, arg);
+ Value incr;
+ if (vl.enableVLAVectorization) {
+ Type stepvty = vectorType(vl, rewriter.getI64Type());
+ Value stepv = rewriter.create<LLVM::StepVectorOp>(loc, stepvty);
+ incr = rewriter.create<arith::IndexCastOp>(loc, vtp, stepv);
+ } else {
+ SmallVector<APInt> integers;
+ for (unsigned i = 0, l = vl.vectorLength; i < l; i++)
+ integers.push_back(APInt(/*width=*/64, i));
+ auto values = DenseElementsAttr::get(vtp, integers);
+ incr = rewriter.create<arith::ConstantOp>(loc, vtp, values);
+ }
+ vexp = rewriter.create<arith::AddIOp>(loc, veci, incr);
+ }
+ return true;
+ } else {
+ // An invariant or reduction. In both cases, we treat this as an
+ // invariant value, and rely on later replacing and folding to
+ // construct a proper reduction chain for the latter case.
+ if (codegen)
+ vexp = genVectorInvariantValue(rewriter, vl, exp);
+ return true;
+ }
}
- // Something defined outside the loop-body is invariant as well.
+ // Something defined outside the loop-body is invariant.
Operation *def = exp.getDefiningOp();
if (def->getBlock() != &forOp.getRegion().front()) {
if (codegen)
vexp = genVectorInvariantValue(rewriter, vl, exp);
return true;
}
- // Inside loop-body unary and binary operations. Note that it would be
- // nicer if we could somehow test and build the operations in a more
- // concise manner than just listing them all (although this way we know
- // for certain that they can vectorize).
- Location loc = forOp.getLoc();
+ // Proper load operations. These are either values involved in the
+ // actual computation, such as a[i] = b[i] becomes a[lo:hi] = b[lo:hi],
+ // or index values inside the computation that are now fetched from
+ // the sparse storage index arrays, such as a[i] = i becomes
+ // a[lo:hi] = ind[lo:hi], where 'lo' denotes the current index
+ // and 'hi = lo + vl - 1'.
if (auto load = dyn_cast<memref::LoadOp>(def)) {
auto subs = load.getIndices();
SmallVector<Value> idxs;
@@ -332,7 +375,16 @@ static bool vectorizeExpr(PatternRewriter &rewriter, scf::ForOp forOp, VL vl,
vexp = genVectorLoad(rewriter, loc, vl, load.getMemRef(), idxs, vmask);
return true;
}
- } else if (def->getNumOperands() == 1) {
+ return false;
+ }
+ // Inside loop-body unary and binary operations. Note that it would be
+ // nicer if we could somehow test and build the operations in a more
+ // concise manner than just listing them all (although this way we know
+ // for certain that they can vectorize).
+ //
+ // TODO: avoid visiting CSEs multiple times
+ //
+ if (def->getNumOperands() == 1) {
Value vx;
if (vectorizeExpr(rewriter, forOp, vl, def->getOperand(0), codegen, vmask,
vx)) {
@@ -346,6 +398,17 @@ static bool vectorizeExpr(PatternRewriter &rewriter, scf::ForOp forOp, VL vl,
UNAOP(math::SinOp)
UNAOP(math::TanhOp)
UNAOP(arith::NegFOp)
+ TYPEDUNAOP(arith::TruncFOp)
+ TYPEDUNAOP(arith::ExtFOp)
+ TYPEDUNAOP(arith::FPToSIOp)
+ TYPEDUNAOP(arith::FPToUIOp)
+ TYPEDUNAOP(arith::SIToFPOp)
+ TYPEDUNAOP(arith::UIToFPOp)
+ TYPEDUNAOP(arith::ExtSIOp)
+ TYPEDUNAOP(arith::ExtUIOp)
+ TYPEDUNAOP(arith::IndexCastOp)
+ TYPEDUNAOP(arith::TruncIOp)
+ TYPEDUNAOP(arith::BitcastOp)
}
} else if (def->getNumOperands() == 2) {
Value vx, vy;
@@ -365,12 +428,14 @@ static bool vectorizeExpr(PatternRewriter &rewriter, scf::ForOp forOp, VL vl,
BINOP(arith::AndIOp)
BINOP(arith::OrIOp)
BINOP(arith::XOrIOp)
+ // TODO: shift by invariant?
}
}
return false;
}
#undef UNAOP
+#undef TYPEDUNAOP
#undef BINOP
/// This method is called twice to analyze and rewrite the given for-loop.
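
The difference between UNAOP and TYPEDUNAOP above is that the latter covers
type-changing unary ops, so the result vector type must be derived from the
scalar op's result type rather than reused from the operand vector. A small,
hypothetical MLIR fragment (names and types chosen for illustration only)
showing the kind of vector code these casts now produce:

  func.func @typed_unary(%v: vector<8xf32>) -> vector<8xi32> {
    // Each cast changes the element type, so each result needs its own vector type.
    %d = arith.extf %v : vector<8xf32> to vector<8xf64>
    %i = arith.fptosi %d : vector<8xf64> to vector<8xi64>
    %t = arith.trunci %i : vector<8xi64> to vector<8xi32>
    return %t : vector<8xi32>
  }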
diff --git a/mlir/test/Dialect/SparseTensor/sparse_vector_index.mlir b/mlir/test/Dialect/SparseTensor/sparse_vector_index.mlir
new file mode 100644
index 0000000000000..37d5b8049f809
--- /dev/null
+++ b/mlir/test/Dialect/SparseTensor/sparse_vector_index.mlir
@@ -0,0 +1,124 @@
+// RUN: mlir-opt %s -sparsification -cse -sparse-vectorization="vl=8" -cse | \
+// RUN: FileCheck %s
+
+// NOTE: Assertions have been autogenerated by utils/generate-test-checks.py
+
+#SparseVector = #sparse_tensor.encoding<{
+ dimLevelType = ["compressed"]
+}>
+
+#trait_1d = {
+ indexing_maps = [
+ affine_map<(i) -> (i)>, // a
+ affine_map<(i) -> (i)> // x (out)
+ ],
+ iterator_types = ["parallel"],
+ doc = "X(i) = a(i) op i"
+}
+
+// CHECK-LABEL: func.func @sparse_index_1d_conj(
+// CHECK-SAME: %[[VAL_0:.*]]: tensor<8xi64, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ] }>>) -> tensor<8xi64> {
+// CHECK-DAG: %[[VAL_1:.*]] = arith.constant 8 : index
+// CHECK-DAG: %[[VAL_2:.*]] = arith.constant dense<0> : vector<8xi64>
+// CHECK-DAG: %[[VAL_3:.*]] = arith.constant dense<0> : vector<8xindex>
+// CHECK-DAG: %[[VAL_4:.*]] = arith.constant 0 : i64
+// CHECK-DAG: %[[VAL_5:.*]] = arith.constant 0 : index
+// CHECK-DAG: %[[VAL_6:.*]] = arith.constant 1 : index
+// CHECK-DAG: %[[VAL_7:.*]] = tensor.empty() : tensor<8xi64>
+// CHECK: %[[VAL_8:.*]] = sparse_tensor.pointers %[[VAL_0]] {dimension = 0 : index} : tensor<8xi64, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ] }>> to memref<?xindex>
+// CHECK: %[[VAL_9:.*]] = sparse_tensor.indices %[[VAL_0]] {dimension = 0 : index} : tensor<8xi64, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ] }>> to memref<?xindex>
+// CHECK: %[[VAL_10:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<8xi64, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ] }>> to memref<?xi64>
+// CHECK: %[[VAL_11:.*]] = bufferization.to_memref %[[VAL_7]] : memref<8xi64>
+// CHECK: linalg.fill ins(%[[VAL_4]] : i64) outs(%[[VAL_11]] : memref<8xi64>)
+// CHECK: %[[VAL_12:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_5]]] : memref<?xindex>
+// CHECK: %[[VAL_13:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_6]]] : memref<?xindex>
+// CHECK: scf.for %[[VAL_14:.*]] = %[[VAL_12]] to %[[VAL_13]] step %[[VAL_1]] {
+// CHECK: %[[VAL_15:.*]] = affine.min #map1(%[[VAL_13]], %[[VAL_14]]){{\[}}%[[VAL_1]]]
+// CHECK: %[[VAL_16:.*]] = vector.create_mask %[[VAL_15]] : vector<8xi1>
+// CHECK: %[[VAL_17:.*]] = vector.maskedload %[[VAL_9]]{{\[}}%[[VAL_14]]], %[[VAL_16]], %[[VAL_3]] : memref<?xindex>, vector<8xi1>, vector<8xindex> into vector<8xindex>
+// CHECK: %[[VAL_18:.*]] = vector.maskedload %[[VAL_10]]{{\[}}%[[VAL_14]]], %[[VAL_16]], %[[VAL_2]] : memref<?xi64>, vector<8xi1>, vector<8xi64> into vector<8xi64>
+// CHECK: %[[VAL_19:.*]] = arith.index_cast %[[VAL_17]] : vector<8xindex> to vector<8xi64>
+// CHECK: %[[VAL_20:.*]] = arith.muli %[[VAL_18]], %[[VAL_19]] : vector<8xi64>
+// CHECK: vector.scatter %[[VAL_11]]{{\[}}%[[VAL_5]]] {{\[}}%[[VAL_17]]], %[[VAL_16]], %[[VAL_20]] : memref<8xi64>, vector<8xindex>, vector<8xi1>, vector<8xi64>
+// CHECK: } {"Emitted from" = "linalg.generic"}
+// CHECK: %[[VAL_21:.*]] = bufferization.to_tensor %[[VAL_11]] : memref<8xi64>
+// CHECK: return %[[VAL_21]] : tensor<8xi64>
+// CHECK: }
+func.func @sparse_index_1d_conj(%arga: tensor<8xi64, #SparseVector>) -> tensor<8xi64> {
+ %init = tensor.empty() : tensor<8xi64>
+ %r = linalg.generic #trait_1d
+ ins(%arga: tensor<8xi64, #SparseVector>)
+ outs(%init: tensor<8xi64>) {
+ ^bb(%a: i64, %x: i64):
+ %i = linalg.index 0 : index
+ %ii = arith.index_cast %i : index to i64
+ %m1 = arith.muli %a, %ii : i64
+ linalg.yield %m1 : i64
+ } -> tensor<8xi64>
+ return %r : tensor<8xi64>
+}
+
+// CHECK-LABEL: func.func @sparse_index_1d_disj(
+// CHECK-SAME: %[[VAL_0:.*]]: tensor<8xi64, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ] }>>) -> tensor<8xi64> {
+// CHECK-DAG: %[[VAL_1:.*]] = arith.constant 8 : index
+// CHECK-DAG: %[[VAL_2:.*]] = arith.constant dense<[0, 1, 2, 3, 4, 5, 6, 7]> : vector<8xindex>
+// CHECK-DAG: %[[VAL_3:.*]] = arith.constant 0 : i64
+// CHECK-DAG: %[[VAL_4:.*]] = arith.constant 0 : index
+// CHECK-DAG: %[[VAL_5:.*]] = arith.constant 1 : index
+// CHECK-DAG: %[[VAL_6:.*]] = arith.constant true
+// CHECK-DAG: %[[VAL_7:.*]] = tensor.empty() : tensor<8xi64>
+// CHECK: %[[VAL_8:.*]] = sparse_tensor.pointers %[[VAL_0]] {dimension = 0 : index} : tensor<8xi64, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ] }>> to memref<?xindex>
+// CHECK: %[[VAL_9:.*]] = sparse_tensor.indices %[[VAL_0]] {dimension = 0 : index} : tensor<8xi64, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ] }>> to memref<?xindex>
+// CHECK: %[[VAL_10:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<8xi64, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ] }>> to memref<?xi64>
+// CHECK: %[[VAL_11:.*]] = bufferization.to_memref %[[VAL_7]] : memref<8xi64>
+// CHECK: linalg.fill ins(%[[VAL_3]] : i64) outs(%[[VAL_11]] : memref<8xi64>)
+// CHECK: %[[VAL_12:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_4]]] : memref<?xindex>
+// CHECK: %[[VAL_13:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_5]]] : memref<?xindex>
+// CHECK: %[[VAL_14:.*]]:2 = scf.while (%[[VAL_15:.*]] = %[[VAL_12]], %[[VAL_16:.*]] = %[[VAL_4]]) : (index, index) -> (index, index) {
+// CHECK: %[[VAL_17:.*]] = arith.cmpi ult, %[[VAL_15]], %[[VAL_13]] : index
+// CHECK: scf.condition(%[[VAL_17]]) %[[VAL_15]], %[[VAL_16]] : index, index
+// CHECK: } do {
+// CHECK: ^bb0(%[[VAL_18:.*]]: index, %[[VAL_19:.*]]: index):
+// CHECK: %[[VAL_20:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_18]]] : memref<?xindex>
+// CHECK: %[[VAL_21:.*]] = arith.cmpi eq, %[[VAL_20]], %[[VAL_19]] : index
+// CHECK: scf.if %[[VAL_21]] {
+// CHECK: %[[VAL_22:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_18]]] : memref<?xi64>
+// CHECK: %[[VAL_23:.*]] = arith.index_cast %[[VAL_19]] : index to i64
+// CHECK: %[[VAL_24:.*]] = arith.addi %[[VAL_22]], %[[VAL_23]] : i64
+// CHECK: memref.store %[[VAL_24]], %[[VAL_11]]{{\[}}%[[VAL_19]]] : memref<8xi64>
+// CHECK: } else {
+// CHECK: scf.if %[[VAL_6]] {
+// CHECK: %[[VAL_25:.*]] = arith.index_cast %[[VAL_19]] : index to i64
+// CHECK: memref.store %[[VAL_25]], %[[VAL_11]]{{\[}}%[[VAL_19]]] : memref<8xi64>
+// CHECK: } else {
+// CHECK: }
+// CHECK: }
+// CHECK: %[[VAL_26:.*]] = arith.addi %[[VAL_18]], %[[VAL_5]] : index
+// CHECK: %[[VAL_27:.*]] = arith.select %[[VAL_21]], %[[VAL_26]], %[[VAL_18]] : index
+// CHECK: %[[VAL_28:.*]] = arith.addi %[[VAL_19]], %[[VAL_5]] : index
+// CHECK: scf.yield %[[VAL_27]], %[[VAL_28]] : index, index
+// CHECK: } attributes {"Emitted from" = "linalg.generic"}
+// CHECK: scf.for %[[VAL_29:.*]] = %[[VAL_30:.*]]#1 to %[[VAL_1]] step %[[VAL_1]] {
+// CHECK: %[[VAL_31:.*]] = affine.min #map1(%[[VAL_1]], %[[VAL_29]]){{\[}}%[[VAL_1]]]
+// CHECK: %[[VAL_32:.*]] = vector.create_mask %[[VAL_31]] : vector<8xi1>
+// CHECK: %[[VAL_33:.*]] = vector.broadcast %[[VAL_29]] : index to vector<8xindex>
+// CHECK: %[[VAL_34:.*]] = arith.addi %[[VAL_33]], %[[VAL_2]] : vector<8xindex>
+// CHECK: %[[VAL_35:.*]] = arith.index_cast %[[VAL_34]] : vector<8xindex> to vector<8xi64>
+// CHECK: vector.maskedstore %[[VAL_11]]{{\[}}%[[VAL_29]]], %[[VAL_32]], %[[VAL_35]] : memref<8xi64>, vector<8xi1>, vector<8xi64>
+// CHECK: } {"Emitted from" = "linalg.generic"}
+// CHECK: %[[VAL_36:.*]] = bufferization.to_tensor %[[VAL_11]] : memref<8xi64>
+// CHECK: return %[[VAL_36]] : tensor<8xi64>
+// CHECK: }
+func.func @sparse_index_1d_disj(%arga: tensor<8xi64, #SparseVector>) -> tensor<8xi64> {
+ %init = tensor.empty() : tensor<8xi64>
+ %r = linalg.generic #trait_1d
+ ins(%arga: tensor<8xi64, #SparseVector>)
+ outs(%init: tensor<8xi64>) {
+ ^bb(%a: i64, %x: i64):
+ %i = linalg.index 0 : index
+ %ii = arith.index_cast %i : index to i64
+ %m1 = arith.addi %a, %ii : i64
+ linalg.yield %m1 : i64
+ } -> tensor<8xi64>
+ return %r : tensor<8xi64>
+}
diff --git a/mlir/test/Dialect/SparseTensor/sparse_vector_ops.mlir b/mlir/test/Dialect/SparseTensor/sparse_vector_ops.mlir
new file mode 100644
index 0000000000000..32900d93c0bf1
--- /dev/null
+++ b/mlir/test/Dialect/SparseTensor/sparse_vector_ops.mlir
@@ -0,0 +1,77 @@
+// RUN: mlir-opt %s -sparsification -cse -sparse-vectorization="vl=8" -cse | \
+// RUN: FileCheck %s
+
+#DenseVector = #sparse_tensor.encoding<{ dimLevelType = [ "dense" ] }>
+
+#trait = {
+ indexing_maps = [
+ affine_map<(i) -> (i)>, // a
+ affine_map<(i) -> (i)>, // b
+ affine_map<(i) -> (i)> // x (out)
+ ],
+ iterator_types = ["parallel"],
+ doc = "x(i) = a(i) ops b(i)"
+}
+
+// CHECK-LABEL: func.func @vops
+// CHECK-DAG: %[[C1:.*]] = arith.constant dense<2.000000e+00> : vector<8xf32>
+// CHECK-DAG: %[[C2:.*]] = arith.constant dense<1.000000e+00> : vector<8xf32>
+// CHECK-DAG: %[[C3:.*]] = arith.constant dense<255> : vector<8xi64>
+// CHECK: scf.for
+// CHECK: %[[VAL_14:.*]] = vector.load
+// CHECK: %[[VAL_15:.*]] = math.absf %[[VAL_14]] : vector<8xf32>
+// CHECK: %[[VAL_16:.*]] = math.ceil %[[VAL_15]] : vector<8xf32>
+// CHECK: %[[VAL_17:.*]] = math.floor %[[VAL_16]] : vector<8xf32>
+// CHECK: %[[VAL_18:.*]] = math.sqrt %[[VAL_17]] : vector<8xf32>
+// CHECK: %[[VAL_19:.*]] = math.expm1 %[[VAL_18]] : vector<8xf32>
+// CHECK: %[[VAL_20:.*]] = math.sin %[[VAL_19]] : vector<8xf32>
+// CHECK: %[[VAL_21:.*]] = math.tanh %[[VAL_20]] : vector<8xf32>
+// CHECK: %[[VAL_22:.*]] = arith.negf %[[VAL_21]] : vector<8xf32>
+// CHECK: %[[VAL_23:.*]] = vector.load
+// CHECK: %[[VAL_24:.*]] = arith.mulf %[[VAL_22]], %[[VAL_23]] : vector<8xf32>
+// CHECK: %[[VAL_25:.*]] = arith.divf %[[VAL_24]], %[[C1]] : vector<8xf32>
+// CHECK: %[[VAL_26:.*]] = arith.addf %[[VAL_25]], %[[C1]] : vector<8xf32>
+// CHECK: %[[VAL_27:.*]] = arith.subf %[[VAL_26]], %[[C2]] : vector<8xf32>
+// CHECK: %[[VAL_28:.*]] = arith.extf %[[VAL_27]] : vector<8xf32> to vector<8xf64>
+// CHECK: %[[VAL_29:.*]] = arith.bitcast %[[VAL_28]] : vector<8xf64> to vector<8xi64>
+// CHECK: %[[VAL_30:.*]] = arith.addi %[[VAL_29]], %[[VAL_29]] : vector<8xi64>
+// CHECK: %[[VAL_31:.*]] = arith.andi %[[VAL_30]], %[[C3]] : vector<8xi64>
+// CHECK: %[[VAL_32:.*]] = arith.trunci %[[VAL_31]] : vector<8xi64> to vector<8xi16>
+// CHECK: %[[VAL_33:.*]] = arith.extsi %[[VAL_32]] : vector<8xi16> to vector<8xi32>
+// CHECK: %[[VAL_34:.*]] = arith.uitofp %[[VAL_33]] : vector<8xi32> to vector<8xf32>
+// CHECK: vector.store %[[VAL_34]]
+// CHECK: }
+func.func @vops(%arga: tensor<1024xf32, #DenseVector>,
+ %argb: tensor<1024xf32, #DenseVector>) -> tensor<1024xf32> {
+ %init = bufferization.alloc_tensor() : tensor<1024xf32>
+ %o = arith.constant 1.0 : f32
+ %c = arith.constant 2.0 : f32
+ %i = arith.constant 255 : i64
+ %0 = linalg.generic #trait
+ ins(%arga, %argb: tensor<1024xf32, #DenseVector>, tensor<1024xf32, #DenseVector>)
+ outs(%init: tensor<1024xf32>) {
+ ^bb(%a: f32, %b: f32, %x: f32):
+ %0 = math.absf %a : f32
+ %1 = math.ceil %0 : f32
+ %2 = math.floor %1 : f32
+ %3 = math.sqrt %2 : f32
+ %4 = math.expm1 %3 : f32
+ %5 = math.sin %4 : f32
+ %6 = math.tanh %5 : f32
+ %7 = arith.negf %6 : f32
+ %8 = arith.mulf %7, %b : f32
+ %9 = arith.divf %8, %c : f32
+ %10 = arith.addf %9, %c : f32
+ %11 = arith.subf %10, %o : f32
+ %12 = arith.extf %11 : f32 to f64
+ %13 = arith.bitcast %12 : f64 to i64
+ %14 = arith.addi %13, %13 : i64
+ %15 = arith.andi %14, %i : i64
+ %16 = arith.trunci %15 : i64 to i16
+ %17 = arith.extsi %16 : i16 to i32
+ %18 = arith.uitofp %17 : i32 to f32
+ linalg.yield %18 : f32
+ } -> tensor<1024xf32>
+ return %0 : tensor<1024xf32>
+}
+