[Mlir-commits] [mlir] [mlir][Vector] Add patterns for efficient unsigned i4 -> i8 conversion emulation (PR #89131)

Cullen Rhodes llvmlistbot at llvm.org
Mon Apr 22 23:58:18 PDT 2024


================
@@ -880,6 +880,38 @@ static Value rewriteI4ToI8SignedExt(PatternRewriter &rewriter, Location loc,
   return rewriter.create<vector::InterleaveOp>(loc, low, high);
 }
 
+/// Rewrite the i4 -> i8 unsigned extension into a sequence of shuffles and
+/// bitwise ops that take advantage of high-level information to avoid leaving
+/// LLVM to scramble with peephole optimizations.
+static Value rewriteI4ToI8UnsignedExt(PatternRewriter &rewriter, Location loc,
+                                      Value srcValue) {
+  VectorType srcVecType = cast<VectorType>(srcValue.getType());
+  assert(srcVecType.getElementType().isSignlessInteger(4) &&
+         "Expected i4 type");
+
+  // 1. Generate a bitcast vector<Xxi4> -> vector<X/2xi8>.
+  SmallVector<int64_t> i8VecShape = llvm::to_vector(srcVecType.getShape());
+  constexpr int64_t i4Toi8BitwidthFactor = 2;
+  i8VecShape.back() = i8VecShape.back() / i4Toi8BitwidthFactor;
+  auto i8VecType = VectorType::get(i8VecShape, rewriter.getI8Type());
+  Value i8Vector = rewriter.create<vector::BitCastOp>(loc, i8VecType, srcValue);
+
+  // 2 Extend the i4 elements using shifts & masking. Low i4 elemens of each
+  //  byte are place in one vector and the high i4 elements in another vector.
+  constexpr unsigned char lowBitsMask = 15; // Equivalent to [0000IIII] bit mask
+  auto lowBitsMaskValues = rewriter.create<arith::ConstantOp>(
+      loc, DenseElementsAttr::get(i8VecType, lowBitsMask));
+  Value low = rewriter.create<arith::AndIOp>(loc, i8Vector.getType(), i8Vector,
----------------
c-rhodes wrote:

```suggestion
  Value low = rewriter.create<arith::AndIOp>(loc, i8VecType, i8Vector,
```

https://github.com/llvm/llvm-project/pull/89131


More information about the Mlir-commits mailing list