[Mlir-commits] [mlir] [mlir] AMDGPUToROCDL: lower `amdgpu.swizzle_bitmode` (PR #136223)

Fri Apr 18 02:00:38 PDT 2025

================
@@ -381,3 +381,95 @@ LogicalResult LLVM::detail::oneToOneRewrite(
   rewriter.replaceOp(op, results);
   return success();
 }
+
+static unsigned getBitWidth(Type type) {
+  if (type.isIntOrFloat())
+    return type.getIntOrFloatBitWidth();
+
+  auto vec = cast<VectorType>(type);
+  return vec.getNumElements() * getBitWidth(vec.getElementType());
+}
+
+static Value createI32Constant(OpBuilder &builder, Location loc,
+                               int32_t value) {
+  Type i32 = builder.getI32Type();
+  return builder.create<LLVM::ConstantOp>(loc, i32, value);
+}
+
+SmallVector<Value> mlir::LLVM::decomposeValue(OpBuilder &builder, Location loc,
+                                              Value src, Type dstType) {
+  Type srcType = src.getType();
+  if (srcType == dstType)
+    return {src};
+
+  unsigned srcBitWidth = getBitWidth(srcType);
+  unsigned dstBitWidth = getBitWidth(dstType);
+  if (srcBitWidth == dstBitWidth) {
+    Value cast = builder.create<LLVM::BitcastOp>(loc, dstType, src);
+    return {cast};
+  }
+
+  if (dstBitWidth > srcBitWidth) {
+    auto smallerInt = builder.getIntegerType(srcBitWidth);
+    if (srcType != smallerInt)
+      src = builder.create<LLVM::BitcastOp>(loc, smallerInt, src);
+
+    auto largerInt = builder.getIntegerType(dstBitWidth);
+    Value res = builder.create<LLVM::ZExtOp>(loc, largerInt, src);
+    return {res};
+  }
+  assert(srcBitWidth % dstBitWidth == 0 &&
+         "src bit width must be a multiple of dst bit width");
+  int64_t numElements = srcBitWidth / dstBitWidth;
+  auto vecType = VectorType::get(numElements, dstType);
+
+  src = builder.create<LLVM::BitcastOp>(loc, vecType, src);
+
+  SmallVector<Value> res;
+  for (auto i : llvm::seq<int64_t>(0, numElements)) {
----------------
Hardcode84 wrote:

I like `llvm:seq` more than C loops. But switched to 1-arg version.

https://github.com/llvm/llvm-project/pull/136223