[Mlir-commits] [mlir] [mlir][gpu][spirv] Remove rotation semantics of gpu.shuffle up/down (PR #139105)

Fri Jun 6 07:59:08 PDT 2025

================
@@ -416,6 +416,30 @@ LogicalResult GPUBarrierConversion::matchAndRewrite(
   return success();
 }
 
+template <typename T>
+Value getDimOp(OpBuilder &builder, MLIRContext *ctx, Location loc,
+               gpu::Dimension dimension) {
+  Type indexType = IndexType::get(ctx);
+  IntegerType i32Type = IntegerType::get(ctx, 32);
+  Value dim = builder.create<T>(loc, indexType, dimension);
+  return builder.create<arith::IndexCastOp>(loc, i32Type, dim);
+}
+
+Value getLaneId(OpBuilder &rewriter, MLIRContext *ctx, Location loc) {
+  Value dimX = getDimOp<gpu::BlockDimOp>(rewriter, ctx, loc, gpu::Dimension::x);
+  Value dimY = getDimOp<gpu::BlockDimOp>(rewriter, ctx, loc, gpu::Dimension::y);
+  Value tidX = getDimOp<gpu::ThreadIdOp>(rewriter, ctx, loc, gpu::Dimension::x);
+  Value tidY = getDimOp<gpu::ThreadIdOp>(rewriter, ctx, loc, gpu::Dimension::y);
+  Value tidZ = getDimOp<gpu::ThreadIdOp>(rewriter, ctx, loc, gpu::Dimension::z);
+  auto i32Type = rewriter.getIntegerType(32);
+  Value tmp1 = rewriter.create<arith::MulIOp>(loc, i32Type, tidZ, dimY);
+  Value tmp2 = rewriter.create<arith::AddIOp>(loc, i32Type, tmp1, tidY);
+  Value tmp3 = rewriter.create<arith::MulIOp>(loc, i32Type, tmp2, dimX);
+  Value laneId = rewriter.create<arith::AddIOp>(loc, i32Type, tmp3, tidX);
----------------
kuhar wrote:

> In addition, MLIR converts `gpu::BlockDimOp` to SPIR-V workgroup size for x, y, z dimensions and converts `gpu::ThreadIdOp` to SPIR-V local invocation Id along x, y, z dimensions.

You can convert a thread ID to a lane ID if you know the workgroup size, but the math here used to be incorrect.

https://github.com/llvm/llvm-project/pull/139105