[Mlir-commits] [mlir] [mlir][gpu] Add gpu.ballot operation to GPU dialect (PR #188647)

Mehdi Amini llvmlistbot at llvm.org
Fri Mar 27 03:39:52 PDT 2026


================
@@ -236,6 +236,41 @@ struct GPULaneIdOpToNVVM : ConvertOpToLLVMPattern<gpu::LaneIdOp> {
   }
 };
 
+struct GPUBallotOpToNVVM : public ConvertOpToLLVMPattern<gpu::BallotOp> {
+  using ConvertOpToLLVMPattern<gpu::BallotOp>::ConvertOpToLLVMPattern;
+
+  LogicalResult
+  matchAndRewrite(gpu::BallotOp op, gpu::BallotOp::Adaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
+    Location loc = op->getLoc();
+    auto int32Type = IntegerType::get(rewriter.getContext(), 32);
+    auto intType = cast<IntegerType>(op.getType());
+    unsigned width = intType.getWidth();
+
+    // NVVM ballot natively returns i32. For i64 results, zero-extend since
+    // NVIDIA warps have exactly 32 threads, so upper 32 bits are always zero.
+    if (width != 32 && width != 64)
+      return rewriter.notifyMatchFailure(
+          op, "nvvm.vote.sync ballot only supports i32 and i64 result types");
+
+    // Use full mask (-1) so all 32 lanes participate in the ballot.
+    Value mask = LLVM::ConstantOp::create(rewriter, loc, int32Type,
+                                          rewriter.getI32IntegerAttr(-1));
+
+    auto voteKind = NVVM::VoteSyncKindAttr::get(rewriter.getContext(),
+                                                NVVM::VoteSyncKind::ballot);
+    Value result = NVVM::VoteSyncOp::create(rewriter, loc, int32Type, mask,
+                                            adaptor.getPredicate(), voteKind);
+
+    if (width == 64) {
+      result = LLVM::ZExtOp::create(rewriter, loc, op.getType(), result);
+    }
----------------
joker-eph wrote:

```suggestion
    if (width == 64)
      result = LLVM::ZExtOp::create(rewriter, loc, op.getType(), result);
```

Nit: omit the braces around a single-statement `if` body (no trivial braces, per the LLVM coding standards).

https://github.com/llvm/llvm-project/pull/188647


More information about the Mlir-commits mailing list