[Mlir-commits] [mlir] [MLIR][NVVM] Fix undef in cp.async.bulk.tensor.reduce Op (PR #157423)
Guray Ozen
llvmlistbot at llvm.org
Mon Sep 15 03:59:33 PDT 2025
================
@@ -1634,53 +1634,124 @@ CpAsyncBulkTensorSharedCTAToGlobalOp::getIntrinsicIDAndArgs(
return {id, std::move(args)};
}
-#define CP_ASYNC_BULK_TENSOR_REDUCE_MODE(op, dim, mode) \
- llvm::Intrinsic::nvvm_cp_async_bulk_tensor_##op##_##mode##_##dim##d
+NVVM::IDArgPair CpAsyncBulkTensorReduceOp::getIntrinsicIDAndArgs(
+ Operation &op, LLVM::ModuleTranslation &mt, llvm::IRBuilderBase &builder) {
+ auto thisOp = cast<NVVM::CpAsyncBulkTensorReduceOp>(op);
+ llvm::LLVMContext &ctx = mt.getLLVMContext();
-#define CP_ASYNC_BULK_TENSOR_REDUCE(op, dim, is_im2col) \
- is_im2col ? CP_ASYNC_BULK_TENSOR_REDUCE_MODE(op, dim, im2col) \
- : CP_ASYNC_BULK_TENSOR_REDUCE_MODE(op, dim, tile)
+ llvm::SmallVector<llvm::Value *> args;
-#define GET_CP_ASYNC_BULK_TENSOR_ID(op, dims, is_im2col) \
- [&]() -> auto { \
- switch (dims) { \
- case 1: \
- return CP_ASYNC_BULK_TENSOR_REDUCE_MODE(op, 1, tile); \
- case 2: \
- return CP_ASYNC_BULK_TENSOR_REDUCE_MODE(op, 2, tile); \
- case 3: \
- return CP_ASYNC_BULK_TENSOR_REDUCE(op, 3, is_im2col); \
- case 4: \
- return CP_ASYNC_BULK_TENSOR_REDUCE(op, 4, is_im2col); \
- case 5: \
- return CP_ASYNC_BULK_TENSOR_REDUCE(op, 5, is_im2col); \
- default: \
- llvm_unreachable("Invalid TensorDim in CpAsyncBulkTensorReduceOp."); \
- } \
- }()
+ // Arguments to the intrinsic:
+ // shared_mem_ptr, tmaDesc, tensorDims
+ // cache_hint(if applicable) and flag(boolean)
+ args.push_back(mt.lookupValue(thisOp.getSrcMem()));
+ args.push_back(mt.lookupValue(thisOp.getTmaDescriptor()));
+
+ for (Value v : thisOp.getCoordinates())
+ args.push_back(mt.lookupValue(v));
+
+ mlir::Value cacheHint = thisOp.getL2CacheHint();
+ const bool hasCacheHint = static_cast<bool>(cacheHint);
+ llvm::Value *i64ZeroValue =
+ llvm::ConstantInt::get(llvm::Type::getInt64Ty(ctx), 0);
+ args.push_back(hasCacheHint ? mt.lookupValue(cacheHint) : i64ZeroValue);
+ args.push_back(builder.getInt1(hasCacheHint));
+
+ const unsigned NI = llvm::Intrinsic::not_intrinsic;
+ static constexpr llvm::Intrinsic::ID IDTable[][2][6] = {
----------------
grypp wrote:
Can we define the table in a more structured way? For example:
```
constexpr unsigned kNumRedTys = 8; // ADD, MIN, MAX, INC, DEC, AND, OR, XOR
constexpr unsigned kNumLayouts = 2; // tile / im2col
constexpr unsigned kMaxRank = 5;
using row = std::array<llvm::Intrinsic::ID, kMaxRank>;
using layoutTable = std::array<row, kNumLayouts>;
using fullTable = std::array<layoutTable, kNumRedTys>;
```
https://github.com/llvm/llvm-project/pull/157423
More information about the Mlir-commits
mailing list