[Mlir-commits] [mlir] [MLIR][NVVM] Migrate CpAsyncOp to intrinsics (PR #123789)
Durgadoss R
llvmlistbot at llvm.org
Wed Jan 22 06:22:51 PST 2025
================
@@ -849,55 +849,30 @@ def LoadCacheModifierKind : I32EnumAttr<"LoadCacheModifierKind",
def LoadCacheModifierAttr : EnumAttr<NVVM_Dialect, LoadCacheModifierKind, "load_cache_modifier">;
-def NVVM_CpAsyncOp : NVVM_PTXBuilder_Op<"cp.async.shared.global">,
+def NVVM_CpAsyncOp : NVVM_Op<"cp.async.shared.global">,
Arguments<(ins LLVM_PointerShared:$dst,
LLVM_PointerGlobal:$src,
I32Attr:$size,
LoadCacheModifierAttr:$modifier,
Optional<LLVM_Type>:$cpSize)> {
- string llvmBuilder = [{
- llvm::Intrinsic::ID id;
- switch ($size) {
- case 4:
- id = llvm::Intrinsic::nvvm_cp_async_ca_shared_global_4;
- break;
- case 8:
- id = llvm::Intrinsic::nvvm_cp_async_ca_shared_global_8;
- break;
- case 16:
- if($modifier == NVVM::LoadCacheModifierKind::CG)
- id = llvm::Intrinsic::nvvm_cp_async_cg_shared_global_16;
- else if($modifier == NVVM::LoadCacheModifierKind::CA)
- id = llvm::Intrinsic::nvvm_cp_async_ca_shared_global_16;
- else
- llvm_unreachable("unsupported cache modifier");
- break;
- default:
- llvm_unreachable("unsupported async copy size");
- }
- createIntrinsicCall(builder, id, {$dst, $src});
- }];
let assemblyFormat = "$dst `,` $src `,` $size `,` `cache` `=` $modifier (`,` $cpSize^)? attr-dict `:` type(operands)";
let hasVerifier = 1;
let extraClassDeclaration = [{
- bool hasIntrinsic() { if(getCpSize()) return false; return true; }
-
- void getAsmValues(RewriterBase &rewriter,
- llvm::SmallVectorImpl<std::pair<mlir::Value, mlir::NVVM::PTXRegisterMod>> &asmValues) {
- asmValues.push_back({getDst(), PTXRegisterMod::Read});
- asmValues.push_back({getSrc(), PTXRegisterMod::Read});
- asmValues.push_back({makeConstantI32(rewriter, getSize()), PTXRegisterMod::Read});
- asmValues.push_back({getCpSize(), PTXRegisterMod::Read});
- }
+ static llvm::Intrinsic::ID getIntrinsicID(int size,
+ NVVM::LoadCacheModifierKind kind,
+ bool hasCpSize);
}];
- let extraClassDefinition = [{
- std::string $cppClass::getPtx() {
- if(getModifier() == NVVM::LoadCacheModifierKind::CG)
- return std::string("cp.async.cg.shared.global [%0], [%1], %2, %3;\n");
- if(getModifier() == NVVM::LoadCacheModifierKind::CA)
- return std::string("cp.async.ca.shared.global [%0], [%1], %2, %3;\n");
- llvm_unreachable("unsupported cache modifier");
- }
+ string llvmBuilder = [{
+ bool hasCpSize = op.getCpSize() ? true : false;
+
+ llvm::SmallVector<llvm::Value *> translatedOperands;
+ translatedOperands.push_back($dst);
+ translatedOperands.push_back($src);
+ if (hasCpSize)
+ translatedOperands.push_back($cpSize);
----------------
durga4github wrote:
I tried this and it seems we have to pass three more arguments to getIntrinsicID() to achieve this.
A short diff of the changes:
auto id = NVVM::CpAsyncOp::getIntrinsicIDAndArgs($size, $modifier, hasCpSize, translatedOperands, *op, moduleTranslation);
Inside getIntrinsicIDAndArgs():
auto op = cast<NVVM::CpAsyncOp>(opInput);
args.push_back(moduleTranslation.lookupValue(op.getDst()));
args.push_back(moduleTranslation.lookupValue(op.getSrc()));
if (hasCpSize)
args.push_back(moduleTranslation.lookupValue(op.getCpSize()));
It feels like, we are manually doing what the translator generates in the NVVMConversions.inc file.
But, I agree that this reduces the llvmBuilder code in the td file by a few lines.
With this, do you think, packing the args inside the getIntrinsicID(), is a good idea?
Kindly let me know what you think.
https://github.com/llvm/llvm-project/pull/123789
More information about the Mlir-commits
mailing list