[llvm] [NVPTX] Add intrinsics and codegen for tensormap.replace (PR #172458)
Durgadoss R via llvm-commits
llvm-commits at lists.llvm.org
Thu Dec 18 06:04:03 PST 2025
================
@@ -59,3 +60,93 @@ void nvvm::printTcgen05CollectorUsageOp(raw_ostream &OS,
llvm_unreachable("printTcgen05CollectorUsageOp called with invalid value for "
"immediate argument");
}
+
+void nvvm::printTensormapElemType(raw_ostream &OS, const Constant *ImmArgVal) {
+  static constexpr StringRef TensormapElemTypes[] = {
+      "u8", "u16", "u32", "s32", "u64", "s64",
+      "f16", "f32", "f32.ftz", "f64", "bf16", "tf32",
+      "tf32.ftz", "b4x16", "b4x16_p64", "b6x16_p32"};
+  if (const auto *CI = dyn_cast<ConstantInt>(ImmArgVal)) {
+    uint64_t Val = CI->getZExtValue();
+    if (Val <= static_cast<uint64_t>(nvvm::TensormapElemType::B6x16_p32)) {
+      OS << TensormapElemTypes[Val];
+      return;
+    }
+  }
+}
+
+void nvvm::printTensormapInterleaveLayout(raw_ostream &OS,
+                                          const Constant *ImmArgVal) {
+  if (const auto *CI = dyn_cast<ConstantInt>(ImmArgVal)) {
+    uint64_t Val = CI->getZExtValue();
+    switch (static_cast<TensormapInterleaveLayout>(Val)) {
+    case TensormapInterleaveLayout::NO_INTERLEAVE:
+      OS << "No interleave";
+      return;
+    case TensormapInterleaveLayout::INTERLEAVE_16B:
+      OS << "16B interleave";
+      return;
+    case TensormapInterleaveLayout::INTERLEAVE_32B:
+      OS << "32B interleave";
+      return;
+    }
+  }
+}
+
+void nvvm::printTensormapSwizzleMode(raw_ostream &OS,
+                                     const Constant *ImmArgVal) {
+  if (const auto *CI = dyn_cast<ConstantInt>(ImmArgVal)) {
+    uint64_t Val = CI->getZExtValue();
+    switch (static_cast<TensormapSwizzleMode>(Val)) {
+    case TensormapSwizzleMode::NO_SWIZZLE:
+      OS << "No swizzling";
+      return;
+    case TensormapSwizzleMode::SWIZZLE_32B:
+      OS << "32B swizzling";
+      return;
+    case TensormapSwizzleMode::SWIZZLE_64B:
+      OS << "64B swizzling";
+      return;
+    case TensormapSwizzleMode::SWIZZLE_128B:
+      OS << "128B swizzling";
+      return;
+    case TensormapSwizzleMode::SWIZZLE_96B:
+      OS << "96B swizzling";
+      return;
+    }
+  }
+}
+
+void nvvm::printTensormapSwizzleAtomicity(raw_ostream &OS,
+                                          const Constant *ImmArgVal) {
+  if (const auto *CI = dyn_cast<ConstantInt>(ImmArgVal)) {
+    uint64_t Val = CI->getZExtValue();
+    switch (static_cast<TensormapSwizzleAtomicity>(Val)) {
+    case TensormapSwizzleAtomicity::SWIZZLE_ATOMICITY_16B:
+      OS << "16B";
+      return;
+    case TensormapSwizzleAtomicity::SWIZZLE_ATOMICITY_32B:
+      OS << "32B";
+      return;
+    case TensormapSwizzleAtomicity::SWIZZLE_ATOMICITY_32B_FLIP_8B:
+      OS << "32B + 8B flip";
+      return;
+    case TensormapSwizzleAtomicity::SWIZZLE_ATOMICITY_64B:
+      OS << "64B";
+      return;
+    }
+  }
+}
+
+void nvvm::printTensormapFillMode(raw_ostream &OS, const Constant *ImmArgVal) {
+  if (const auto *CI = dyn_cast<ConstantInt>(ImmArgVal)) {
+    uint64_t Val = CI->getZExtValue();
+    if (Val == static_cast<uint64_t>(TensormapFillMode::ZERO_FILL)) {
+      OS << "Zero fill";
+      return;
+    } else if (Val == static_cast<uint64_t>(TensormapFillMode::OOB_NAN_FILL)) {
+      OS << "OOB-NaN fill";
+      return;
+    }
+  }
+}
----------------
durga4github wrote:
Would leveraging `isZero()` from `ConstantInt` make it simpler? Something like below:
```
if (auto *CI = dyn_cast<ConstantInt>(ImmArgVal))
  OS << (CI->isZero() ? "Zero fill" : "OOB-NaN fill");
```
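For reference, a minimal self-contained sketch of the fill-mode printer under that suggestion; the helper name `printFillMode` and the assumption that the immediate argument is already restricted to the two `TensormapFillMode` values are illustrative, not part of the patch:
```
#include "llvm/IR/Constants.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

// Sketch only: assumes the immediate has already been validated to be
// 0 (ZERO_FILL) or 1 (OOB_NAN_FILL), so isZero() alone selects the name.
// Note that any other value would print "OOB-NaN fill" here, whereas the
// patch as written prints nothing for out-of-range values.
static void printFillMode(raw_ostream &OS, const Constant *ImmArgVal) {
  if (const auto *CI = dyn_cast<ConstantInt>(ImmArgVal))
    OS << (CI->isZero() ? "Zero fill" : "OOB-NaN fill");
}
```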
https://github.com/llvm/llvm-project/pull/172458