[llvm] [NVPTX] Add support for tcgen05 instructions for sm103 target (PR #156613)

via llvm-commits llvm-commits at lists.llvm.org
Wed Sep 3 00:46:21 PDT 2025


https://github.com/masahi created https://github.com/llvm/llvm-project/pull/156613

Currently, NVPTX fails to lower some (but not all?) tcgen05 ops for the sm103 target. It was observed in a downstream project, Triton, in https://github.com/triton-lang/triton/pull/8045.

I verified that my change is functional by testing against `tcgen05-fence.ll`. Without the changes in `NVPTXSubtarget.h`:
```
$ bin/llc < tcgen05-fence.ll -march=nvptx64 -mcpu=sm_103a -mattr=+ptx88
//
// Generated by LLVM NVPTX Back-End
//

.version 8.8
.target sm_103a
.address_size 64

LLVM ERROR: Cannot select: intrinsic %llvm.nvvm.tcgen05.fence.after.thread.sync
```

However, for `tcgen05-ld.ll`, the test passes for sm103 without my change. This is why I said "NVPTX fails to lower **some** tcgen05 ops for the sm103 target" above.

cc @AlexMaclean @rajatbajpai @durga4github 


>From 78408e576675ecedc1c9220b57714ced9c344bc0 Mon Sep 17 00:00:00 2001
From: Masahiro Masuda <masahi129 at gmail.com>
Date: Wed, 3 Sep 2025 16:32:56 +0900
Subject: [PATCH] [NVPTX] Add support for tcgen05 instructions for sm103 target

---
 llvm/lib/Target/NVPTX/NVPTXSubtarget.h   | 7 ++++++-
 llvm/test/CodeGen/NVPTX/tcgen05-fence.ll | 2 ++
 llvm/test/CodeGen/NVPTX/tcgen05-ld.ll    | 2 ++
 3 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/Target/NVPTX/NVPTXSubtarget.h b/llvm/lib/Target/NVPTX/NVPTXSubtarget.h
index 6cee4ff52ae0c..0a77a633cb255 100644
--- a/llvm/lib/Target/NVPTX/NVPTXSubtarget.h
+++ b/llvm/lib/Target/NVPTX/NVPTXSubtarget.h
@@ -106,6 +106,7 @@ class NVPTXSubtarget : public NVPTXGenSubtargetInfo {
   // Tcgen05 instructions in Blackwell family
   bool hasTcgen05Instructions() const {
     bool HasTcgen05 = false;
+    unsigned MinPTXVersion = 86;
     switch (FullSmVersion) {
     default:
       break;
@@ -113,9 +114,13 @@ class NVPTXSubtarget : public NVPTXGenSubtargetInfo {
     case 1013: // sm_101a
       HasTcgen05 = true;
       break;
+    case 1033: // sm_103a
+      HasTcgen05 = true;
+      MinPTXVersion = 88;
+      break;
     }
 
-    return HasTcgen05 && PTXVersion >= 86;
+    return HasTcgen05 && PTXVersion >= MinPTXVersion;
   }
   // f32x2 instructions in Blackwell family
   bool hasF32x2Instructions() const;
diff --git a/llvm/test/CodeGen/NVPTX/tcgen05-fence.ll b/llvm/test/CodeGen/NVPTX/tcgen05-fence.ll
index fe4719cc00f17..cbf647f857173 100644
--- a/llvm/test/CodeGen/NVPTX/tcgen05-fence.ll
+++ b/llvm/test/CodeGen/NVPTX/tcgen05-fence.ll
@@ -1,6 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
 ; RUN: llc < %s -march=nvptx64 -mcpu=sm_100a -mattr=+ptx86 | FileCheck --check-prefixes=CHECK %s
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_103a -mattr=+ptx88 | FileCheck --check-prefixes=CHECK %s
 ; RUN: %if ptxas-sm_100a && ptxas-isa-8.6 %{ llc < %s -march=nvptx64 -mcpu=sm_100a -mattr=+ptx86 | %ptxas-verify -arch=sm_100a %}
+; RUN: %if ptxas-sm_103a && ptxas-isa-8.8 %{ llc < %s -march=nvptx64 -mcpu=sm_103a -mattr=+ptx88 | %ptxas-verify -arch=sm_103a %}
 
 declare void @llvm.nvvm.tcgen05.fence.before.thread.sync()
 declare void @llvm.nvvm.tcgen05.fence.after.thread.sync()
diff --git a/llvm/test/CodeGen/NVPTX/tcgen05-ld.ll b/llvm/test/CodeGen/NVPTX/tcgen05-ld.ll
index 16710b4c5bc27..a37b1a95aa800 100644
--- a/llvm/test/CodeGen/NVPTX/tcgen05-ld.ll
+++ b/llvm/test/CodeGen/NVPTX/tcgen05-ld.ll
@@ -1,8 +1,10 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
 ; RUN: llc < %s -o - -mcpu=sm_100a -march=nvptx64 -mattr=+ptx86 | FileCheck %s
 ; RUN: llc < %s -o - -mcpu=sm_101a -march=nvptx64 -mattr=+ptx86 | FileCheck %s
+; RUN: llc < %s -o - -mcpu=sm_103a -march=nvptx64 -mattr=+ptx88 | FileCheck %s
 ; RUN: %if ptxas-sm_100a && ptxas-isa-8.6 %{ llc < %s -march=nvptx64 -mattr=+ptx86 -mcpu=sm_100a | %ptxas-verify -arch=sm_100a %}
 ; RUN: %if ptxas-sm_101a && ptxas-isa-8.6 %{ llc < %s -march=nvptx64 -mattr=+ptx86 -mcpu=sm_101a | %ptxas-verify -arch=sm_101a %}
+; RUN: %if ptxas-sm_103a && ptxas-isa-8.8 %{ llc < %s -march=nvptx64 -mattr=+ptx88 -mcpu=sm_103a | %ptxas-verify -arch=sm_103a %}
 
 ; CHECK-LABEL: nvvm_tcgen05_ld_16x64b
 define void @nvvm_tcgen05_ld_16x64b(ptr addrspace(6) %taddr) {



More information about the llvm-commits mailing list