[llvm] [NVPTX] Support intrinsics for shared memory special registers (PR #182354)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Feb 19 11:51:47 PST 2026
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-ir
Author: None (yasmincs)
<details>
<summary>Changes</summary>
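Add the `llvm.nvvm.read.ptx.sreg.reserved_smem_offset_{begin|end|cap|0}` intrinsics, which expose the PTX reserved shared memory special registers (`%reserved_smem_offset_begin`, `%reserved_smem_offset_end`, `%reserved_smem_offset_cap`, and `%reserved_smem_offset_0`), together with the NVPTX TableGen register definitions and selection patterns needed to lower them.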
---
Full diff: https://github.com/llvm/llvm-project/pull/182354.diff
5 Files Affected:
- (modified) llvm/include/llvm/IR/IntrinsicsNVVM.td (+12)
- (modified) llvm/lib/Target/NVPTX/NVPTXIntrinsics.td (+10)
- (modified) llvm/lib/Target/NVPTX/NVPTXRegisterInfo.cpp (+4)
- (modified) llvm/lib/Target/NVPTX/NVPTXRegisterInfo.td (+6)
- (added) llvm/test/CodeGen/NVPTX/reserved-smem-offset.ll (+35)
``````````diff
diff --git a/llvm/include/llvm/IR/IntrinsicsNVVM.td b/llvm/include/llvm/IR/IntrinsicsNVVM.td
index f2e1bcb5517c8..c0c80d92017b7 100644
--- a/llvm/include/llvm/IR/IntrinsicsNVVM.td
+++ b/llvm/include/llvm/IR/IntrinsicsNVVM.td
@@ -2450,6 +2450,18 @@ defm int_nvvm_read_ptx_sreg_cluster_nctaid : PTXReadSRegIntrinsicNB_v4i32<MAX_GR
def int_nvvm_read_ptx_sreg_cluster_ctarank : PTXReadSRegIntrinsicNB_r32;
def int_nvvm_read_ptx_sreg_cluster_nctarank : PTXReadSRegIntrinsicNB_r32;
+//
+// Reserved Shared Memory Intrinsics
+//
+def int_nvvm_read_ptx_sreg_reserved_smem_offset_begin
+ : PTXReadSRegIntrinsicNB_r32<[], "llvm.nvvm.read.ptx.sreg.reserved_smem_offset_begin">;
+def int_nvvm_read_ptx_sreg_reserved_smem_offset_end
+ : PTXReadSRegIntrinsicNB_r32<[], "llvm.nvvm.read.ptx.sreg.reserved_smem_offset_end">;
+def int_nvvm_read_ptx_sreg_reserved_smem_offset_cap
+ : PTXReadSRegIntrinsicNB_r32<[], "llvm.nvvm.read.ptx.sreg.reserved_smem_offset_cap">;
+def int_nvvm_read_ptx_sreg_reserved_smem_offset_0
+ : PTXReadSRegIntrinsicNB_r32<[], "llvm.nvvm.read.ptx.sreg.reserved_smem_offset_0">;
+
def int_nvvm_read_ptx_sreg_total_smem_size :
PTXReadSRegIntrinsicNB_r32<name = "llvm.nvvm.read.ptx.sreg.total_smem_size">;
def int_nvvm_read_ptx_sreg_aggr_smem_size :
diff --git a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
index 7b7b11d14ecc8..6ce77651ac164 100644
--- a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
+++ b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
@@ -2986,6 +2986,16 @@ def : Pat<(int_nvvm_read_ptx_sreg_envreg29), (MOV_SPECIAL ENVREG29)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg30), (MOV_SPECIAL ENVREG30)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg31), (MOV_SPECIAL ENVREG31)>;
+// Reserved Shared Memory special register reads
+def : Pat<(int_nvvm_read_ptx_sreg_reserved_smem_offset_begin),
+ (MOV_SPECIAL RESERVED_SMEM_OFFSET_BEGIN)>, Requires<[hasPTX<76>, hasSM<80>]>;
+def : Pat<(int_nvvm_read_ptx_sreg_reserved_smem_offset_end),
+ (MOV_SPECIAL RESERVED_SMEM_OFFSET_END)>, Requires<[hasPTX<76>, hasSM<80>]>;
+def : Pat<(int_nvvm_read_ptx_sreg_reserved_smem_offset_cap),
+ (MOV_SPECIAL RESERVED_SMEM_OFFSET_CAP)>, Requires<[hasPTX<76>, hasSM<80>]>;
+def : Pat<(int_nvvm_read_ptx_sreg_reserved_smem_offset_0),
+ (MOV_SPECIAL RESERVED_SMEM_OFFSET_0)>, Requires<[hasPTX<76>, hasSM<80>]>;
+
//-----------------------------------
// Texture Intrinsics
//-----------------------------------
diff --git a/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.cpp b/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.cpp
index 646b554878c70..c7751fb50791a 100644
--- a/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.cpp
@@ -98,6 +98,10 @@ BitVector NVPTXRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
markSuperRegs(Reserved, NVPTX::VRFrame64);
markSuperRegs(Reserved, NVPTX::VRFrameLocal64);
markSuperRegs(Reserved, NVPTX::VRDepot);
+ markSuperRegs(Reserved, NVPTX::RESERVED_SMEM_OFFSET_BEGIN);
+ markSuperRegs(Reserved, NVPTX::RESERVED_SMEM_OFFSET_END);
+ markSuperRegs(Reserved, NVPTX::RESERVED_SMEM_OFFSET_CAP);
+ markSuperRegs(Reserved, NVPTX::RESERVED_SMEM_OFFSET_0);
return Reserved;
}
diff --git a/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.td b/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.td
index 913487b64617a..7cf6b91af5a3e 100644
--- a/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.td
+++ b/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.td
@@ -44,6 +44,12 @@ foreach i = 0...31 in {
def ENVREG#i : NVPTXReg<"%envreg"#i>;
}
+// Reserved Shared Memory special registers
+def RESERVED_SMEM_OFFSET_BEGIN : NVPTXReg<"%reserved_smem_offset_begin">;
+def RESERVED_SMEM_OFFSET_END : NVPTXReg<"%reserved_smem_offset_end">;
+def RESERVED_SMEM_OFFSET_CAP : NVPTXReg<"%reserved_smem_offset_cap">;
+def RESERVED_SMEM_OFFSET_0 : NVPTXReg<"%reserved_smem_offset_0">;
+
//===----------------------------------------------------------------------===//
// Register classes.
// NOTE: if you add new vector types for a register, you must update
diff --git a/llvm/test/CodeGen/NVPTX/reserved-smem-offset.ll b/llvm/test/CodeGen/NVPTX/reserved-smem-offset.ll
new file mode 100644
index 0000000000000..a72033d302c19
--- /dev/null
+++ b/llvm/test/CodeGen/NVPTX/reserved-smem-offset.ll
@@ -0,0 +1,35 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc < %s -mtriple=nvptx64-nvidia-cuda -mcpu=sm_80 | FileCheck %s
+; RUN: %if ptxas-sm_80 %{ llc < %s -mtriple=nvptx64-nvidia-cuda -mcpu=sm_80 | %ptxas-verify -arch=sm_80 %}
+
+declare i32 @llvm.nvvm.read.ptx.sreg.reserved_smem_offset_begin()
+declare i32 @llvm.nvvm.read.ptx.sreg.reserved_smem_offset_end()
+declare i32 @llvm.nvvm.read.ptx.sreg.reserved_smem_offset_cap()
+declare i32 @llvm.nvvm.read.ptx.sreg.reserved_smem_offset_0()
+
+define i32 @test() {
+; CHECK-LABEL: test(
+; CHECK: {
+; CHECK-NEXT: .reg .b32 %r<8>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: mov.b32 %r1, %reserved_smem_offset_begin;
+; CHECK-NEXT: mov.b32 %r2, %reserved_smem_offset_end;
+; CHECK-NEXT: mov.b32 %r3, %reserved_smem_offset_cap;
+; CHECK-NEXT: mov.b32 %r4, %reserved_smem_offset_0;
+; CHECK-NEXT: add.s32 %r5, %r1, %r2;
+; CHECK-NEXT: add.s32 %r6, %r5, %r3;
+; CHECK-NEXT: add.s32 %r7, %r6, %r4;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r7;
+; CHECK-NEXT: ret;
+ %begin = call i32 @llvm.nvvm.read.ptx.sreg.reserved_smem_offset_begin()
+ %end = call i32 @llvm.nvvm.read.ptx.sreg.reserved_smem_offset_end()
+ %cap = call i32 @llvm.nvvm.read.ptx.sreg.reserved_smem_offset_cap()
+ %offset0 = call i32 @llvm.nvvm.read.ptx.sreg.reserved_smem_offset_0()
+
+ %ret0 = add i32 %begin, %end
+ %ret1 = add i32 %ret0, %cap
+ %ret2 = add i32 %ret1, %offset0
+
+ ret i32 %ret2
+}
``````````
</details>
https://github.com/llvm/llvm-project/pull/182354
More information about the llvm-commits mailing list