[llvm] [NVPTX] Fix generic address in st.bulk intrinsic (PR #130740)

Srinivasa Ravi via llvm-commits llvm-commits at lists.llvm.org
Tue Mar 11 02:50:18 PDT 2025


https://github.com/Wolfram70 created https://github.com/llvm/llvm-project/pull/130740

This fixes an oversight in the previous change that introduced the `st.bulk` intrinsic: its address operand was declared as `llvm_global_ptr_ty` (a global address-space pointer), but the instruction takes a generic address, so the operand should be `llvm_ptr_ty`.

PTX Spec Reference:
https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#data-movement-and-conversion-instructions-st-bulk

From c0d520db0b9bc8bad9db99fa16b540385be53081 Mon Sep 17 00:00:00 2001
From: Srinivasa Ravi <srinivasar at nvidia.com>
Date: Tue, 11 Mar 2025 14:23:53 +0530
Subject: [PATCH] [NVPTX] Fix generic address in st.bulk intrinsic

This fixes an oversight in the previous change that introduced the `st.bulk`
intrinsic where `llvm_global_ptr_ty` was used instead of `llvm_ptr_ty`.

PTX Spec Reference:
https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#data-movement-and-conversion-instructions-st-bulk
---
 llvm/include/llvm/IR/IntrinsicsNVVM.td | 2 +-
 llvm/test/CodeGen/NVPTX/st_bulk.ll     | 6 +++---
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/llvm/include/llvm/IR/IntrinsicsNVVM.td b/llvm/include/llvm/IR/IntrinsicsNVVM.td
index 0b183fc30068e..ea58985cbebda 100644
--- a/llvm/include/llvm/IR/IntrinsicsNVVM.td
+++ b/llvm/include/llvm/IR/IntrinsicsNVVM.td
@@ -5274,7 +5274,7 @@ foreach shape = ["16x64b", "16x128b", "16x256b", "32x32b", "16x32bx2"] in {
 //
 
 def int_nvvm_st_bulk: DefaultAttrsIntrinsic<[],
-  [llvm_global_ptr_ty, llvm_i64_ty, llvm_i64_ty],
+  [llvm_ptr_ty, llvm_i64_ty, llvm_i64_ty],
   [IntrArgMemOnly, IntrWriteMem,
     WriteOnly<ArgIndex<0>>, NoCapture<ArgIndex<0>>, ImmArg<ArgIndex<2>>]>;
 
diff --git a/llvm/test/CodeGen/NVPTX/st_bulk.ll b/llvm/test/CodeGen/NVPTX/st_bulk.ll
index 085df7f1d8d3f..785f78a6f9519 100644
--- a/llvm/test/CodeGen/NVPTX/st_bulk.ll
+++ b/llvm/test/CodeGen/NVPTX/st_bulk.ll
@@ -4,8 +4,8 @@
 ; RUN: %if ptxas-12.8 %{ llc < %s -mtriple=nvptx64 -mcpu=sm_100 -mattr=+ptx86 | %ptxas-verify -arch=sm_100 %}
 ; RUN: %if ptxas-12.8 %{ llc < %s -mtriple=nvptx64 -mcpu=sm_100 -mattr=+ptx86 --nvptx-short-ptr | %ptxas-verify -arch=sm_100 %}
 
-declare void @llvm.nvvm.st.bulk(ptr addrspace(1), i64, i64)
-define void @st_bulk(ptr addrspace(1) %dest_addr, i64 %size) {
+declare void @llvm.nvvm.st.bulk(ptr, i64, i64)
+define void @st_bulk(ptr %dest_addr, i64 %size) {
 ; CHECK-LABEL: st_bulk(
 ; CHECK:       {
 ; CHECK-NEXT:    .reg .b64 %rd<3>;
@@ -15,7 +15,7 @@ define void @st_bulk(ptr addrspace(1) %dest_addr, i64 %size) {
 ; CHECK-NEXT:    ld.param.u64 %rd2, [st_bulk_param_1];
 ; CHECK-NEXT:    st.bulk [%rd1], %rd2, 0;
 ; CHECK-NEXT:    ret;
-  call void @llvm.nvvm.st.bulk(ptr addrspace(1) %dest_addr, i64 %size, i64 0)
+  call void @llvm.nvvm.st.bulk(ptr %dest_addr, i64 %size, i64 0)
   ret void
 }
 



More information about the llvm-commits mailing list