[llvm] c1aa89e - [NVPTX] prefetch.tensormap pattern rewriter fix (#159253)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Sep 24 02:59:45 PDT 2025
Author: Abhilash Majumder
Date: 2025-09-24T15:29:41+05:30
New Revision: c1aa89ef9be2af6599a7a280fe6de646c2a5ca4a
URL: https://github.com/llvm/llvm-project/commit/c1aa89ef9be2af6599a7a280fe6de646c2a5ca4a
DIFF: https://github.com/llvm/llvm-project/commit/c1aa89ef9be2af6599a7a280fe6de646c2a5ca4a.diff
LOG: [NVPTX] prefetch.tensormap pattern rewriter fix (#159253)
Context: Highlighted from #156830, this is an ISel lowering issue in
the NVPTX backend for the prefetch.tensormap intrinsic.
It is caused by an unchecked pattern rewrite during the
infer-address-space pass.
This intrinsic is valid only for const, param and generic
address-spaces.
Any other address space is invalid. Currently, this intrinsic
incorrectly gets rewritten to target AS(1) (the global address space)
when the pointer argument of the intrinsic comes in as an argument
of a kernel function.
So, this patch adds a check that the inferred address space is one of
the valid ones before rewriting the intrinsic.
cc @durga4github
FYI: @Wolfram70 @rupprecht @castigli
Added:
Modified:
llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
llvm/test/CodeGen/NVPTX/prefetch-inferas-test.ll
llvm/test/CodeGen/NVPTX/prefetch.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp b/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
index f4f89613b358d..b5bf72e45038a 100644
--- a/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
@@ -590,8 +590,12 @@ Value *NVPTXTTIImpl::rewriteIntrinsicWithAddressSpace(IntrinsicInst *II,
}
case Intrinsic::nvvm_prefetch_tensormap: {
IRBuilder<> Builder(II);
- return Builder.CreateUnaryIntrinsic(Intrinsic::nvvm_prefetch_tensormap,
- NewV);
+ const unsigned NewAS = NewV->getType()->getPointerAddressSpace();
+ if (NewAS == NVPTXAS::ADDRESS_SPACE_CONST ||
+ NewAS == NVPTXAS::ADDRESS_SPACE_PARAM)
+ return Builder.CreateUnaryIntrinsic(Intrinsic::nvvm_prefetch_tensormap,
+ NewV);
+ return nullptr;
}
}
return nullptr;
diff --git a/llvm/test/CodeGen/NVPTX/prefetch-inferas-test.ll b/llvm/test/CodeGen/NVPTX/prefetch-inferas-test.ll
index bc67471209bf8..32b55a38e55ef 100644
--- a/llvm/test/CodeGen/NVPTX/prefetch-inferas-test.ll
+++ b/llvm/test/CodeGen/NVPTX/prefetch-inferas-test.ll
@@ -11,7 +11,6 @@ target triple = "nvptx64-unknown-unknown"
define void @test_infer_const_from_cast() {
; INFER-LABEL: @test_infer_const_from_cast
; INFER: call void @llvm.nvvm.prefetch.tensormap.p4(ptr addrspace(4) @constant_tensormap)
-; BOTH: call void @llvm.nvvm.prefetch.tensormap.p4(ptr addrspace(4) @constant_tensormap)
; PTX-LABEL: .visible .func test_infer_const_from_cast(
; PTX: mov.b64 %rd{{[0-9]+}}, constant_tensormap;
; PTX: cvta.const.u64 %rd{{[0-9]+}}, %rd{{[0-9]+}};
@@ -69,12 +68,40 @@ entry:
%cast1 = addrspacecast ptr addrspace(4) @constant_tensormap to ptr
%cast2 = addrspacecast ptr %cast1 to ptr addrspace(4)
%cast3 = addrspacecast ptr addrspace(4) %cast2 to ptr
- call void @llvm.nvvm.prefetch.tensormap(ptr %cast3)
+ call void @llvm.nvvm.prefetch.tensormap.p0(ptr %cast3)
+ ret void
+}
+
+; Kernel Function Test
+; Cast from Param space to Generic
+define ptx_kernel void @test_param_to_generic_cast_kernel(ptr addrspace(101) %param_ptr) {
+; INFER-LABEL: @test_param_to_generic_cast_kernel
+; INFER: call void @llvm.nvvm.prefetch.tensormap.p101(ptr addrspace(101) %param_ptr)
+; PTX-LABEL: .visible .entry test_param_to_generic_cast_kernel(
+; PTX: prefetch.param.tensormap [%rd{{[0-9]+}}];
+entry:
+ %cast = addrspacecast ptr addrspace(101) %param_ptr to ptr
+ call void @llvm.nvvm.prefetch.tensormap.p0(ptr %cast)
+ ret void
+}
+
+; Kernel Function Test
+; Multiple casts in sequence
+define ptx_kernel void @test_infer_through_multiple_casts_kernel() {
+; INFER-LABEL: @test_infer_through_multiple_casts_kernel
+; INFER: call void @llvm.nvvm.prefetch.tensormap.p4(ptr addrspace(4) @constant_tensormap)
+; PTX-LABEL: .visible .entry test_infer_through_multiple_casts_kernel(
+; PTX: mov.b64 %rd{{[0-9]+}}, constant_tensormap;
+; PTX: cvta.const.u64 %rd{{[0-9]+}}, %rd{{[0-9]+}};
+; PTX: prefetch.tensormap [%rd{{[0-9]+}}];
+entry:
+ %cast1 = addrspacecast ptr addrspace(4) @constant_tensormap to ptr
+ %cast2 = addrspacecast ptr %cast1 to ptr addrspace(4)
+ %cast3 = addrspacecast ptr addrspace(4) %cast2 to ptr
+ call void @llvm.nvvm.prefetch.tensormap.p0(ptr %cast3)
ret void
}
declare void @llvm.nvvm.prefetch.tensormap.p0(ptr)
declare void @llvm.nvvm.prefetch.tensormap.p4(ptr addrspace(4))
declare void @llvm.nvvm.prefetch.tensormap.p101(ptr addrspace(101))
-
-
diff --git a/llvm/test/CodeGen/NVPTX/prefetch.ll b/llvm/test/CodeGen/NVPTX/prefetch.ll
index a1c5ec8f50a6b..c0489cc6fd73a 100644
--- a/llvm/test/CodeGen/NVPTX/prefetch.ll
+++ b/llvm/test/CodeGen/NVPTX/prefetch.ll
@@ -121,4 +121,40 @@ define void @prefetch_param_tensormap(ptr addrspace(101) %param_ptr) {
; CHECK-PTX64-NEXT: ret;
tail call void @llvm.nvvm.prefetch.tensormap.p101(ptr addrspace(101) %param_ptr)
ret void
-}
\ No newline at end of file
+}
+
+define ptx_kernel void @prefetch_generic_tensormap_kernel(ptr %ptr) {
+; CHECK-PTX64-LABEL: prefetch_generic_tensormap_kernel(
+; CHECK-PTX64: {
+; CHECK-PTX64-NEXT: .reg .b64 %rd<2>;
+; CHECK-PTX64-EMPTY:
+; CHECK-PTX64-NEXT: // %bb.0:
+; CHECK-PTX64-NEXT: ld.param.b64 %rd1, [prefetch_generic_tensormap_kernel_param_0];
+; CHECK-PTX64-NEXT: prefetch.tensormap [%rd1];
+; CHECK-PTX64-NEXT: ret;
+ tail call void @llvm.nvvm.prefetch.tensormap.p0(ptr %ptr)
+ ret void
+}
+
+define ptx_kernel void @prefetch_param_tensormap_kernel(ptr addrspace(101) %param_ptr) {
+; CHECK-PTX64-LABEL: prefetch_param_tensormap_kernel(
+; CHECK-PTX64: {
+; CHECK-PTX64-NEXT: .reg .b64 %rd<2>;
+; CHECK-PTX64-EMPTY:
+; CHECK-PTX64-NEXT: // %bb.0:
+; CHECK-PTX64-NEXT: ld.param.b64 %rd1, [prefetch_param_tensormap_kernel_param_0];
+; CHECK-PTX64-NEXT: prefetch.param.tensormap [%rd1];
+; CHECK-PTX64-NEXT: ret;
+ tail call void @llvm.nvvm.prefetch.tensormap.p101(ptr addrspace(101) %param_ptr)
+ ret void
+}
+
+define ptx_kernel void @prefetch_grid_const_tensormap(ptr byval([64 x i8]) align 64 "nvvm.grid_constant" %ptr) {
+; CHECK-PTX64-LABEL: .visible .entry prefetch_grid_const_tensormap(
+; CHECK-PTX64: prefetch.tensormap [%{{(SP|rd[0-9]+).*}}];
+; CHECK-PTX64: ret;
+
+entry:
+ call void @llvm.nvvm.prefetch.tensormap.p0(ptr addrspace(0) %ptr)
+ ret void
+}
More information about the llvm-commits
mailing list