[llvm] [NVPTX] enforce signed 32 bit type for immediate offset (PR #99682)
Alex MacLean via llvm-commits
llvm-commits at lists.llvm.org
Fri Jul 19 11:36:28 PDT 2024
https://github.com/AlexMaclean created https://github.com/llvm/llvm-project/pull/99682
The NVPTX ISA states that an immOff must be a signed 32-bit (https://docs.nvidia.com/cuda/parallel-thread-execution/#addresses-as-operands):
> `[reg+immOff]`
>
> a sum of register `reg` containing a byte address plus a constant integer byte offset (signed, 32-bit).
>
> `[var+immOff]`
>
> a sum of address of addressable variable `var` containing a byte address plus a constant integer byte offset (signed, 32-bit).
Currently we do not consider this constraint, meaning that in some edge cases we generate invalid PTX when a value is offset by a very large immediate.
>From 7e314b16ba6c858e36e905145c5523306920251b Mon Sep 17 00:00:00 2001
From: Alex MacLean <amaclean at nvidia.com>
Date: Fri, 19 Jul 2024 18:30:25 +0000
Subject: [PATCH 1/2] pre-commit tests
---
llvm/test/CodeGen/NVPTX/addr-mode.ll | 84 +++++++++++++++++++
.../NVPTX/split-gep.ll | 24 ++++++
2 files changed, 108 insertions(+)
create mode 100644 llvm/test/CodeGen/NVPTX/addr-mode.ll
diff --git a/llvm/test/CodeGen/NVPTX/addr-mode.ll b/llvm/test/CodeGen/NVPTX/addr-mode.ll
new file mode 100644
index 0000000000000..46078aabad8c6
--- /dev/null
+++ b/llvm/test/CodeGen/NVPTX/addr-mode.ll
@@ -0,0 +1,84 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -march=nvptx64 | FileCheck %s
+
+target triple = "nvptx64-nvidia-cuda"
+
+define i32 @test_addr_mode_i64(ptr %x) {
+; CHECK-LABEL: test_addr_mode_i64(
+; CHECK: {
+; CHECK-NEXT: .reg .b32 %r<2>;
+; CHECK-NEXT: .reg .b64 %rd<2>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.u64 %rd1, [test_addr_mode_i64_param_0];
+; CHECK-NEXT: ld.u32 %r1, [%rd1+-4];
+; CHECK-NEXT: st.param.b32 [func_retval0+0], %r1;
+; CHECK-NEXT: ret;
+ %addr = getelementptr i32, ptr %x, i64 -1
+ %res = load i32, ptr %addr
+ ret i32 %res
+}
+
+define i32 @test_addr_mode_i32(ptr %x) {
+; CHECK-LABEL: test_addr_mode_i32(
+; CHECK: {
+; CHECK-NEXT: .reg .b32 %r<2>;
+; CHECK-NEXT: .reg .b64 %rd<2>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.u64 %rd1, [test_addr_mode_i32_param_0];
+; CHECK-NEXT: ld.u32 %r1, [%rd1+-4];
+; CHECK-NEXT: st.param.b32 [func_retval0+0], %r1;
+; CHECK-NEXT: ret;
+ %addr = getelementptr i32, ptr %x, i32 -1
+ %res = load i32, ptr %addr
+ ret i32 %res
+}
+
+define i32 @test_addr_mode_i16(ptr %x) {
+; CHECK-LABEL: test_addr_mode_i16(
+; CHECK: {
+; CHECK-NEXT: .reg .b32 %r<2>;
+; CHECK-NEXT: .reg .b64 %rd<2>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.u64 %rd1, [test_addr_mode_i16_param_0];
+; CHECK-NEXT: ld.u32 %r1, [%rd1+-4];
+; CHECK-NEXT: st.param.b32 [func_retval0+0], %r1;
+; CHECK-NEXT: ret;
+ %addr = getelementptr i32, ptr %x, i16 -1
+ %res = load i32, ptr %addr
+ ret i32 %res
+}
+
+define i32 @test_addr_mode_i8(ptr %x) {
+; CHECK-LABEL: test_addr_mode_i8(
+; CHECK: {
+; CHECK-NEXT: .reg .b32 %r<2>;
+; CHECK-NEXT: .reg .b64 %rd<2>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.u64 %rd1, [test_addr_mode_i8_param_0];
+; CHECK-NEXT: ld.u32 %r1, [%rd1+-4];
+; CHECK-NEXT: st.param.b32 [func_retval0+0], %r1;
+; CHECK-NEXT: ret;
+ %addr = getelementptr i32, ptr %x, i8 -1
+ %res = load i32, ptr %addr
+ ret i32 %res
+}
+
+define i32 @test_addr_mode_i64_large(ptr %x) {
+; CHECK-LABEL: test_addr_mode_i64_large(
+; CHECK: {
+; CHECK-NEXT: .reg .b32 %r<2>;
+; CHECK-NEXT: .reg .b64 %rd<2>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.u64 %rd1, [test_addr_mode_i64_large_param_0];
+; CHECK-NEXT: ld.u32 %r1, [%rd1+17179869172];
+; CHECK-NEXT: st.param.b32 [func_retval0+0], %r1;
+; CHECK-NEXT: ret;
+ %addr = getelementptr i32, ptr %x, i64 4294967293
+ %res = load i32, ptr %addr
+ ret i32 %res
+}
diff --git a/llvm/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep.ll b/llvm/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep.ll
index c915b9a5e59ac..f8eb50ef4b548 100644
--- a/llvm/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep.ll
+++ b/llvm/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep.ll
@@ -397,3 +397,27 @@ entry:
%ptr2 = getelementptr inbounds %struct0, ptr %ptr, i65 1, i32 3, i64 %idx, i32 1
ret ptr %ptr2
}
+
+; Do not extract large constant offset that cannot be folded in to PTX
+; addressing mode
+define void @large_offset(ptr %out, i32 %in) {
+; CHECK-LABEL: define void @large_offset(
+; CHECK-SAME: ptr [[OUT:%.*]], i32 [[IN:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
+; CHECK-NEXT: [[TMP1:%.*]] = zext nneg i32 [[TMP0]] to i64
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr [[OUT]], i64 [[TMP1]]
+; CHECK-NEXT: [[GETELEM:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i64 2147483648
+; CHECK-NEXT: store i32 [[IN]], ptr [[GETELEM]], align 4
+; CHECK-NEXT: ret void
+;
+entry:
+ %0 = tail call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
+ %add = add nuw nsw i32 %0, 536870912
+ %idx = zext nneg i32 %add to i64
+ %getElem = getelementptr inbounds i32, ptr %out, i64 %idx
+ store i32 %in, ptr %getElem, align 4
+ ret void
+}
+
+declare i32 @llvm.nvvm.read.ptx.sreg.tid.x()
>From 7056473c1b0fea67242e6dd51ce8edc18c4378e0 Mon Sep 17 00:00:00 2001
From: Alex MacLean <amaclean at nvidia.com>
Date: Fri, 19 Jul 2024 18:31:49 +0000
Subject: [PATCH 2/2] [NVPTX] enforce signed 32 bit type for immediate offset
---
llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp | 9 +++++++--
llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp | 7 +++++--
llvm/test/CodeGen/NVPTX/addr-mode.ll | 5 +++--
.../SeparateConstOffsetFromGEP/NVPTX/split-gep.ll | 6 +++---
4 files changed, 18 insertions(+), 9 deletions(-)
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
index 371ec8596ef63..9fcacae9f7d45 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
@@ -3914,8 +3914,13 @@ bool NVPTXDAGToDAGISel::SelectADDRri_imp(
Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), mvt);
else
Base = Addr.getOperand(0);
- Offset = CurDAG->getTargetConstant(CN->getZExtValue(), SDLoc(OpNode),
- mvt);
+
+ // Offset is a signed i32 constant in PTX [register+offset] address mode
+ if (!CN->getAPIntValue().isSignedIntN(32))
+ return false;
+
+ Offset = CurDAG->getTargetConstant(CN->getSExtValue(), SDLoc(OpNode),
+ MVT::i32);
return true;
}
}
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
index bc23998455a68..00293d9d67401 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
@@ -5167,9 +5167,12 @@ bool NVPTXTargetLowering::isLegalAddressingMode(const DataLayout &DL,
// - [areg+immoff]
// - [immAddr]
- if (AM.BaseGV) {
+ // immoff is signed 32-bit
+ if (!APInt(64, AM.BaseOffs).isSignedIntN(32))
+ return false;
+
+ if (AM.BaseGV)
return !AM.BaseOffs && !AM.HasBaseReg && !AM.Scale;
- }
switch (AM.Scale) {
case 0: // "r", "r+i" or "i" is allowed
diff --git a/llvm/test/CodeGen/NVPTX/addr-mode.ll b/llvm/test/CodeGen/NVPTX/addr-mode.ll
index 46078aabad8c6..a6a085c0e2e33 100644
--- a/llvm/test/CodeGen/NVPTX/addr-mode.ll
+++ b/llvm/test/CodeGen/NVPTX/addr-mode.ll
@@ -71,11 +71,12 @@ define i32 @test_addr_mode_i64_large(ptr %x) {
; CHECK-LABEL: test_addr_mode_i64_large(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<2>;
-; CHECK-NEXT: .reg .b64 %rd<2>;
+; CHECK-NEXT: .reg .b64 %rd<3>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u64 %rd1, [test_addr_mode_i64_large_param_0];
-; CHECK-NEXT: ld.u32 %r1, [%rd1+17179869172];
+; CHECK-NEXT: add.s64 %rd2, %rd1, 17179869172;
+; CHECK-NEXT: ld.u32 %r1, [%rd2];
; CHECK-NEXT: st.param.b32 [func_retval0+0], %r1;
; CHECK-NEXT: ret;
%addr = getelementptr i32, ptr %x, i64 4294967293
diff --git a/llvm/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep.ll b/llvm/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep.ll
index f8eb50ef4b548..16e9e5eeb6143 100644
--- a/llvm/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep.ll
+++ b/llvm/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep.ll
@@ -405,9 +405,9 @@ define void @large_offset(ptr %out, i32 %in) {
; CHECK-SAME: ptr [[OUT:%.*]], i32 [[IN:%.*]]) {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
-; CHECK-NEXT: [[TMP1:%.*]] = zext nneg i32 [[TMP0]] to i64
-; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr [[OUT]], i64 [[TMP1]]
-; CHECK-NEXT: [[GETELEM:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i64 2147483648
+; CHECK-NEXT: [[ADD:%.*]] = add nuw nsw i32 [[TMP0]], 536870912
+; CHECK-NEXT: [[IDX:%.*]] = zext nneg i32 [[ADD]] to i64
+; CHECK-NEXT: [[GETELEM:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 [[IDX]]
; CHECK-NEXT: store i32 [[IN]], ptr [[GETELEM]], align 4
; CHECK-NEXT: ret void
;
More information about the llvm-commits
mailing list