[llvm] 560b72c - [NVPTX] Support address offsets added with disjoint or (#122042)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Jan 8 14:11:28 PST 2025
Author: Alex MacLean
Date: 2025-01-08T14:11:23-08:00
New Revision: 560b72c0408a8f7e4340a1d4197b164a14cd30b0
URL: https://github.com/llvm/llvm-project/commit/560b72c0408a8f7e4340a1d4197b164a14cd30b0
DIFF: https://github.com/llvm/llvm-project/commit/560b72c0408a8f7e4340a1d4197b164a14cd30b0.diff
LOG: [NVPTX] Support address offsets added with disjoint or (#122042)
Sometime DAGCombiner gets a little too clever and converts an add of a
small constant offset to a highly aligned pointer into a 'disjoint or'.
When looking for address operands handle this case as well.
Added:
llvm/test/CodeGen/NVPTX/disjoint-or-addr.ll
Modified:
llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
llvm/test/CodeGen/NVPTX/lower-args-gridconstant.ll
llvm/test/CodeGen/NVPTX/variadics-backend.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
index 5b4ac50c8fd7b0..ef97844142d403 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
@@ -14,6 +14,7 @@
#include "NVPTXUtilities.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/ISDOpcodes.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicsNVPTX.h"
@@ -2449,6 +2450,11 @@ bool NVPTXDAGToDAGISel::tryBFE(SDNode *N) {
return true;
}
+static inline bool isAddLike(const SDValue V) {
+ return V.getOpcode() == ISD::ADD ||
+ (V->getOpcode() == ISD::OR && V->getFlags().hasDisjoint());
+}
+
// SelectDirectAddr - Match a direct address for DAG.
// A direct address could be a globaladdress or externalsymbol.
bool NVPTXDAGToDAGISel::SelectDirectAddr(SDValue N, SDValue &Address) {
@@ -2475,7 +2481,7 @@ bool NVPTXDAGToDAGISel::SelectDirectAddr(SDValue N, SDValue &Address) {
// symbol+offset
bool NVPTXDAGToDAGISel::SelectADDRsi_imp(
SDNode *OpNode, SDValue Addr, SDValue &Base, SDValue &Offset, MVT mvt) {
- if (Addr.getOpcode() == ISD::ADD) {
+ if (isAddLike(Addr)) {
if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
SDValue base = Addr.getOperand(0);
if (SelectDirectAddr(base, Base)) {
@@ -2512,7 +2518,7 @@ bool NVPTXDAGToDAGISel::SelectADDRri_imp(
Addr.getOpcode() == ISD::TargetGlobalAddress)
return false; // direct calls.
- if (Addr.getOpcode() == ISD::ADD) {
+ if (isAddLike(Addr)) {
if (SelectDirectAddr(Addr.getOperand(0), Addr)) {
return false;
}
diff --git a/llvm/test/CodeGen/NVPTX/disjoint-or-addr.ll b/llvm/test/CodeGen/NVPTX/disjoint-or-addr.ll
new file mode 100644
index 00000000000000..1b1bb91d5c79e0
--- /dev/null
+++ b/llvm/test/CodeGen/NVPTX/disjoint-or-addr.ll
@@ -0,0 +1,25 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_35 -verify-machineinstrs | FileCheck %s
+; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_35 | %ptxas-verify %}
+target triple = "nvptx64-nvidia-cuda"
+
+ at a = external global ptr align 16
+
+define i32 @test_disjoint_or_addr(i16 %a) {
+; CHECK-LABEL: test_disjoint_or_addr(
+; CHECK: {
+; CHECK-NEXT: .reg .b32 %r<2>;
+; CHECK-NEXT: .reg .b64 %rd<3>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: mov.u64 %rd1, a;
+; CHECK-NEXT: cvta.global.u64 %rd2, %rd1;
+; CHECK-NEXT: ld.u32 %r1, [%rd2+8];
+; CHECK-NEXT: st.param.b32 [func_retval0], %r1;
+; CHECK-NEXT: ret;
+ %a1 = ptrtoint ptr @a to i64
+ %a2 = or disjoint i64 %a1, 8
+ %a3 = inttoptr i64 %a2 to ptr
+ %v = load i32, ptr %a3
+ ret i32 %v
+}
diff --git a/llvm/test/CodeGen/NVPTX/lower-args-gridconstant.ll b/llvm/test/CodeGen/NVPTX/lower-args-gridconstant.ll
index 27cf8ca5b61d69..208d4f0ef32ae4 100644
--- a/llvm/test/CodeGen/NVPTX/lower-args-gridconstant.ll
+++ b/llvm/test/CodeGen/NVPTX/lower-args-gridconstant.ll
@@ -29,7 +29,7 @@ define dso_local noundef i32 @non_kernel_function(ptr nocapture noundef readonly
; PTX-NEXT: .reg .pred %p<2>;
; PTX-NEXT: .reg .b16 %rs<3>;
; PTX-NEXT: .reg .b32 %r<11>;
-; PTX-NEXT: .reg .b64 %rd<10>;
+; PTX-NEXT: .reg .b64 %rd<9>;
; PTX-EMPTY:
; PTX-NEXT: // %bb.0: // %entry
; PTX-NEXT: mov.u64 %SPL, __local_depot0;
@@ -38,23 +38,22 @@ define dso_local noundef i32 @non_kernel_function(ptr nocapture noundef readonly
; PTX-NEXT: and.b16 %rs2, %rs1, 1;
; PTX-NEXT: setp.eq.b16 %p1, %rs2, 1;
; PTX-NEXT: ld.param.s32 %rd1, [non_kernel_function_param_2];
-; PTX-NEXT: add.u64 %rd2, %SP, 0;
-; PTX-NEXT: or.b64 %rd3, %rd2, 8;
-; PTX-NEXT: ld.param.u64 %rd4, [non_kernel_function_param_0+8];
-; PTX-NEXT: st.u64 [%rd3], %rd4;
-; PTX-NEXT: ld.param.u64 %rd5, [non_kernel_function_param_0];
-; PTX-NEXT: st.u64 [%SP], %rd5;
-; PTX-NEXT: mov.u64 %rd6, gi;
-; PTX-NEXT: cvta.global.u64 %rd7, %rd6;
-; PTX-NEXT: selp.b64 %rd8, %rd2, %rd7, %p1;
-; PTX-NEXT: add.s64 %rd9, %rd8, %rd1;
-; PTX-NEXT: ld.u8 %r1, [%rd9];
-; PTX-NEXT: ld.u8 %r2, [%rd9+1];
+; PTX-NEXT: ld.param.u64 %rd2, [non_kernel_function_param_0+8];
+; PTX-NEXT: st.u64 [%SP+8], %rd2;
+; PTX-NEXT: ld.param.u64 %rd3, [non_kernel_function_param_0];
+; PTX-NEXT: st.u64 [%SP], %rd3;
+; PTX-NEXT: mov.u64 %rd4, gi;
+; PTX-NEXT: cvta.global.u64 %rd5, %rd4;
+; PTX-NEXT: add.u64 %rd6, %SP, 0;
+; PTX-NEXT: selp.b64 %rd7, %rd6, %rd5, %p1;
+; PTX-NEXT: add.s64 %rd8, %rd7, %rd1;
+; PTX-NEXT: ld.u8 %r1, [%rd8];
+; PTX-NEXT: ld.u8 %r2, [%rd8+1];
; PTX-NEXT: shl.b32 %r3, %r2, 8;
; PTX-NEXT: or.b32 %r4, %r3, %r1;
-; PTX-NEXT: ld.u8 %r5, [%rd9+2];
+; PTX-NEXT: ld.u8 %r5, [%rd8+2];
; PTX-NEXT: shl.b32 %r6, %r5, 16;
-; PTX-NEXT: ld.u8 %r7, [%rd9+3];
+; PTX-NEXT: ld.u8 %r7, [%rd8+3];
; PTX-NEXT: shl.b32 %r8, %r7, 24;
; PTX-NEXT: or.b32 %r9, %r8, %r6;
; PTX-NEXT: or.b32 %r10, %r9, %r4;
diff --git a/llvm/test/CodeGen/NVPTX/variadics-backend.ll b/llvm/test/CodeGen/NVPTX/variadics-backend.ll
index cb54812dea6d98..f7ed690efabcf3 100644
--- a/llvm/test/CodeGen/NVPTX/variadics-backend.ll
+++ b/llvm/test/CodeGen/NVPTX/variadics-backend.ll
@@ -153,7 +153,7 @@ define dso_local i32 @variadics2(i32 noundef %first, ...) {
; CHECK-PTX-NEXT: .reg .b64 %SPL;
; CHECK-PTX-NEXT: .reg .b16 %rs<6>;
; CHECK-PTX-NEXT: .reg .b32 %r<7>;
-; CHECK-PTX-NEXT: .reg .b64 %rd<11>;
+; CHECK-PTX-NEXT: .reg .b64 %rd<7>;
; CHECK-PTX-EMPTY:
; CHECK-PTX-NEXT: // %bb.0: // %entry
; CHECK-PTX-NEXT: mov.u64 %SPL, __local_depot2;
@@ -163,24 +163,20 @@ define dso_local i32 @variadics2(i32 noundef %first, ...) {
; CHECK-PTX-NEXT: add.s64 %rd2, %rd1, 7;
; CHECK-PTX-NEXT: and.b64 %rd3, %rd2, -8;
; CHECK-PTX-NEXT: ld.u32 %r2, [%rd3];
-; CHECK-PTX-NEXT: or.b64 %rd4, %rd3, 4;
-; CHECK-PTX-NEXT: ld.s8 %r3, [%rd4];
-; CHECK-PTX-NEXT: or.b64 %rd5, %rd3, 5;
-; CHECK-PTX-NEXT: or.b64 %rd6, %rd3, 7;
-; CHECK-PTX-NEXT: ld.u8 %rs1, [%rd6];
+; CHECK-PTX-NEXT: ld.s8 %r3, [%rd3+4];
+; CHECK-PTX-NEXT: ld.u8 %rs1, [%rd3+7];
; CHECK-PTX-NEXT: st.u8 [%SP+2], %rs1;
-; CHECK-PTX-NEXT: ld.u8 %rs2, [%rd5];
-; CHECK-PTX-NEXT: or.b64 %rd7, %rd3, 6;
-; CHECK-PTX-NEXT: ld.u8 %rs3, [%rd7];
+; CHECK-PTX-NEXT: ld.u8 %rs2, [%rd3+5];
+; CHECK-PTX-NEXT: ld.u8 %rs3, [%rd3+6];
; CHECK-PTX-NEXT: shl.b16 %rs4, %rs3, 8;
; CHECK-PTX-NEXT: or.b16 %rs5, %rs4, %rs2;
; CHECK-PTX-NEXT: st.u16 [%SP], %rs5;
-; CHECK-PTX-NEXT: ld.u64 %rd8, [%rd3+8];
+; CHECK-PTX-NEXT: ld.u64 %rd4, [%rd3+8];
; CHECK-PTX-NEXT: add.s32 %r4, %r1, %r2;
; CHECK-PTX-NEXT: add.s32 %r5, %r4, %r3;
-; CHECK-PTX-NEXT: cvt.u64.u32 %rd9, %r5;
-; CHECK-PTX-NEXT: add.s64 %rd10, %rd9, %rd8;
-; CHECK-PTX-NEXT: cvt.u32.u64 %r6, %rd10;
+; CHECK-PTX-NEXT: cvt.u64.u32 %rd5, %r5;
+; CHECK-PTX-NEXT: add.s64 %rd6, %rd5, %rd4;
+; CHECK-PTX-NEXT: cvt.u32.u64 %r6, %rd6;
; CHECK-PTX-NEXT: st.param.b32 [func_retval0], %r6;
; CHECK-PTX-NEXT: ret;
entry:
@@ -219,7 +215,7 @@ define dso_local i32 @bar() {
; CHECK-PTX-NEXT: .reg .b64 %SPL;
; CHECK-PTX-NEXT: .reg .b16 %rs<10>;
; CHECK-PTX-NEXT: .reg .b32 %r<4>;
-; CHECK-PTX-NEXT: .reg .b64 %rd<8>;
+; CHECK-PTX-NEXT: .reg .b64 %rd<7>;
; CHECK-PTX-EMPTY:
; CHECK-PTX-NEXT: // %bb.0: // %entry
; CHECK-PTX-NEXT: mov.u64 %SPL, __local_depot3;
@@ -240,17 +236,16 @@ define dso_local i32 @bar() {
; CHECK-PTX-NEXT: st.u16 [%SP], %rs8;
; CHECK-PTX-NEXT: mov.b32 %r1, 1;
; CHECK-PTX-NEXT: st.u32 [%SP+8], %r1;
-; CHECK-PTX-NEXT: add.u64 %rd5, %SP, 8;
-; CHECK-PTX-NEXT: or.b64 %rd6, %rd5, 4;
; CHECK-PTX-NEXT: mov.b16 %rs9, 1;
-; CHECK-PTX-NEXT: st.u8 [%rd6], %rs9;
-; CHECK-PTX-NEXT: mov.b64 %rd7, 1;
-; CHECK-PTX-NEXT: st.u64 [%SP+16], %rd7;
+; CHECK-PTX-NEXT: st.u8 [%SP+12], %rs9;
+; CHECK-PTX-NEXT: mov.b64 %rd5, 1;
+; CHECK-PTX-NEXT: st.u64 [%SP+16], %rd5;
+; CHECK-PTX-NEXT: add.u64 %rd6, %SP, 8;
; CHECK-PTX-NEXT: { // callseq 1, 0
; CHECK-PTX-NEXT: .param .b32 param0;
; CHECK-PTX-NEXT: st.param.b32 [param0], 1;
; CHECK-PTX-NEXT: .param .b64 param1;
-; CHECK-PTX-NEXT: st.param.b64 [param1], %rd5;
+; CHECK-PTX-NEXT: st.param.b64 [param1], %rd6;
; CHECK-PTX-NEXT: .param .b32 retval0;
; CHECK-PTX-NEXT: call.uni (retval0),
; CHECK-PTX-NEXT: variadics2,
More information about the llvm-commits
mailing list