[llvm] [DAGCombiner] Add some very basic folds for ADDRSPACECAST (PR #127733)
Alex MacLean via llvm-commits
llvm-commits at lists.llvm.org
Wed Feb 19 08:06:59 PST 2025
https://github.com/AlexMaclean updated https://github.com/llvm/llvm-project/pull/127733
>From 1833ddf13cba1cadfb0beba35487839e1c8051b1 Mon Sep 17 00:00:00 2001
From: Alex Maclean <amaclean at nvidia.com>
Date: Wed, 19 Feb 2025 00:20:32 +0000
Subject: [PATCH 1/3] pre-commit tests
---
.../CodeGen/NVPTX/addrspacecast-folding.ll | 37 +++++++++++++++++++
1 file changed, 37 insertions(+)
create mode 100644 llvm/test/CodeGen/NVPTX/addrspacecast-folding.ll
diff --git a/llvm/test/CodeGen/NVPTX/addrspacecast-folding.ll b/llvm/test/CodeGen/NVPTX/addrspacecast-folding.ll
new file mode 100644
index 0000000000000..05eb0385eb571
--- /dev/null
+++ b/llvm/test/CodeGen/NVPTX/addrspacecast-folding.ll
@@ -0,0 +1,37 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mcpu=sm_20 -O0 | FileCheck %s
+; RUN: %if ptxas %{ llc < %s -mcpu=sm_20 -O0 | %ptxas-verify %}
+
+target triple = "nvptx64-unknown-unknown"
+
+define ptr @test1(ptr %p) {
+; CHECK-LABEL: test1(
+; CHECK: {
+; CHECK-NEXT: .reg .b64 %rd<4>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.u64 %rd1, [test1_param_0];
+; CHECK-NEXT: cvta.to.local.u64 %rd2, %rd1;
+; CHECK-NEXT: cvta.local.u64 %rd3, %rd2;
+; CHECK-NEXT: st.param.b64 [func_retval0], %rd3;
+; CHECK-NEXT: ret;
+ %a = addrspacecast ptr %p to ptr addrspace(5)
+ %b = addrspacecast ptr addrspace(5) %a to ptr
+ ret ptr %b
+}
+
+define ptr addrspace(1) @test2(ptr addrspace(5) %p) {
+; CHECK-LABEL: test2(
+; CHECK: {
+; CHECK-NEXT: .reg .b64 %rd<4>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.u64 %rd1, [test2_param_0];
+; CHECK-NEXT: cvta.local.u64 %rd2, %rd1;
+; CHECK-NEXT: cvta.to.global.u64 %rd3, %rd2;
+; CHECK-NEXT: st.param.b64 [func_retval0], %rd3;
+; CHECK-NEXT: ret;
+ %a = addrspacecast ptr addrspace(5) %p to ptr
+ %b = addrspacecast ptr %a to ptr addrspace(1)
+ ret ptr addrspace(1) %b
+}
>From a304a777b8e18513d1eed6ee9a469a1318905190 Mon Sep 17 00:00:00 2001
From: Alex Maclean <amaclean at nvidia.com>
Date: Wed, 19 Feb 2025 00:21:55 +0000
Subject: [PATCH 2/3] [DAGCombiner] Add very basic folds for ADDRSPACECAST
---
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 21 +++++++++++++++++++
.../CodeGen/NVPTX/addrspacecast-folding.ll | 11 +++-------
2 files changed, 24 insertions(+), 8 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index bc7cdf38dbc2a..1ed3da6e2dd12 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -488,6 +488,7 @@ namespace {
SDValue visitTRUNCATE(SDNode *N);
SDValue visitTRUNCATE_USAT_U(SDNode *N);
SDValue visitBITCAST(SDNode *N);
+ SDValue visitADDRSPACECAST(SDNode *N);
SDValue visitFREEZE(SDNode *N);
SDValue visitBUILD_PAIR(SDNode *N);
SDValue visitFADD(SDNode *N);
@@ -1920,6 +1921,7 @@ SDValue DAGCombiner::visit(SDNode *N) {
case ISD::TRUNCATE: return visitTRUNCATE(N);
case ISD::TRUNCATE_USAT_U: return visitTRUNCATE_USAT_U(N);
case ISD::BITCAST: return visitBITCAST(N);
+ case ISD::ADDRSPACECAST: return visitADDRSPACECAST(N);
case ISD::BUILD_PAIR: return visitBUILD_PAIR(N);
case ISD::FADD: return visitFADD(N);
case ISD::STRICT_FADD: return visitSTRICT_FADD(N);
@@ -16054,6 +16056,25 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) {
return SDValue();
}
+SDValue DAGCombiner::visitADDRSPACECAST(SDNode *N) {
+ auto *ASCN1 = cast<AddrSpaceCastSDNode>(N);
+
+ if (auto *ASCN2 = dyn_cast<AddrSpaceCastSDNode>(ASCN1->getOperand(0))) {
+ assert(ASCN2->getDestAddressSpace() == ASCN1->getSrcAddressSpace());
+
+ // Round trip back to the source space: asc[B -> A](asc[A -> B](x)) -> x
+ if (ASCN1->getDestAddressSpace() == ASCN2->getSrcAddressSpace())
+ return ASCN2->getOperand(0);
+
+ // Else merge into one cast: asc[B -> C](asc[A -> B](x)) -> asc[A -> C](x)
+ return DAG.getAddrSpaceCast(
+ SDLoc(N), N->getValueType(0), ASCN2->getOperand(0),
+ ASCN2->getSrcAddressSpace(), ASCN1->getDestAddressSpace());
+ }
+
+ return SDValue();
+}
+
SDValue DAGCombiner::visitBUILD_PAIR(SDNode *N) {
EVT VT = N->getValueType(0);
return CombineConsecutiveLoads(N, VT);
diff --git a/llvm/test/CodeGen/NVPTX/addrspacecast-folding.ll b/llvm/test/CodeGen/NVPTX/addrspacecast-folding.ll
index 05eb0385eb571..11c2b6782e0d3 100644
--- a/llvm/test/CodeGen/NVPTX/addrspacecast-folding.ll
+++ b/llvm/test/CodeGen/NVPTX/addrspacecast-folding.ll
@@ -7,13 +7,11 @@ target triple = "nvptx64-unknown-unknown"
define ptr @test1(ptr %p) {
; CHECK-LABEL: test1(
; CHECK: {
-; CHECK-NEXT: .reg .b64 %rd<4>;
+; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u64 %rd1, [test1_param_0];
-; CHECK-NEXT: cvta.to.local.u64 %rd2, %rd1;
-; CHECK-NEXT: cvta.local.u64 %rd3, %rd2;
-; CHECK-NEXT: st.param.b64 [func_retval0], %rd3;
+; CHECK-NEXT: st.param.b64 [func_retval0], %rd1;
; CHECK-NEXT: ret;
%a = addrspacecast ptr %p to ptr addrspace(5)
%b = addrspacecast ptr addrspace(5) %a to ptr
@@ -23,13 +21,10 @@ define ptr @test1(ptr %p) {
define ptr addrspace(1) @test2(ptr addrspace(5) %p) {
; CHECK-LABEL: test2(
; CHECK: {
-; CHECK-NEXT: .reg .b64 %rd<4>;
+; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u64 %rd1, [test2_param_0];
-; CHECK-NEXT: cvta.local.u64 %rd2, %rd1;
-; CHECK-NEXT: cvta.to.global.u64 %rd3, %rd2;
-; CHECK-NEXT: st.param.b64 [func_retval0], %rd3;
; CHECK-NEXT: ret;
%a = addrspacecast ptr addrspace(5) %p to ptr
%b = addrspacecast ptr %a to ptr addrspace(1)
>From 6dc9383f637f7b656e8f06cc87227e694ba5ea93 Mon Sep 17 00:00:00 2001
From: Alex Maclean <amaclean at nvidia.com>
Date: Wed, 19 Feb 2025 16:06:22 +0000
Subject: [PATCH 3/3] fixup tests
---
.../codegen-prepare-addrspacecast-non-null.ll | 32 +++++++++----------
llvm/test/CodeGen/SystemZ/mixed-ptr-sizes.ll | 6 ++--
2 files changed, 18 insertions(+), 20 deletions(-)
diff --git a/llvm/test/CodeGen/AMDGPU/codegen-prepare-addrspacecast-non-null.ll b/llvm/test/CodeGen/AMDGPU/codegen-prepare-addrspacecast-non-null.ll
index 3216e71e6221a..9b79d528c14a2 100644
--- a/llvm/test/CodeGen/AMDGPU/codegen-prepare-addrspacecast-non-null.ll
+++ b/llvm/test/CodeGen/AMDGPU/codegen-prepare-addrspacecast-non-null.ll
@@ -320,13 +320,6 @@ define i32 @cast_private_to_flat_to_local(ptr addrspace(5) %private.ptr) {
; DAGISEL-ASM-LABEL: cast_private_to_flat_to_local:
; DAGISEL-ASM: ; %bb.0:
; DAGISEL-ASM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; DAGISEL-ASM-NEXT: s_mov_b64 s[4:5], src_private_base
-; DAGISEL-ASM-NEXT: v_mov_b32_e32 v1, s5
-; DAGISEL-ASM-NEXT: v_cmp_ne_u32_e32 vcc, -1, v0
-; DAGISEL-ASM-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
-; DAGISEL-ASM-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc
-; DAGISEL-ASM-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[0:1]
-; DAGISEL-ASM-NEXT: v_cndmask_b32_e32 v0, -1, v0, vcc
; DAGISEL-ASM-NEXT: ds_read_b32 v0, v0
; DAGISEL-ASM-NEXT: s_waitcnt lgkmcnt(0)
; DAGISEL-ASM-NEXT: s_setpc_b64 s[30:31]
@@ -359,15 +352,22 @@ define i32 @cast_private_to_flat_to_global(ptr addrspace(6) %const32.ptr) {
; OPT-NEXT: [[LOAD:%.*]] = load volatile i32, ptr addrspace(3) [[LOCAL_PTR]], align 4
; OPT-NEXT: ret i32 [[LOAD]]
;
-; ASM-LABEL: cast_private_to_flat_to_global:
-; ASM: ; %bb.0:
-; ASM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; ASM-NEXT: v_mov_b32_e32 v1, 0
-; ASM-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[0:1]
-; ASM-NEXT: v_cndmask_b32_e32 v0, -1, v0, vcc
-; ASM-NEXT: ds_read_b32 v0, v0
-; ASM-NEXT: s_waitcnt lgkmcnt(0)
-; ASM-NEXT: s_setpc_b64 s[30:31]
+; DAGISEL-ASM-LABEL: cast_private_to_flat_to_global:
+; DAGISEL-ASM: ; %bb.0:
+; DAGISEL-ASM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; DAGISEL-ASM-NEXT: ds_read_b32 v0, v0
+; DAGISEL-ASM-NEXT: s_waitcnt lgkmcnt(0)
+; DAGISEL-ASM-NEXT: s_setpc_b64 s[30:31]
+;
+; GISEL-ASM-LABEL: cast_private_to_flat_to_global:
+; GISEL-ASM: ; %bb.0:
+; GISEL-ASM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-ASM-NEXT: v_mov_b32_e32 v1, 0
+; GISEL-ASM-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[0:1]
+; GISEL-ASM-NEXT: v_cndmask_b32_e32 v0, -1, v0, vcc
+; GISEL-ASM-NEXT: ds_read_b32 v0, v0
+; GISEL-ASM-NEXT: s_waitcnt lgkmcnt(0)
+; GISEL-ASM-NEXT: s_setpc_b64 s[30:31]
%flat.ptr = addrspacecast ptr addrspace(6) %const32.ptr to ptr
%local.ptr = addrspacecast ptr %flat.ptr to ptr addrspace(3)
%load = load volatile i32, ptr addrspace(3) %local.ptr
diff --git a/llvm/test/CodeGen/SystemZ/mixed-ptr-sizes.ll b/llvm/test/CodeGen/SystemZ/mixed-ptr-sizes.ll
index a95f68b5e118d..b46069c782968 100644
--- a/llvm/test/CodeGen/SystemZ/mixed-ptr-sizes.ll
+++ b/llvm/test/CodeGen/SystemZ/mixed-ptr-sizes.ll
@@ -332,9 +332,7 @@ define signext i32 @setlength() {
; CHECK: lgr [[MALLOC:[0-9]+]],3
; CHECK: basr 7,6
; CHECK: lgr [[LENGTH:[0-9]+]],3
-; CHECK: la [[ADDR:[0-9]+]],4([[MALLOC]])
-; CHECK: llgtr [[ADDR]],[[ADDR]]
-; CHECK: stg [[LENGTH]],0([[ADDR]])
+; CHECK: stg [[LENGTH]],4([[MALLOC]])
entry:
%call = tail call ptr @__malloc31(i64 noundef 8)
%call1 = tail call signext i32 @foo()
@@ -357,7 +355,7 @@ define signext i32 @setlength2() {
; CHECK: basr 7,6
; CHECK: lgr [[LENGTH:[0-9]+]],3
; CHECK: ahi [[MALLOC]],4
-; CHECK: llgtr [[ADDR]],[[MALLOC]]
+; CHECK: llgtr [[ADDR:[0-9]+]],[[MALLOC]]
; CHECK: stg [[LENGTH]],0([[ADDR]])
entry:
%call = tail call ptr addrspace(1) @domalloc(i64 noundef 8)
More information about the llvm-commits
mailing list