[llvm] [SDAG] Fix CSE for ADDRSPACECAST nodes (PR #122912)

Alex MacLean via llvm-commits llvm-commits at lists.llvm.org
Sun Jan 19 16:15:31 PST 2025


https://github.com/AlexMaclean updated https://github.com/llvm/llvm-project/pull/122912

>From 7176ce0db58dca7531c37cb0ce2fe326b21134f4 Mon Sep 17 00:00:00 2001
From: Alex Maclean <amaclean at nvidia.com>
Date: Tue, 14 Jan 2025 15:03:35 +0000
Subject: [PATCH 1/2] [SDAG] Fix CSE for ADDRSPACECAST nodes

---
 .../lib/CodeGen/SelectionDAG/SelectionDAG.cpp |  6 ++++++
 llvm/test/CodeGen/NVPTX/addrspacecast-cse.ll  | 19 +++++++++++++++++++
 2 files changed, 25 insertions(+)
 create mode 100644 llvm/test/CodeGen/NVPTX/addrspacecast-cse.ll

diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 0dfd0302ae5438..743ae4895a1b1c 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -954,6 +954,12 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) {
       ID.AddInteger(M);
     break;
   }
+  case ISD::ADDRSPACECAST: {
+    const AddrSpaceCastSDNode *ASC = cast<AddrSpaceCastSDNode>(N);
+    ID.AddInteger(ASC->getSrcAddressSpace());
+    ID.AddInteger(ASC->getDestAddressSpace());
+    break;
+  }
   case ISD::TargetBlockAddress:
   case ISD::BlockAddress: {
     const BlockAddressSDNode *BA = cast<BlockAddressSDNode>(N);
diff --git a/llvm/test/CodeGen/NVPTX/addrspacecast-cse.ll b/llvm/test/CodeGen/NVPTX/addrspacecast-cse.ll
new file mode 100644
index 00000000000000..a1bf8042817b43
--- /dev/null
+++ b/llvm/test/CodeGen/NVPTX/addrspacecast-cse.ll
@@ -0,0 +1,19 @@
+; RUN: llc < %s -mcpu=sm_80 -mattr=+ptx73 -debug-only=isel -o /dev/null 2>&1 | FileCheck %s
+
+; REQUIRES: asserts
+
+target triple = "nvptx64-nvidia-cuda"
+
+;; Selection DAG CSE is hard to test since we run CSE/GVN on the IR before and
+;; after selection DAG ISel so most cases will be handled by one of these.
+define void @foo(ptr %p) {
+; CHECK-LABEL: Optimized legalized selection DAG: %bb.0 'foo:'
+; CHECK:       addrspacecast[0 -> 5]
+; CHECK-NOT:   addrspacecast[0 -> 5]
+; CHECK-LABEL: ===== Instruction selection begins
+;
+  %a1 = addrspacecast ptr %p to ptr addrspace(5)
+  call void @llvm.stackrestore(ptr %p)
+  store ptr %p, ptr addrspace(5) %a1
+  ret void
+}

>From 7e9217bda44b292c05eae898c1f3c234d0bf5223 Mon Sep 17 00:00:00 2001
From: Alex Maclean <amaclean at nvidia.com>
Date: Mon, 20 Jan 2025 00:15:07 +0000
Subject: [PATCH 2/2] address comments -- fixup test somewhat

---
 llvm/test/CodeGen/NVPTX/addrspacecast-cse.ll | 24 ++++++++++++--------
 1 file changed, 14 insertions(+), 10 deletions(-)

diff --git a/llvm/test/CodeGen/NVPTX/addrspacecast-cse.ll b/llvm/test/CodeGen/NVPTX/addrspacecast-cse.ll
index a1bf8042817b43..5053a2296466b8 100644
--- a/llvm/test/CodeGen/NVPTX/addrspacecast-cse.ll
+++ b/llvm/test/CodeGen/NVPTX/addrspacecast-cse.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mcpu=sm_80 -mattr=+ptx73 -debug-only=isel -o /dev/null 2>&1 | FileCheck %s
+; RUN: llc < %s -O0 -debug-only=isel -o /dev/null 2>&1 | FileCheck %s
 
 ; REQUIRES: asserts
 
@@ -7,13 +7,17 @@ target triple = "nvptx64-nvidia-cuda"
 ;; Selection DAG CSE is hard to test since we run CSE/GVN on the IR before and
 ;; after selection DAG ISel so most cases will be handled by one of these.
 define void @foo(ptr %p) {
-; CHECK-LABEL: Optimized legalized selection DAG: %bb.0 'foo:'
-; CHECK:       addrspacecast[0 -> 5]
-; CHECK-NOT:   addrspacecast[0 -> 5]
-; CHECK-LABEL: ===== Instruction selection begins
+; CHECK-LABEL: Initial selection DAG
 ;
-  %a1 = addrspacecast ptr %p to ptr addrspace(5)
-  call void @llvm.stackrestore(ptr %p)
-  store ptr %p, ptr addrspace(5) %a1
-  ret void
-}
+; CHECK:  [[ASC:t[0-9]+]]{{.*}} = addrspacecast
+; CHECK:                          store{{.*}} [[ASC]]
+; CHECK:                          store{{.*}} [[ASC]]
+;
+; CHECK-LABEL: Optimized lowered selection
+;
+   %a1 = addrspacecast ptr %p to ptr addrspace(5)
+   %a2 = addrspacecast ptr %p to ptr addrspace(5)
+   store i32 0, ptr addrspace(5) %a1
+   store i32 0, ptr addrspace(5) %a2
+   ret void
+ }
\ No newline at end of file



More information about the llvm-commits mailing list