[llvm] [NVPTX] Custom lower ADDRSPACECAST (PR #125607)

Justin Fargnoli via llvm-commits llvm-commits at lists.llvm.org
Mon Feb 3 16:24:53 PST 2025


https://github.com/justinfargnoli created https://github.com/llvm/llvm-project/pull/125607

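Avoid [crashing](https://godbolt.org/z/8T58vcM68) when lowering `addrspacecast ptr addrspace(<non-zero>) %ptr to ptr addrspace(<non-zero>)`. Such a cast, where neither the source nor the destination is the generic address space, is now custom lowered into two casts: first from the source address space to the generic address space, then from the generic address space to the destination address space. Casts that already involve the generic address space are left unchanged.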

>From f5be3c4574dcfea9936cb1bd653fde57d8e17c16 Mon Sep 17 00:00:00 2001
From: Justin Fargnoli <jfargnoli at nvidia.com>
Date: Sun, 2 Feb 2025 19:40:45 -0800
Subject: [PATCH 1/3] [NVPTX] Custom lower ADDRSPACECAST

---
 llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp | 21 ++++++++++++++++++++-
 llvm/lib/Target/NVPTX/NVPTXISelLowering.h   |  1 +
 llvm/test/CodeGen/NVPTX/addrspacecast.ll    | 14 ++++++++++++++
 3 files changed, 35 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
index 773c97f7b4dc0f..0ae185226ab7e5 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
@@ -989,6 +989,8 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM,
     setOperationAction(ISD::FLOG2, {MVT::v2f16, MVT::v2bf16}, Expand);
   }
 
+  setOperationAction(ISD::ADDRSPACECAST, {MVT::i32, MVT::i64}, Custom);
+
   // No FPOW or FREM in PTX.
 
   // Now deduce the information based on the above mentioned
@@ -2652,6 +2654,8 @@ NVPTXTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
     return SDValue();
   case ISD::FRAMEADDR:
     return SDValue();
+  case ISD::ADDRSPACECAST:
+    return LowerADDRSPACECAST(Op, DAG);
   case ISD::GlobalAddress:
     return LowerGlobalAddress(Op, DAG);
   case ISD::INTRINSIC_W_CHAIN:
@@ -2726,7 +2730,6 @@ NVPTXTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
   case ISD::FMUL:
     // Used only for bf16 on SM80, where we select fma for non-ftz operation
     return PromoteBinOpIfF32FTZ(Op, DAG);
-
   default:
     llvm_unreachable("Custom lowering not defined for operation");
   }
@@ -2767,6 +2770,22 @@ unsigned NVPTXTargetLowering::getJumpTableEncoding() const {
   return MachineJumpTableInfo::EK_Inline;
 }
 
+SDValue NVPTXTargetLowering::LowerADDRSPACECAST(SDValue Op, SelectionDAG &DAG) const {
+  SDLoc DL(Op);
+  AddrSpaceCastSDNode *N = cast<AddrSpaceCastSDNode>(Op.getNode());
+
+  EVT OperandVT = Op.getOperand(0).getValueType();
+  unsigned SrcAS = N->getSrcAddressSpace();
+  EVT ResultVT = Op.getValueType();
+  unsigned DestAS = N->getDestAddressSpace();
+
+  if (SrcAS == llvm::ADDRESS_SPACE_GENERIC || DestAS == llvm::ADDRESS_SPACE_GENERIC)
+    return Op;
+
+  SDValue ToGeneric = DAG.getAddrSpaceCast(DL, OperandVT, Op.getOperand(0), SrcAS, llvm::ADDRESS_SPACE_GENERIC);
+  return DAG.getAddrSpaceCast(DL, ResultVT, ToGeneric, llvm::ADDRESS_SPACE_GENERIC, DestAS);
+}
+
 // This function is almost a copy of SelectionDAG::expandVAArg().
 // The only diff is that this one produces loads from local address space.
 SDValue NVPTXTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.h b/llvm/lib/Target/NVPTX/NVPTXISelLowering.h
index 5adf69d621552f..74ec14ba5f8e32 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.h
+++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.h
@@ -264,6 +264,7 @@ class NVPTXTargetLowering : public TargetLowering {
   const NVPTXSubtarget &STI; // cache the subtarget here
   SDValue getParamSymbol(SelectionDAG &DAG, int idx, EVT) const;
 
+  SDValue LowerADDRSPACECAST(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerBITCAST(SDValue Op, SelectionDAG &DAG) const;
 
   SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
diff --git a/llvm/test/CodeGen/NVPTX/addrspacecast.ll b/llvm/test/CodeGen/NVPTX/addrspacecast.ll
index 23428b3728674e..e3ebb2f458d46a 100644
--- a/llvm/test/CodeGen/NVPTX/addrspacecast.ll
+++ b/llvm/test/CodeGen/NVPTX/addrspacecast.ll
@@ -99,6 +99,20 @@ define i32 @conv8(ptr %ptr) {
   ret i32 %val
 }
 
+; ALL-LABEL: conv9
+define i32 @conv9(ptr addrspace(1) %ptr) {
+; CLS32: cvta.global.u32
+; CLS32: cvta.to.shared.u32
+; CLS64: cvta.global.u64
+; CLS64: cvta.to.shared.u64
+; PTRCONV: cvt.u32.u64
+; NOPTRCONV-NOT: cvt.u32.u64
+; ALL: ld.shared.u32
+  %specptr = addrspacecast ptr addrspace(1) %ptr to ptr addrspace(3)
+  %val = load i32, ptr addrspace(3) %specptr
+  ret i32 %val
+}
+
 ; Check that we support addrspacecast when splitting the vector
 ; result (<2 x ptr> => 2 x <1 x ptr>).
 ; This also checks that scalarization works for addrspacecast

>From e03397b594b3d422683adcc502a7f45d5d187eca Mon Sep 17 00:00:00 2001
From: Justin Fargnoli <jfargnoli at nvidia.com>
Date: Sun, 2 Feb 2025 19:43:20 -0800
Subject: [PATCH 2/3] Fix whitespace

---
 llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
index 0ae185226ab7e5..9badb587a501dd 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
@@ -2730,6 +2730,7 @@ NVPTXTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
   case ISD::FMUL:
     // Used only for bf16 on SM80, where we select fma for non-ftz operation
     return PromoteBinOpIfF32FTZ(Op, DAG);
+
   default:
     llvm_unreachable("Custom lowering not defined for operation");
   }

>From f3ce5533747af9078028a5222f594f84fee16b96 Mon Sep 17 00:00:00 2001
From: Justin Fargnoli <jfargnoli at nvidia.com>
Date: Sun, 2 Feb 2025 19:43:50 -0800
Subject: [PATCH 3/3] clang-format

---
 llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
index 9badb587a501dd..962c971e6970cd 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
@@ -2771,7 +2771,8 @@ unsigned NVPTXTargetLowering::getJumpTableEncoding() const {
   return MachineJumpTableInfo::EK_Inline;
 }
 
-SDValue NVPTXTargetLowering::LowerADDRSPACECAST(SDValue Op, SelectionDAG &DAG) const {
+SDValue NVPTXTargetLowering::LowerADDRSPACECAST(SDValue Op,
+                                                SelectionDAG &DAG) const {
   SDLoc DL(Op);
   AddrSpaceCastSDNode *N = cast<AddrSpaceCastSDNode>(Op.getNode());
 
@@ -2780,11 +2781,14 @@ SDValue NVPTXTargetLowering::LowerADDRSPACECAST(SDValue Op, SelectionDAG &DAG) c
   EVT ResultVT = Op.getValueType();
   unsigned DestAS = N->getDestAddressSpace();
 
-  if (SrcAS == llvm::ADDRESS_SPACE_GENERIC || DestAS == llvm::ADDRESS_SPACE_GENERIC)
+  if (SrcAS == llvm::ADDRESS_SPACE_GENERIC ||
+      DestAS == llvm::ADDRESS_SPACE_GENERIC)
     return Op;
 
-  SDValue ToGeneric = DAG.getAddrSpaceCast(DL, OperandVT, Op.getOperand(0), SrcAS, llvm::ADDRESS_SPACE_GENERIC);
-  return DAG.getAddrSpaceCast(DL, ResultVT, ToGeneric, llvm::ADDRESS_SPACE_GENERIC, DestAS);
+  SDValue ToGeneric = DAG.getAddrSpaceCast(DL, OperandVT, Op.getOperand(0),
+                                           SrcAS, llvm::ADDRESS_SPACE_GENERIC);
+  return DAG.getAddrSpaceCast(DL, ResultVT, ToGeneric,
+                              llvm::ADDRESS_SPACE_GENERIC, DestAS);
 }
 
 // This function is almost a copy of SelectionDAG::expandVAArg().



More information about the llvm-commits mailing list