[llvm] 1706960 - AMDGPU/R600: Special case addrspacecast lowering for null

Matt Arsenault via llvm-commits llvm-commits at lists.llvm.org
Sun Dec 18 05:02:52 PST 2022


Author: Matt Arsenault
Date: 2022-12-18T08:02:45-05:00
New Revision: 17069608940d22cd6266afb948443b11793f0a57

URL: https://github.com/llvm/llvm-project/commit/17069608940d22cd6266afb948443b11793f0a57
DIFF: https://github.com/llvm/llvm-project/commit/17069608940d22cd6266afb948443b11793f0a57.diff

LOG: AMDGPU/R600: Special case addrspacecast lowering for null

Due to poor support for non-0 null pointers, clang always emits
addrspacecast from a null flat constant for private/local null. We can
trivially handle this case for old hardware.

Should fix issue 55679.

Added: 
    llvm/test/CodeGen/AMDGPU/addrspacecast.r600.ll

Modified: 
    llvm/lib/Target/AMDGPU/R600ISelLowering.cpp
    llvm/lib/Target/AMDGPU/R600ISelLowering.h

Removed: 
    


################################################################################
diff --git a/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp b/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp
index 2b2d27ca1694..22457723fc46 100644
--- a/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp
@@ -18,6 +18,7 @@
 #include "R600InstrInfo.h"
 #include "R600MachineFunctionInfo.h"
 #include "R600Subtarget.h"
+#include "R600TargetMachine.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/IR/IntrinsicsAMDGPU.h"
 #include "llvm/IR/IntrinsicsR600.h"
@@ -180,6 +181,7 @@ R600TargetLowering::R600TargetLowering(const TargetMachine &TM,
     setHasExtractBitsInsn(true);
 
   setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
+  setOperationAction(ISD::ADDRSPACECAST, MVT::i32, Custom);
 
   const MVT ScalarIntVTs[] = { MVT::i32, MVT::i64 };
   for (MVT VT : ScalarIntVTs)
@@ -418,6 +420,8 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
   case ISD::BRCOND: return LowerBRCOND(Op, DAG);
   case ISD::GlobalAddress: return LowerGlobalAddress(MFI, Op, DAG);
   case ISD::FrameIndex: return lowerFrameIndex(Op, DAG);
+  case ISD::ADDRSPACECAST:
+    return lowerADDRSPACECAST(Op, DAG);
   case ISD::INTRINSIC_VOID: {
     SDValue Chain = Op.getOperand(0);
     unsigned IntrinsicID =
@@ -937,6 +941,26 @@ SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const
       DAG.getCondCode(ISD::SETNE));
 }
 
+SDValue R600TargetLowering::lowerADDRSPACECAST(SDValue Op,
+                                               SelectionDAG &DAG) const {
+  SDLoc SL(Op);
+  EVT VT = Op.getValueType();
+
+  const R600TargetMachine &TM =
+      static_cast<const R600TargetMachine &>(getTargetMachine());
+
+  const AddrSpaceCastSDNode *ASC = cast<AddrSpaceCastSDNode>(Op);
+  unsigned SrcAS = ASC->getSrcAddressSpace();
+  unsigned DestAS = ASC->getDestAddressSpace();
+
+  if (auto *ConstSrc = dyn_cast<ConstantSDNode>(Op.getOperand(0))) {
+    if (SrcAS == AMDGPUAS::FLAT_ADDRESS && ConstSrc->isNullValue())
+      return DAG.getConstant(TM.getNullPointerValue(DestAS), SL, VT);
+  }
+
+  return Op;
+}
+
 /// LLVM generates byte-addressed pointers.  For indirect addressing, we need to
 /// convert these pointers to a register index.  Each register holds
 /// 16 bytes, (4 x 32bit sub-register), but we need to take into account the

diff --git a/llvm/lib/Target/AMDGPU/R600ISelLowering.h b/llvm/lib/Target/AMDGPU/R600ISelLowering.h
index 0484ff5e7b7f..8a5479db4ee6 100644
--- a/llvm/lib/Target/AMDGPU/R600ISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/R600ISelLowering.h
@@ -92,6 +92,7 @@ class R600TargetLowering final : public AMDGPUTargetLowering {
 
   SDValue lowerPrivateExtLoad(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;
+  SDValue lowerADDRSPACECAST(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerTrig(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerShiftParts(SDValue Op, SelectionDAG &DAG) const;

diff --git a/llvm/test/CodeGen/AMDGPU/addrspacecast.r600.ll b/llvm/test/CodeGen/AMDGPU/addrspacecast.r600.ll
new file mode 100644
index 000000000000..1895a710b878
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/addrspacecast.r600.ll
@@ -0,0 +1,84 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -march=r600 -mcpu=cayman < %s | FileCheck %s
+
+define amdgpu_kernel void @addrspacecast_flat_to_global(ptr addrspace(1) %out, ptr %src.ptr) {
+; CHECK-LABEL: addrspacecast_flat_to_global:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ALU 2, @4, KC0[CB0:0-32], KC1[]
+; CHECK-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T1.X, T0.X
+; CHECK-NEXT:    CF_END
+; CHECK-NEXT:    PAD
+; CHECK-NEXT:    ALU clause starting at 4:
+; CHECK-NEXT:     LSHR * T0.X, KC0[2].Y, literal.x,
+; CHECK-NEXT:    2(2.802597e-45), 0(0.000000e+00)
+; CHECK-NEXT:     MOV * T1.X, KC0[2].Z,
+  %cast = addrspacecast ptr %src.ptr to ptr addrspace(1)
+  store ptr addrspace(1) %cast, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_kernel void @addrspacecast_global_to_flat(ptr addrspace(1) %out, ptr addrspace(1) %src.ptr) {
+; CHECK-LABEL: addrspacecast_global_to_flat:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ALU 2, @4, KC0[CB0:0-32], KC1[]
+; CHECK-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T1.X, T0.X
+; CHECK-NEXT:    CF_END
+; CHECK-NEXT:    PAD
+; CHECK-NEXT:    ALU clause starting at 4:
+; CHECK-NEXT:     LSHR * T0.X, KC0[2].Y, literal.x,
+; CHECK-NEXT:    2(2.802597e-45), 0(0.000000e+00)
+; CHECK-NEXT:     MOV * T1.X, KC0[2].Z,
+  %cast = addrspacecast ptr addrspace(1) %src.ptr to ptr
+  store ptr %cast, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_kernel void @addrspacecast_flat_null_to_local(ptr addrspace(1) %out) {
+; CHECK-LABEL: addrspacecast_flat_null_to_local:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ALU 3, @4, KC0[CB0:0-32], KC1[]
+; CHECK-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T0.X, T1.X
+; CHECK-NEXT:    CF_END
+; CHECK-NEXT:    PAD
+; CHECK-NEXT:    ALU clause starting at 4:
+; CHECK-NEXT:     MOV * T0.X, literal.x,
+; CHECK-NEXT:    -1(nan), 0(0.000000e+00)
+; CHECK-NEXT:     LSHR * T1.X, KC0[2].Y, literal.x,
+; CHECK-NEXT:    2(2.802597e-45), 0(0.000000e+00)
+  store ptr addrspace(3) addrspacecast (ptr null to ptr addrspace(3)), ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_kernel void @addrspacecast_flat_null_to_global(ptr addrspace(1) %out) {
+; CHECK-LABEL: addrspacecast_flat_null_to_global:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ALU 3, @4, KC0[CB0:0-32], KC1[]
+; CHECK-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T0.X, T1.X
+; CHECK-NEXT:    CF_END
+; CHECK-NEXT:    PAD
+; CHECK-NEXT:    ALU clause starting at 4:
+; CHECK-NEXT:     MOV * T0.X, literal.x,
+; CHECK-NEXT:    0(0.000000e+00), 0(0.000000e+00)
+; CHECK-NEXT:     LSHR * T1.X, KC0[2].Y, literal.x,
+; CHECK-NEXT:    2(2.802597e-45), 0(0.000000e+00)
+  store ptr addrspace(1) addrspacecast (ptr null to ptr addrspace(1)), ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_kernel void @addrspacecast_flat_undef_to_local(ptr addrspace(1) %out) {
+; CHECK-LABEL: addrspacecast_flat_undef_to_local:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    CF_END
+; CHECK-NEXT:    PAD
+  store ptr addrspace(3) addrspacecast (ptr undef to ptr addrspace(3)), ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_kernel void @addrspacecast_flat_poison_to_local(ptr addrspace(1) %out) {
+; CHECK-LABEL: addrspacecast_flat_poison_to_local:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    CF_END
+; CHECK-NEXT:    PAD
+  store ptr addrspace(3) addrspacecast (ptr poison to ptr addrspace(3)), ptr addrspace(1) %out
+  ret void
+}


        


More information about the llvm-commits mailing list