[llvm] 1706960 - AMDGPU/R600: Special case addrspacecast lowering for null
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Sun Dec 18 05:02:52 PST 2022
Author: Matt Arsenault
Date: 2022-12-18T08:02:45-05:00
New Revision: 17069608940d22cd6266afb948443b11793f0a57
URL: https://github.com/llvm/llvm-project/commit/17069608940d22cd6266afb948443b11793f0a57
DIFF: https://github.com/llvm/llvm-project/commit/17069608940d22cd6266afb948443b11793f0a57.diff
LOG: AMDGPU/R600: Special case addrspacecast lowering for null
Due to poor support for non-0 null pointers, clang always emits
addrspacecast from a null flat constant for private/local null. We can
trivially handle this case for old hardware.
Should fix issue 55679.
Added:
llvm/test/CodeGen/AMDGPU/addrspacecast.r600.ll
Modified:
llvm/lib/Target/AMDGPU/R600ISelLowering.cpp
llvm/lib/Target/AMDGPU/R600ISelLowering.h
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp b/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp
index 2b2d27ca1694..22457723fc46 100644
--- a/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp
@@ -18,6 +18,7 @@
#include "R600InstrInfo.h"
#include "R600MachineFunctionInfo.h"
#include "R600Subtarget.h"
+#include "R600TargetMachine.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/IntrinsicsR600.h"
@@ -180,6 +181,7 @@ R600TargetLowering::R600TargetLowering(const TargetMachine &TM,
setHasExtractBitsInsn(true);
setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
+ setOperationAction(ISD::ADDRSPACECAST, MVT::i32, Custom);
const MVT ScalarIntVTs[] = { MVT::i32, MVT::i64 };
for (MVT VT : ScalarIntVTs)
@@ -418,6 +420,8 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
case ISD::BRCOND: return LowerBRCOND(Op, DAG);
case ISD::GlobalAddress: return LowerGlobalAddress(MFI, Op, DAG);
case ISD::FrameIndex: return lowerFrameIndex(Op, DAG);
+ case ISD::ADDRSPACECAST:
+ return lowerADDRSPACECAST(Op, DAG);
case ISD::INTRINSIC_VOID: {
SDValue Chain = Op.getOperand(0);
unsigned IntrinsicID =
@@ -937,6 +941,26 @@ SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const
DAG.getCondCode(ISD::SETNE));
}
+// Custom lowering for ISD::ADDRSPACECAST on R600.
+//
+// Clang emits the private/local null pointer as an addrspacecast of the
+// flat constant null, so fold (addrspacecast flat null -> AS) directly to
+// the target's null pointer value for the destination address space.
+// Any other cast is returned unchanged here; presumably it is handled by
+// the generic/AMDGPU lowering paths -- TODO(review): confirm.
+SDValue R600TargetLowering::lowerADDRSPACECAST(SDValue Op,
+                                               SelectionDAG &DAG) const {
+  SDLoc SL(Op);
+  EVT VT = Op.getValueType();
+
+  const R600TargetMachine &TM =
+      static_cast<const R600TargetMachine &>(getTargetMachine());
+
+  const AddrSpaceCastSDNode *ASC = cast<AddrSpaceCastSDNode>(Op);
+  unsigned SrcAS = ASC->getSrcAddressSpace();
+  unsigned DestAS = ASC->getDestAddressSpace();
+
+  // Fold a cast of constant flat null to the destination space's null value.
+  // Note: ConstantSDNode::isNullValue() was renamed to isZero(); the old
+  // name does not compile against current LLVM.
+  if (auto *ConstSrc = dyn_cast<ConstantSDNode>(Op.getOperand(0))) {
+    if (SrcAS == AMDGPUAS::FLAT_ADDRESS && ConstSrc->isZero())
+      return DAG.getConstant(TM.getNullPointerValue(DestAS), SL, VT);
+  }
+
+  return Op;
+}
+
/// LLVM generates byte-addressed pointers. For indirect addressing, we need to
/// convert these pointers to a register index. Each register holds
/// 16 bytes, (4 x 32bit sub-register), but we need to take into account the
diff --git a/llvm/lib/Target/AMDGPU/R600ISelLowering.h b/llvm/lib/Target/AMDGPU/R600ISelLowering.h
index 0484ff5e7b7f..8a5479db4ee6 100644
--- a/llvm/lib/Target/AMDGPU/R600ISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/R600ISelLowering.h
@@ -92,6 +92,7 @@ class R600TargetLowering final : public AMDGPUTargetLowering {
SDValue lowerPrivateExtLoad(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerADDRSPACECAST(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerTrig(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerShiftParts(SDValue Op, SelectionDAG &DAG) const;
diff --git a/llvm/test/CodeGen/AMDGPU/addrspacecast.r600.ll b/llvm/test/CodeGen/AMDGPU/addrspacecast.r600.ll
new file mode 100644
index 000000000000..1895a710b878
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/addrspacecast.r600.ll
@@ -0,0 +1,84 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -march=r600 -mcpu=cayman < %s | FileCheck %s
+
+; Flat -> global cast of a non-constant pointer: the checks show the source
+; pointer value (KC0[2].Z) is stored through unchanged, i.e. the cast emits
+; no conversion code.
+define amdgpu_kernel void @addrspacecast_flat_to_global(ptr addrspace(1) %out, ptr %src.ptr) {
+; CHECK-LABEL: addrspacecast_flat_to_global:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: ALU 2, @4, KC0[CB0:0-32], KC1[]
+; CHECK-NEXT: MEM_RAT_CACHELESS STORE_DWORD T1.X, T0.X
+; CHECK-NEXT: CF_END
+; CHECK-NEXT: PAD
+; CHECK-NEXT: ALU clause starting at 4:
+; CHECK-NEXT: LSHR * T0.X, KC0[2].Y, literal.x,
+; CHECK-NEXT: 2(2.802597e-45), 0(0.000000e+00)
+; CHECK-NEXT: MOV * T1.X, KC0[2].Z,
+ %cast = addrspacecast ptr %src.ptr to ptr addrspace(1)
+ store ptr addrspace(1) %cast, ptr addrspace(1) %out
+ ret void
+}
+
+; Global -> flat cast of a non-constant pointer: identical codegen to the
+; flat->global case above -- the pointer value passes through unchanged.
+define amdgpu_kernel void @addrspacecast_global_to_flat(ptr addrspace(1) %out, ptr addrspace(1) %src.ptr) {
+; CHECK-LABEL: addrspacecast_global_to_flat:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: ALU 2, @4, KC0[CB0:0-32], KC1[]
+; CHECK-NEXT: MEM_RAT_CACHELESS STORE_DWORD T1.X, T0.X
+; CHECK-NEXT: CF_END
+; CHECK-NEXT: PAD
+; CHECK-NEXT: ALU clause starting at 4:
+; CHECK-NEXT: LSHR * T0.X, KC0[2].Y, literal.x,
+; CHECK-NEXT: 2(2.802597e-45), 0(0.000000e+00)
+; CHECK-NEXT: MOV * T1.X, KC0[2].Z,
+ %cast = addrspacecast ptr addrspace(1) %src.ptr to ptr
+ store ptr %cast, ptr addrspace(1) %out
+ ret void
+}
+
+; The case the patch targets: flat null cast to local folds to a constant.
+; The checks show the stored value is the literal -1 (the local-AS null
+; pointer value), not 0.
+define amdgpu_kernel void @addrspacecast_flat_null_to_local(ptr addrspace(1) %out) {
+; CHECK-LABEL: addrspacecast_flat_null_to_local:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: ALU 3, @4, KC0[CB0:0-32], KC1[]
+; CHECK-NEXT: MEM_RAT_CACHELESS STORE_DWORD T0.X, T1.X
+; CHECK-NEXT: CF_END
+; CHECK-NEXT: PAD
+; CHECK-NEXT: ALU clause starting at 4:
+; CHECK-NEXT: MOV * T0.X, literal.x,
+; CHECK-NEXT: -1(nan), 0(0.000000e+00)
+; CHECK-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
+; CHECK-NEXT: 2(2.802597e-45), 0(0.000000e+00)
+ store ptr addrspace(3) addrspacecast (ptr null to ptr addrspace(3)), ptr addrspace(1) %out
+ ret void
+}
+
+; Flat null cast to global folds to the literal 0 (global null is 0,
+; unlike the -1 used for local above).
+define amdgpu_kernel void @addrspacecast_flat_null_to_global(ptr addrspace(1) %out) {
+; CHECK-LABEL: addrspacecast_flat_null_to_global:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: ALU 3, @4, KC0[CB0:0-32], KC1[]
+; CHECK-NEXT: MEM_RAT_CACHELESS STORE_DWORD T0.X, T1.X
+; CHECK-NEXT: CF_END
+; CHECK-NEXT: PAD
+; CHECK-NEXT: ALU clause starting at 4:
+; CHECK-NEXT: MOV * T0.X, literal.x,
+; CHECK-NEXT: 0(0.000000e+00), 0(0.000000e+00)
+; CHECK-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
+; CHECK-NEXT: 2(2.802597e-45), 0(0.000000e+00)
+ store ptr addrspace(1) addrspacecast (ptr null to ptr addrspace(1)), ptr addrspace(1) %out
+ ret void
+}
+
+; A cast of undef produces no code at all: the checks show the store is
+; folded away entirely (only CF_END/PAD remain).
+define amdgpu_kernel void @addrspacecast_flat_undef_to_local(ptr addrspace(1) %out) {
+; CHECK-LABEL: addrspacecast_flat_undef_to_local:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: CF_END
+; CHECK-NEXT: PAD
+ store ptr addrspace(3) addrspacecast (ptr undef to ptr addrspace(3)), ptr addrspace(1) %out
+ ret void
+}
+
+; Same as the undef case: a cast of poison is folded away and the store
+; disappears from the output.
+define amdgpu_kernel void @addrspacecast_flat_poison_to_local(ptr addrspace(1) %out) {
+; CHECK-LABEL: addrspacecast_flat_poison_to_local:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: CF_END
+; CHECK-NEXT: PAD
+ store ptr addrspace(3) addrspacecast (ptr poison to ptr addrspace(3)), ptr addrspace(1) %out
+ ret void
+}
More information about the llvm-commits
mailing list