[llvm] 68468bb - AMDGPU: Avoid null check during addrspacecast lowering
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Mon Jan 10 10:27:43 PST 2022
Author: Matt Arsenault
Date: 2022-01-10T13:27:39-05:00
New Revision: 68468bbe15d568550b4a20bc24010caae1bc469a
URL: https://github.com/llvm/llvm-project/commit/68468bbe15d568550b4a20bc24010caae1bc469a
DIFF: https://github.com/llvm/llvm-project/commit/68468bbe15d568550b4a20bc24010caae1bc469a.diff
LOG: AMDGPU: Avoid null check during addrspacecast lowering
If we know the source is a valid object, we do not need to insert a
null check. This misses a lot of opportunities from
metadata/attributes not tracked in codegen.
Added:
llvm/test/CodeGen/AMDGPU/addrspacecast-known-non-null.ll
Modified:
llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
llvm/lib/Target/AMDGPU/SIISelLowering.cpp
llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-addrspacecast.mir
llvm/test/CodeGen/AMDGPU/flat-scratch-init.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index a89ba399a1aa2..dac4aa4c2b631 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -1812,6 +1812,27 @@ Register AMDGPULegalizerInfo::getSegmentAperture(
return B.buildLoad(S32, LoadAddr, *MMO).getReg(0);
}
+/// Return true if the value is a known valid address, such that a null check is
+/// not necessary.
+static bool isKnownNonNull(Register Val, MachineRegisterInfo &MRI,
+ const AMDGPUTargetMachine &TM, unsigned AddrSpace) {
+ MachineInstr *Def = MRI.getVRegDef(Val);
+ switch (Def->getOpcode()) {
+ case AMDGPU::G_FRAME_INDEX:
+ case AMDGPU::G_GLOBAL_VALUE:
+ case AMDGPU::G_BLOCK_ADDR:
+ return true;
+ case AMDGPU::G_CONSTANT: {
+ const ConstantInt *CI = Def->getOperand(1).getCImm();
+ return CI->getSExtValue() != TM.getNullPointerValue(AddrSpace);
+ }
+ default:
+ return false;
+ }
+
+ return false;
+}
+
bool AMDGPULegalizerInfo::legalizeAddrSpaceCast(
MachineInstr &MI, MachineRegisterInfo &MRI,
MachineIRBuilder &B) const {
@@ -1862,6 +1883,14 @@ bool AMDGPULegalizerInfo::legalizeAddrSpaceCast(
if (SrcAS == AMDGPUAS::FLAT_ADDRESS) {
assert(DestAS == AMDGPUAS::LOCAL_ADDRESS ||
DestAS == AMDGPUAS::PRIVATE_ADDRESS);
+
+ if (isKnownNonNull(Src, MRI, TM, SrcAS)) {
+ // Extract low 32-bits of the pointer.
+ B.buildExtract(Dst, Src, 0);
+ MI.eraseFromParent();
+ return true;
+ }
+
unsigned NullVal = TM.getNullPointerValue(DestAS);
auto SegmentNull = B.buildConstant(DstTy, NullVal);
@@ -1884,24 +1913,29 @@ bool AMDGPULegalizerInfo::legalizeAddrSpaceCast(
if (!ST.hasFlatAddressSpace())
return false;
- auto SegmentNull =
- B.buildConstant(SrcTy, TM.getNullPointerValue(SrcAS));
- auto FlatNull =
- B.buildConstant(DstTy, TM.getNullPointerValue(DestAS));
-
Register ApertureReg = getSegmentAperture(SrcAS, MRI, B);
if (!ApertureReg.isValid())
return false;
- auto CmpRes =
- B.buildICmp(CmpInst::ICMP_NE, LLT::scalar(1), Src, SegmentNull.getReg(0));
-
// Coerce the type of the low half of the result so we can use merge_values.
Register SrcAsInt = B.buildPtrToInt(S32, Src).getReg(0);
// TODO: Should we allow mismatched types but matching sizes in merges to
// avoid the ptrtoint?
auto BuildPtr = B.buildMerge(DstTy, {SrcAsInt, ApertureReg});
+
+ if (isKnownNonNull(Src, MRI, TM, SrcAS)) {
+ B.buildCopy(Dst, BuildPtr);
+ MI.eraseFromParent();
+ return true;
+ }
+
+ auto SegmentNull = B.buildConstant(SrcTy, TM.getNullPointerValue(SrcAS));
+ auto FlatNull = B.buildConstant(DstTy, TM.getNullPointerValue(DestAS));
+
+ auto CmpRes =
+ B.buildICmp(CmpInst::ICMP_NE, LLT::scalar(1), Src, SegmentNull.getReg(0));
+
B.buildSelect(Dst, CmpRes, BuildPtr, FlatNull);
MI.eraseFromParent();
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 48f9320acf01a..bcf003843d4dc 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -5501,6 +5501,22 @@ SDValue SITargetLowering::getSegmentAperture(unsigned AS, const SDLoc &DL,
MachineMemOperand::MOInvariant);
}
+/// Return true if the value is a known valid address, such that a null check is
+/// not necessary.
+static bool isKnownNonNull(SDValue Val, SelectionDAG &DAG,
+ const AMDGPUTargetMachine &TM, unsigned AddrSpace) {
+ if (isa<FrameIndexSDNode>(Val) || isa<GlobalAddressSDNode>(Val) ||
+ isa<BasicBlockSDNode>(Val))
+ return true;
+
+ if (auto *ConstVal = dyn_cast<ConstantSDNode>(Val))
+ return ConstVal->getSExtValue() != TM.getNullPointerValue(AddrSpace);
+
+ // TODO: Search through arithmetic, handle arguments and loads
+ // marked nonnull.
+ return false;
+}
+
SDValue SITargetLowering::lowerADDRSPACECAST(SDValue Op,
SelectionDAG &DAG) const {
SDLoc SL(Op);
@@ -5508,44 +5524,51 @@ SDValue SITargetLowering::lowerADDRSPACECAST(SDValue Op,
SDValue Src = ASC->getOperand(0);
SDValue FlatNullPtr = DAG.getConstant(0, SL, MVT::i64);
+ unsigned SrcAS = ASC->getSrcAddressSpace();
const AMDGPUTargetMachine &TM =
static_cast<const AMDGPUTargetMachine &>(getTargetMachine());
// flat -> local/private
- if (ASC->getSrcAddressSpace() == AMDGPUAS::FLAT_ADDRESS) {
+ if (SrcAS == AMDGPUAS::FLAT_ADDRESS) {
unsigned DestAS = ASC->getDestAddressSpace();
if (DestAS == AMDGPUAS::LOCAL_ADDRESS ||
DestAS == AMDGPUAS::PRIVATE_ADDRESS) {
+ SDValue Ptr = DAG.getNode(ISD::TRUNCATE, SL, MVT::i32, Src);
+
+ if (isKnownNonNull(Src, DAG, TM, SrcAS))
+ return Ptr;
+
unsigned NullVal = TM.getNullPointerValue(DestAS);
SDValue SegmentNullPtr = DAG.getConstant(NullVal, SL, MVT::i32);
SDValue NonNull = DAG.getSetCC(SL, MVT::i1, Src, FlatNullPtr, ISD::SETNE);
- SDValue Ptr = DAG.getNode(ISD::TRUNCATE, SL, MVT::i32, Src);
- return DAG.getNode(ISD::SELECT, SL, MVT::i32,
- NonNull, Ptr, SegmentNullPtr);
+ return DAG.getNode(ISD::SELECT, SL, MVT::i32, NonNull, Ptr,
+ SegmentNullPtr);
}
}
// local/private -> flat
if (ASC->getDestAddressSpace() == AMDGPUAS::FLAT_ADDRESS) {
- unsigned SrcAS = ASC->getSrcAddressSpace();
-
if (SrcAS == AMDGPUAS::LOCAL_ADDRESS ||
SrcAS == AMDGPUAS::PRIVATE_ADDRESS) {
+
+ SDValue Aperture = getSegmentAperture(ASC->getSrcAddressSpace(), SL, DAG);
+ SDValue CvtPtr =
+ DAG.getNode(ISD::BUILD_VECTOR, SL, MVT::v2i32, Src, Aperture);
+ CvtPtr = DAG.getNode(ISD::BITCAST, SL, MVT::i64, CvtPtr);
+
+ if (isKnownNonNull(Src, DAG, TM, SrcAS))
+ return CvtPtr;
+
unsigned NullVal = TM.getNullPointerValue(SrcAS);
SDValue SegmentNullPtr = DAG.getConstant(NullVal, SL, MVT::i32);
SDValue NonNull
= DAG.getSetCC(SL, MVT::i1, Src, SegmentNullPtr, ISD::SETNE);
- SDValue Aperture = getSegmentAperture(ASC->getSrcAddressSpace(), SL, DAG);
- SDValue CvtPtr
- = DAG.getNode(ISD::BUILD_VECTOR, SL, MVT::v2i32, Src, Aperture);
-
- return DAG.getNode(ISD::SELECT, SL, MVT::i64, NonNull,
- DAG.getNode(ISD::BITCAST, SL, MVT::i64, CvtPtr),
+ return DAG.getNode(ISD::SELECT, SL, MVT::i64, NonNull, CvtPtr,
FlatNullPtr);
}
}
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-addrspacecast.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-addrspacecast.mir
index e4813a40ce149..e34edd0825588 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-addrspacecast.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-addrspacecast.mir
@@ -174,28 +174,28 @@ body: |
; VI-LABEL: name: test_addrspacecast_p5_to_p0
; VI: [[COPY:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr4_sgpr5
; VI-NEXT: [[COPY1:%[0-9]+]]:_(p5) = COPY $vgpr0
- ; VI-NEXT: [[C:%[0-9]+]]:_(p5) = G_CONSTANT i32 -1
- ; VI-NEXT: [[C1:%[0-9]+]]:_(p0) = G_CONSTANT i64 0
; VI-NEXT: [[COPY2:%[0-9]+]]:_(p4) = COPY [[COPY]](p4)
- ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 68
- ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY2]], [[C2]](s64)
+ ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 68
+ ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY2]], [[C]](s64)
; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s32), addrspace 4)
- ; VI-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY1]](p5), [[C]]
; VI-NEXT: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[COPY1]](p5)
; VI-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[PTRTOINT]](s32), [[LOAD]](s32)
- ; VI-NEXT: [[SELECT:%[0-9]+]]:_(p0) = G_SELECT [[ICMP]](s1), [[MV]], [[C1]]
+ ; VI-NEXT: [[C1:%[0-9]+]]:_(p5) = G_CONSTANT i32 -1
+ ; VI-NEXT: [[C2:%[0-9]+]]:_(p0) = G_CONSTANT i64 0
+ ; VI-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY1]](p5), [[C1]]
+ ; VI-NEXT: [[SELECT:%[0-9]+]]:_(p0) = G_SELECT [[ICMP]](s1), [[MV]], [[C2]]
; VI-NEXT: $vgpr0_vgpr1 = COPY [[SELECT]](p0)
; GFX9-LABEL: name: test_addrspacecast_p5_to_p0
; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
- ; GFX9-NEXT: [[C:%[0-9]+]]:_(p5) = G_CONSTANT i32 -1
- ; GFX9-NEXT: [[C1:%[0-9]+]]:_(p0) = G_CONSTANT i64 0
; GFX9-NEXT: [[S_GETREG_B32_:%[0-9]+]]:sreg_32(s32) = S_GETREG_B32 30735
- ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[S_GETREG_B32_]], [[C2]](s32)
- ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](p5), [[C]]
+ ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[S_GETREG_B32_]], [[C]](s32)
; GFX9-NEXT: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[COPY]](p5)
; GFX9-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[PTRTOINT]](s32), [[SHL]](s32)
- ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(p0) = G_SELECT [[ICMP]](s1), [[MV]], [[C1]]
+ ; GFX9-NEXT: [[C1:%[0-9]+]]:_(p5) = G_CONSTANT i32 -1
+ ; GFX9-NEXT: [[C2:%[0-9]+]]:_(p0) = G_CONSTANT i64 0
+ ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](p5), [[C1]]
+ ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(p0) = G_SELECT [[ICMP]](s1), [[MV]], [[C2]]
; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[SELECT]](p0)
; SI-LABEL: name: test_addrspacecast_p5_to_p0
; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
@@ -257,28 +257,28 @@ body: |
; VI-LABEL: name: test_addrspacecast_p3_to_p0
; VI: [[COPY:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr4_sgpr5
; VI-NEXT: [[COPY1:%[0-9]+]]:_(p3) = COPY $vgpr0
- ; VI-NEXT: [[C:%[0-9]+]]:_(p3) = G_CONSTANT i32 -1
- ; VI-NEXT: [[C1:%[0-9]+]]:_(p0) = G_CONSTANT i64 0
; VI-NEXT: [[COPY2:%[0-9]+]]:_(p4) = COPY [[COPY]](p4)
- ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 64
- ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY2]], [[C2]](s64)
+ ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 64
+ ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY2]], [[C]](s64)
; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s32), align 64, addrspace 4)
- ; VI-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY1]](p3), [[C]]
; VI-NEXT: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[COPY1]](p3)
; VI-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[PTRTOINT]](s32), [[LOAD]](s32)
- ; VI-NEXT: [[SELECT:%[0-9]+]]:_(p0) = G_SELECT [[ICMP]](s1), [[MV]], [[C1]]
+ ; VI-NEXT: [[C1:%[0-9]+]]:_(p3) = G_CONSTANT i32 -1
+ ; VI-NEXT: [[C2:%[0-9]+]]:_(p0) = G_CONSTANT i64 0
+ ; VI-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY1]](p3), [[C1]]
+ ; VI-NEXT: [[SELECT:%[0-9]+]]:_(p0) = G_SELECT [[ICMP]](s1), [[MV]], [[C2]]
; VI-NEXT: $vgpr0_vgpr1 = COPY [[SELECT]](p0)
; GFX9-LABEL: name: test_addrspacecast_p3_to_p0
; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
- ; GFX9-NEXT: [[C:%[0-9]+]]:_(p3) = G_CONSTANT i32 -1
- ; GFX9-NEXT: [[C1:%[0-9]+]]:_(p0) = G_CONSTANT i64 0
; GFX9-NEXT: [[S_GETREG_B32_:%[0-9]+]]:sreg_32(s32) = S_GETREG_B32 31759
- ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[S_GETREG_B32_]], [[C2]](s32)
- ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](p3), [[C]]
+ ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[S_GETREG_B32_]], [[C]](s32)
; GFX9-NEXT: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[COPY]](p3)
; GFX9-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[PTRTOINT]](s32), [[SHL]](s32)
- ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(p0) = G_SELECT [[ICMP]](s1), [[MV]], [[C1]]
+ ; GFX9-NEXT: [[C1:%[0-9]+]]:_(p3) = G_CONSTANT i32 -1
+ ; GFX9-NEXT: [[C2:%[0-9]+]]:_(p0) = G_CONSTANT i64 0
+ ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](p3), [[C1]]
+ ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(p0) = G_SELECT [[ICMP]](s1), [[MV]], [[C2]]
; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[SELECT]](p0)
; SI-LABEL: name: test_addrspacecast_p3_to_p0
; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
@@ -463,43 +463,43 @@ body: |
; VI: [[COPY:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr4_sgpr5
; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p3>) = COPY $vgpr0_vgpr1
; VI-NEXT: [[UV:%[0-9]+]]:_(p3), [[UV1:%[0-9]+]]:_(p3) = G_UNMERGE_VALUES [[COPY1]](<2 x p3>)
- ; VI-NEXT: [[C:%[0-9]+]]:_(p3) = G_CONSTANT i32 -1
- ; VI-NEXT: [[C1:%[0-9]+]]:_(p0) = G_CONSTANT i64 0
; VI-NEXT: [[COPY2:%[0-9]+]]:_(p4) = COPY [[COPY]](p4)
- ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 64
- ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY2]], [[C2]](s64)
+ ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 64
+ ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY2]], [[C]](s64)
; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s32), align 64, addrspace 4)
- ; VI-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV]](p3), [[C]]
; VI-NEXT: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[UV]](p3)
; VI-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[PTRTOINT]](s32), [[LOAD]](s32)
- ; VI-NEXT: [[SELECT:%[0-9]+]]:_(p0) = G_SELECT [[ICMP]](s1), [[MV]], [[C1]]
+ ; VI-NEXT: [[C1:%[0-9]+]]:_(p3) = G_CONSTANT i32 -1
+ ; VI-NEXT: [[C2:%[0-9]+]]:_(p0) = G_CONSTANT i64 0
+ ; VI-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV]](p3), [[C1]]
+ ; VI-NEXT: [[SELECT:%[0-9]+]]:_(p0) = G_SELECT [[ICMP]](s1), [[MV]], [[C2]]
; VI-NEXT: [[COPY3:%[0-9]+]]:_(p4) = COPY [[COPY]](p4)
- ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY3]], [[C2]](s64)
+ ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY3]], [[C]](s64)
; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s32), align 64, addrspace 4)
- ; VI-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV1]](p3), [[C]]
; VI-NEXT: [[PTRTOINT1:%[0-9]+]]:_(s32) = G_PTRTOINT [[UV1]](p3)
; VI-NEXT: [[MV1:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[PTRTOINT1]](s32), [[LOAD1]](s32)
- ; VI-NEXT: [[SELECT1:%[0-9]+]]:_(p0) = G_SELECT [[ICMP1]](s1), [[MV1]], [[C1]]
+ ; VI-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV1]](p3), [[C1]]
+ ; VI-NEXT: [[SELECT1:%[0-9]+]]:_(p0) = G_SELECT [[ICMP1]](s1), [[MV1]], [[C2]]
; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p0>) = G_BUILD_VECTOR [[SELECT]](p0), [[SELECT1]](p0)
; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x p0>)
; GFX9-LABEL: name: test_addrspacecast_v2p3_to_v2p0
; GFX9: [[COPY:%[0-9]+]]:_(<2 x p3>) = COPY $vgpr0_vgpr1
; GFX9-NEXT: [[UV:%[0-9]+]]:_(p3), [[UV1:%[0-9]+]]:_(p3) = G_UNMERGE_VALUES [[COPY]](<2 x p3>)
- ; GFX9-NEXT: [[C:%[0-9]+]]:_(p3) = G_CONSTANT i32 -1
- ; GFX9-NEXT: [[C1:%[0-9]+]]:_(p0) = G_CONSTANT i64 0
; GFX9-NEXT: [[S_GETREG_B32_:%[0-9]+]]:sreg_32(s32) = S_GETREG_B32 31759
- ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[S_GETREG_B32_]], [[C2]](s32)
- ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV]](p3), [[C]]
+ ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[S_GETREG_B32_]], [[C]](s32)
; GFX9-NEXT: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[UV]](p3)
; GFX9-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[PTRTOINT]](s32), [[SHL]](s32)
- ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(p0) = G_SELECT [[ICMP]](s1), [[MV]], [[C1]]
+ ; GFX9-NEXT: [[C1:%[0-9]+]]:_(p3) = G_CONSTANT i32 -1
+ ; GFX9-NEXT: [[C2:%[0-9]+]]:_(p0) = G_CONSTANT i64 0
+ ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV]](p3), [[C1]]
+ ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(p0) = G_SELECT [[ICMP]](s1), [[MV]], [[C2]]
; GFX9-NEXT: [[S_GETREG_B32_1:%[0-9]+]]:sreg_32(s32) = S_GETREG_B32 31759
- ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[S_GETREG_B32_1]], [[C2]](s32)
- ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV1]](p3), [[C]]
+ ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[S_GETREG_B32_1]], [[C]](s32)
; GFX9-NEXT: [[PTRTOINT1:%[0-9]+]]:_(s32) = G_PTRTOINT [[UV1]](p3)
; GFX9-NEXT: [[MV1:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[PTRTOINT1]](s32), [[SHL1]](s32)
- ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(p0) = G_SELECT [[ICMP1]](s1), [[MV1]], [[C1]]
+ ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV1]](p3), [[C1]]
+ ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(p0) = G_SELECT [[ICMP1]](s1), [[MV1]], [[C2]]
; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p0>) = G_BUILD_VECTOR [[SELECT]](p0), [[SELECT1]](p0)
; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x p0>)
; SI-LABEL: name: test_addrspacecast_v2p3_to_v2p0
@@ -639,3 +639,40 @@ body: |
%1:_(p0) = G_ADDRSPACE_CAST %0
$vgpr0_vgpr1 = COPY %1
...
+---
+name: test_addrspacecast_p5_fi_to_p0
+machineFunctionInfo:
+ argumentInfo:
+ queuePtr: { reg: '$sgpr4_sgpr5' }
+stack:
+ - { id: 0, size: 4, alignment: 4 }
+body: |
+ bb.0:
+ ; VI-LABEL: name: test_addrspacecast_p5_fi_to_p0
+ ; VI: [[COPY:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr4_sgpr5
+ ; VI-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.0
+ ; VI-NEXT: [[COPY1:%[0-9]+]]:_(p4) = COPY [[COPY]](p4)
+ ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 68
+ ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY1]], [[C]](s64)
+ ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s32), addrspace 4)
+ ; VI-NEXT: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[FRAME_INDEX]](p5)
+ ; VI-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[PTRTOINT]](s32), [[LOAD]](s32)
+ ; VI-NEXT: [[COPY2:%[0-9]+]]:_(p0) = COPY [[MV]](p0)
+ ; VI-NEXT: $vgpr0_vgpr1 = COPY [[COPY2]](p0)
+ ; GFX9-LABEL: name: test_addrspacecast_p5_fi_to_p0
+ ; GFX9: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.0
+ ; GFX9-NEXT: [[S_GETREG_B32_:%[0-9]+]]:sreg_32(s32) = S_GETREG_B32 30735
+ ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[S_GETREG_B32_]], [[C]](s32)
+ ; GFX9-NEXT: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[FRAME_INDEX]](p5)
+ ; GFX9-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[PTRTOINT]](s32), [[SHL]](s32)
+ ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY [[MV]](p0)
+ ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[COPY]](p0)
+ ; SI-LABEL: name: test_addrspacecast_p5_fi_to_p0
+ ; SI: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.0
+ ; SI-NEXT: [[ADDRSPACE_CAST:%[0-9]+]]:_(p0) = G_ADDRSPACE_CAST [[FRAME_INDEX]](p5)
+ ; SI-NEXT: $vgpr0_vgpr1 = COPY [[ADDRSPACE_CAST]](p0)
+ %0:_(p5) = G_FRAME_INDEX %stack.0
+ %1:_(p0) = G_ADDRSPACE_CAST %0
+ $vgpr0_vgpr1 = COPY %1
+...
diff --git a/llvm/test/CodeGen/AMDGPU/addrspacecast-known-non-null.ll b/llvm/test/CodeGen/AMDGPU/addrspacecast-known-non-null.ll
new file mode 100644
index 0000000000000..f3971427e3c5a
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/addrspacecast-known-non-null.ll
@@ -0,0 +1,75 @@
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -o - %s | FileCheck %s
+; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -o - %s | FileCheck %s
+
+; Test that a null check is not emitted for lowered addrspacecast
+
+
+define void @flat_user(i8* %ptr) {
+ store i8 0, i8* %ptr
+ ret void
+}
+
+; CHECK-LABEL: {{^}}cast_alloca:
+; CHECK: s_getreg_b32 [[GETREG:s[0-9]+]], hwreg(HW_REG_SH_MEM_BASES, 0, 16)
+; CHECK-NEXT: s_lshl_b32 [[APERTURE:s[0-9]+]], [[GETREG]], 16
+; CHECK-NEXT: v_lshrrev_b32_e64 v0, 6, s33
+; CHECK-NEXT: v_mov_b32_e32 v1, [[APERTURE]]
+; CHECK-NOT: v0
+; CHECK-NOT: v1
+define void @cast_alloca() {
+ %alloca = alloca i8, addrspace(5)
+ %cast = addrspacecast i8 addrspace(5)* %alloca to i8*
+ call void @flat_user(i8* %cast)
+ ret void
+}
+
+@lds = internal unnamed_addr addrspace(3) global i8 undef, align 4
+
+; CHECK-LABEL: {{^}}cast_lds_gv:
+; CHECK: s_getreg_b32 [[GETREG:s[0-9]+]], hwreg(HW_REG_SH_MEM_BASES, 16, 16)
+; CHECK-NEXT: s_lshl_b32 [[APERTURE:s[0-9]+]], [[GETREG]], 16
+; CHECK-NEXT: v_mov_b32_e32 v0, 0
+; CHECK-NEXT: v_mov_b32_e32 v1, [[APERTURE]]
+; CHECK-NOT: v0
+; CHECK-NOT: v1
+define void @cast_lds_gv() {
+ %cast = addrspacecast i8 addrspace(3)* @lds to i8*
+ call void @flat_user(i8* %cast)
+ ret void
+}
+
+; CHECK-LABEL: {{^}}cast_constant_lds_neg1_gv:
+; CHECK: v_mov_b32_e32 v0, 0
+; CHECK: v_mov_b32_e32 v1, 0
+define void @cast_constant_lds_neg1_gv() {
+ call void @flat_user(i8* addrspacecast (i8 addrspace(3)* inttoptr (i32 -1 to i8 addrspace(3)*) to i8*))
+ ret void
+}
+
+; CHECK-LABEL: {{^}}cast_constant_private_neg1_gv:
+; CHECK: v_mov_b32_e32 v0, 0
+; CHECK: v_mov_b32_e32 v1, 0
+define void @cast_constant_private_neg1_gv() {
+ call void @flat_user(i8* addrspacecast (i8 addrspace(5)* inttoptr (i32 -1 to i8 addrspace(5)*) to i8*))
+ ret void
+}
+
+; CHECK-LABEL: {{^}}cast_constant_lds_other_gv:
+; CHECK: s_getreg_b32 [[GETREG:s[0-9]+]], hwreg(HW_REG_SH_MEM_BASES, 16, 16)
+; CHECK: s_lshl_b32 [[APERTURE:s[0-9]+]], [[GETREG]], 16
+; CHECK: v_mov_b32_e32 v0, 0x7b
+; CHECK: v_mov_b32_e32 v1, [[APERTURE]]
+define void @cast_constant_lds_other_gv() {
+ call void @flat_user(i8* addrspacecast (i8 addrspace(3)* inttoptr (i32 123 to i8 addrspace(3)*) to i8*))
+ ret void
+}
+
+; CHECK-LABEL: {{^}}cast_constant_private_other_gv:
+; CHECK: s_getreg_b32 [[GETREG:s[0-9]+]], hwreg(HW_REG_SH_MEM_BASES, 0, 16)
+; CHECK: s_lshl_b32 [[APERTURE:s[0-9]+]], [[GETREG]], 16
+; CHECK: v_mov_b32_e32 v0, 0x7b
+; CHECK: v_mov_b32_e32 v1, [[APERTURE]]
+define void @cast_constant_private_other_gv() {
+ call void @flat_user(i8* addrspacecast (i8 addrspace(5)* inttoptr (i32 123 to i8 addrspace(5)*) to i8*))
+ ret void
+}
\ No newline at end of file
diff --git a/llvm/test/CodeGen/AMDGPU/flat-scratch-init.ll b/llvm/test/CodeGen/AMDGPU/flat-scratch-init.ll
index 86fd814d95faa..32c55cc51a8b5 100644
--- a/llvm/test/CodeGen/AMDGPU/flat-scratch-init.ll
+++ b/llvm/test/CodeGen/AMDGPU/flat-scratch-init.ll
@@ -11,26 +11,22 @@ define amdgpu_kernel void @stack_object_addrspacecast_in_kernel_no_calls() {
; FLAT_SCR_OPT-NEXT: s_addc_u32 s1, s1, 0
; FLAT_SCR_OPT-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s0
; FLAT_SCR_OPT-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s1
-; FLAT_SCR_OPT-NEXT: v_mov_b32_e32 v0, 4
; FLAT_SCR_OPT-NEXT: s_getreg_b32 s0, hwreg(HW_REG_SH_MEM_BASES, 0, 16)
-; FLAT_SCR_OPT-NEXT: v_mov_b32_e32 v2, 0
+; FLAT_SCR_OPT-NEXT: v_mov_b32_e32 v0, 4
; FLAT_SCR_OPT-NEXT: s_lshl_b32 s0, s0, 16
-; FLAT_SCR_OPT-NEXT: v_cmp_ne_u32_e32 vcc_lo, -1, v0
-; FLAT_SCR_OPT-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc_lo
-; FLAT_SCR_OPT-NEXT: v_cndmask_b32_e64 v1, 0, s0, vcc_lo
+; FLAT_SCR_OPT-NEXT: v_mov_b32_e32 v2, 0
+; FLAT_SCR_OPT-NEXT: v_mov_b32_e32 v1, s0
; FLAT_SCR_OPT-NEXT: flat_store_dword v[0:1], v2
; FLAT_SCR_OPT-NEXT: s_waitcnt_vscnt null, 0x0
; FLAT_SCR_OPT-NEXT: s_endpgm
;
; FLAT_SCR_ARCH-LABEL: stack_object_addrspacecast_in_kernel_no_calls:
; FLAT_SCR_ARCH: ; %bb.0:
-; FLAT_SCR_ARCH-NEXT: v_mov_b32_e32 v0, 4
; FLAT_SCR_ARCH-NEXT: s_getreg_b32 s0, hwreg(HW_REG_SH_MEM_BASES, 0, 16)
-; FLAT_SCR_ARCH-NEXT: v_mov_b32_e32 v2, 0
+; FLAT_SCR_ARCH-NEXT: v_mov_b32_e32 v0, 4
; FLAT_SCR_ARCH-NEXT: s_lshl_b32 s0, s0, 16
-; FLAT_SCR_ARCH-NEXT: v_cmp_ne_u32_e32 vcc_lo, -1, v0
-; FLAT_SCR_ARCH-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc_lo
-; FLAT_SCR_ARCH-NEXT: v_cndmask_b32_e64 v1, 0, s0, vcc_lo
+; FLAT_SCR_ARCH-NEXT: v_mov_b32_e32 v2, 0
+; FLAT_SCR_ARCH-NEXT: v_mov_b32_e32 v1, s0
; FLAT_SCR_ARCH-NEXT: flat_store_dword v[0:1], v2
; FLAT_SCR_ARCH-NEXT: s_waitcnt_vscnt null, 0x0
; FLAT_SCR_ARCH-NEXT: s_endpgm
More information about the llvm-commits
mailing list