[llvm] r353497 - AMDGPU/GlobalISel: Legalize addrspacecast
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Thu Feb 7 18:40:47 PST 2019
Author: arsenm
Date: Thu Feb 7 18:40:47 2019
New Revision: 353497
URL: http://llvm.org/viewvc/llvm-project?rev=353497&view=rev
Log:
AMDGPU/GlobalISel: Legalize addrspacecast
Use a placeholder constant for now on targets
that need the load from the queue ptr.
Added:
llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/legalize-addrspacecast.mir
Modified:
llvm/trunk/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
llvm/trunk/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
llvm/trunk/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp
Modified: llvm/trunk/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/GlobalISel/LegalizerHelper.cpp?rev=353497&r1=353496&r2=353497&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/GlobalISel/LegalizerHelper.cpp (original)
+++ llvm/trunk/lib/CodeGen/GlobalISel/LegalizerHelper.cpp Thu Feb 7 18:40:47 2019
@@ -2174,6 +2174,7 @@ LegalizerHelper::fewerElementsVector(Mac
case G_FPTOUI:
case G_INTTOPTR:
case G_PTRTOINT:
+ case G_ADDRSPACE_CAST:
return fewerElementsVectorCasts(MI, TypeIdx, NarrowTy);
case G_ICMP:
case G_FCMP:
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp?rev=353497&r1=353496&r2=353497&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp Thu Feb 7 18:40:47 2019
@@ -14,6 +14,9 @@
#include "AMDGPU.h"
#include "AMDGPULegalizerInfo.h"
#include "AMDGPUTargetMachine.h"
+#include "SIMachineFunctionInfo.h"
+
+#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/DerivedTypes.h"
@@ -316,6 +319,12 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo
return std::make_pair(0, LLT::scalar(Query.Types[1].getSizeInBits()));
});
+ if (ST.hasFlatAddressSpace()) {
+ getActionDefinitionsBuilder(G_ADDRSPACE_CAST)
+ .scalarize(0)
+ .custom();
+ }
+
getActionDefinitionsBuilder({G_LOAD, G_STORE})
.narrowScalarIf([](const LegalityQuery &Query) {
unsigned Size = Query.Types[0].getSizeInBits();
@@ -587,3 +596,171 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo
computeTables();
verify(*ST.getInstrInfo());
}
+
+// Entry point for opcodes marked custom. Only G_ADDRSPACE_CAST is handled here;
+// every other opcode returns false.
+bool AMDGPULegalizerInfo::legalizeCustom(MachineInstr &MI,
+                                         MachineRegisterInfo &MRI,
+                                         MachineIRBuilder &MIRBuilder,
+                                         GISelChangeObserver &Observer) const {
+  const bool IsAddrSpaceCast =
+    MI.getOpcode() == TargetOpcode::G_ADDRSPACE_CAST;
+  if (!IsAddrSpaceCast)
+    return false;
+
+  return legalizeAddrSpaceCast(MI, MRI, MIRBuilder);
+}
+// Emit the 32-bit aperture value for segment AS and return its register.
+unsigned AMDGPULegalizerInfo::getSegmentAperture(
+ unsigned AS,
+ MachineRegisterInfo &MRI,
+ MachineIRBuilder &MIRBuilder) const {
+ MachineFunction &MF = MIRBuilder.getMF();
+ const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
+ const LLT S32 = LLT::scalar(32);
+ // With aperture registers, read the base through S_GETREG_B32.
+ if (ST.hasApertureRegs()) {
+ // FIXME: Use inline constants (src_{shared, private}_base) instead of
+ // getreg.
+ unsigned Offset = AS == AMDGPUAS::LOCAL_ADDRESS ?
+ AMDGPU::Hwreg::OFFSET_SRC_SHARED_BASE :
+ AMDGPU::Hwreg::OFFSET_SRC_PRIVATE_BASE;
+ unsigned WidthM1 = AS == AMDGPUAS::LOCAL_ADDRESS ?
+ AMDGPU::Hwreg::WIDTH_M1_SRC_SHARED_BASE :
+ AMDGPU::Hwreg::WIDTH_M1_SRC_PRIVATE_BASE;
+ unsigned Encoding =
+ AMDGPU::Hwreg::ID_MEM_BASES << AMDGPU::Hwreg::ID_SHIFT_ |
+ Offset << AMDGPU::Hwreg::OFFSET_SHIFT_ |
+ WidthM1 << AMDGPU::Hwreg::WIDTH_M1_SHIFT_;
+ // Any AS other than LOCAL_ADDRESS selected the private-base fields above.
+ unsigned ShiftAmt = MRI.createGenericVirtualRegister(S32);
+ unsigned ApertureReg = MRI.createGenericVirtualRegister(S32);
+ unsigned GetReg = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
+
+ MIRBuilder.buildInstr(AMDGPU::S_GETREG_B32)
+ .addDef(GetReg)
+ .addImm(Encoding);
+ MRI.setType(GetReg, S32);
+ // Shift the value read by getreg left by the field width (WidthM1 + 1).
+ MIRBuilder.buildConstant(ShiftAmt, WidthM1 + 1);
+ MIRBuilder.buildInstr(TargetOpcode::G_SHL)
+ .addDef(ApertureReg)
+ .addUse(GetReg)
+ .addUse(ShiftAmt);
+
+ return ApertureReg;
+ }
+ // No aperture registers: load the aperture from the queue descriptor.
+ unsigned QueuePtr = MRI.createGenericVirtualRegister(
+ LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64));
+
+ // FIXME: Placeholder until we can track the input registers.
+ MIRBuilder.buildConstant(QueuePtr, 0xdeadbeef);
+
+ // Offset into amd_queue_t for group_segment_aperture_base_hi /
+ // private_segment_aperture_base_hi.
+ uint32_t StructOffset = (AS == AMDGPUAS::LOCAL_ADDRESS) ? 0x40 : 0x44;
+
+ // FIXME: Don't use undef
+ Value *V = UndefValue::get(PointerType::get(
+ Type::getInt8Ty(MF.getFunction().getContext()),
+ AMDGPUAS::CONSTANT_ADDRESS));
+ // Memory operand for a 4-byte invariant, dereferenceable load.
+ MachinePointerInfo PtrInfo(V, StructOffset);
+ MachineMemOperand *MMO = MF.getMachineMemOperand(
+ PtrInfo,
+ MachineMemOperand::MOLoad |
+ MachineMemOperand::MODereferenceable |
+ MachineMemOperand::MOInvariant,
+ 4,
+ MinAlign(64, StructOffset));
+
+ unsigned LoadResult = MRI.createGenericVirtualRegister(S32);
+ unsigned LoadAddr = AMDGPU::NoRegister;
+ // LoadAddr = QueuePtr + StructOffset; the loaded s32 is the aperture.
+ MIRBuilder.materializeGEP(LoadAddr, QueuePtr, LLT::scalar(64), StructOffset);
+ MIRBuilder.buildLoad(LoadResult, LoadAddr, *MMO);
+ return LoadResult;
+}
+
+// Custom-lower a scalar G_ADDRSPACE_CAST: no-op casts become a COPY in place;
+// flat -> local/private keeps the low 32 bits; local/private -> flat merges the
+// source with its segment's aperture. Null maps to the destination's null.
+bool AMDGPULegalizerInfo::legalizeAddrSpaceCast(
+  MachineInstr &MI, MachineRegisterInfo &MRI,
+  MachineIRBuilder &MIRBuilder) const {
+  MachineFunction &MF = MIRBuilder.getMF();
+  MIRBuilder.setInstr(MI);
+
+  unsigned Dst = MI.getOperand(0).getReg();
+  unsigned Src = MI.getOperand(1).getReg();
+  LLT DstTy = MRI.getType(Dst);
+  LLT SrcTy = MRI.getType(Src);
+  unsigned DestAS = DstTy.getAddressSpace();
+  unsigned SrcAS = SrcTy.getAddressSpace();
+
+  // TODO: Avoid reloading from the queue ptr for each cast, or at least each
+  // vector element.
+  assert(!DstTy.isVector());
+
+  const AMDGPUTargetMachine &TM
+    = static_cast<const AMDGPUTargetMachine &>(MF.getTarget());
+  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
+  if (ST.getTargetLowering()->isNoopAddrSpaceCast(SrcAS, DestAS)) {
+    MI.setDesc(MIRBuilder.getTII().get(TargetOpcode::COPY));
+    return true;
+  }
+
+  if (SrcAS == AMDGPUAS::FLAT_ADDRESS) {
+    assert(DestAS == AMDGPUAS::LOCAL_ADDRESS ||
+           DestAS == AMDGPUAS::PRIVATE_ADDRESS);
+    unsigned NullVal = TM.getNullPointerValue(DestAS);
+
+    unsigned SegmentNullReg = MRI.createGenericVirtualRegister(DstTy);
+    unsigned FlatNullReg = MRI.createGenericVirtualRegister(SrcTy);
+    MIRBuilder.buildConstant(SegmentNullReg, NullVal);
+    MIRBuilder.buildConstant(FlatNullReg, 0);
+
+    // Extract low 32-bits of the pointer.
+    unsigned PtrLo32 = MRI.createGenericVirtualRegister(DstTy);
+    MIRBuilder.buildExtract(PtrLo32, Src, 0);
+
+    unsigned CmpRes = MRI.createGenericVirtualRegister(LLT::scalar(1));
+    MIRBuilder.buildICmp(CmpInst::ICMP_NE, CmpRes, Src, FlatNullReg);
+    MIRBuilder.buildSelect(Dst, CmpRes, PtrLo32, SegmentNullReg);
+
+    MI.eraseFromParent();
+    return true;
+  }
+
+  assert(SrcAS == AMDGPUAS::LOCAL_ADDRESS ||
+         SrcAS == AMDGPUAS::PRIVATE_ADDRESS);
+
+  unsigned FlatNullReg = MRI.createGenericVirtualRegister(DstTy);
+  unsigned SegmentNullReg = MRI.createGenericVirtualRegister(SrcTy);
+  MIRBuilder.buildConstant(SegmentNullReg, TM.getNullPointerValue(SrcAS));
+  MIRBuilder.buildConstant(FlatNullReg, TM.getNullPointerValue(DestAS));
+
+  // The aperture is that of the segment being cast from. Passing DestAS (flat
+  // in the supported casts) always selected the private aperture.
+  unsigned ApertureReg = getSegmentAperture(SrcAS, MRI, MIRBuilder);
+
+  unsigned CmpRes = MRI.createGenericVirtualRegister(LLT::scalar(1));
+  MIRBuilder.buildICmp(CmpInst::ICMP_NE, CmpRes, Src, SegmentNullReg);
+
+  unsigned BuildPtr = MRI.createGenericVirtualRegister(DstTy);
+
+  // Coerce the type of the low half of the result so we can use merge_values.
+  unsigned SrcAsInt = MRI.createGenericVirtualRegister(LLT::scalar(32));
+  MIRBuilder.buildInstr(TargetOpcode::G_PTRTOINT)
+    .addDef(SrcAsInt)
+    .addUse(Src);
+
+  // TODO: Should we allow mismatched types but matching sizes in merges to
+  // avoid the ptrtoint?
+  MIRBuilder.buildMerge(BuildPtr, {SrcAsInt, ApertureReg});
+  MIRBuilder.buildSelect(Dst, CmpRes, BuildPtr, FlatNullReg);
+
+  MI.eraseFromParent();
+  return true;
+}
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPULegalizerInfo.h?rev=353497&r1=353496&r2=353497&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPULegalizerInfo.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPULegalizerInfo.h Thu Feb 7 18:40:47 2019
@@ -27,6 +27,17 @@ class AMDGPULegalizerInfo : public Legal
public:
AMDGPULegalizerInfo(const GCNSubtarget &ST,
const GCNTargetMachine &TM);
+
+ bool legalizeCustom(MachineInstr &MI, MachineRegisterInfo &MRI,
+ MachineIRBuilder &MIRBuilder,
+ GISelChangeObserver &Observer) const override;
+
+ unsigned getSegmentAperture(unsigned AddrSpace,
+ MachineRegisterInfo &MRI,
+ MachineIRBuilder &MIRBuilder) const;
+
+ bool legalizeAddrSpaceCast(MachineInstr &MI, MachineRegisterInfo &MRI,
+ MachineIRBuilder &MIRBuilder) const;
};
} // End llvm namespace.
#endif
Modified: llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp?rev=353497&r1=353496&r2=353497&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp Thu Feb 7 18:40:47 2019
@@ -1217,7 +1217,8 @@ EVT SITargetLowering::getOptimalMemOpTyp
static bool isFlatGlobalAddrSpace(unsigned AS) {
return AS == AMDGPUAS::GLOBAL_ADDRESS ||
AS == AMDGPUAS::FLAT_ADDRESS ||
- AS == AMDGPUAS::CONSTANT_ADDRESS;
+ AS == AMDGPUAS::CONSTANT_ADDRESS ||
+ AS > AMDGPUAS::MAX_AMDGPU_ADDRESS; // any AS numerically above MAX_AMDGPU_ADDRESS also counts
}
bool SITargetLowering::isNoopAddrSpaceCast(unsigned SrcAS,
Added: llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/legalize-addrspacecast.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/legalize-addrspacecast.mir?rev=353497&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/legalize-addrspacecast.mir (added)
+++ llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/legalize-addrspacecast.mir Thu Feb 7 18:40:47 2019
@@ -0,0 +1,393 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -march=amdgcn -mcpu=fiji -O0 -run-pass=legalizer -global-isel-abort=0 -o - %s | FileCheck -check-prefix=VI %s
+# RUN: llc -march=amdgcn -mcpu=gfx900 -O0 -run-pass=legalizer -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9 %s
+
+---
+name: test_addrspacecast_p0_to_p1
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1
+
+ ; VI-LABEL: name: test_addrspacecast_p0_to_p1
+ ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
+ ; VI: [[COPY1:%[0-9]+]]:_(p1) = COPY [[COPY]](p0)
+ ; VI: $vgpr0_vgpr1 = COPY [[COPY1]](p1)
+ ; GFX9-LABEL: name: test_addrspacecast_p0_to_p1
+ ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
+ ; GFX9: [[COPY1:%[0-9]+]]:_(p1) = COPY [[COPY]](p0)
+ ; GFX9: $vgpr0_vgpr1 = COPY [[COPY1]](p1)
+ %0:_(p0) = COPY $vgpr0_vgpr1
+ %1:_(p1) = G_ADDRSPACE_CAST %0
+ $vgpr0_vgpr1 = COPY %1
+...
+
+---
+name: test_addrspacecast_p1_to_p0
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1
+
+ ; VI-LABEL: name: test_addrspacecast_p1_to_p0
+ ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+ ; VI: [[COPY1:%[0-9]+]]:_(p0) = COPY [[COPY]](p1)
+ ; VI: $vgpr0_vgpr1 = COPY [[COPY1]](p0)
+ ; GFX9-LABEL: name: test_addrspacecast_p1_to_p0
+ ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+ ; GFX9: [[COPY1:%[0-9]+]]:_(p0) = COPY [[COPY]](p1)
+ ; GFX9: $vgpr0_vgpr1 = COPY [[COPY1]](p0)
+ %0:_(p1) = COPY $vgpr0_vgpr1
+ %1:_(p0) = G_ADDRSPACE_CAST %0
+ $vgpr0_vgpr1 = COPY %1
+...
+
+---
+name: test_addrspacecast_p0_to_p4
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1
+
+ ; VI-LABEL: name: test_addrspacecast_p0_to_p4
+ ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
+ ; VI: [[COPY1:%[0-9]+]]:_(p4) = COPY [[COPY]](p0)
+ ; VI: $vgpr0_vgpr1 = COPY [[COPY1]](p4)
+ ; GFX9-LABEL: name: test_addrspacecast_p0_to_p4
+ ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
+ ; GFX9: [[COPY1:%[0-9]+]]:_(p4) = COPY [[COPY]](p0)
+ ; GFX9: $vgpr0_vgpr1 = COPY [[COPY1]](p4)
+ %0:_(p0) = COPY $vgpr0_vgpr1
+ %1:_(p4) = G_ADDRSPACE_CAST %0
+ $vgpr0_vgpr1 = COPY %1
+...
+
+---
+name: test_addrspacecast_p4_to_p0
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1
+
+ ; VI-LABEL: name: test_addrspacecast_p4_to_p0
+ ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
+ ; VI: [[COPY1:%[0-9]+]]:_(p0) = COPY [[COPY]](p4)
+ ; VI: $vgpr0_vgpr1 = COPY [[COPY1]](p0)
+ ; GFX9-LABEL: name: test_addrspacecast_p4_to_p0
+ ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
+ ; GFX9: [[COPY1:%[0-9]+]]:_(p0) = COPY [[COPY]](p4)
+ ; GFX9: $vgpr0_vgpr1 = COPY [[COPY1]](p0)
+ %0:_(p4) = COPY $vgpr0_vgpr1
+ %1:_(p0) = G_ADDRSPACE_CAST %0
+ $vgpr0_vgpr1 = COPY %1
+...
+
+---
+name: test_addrspacecast_p0_to_p999
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1
+
+ ; VI-LABEL: name: test_addrspacecast_p0_to_p999
+ ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
+ ; VI: [[COPY1:%[0-9]+]]:_(p999) = COPY [[COPY]](p0)
+ ; VI: $vgpr0_vgpr1 = COPY [[COPY1]](p999)
+ ; GFX9-LABEL: name: test_addrspacecast_p0_to_p999
+ ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
+ ; GFX9: [[COPY1:%[0-9]+]]:_(p999) = COPY [[COPY]](p0)
+ ; GFX9: $vgpr0_vgpr1 = COPY [[COPY1]](p999)
+ %0:_(p0) = COPY $vgpr0_vgpr1
+ %1:_(p999) = G_ADDRSPACE_CAST %0
+ $vgpr0_vgpr1 = COPY %1
+...
+
+---
+name: test_addrspacecast_p999_to_p0
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1
+
+ ; VI-LABEL: name: test_addrspacecast_p999_to_p0
+ ; VI: [[COPY:%[0-9]+]]:_(p999) = COPY $vgpr0_vgpr1
+ ; VI: [[COPY1:%[0-9]+]]:_(p0) = COPY [[COPY]](p999)
+ ; VI: $vgpr0_vgpr1 = COPY [[COPY1]](p0)
+ ; GFX9-LABEL: name: test_addrspacecast_p999_to_p0
+ ; GFX9: [[COPY:%[0-9]+]]:_(p999) = COPY $vgpr0_vgpr1
+ ; GFX9: [[COPY1:%[0-9]+]]:_(p0) = COPY [[COPY]](p999)
+ ; GFX9: $vgpr0_vgpr1 = COPY [[COPY1]](p0)
+ %0:_(p999) = COPY $vgpr0_vgpr1
+ %1:_(p0) = G_ADDRSPACE_CAST %0
+ $vgpr0_vgpr1 = COPY %1
+...
+
+---
+name: test_addrspacecast_p5_to_p0
+body: |
+ bb.0:
+ liveins: $vgpr0
+
+ ; VI-LABEL: name: test_addrspacecast_p5_to_p0
+ ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
+ ; VI: [[C:%[0-9]+]]:_(p5) = G_CONSTANT i32 0
+ ; VI: [[C1:%[0-9]+]]:_(p0) = G_CONSTANT i64 0
+ ; VI: [[C2:%[0-9]+]]:_(p4) = G_CONSTANT i64 3735928559
+ ; VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 68
+ ; VI: [[GEP:%[0-9]+]]:_(p4) = G_GEP [[C2]], [[C3]](s64)
+ ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p4) :: (dereferenceable invariant load 4 from `i8 addrspace(4)* undef` + 68, addrspace 4)
+ ; VI: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](p5), [[C]]
+ ; VI: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[COPY]](p5)
+ ; VI: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[PTRTOINT]](s32), [[LOAD]](s32)
+ ; VI: [[SELECT:%[0-9]+]]:_(p0) = G_SELECT [[ICMP]](s1), [[MV]], [[C1]]
+ ; VI: $vgpr0_vgpr1 = COPY [[SELECT]](p0)
+ ; GFX9-LABEL: name: test_addrspacecast_p5_to_p0
+ ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
+ ; GFX9: [[C:%[0-9]+]]:_(p5) = G_CONSTANT i32 0
+ ; GFX9: [[C1:%[0-9]+]]:_(p0) = G_CONSTANT i64 0
+ ; GFX9: [[S_GETREG_B32_:%[0-9]+]]:sreg_32(s32) = S_GETREG_B32 30735
+ ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; GFX9: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[S_GETREG_B32_]], [[C2]](s32)
+ ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](p5), [[C]]
+ ; GFX9: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[COPY]](p5)
+ ; GFX9: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[PTRTOINT]](s32), [[SHL]](s32)
+ ; GFX9: [[SELECT:%[0-9]+]]:_(p0) = G_SELECT [[ICMP]](s1), [[MV]], [[C1]]
+ ; GFX9: $vgpr0_vgpr1 = COPY [[SELECT]](p0)
+ %0:_(p5) = COPY $vgpr0
+ %1:_(p0) = G_ADDRSPACE_CAST %0
+ $vgpr0_vgpr1 = COPY %1
+...
+
+---
+name: test_addrspacecast_p0_to_p5
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1
+
+ ; VI-LABEL: name: test_addrspacecast_p0_to_p5
+ ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
+ ; VI: [[C:%[0-9]+]]:_(p5) = G_CONSTANT i32 0
+ ; VI: [[C1:%[0-9]+]]:_(p0) = G_CONSTANT i64 0
+ ; VI: [[EXTRACT:%[0-9]+]]:_(p5) = G_EXTRACT [[COPY]](p0), 0
+ ; VI: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](p0), [[C1]]
+ ; VI: [[SELECT:%[0-9]+]]:_(p5) = G_SELECT [[ICMP]](s1), [[EXTRACT]], [[C]]
+ ; VI: $vgpr0 = COPY [[SELECT]](p5)
+ ; GFX9-LABEL: name: test_addrspacecast_p0_to_p5
+ ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
+ ; GFX9: [[C:%[0-9]+]]:_(p5) = G_CONSTANT i32 0
+ ; GFX9: [[C1:%[0-9]+]]:_(p0) = G_CONSTANT i64 0
+ ; GFX9: [[EXTRACT:%[0-9]+]]:_(p5) = G_EXTRACT [[COPY]](p0), 0
+ ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](p0), [[C1]]
+ ; GFX9: [[SELECT:%[0-9]+]]:_(p5) = G_SELECT [[ICMP]](s1), [[EXTRACT]], [[C]]
+ ; GFX9: $vgpr0 = COPY [[SELECT]](p5)
+ %0:_(p0) = COPY $vgpr0_vgpr1
+ %1:_(p5) = G_ADDRSPACE_CAST %0
+ $vgpr0 = COPY %1
+...
+
+---
+name: test_addrspacecast_p3_to_p0
+body: |
+ bb.0:
+ liveins: $vgpr0
+
+ ; VI-LABEL: name: test_addrspacecast_p3_to_p0
+ ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
+ ; VI: [[C:%[0-9]+]]:_(p3) = G_CONSTANT i32 -1
+ ; VI: [[C1:%[0-9]+]]:_(p0) = G_CONSTANT i64 0
+ ; VI: [[C2:%[0-9]+]]:_(p4) = G_CONSTANT i64 3735928559
+ ; VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 68
+ ; VI: [[GEP:%[0-9]+]]:_(p4) = G_GEP [[C2]], [[C3]](s64)
+ ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p4) :: (dereferenceable invariant load 4 from `i8 addrspace(4)* undef` + 68, addrspace 4)
+ ; VI: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](p3), [[C]]
+ ; VI: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[COPY]](p3)
+ ; VI: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[PTRTOINT]](s32), [[LOAD]](s32)
+ ; VI: [[SELECT:%[0-9]+]]:_(p0) = G_SELECT [[ICMP]](s1), [[MV]], [[C1]]
+ ; VI: $vgpr0_vgpr1 = COPY [[SELECT]](p0)
+ ; GFX9-LABEL: name: test_addrspacecast_p3_to_p0
+ ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
+ ; GFX9: [[C:%[0-9]+]]:_(p3) = G_CONSTANT i32 -1
+ ; GFX9: [[C1:%[0-9]+]]:_(p0) = G_CONSTANT i64 0
+ ; GFX9: [[S_GETREG_B32_:%[0-9]+]]:sreg_32(s32) = S_GETREG_B32 30735
+ ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; GFX9: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[S_GETREG_B32_]], [[C2]](s32)
+ ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](p3), [[C]]
+ ; GFX9: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[COPY]](p3)
+ ; GFX9: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[PTRTOINT]](s32), [[SHL]](s32)
+ ; GFX9: [[SELECT:%[0-9]+]]:_(p0) = G_SELECT [[ICMP]](s1), [[MV]], [[C1]]
+ ; GFX9: $vgpr0_vgpr1 = COPY [[SELECT]](p0)
+ %0:_(p3) = COPY $vgpr0
+ %1:_(p0) = G_ADDRSPACE_CAST %0
+ $vgpr0_vgpr1 = COPY %1
+...
+
+---
+name: test_addrspacecast_p0_to_p3
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1
+
+ ; VI-LABEL: name: test_addrspacecast_p0_to_p3
+ ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
+ ; VI: [[C:%[0-9]+]]:_(p3) = G_CONSTANT i32 -1
+ ; VI: [[C1:%[0-9]+]]:_(p0) = G_CONSTANT i64 0
+ ; VI: [[EXTRACT:%[0-9]+]]:_(p3) = G_EXTRACT [[COPY]](p0), 0
+ ; VI: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](p0), [[C1]]
+ ; VI: [[SELECT:%[0-9]+]]:_(p3) = G_SELECT [[ICMP]](s1), [[EXTRACT]], [[C]]
+ ; VI: $vgpr0 = COPY [[SELECT]](p3)
+ ; GFX9-LABEL: name: test_addrspacecast_p0_to_p3
+ ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
+ ; GFX9: [[C:%[0-9]+]]:_(p3) = G_CONSTANT i32 -1
+ ; GFX9: [[C1:%[0-9]+]]:_(p0) = G_CONSTANT i64 0
+ ; GFX9: [[EXTRACT:%[0-9]+]]:_(p3) = G_EXTRACT [[COPY]](p0), 0
+ ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](p0), [[C1]]
+ ; GFX9: [[SELECT:%[0-9]+]]:_(p3) = G_SELECT [[ICMP]](s1), [[EXTRACT]], [[C]]
+ ; GFX9: $vgpr0 = COPY [[SELECT]](p3)
+ %0:_(p0) = COPY $vgpr0_vgpr1
+ %1:_(p3) = G_ADDRSPACE_CAST %0
+ $vgpr0 = COPY %1
+...
+
+---
+name: test_addrspacecast_v2p0_to_v2p1
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1_vgpr2_vgpr3
+
+ ; VI-LABEL: name: test_addrspacecast_v2p0_to_v2p1
+ ; VI: [[COPY:%[0-9]+]]:_(<2 x p0>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
+ ; VI: [[UV:%[0-9]+]]:_(p0), [[UV1:%[0-9]+]]:_(p0) = G_UNMERGE_VALUES [[COPY]](<2 x p0>)
+ ; VI: [[COPY1:%[0-9]+]]:_(p1) = COPY [[UV]](p0)
+ ; VI: [[COPY2:%[0-9]+]]:_(p1) = COPY [[UV1]](p0)
+ ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p1>) = G_BUILD_VECTOR [[COPY1]](p1), [[COPY2]](p1)
+ ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x p1>)
+ ; GFX9-LABEL: name: test_addrspacecast_v2p0_to_v2p1
+ ; GFX9: [[COPY:%[0-9]+]]:_(<2 x p0>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
+ ; GFX9: [[UV:%[0-9]+]]:_(p0), [[UV1:%[0-9]+]]:_(p0) = G_UNMERGE_VALUES [[COPY]](<2 x p0>)
+ ; GFX9: [[COPY1:%[0-9]+]]:_(p1) = COPY [[UV]](p0)
+ ; GFX9: [[COPY2:%[0-9]+]]:_(p1) = COPY [[UV1]](p0)
+ ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p1>) = G_BUILD_VECTOR [[COPY1]](p1), [[COPY2]](p1)
+ ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x p1>)
+ %0:_(<2 x p0>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
+ %1:_(<2 x p1>) = G_ADDRSPACE_CAST %0
+ $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
+...
+
+---
+name: test_addrspacecast_v2p1_to_v2p0
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1_vgpr2_vgpr3
+
+ ; VI-LABEL: name: test_addrspacecast_v2p1_to_v2p0
+ ; VI: [[COPY:%[0-9]+]]:_(<2 x p1>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
+ ; VI: [[UV:%[0-9]+]]:_(p1), [[UV1:%[0-9]+]]:_(p1) = G_UNMERGE_VALUES [[COPY]](<2 x p1>)
+ ; VI: [[COPY1:%[0-9]+]]:_(p0) = COPY [[UV]](p1)
+ ; VI: [[COPY2:%[0-9]+]]:_(p0) = COPY [[UV1]](p1)
+ ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p0>) = G_BUILD_VECTOR [[COPY1]](p0), [[COPY2]](p0)
+ ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x p0>)
+ ; GFX9-LABEL: name: test_addrspacecast_v2p1_to_v2p0
+ ; GFX9: [[COPY:%[0-9]+]]:_(<2 x p1>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
+ ; GFX9: [[UV:%[0-9]+]]:_(p1), [[UV1:%[0-9]+]]:_(p1) = G_UNMERGE_VALUES [[COPY]](<2 x p1>)
+ ; GFX9: [[COPY1:%[0-9]+]]:_(p0) = COPY [[UV]](p1)
+ ; GFX9: [[COPY2:%[0-9]+]]:_(p0) = COPY [[UV1]](p1)
+ ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p0>) = G_BUILD_VECTOR [[COPY1]](p0), [[COPY2]](p0)
+ ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x p0>)
+ %0:_(<2 x p1>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
+ %1:_(<2 x p0>) = G_ADDRSPACE_CAST %0
+ $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
+...
+
+---
+name: test_addrspacecast_v2p0_to_v2p3
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1_vgpr2_vgpr3
+
+ ; VI-LABEL: name: test_addrspacecast_v2p0_to_v2p3
+ ; VI: [[COPY:%[0-9]+]]:_(<2 x p0>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
+ ; VI: [[UV:%[0-9]+]]:_(p0), [[UV1:%[0-9]+]]:_(p0) = G_UNMERGE_VALUES [[COPY]](<2 x p0>)
+ ; VI: [[C:%[0-9]+]]:_(p3) = G_CONSTANT i32 -1
+ ; VI: [[C1:%[0-9]+]]:_(p0) = G_CONSTANT i64 0
+ ; VI: [[EXTRACT:%[0-9]+]]:_(p3) = G_EXTRACT [[UV]](p0), 0
+ ; VI: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV]](p0), [[C1]]
+ ; VI: [[SELECT:%[0-9]+]]:_(p3) = G_SELECT [[ICMP]](s1), [[EXTRACT]], [[C]]
+ ; VI: [[C2:%[0-9]+]]:_(p3) = G_CONSTANT i32 -1
+ ; VI: [[C3:%[0-9]+]]:_(p0) = G_CONSTANT i64 0
+ ; VI: [[EXTRACT1:%[0-9]+]]:_(p3) = G_EXTRACT [[UV1]](p0), 0
+ ; VI: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV1]](p0), [[C3]]
+ ; VI: [[SELECT1:%[0-9]+]]:_(p3) = G_SELECT [[ICMP1]](s1), [[EXTRACT1]], [[C2]]
+ ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[SELECT]](p3), [[SELECT1]](p3)
+ ; VI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x p3>)
+ ; GFX9-LABEL: name: test_addrspacecast_v2p0_to_v2p3
+ ; GFX9: [[COPY:%[0-9]+]]:_(<2 x p0>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
+ ; GFX9: [[UV:%[0-9]+]]:_(p0), [[UV1:%[0-9]+]]:_(p0) = G_UNMERGE_VALUES [[COPY]](<2 x p0>)
+ ; GFX9: [[C:%[0-9]+]]:_(p3) = G_CONSTANT i32 -1
+ ; GFX9: [[C1:%[0-9]+]]:_(p0) = G_CONSTANT i64 0
+ ; GFX9: [[EXTRACT:%[0-9]+]]:_(p3) = G_EXTRACT [[UV]](p0), 0
+ ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV]](p0), [[C1]]
+ ; GFX9: [[SELECT:%[0-9]+]]:_(p3) = G_SELECT [[ICMP]](s1), [[EXTRACT]], [[C]]
+ ; GFX9: [[C2:%[0-9]+]]:_(p3) = G_CONSTANT i32 -1
+ ; GFX9: [[C3:%[0-9]+]]:_(p0) = G_CONSTANT i64 0
+ ; GFX9: [[EXTRACT1:%[0-9]+]]:_(p3) = G_EXTRACT [[UV1]](p0), 0
+ ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV1]](p0), [[C3]]
+ ; GFX9: [[SELECT1:%[0-9]+]]:_(p3) = G_SELECT [[ICMP1]](s1), [[EXTRACT1]], [[C2]]
+ ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[SELECT]](p3), [[SELECT1]](p3)
+ ; GFX9: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x p3>)
+ %0:_(<2 x p0>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
+ %1:_(<2 x p3>) = G_ADDRSPACE_CAST %0
+ $vgpr0_vgpr1 = COPY %1
+...
+
+---
+name: test_addrspacecast_v2p3_to_v2p0
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1
+
+ ; VI-LABEL: name: test_addrspacecast_v2p3_to_v2p0
+ ; VI: [[COPY:%[0-9]+]]:_(<2 x p3>) = COPY $vgpr0_vgpr1
+ ; VI: [[UV:%[0-9]+]]:_(p3), [[UV1:%[0-9]+]]:_(p3) = G_UNMERGE_VALUES [[COPY]](<2 x p3>)
+ ; VI: [[C:%[0-9]+]]:_(p3) = G_CONSTANT i32 -1
+ ; VI: [[C1:%[0-9]+]]:_(p0) = G_CONSTANT i64 0
+ ; VI: [[C2:%[0-9]+]]:_(p4) = G_CONSTANT i64 3735928559
+ ; VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 68
+ ; VI: [[GEP:%[0-9]+]]:_(p4) = G_GEP [[C2]], [[C3]](s64)
+ ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p4) :: (dereferenceable invariant load 4 from `i8 addrspace(4)* undef` + 68, addrspace 4)
+ ; VI: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV]](p3), [[C]]
+ ; VI: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[UV]](p3)
+ ; VI: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[PTRTOINT]](s32), [[LOAD]](s32)
+ ; VI: [[SELECT:%[0-9]+]]:_(p0) = G_SELECT [[ICMP]](s1), [[MV]], [[C1]]
+ ; VI: [[C4:%[0-9]+]]:_(p3) = G_CONSTANT i32 -1
+ ; VI: [[C5:%[0-9]+]]:_(p0) = G_CONSTANT i64 0
+ ; VI: [[C6:%[0-9]+]]:_(p4) = G_CONSTANT i64 3735928559
+ ; VI: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 68
+ ; VI: [[GEP1:%[0-9]+]]:_(p4) = G_GEP [[C6]], [[C7]](s64)
+ ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p4) :: (dereferenceable invariant load 4 from `i8 addrspace(4)* undef` + 68, addrspace 4)
+ ; VI: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV1]](p3), [[C4]]
+ ; VI: [[PTRTOINT1:%[0-9]+]]:_(s32) = G_PTRTOINT [[UV1]](p3)
+ ; VI: [[MV1:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[PTRTOINT1]](s32), [[LOAD1]](s32)
+ ; VI: [[SELECT1:%[0-9]+]]:_(p0) = G_SELECT [[ICMP1]](s1), [[MV1]], [[C5]]
+ ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p0>) = G_BUILD_VECTOR [[SELECT]](p0), [[SELECT1]](p0)
+ ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x p0>)
+ ; GFX9-LABEL: name: test_addrspacecast_v2p3_to_v2p0
+ ; GFX9: [[COPY:%[0-9]+]]:_(<2 x p3>) = COPY $vgpr0_vgpr1
+ ; GFX9: [[UV:%[0-9]+]]:_(p3), [[UV1:%[0-9]+]]:_(p3) = G_UNMERGE_VALUES [[COPY]](<2 x p3>)
+ ; GFX9: [[C:%[0-9]+]]:_(p3) = G_CONSTANT i32 -1
+ ; GFX9: [[C1:%[0-9]+]]:_(p0) = G_CONSTANT i64 0
+ ; GFX9: [[S_GETREG_B32_:%[0-9]+]]:sreg_32(s32) = S_GETREG_B32 30735
+ ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; GFX9: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[S_GETREG_B32_]], [[C2]](s32)
+ ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV]](p3), [[C]]
+ ; GFX9: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[UV]](p3)
+ ; GFX9: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[PTRTOINT]](s32), [[SHL]](s32)
+ ; GFX9: [[SELECT:%[0-9]+]]:_(p0) = G_SELECT [[ICMP]](s1), [[MV]], [[C1]]
+ ; GFX9: [[C3:%[0-9]+]]:_(p3) = G_CONSTANT i32 -1
+ ; GFX9: [[C4:%[0-9]+]]:_(p0) = G_CONSTANT i64 0
+ ; GFX9: [[S_GETREG_B32_1:%[0-9]+]]:sreg_32(s32) = S_GETREG_B32 30735
+ ; GFX9: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; GFX9: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[S_GETREG_B32_1]], [[C5]](s32)
+ ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV1]](p3), [[C3]]
+ ; GFX9: [[PTRTOINT1:%[0-9]+]]:_(s32) = G_PTRTOINT [[UV1]](p3)
+ ; GFX9: [[MV1:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[PTRTOINT1]](s32), [[SHL1]](s32)
+ ; GFX9: [[SELECT1:%[0-9]+]]:_(p0) = G_SELECT [[ICMP1]](s1), [[MV1]], [[C4]]
+ ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p0>) = G_BUILD_VECTOR [[SELECT]](p0), [[SELECT1]](p0)
+ ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x p0>)
+ %0:_(<2 x p3>) = COPY $vgpr0_vgpr1
+ %1:_(<2 x p0>) = G_ADDRSPACE_CAST %0
+ $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
+...
More information about the llvm-commits
mailing list