[llvm] r363576 - AMDGPU/GlobalISel: Implement select for G_ICMP and G_SELECT
Tom Stellard via llvm-commits
llvm-commits at lists.llvm.org
Mon Jun 17 09:27:43 PDT 2019
Author: tstellar
Date: Mon Jun 17 09:27:43 2019
New Revision: 363576
URL: http://llvm.org/viewvc/llvm-project?rev=363576&view=rev
Log:
AMDGPU/GlobalISel: Implement select for G_ICMP and G_SELECT
Reviewers: arsenm
Subscribers: kzhuravl, jvesely, wdng, nhaehnle, yaxunl, rovka, kristof.beyls, dstuttard, tpr, t-tye, hiraditya, Petar.Avramovic, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D60640
Added:
llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/inst-select-icmp.mir
Modified:
llvm/trunk/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
llvm/trunk/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.cpp
llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy.mir
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp?rev=363576&r1=363575&r2=363576&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp Mon Jun 17 09:27:43 2019
@@ -59,11 +59,52 @@ AMDGPUInstructionSelector::AMDGPUInstruc
const char *AMDGPUInstructionSelector::getName() { return DEBUG_TYPE; }
+// Return true if \p Reg holds an SCC-like condition: either the physical
+// SCC register itself, or a virtual register that belongs to the SCC
+// register bank (which is modeled with 32-bit SGPRs carrying an s1 value).
+static bool isSCC(unsigned Reg, const MachineRegisterInfo &MRI) {
+ if (Reg == AMDGPU::SCC)
+ return true;
+
+ // Any other physical register cannot be SCC.
+ if (TargetRegisterInfo::isPhysicalRegister(Reg))
+ return false;
+
+ auto &RegClassOrBank = MRI.getRegClassOrRegBank(Reg);
+ const TargetRegisterClass *RC =
+ RegClassOrBank.dyn_cast<const TargetRegisterClass*>();
+ if (RC)
+ // The scc bank is represented with SReg_32_XM0; additionally require an
+ // s1 LLT so an ordinary 32-bit SGPR value is not mistaken for a condition.
+ return RC->getID() == AMDGPU::SReg_32_XM0RegClassID &&
+ MRI.getType(Reg).getSizeInBits() == 1;
+
+ const RegisterBank *RB = RegClassOrBank.get<const RegisterBank *>();
+ return RB->getID() == AMDGPU::SCCRegBankID;
+}
+
bool AMDGPUInstructionSelector::selectCOPY(MachineInstr &I) const {
MachineBasicBlock *BB = I.getParent();
MachineFunction *MF = BB->getParent();
MachineRegisterInfo &MRI = MF->getRegInfo();
I.setDesc(TII.get(TargetOpcode::COPY));
+
+ // Special case for COPY from the scc register bank. The scc register bank
+ // is modeled using 32-bit sgprs.
+ const MachineOperand &Src = I.getOperand(1);
+ unsigned SrcReg = Src.getReg();
+ if (!TargetRegisterInfo::isPhysicalRegister(SrcReg) && isSCC(SrcReg, MRI)) {
+ // NOTE(review): DstReg is initialized from getRegSizeInBits (a size in
+ // bits), then passed back into getRegSizeInBits as if it were a register.
+ // This looks like it should be `DstReg = I.getOperand(0).getReg()` —
+ // please confirm; as written DstSize is queried on a bogus register id.
+ unsigned DstReg = TRI.getRegSizeInBits(I.getOperand(0).getReg(), MRI);
+ unsigned DstSize = TRI.getRegSizeInBits(DstReg, MRI);
+
+ // We have a copy from a 32-bit to 64-bit register. This happens
+ // when we are selecting scc->vcc copies.
+ if (DstSize == 64) {
+ const DebugLoc &DL = I.getDebugLoc();
+ // Materialize the scc value into a 64-bit condition mask by comparing
+ // the 32-bit sgpr against zero.
+ BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_CMP_NE_U32_e64), I.getOperand(0).getReg())
+ .addImm(0)
+ .addReg(SrcReg);
+ // The source may still only have a register bank; pin down a register
+ // class so the V_CMP operand is constrained.
+ if (!MRI.getRegClassOrNull(SrcReg))
+ MRI.setRegClass(SrcReg, TRI.getConstrainedRegClassForOperand(Src, MRI));
+ I.eraseFromParent();
+ return true;
+ }
+ }
+
for (const MachineOperand &MO : I.operands()) {
if (TargetRegisterInfo::isPhysicalRegister(MO.getReg()))
continue;
@@ -262,6 +303,101 @@ bool AMDGPUInstructionSelector::selectG_
return false;
}
+// Map a generic integer predicate to the corresponding VALU compare
+// (V_CMP_*_e64) opcode for a 32- or 64-bit comparison.
+static unsigned getV_CMPOpcode(CmpInst::Predicate P, unsigned Size) {
+ assert(Size == 32 || Size == 64);
+ switch (P) {
+ default:
+ llvm_unreachable("Unknown condition code!");
+ case CmpInst::ICMP_NE:
+ return Size == 32 ? AMDGPU::V_CMP_NE_U32_e64 : AMDGPU::V_CMP_NE_U64_e64;
+ case CmpInst::ICMP_EQ:
+ return Size == 32 ? AMDGPU::V_CMP_EQ_U32_e64 : AMDGPU::V_CMP_EQ_U64_e64;
+ case CmpInst::ICMP_SGT:
+ return Size == 32 ? AMDGPU::V_CMP_GT_I32_e64 : AMDGPU::V_CMP_GT_I64_e64;
+ case CmpInst::ICMP_SGE:
+ return Size == 32 ? AMDGPU::V_CMP_GE_I32_e64 : AMDGPU::V_CMP_GE_I64_e64;
+ case CmpInst::ICMP_SLT:
+ return Size == 32 ? AMDGPU::V_CMP_LT_I32_e64 : AMDGPU::V_CMP_LT_I64_e64;
+ case CmpInst::ICMP_SLE:
+ return Size == 32 ? AMDGPU::V_CMP_LE_I32_e64 : AMDGPU::V_CMP_LE_I64_e64;
+ case CmpInst::ICMP_UGT:
+ return Size == 32 ? AMDGPU::V_CMP_GT_U32_e64 : AMDGPU::V_CMP_GT_U64_e64;
+ case CmpInst::ICMP_UGE:
+ return Size == 32 ? AMDGPU::V_CMP_GE_U32_e64 : AMDGPU::V_CMP_GE_U64_e64;
+ case CmpInst::ICMP_ULT:
+ return Size == 32 ? AMDGPU::V_CMP_LT_U32_e64 : AMDGPU::V_CMP_LT_U64_e64;
+ case CmpInst::ICMP_ULE:
+ return Size == 32 ? AMDGPU::V_CMP_LE_U32_e64 : AMDGPU::V_CMP_LE_U64_e64;
+ }
+}
+
+// Map a generic integer predicate to the corresponding scalar compare
+// (S_CMP_*) opcode. Only 32-bit comparisons are handled here.
+static unsigned getS_CMPOpcode(CmpInst::Predicate P, unsigned Size) {
+ // FIXME: VI supports 64-bit compares.
+ assert(Size == 32);
+ switch (P) {
+ default:
+ llvm_unreachable("Unknown condition code!");
+ case CmpInst::ICMP_NE:
+ return AMDGPU::S_CMP_LG_U32;
+ case CmpInst::ICMP_EQ:
+ return AMDGPU::S_CMP_EQ_U32;
+ case CmpInst::ICMP_SGT:
+ return AMDGPU::S_CMP_GT_I32;
+ case CmpInst::ICMP_SGE:
+ return AMDGPU::S_CMP_GE_I32;
+ case CmpInst::ICMP_SLT:
+ return AMDGPU::S_CMP_LT_I32;
+ case CmpInst::ICMP_SLE:
+ return AMDGPU::S_CMP_LE_I32;
+ case CmpInst::ICMP_UGT:
+ return AMDGPU::S_CMP_GT_U32;
+ case CmpInst::ICMP_UGE:
+ return AMDGPU::S_CMP_GE_U32;
+ case CmpInst::ICMP_ULT:
+ return AMDGPU::S_CMP_LT_U32;
+ case CmpInst::ICMP_ULE:
+ return AMDGPU::S_CMP_LE_U32;
+ }
+}
+
+// Select a G_ICMP: use a scalar S_CMP_* (result read back from $scc) when the
+// destination is on the scc bank, otherwise a VALU V_CMP_*_e64 producing a
+// 64-bit condition mask.
+bool AMDGPUInstructionSelector::selectG_ICMP(MachineInstr &I) const {
+ MachineBasicBlock *BB = I.getParent();
+ MachineFunction *MF = BB->getParent();
+ MachineRegisterInfo &MRI = MF->getRegInfo();
+ DebugLoc DL = I.getDebugLoc();
+
+ unsigned SrcReg = I.getOperand(2).getReg();
+ unsigned Size = RBI.getSizeInBits(SrcReg, MRI, TRI);
+ // FIXME: VI supports 64-bit compares.
+ // NOTE(review): this unconditional assert also forbids Size == 64 on the
+ // VALU path below, even though getV_CMPOpcode handles 64-bit compares and
+ // the later assert allows them — confirm whether this should be scoped to
+ // the SALU path only.
+ assert(Size == 32);
+
+ unsigned CCReg = I.getOperand(0).getReg();
+ if (isSCC(CCReg, MRI)) {
+ unsigned Opcode = getS_CMPOpcode((CmpInst::Predicate)I.getOperand(1).getPredicate(), Size);
+ MachineInstr *ICmp = BuildMI(*BB, &I, DL, TII.get(Opcode))
+ .add(I.getOperand(2))
+ .add(I.getOperand(3));
+ // S_CMP defines $scc implicitly; copy it into the virtual result.
+ MachineInstr *Copy = BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), CCReg)
+ .addReg(AMDGPU::SCC);
+ bool Ret = constrainSelectedInstRegOperands(*ICmp, TII, TRI, RBI) |
+ constrainSelectedInstRegOperands(*Copy, TII, TRI, RBI);
+ I.eraseFromParent();
+ return Ret;
+ }
+
+ assert(Size == 32 || Size == 64);
+ unsigned Opcode = getV_CMPOpcode((CmpInst::Predicate)I.getOperand(1).getPredicate(), Size);
+ MachineInstr *ICmp = BuildMI(*BB, &I, DL, TII.get(Opcode),
+ I.getOperand(0).getReg())
+ .add(I.getOperand(2))
+ .add(I.getOperand(3));
+ // The condition mask produced by V_CMP_*_e64 lives in a 64-bit SGPR pair.
+ RBI.constrainGenericRegister(ICmp->getOperand(0).getReg(),
+ AMDGPU::SReg_64RegClass, MRI);
+ bool Ret = constrainSelectedInstRegOperands(*ICmp, TII, TRI, RBI);
+ I.eraseFromParent();
+ return Ret;
+}
+
static MachineInstr *
buildEXP(const TargetInstrInfo &TII, MachineInstr *Insert, unsigned Tgt,
unsigned Reg0, unsigned Reg1, unsigned Reg2, unsigned Reg3,
@@ -325,6 +461,53 @@ bool AMDGPUInstructionSelector::selectG_
return false;
}
+// Select a G_SELECT: when the condition is on the scc bank, copy it into
+// $scc and use S_CSELECT_B32/B64; otherwise use V_CNDMASK_B32_e64 with the
+// condition as a vcc-style mask (32-bit result only).
+bool AMDGPUInstructionSelector::selectG_SELECT(MachineInstr &I) const {
+ MachineBasicBlock *BB = I.getParent();
+ MachineFunction *MF = BB->getParent();
+ MachineRegisterInfo &MRI = MF->getRegInfo();
+ const DebugLoc &DL = I.getDebugLoc();
+
+ unsigned DstReg = I.getOperand(0).getReg();
+ unsigned Size = RBI.getSizeInBits(DstReg, MRI, TRI);
+ assert(Size == 32 || Size == 64);
+ const MachineOperand &CCOp = I.getOperand(1);
+ unsigned CCReg = CCOp.getReg();
+ if (isSCC(CCReg, MRI)) {
+ unsigned SelectOpcode = Size == 32 ? AMDGPU::S_CSELECT_B32 :
+ AMDGPU::S_CSELECT_B64;
+ // S_CSELECT reads its condition from $scc implicitly, so move the
+ // virtual condition into the physical register first.
+ MachineInstr *CopySCC = BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), AMDGPU::SCC)
+ .addReg(CCReg);
+
+ // The generic constrainSelectedInstRegOperands doesn't work for the scc
+ // register bank, because it does not cover the register class we use to
+ // represent it. So we need to manually set the register class here.
+ if (!MRI.getRegClassOrNull(CCReg))
+ MRI.setRegClass(CCReg, TRI.getConstrainedRegClassForOperand(CCOp, MRI))
+ MachineInstr *Select = BuildMI(*BB, &I, DL, TII.get(SelectOpcode), DstReg)
+ .add(I.getOperand(2))
+ .add(I.getOperand(3));
+
+ bool Ret = constrainSelectedInstRegOperands(*Select, TII, TRI, RBI) |
+ constrainSelectedInstRegOperands(*CopySCC, TII, TRI, RBI);
+ I.eraseFromParent();
+ return Ret;
+ }
+
+ assert(Size == 32);
+ // FIXME: Support 64-bit select
+ MachineInstr *Select =
+ BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
+ .addImm(0)
+ .add(I.getOperand(3))
+ .addImm(0)
+ .add(I.getOperand(2))
+ .add(I.getOperand(1));
+
+ bool Ret = constrainSelectedInstRegOperands(*Select, TII, TRI, RBI);
+ I.eraseFromParent();
+ return Ret;
+}
+
bool AMDGPUInstructionSelector::selectG_STORE(MachineInstr &I) const {
MachineBasicBlock *BB = I.getParent();
MachineFunction *MF = BB->getParent();
@@ -573,10 +756,14 @@ bool AMDGPUInstructionSelector::select(M
return selectG_INTRINSIC(I, CoverageInfo);
case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
return selectG_INTRINSIC_W_SIDE_EFFECTS(I, CoverageInfo);
+ case TargetOpcode::G_ICMP:
+ return selectG_ICMP(I);
case TargetOpcode::G_LOAD:
if (selectImpl(I, CoverageInfo))
return true;
return selectG_LOAD(I);
+ case TargetOpcode::G_SELECT:
+ return selectG_SELECT(I);
case TargetOpcode::G_STORE:
return selectG_STORE(I);
}
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUInstructionSelector.h?rev=363576&r1=363575&r2=363576&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUInstructionSelector.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUInstructionSelector.h Mon Jun 17 09:27:43 2019
@@ -72,11 +72,13 @@ private:
bool selectG_INTRINSIC(MachineInstr &I, CodeGenCoverage &CoverageInfo) const;
bool selectG_INTRINSIC_W_SIDE_EFFECTS(MachineInstr &I,
CodeGenCoverage &CoverageInfo) const;
+ bool selectG_ICMP(MachineInstr &I) const;
bool hasVgprParts(ArrayRef<GEPInfo> AddrInfo) const;
void getAddrModeInfo(const MachineInstr &Load, const MachineRegisterInfo &MRI,
SmallVectorImpl<GEPInfo> &AddrInfo) const;
bool selectSMRD(MachineInstr &I, ArrayRef<GEPInfo> AddrInfo) const;
bool selectG_LOAD(MachineInstr &I) const;
+ bool selectG_SELECT(MachineInstr &I) const;
bool selectG_STORE(MachineInstr &I) const;
InstructionSelector::ComplexRendererFns
Modified: llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.cpp?rev=363576&r1=363575&r2=363576&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.cpp Mon Jun 17 09:27:43 2019
@@ -1688,6 +1688,10 @@ SIRegisterInfo::getConstrainedRegClassFo
Size = PowerOf2Ceil(Size);
switch (Size) {
+ case 1:
+ if (RB->getID() == AMDGPU::SCCRegBankID)
+ return &AMDGPU::SReg_32_XM0RegClass;
+ break;
case 32:
return RB->getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VGPR_32RegClass :
&AMDGPU::SReg_32_XM0RegClass;
@@ -1710,8 +1714,9 @@ SIRegisterInfo::getConstrainedRegClassFo
return RB->getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VReg_512RegClass :
&AMDGPU::SReg_512RegClass;
default:
- llvm_unreachable("not implemented");
+ break;
}
+ llvm_unreachable("not implemented");
}
unsigned SIRegisterInfo::getVCC() const {
Modified: llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy.mir?rev=363576&r1=363575&r2=363576&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy.mir Mon Jun 17 09:27:43 2019
@@ -1,9 +1,6 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -march=amdgcn -run-pass=instruction-select -verify-machineinstrs -global-isel %s -o - | FileCheck %s -check-prefixes=GCN
---- |
- define amdgpu_kernel void @copy(i32 addrspace(1)* %global0) {ret void}
-...
---
name: copy
@@ -22,6 +19,60 @@ body: |
%0:sgpr(p1) = COPY $sgpr2_sgpr3
%1:vgpr(p1) = COPY %0
%2:vgpr(s32) = G_IMPLICIT_DEF
- G_STORE %2, %1 :: (store 4 into %ir.global0)
+ G_STORE %2, %1 :: (store 4, addrspace 1)
+...
+---
+
+name: copy_vcc_scc
+legalized: true
+regBankSelected: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3, $scc
+ ; GCN-LABEL: name: copy_vcc_scc
+ ; GCN: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; GCN: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3
+ ; GCN: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $scc
+ ; GCN: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U32_e64 0, [[COPY3]], implicit $exec
+ ; GCN: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY2]], 0, [[COPY1]], [[V_CMP_NE_U32_e64_]], implicit $exec
+ ; GCN: FLAT_STORE_DWORD [[COPY]], [[V_CNDMASK_B32_e64_]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ %0:vgpr(p1) = COPY $vgpr0_vgpr1
+ %1:vgpr(s32) = COPY $vgpr2
+ %2:vgpr(s32) = COPY $vgpr3
+ %3:scc(s1) = COPY $scc
+ %4:vcc(s1) = COPY %3
+ %5:vgpr(s32) = G_SELECT %4, %1, %2
+ G_STORE %5, %0 :: (store 4, addrspace 1)
+...
+---
+
+name: copy_vcc_scc_2_uses
+legalized: true
+regBankSelected: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3, $scc
+ ; GCN-LABEL: name: copy_vcc_scc_2_uses
+ ; GCN: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; GCN: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3
+ ; GCN: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $scc
+ ; GCN: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U32_e64 0, [[COPY3]], implicit $exec
+ ; GCN: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY2]], 0, [[COPY1]], [[V_CMP_NE_U32_e64_]], implicit $exec
+ ; GCN: [[V_CMP_NE_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U32_e64 0, [[COPY3]], implicit $exec
+ ; GCN: [[V_CNDMASK_B32_e64_1:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_CNDMASK_B32_e64_]], 0, [[COPY1]], [[V_CMP_NE_U32_e64_1]], implicit $exec
+ ; GCN: FLAT_STORE_DWORD [[COPY]], [[V_CNDMASK_B32_e64_1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ %0:vgpr(p1) = COPY $vgpr0_vgpr1
+ %1:vgpr(s32) = COPY $vgpr2
+ %2:vgpr(s32) = COPY $vgpr3
+ %3:scc(s1) = COPY $scc
+ %4:vcc(s1) = COPY %3
+ %5:vgpr(s32) = G_SELECT %4, %1, %2
+ %6:vcc(s1) = COPY %3
+ %7:vgpr(s32) = G_SELECT %6, %1, %5
+ G_STORE %7, %0 :: (store 4, addrspace 1)
...
---
Added: llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/inst-select-icmp.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/inst-select-icmp.mir?rev=363576&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/inst-select-icmp.mir (added)
+++ llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/inst-select-icmp.mir Mon Jun 17 09:27:43 2019
@@ -0,0 +1,309 @@
+# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs -global-isel %s -o - | FileCheck %s -check-prefixes=GCN
+
+---
+name: icmp_s_mix
+legalized: true
+regBankSelected: true
+
+# GCN: name: icmp_s_mix
+# GCN: [[SGPR0:%[0-9]+]]:sreg_32 = COPY $sgpr0
+# GCN: [[SGPR1:%[0-9]+]]:sreg_32 = COPY $sgpr1
+# GCN: [[SGPR2:%[0-9]+]]:sreg_32 = COPY $sgpr2
+# GCN: [[SGPR3:%[0-9]+]]:sreg_32 = COPY $sgpr3
+# GCN: [[SGPR4:%[0-9]+]]:sreg_32 = COPY $sgpr4
+# GCN: [[SGPR5:%[0-9]+]]:sreg_32 = COPY $sgpr5
+# GCN: [[SGPR6:%[0-9]+]]:sreg_32 = COPY $sgpr6
+# GCN: [[SGPR7:%[0-9]+]]:sreg_32 = COPY $sgpr7
+# GCN: S_CMP_LG_U32 [[SGPR0]], [[SGPR1]], implicit-def $scc
+# GCN-NEXT: [[COND0:%[0-9]+]]:sreg_32_xm0 = COPY $scc
+# GCN: S_CMP_LG_U32 [[SGPR4]], [[SGPR5]], implicit-def $scc
+# GCN-NEXT: [[COND1:%[0-9]+]]:sreg_32_xm0 = COPY $scc
+# GCN: $scc = COPY [[COND0]]
+# GCN-NEXT: S_CSELECT_B32 [[SGPR6]], [[SGPR7]], implicit $scc
+# GCN: $scc = COPY [[COND1]]
+# GCN-NEXT: S_CSELECT_B32 [[SGPR2]], [[SGPR3]], implicit $scc
+
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7
+
+ %0:vgpr(p1) = COPY $vgpr0_vgpr1
+ %1:sgpr(s32) = COPY $sgpr0
+ %2:sgpr(s32) = COPY $sgpr1
+ %3:sgpr(s32) = COPY $sgpr2
+ %4:sgpr(s32) = COPY $sgpr3
+ %5:sgpr(s32) = COPY $sgpr4
+ %6:sgpr(s32) = COPY $sgpr5
+ %7:sgpr(s32) = COPY $sgpr6
+ %8:sgpr(s32) = COPY $sgpr7
+ %9:scc(s1) = G_ICMP intpred(ne), %1, %2
+ %10:scc(s1) = G_ICMP intpred(ne), %5, %6
+ %11:sgpr(s32) = G_SELECT %9, %7, %8
+ %12:sgpr(s32) = G_SELECT %10, %3, %4
+ %13:vgpr(s32) = COPY %11
+ G_STORE %13, %0 :: (volatile store 4, addrspace 1)
+ %14:vgpr(s32) = COPY %12
+ G_STORE %14, %0 :: (volatile store 4, addrspace 1)
+
+...
+---
+name: icmp_salu
+legalized: true
+regBankSelected: true
+
+# GCN-LABEL: name: icmp_salu
+# GCN: S_CMP_LG_U32
+# GCN: S_CMP_EQ_U32
+# GCN: S_CMP_GT_I32
+# GCN: S_CMP_GE_I32
+# GCN: S_CMP_LT_I32
+# GCN: S_CMP_LE_I32
+# GCN: S_CMP_GT_U32
+# GCN: S_CMP_GE_U32
+# GCN: S_CMP_LT_U32
+# GCN: S_CMP_LE_U32
+
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1, $sgpr0, $sgpr1, $sgpr2, $sgpr3
+
+ %0:vgpr(p1) = COPY $vgpr0_vgpr1
+ %1:sgpr(s32) = COPY $sgpr0
+ %2:sgpr(s32) = COPY $sgpr1
+ %3:sgpr(s32) = COPY $sgpr2
+ %4:sgpr(s32) = COPY $sgpr3
+ %5:scc(s1) = G_ICMP intpred(ne), %1, %2
+ %6:scc(s1) = G_ICMP intpred(eq), %1, %2
+ %7:scc(s1) = G_ICMP intpred(sgt), %1, %2
+ %8:scc(s1) = G_ICMP intpred(sge), %1, %2
+ %9:scc(s1) = G_ICMP intpred(slt), %1, %2
+ %10:scc(s1) = G_ICMP intpred(sle), %1, %2
+ %11:scc(s1) = G_ICMP intpred(ugt), %1, %2
+ %12:scc(s1) = G_ICMP intpred(uge), %1, %2
+ %13:scc(s1) = G_ICMP intpred(ult), %1, %2
+ %14:scc(s1) = G_ICMP intpred(ule), %1, %2
+ %15:sgpr(s32) = G_SELECT %5, %3, %4
+ %16:sgpr(s32) = G_SELECT %6, %3, %4
+ %17:sgpr(s32) = G_SELECT %7, %3, %4
+ %18:sgpr(s32) = G_SELECT %8, %3, %4
+ %19:sgpr(s32) = G_SELECT %9, %3, %4
+ %20:sgpr(s32) = G_SELECT %10, %3, %4
+ %21:sgpr(s32) = G_SELECT %11, %3, %4
+ %22:sgpr(s32) = G_SELECT %12, %3, %4
+ %23:sgpr(s32) = G_SELECT %13, %3, %4
+ %24:sgpr(s32) = G_SELECT %14, %3, %4
+ %25:vgpr(s32) = COPY %15
+ G_STORE %25, %0 :: (volatile store 4, addrspace 1)
+ %26:vgpr(s32) = COPY %16
+ G_STORE %26, %0 :: (volatile store 4, addrspace 1)
+ %27:vgpr(s32) = COPY %17
+ G_STORE %27, %0 :: (volatile store 4, addrspace 1)
+ %28:vgpr(s32) = COPY %18
+ G_STORE %28, %0 :: (volatile store 4, addrspace 1)
+ %29:vgpr(s32) = COPY %19
+ G_STORE %29, %0 :: (volatile store 4, addrspace 1)
+ %30:vgpr(s32) = COPY %20
+ G_STORE %30, %0 :: (volatile store 4, addrspace 1)
+ %31:vgpr(s32) = COPY %21
+ G_STORE %31, %0 :: (volatile store 4, addrspace 1)
+ %32:vgpr(s32) = COPY %22
+ G_STORE %32, %0 :: (volatile store 4, addrspace 1)
+ %33:vgpr(s32) = COPY %23
+ G_STORE %33, %0 :: (volatile store 4, addrspace 1)
+ %34:vgpr(s32) = COPY %24
+ G_STORE %34, %0 :: (volatile store 4, addrspace 1)
+
+...
+---
+name: icmp_v_mix
+legalized: true
+regBankSelected: true
+
+# GCN-LABEL: name: icmp_v_mix
+# GCN: [[VGPR2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+# GCN: [[VGPR3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
+# GCN: [[VGPR4:%[0-9]+]]:vgpr_32 = COPY $vgpr4
+# GCN: [[VGPR5:%[0-9]+]]:vgpr_32 = COPY $vgpr5
+# GCN: [[VGPR6:%[0-9]+]]:vgpr_32 = COPY $vgpr6
+# GCN: [[VGPR7:%[0-9]+]]:vgpr_32 = COPY $vgpr7
+# GCN: [[VGPR8:%[0-9]+]]:vgpr_32 = COPY $vgpr8
+# GCN: [[VGPR9:%[0-9]+]]:vgpr_32 = COPY $vgpr9
+# GCN: [[COND0:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U32_e64 [[VGPR2]], [[VGPR3]]
+# GCN: [[COND1:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U32_e64 [[VGPR6]], [[VGPR7]]
+# GCN: V_CNDMASK_B32_e64 0, [[VGPR9]], 0, [[VGPR8]], [[COND0]]
+# GCN: V_CNDMASK_B32_e64 0, [[VGPR5]], 0, [[VGPR4]], [[COND1]]
+
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9
+
+ %0:vgpr(p1) = COPY $vgpr0_vgpr1
+ %1:vgpr(s32) = COPY $vgpr2
+ %2:vgpr(s32) = COPY $vgpr3
+ %3:vgpr(s32) = COPY $vgpr4
+ %4:vgpr(s32) = COPY $vgpr5
+ %5:vgpr(s32) = COPY $vgpr6
+ %6:vgpr(s32) = COPY $vgpr7
+ %7:vgpr(s32) = COPY $vgpr8
+ %8:vgpr(s32) = COPY $vgpr9
+ %9:sgpr(s1) = G_ICMP intpred(ne), %1, %2
+ %10:sgpr(s1) = G_ICMP intpred(ne), %5, %6
+ %11:vgpr(s32) = G_SELECT %9, %7, %8
+ %12:vgpr(s32) = G_SELECT %10, %3, %4
+ G_STORE %11, %0 :: (volatile store 4, addrspace 1)
+ G_STORE %12, %0 :: (volatile store 4, addrspace 1)
+...
+---
+name: icmp_valu
+legalized: true
+regBankSelected: true
+
+# GCN-LABEL: name: icmp_valu
+# GCN: V_CMP_NE_U32_e64
+# GCN: V_CMP_EQ_U32_e64
+# GCN: V_CMP_GT_I32_e64
+# GCN: V_CMP_GE_I32_e64
+# GCN: V_CMP_LT_I32_e64
+# GCN: V_CMP_LE_I32_e64
+# GCN: V_CMP_GT_U32_e64
+# GCN: V_CMP_GE_U32_e64
+# GCN: V_CMP_LT_U32_e64
+# GCN: V_CMP_LE_U32_e64
+
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
+
+ %0:vgpr(p1) = COPY $vgpr0_vgpr1
+ %1:vgpr(s32) = COPY $vgpr2
+ %2:vgpr(s32) = COPY $vgpr3
+ %3:vgpr(s32) = COPY $vgpr4
+ %4:vgpr(s32) = COPY $vgpr5
+ %5:sgpr(s1) = G_ICMP intpred(ne), %1, %2
+ %6:sgpr(s1) = G_ICMP intpred(eq), %1, %2
+ %7:sgpr(s1) = G_ICMP intpred(sgt), %1, %2
+ %8:sgpr(s1) = G_ICMP intpred(sge), %1, %2
+ %9:sgpr(s1) = G_ICMP intpred(slt), %1, %2
+ %10:sgpr(s1) = G_ICMP intpred(sle), %1, %2
+ %11:sgpr(s1) = G_ICMP intpred(ugt), %1, %2
+ %12:sgpr(s1) = G_ICMP intpred(uge), %1, %2
+ %13:sgpr(s1) = G_ICMP intpred(ult), %1, %2
+ %14:sgpr(s1) = G_ICMP intpred(ule), %1, %2
+ %15:vgpr(s32) = G_SELECT %5, %3, %4
+ %16:vgpr(s32) = G_SELECT %6, %3, %4
+ %17:vgpr(s32) = G_SELECT %7, %3, %4
+ %18:vgpr(s32) = G_SELECT %8, %3, %4
+ %19:vgpr(s32) = G_SELECT %9, %3, %4
+ %20:vgpr(s32) = G_SELECT %10, %3, %4
+ %21:vgpr(s32) = G_SELECT %11, %3, %4
+ %22:vgpr(s32) = G_SELECT %12, %3, %4
+ %23:vgpr(s32) = G_SELECT %13, %3, %4
+ %24:vgpr(s32) = G_SELECT %14, %3, %4
+ G_STORE %15, %0 :: (volatile store 4, addrspace 1)
+ G_STORE %16, %0 :: (volatile store 4, addrspace 1)
+ G_STORE %17, %0 :: (volatile store 4, addrspace 1)
+ G_STORE %18, %0 :: (volatile store 4, addrspace 1)
+ G_STORE %19, %0 :: (volatile store 4, addrspace 1)
+ G_STORE %20, %0 :: (volatile store 4, addrspace 1)
+ G_STORE %21, %0 :: (volatile store 4, addrspace 1)
+ G_STORE %22, %0 :: (volatile store 4, addrspace 1)
+ G_STORE %23, %0 :: (volatile store 4, addrspace 1)
+ G_STORE %24, %0 :: (volatile store 4, addrspace 1)
+...
+---
+
+name: icmp_vv
+legalized: true
+regBankSelected: true
+
+# GCN-LABEL: name: icmp_vv
+# GCN: [[VGPR2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+# GCN: [[VGPR3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
+# GCN: V_CMP_NE_U32_e64 [[VGPR2]], [[VGPR3]]
+
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
+
+ %0:vgpr(p1) = COPY $vgpr0_vgpr1
+ %1:vgpr(s32) = COPY $vgpr2
+ %2:vgpr(s32) = COPY $vgpr3
+ %3:vgpr(s32) = COPY $vgpr4
+ %4:vgpr(s32) = COPY $vgpr5
+ %5:sgpr(s1) = G_ICMP intpred(ne), %1, %2
+ %6:vgpr(s32) = G_SELECT %5, %3, %4
+ G_STORE %6, %0 :: (store 4, addrspace 1)
+...
+---
+
+name: icmp_vs
+legalized: true
+regBankSelected: true
+
+# GCN-LABEL: name: icmp_vs
+# GCN: [[VGPR2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+# GCN: [[SGPR0:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0
+# GCN: V_CMP_NE_U32_e64 [[VGPR2]], [[SGPR0]]
+
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3, $vgpr4, $sgpr0
+
+ %0:vgpr(p1) = COPY $vgpr0_vgpr1
+ %1:vgpr(s32) = COPY $vgpr2
+ %2:vgpr(s32) = COPY $vgpr3
+ %3:vgpr(s32) = COPY $vgpr4
+ %4:sgpr(s32) = COPY $sgpr0
+ %5:sgpr(s1) = G_ICMP intpred(ne), %1, %4
+ %6:vgpr(s32) = G_SELECT %5, %2, %3
+ G_STORE %6, %0 :: (store 4, addrspace 1)
+...
+---
+
+name: icmp_sv
+legalized: true
+regBankSelected: true
+
+# GCN-LABEL: name: icmp_sv
+# GCN: [[VGPR2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+# GCN: [[SGPR0:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0
+# GCN: V_CMP_NE_U32_e64 [[SGPR0]], [[VGPR2]]
+
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3, $vgpr4, $sgpr0
+
+ %0:vgpr(p1) = COPY $vgpr0_vgpr1
+ %1:vgpr(s32) = COPY $vgpr2
+ %2:vgpr(s32) = COPY $vgpr3
+ %3:vgpr(s32) = COPY $vgpr4
+ %4:sgpr(s32) = COPY $sgpr0
+ %5:sgpr(s1) = G_ICMP intpred(ne), %4, %1
+ %6:vgpr(s32) = G_SELECT %5, %2, %3
+ G_STORE %6, %0 :: (store 4, addrspace 1)
+...
+---
+
+name: icmp_or_vcc
+legalized: true
+regBankSelected: true
+
+# GCN-LABEL: name: icmp_or_vcc
+# GCN: [[VGPR2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+# GCN: [[SGPR0:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0
+# GCN: V_CMP_NE_U32_e64 [[SGPR0]], [[VGPR2]]
+
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3, $vgpr4, $sgpr0
+
+ %0:vgpr(p1) = COPY $vgpr0_vgpr1
+ %1:vgpr(s32) = COPY $vgpr2
+ %2:vgpr(s32) = COPY $vgpr3
+ %3:vgpr(s32) = COPY $vgpr4
+ %4:sgpr(s32) = COPY $sgpr0
+ %5:sgpr(s1) = G_ICMP intpred(ne), %4, %1
+ %6:vgpr(s32) = G_SELECT %5, %2, %3
+ G_STORE %6, %0 :: (store 4, addrspace 1)
+...
+---
More information about the llvm-commits
mailing list