[llvm] 2414bab - AMDGPU/GlobalISel: Remove old hacks for boolean selection
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Mon Aug 3 06:11:48 PDT 2020
Author: Matt Arsenault
Date: 2020-08-03T09:04:14-04:00
New Revision: 2414bab5d7d6b5b247f3f3b97140a2673fa8414b
URL: https://github.com/llvm/llvm-project/commit/2414bab5d7d6b5b247f3f3b97140a2673fa8414b
DIFF: https://github.com/llvm/llvm-project/commit/2414bab5d7d6b5b247f3f3b97140a2673fa8414b.diff
LOG: AMDGPU/GlobalISel: Remove old hacks for boolean selection
There were various hacks used to try to avoid making s1 SGPR vs. s1
VCC ambiguous after constraining the register, dating from before we
had a strategy to deal with this. This code also attempted to handle
undef operands, which are now illegal in gMIR.
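
To make the ambiguity concrete: in wave32, both a scalar s1 in the SGPR
bank and a VCC-bank condition mask constrain to SReg_32, so once only the
register class is recorded the bank information is lost. A minimal sketch
of that mapping (the helper name boolRCFor is hypothetical and not part of
this commit; the register classes and bank IDs match the diff below, and
the snippet assumes the usual AMDGPU backend headers):

// Sketch only, not code from this commit; boolRCFor is a hypothetical
// helper. In wave32, getBoolRC() is SReg_32, the same class a scalar s1
// SGPR value would be constrained to, so the SGPR-vs-VCC distinction
// disappears once only the register class is kept.
static const TargetRegisterClass *
boolRCFor(const RegisterBank &RB, const SIRegisterInfo &TRI) {
  if (RB.getID() == AMDGPU::VCCRegBankID)
    return TRI.getBoolRC();        // SReg_64 in wave64, SReg_32 in wave32
  return &AMDGPU::SReg_32RegClass; // scalar s1 booleans also land here
}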
Added:
Modified:
llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-brcond.mir
llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-phi.mir
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index 5aceb40933c3..16fc759f0cbf 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -170,19 +170,6 @@ bool AMDGPUInstructionSelector::selectCOPY(MachineInstr &I) const {
if (RC && !RBI.constrainGenericRegister(DstReg, *RC, *MRI))
return false;
- // Don't constrain the source register to a class so the def instruction
- // handles it (unless it's undef).
- //
- // FIXME: This is a hack. When selecting the def, we neeed to know
- // specifically know that the result is VCCRegBank, and not just an SGPR
- // with size 1. An SReg_32 with size 1 is ambiguous with wave32.
- if (Src.isUndef()) {
- const TargetRegisterClass *SrcRC =
- TRI.getConstrainedRegClassForOperand(Src, *MRI);
- if (SrcRC && !RBI.constrainGenericRegister(SrcReg, *SrcRC, *MRI))
- return false;
- }
-
return true;
}
@@ -286,50 +273,24 @@ static unsigned getLogicalBitOpcode(unsigned Opc, bool Is64) {
}
bool AMDGPUInstructionSelector::selectG_AND_OR_XOR(MachineInstr &I) const {
- MachineOperand &Dst = I.getOperand(0);
- MachineOperand &Src0 = I.getOperand(1);
- MachineOperand &Src1 = I.getOperand(2);
- Register DstReg = Dst.getReg();
+ Register DstReg = I.getOperand(0).getReg();
unsigned Size = RBI.getSizeInBits(DstReg, *MRI, TRI);
const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI);
- if (DstRB->getID() == AMDGPU::VCCRegBankID) {
- const TargetRegisterClass *RC = TRI.getBoolRC();
- unsigned InstOpc = getLogicalBitOpcode(I.getOpcode(),
- RC == &AMDGPU::SReg_64RegClass);
- I.setDesc(TII.get(InstOpc));
- // Dead implicit-def of scc
- I.addOperand(MachineOperand::CreateReg(AMDGPU::SCC, true, // isDef
- true, // isImp
- false, // isKill
- true)); // isDead
-
- // FIXME: Hack to avoid turning the register bank into a register class.
- // The selector for G_ICMP relies on seeing the register bank for the result
- // is VCC. In wave32 if we constrain the registers to SReg_32 here, it will
- // be ambiguous whether it's a scalar or vector bool.
- if (Src0.isUndef() && !MRI->getRegClassOrNull(Src0.getReg()))
- MRI->setRegClass(Src0.getReg(), RC);
- if (Src1.isUndef() && !MRI->getRegClassOrNull(Src1.getReg()))
- MRI->setRegClass(Src1.getReg(), RC);
-
- return RBI.constrainGenericRegister(DstReg, *RC, *MRI);
- }
-
- // TODO: Should this allow an SCC bank result, and produce a copy from SCC for
- // the result?
- if (DstRB->getID() == AMDGPU::SGPRRegBankID) {
- unsigned InstOpc = getLogicalBitOpcode(I.getOpcode(), Size > 32);
- I.setDesc(TII.get(InstOpc));
- // Dead implicit-def of scc
- I.addOperand(MachineOperand::CreateReg(AMDGPU::SCC, true, // isDef
- true, // isImp
- false, // isKill
- true)); // isDead
- return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
- }
+ if (DstRB->getID() != AMDGPU::SGPRRegBankID &&
+ DstRB->getID() != AMDGPU::VCCRegBankID)
+ return false;
- return false;
+ bool Is64 = Size > 32 || (DstRB->getID() == AMDGPU::VCCRegBankID &&
+ STI.isWave64());
+ I.setDesc(TII.get(getLogicalBitOpcode(I.getOpcode(), Is64)));
+
+ // Dead implicit-def of scc
+ I.addOperand(MachineOperand::CreateReg(AMDGPU::SCC, true, // isDef
+ true, // isImp
+ false, // isKill
+ true)); // isDead
+ return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
}
bool AMDGPUInstructionSelector::selectG_ADD_SUB(MachineInstr &I) const {
@@ -2338,8 +2299,7 @@ bool AMDGPUInstructionSelector::selectG_BRCOND(MachineInstr &I) const {
CondPhysReg = AMDGPU::SCC;
BrOpcode = AMDGPU::S_CBRANCH_SCC1;
- // FIXME: Hack for isSCC tests
- ConstrainRC = &AMDGPU::SGPR_32RegClass;
+ ConstrainRC = &AMDGPU::SReg_32RegClass;
} else {
// FIXME: Do we have to insert an and with exec here, like in SelectionDAG?
// We sort of know that a VCC producer based on the register bank, that ands
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
index 6848f762fc27..0f57d34ba6aa 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
@@ -1210,6 +1210,10 @@ class GCNSubtarget : public AMDGPUGenSubtargetInfo,
return getWavefrontSize() == 32;
}
+ bool isWave64() const {
+ return getWavefrontSize() == 64;
+ }
+
const TargetRegisterClass *getBoolRC() const {
return getRegisterInfo()->getBoolRC();
}
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-brcond.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-brcond.mir
index 496d496a42c5..6adb3549778c 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-brcond.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-brcond.mir
@@ -20,7 +20,7 @@ body: |
; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
; GCN: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
; GCN: S_CMP_EQ_U32 [[COPY]], [[COPY1]], implicit-def $scc
- ; GCN: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $scc
+ ; GCN: [[COPY2:%[0-9]+]]:sreg_32 = COPY $scc
; GCN: $scc = COPY [[COPY2]]
; GCN: S_CBRANCH_SCC1 %bb.1, implicit $scc
; GCN: bb.1:
@@ -46,7 +46,7 @@ body: |
; GCN-LABEL: name: brcond_scc_impdef
; GCN: bb.0:
; GCN: successors: %bb.1(0x80000000)
- ; GCN: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
+ ; GCN: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
; GCN: $scc = COPY [[DEF]]
; GCN: S_CBRANCH_SCC1 %bb.1, implicit $scc
; GCN: bb.1:
@@ -73,7 +73,7 @@ body: |
; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
; GCN: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
; GCN: S_CMP_EQ_U32 [[COPY]], [[COPY1]], implicit-def $scc
- ; GCN: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $scc
+ ; GCN: [[COPY2:%[0-9]+]]:sreg_32 = COPY $scc
; GCN: $scc = COPY [[COPY2]]
; GCN: S_CBRANCH_SCC1 %bb.1, implicit $scc
; GCN: S_BRANCH %bb.1
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-phi.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-phi.mir
index b4ef0caebfc1..4e7c81f5c79a 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-phi.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-phi.mir
@@ -17,7 +17,7 @@ body: |
; GCN: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2
; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
; GCN: S_CMP_EQ_U32 [[COPY2]], [[S_MOV_B32_]], implicit-def $scc
- ; GCN: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $scc
+ ; GCN: [[COPY3:%[0-9]+]]:sreg_32 = COPY $scc
; GCN: $scc = COPY [[COPY3]]
; GCN: S_CBRANCH_SCC1 %bb.1, implicit $scc
; GCN: S_BRANCH %bb.2
@@ -66,7 +66,7 @@ body: |
; GCN: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2
; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
; GCN: S_CMP_EQ_U32 [[COPY2]], [[S_MOV_B32_]], implicit-def $scc
- ; GCN: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $scc
+ ; GCN: [[COPY3:%[0-9]+]]:sreg_32 = COPY $scc
; GCN: $scc = COPY [[COPY3]]
; GCN: S_CBRANCH_SCC1 %bb.1, implicit $scc
; GCN: S_BRANCH %bb.2
@@ -116,7 +116,7 @@ body: |
; GCN: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2
; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
; GCN: S_CMP_EQ_U32 [[COPY2]], [[S_MOV_B32_]], implicit-def $scc
- ; GCN: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $scc
+ ; GCN: [[COPY3:%[0-9]+]]:sreg_32 = COPY $scc
; GCN: $scc = COPY [[COPY3]]
; GCN: S_CBRANCH_SCC1 %bb.1, implicit $scc
; GCN: S_BRANCH %bb.2
@@ -165,7 +165,7 @@ body: |
; GCN: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1
; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
; GCN: S_CMP_EQ_U32 [[COPY2]], [[S_MOV_B32_]], implicit-def $scc
- ; GCN: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $scc
+ ; GCN: [[COPY3:%[0-9]+]]:sreg_32 = COPY $scc
; GCN: $scc = COPY [[COPY3]]
; GCN: S_CBRANCH_SCC1 %bb.1, implicit $scc
; GCN: S_BRANCH %bb.2
@@ -215,7 +215,7 @@ body: |
; GCN: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
; GCN: S_CMP_EQ_U32 [[COPY2]], [[S_MOV_B32_]], implicit-def $scc
- ; GCN: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $scc
+ ; GCN: [[COPY3:%[0-9]+]]:sreg_32 = COPY $scc
; GCN: $scc = COPY [[COPY3]]
; GCN: S_CBRANCH_SCC1 %bb.1, implicit $scc
; GCN: S_BRANCH %bb.2
@@ -263,7 +263,7 @@ body: |
; GCN: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2
; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
; GCN: S_CMP_EQ_U32 [[COPY2]], [[S_MOV_B32_]], implicit-def $scc
- ; GCN: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $scc
+ ; GCN: [[COPY3:%[0-9]+]]:sreg_32 = COPY $scc
; GCN: $scc = COPY [[COPY3]]
; GCN: S_CBRANCH_SCC1 %bb.1, implicit $scc
; GCN: S_BRANCH %bb.2
@@ -314,7 +314,7 @@ body: |
; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
; GCN: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 [[COPY]], [[S_MOV_B32_]], implicit $exec
; GCN: S_CMP_EQ_U32 [[COPY2]], [[S_MOV_B32_]], implicit-def $scc
- ; GCN: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $scc
+ ; GCN: [[COPY3:%[0-9]+]]:sreg_32 = COPY $scc
; GCN: $scc = COPY [[COPY3]]
; GCN: S_CBRANCH_SCC1 %bb.1, implicit $scc
; GCN: S_BRANCH %bb.2
@@ -363,7 +363,7 @@ body: |
; GCN: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2
; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
; GCN: S_CMP_EQ_U32 [[COPY2]], [[S_MOV_B32_]], implicit-def $scc
- ; GCN: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $scc
+ ; GCN: [[COPY3:%[0-9]+]]:sreg_32 = COPY $scc
; GCN: $scc = COPY [[COPY3]]
; GCN: S_CBRANCH_SCC1 %bb.1, implicit $scc
; GCN: S_BRANCH %bb.2
@@ -412,7 +412,7 @@ body: |
; GCN: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2
; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
; GCN: S_CMP_EQ_U32 [[COPY2]], [[S_MOV_B32_]], implicit-def $scc
- ; GCN: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $scc
+ ; GCN: [[COPY3:%[0-9]+]]:sreg_32 = COPY $scc
; GCN: $scc = COPY [[COPY3]]
; GCN: S_CBRANCH_SCC1 %bb.1, implicit $scc
; GCN: S_BRANCH %bb.2