[llvm] 2414bab - AMDGPU/GlobalISel: Remove old hacks for boolean selection
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Mon Aug 3 06:11:48 PDT 2020
Author: Matt Arsenault
Date: 2020-08-03T09:04:14-04:00
New Revision: 2414bab5d7d6b5b247f3f3b97140a2673fa8414b
URL: https://github.com/llvm/llvm-project/commit/2414bab5d7d6b5b247f3f3b97140a2673fa8414b
DIFF: https://github.com/llvm/llvm-project/commit/2414bab5d7d6b5b247f3f3b97140a2673fa8414b.diff
LOG: AMDGPU/GlobalISel: Remove old hacks for boolean selection
There were various hacks used to try to avoid making s1 SGPR vs. s1
VCC ambiguous after constraining the register, dating from before we
had a strategy to deal with this. This code also attempted to handle
undef operands, which are now illegal in gMIR.
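
To make the ambiguity concrete: in wave32, both a scalar s1 in the SGPR
bank and a VCC-bank condition mask constrain to SReg_32, so once only the
register class is recorded the bank information is lost. A minimal sketch
of that mapping (the helper name boolRCFor is hypothetical and not part of
this commit; the register classes and bank IDs match the diff below, and
the snippet assumes the usual AMDGPU backend headers):

// Sketch only, not code from this commit; boolRCFor is a hypothetical
// helper. In wave32, getBoolRC() is SReg_32, the same class a scalar s1
// SGPR value would be constrained to, so the SGPR-vs-VCC distinction
// disappears once only the register class is kept.
static const TargetRegisterClass *
boolRCFor(const RegisterBank &RB, const SIRegisterInfo &TRI) {
  if (RB.getID() == AMDGPU::VCCRegBankID)
    return TRI.getBoolRC();        // SReg_64 in wave64, SReg_32 in wave32
  return &AMDGPU::SReg_32RegClass; // scalar s1 booleans also land here
}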
Added:
Modified:
llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-brcond.mir
llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-phi.mir
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index 5aceb40933c3..16fc759f0cbf 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -170,19 +170,6 @@ bool AMDGPUInstructionSelector::selectCOPY(MachineInstr &I) const {
if (RC && !RBI.constrainGenericRegister(DstReg, *RC, *MRI))
return false;
- // Don't constrain the source register to a class so the def instruction
- // handles it (unless it's undef).
- //
- // FIXME: This is a hack. When selecting the def, we neeed to know
- // specifically know that the result is VCCRegBank, and not just an SGPR
- // with size 1. An SReg_32 with size 1 is ambiguous with wave32.
- if (Src.isUndef()) {
- const TargetRegisterClass *SrcRC =
- TRI.getConstrainedRegClassForOperand(Src, *MRI);
- if (SrcRC && !RBI.constrainGenericRegister(SrcReg, *SrcRC, *MRI))
- return false;
- }
-
return true;
}
@@ -286,50 +273,24 @@ static unsigned getLogicalBitOpcode(unsigned Opc, bool Is64) {
}
bool AMDGPUInstructionSelector::selectG_AND_OR_XOR(MachineInstr &I) const {
- MachineOperand &Dst = I.getOperand(0);
- MachineOperand &Src0 = I.getOperand(1);
- MachineOperand &Src1 = I.getOperand(2);
- Register DstReg = Dst.getReg();
+ Register DstReg = I.getOperand(0).getReg();
unsigned Size = RBI.getSizeInBits(DstReg, *MRI, TRI);
const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI);
- if (DstRB->getID() == AMDGPU::VCCRegBankID) {
- const TargetRegisterClass *RC = TRI.getBoolRC();
- unsigned InstOpc = getLogicalBitOpcode(I.getOpcode(),
- RC == &AMDGPU::SReg_64RegClass);
- I.setDesc(TII.get(InstOpc));
- // Dead implicit-def of scc
- I.addOperand(MachineOperand::CreateReg(AMDGPU::SCC, true, // isDef
- true, // isImp
- false, // isKill
- true)); // isDead
-
- // FIXME: Hack to avoid turning the register bank into a register class.
- // The selector for G_ICMP relies on seeing the register bank for the result
- // is VCC. In wave32 if we constrain the registers to SReg_32 here, it will
- // be ambiguous whether it's a scalar or vector bool.
- if (Src0.isUndef() && !MRI->getRegClassOrNull(Src0.getReg()))
- MRI->setRegClass(Src0.getReg(), RC);
- if (Src1.isUndef() && !MRI->getRegClassOrNull(Src1.getReg()))
- MRI->setRegClass(Src1.getReg(), RC);
-
- return RBI.constrainGenericRegister(DstReg, *RC, *MRI);
- }
-
- // TODO: Should this allow an SCC bank result, and produce a copy from SCC for
- // the result?
- if (DstRB->getID() == AMDGPU::SGPRRegBankID) {
- unsigned InstOpc = getLogicalBitOpcode(I.getOpcode(), Size > 32);
- I.setDesc(TII.get(InstOpc));
- // Dead implicit-def of scc
- I.addOperand(MachineOperand::CreateReg(AMDGPU::SCC, true, // isDef
- true, // isImp
- false, // isKill
- true)); // isDead
- return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
- }
+ if (DstRB->getID() != AMDGPU::SGPRRegBankID &&
+ DstRB->getID() != AMDGPU::VCCRegBankID)
+ return false;
- return false;
+ bool Is64 = Size > 32 || (DstRB->getID() == AMDGPU::VCCRegBankID &&
+ STI.isWave64());
+ I.setDesc(TII.get(getLogicalBitOpcode(I.getOpcode(), Is64)));
+
+ // Dead implicit-def of scc
+ I.addOperand(MachineOperand::CreateReg(AMDGPU::SCC, true, // isDef
+ true, // isImp
+ false, // isKill
+ true)); // isDead
+ return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
}
bool AMDGPUInstructionSelector::selectG_ADD_SUB(MachineInstr &I) const {
@@ -2338,8 +2299,7 @@ bool AMDGPUInstructionSelector::selectG_BRCOND(MachineInstr &I) const {
CondPhysReg = AMDGPU::SCC;
BrOpcode = AMDGPU::S_CBRANCH_SCC1;
- // FIXME: Hack for isSCC tests
- ConstrainRC = &AMDGPU::SGPR_32RegClass;
+ ConstrainRC = &AMDGPU::SReg_32RegClass;
} else {
// FIXME: Do we have to insert an and with exec here, like in SelectionDAG?
// We sort of know that a VCC producer based on the register bank, that ands
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
index 6848f762fc27..0f57d34ba6aa 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
@@ -1210,6 +1210,10 @@ class GCNSubtarget : public AMDGPUGenSubtargetInfo,
return getWavefrontSize() == 32;
}
+ bool isWave64() const {
+ return getWavefrontSize() == 64;
+ }
+
const TargetRegisterClass *getBoolRC() const {
return getRegisterInfo()->getBoolRC();
}
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-brcond.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-brcond.mir
index 496d496a42c5..6adb3549778c 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-brcond.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-brcond.mir
@@ -20,7 +20,7 @@ body: |
; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
; GCN: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
; GCN: S_CMP_EQ_U32 [[COPY]], [[COPY1]], implicit-def $scc
- ; GCN: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $scc
+ ; GCN: [[COPY2:%[0-9]+]]:sreg_32 = COPY $scc
; GCN: $scc = COPY [[COPY2]]
; GCN: S_CBRANCH_SCC1 %bb.1, implicit $scc
; GCN: bb.1:
@@ -46,7 +46,7 @@ body: |
; GCN-LABEL: name: brcond_scc_impdef
; GCN: bb.0:
; GCN: successors: %bb.1(0x80000000)
- ; GCN: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
+ ; GCN: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
; GCN: $scc = COPY [[DEF]]
; GCN: S_CBRANCH_SCC1 %bb.1, implicit $scc
; GCN: bb.1:
@@ -73,7 +73,7 @@ body: |
; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
; GCN: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
; GCN: S_CMP_EQ_U32 [[COPY]], [[COPY1]], implicit-def $scc
- ; GCN: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $scc
+ ; GCN: [[COPY2:%[0-9]+]]:sreg_32 = COPY $scc
; GCN: $scc = COPY [[COPY2]]
; GCN: S_CBRANCH_SCC1 %bb.1, implicit $scc
; GCN: S_BRANCH %bb.1
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-phi.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-phi.mir
index b4ef0caebfc1..4e7c81f5c79a 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-phi.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-phi.mir
@@ -17,7 +17,7 @@ body: |
; GCN: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2
; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
; GCN: S_CMP_EQ_U32 [[COPY2]], [[S_MOV_B32_]], implicit-def $scc
- ; GCN: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $scc
+ ; GCN: [[COPY3:%[0-9]+]]:sreg_32 = COPY $scc
; GCN: $scc = COPY [[COPY3]]
; GCN: S_CBRANCH_SCC1 %bb.1, implicit $scc
; GCN: S_BRANCH %bb.2
@@ -66,7 +66,7 @@ body: |
; GCN: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2
; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
; GCN: S_CMP_EQ_U32 [[COPY2]], [[S_MOV_B32_]], implicit-def $scc
- ; GCN: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $scc
+ ; GCN: [[COPY3:%[0-9]+]]:sreg_32 = COPY $scc
; GCN: $scc = COPY [[COPY3]]
; GCN: S_CBRANCH_SCC1 %bb.1, implicit $scc
; GCN: S_BRANCH %bb.2
@@ -116,7 +116,7 @@ body: |
; GCN: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2
; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
; GCN: S_CMP_EQ_U32 [[COPY2]], [[S_MOV_B32_]], implicit-def $scc
- ; GCN: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $scc
+ ; GCN: [[COPY3:%[0-9]+]]:sreg_32 = COPY $scc
; GCN: $scc = COPY [[COPY3]]
; GCN: S_CBRANCH_SCC1 %bb.1, implicit $scc
; GCN: S_BRANCH %bb.2
@@ -165,7 +165,7 @@ body: |
; GCN: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1
; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
; GCN: S_CMP_EQ_U32 [[COPY2]], [[S_MOV_B32_]], implicit-def $scc
- ; GCN: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $scc
+ ; GCN: [[COPY3:%[0-9]+]]:sreg_32 = COPY $scc
; GCN: $scc = COPY [[COPY3]]
; GCN: S_CBRANCH_SCC1 %bb.1, implicit $scc
; GCN: S_BRANCH %bb.2
@@ -215,7 +215,7 @@ body: |
; GCN: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
; GCN: S_CMP_EQ_U32 [[COPY2]], [[S_MOV_B32_]], implicit-def $scc
- ; GCN: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $scc
+ ; GCN: [[COPY3:%[0-9]+]]:sreg_32 = COPY $scc
; GCN: $scc = COPY [[COPY3]]
; GCN: S_CBRANCH_SCC1 %bb.1, implicit $scc
; GCN: S_BRANCH %bb.2
@@ -263,7 +263,7 @@ body: |
; GCN: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2
; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
; GCN: S_CMP_EQ_U32 [[COPY2]], [[S_MOV_B32_]], implicit-def $scc
- ; GCN: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $scc
+ ; GCN: [[COPY3:%[0-9]+]]:sreg_32 = COPY $scc
; GCN: $scc = COPY [[COPY3]]
; GCN: S_CBRANCH_SCC1 %bb.1, implicit $scc
; GCN: S_BRANCH %bb.2
@@ -314,7 +314,7 @@ body: |
; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
; GCN: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 [[COPY]], [[S_MOV_B32_]], implicit $exec
; GCN: S_CMP_EQ_U32 [[COPY2]], [[S_MOV_B32_]], implicit-def $scc
- ; GCN: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $scc
+ ; GCN: [[COPY3:%[0-9]+]]:sreg_32 = COPY $scc
; GCN: $scc = COPY [[COPY3]]
; GCN: S_CBRANCH_SCC1 %bb.1, implicit $scc
; GCN: S_BRANCH %bb.2
@@ -363,7 +363,7 @@ body: |
; GCN: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2
; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
; GCN: S_CMP_EQ_U32 [[COPY2]], [[S_MOV_B32_]], implicit-def $scc
- ; GCN: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $scc
+ ; GCN: [[COPY3:%[0-9]+]]:sreg_32 = COPY $scc
; GCN: $scc = COPY [[COPY3]]
; GCN: S_CBRANCH_SCC1 %bb.1, implicit $scc
; GCN: S_BRANCH %bb.2
@@ -412,7 +412,7 @@ body: |
; GCN: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2
; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
; GCN: S_CMP_EQ_U32 [[COPY2]], [[S_MOV_B32_]], implicit-def $scc
- ; GCN: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $scc
+ ; GCN: [[COPY3:%[0-9]+]]:sreg_32 = COPY $scc
; GCN: $scc = COPY [[COPY3]]
; GCN: S_CBRANCH_SCC1 %bb.1, implicit $scc
; GCN: S_BRANCH %bb.2