[llvm] r363576 - AMDGPU/GlobalISel: Implement select for G_ICMP and G_SELECT

Tom Stellard via llvm-commits llvm-commits at lists.llvm.org
Mon Jun 17 09:27:43 PDT 2019


Author: tstellar
Date: Mon Jun 17 09:27:43 2019
New Revision: 363576

URL: http://llvm.org/viewvc/llvm-project?rev=363576&view=rev
Log:
AMDGPU/GlobalISel: Implement select for G_ICMP and G_SELECT

Reviewers: arsenm

Subscribers: kzhuravl, jvesely, wdng, nhaehnle, yaxunl, rovka, kristof.beyls, dstuttard, tpr, t-tye, hiraditya, Petar.Avramovic, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D60640

Added:
    llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/inst-select-icmp.mir
Modified:
    llvm/trunk/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
    llvm/trunk/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
    llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.cpp
    llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy.mir

Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp?rev=363576&r1=363575&r2=363576&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp Mon Jun 17 09:27:43 2019
@@ -59,11 +59,52 @@ AMDGPUInstructionSelector::AMDGPUInstruc
 
 const char *AMDGPUInstructionSelector::getName() { return DEBUG_TYPE; }
 
+static bool isSCC(unsigned Reg, const MachineRegisterInfo &MRI) {
+  if (Reg == AMDGPU::SCC)
+    return true;
+
+  if (TargetRegisterInfo::isPhysicalRegister(Reg))
+    return false;
+
+  auto &RegClassOrBank = MRI.getRegClassOrRegBank(Reg);
+  const TargetRegisterClass *RC =
+      RegClassOrBank.dyn_cast<const TargetRegisterClass*>();
+  if (RC)
+    return RC->getID() == AMDGPU::SReg_32_XM0RegClassID &&
+           MRI.getType(Reg).getSizeInBits() == 1;
+
+  const RegisterBank *RB = RegClassOrBank.get<const RegisterBank *>();
+  return RB->getID() == AMDGPU::SCCRegBankID;
+}
+
 bool AMDGPUInstructionSelector::selectCOPY(MachineInstr &I) const {
   MachineBasicBlock *BB = I.getParent();
   MachineFunction *MF = BB->getParent();
   MachineRegisterInfo &MRI = MF->getRegInfo();
   I.setDesc(TII.get(TargetOpcode::COPY));
+
+  // Special case for COPY from the scc register bank.  The scc register bank
+  // is modeled using 32-bit sgprs.
+  const MachineOperand &Src = I.getOperand(1);
+  unsigned SrcReg = Src.getReg();
+  if (!TargetRegisterInfo::isPhysicalRegister(SrcReg) && isSCC(SrcReg, MRI)) {
+    unsigned DstReg = TRI.getRegSizeInBits(I.getOperand(0).getReg(), MRI);
+    unsigned DstSize = TRI.getRegSizeInBits(DstReg, MRI);
+
+    // We have a copy from a 32-bit to 64-bit register.  This happens
+    // when we are selecting scc->vcc copies.
+    if (DstSize == 64) {
+      const DebugLoc &DL = I.getDebugLoc();
+      BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_CMP_NE_U32_e64), I.getOperand(0).getReg())
+        .addImm(0)
+        .addReg(SrcReg);
+      if (!MRI.getRegClassOrNull(SrcReg))
+        MRI.setRegClass(SrcReg, TRI.getConstrainedRegClassForOperand(Src, MRI));
+      I.eraseFromParent();
+      return true;
+    }
+  }
+
   for (const MachineOperand &MO : I.operands()) {
     if (TargetRegisterInfo::isPhysicalRegister(MO.getReg()))
       continue;
@@ -262,6 +303,101 @@ bool AMDGPUInstructionSelector::selectG_
   return false;
 }
 
+static unsigned getV_CMPOpcode(CmpInst::Predicate P, unsigned Size) {
+  assert(Size == 32 || Size == 64);
+  switch (P) {
+  default:
+    llvm_unreachable("Unknown condition code!");
+  case CmpInst::ICMP_NE:
+    return Size == 32 ? AMDGPU::V_CMP_NE_U32_e64 : AMDGPU::V_CMP_NE_U64_e64;
+  case CmpInst::ICMP_EQ:
+    return Size == 32 ? AMDGPU::V_CMP_EQ_U32_e64 : AMDGPU::V_CMP_EQ_U64_e64;
+  case CmpInst::ICMP_SGT:
+    return Size == 32 ? AMDGPU::V_CMP_GT_I32_e64 : AMDGPU::V_CMP_GT_I64_e64;
+  case CmpInst::ICMP_SGE:
+    return Size == 32 ? AMDGPU::V_CMP_GE_I32_e64 : AMDGPU::V_CMP_GE_I64_e64;
+  case CmpInst::ICMP_SLT:
+    return Size == 32 ? AMDGPU::V_CMP_LT_I32_e64 : AMDGPU::V_CMP_LT_I64_e64;
+  case CmpInst::ICMP_SLE:
+    return Size == 32 ? AMDGPU::V_CMP_LE_I32_e64 : AMDGPU::V_CMP_LE_I64_e64;
+  case CmpInst::ICMP_UGT:
+    return Size == 32 ? AMDGPU::V_CMP_GT_U32_e64 : AMDGPU::V_CMP_GT_U64_e64;
+  case CmpInst::ICMP_UGE:
+    return Size == 32 ? AMDGPU::V_CMP_GE_U32_e64 : AMDGPU::V_CMP_GE_U64_e64;
+  case CmpInst::ICMP_ULT:
+    return Size == 32 ? AMDGPU::V_CMP_LT_U32_e64 : AMDGPU::V_CMP_LT_U64_e64;
+  case CmpInst::ICMP_ULE:
+    return Size == 32 ? AMDGPU::V_CMP_LE_U32_e64 : AMDGPU::V_CMP_LE_U64_e64;
+  }
+}
+
+static unsigned getS_CMPOpcode(CmpInst::Predicate P, unsigned Size) {
+  // FIXME: VI supports 64-bit compares.
+  assert(Size == 32);
+  switch (P) {
+  default:
+    llvm_unreachable("Unknown condition code!");
+  case CmpInst::ICMP_NE:
+    return AMDGPU::S_CMP_LG_U32;
+  case CmpInst::ICMP_EQ:
+    return AMDGPU::S_CMP_EQ_U32;
+  case CmpInst::ICMP_SGT:
+    return AMDGPU::S_CMP_GT_I32;
+  case CmpInst::ICMP_SGE:
+    return AMDGPU::S_CMP_GE_I32;
+  case CmpInst::ICMP_SLT:
+    return AMDGPU::S_CMP_LT_I32;
+  case CmpInst::ICMP_SLE:
+    return AMDGPU::S_CMP_LE_I32;
+  case CmpInst::ICMP_UGT:
+    return AMDGPU::S_CMP_GT_U32;
+  case CmpInst::ICMP_UGE:
+    return AMDGPU::S_CMP_GE_U32;
+  case CmpInst::ICMP_ULT:
+    return AMDGPU::S_CMP_LT_U32;
+  case CmpInst::ICMP_ULE:
+    return AMDGPU::S_CMP_LE_U32;
+  }
+}
+
+bool AMDGPUInstructionSelector::selectG_ICMP(MachineInstr &I) const {
+  MachineBasicBlock *BB = I.getParent();
+  MachineFunction *MF = BB->getParent();
+  MachineRegisterInfo &MRI = MF->getRegInfo();
+  DebugLoc DL = I.getDebugLoc();
+
+  unsigned SrcReg = I.getOperand(2).getReg();
+  unsigned Size = RBI.getSizeInBits(SrcReg, MRI, TRI);
+  // FIXME: VI supports 64-bit compares.
+  assert(Size == 32);
+
+  unsigned CCReg = I.getOperand(0).getReg();
+  if (isSCC(CCReg, MRI)) {
+    unsigned Opcode = getS_CMPOpcode((CmpInst::Predicate)I.getOperand(1).getPredicate(), Size);
+    MachineInstr *ICmp = BuildMI(*BB, &I, DL, TII.get(Opcode))
+            .add(I.getOperand(2))
+            .add(I.getOperand(3));
+    MachineInstr *Copy = BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), CCReg)
+            .addReg(AMDGPU::SCC);
+    bool Ret = constrainSelectedInstRegOperands(*ICmp, TII, TRI, RBI) |
+               constrainSelectedInstRegOperands(*Copy, TII, TRI, RBI);
+    I.eraseFromParent();
+    return Ret;
+  }
+
+  assert(Size == 32 || Size == 64);
+  unsigned Opcode = getV_CMPOpcode((CmpInst::Predicate)I.getOperand(1).getPredicate(), Size);
+  MachineInstr *ICmp = BuildMI(*BB, &I, DL, TII.get(Opcode),
+            I.getOperand(0).getReg())
+            .add(I.getOperand(2))
+            .add(I.getOperand(3));
+  RBI.constrainGenericRegister(ICmp->getOperand(0).getReg(),
+                               AMDGPU::SReg_64RegClass, MRI);
+  bool Ret = constrainSelectedInstRegOperands(*ICmp, TII, TRI, RBI);
+  I.eraseFromParent();
+  return Ret;
+}
+
 static MachineInstr *
 buildEXP(const TargetInstrInfo &TII, MachineInstr *Insert, unsigned Tgt,
          unsigned Reg0, unsigned Reg1, unsigned Reg2, unsigned Reg3,
@@ -325,6 +461,53 @@ bool AMDGPUInstructionSelector::selectG_
   return false;
 }
 
+bool AMDGPUInstructionSelector::selectG_SELECT(MachineInstr &I) const {
+  MachineBasicBlock *BB = I.getParent();
+  MachineFunction *MF = BB->getParent();
+  MachineRegisterInfo &MRI = MF->getRegInfo();
+  const DebugLoc &DL = I.getDebugLoc();
+
+  unsigned DstReg = I.getOperand(0).getReg();
+  unsigned Size = RBI.getSizeInBits(DstReg, MRI, TRI);
+  assert(Size == 32 || Size == 64);
+  const MachineOperand &CCOp = I.getOperand(1);
+  unsigned CCReg = CCOp.getReg();
+  if (isSCC(CCReg, MRI)) {
+    unsigned SelectOpcode = Size == 32 ? AMDGPU::S_CSELECT_B32 :
+                                         AMDGPU::S_CSELECT_B64;
+    MachineInstr *CopySCC = BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), AMDGPU::SCC)
+            .addReg(CCReg);
+
+    // The generic constrainSelectedInstRegOperands doesn't work for the scc
+    // register bank, because it does not cover the register class that we use
+    // to represent it.  So we need to manually set the register class here.
+    if (!MRI.getRegClassOrNull(CCReg))
+        MRI.setRegClass(CCReg, TRI.getConstrainedRegClassForOperand(CCOp, MRI));
+    MachineInstr *Select = BuildMI(*BB, &I, DL, TII.get(SelectOpcode), DstReg)
+            .add(I.getOperand(2))
+            .add(I.getOperand(3));
+
+    bool Ret = constrainSelectedInstRegOperands(*Select, TII, TRI, RBI) |
+               constrainSelectedInstRegOperands(*CopySCC, TII, TRI, RBI);
+    I.eraseFromParent();
+    return Ret;
+  }
+
+  assert(Size == 32);
+  // FIXME: Support 64-bit select
+  MachineInstr *Select =
+      BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
+              .addImm(0)
+              .add(I.getOperand(3))
+              .addImm(0)
+              .add(I.getOperand(2))
+              .add(I.getOperand(1));
+
+  bool Ret = constrainSelectedInstRegOperands(*Select, TII, TRI, RBI);
+  I.eraseFromParent();
+  return Ret;
+}
+
 bool AMDGPUInstructionSelector::selectG_STORE(MachineInstr &I) const {
   MachineBasicBlock *BB = I.getParent();
   MachineFunction *MF = BB->getParent();
@@ -573,10 +756,14 @@ bool AMDGPUInstructionSelector::select(M
     return selectG_INTRINSIC(I, CoverageInfo);
   case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
     return selectG_INTRINSIC_W_SIDE_EFFECTS(I, CoverageInfo);
+  case TargetOpcode::G_ICMP:
+    return selectG_ICMP(I);
   case TargetOpcode::G_LOAD:
     if (selectImpl(I, CoverageInfo))
       return true;
     return selectG_LOAD(I);
+  case TargetOpcode::G_SELECT:
+    return selectG_SELECT(I);
   case TargetOpcode::G_STORE:
     return selectG_STORE(I);
   }

Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUInstructionSelector.h?rev=363576&r1=363575&r2=363576&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUInstructionSelector.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUInstructionSelector.h Mon Jun 17 09:27:43 2019
@@ -72,11 +72,13 @@ private:
   bool selectG_INTRINSIC(MachineInstr &I, CodeGenCoverage &CoverageInfo) const;
   bool selectG_INTRINSIC_W_SIDE_EFFECTS(MachineInstr &I,
                                         CodeGenCoverage &CoverageInfo) const;
+  bool selectG_ICMP(MachineInstr &I) const;
   bool hasVgprParts(ArrayRef<GEPInfo> AddrInfo) const;
   void getAddrModeInfo(const MachineInstr &Load, const MachineRegisterInfo &MRI,
                        SmallVectorImpl<GEPInfo> &AddrInfo) const;
   bool selectSMRD(MachineInstr &I, ArrayRef<GEPInfo> AddrInfo) const;
   bool selectG_LOAD(MachineInstr &I) const;
+  bool selectG_SELECT(MachineInstr &I) const;
   bool selectG_STORE(MachineInstr &I) const;
 
   InstructionSelector::ComplexRendererFns

Modified: llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.cpp?rev=363576&r1=363575&r2=363576&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.cpp Mon Jun 17 09:27:43 2019
@@ -1688,6 +1688,10 @@ SIRegisterInfo::getConstrainedRegClassFo
 
   Size = PowerOf2Ceil(Size);
   switch (Size) {
+  case 1:
+    if (RB->getID() == AMDGPU::SCCRegBankID)
+      return &AMDGPU::SReg_32_XM0RegClass;
+    break;
   case 32:
     return RB->getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VGPR_32RegClass :
                                                   &AMDGPU::SReg_32_XM0RegClass;
@@ -1710,8 +1714,9 @@ SIRegisterInfo::getConstrainedRegClassFo
     return RB->getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VReg_512RegClass :
                                                   &AMDGPU::SReg_512RegClass;
   default:
-    llvm_unreachable("not implemented");
+    break;
   }
+  llvm_unreachable("not implemented");
 }
 
 unsigned SIRegisterInfo::getVCC() const {

Modified: llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy.mir?rev=363576&r1=363575&r2=363576&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy.mir Mon Jun 17 09:27:43 2019
@@ -1,9 +1,6 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
 # RUN: llc -march=amdgcn -run-pass=instruction-select -verify-machineinstrs -global-isel %s -o - | FileCheck %s -check-prefixes=GCN
 
---- |
-  define amdgpu_kernel void @copy(i32 addrspace(1)* %global0) {ret void}
-...
 ---
 
 name:            copy
@@ -22,6 +19,60 @@ body: |
     %0:sgpr(p1) = COPY $sgpr2_sgpr3
     %1:vgpr(p1) = COPY %0
     %2:vgpr(s32) = G_IMPLICIT_DEF
-    G_STORE %2, %1 :: (store 4 into %ir.global0)
+    G_STORE %2, %1 :: (store 4, addrspace 1)
+...
+---
+
+name:            copy_vcc_scc
+legalized:       true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3, $scc
+    ; GCN-LABEL: name: copy_vcc_scc
+    ; GCN: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+    ; GCN: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3
+    ; GCN: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $scc
+    ; GCN: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U32_e64 0, [[COPY3]], implicit $exec
+    ; GCN: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY2]], 0, [[COPY1]], [[V_CMP_NE_U32_e64_]], implicit $exec
+    ; GCN: FLAT_STORE_DWORD [[COPY]], [[V_CNDMASK_B32_e64_]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+    %0:vgpr(p1) = COPY $vgpr0_vgpr1
+    %1:vgpr(s32) = COPY $vgpr2
+    %2:vgpr(s32) = COPY $vgpr3
+    %3:scc(s1) = COPY $scc
+    %4:vcc(s1) = COPY %3
+    %5:vgpr(s32) = G_SELECT %4, %1, %2
+    G_STORE %5, %0 :: (store 4, addrspace 1)
+...
+---
+
+name:            copy_vcc_scc_2_uses
+legalized:       true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3, $scc
+    ; GCN-LABEL: name: copy_vcc_scc_2_uses
+    ; GCN: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+    ; GCN: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3
+    ; GCN: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $scc
+    ; GCN: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U32_e64 0, [[COPY3]], implicit $exec
+    ; GCN: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY2]], 0, [[COPY1]], [[V_CMP_NE_U32_e64_]], implicit $exec
+    ; GCN: [[V_CMP_NE_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U32_e64 0, [[COPY3]], implicit $exec
+    ; GCN: [[V_CNDMASK_B32_e64_1:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_CNDMASK_B32_e64_]], 0, [[COPY1]], [[V_CMP_NE_U32_e64_1]], implicit $exec
+    ; GCN: FLAT_STORE_DWORD [[COPY]], [[V_CNDMASK_B32_e64_1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+    %0:vgpr(p1) = COPY $vgpr0_vgpr1
+    %1:vgpr(s32) = COPY $vgpr2
+    %2:vgpr(s32) = COPY $vgpr3
+    %3:scc(s1) = COPY $scc
+    %4:vcc(s1) = COPY %3
+    %5:vgpr(s32) = G_SELECT %4, %1, %2
+    %6:vcc(s1) = COPY %3
+    %7:vgpr(s32) = G_SELECT %6, %1, %5
+    G_STORE %7, %0 :: (store 4, addrspace 1)
 ...
 ---

Added: llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/inst-select-icmp.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/inst-select-icmp.mir?rev=363576&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/inst-select-icmp.mir (added)
+++ llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/inst-select-icmp.mir Mon Jun 17 09:27:43 2019
@@ -0,0 +1,309 @@
+# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs -global-isel %s -o - | FileCheck %s -check-prefixes=GCN
+
+---
+name:            icmp_s_mix
+legalized:       true
+regBankSelected: true
+
+# GCN: name: icmp_s_mix
+# GCN: [[SGPR0:%[0-9]+]]:sreg_32 = COPY $sgpr0
+# GCN: [[SGPR1:%[0-9]+]]:sreg_32 = COPY $sgpr1
+# GCN: [[SGPR2:%[0-9]+]]:sreg_32 = COPY $sgpr2
+# GCN: [[SGPR3:%[0-9]+]]:sreg_32 = COPY $sgpr3
+# GCN: [[SGPR4:%[0-9]+]]:sreg_32 = COPY $sgpr4
+# GCN: [[SGPR5:%[0-9]+]]:sreg_32 = COPY $sgpr5
+# GCN: [[SGPR6:%[0-9]+]]:sreg_32 = COPY $sgpr6
+# GCN: [[SGPR7:%[0-9]+]]:sreg_32 = COPY $sgpr7
+# GCN: S_CMP_LG_U32 [[SGPR0]], [[SGPR1]], implicit-def $scc
+# GCN-NEXT: [[COND0:%[0-9]+]]:sreg_32_xm0 = COPY $scc
+# GCN: S_CMP_LG_U32 [[SGPR4]], [[SGPR5]], implicit-def $scc
+# GCN-NEXT: [[COND1:%[0-9]+]]:sreg_32_xm0 = COPY $scc
+# GCN: $scc = COPY [[COND0]]
+# GCN-NEXT: S_CSELECT_B32 [[SGPR6]], [[SGPR7]], implicit $scc
+# GCN: $scc = COPY [[COND1]]
+# GCN-NEXT: S_CSELECT_B32 [[SGPR2]], [[SGPR3]], implicit $scc
+
+body: |
+  bb.0:
+    liveins:  $vgpr0_vgpr1, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7
+
+    %0:vgpr(p1) = COPY $vgpr0_vgpr1
+    %1:sgpr(s32) = COPY $sgpr0
+    %2:sgpr(s32) = COPY $sgpr1
+    %3:sgpr(s32) = COPY $sgpr2
+    %4:sgpr(s32) = COPY $sgpr3
+    %5:sgpr(s32) = COPY $sgpr4
+    %6:sgpr(s32) = COPY $sgpr5
+    %7:sgpr(s32) = COPY $sgpr6
+    %8:sgpr(s32) = COPY $sgpr7
+    %9:scc(s1) = G_ICMP intpred(ne), %1, %2
+    %10:scc(s1) = G_ICMP intpred(ne), %5, %6
+    %11:sgpr(s32) = G_SELECT %9, %7, %8
+    %12:sgpr(s32) = G_SELECT %10, %3, %4
+    %13:vgpr(s32) = COPY %11
+    G_STORE %13, %0 :: (volatile store 4, addrspace 1)
+    %14:vgpr(s32) = COPY %12
+    G_STORE %14, %0 :: (volatile store 4, addrspace 1)
+
+...
+---
+name:            icmp_salu
+legalized:       true
+regBankSelected: true
+
+# GCN-LABEL: name: icmp_salu
+# GCN: S_CMP_LG_U32
+# GCN: S_CMP_EQ_U32
+# GCN: S_CMP_GT_I32
+# GCN: S_CMP_GE_I32
+# GCN: S_CMP_LT_I32
+# GCN: S_CMP_LE_I32
+# GCN: S_CMP_GT_U32
+# GCN: S_CMP_GE_U32
+# GCN: S_CMP_LT_U32
+# GCN: S_CMP_LE_U32
+
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1, $sgpr0, $sgpr1, $sgpr2, $sgpr3
+
+    %0:vgpr(p1) = COPY $vgpr0_vgpr1
+    %1:sgpr(s32) = COPY $sgpr0
+    %2:sgpr(s32) = COPY $sgpr1
+    %3:sgpr(s32) = COPY $sgpr2
+    %4:sgpr(s32) = COPY $sgpr3
+    %5:scc(s1) = G_ICMP intpred(ne), %1, %2
+    %6:scc(s1) = G_ICMP intpred(eq), %1, %2
+    %7:scc(s1) = G_ICMP intpred(sgt), %1, %2
+    %8:scc(s1) = G_ICMP intpred(sge), %1, %2
+    %9:scc(s1) = G_ICMP intpred(slt), %1, %2
+    %10:scc(s1) = G_ICMP intpred(sle), %1, %2
+    %11:scc(s1) = G_ICMP intpred(ugt), %1, %2
+    %12:scc(s1) = G_ICMP intpred(uge), %1, %2
+    %13:scc(s1) = G_ICMP intpred(ult), %1, %2
+    %14:scc(s1) = G_ICMP intpred(ule), %1, %2
+    %15:sgpr(s32) = G_SELECT %5, %3, %4
+    %16:sgpr(s32) = G_SELECT %6, %3, %4
+    %17:sgpr(s32) = G_SELECT %7, %3, %4
+    %18:sgpr(s32) = G_SELECT %8, %3, %4
+    %19:sgpr(s32) = G_SELECT %9, %3, %4
+    %20:sgpr(s32) = G_SELECT %10, %3, %4
+    %21:sgpr(s32) = G_SELECT %11, %3, %4
+    %22:sgpr(s32) = G_SELECT %12, %3, %4
+    %23:sgpr(s32) = G_SELECT %13, %3, %4
+    %24:sgpr(s32) = G_SELECT %14, %3, %4
+    %25:vgpr(s32) = COPY %15
+    G_STORE %25, %0 :: (volatile store 4, addrspace 1)
+    %26:vgpr(s32) = COPY %16
+    G_STORE %26, %0 :: (volatile store 4, addrspace 1)
+    %27:vgpr(s32) = COPY %17
+    G_STORE %27, %0 :: (volatile store 4, addrspace 1)
+    %28:vgpr(s32) = COPY %18
+    G_STORE %28, %0 :: (volatile store 4, addrspace 1)
+    %29:vgpr(s32) = COPY %19
+    G_STORE %29, %0 :: (volatile store 4, addrspace 1)
+    %30:vgpr(s32) = COPY %20
+    G_STORE %30, %0 :: (volatile store 4, addrspace 1)
+    %31:vgpr(s32) = COPY %21
+    G_STORE %31, %0 :: (volatile store 4, addrspace 1)
+    %32:vgpr(s32) = COPY %22
+    G_STORE %32, %0 :: (volatile store 4, addrspace 1)
+    %33:vgpr(s32) = COPY %23
+    G_STORE %33, %0 :: (volatile store 4, addrspace 1)
+    %34:vgpr(s32) = COPY %24
+    G_STORE %34, %0 :: (volatile store 4, addrspace 1)
+
+...
+---
+name:            icmp_v_mix
+legalized:       true
+regBankSelected: true
+
+# GCN-LABEL: name: icmp_v_mix
+# GCN: [[VGPR2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+# GCN: [[VGPR3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
+# GCN: [[VGPR4:%[0-9]+]]:vgpr_32 = COPY $vgpr4
+# GCN: [[VGPR5:%[0-9]+]]:vgpr_32 = COPY $vgpr5
+# GCN: [[VGPR6:%[0-9]+]]:vgpr_32 = COPY $vgpr6
+# GCN: [[VGPR7:%[0-9]+]]:vgpr_32 = COPY $vgpr7
+# GCN: [[VGPR8:%[0-9]+]]:vgpr_32 = COPY $vgpr8
+# GCN: [[VGPR9:%[0-9]+]]:vgpr_32 = COPY $vgpr9
+# GCN: [[COND0:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U32_e64 [[VGPR2]], [[VGPR3]]
+# GCN: [[COND1:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U32_e64 [[VGPR6]], [[VGPR7]]
+# GCN: V_CNDMASK_B32_e64 0, [[VGPR9]], 0, [[VGPR8]], [[COND0]]
+# GCN: V_CNDMASK_B32_e64 0, [[VGPR5]], 0, [[VGPR4]], [[COND1]]
+
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9
+
+    %0:vgpr(p1) = COPY $vgpr0_vgpr1
+    %1:vgpr(s32) = COPY $vgpr2
+    %2:vgpr(s32) = COPY $vgpr3
+    %3:vgpr(s32) = COPY $vgpr4
+    %4:vgpr(s32) = COPY $vgpr5
+    %5:vgpr(s32) = COPY $vgpr6
+    %6:vgpr(s32) = COPY $vgpr7
+    %7:vgpr(s32) = COPY $vgpr8
+    %8:vgpr(s32) = COPY $vgpr9
+    %9:sgpr(s1) = G_ICMP intpred(ne), %1, %2
+    %10:sgpr(s1) = G_ICMP intpred(ne), %5, %6
+    %11:vgpr(s32) = G_SELECT %9, %7, %8
+    %12:vgpr(s32) = G_SELECT %10, %3, %4
+    G_STORE %11, %0 :: (volatile store 4, addrspace 1)
+    G_STORE %12, %0 :: (volatile store 4, addrspace 1)
+...
+---
+name:            icmp_valu
+legalized:       true
+regBankSelected: true
+
+# GCN-LABEL: name: icmp_valu
+# GCN: V_CMP_NE_U32_e64
+# GCN: V_CMP_EQ_U32_e64
+# GCN: V_CMP_GT_I32_e64
+# GCN: V_CMP_GE_I32_e64
+# GCN: V_CMP_LT_I32_e64
+# GCN: V_CMP_LE_I32_e64
+# GCN: V_CMP_GT_U32_e64
+# GCN: V_CMP_GE_U32_e64
+# GCN: V_CMP_LT_U32_e64
+# GCN: V_CMP_LE_U32_e64
+
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
+
+    %0:vgpr(p1) = COPY $vgpr0_vgpr1
+    %1:vgpr(s32) = COPY $vgpr2
+    %2:vgpr(s32) = COPY $vgpr3
+    %3:vgpr(s32) = COPY $vgpr4
+    %4:vgpr(s32) = COPY $vgpr5
+    %5:sgpr(s1) = G_ICMP intpred(ne), %1, %2
+    %6:sgpr(s1) = G_ICMP intpred(eq), %1, %2
+    %7:sgpr(s1) = G_ICMP intpred(sgt), %1, %2
+    %8:sgpr(s1) = G_ICMP intpred(sge), %1, %2
+    %9:sgpr(s1) = G_ICMP intpred(slt), %1, %2
+    %10:sgpr(s1) = G_ICMP intpred(sle), %1, %2
+    %11:sgpr(s1) = G_ICMP intpred(ugt), %1, %2
+    %12:sgpr(s1) = G_ICMP intpred(uge), %1, %2
+    %13:sgpr(s1) = G_ICMP intpred(ult), %1, %2
+    %14:sgpr(s1) = G_ICMP intpred(ule), %1, %2
+    %15:vgpr(s32) = G_SELECT %5, %3, %4
+    %16:vgpr(s32) = G_SELECT %6, %3, %4
+    %17:vgpr(s32) = G_SELECT %7, %3, %4
+    %18:vgpr(s32) = G_SELECT %8, %3, %4
+    %19:vgpr(s32) = G_SELECT %9, %3, %4
+    %20:vgpr(s32) = G_SELECT %10, %3, %4
+    %21:vgpr(s32) = G_SELECT %11, %3, %4
+    %22:vgpr(s32) = G_SELECT %12, %3, %4
+    %23:vgpr(s32) = G_SELECT %13, %3, %4
+    %24:vgpr(s32) = G_SELECT %14, %3, %4
+    G_STORE %15, %0 :: (volatile store 4, addrspace 1)
+    G_STORE %16, %0 :: (volatile store 4, addrspace 1)
+    G_STORE %17, %0 :: (volatile store 4, addrspace 1)
+    G_STORE %18, %0 :: (volatile store 4, addrspace 1)
+    G_STORE %19, %0 :: (volatile store 4, addrspace 1)
+    G_STORE %20, %0 :: (volatile store 4, addrspace 1)
+    G_STORE %21, %0 :: (volatile store 4, addrspace 1)
+    G_STORE %22, %0 :: (volatile store 4, addrspace 1)
+    G_STORE %23, %0 :: (volatile store 4, addrspace 1)
+    G_STORE %24, %0 :: (volatile store 4, addrspace 1)
+...
+---
+
+name:            icmp_vv
+legalized:       true
+regBankSelected: true
+
+# GCN-LABEL: name: icmp_vv
+# GCN: [[VGPR2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+# GCN: [[VGPR3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
+# GCN: V_CMP_NE_U32_e64 [[VGPR2]], [[VGPR3]]
+
+body: |
+  bb.0:
+    liveins:  $vgpr0_vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
+
+    %0:vgpr(p1) = COPY $vgpr0_vgpr1
+    %1:vgpr(s32) = COPY $vgpr2
+    %2:vgpr(s32) = COPY $vgpr3
+    %3:vgpr(s32) = COPY $vgpr4
+    %4:vgpr(s32) = COPY $vgpr5
+    %5:sgpr(s1) = G_ICMP intpred(ne), %1, %2
+    %6:vgpr(s32) = G_SELECT %5, %3, %4
+    G_STORE %6, %0 :: (store 4, addrspace 1)
+...
+---
+
+name:            icmp_vs
+legalized:       true
+regBankSelected: true
+
+# GCN-LABEL: name: icmp_vs
+# GCN: [[VGPR2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+# GCN: [[SGPR0:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0
+# GCN: V_CMP_NE_U32_e64 [[VGPR2]], [[SGPR0]]
+
+body: |
+  bb.0:
+    liveins:  $vgpr0_vgpr1, $vgpr2, $vgpr3, $vgpr4, $sgpr0
+
+    %0:vgpr(p1) = COPY $vgpr0_vgpr1
+    %1:vgpr(s32) = COPY $vgpr2
+    %2:vgpr(s32) = COPY $vgpr3
+    %3:vgpr(s32) = COPY $vgpr4
+    %4:sgpr(s32) = COPY $sgpr0
+    %5:sgpr(s1) = G_ICMP intpred(ne), %1, %4
+    %6:vgpr(s32) = G_SELECT %5, %2, %3
+    G_STORE %6, %0 :: (store 4, addrspace 1)
+...
+---
+
+name:            icmp_sv
+legalized:       true
+regBankSelected: true
+
+# GCN-LABEL: name: icmp_sv
+# GCN: [[VGPR2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+# GCN: [[SGPR0:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0
+# GCN: V_CMP_NE_U32_e64 [[SGPR0]], [[VGPR2]]
+
+body: |
+  bb.0:
+    liveins:  $vgpr0_vgpr1, $vgpr2, $vgpr3, $vgpr4, $sgpr0
+
+    %0:vgpr(p1) = COPY $vgpr0_vgpr1
+    %1:vgpr(s32) = COPY $vgpr2
+    %2:vgpr(s32) = COPY $vgpr3
+    %3:vgpr(s32) = COPY $vgpr4
+    %4:sgpr(s32) = COPY $sgpr0
+    %5:sgpr(s1) = G_ICMP intpred(ne), %4, %1
+    %6:vgpr(s32) = G_SELECT %5, %2, %3
+    G_STORE %6, %0 :: (store 4, addrspace 1)
+...
+---
+
+name:            icmp_or_vcc
+legalized:       true
+regBankSelected: true
+
+# GCN-LABEL: name: icmp_or_vcc
+# GCN: [[VGPR2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+# GCN: [[SGPR0:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0
+# GCN: V_CMP_NE_U32_e64 [[SGPR0]], [[VGPR2]]
+
+body: |
+  bb.0:
+    liveins:  $vgpr0_vgpr1, $vgpr2, $vgpr3, $vgpr4, $sgpr0
+
+    %0:vgpr(p1) = COPY $vgpr0_vgpr1
+    %1:vgpr(s32) = COPY $vgpr2
+    %2:vgpr(s32) = COPY $vgpr3
+    %3:vgpr(s32) = COPY $vgpr4
+    %4:sgpr(s32) = COPY $sgpr0
+    %5:sgpr(s1) = G_ICMP intpred(ne), %4, %1
+    %6:vgpr(s32) = G_SELECT %5, %2, %3
+    G_STORE %6, %0 :: (store 4, addrspace 1)
+...
+---




More information about the llvm-commits mailing list