[llvm] d4c9e13 - AMDGPU/GlobalISel: Select G_UADDE/G_USUBE

Matt Arsenault via llvm-commits llvm-commits at lists.llvm.org
Mon Jan 6 15:36:19 PST 2020


Author: Matt Arsenault
Date: 2020-01-06T18:27:52-05:00
New Revision: d4c9e13324443c0324148156d54d2c7c81393327

URL: https://github.com/llvm/llvm-project/commit/d4c9e13324443c0324148156d54d2c7c81393327
DIFF: https://github.com/llvm/llvm-project/commit/d4c9e13324443c0324148156d54d2c7c81393327.diff

LOG: AMDGPU/GlobalISel: Select G_UADDE/G_USUBE

Added: 
    llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-uadde.gfx10.mir
    llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-uadde.mir
    llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-usube.gfx10.mir
    llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-usube.mir

Modified: 
    llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
    llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index b595facd9f20..3a88e27acf88 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -387,20 +387,25 @@ bool AMDGPUInstructionSelector::selectG_ADD_SUB(MachineInstr &I) const {
   return true;
 }
 
-bool AMDGPUInstructionSelector::selectG_UADDO_USUBO(MachineInstr &I) const {
+bool AMDGPUInstructionSelector::selectG_UADDO_USUBO_UADDE_USUBE(
+  MachineInstr &I) const {
   MachineBasicBlock *BB = I.getParent();
   MachineFunction *MF = BB->getParent();
   const DebugLoc &DL = I.getDebugLoc();
   Register Dst0Reg = I.getOperand(0).getReg();
   Register Dst1Reg = I.getOperand(1).getReg();
-  const bool IsAdd = I.getOpcode() == AMDGPU::G_UADDO;
+  const bool IsAdd = I.getOpcode() == AMDGPU::G_UADDO ||
+                     I.getOpcode() == AMDGPU::G_UADDE;
+  const bool HasCarryIn = I.getOpcode() == AMDGPU::G_UADDE ||
+                          I.getOpcode() == AMDGPU::G_USUBE;
 
   if (isVCC(Dst1Reg, *MRI)) {
-    // The name of the opcodes are misleading. v_add_i32/v_sub_i32 have unsigned
-    // carry out despite the _i32 name. These were renamed in VI to _U32.
-    // FIXME: We should probably rename the opcodes here.
-    unsigned NewOpc = IsAdd ? AMDGPU::V_ADD_I32_e64 : AMDGPU::V_SUB_I32_e64;
-    I.setDesc(TII.get(NewOpc));
+      // The name of the opcodes are misleading. v_add_i32/v_sub_i32 have unsigned
+      // carry out despite the _i32 name. These were renamed in VI to _U32.
+      // FIXME: We should probably rename the opcodes here.
+    unsigned NoCarryOpc = IsAdd ? AMDGPU::V_ADD_I32_e64 : AMDGPU::V_SUB_I32_e64;
+    unsigned CarryOpc = IsAdd ? AMDGPU::V_ADDC_U32_e64 : AMDGPU::V_SUBB_U32_e64;
+    I.setDesc(TII.get(HasCarryIn ? CarryOpc : NoCarryOpc));
     I.addOperand(*MF, MachineOperand::CreateReg(AMDGPU::EXEC, false, true));
     I.addOperand(*MF, MachineOperand::CreateImm(0));
     return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
@@ -408,8 +413,16 @@ bool AMDGPUInstructionSelector::selectG_UADDO_USUBO(MachineInstr &I) const {
 
   Register Src0Reg = I.getOperand(2).getReg();
   Register Src1Reg = I.getOperand(3).getReg();
-  unsigned NewOpc = IsAdd ? AMDGPU::S_ADD_U32 : AMDGPU::S_SUB_U32;
-  BuildMI(*BB, &I, DL, TII.get(NewOpc), Dst0Reg)
+
+  if (HasCarryIn) {
+    BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), AMDGPU::SCC)
+      .addReg(I.getOperand(4).getReg());
+  }
+
+  unsigned NoCarryOpc = IsAdd ? AMDGPU::S_ADD_U32 : AMDGPU::S_SUB_U32;
+  unsigned CarryOpc = IsAdd ? AMDGPU::S_ADDC_U32 : AMDGPU::S_SUBB_U32;
+
+  BuildMI(*BB, &I, DL, TII.get(HasCarryIn ? CarryOpc : NoCarryOpc), Dst0Reg)
     .add(I.getOperand(2))
     .add(I.getOperand(3));
   BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), Dst1Reg)
@@ -423,6 +436,11 @@ bool AMDGPUInstructionSelector::selectG_UADDO_USUBO(MachineInstr &I) const {
       !RBI.constrainGenericRegister(Src1Reg, AMDGPU::SReg_32RegClass, *MRI))
     return false;
 
+  if (HasCarryIn &&
+      !RBI.constrainGenericRegister(I.getOperand(4).getReg(),
+                                    AMDGPU::SReg_32RegClass, *MRI))
+    return false;
+
   I.eraseFromParent();
   return true;
 }
@@ -1611,7 +1629,9 @@ bool AMDGPUInstructionSelector::select(MachineInstr &I) {
     return selectG_ADD_SUB(I);
   case TargetOpcode::G_UADDO:
   case TargetOpcode::G_USUBO:
-    return selectG_UADDO_USUBO(I);
+  case TargetOpcode::G_UADDE:
+  case TargetOpcode::G_USUBE:
+    return selectG_UADDO_USUBO_UADDE_USUBE(I);
   case TargetOpcode::G_INTTOPTR:
   case TargetOpcode::G_BITCAST:
   case TargetOpcode::G_PTRTOINT:

diff  --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
index 5a48ad807437..cb4b9277563e 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
@@ -87,7 +87,7 @@ class AMDGPUInstructionSelector : public InstructionSelector {
   bool selectG_CONSTANT(MachineInstr &I) const;
   bool selectG_AND_OR_XOR(MachineInstr &I) const;
   bool selectG_ADD_SUB(MachineInstr &I) const;
-  bool selectG_UADDO_USUBO(MachineInstr &I) const;
+  bool selectG_UADDO_USUBO_UADDE_USUBE(MachineInstr &I) const;
   bool selectG_EXTRACT(MachineInstr &I) const;
   bool selectG_MERGE_VALUES(MachineInstr &I) const;
   bool selectG_UNMERGE_VALUES(MachineInstr &I) const;

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-uadde.gfx10.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-uadde.gfx10.mir
new file mode 100644
index 000000000000..e592a5e56c34
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-uadde.gfx10.mir
@@ -0,0 +1,70 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs -o - %s  | FileCheck -check-prefix=GFX10 %s
+
+# These violate the constant bus restriction pre-gfx10
+
+---
+name: uadde_s32_s1_vsv
+legalized: true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins: $sgpr0, $vgpr0
+
+    ; GFX10-LABEL: name: uadde_s32_s1_vsv
+    ; GFX10: $vcc_hi = IMPLICIT_DEF
+    ; GFX10: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+    ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX10: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+    ; GFX10: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+    ; GFX10: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U32_e64 [[COPY2]], [[V_MOV_B32_e32_]], implicit $exec
+    ; GFX10: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY]], [[COPY1]], [[V_CMP_EQ_U32_e64_]], 0, implicit $exec
+    ; GFX10: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+    ; GFX10: [[V_MOV_B32_e32_2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
+    ; GFX10: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_MOV_B32_e32_2]], 0, [[V_MOV_B32_e32_1]], [[V_ADDC_U32_e64_1]], implicit $exec
+    ; GFX10: S_ENDPGM 0, implicit [[V_ADDC_U32_e64_]], implicit [[V_CNDMASK_B32_e64_]]
+    %0:sgpr(s32) = COPY $sgpr0
+    %1:vgpr(s32) = COPY $vgpr0
+    %2:vgpr(s32) = COPY $vgpr2
+    %3:vgpr(s32) = G_CONSTANT i32 0
+    %4:vcc(s1) = G_ICMP intpred(eq), %2, %3
+    %5:vgpr(s32), %6:vcc(s1) = G_UADDE %0, %1, %4
+    %7:vgpr(s32) = G_CONSTANT i32 0
+    %8:vgpr(s32) = G_CONSTANT i32 1
+    %9:vgpr(s32) = G_SELECT %6, %7, %8
+    S_ENDPGM 0, implicit %5, implicit %9
+...
+
+---
+name: uadde_s32_s1_vvs
+legalized: true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins: $sgpr0, $vgpr0
+
+    ; GFX10-LABEL: name: uadde_s32_s1_vvs
+    ; GFX10: $vcc_hi = IMPLICIT_DEF
+    ; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX10: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0
+    ; GFX10: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+    ; GFX10: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+    ; GFX10: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U32_e64 [[COPY2]], [[V_MOV_B32_e32_]], implicit $exec
+    ; GFX10: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY]], [[COPY1]], [[V_CMP_EQ_U32_e64_]], 0, implicit $exec
+    ; GFX10: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+    ; GFX10: [[V_MOV_B32_e32_2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
+    ; GFX10: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_MOV_B32_e32_2]], 0, [[V_MOV_B32_e32_1]], [[V_ADDC_U32_e64_1]], implicit $exec
+    ; GFX10: S_ENDPGM 0, implicit [[V_ADDC_U32_e64_]], implicit [[V_CNDMASK_B32_e64_]]
+    %0:vgpr(s32) = COPY $vgpr0
+    %1:sgpr(s32) = COPY $sgpr0
+    %2:vgpr(s32) = COPY $vgpr2
+    %3:vgpr(s32) = G_CONSTANT i32 0
+    %4:vcc(s1) = G_ICMP intpred(eq), %2, %3
+    %5:vgpr(s32), %6:vcc(s1) = G_UADDE %0, %1, %4
+    %7:vgpr(s32) = G_CONSTANT i32 0
+    %8:vgpr(s32) = G_CONSTANT i32 1
+    %9:vgpr(s32) = G_SELECT %6, %7, %8
+    S_ENDPGM 0, implicit %5, implicit %9
+...

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-uadde.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-uadde.mir
new file mode 100644
index 000000000000..42dcca05d4c8
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-uadde.mir
@@ -0,0 +1,89 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -march=amdgcn -mcpu=tahiti -run-pass=instruction-select -verify-machineinstrs -o - %s  | FileCheck -check-prefix=WAVE64 %s
+# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs -o - %s  | FileCheck -check-prefix=WAVE64 %s
+# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -o - %s  | FileCheck -check-prefix=WAVE64 %s
+# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs -o - %s  | FileCheck -check-prefix=WAVE32 %s
+
+---
+name: uadde_s32_s1_sss
+legalized: true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins: $sgpr0, $sgpr1, $sgpr2
+
+    ; WAVE64-LABEL: name: uadde_s32_s1_sss
+    ; WAVE64: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+    ; WAVE64: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
+    ; WAVE64: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2
+    ; WAVE64: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+    ; WAVE64: S_CMP_EQ_U32 [[COPY2]], [[S_MOV_B32_]], implicit-def $scc
+    ; WAVE64: [[COPY3:%[0-9]+]]:sreg_32 = COPY $scc
+    ; WAVE64: $scc = COPY [[COPY3]]
+    ; WAVE64: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY]], [[COPY1]], implicit-def $scc, implicit $scc
+    ; WAVE64: [[COPY4:%[0-9]+]]:sreg_32 = COPY $scc
+    ; WAVE64: $scc = COPY [[COPY4]]
+    ; WAVE64: [[S_CSELECT_B32_:%[0-9]+]]:sreg_32 = S_CSELECT_B32 [[COPY]], [[COPY1]], implicit $scc
+    ; WAVE64: S_ENDPGM 0, implicit [[S_ADDC_U32_]], implicit [[S_CSELECT_B32_]]
+    ; WAVE32-LABEL: name: uadde_s32_s1_sss
+    ; WAVE32: $vcc_hi = IMPLICIT_DEF
+    ; WAVE32: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+    ; WAVE32: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
+    ; WAVE32: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2
+    ; WAVE32: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+    ; WAVE32: S_CMP_EQ_U32 [[COPY2]], [[S_MOV_B32_]], implicit-def $scc
+    ; WAVE32: [[COPY3:%[0-9]+]]:sreg_32 = COPY $scc
+    ; WAVE32: $scc = COPY [[COPY3]]
+    ; WAVE32: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY]], [[COPY1]], implicit-def $scc, implicit $scc
+    ; WAVE32: [[COPY4:%[0-9]+]]:sreg_32 = COPY $scc
+    ; WAVE32: $scc = COPY [[COPY4]]
+    ; WAVE32: [[S_CSELECT_B32_:%[0-9]+]]:sreg_32 = S_CSELECT_B32 [[COPY]], [[COPY1]], implicit $scc
+    ; WAVE32: S_ENDPGM 0, implicit [[S_ADDC_U32_]], implicit [[S_CSELECT_B32_]]
+    %0:sgpr(s32) = COPY $sgpr0
+    %1:sgpr(s32) = COPY $sgpr1
+    %2:sgpr(s32) = COPY $sgpr2
+    %3:sgpr(s32) = G_CONSTANT i32 0
+    %4:sgpr(s32) = G_ICMP intpred(eq), %2, %3
+    %5:sgpr(s32), %6:sgpr(s32) = G_UADDE %0, %1, %4
+    %7:sgpr(s32) = G_SELECT %6, %0, %1
+    S_ENDPGM 0, implicit %5, implicit %7
+...
+
+---
+name: uadde_s32_s1_vvv
+legalized: true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1, $vgpr2
+
+    ; WAVE64-LABEL: name: uadde_s32_s1_vvv
+    ; WAVE64: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; WAVE64: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+    ; WAVE64: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+    ; WAVE64: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+    ; WAVE64: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[COPY2]], [[V_MOV_B32_e32_]], implicit $exec
+    ; WAVE64: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY]], [[COPY1]], [[V_CMP_EQ_U32_e64_]], 0, implicit $exec
+    ; WAVE64: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY1]], 0, [[COPY]], [[V_ADDC_U32_e64_1]], implicit $exec
+    ; WAVE64: S_ENDPGM 0, implicit [[V_ADDC_U32_e64_]], implicit [[V_CNDMASK_B32_e64_]]
+    ; WAVE32-LABEL: name: uadde_s32_s1_vvv
+    ; WAVE32: $vcc_hi = IMPLICIT_DEF
+    ; WAVE32: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; WAVE32: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+    ; WAVE32: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+    ; WAVE32: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+    ; WAVE32: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U32_e64 [[COPY2]], [[V_MOV_B32_e32_]], implicit $exec
+    ; WAVE32: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY]], [[COPY1]], [[V_CMP_EQ_U32_e64_]], 0, implicit $exec
+    ; WAVE32: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY1]], 0, [[COPY]], [[V_ADDC_U32_e64_1]], implicit $exec
+    ; WAVE32: S_ENDPGM 0, implicit [[V_ADDC_U32_e64_]], implicit [[V_CNDMASK_B32_e64_]]
+    %0:vgpr(s32) = COPY $vgpr0
+    %1:vgpr(s32) = COPY $vgpr1
+    %2:vgpr(s32) = COPY $vgpr2
+    %3:vgpr(s32) = G_CONSTANT i32 0
+    %4:vcc(s1) = G_ICMP intpred(eq), %2, %3
+    %5:vgpr(s32), %6:vcc(s1) = G_UADDE %0, %1, %4
+    %7:vgpr(s32) = G_SELECT %6, %0, %1
+    S_ENDPGM 0, implicit %5, implicit %7
+...

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-usube.gfx10.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-usube.gfx10.mir
new file mode 100644
index 000000000000..f3a700be8ffc
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-usube.gfx10.mir
@@ -0,0 +1,70 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs -o - %s  | FileCheck -check-prefix=GFX10 %s
+
+# These violate the constant bus restriction pre-gfx10
+
+---
+name: usube_s32_s1_vsv
+legalized: true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins: $sgpr0, $vgpr0
+
+    ; GFX10-LABEL: name: usube_s32_s1_vsv
+    ; GFX10: $vcc_hi = IMPLICIT_DEF
+    ; GFX10: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+    ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX10: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+    ; GFX10: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+    ; GFX10: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U32_e64 [[COPY2]], [[V_MOV_B32_e32_]], implicit $exec
+    ; GFX10: [[V_SUBB_U32_e64_:%[0-9]+]]:vgpr_32, [[V_SUBB_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_SUBB_U32_e64 [[COPY]], [[COPY1]], [[V_CMP_EQ_U32_e64_]], 0, implicit $exec
+    ; GFX10: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+    ; GFX10: [[V_MOV_B32_e32_2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
+    ; GFX10: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_MOV_B32_e32_2]], 0, [[V_MOV_B32_e32_1]], [[V_SUBB_U32_e64_1]], implicit $exec
+    ; GFX10: S_ENDPGM 0, implicit [[V_SUBB_U32_e64_]], implicit [[V_CNDMASK_B32_e64_]]
+    %0:sgpr(s32) = COPY $sgpr0
+    %1:vgpr(s32) = COPY $vgpr0
+    %2:vgpr(s32) = COPY $vgpr2
+    %3:vgpr(s32) = G_CONSTANT i32 0
+    %4:vcc(s1) = G_ICMP intpred(eq), %2, %3
+    %5:vgpr(s32), %6:vcc(s1) = G_USUBE %0, %1, %4
+    %7:vgpr(s32) = G_CONSTANT i32 0
+    %8:vgpr(s32) = G_CONSTANT i32 1
+    %9:vgpr(s32) = G_SELECT %6, %7, %8
+    S_ENDPGM 0, implicit %5, implicit %9
+...
+
+---
+name: usube_s32_s1_vvs
+legalized: true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins: $sgpr0, $vgpr0
+
+    ; GFX10-LABEL: name: usube_s32_s1_vvs
+    ; GFX10: $vcc_hi = IMPLICIT_DEF
+    ; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX10: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0
+    ; GFX10: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+    ; GFX10: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+    ; GFX10: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U32_e64 [[COPY2]], [[V_MOV_B32_e32_]], implicit $exec
+    ; GFX10: [[V_SUBB_U32_e64_:%[0-9]+]]:vgpr_32, [[V_SUBB_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_SUBB_U32_e64 [[COPY]], [[COPY1]], [[V_CMP_EQ_U32_e64_]], 0, implicit $exec
+    ; GFX10: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+    ; GFX10: [[V_MOV_B32_e32_2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
+    ; GFX10: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_MOV_B32_e32_2]], 0, [[V_MOV_B32_e32_1]], [[V_SUBB_U32_e64_1]], implicit $exec
+    ; GFX10: S_ENDPGM 0, implicit [[V_SUBB_U32_e64_]], implicit [[V_CNDMASK_B32_e64_]]
+    %0:vgpr(s32) = COPY $vgpr0
+    %1:sgpr(s32) = COPY $sgpr0
+    %2:vgpr(s32) = COPY $vgpr2
+    %3:vgpr(s32) = G_CONSTANT i32 0
+    %4:vcc(s1) = G_ICMP intpred(eq), %2, %3
+    %5:vgpr(s32), %6:vcc(s1) = G_USUBE %0, %1, %4
+    %7:vgpr(s32) = G_CONSTANT i32 0
+    %8:vgpr(s32) = G_CONSTANT i32 1
+    %9:vgpr(s32) = G_SELECT %6, %7, %8
+    S_ENDPGM 0, implicit %5, implicit %9
+...

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-usube.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-usube.mir
new file mode 100644
index 000000000000..cd375ba3f28f
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-usube.mir
@@ -0,0 +1,89 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -march=amdgcn -mcpu=tahiti -run-pass=instruction-select -verify-machineinstrs -o - %s  | FileCheck -check-prefix=WAVE64 %s
+# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs -o - %s  | FileCheck -check-prefix=WAVE64 %s
+# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -o - %s  | FileCheck -check-prefix=WAVE64 %s
+# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs -o - %s  | FileCheck -check-prefix=WAVE32 %s
+
+---
+name: usube_s32_s1_sss
+legalized: true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins: $sgpr0, $sgpr1, $sgpr2
+
+    ; WAVE64-LABEL: name: usube_s32_s1_sss
+    ; WAVE64: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+    ; WAVE64: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
+    ; WAVE64: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2
+    ; WAVE64: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+    ; WAVE64: S_CMP_EQ_U32 [[COPY2]], [[S_MOV_B32_]], implicit-def $scc
+    ; WAVE64: [[COPY3:%[0-9]+]]:sreg_32 = COPY $scc
+    ; WAVE64: $scc = COPY [[COPY3]]
+    ; WAVE64: [[S_SUBB_U32_:%[0-9]+]]:sreg_32 = S_SUBB_U32 [[COPY]], [[COPY1]], implicit-def $scc, implicit $scc
+    ; WAVE64: [[COPY4:%[0-9]+]]:sreg_32 = COPY $scc
+    ; WAVE64: $scc = COPY [[COPY4]]
+    ; WAVE64: [[S_CSELECT_B32_:%[0-9]+]]:sreg_32 = S_CSELECT_B32 [[COPY]], [[COPY1]], implicit $scc
+    ; WAVE64: S_ENDPGM 0, implicit [[S_SUBB_U32_]], implicit [[S_CSELECT_B32_]]
+    ; WAVE32-LABEL: name: usube_s32_s1_sss
+    ; WAVE32: $vcc_hi = IMPLICIT_DEF
+    ; WAVE32: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+    ; WAVE32: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
+    ; WAVE32: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2
+    ; WAVE32: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+    ; WAVE32: S_CMP_EQ_U32 [[COPY2]], [[S_MOV_B32_]], implicit-def $scc
+    ; WAVE32: [[COPY3:%[0-9]+]]:sreg_32 = COPY $scc
+    ; WAVE32: $scc = COPY [[COPY3]]
+    ; WAVE32: [[S_SUBB_U32_:%[0-9]+]]:sreg_32 = S_SUBB_U32 [[COPY]], [[COPY1]], implicit-def $scc, implicit $scc
+    ; WAVE32: [[COPY4:%[0-9]+]]:sreg_32 = COPY $scc
+    ; WAVE32: $scc = COPY [[COPY4]]
+    ; WAVE32: [[S_CSELECT_B32_:%[0-9]+]]:sreg_32 = S_CSELECT_B32 [[COPY]], [[COPY1]], implicit $scc
+    ; WAVE32: S_ENDPGM 0, implicit [[S_SUBB_U32_]], implicit [[S_CSELECT_B32_]]
+    %0:sgpr(s32) = COPY $sgpr0
+    %1:sgpr(s32) = COPY $sgpr1
+    %2:sgpr(s32) = COPY $sgpr2
+    %3:sgpr(s32) = G_CONSTANT i32 0
+    %4:sgpr(s32) = G_ICMP intpred(eq), %2, %3
+    %5:sgpr(s32), %6:sgpr(s32) = G_USUBE %0, %1, %4
+    %7:sgpr(s32) = G_SELECT %6, %0, %1
+    S_ENDPGM 0, implicit %5, implicit %7
+...
+
+---
+name: usube_s32_s1_vvv
+legalized: true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1, $vgpr2
+
+    ; WAVE64-LABEL: name: usube_s32_s1_vvv
+    ; WAVE64: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; WAVE64: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+    ; WAVE64: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+    ; WAVE64: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+    ; WAVE64: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[COPY2]], [[V_MOV_B32_e32_]], implicit $exec
+    ; WAVE64: [[V_SUBB_U32_e64_:%[0-9]+]]:vgpr_32, [[V_SUBB_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_SUBB_U32_e64 [[COPY]], [[COPY1]], [[V_CMP_EQ_U32_e64_]], 0, implicit $exec
+    ; WAVE64: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY1]], 0, [[COPY]], [[V_SUBB_U32_e64_1]], implicit $exec
+    ; WAVE64: S_ENDPGM 0, implicit [[V_SUBB_U32_e64_]], implicit [[V_CNDMASK_B32_e64_]]
+    ; WAVE32-LABEL: name: usube_s32_s1_vvv
+    ; WAVE32: $vcc_hi = IMPLICIT_DEF
+    ; WAVE32: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; WAVE32: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+    ; WAVE32: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+    ; WAVE32: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+    ; WAVE32: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U32_e64 [[COPY2]], [[V_MOV_B32_e32_]], implicit $exec
+    ; WAVE32: [[V_SUBB_U32_e64_:%[0-9]+]]:vgpr_32, [[V_SUBB_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_SUBB_U32_e64 [[COPY]], [[COPY1]], [[V_CMP_EQ_U32_e64_]], 0, implicit $exec
+    ; WAVE32: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY1]], 0, [[COPY]], [[V_SUBB_U32_e64_1]], implicit $exec
+    ; WAVE32: S_ENDPGM 0, implicit [[V_SUBB_U32_e64_]], implicit [[V_CNDMASK_B32_e64_]]
+    %0:vgpr(s32) = COPY $vgpr0
+    %1:vgpr(s32) = COPY $vgpr1
+    %2:vgpr(s32) = COPY $vgpr2
+    %3:vgpr(s32) = G_CONSTANT i32 0
+    %4:vcc(s1) = G_ICMP intpred(eq), %2, %3
+    %5:vgpr(s32), %6:vcc(s1) = G_USUBE %0, %1, %4
+    %7:vgpr(s32) = G_SELECT %6, %0, %1
+    S_ENDPGM 0, implicit %5, implicit %7
+...


        


More information about the llvm-commits mailing list