[llvm] d4c9e13 - AMDGPU/GlobalISel: Select G_UADDE/G_USUBE
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Mon Jan 6 15:36:19 PST 2020
Author: Matt Arsenault
Date: 2020-01-06T18:27:52-05:00
New Revision: d4c9e13324443c0324148156d54d2c7c81393327
URL: https://github.com/llvm/llvm-project/commit/d4c9e13324443c0324148156d54d2c7c81393327
DIFF: https://github.com/llvm/llvm-project/commit/d4c9e13324443c0324148156d54d2c7c81393327.diff
LOG: AMDGPU/GlobalISel: Select G_UADDE/G_USUBE
Added:
llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-uadde.gfx10.mir
llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-uadde.mir
llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-usube.gfx10.mir
llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-usube.mir
Modified:
llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index b595facd9f20..3a88e27acf88 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -387,20 +387,25 @@ bool AMDGPUInstructionSelector::selectG_ADD_SUB(MachineInstr &I) const {
return true;
}
-bool AMDGPUInstructionSelector::selectG_UADDO_USUBO(MachineInstr &I) const {
+bool AMDGPUInstructionSelector::selectG_UADDO_USUBO_UADDE_USUBE(
+ MachineInstr &I) const {
MachineBasicBlock *BB = I.getParent();
MachineFunction *MF = BB->getParent();
const DebugLoc &DL = I.getDebugLoc();
Register Dst0Reg = I.getOperand(0).getReg();
Register Dst1Reg = I.getOperand(1).getReg();
- const bool IsAdd = I.getOpcode() == AMDGPU::G_UADDO;
+ const bool IsAdd = I.getOpcode() == AMDGPU::G_UADDO ||
+ I.getOpcode() == AMDGPU::G_UADDE;
+ const bool HasCarryIn = I.getOpcode() == AMDGPU::G_UADDE ||
+ I.getOpcode() == AMDGPU::G_USUBE;
if (isVCC(Dst1Reg, *MRI)) {
- // The name of the opcodes are misleading. v_add_i32/v_sub_i32 have unsigned
- // carry out despite the _i32 name. These were renamed in VI to _U32.
- // FIXME: We should probably rename the opcodes here.
- unsigned NewOpc = IsAdd ? AMDGPU::V_ADD_I32_e64 : AMDGPU::V_SUB_I32_e64;
- I.setDesc(TII.get(NewOpc));
+ // The names of the opcodes are misleading. v_add_i32/v_sub_i32 have unsigned
+ // carry out despite the _i32 name. These were renamed in VI to _U32.
+ // FIXME: We should probably rename the opcodes here.
+ unsigned NoCarryOpc = IsAdd ? AMDGPU::V_ADD_I32_e64 : AMDGPU::V_SUB_I32_e64;
+ unsigned CarryOpc = IsAdd ? AMDGPU::V_ADDC_U32_e64 : AMDGPU::V_SUBB_U32_e64;
+ I.setDesc(TII.get(HasCarryIn ? CarryOpc : NoCarryOpc));
I.addOperand(*MF, MachineOperand::CreateReg(AMDGPU::EXEC, false, true));
I.addOperand(*MF, MachineOperand::CreateImm(0));
return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
@@ -408,8 +413,16 @@ bool AMDGPUInstructionSelector::selectG_UADDO_USUBO(MachineInstr &I) const {
Register Src0Reg = I.getOperand(2).getReg();
Register Src1Reg = I.getOperand(3).getReg();
- unsigned NewOpc = IsAdd ? AMDGPU::S_ADD_U32 : AMDGPU::S_SUB_U32;
- BuildMI(*BB, &I, DL, TII.get(NewOpc), Dst0Reg)
+
+ if (HasCarryIn) {
+ BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), AMDGPU::SCC)
+ .addReg(I.getOperand(4).getReg());
+ }
+
+ unsigned NoCarryOpc = IsAdd ? AMDGPU::S_ADD_U32 : AMDGPU::S_SUB_U32;
+ unsigned CarryOpc = IsAdd ? AMDGPU::S_ADDC_U32 : AMDGPU::S_SUBB_U32;
+
+ BuildMI(*BB, &I, DL, TII.get(HasCarryIn ? CarryOpc : NoCarryOpc), Dst0Reg)
.add(I.getOperand(2))
.add(I.getOperand(3));
BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), Dst1Reg)
@@ -423,6 +436,11 @@ bool AMDGPUInstructionSelector::selectG_UADDO_USUBO(MachineInstr &I) const {
!RBI.constrainGenericRegister(Src1Reg, AMDGPU::SReg_32RegClass, *MRI))
return false;
+ if (HasCarryIn &&
+ !RBI.constrainGenericRegister(I.getOperand(4).getReg(),
+ AMDGPU::SReg_32RegClass, *MRI))
+ return false;
+
I.eraseFromParent();
return true;
}
@@ -1611,7 +1629,9 @@ bool AMDGPUInstructionSelector::select(MachineInstr &I) {
return selectG_ADD_SUB(I);
case TargetOpcode::G_UADDO:
case TargetOpcode::G_USUBO:
- return selectG_UADDO_USUBO(I);
+ case TargetOpcode::G_UADDE:
+ case TargetOpcode::G_USUBE:
+ return selectG_UADDO_USUBO_UADDE_USUBE(I);
case TargetOpcode::G_INTTOPTR:
case TargetOpcode::G_BITCAST:
case TargetOpcode::G_PTRTOINT:
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
index 5a48ad807437..cb4b9277563e 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
@@ -87,7 +87,7 @@ class AMDGPUInstructionSelector : public InstructionSelector {
bool selectG_CONSTANT(MachineInstr &I) const;
bool selectG_AND_OR_XOR(MachineInstr &I) const;
bool selectG_ADD_SUB(MachineInstr &I) const;
- bool selectG_UADDO_USUBO(MachineInstr &I) const;
+ bool selectG_UADDO_USUBO_UADDE_USUBE(MachineInstr &I) const;
bool selectG_EXTRACT(MachineInstr &I) const;
bool selectG_MERGE_VALUES(MachineInstr &I) const;
bool selectG_UNMERGE_VALUES(MachineInstr &I) const;
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-uadde.gfx10.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-uadde.gfx10.mir
new file mode 100644
index 000000000000..e592a5e56c34
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-uadde.gfx10.mir
@@ -0,0 +1,70 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX10 %s
+
+# These violate the constant bus restriction pre-gfx10
+
+---
+name: uadde_s32_s1_vsv
+legalized: true
+regBankSelected: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0, $vgpr0
+
+ ; GFX10-LABEL: name: uadde_s32_s1_vsv
+ ; GFX10: $vcc_hi = IMPLICIT_DEF
+ ; GFX10: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+ ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX10: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; GFX10: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX10: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U32_e64 [[COPY2]], [[V_MOV_B32_e32_]], implicit $exec
+ ; GFX10: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY]], [[COPY1]], [[V_CMP_EQ_U32_e64_]], 0, implicit $exec
+ ; GFX10: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX10: [[V_MOV_B32_e32_2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
+ ; GFX10: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_MOV_B32_e32_2]], 0, [[V_MOV_B32_e32_1]], [[V_ADDC_U32_e64_1]], implicit $exec
+ ; GFX10: S_ENDPGM 0, implicit [[V_ADDC_U32_e64_]], implicit [[V_CNDMASK_B32_e64_]]
+ %0:sgpr(s32) = COPY $sgpr0
+ %1:vgpr(s32) = COPY $vgpr0
+ %2:vgpr(s32) = COPY $vgpr2
+ %3:vgpr(s32) = G_CONSTANT i32 0
+ %4:vcc(s1) = G_ICMP intpred(eq), %2, %3
+ %5:vgpr(s32), %6:vcc(s1) = G_UADDE %0, %1, %4
+ %7:vgpr(s32) = G_CONSTANT i32 0
+ %8:vgpr(s32) = G_CONSTANT i32 1
+ %9:vgpr(s32) = G_SELECT %6, %7, %8
+ S_ENDPGM 0, implicit %5, implicit %9
+...
+
+---
+name: uadde_s32_s1_vvs
+legalized: true
+regBankSelected: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0, $vgpr0
+
+ ; GFX10-LABEL: name: uadde_s32_s1_vvs
+ ; GFX10: $vcc_hi = IMPLICIT_DEF
+ ; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX10: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0
+ ; GFX10: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; GFX10: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX10: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U32_e64 [[COPY2]], [[V_MOV_B32_e32_]], implicit $exec
+ ; GFX10: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY]], [[COPY1]], [[V_CMP_EQ_U32_e64_]], 0, implicit $exec
+ ; GFX10: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX10: [[V_MOV_B32_e32_2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
+ ; GFX10: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_MOV_B32_e32_2]], 0, [[V_MOV_B32_e32_1]], [[V_ADDC_U32_e64_1]], implicit $exec
+ ; GFX10: S_ENDPGM 0, implicit [[V_ADDC_U32_e64_]], implicit [[V_CNDMASK_B32_e64_]]
+ %0:vgpr(s32) = COPY $vgpr0
+ %1:sgpr(s32) = COPY $sgpr0
+ %2:vgpr(s32) = COPY $vgpr2
+ %3:vgpr(s32) = G_CONSTANT i32 0
+ %4:vcc(s1) = G_ICMP intpred(eq), %2, %3
+ %5:vgpr(s32), %6:vcc(s1) = G_UADDE %0, %1, %4
+ %7:vgpr(s32) = G_CONSTANT i32 0
+ %8:vgpr(s32) = G_CONSTANT i32 1
+ %9:vgpr(s32) = G_SELECT %6, %7, %8
+ S_ENDPGM 0, implicit %5, implicit %9
+...
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-uadde.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-uadde.mir
new file mode 100644
index 000000000000..42dcca05d4c8
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-uadde.mir
@@ -0,0 +1,89 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -march=amdgcn -mcpu=tahiti -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=WAVE64 %s
+# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=WAVE64 %s
+# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=WAVE64 %s
+# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=WAVE32 %s
+
+---
+name: uadde_s32_s1_sss
+legalized: true
+regBankSelected: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0, $sgpr1, $sgpr2
+
+ ; WAVE64-LABEL: name: uadde_s32_s1_sss
+ ; WAVE64: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+ ; WAVE64: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
+ ; WAVE64: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; WAVE64: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; WAVE64: S_CMP_EQ_U32 [[COPY2]], [[S_MOV_B32_]], implicit-def $scc
+ ; WAVE64: [[COPY3:%[0-9]+]]:sreg_32 = COPY $scc
+ ; WAVE64: $scc = COPY [[COPY3]]
+ ; WAVE64: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY]], [[COPY1]], implicit-def $scc, implicit $scc
+ ; WAVE64: [[COPY4:%[0-9]+]]:sreg_32 = COPY $scc
+ ; WAVE64: $scc = COPY [[COPY4]]
+ ; WAVE64: [[S_CSELECT_B32_:%[0-9]+]]:sreg_32 = S_CSELECT_B32 [[COPY]], [[COPY1]], implicit $scc
+ ; WAVE64: S_ENDPGM 0, implicit [[S_ADDC_U32_]], implicit [[S_CSELECT_B32_]]
+ ; WAVE32-LABEL: name: uadde_s32_s1_sss
+ ; WAVE32: $vcc_hi = IMPLICIT_DEF
+ ; WAVE32: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+ ; WAVE32: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
+ ; WAVE32: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; WAVE32: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; WAVE32: S_CMP_EQ_U32 [[COPY2]], [[S_MOV_B32_]], implicit-def $scc
+ ; WAVE32: [[COPY3:%[0-9]+]]:sreg_32 = COPY $scc
+ ; WAVE32: $scc = COPY [[COPY3]]
+ ; WAVE32: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY]], [[COPY1]], implicit-def $scc, implicit $scc
+ ; WAVE32: [[COPY4:%[0-9]+]]:sreg_32 = COPY $scc
+ ; WAVE32: $scc = COPY [[COPY4]]
+ ; WAVE32: [[S_CSELECT_B32_:%[0-9]+]]:sreg_32 = S_CSELECT_B32 [[COPY]], [[COPY1]], implicit $scc
+ ; WAVE32: S_ENDPGM 0, implicit [[S_ADDC_U32_]], implicit [[S_CSELECT_B32_]]
+ %0:sgpr(s32) = COPY $sgpr0
+ %1:sgpr(s32) = COPY $sgpr1
+ %2:sgpr(s32) = COPY $sgpr2
+ %3:sgpr(s32) = G_CONSTANT i32 0
+ %4:sgpr(s32) = G_ICMP intpred(eq), %2, %3
+ %5:sgpr(s32), %6:sgpr(s32) = G_UADDE %0, %1, %4
+ %7:sgpr(s32) = G_SELECT %6, %0, %1
+ S_ENDPGM 0, implicit %5, implicit %7
+...
+
+---
+name: uadde_s32_s1_vvv
+legalized: true
+regBankSelected: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1, $vgpr2
+
+ ; WAVE64-LABEL: name: uadde_s32_s1_vvv
+ ; WAVE64: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; WAVE64: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; WAVE64: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; WAVE64: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+ ; WAVE64: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[COPY2]], [[V_MOV_B32_e32_]], implicit $exec
+ ; WAVE64: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY]], [[COPY1]], [[V_CMP_EQ_U32_e64_]], 0, implicit $exec
+ ; WAVE64: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY1]], 0, [[COPY]], [[V_ADDC_U32_e64_1]], implicit $exec
+ ; WAVE64: S_ENDPGM 0, implicit [[V_ADDC_U32_e64_]], implicit [[V_CNDMASK_B32_e64_]]
+ ; WAVE32-LABEL: name: uadde_s32_s1_vvv
+ ; WAVE32: $vcc_hi = IMPLICIT_DEF
+ ; WAVE32: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; WAVE32: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; WAVE32: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; WAVE32: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+ ; WAVE32: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U32_e64 [[COPY2]], [[V_MOV_B32_e32_]], implicit $exec
+ ; WAVE32: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY]], [[COPY1]], [[V_CMP_EQ_U32_e64_]], 0, implicit $exec
+ ; WAVE32: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY1]], 0, [[COPY]], [[V_ADDC_U32_e64_1]], implicit $exec
+ ; WAVE32: S_ENDPGM 0, implicit [[V_ADDC_U32_e64_]], implicit [[V_CNDMASK_B32_e64_]]
+ %0:vgpr(s32) = COPY $vgpr0
+ %1:vgpr(s32) = COPY $vgpr1
+ %2:vgpr(s32) = COPY $vgpr2
+ %3:vgpr(s32) = G_CONSTANT i32 0
+ %4:vcc(s1) = G_ICMP intpred(eq), %2, %3
+ %5:vgpr(s32), %6:vcc(s1) = G_UADDE %0, %1, %4
+ %7:vgpr(s32) = G_SELECT %6, %0, %1
+ S_ENDPGM 0, implicit %5, implicit %7
+...
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-usube.gfx10.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-usube.gfx10.mir
new file mode 100644
index 000000000000..f3a700be8ffc
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-usube.gfx10.mir
@@ -0,0 +1,70 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX10 %s
+
+# These violate the constant bus restriction pre-gfx10
+
+---
+name: usube_s32_s1_vsv
+legalized: true
+regBankSelected: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0, $vgpr0
+
+ ; GFX10-LABEL: name: usube_s32_s1_vsv
+ ; GFX10: $vcc_hi = IMPLICIT_DEF
+ ; GFX10: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+ ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX10: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; GFX10: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX10: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U32_e64 [[COPY2]], [[V_MOV_B32_e32_]], implicit $exec
+ ; GFX10: [[V_SUBB_U32_e64_:%[0-9]+]]:vgpr_32, [[V_SUBB_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_SUBB_U32_e64 [[COPY]], [[COPY1]], [[V_CMP_EQ_U32_e64_]], 0, implicit $exec
+ ; GFX10: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX10: [[V_MOV_B32_e32_2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
+ ; GFX10: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_MOV_B32_e32_2]], 0, [[V_MOV_B32_e32_1]], [[V_SUBB_U32_e64_1]], implicit $exec
+ ; GFX10: S_ENDPGM 0, implicit [[V_SUBB_U32_e64_]], implicit [[V_CNDMASK_B32_e64_]]
+ %0:sgpr(s32) = COPY $sgpr0
+ %1:vgpr(s32) = COPY $vgpr0
+ %2:vgpr(s32) = COPY $vgpr2
+ %3:vgpr(s32) = G_CONSTANT i32 0
+ %4:vcc(s1) = G_ICMP intpred(eq), %2, %3
+ %5:vgpr(s32), %6:vcc(s1) = G_USUBE %0, %1, %4
+ %7:vgpr(s32) = G_CONSTANT i32 0
+ %8:vgpr(s32) = G_CONSTANT i32 1
+ %9:vgpr(s32) = G_SELECT %6, %7, %8
+ S_ENDPGM 0, implicit %5, implicit %9
+...
+
+---
+name: usube_s32_s1_vvs
+legalized: true
+regBankSelected: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0, $vgpr0
+
+ ; GFX10-LABEL: name: usube_s32_s1_vvs
+ ; GFX10: $vcc_hi = IMPLICIT_DEF
+ ; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX10: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0
+ ; GFX10: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; GFX10: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX10: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U32_e64 [[COPY2]], [[V_MOV_B32_e32_]], implicit $exec
+ ; GFX10: [[V_SUBB_U32_e64_:%[0-9]+]]:vgpr_32, [[V_SUBB_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_SUBB_U32_e64 [[COPY]], [[COPY1]], [[V_CMP_EQ_U32_e64_]], 0, implicit $exec
+ ; GFX10: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX10: [[V_MOV_B32_e32_2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
+ ; GFX10: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_MOV_B32_e32_2]], 0, [[V_MOV_B32_e32_1]], [[V_SUBB_U32_e64_1]], implicit $exec
+ ; GFX10: S_ENDPGM 0, implicit [[V_SUBB_U32_e64_]], implicit [[V_CNDMASK_B32_e64_]]
+ %0:vgpr(s32) = COPY $vgpr0
+ %1:sgpr(s32) = COPY $sgpr0
+ %2:vgpr(s32) = COPY $vgpr2
+ %3:vgpr(s32) = G_CONSTANT i32 0
+ %4:vcc(s1) = G_ICMP intpred(eq), %2, %3
+ %5:vgpr(s32), %6:vcc(s1) = G_USUBE %0, %1, %4
+ %7:vgpr(s32) = G_CONSTANT i32 0
+ %8:vgpr(s32) = G_CONSTANT i32 1
+ %9:vgpr(s32) = G_SELECT %6, %7, %8
+ S_ENDPGM 0, implicit %5, implicit %9
+...
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-usube.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-usube.mir
new file mode 100644
index 000000000000..cd375ba3f28f
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-usube.mir
@@ -0,0 +1,89 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -march=amdgcn -mcpu=tahiti -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=WAVE64 %s
+# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=WAVE64 %s
+# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=WAVE64 %s
+# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=WAVE32 %s
+
+---
+name: usube_s32_s1_sss
+legalized: true
+regBankSelected: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0, $sgpr1, $sgpr2
+
+ ; WAVE64-LABEL: name: usube_s32_s1_sss
+ ; WAVE64: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+ ; WAVE64: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
+ ; WAVE64: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; WAVE64: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; WAVE64: S_CMP_EQ_U32 [[COPY2]], [[S_MOV_B32_]], implicit-def $scc
+ ; WAVE64: [[COPY3:%[0-9]+]]:sreg_32 = COPY $scc
+ ; WAVE64: $scc = COPY [[COPY3]]
+ ; WAVE64: [[S_SUBB_U32_:%[0-9]+]]:sreg_32 = S_SUBB_U32 [[COPY]], [[COPY1]], implicit-def $scc, implicit $scc
+ ; WAVE64: [[COPY4:%[0-9]+]]:sreg_32 = COPY $scc
+ ; WAVE64: $scc = COPY [[COPY4]]
+ ; WAVE64: [[S_CSELECT_B32_:%[0-9]+]]:sreg_32 = S_CSELECT_B32 [[COPY]], [[COPY1]], implicit $scc
+ ; WAVE64: S_ENDPGM 0, implicit [[S_SUBB_U32_]], implicit [[S_CSELECT_B32_]]
+ ; WAVE32-LABEL: name: usube_s32_s1_sss
+ ; WAVE32: $vcc_hi = IMPLICIT_DEF
+ ; WAVE32: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+ ; WAVE32: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
+ ; WAVE32: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; WAVE32: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; WAVE32: S_CMP_EQ_U32 [[COPY2]], [[S_MOV_B32_]], implicit-def $scc
+ ; WAVE32: [[COPY3:%[0-9]+]]:sreg_32 = COPY $scc
+ ; WAVE32: $scc = COPY [[COPY3]]
+ ; WAVE32: [[S_SUBB_U32_:%[0-9]+]]:sreg_32 = S_SUBB_U32 [[COPY]], [[COPY1]], implicit-def $scc, implicit $scc
+ ; WAVE32: [[COPY4:%[0-9]+]]:sreg_32 = COPY $scc
+ ; WAVE32: $scc = COPY [[COPY4]]
+ ; WAVE32: [[S_CSELECT_B32_:%[0-9]+]]:sreg_32 = S_CSELECT_B32 [[COPY]], [[COPY1]], implicit $scc
+ ; WAVE32: S_ENDPGM 0, implicit [[S_SUBB_U32_]], implicit [[S_CSELECT_B32_]]
+ %0:sgpr(s32) = COPY $sgpr0
+ %1:sgpr(s32) = COPY $sgpr1
+ %2:sgpr(s32) = COPY $sgpr2
+ %3:sgpr(s32) = G_CONSTANT i32 0
+ %4:sgpr(s32) = G_ICMP intpred(eq), %2, %3
+ %5:sgpr(s32), %6:sgpr(s32) = G_USUBE %0, %1, %4
+ %7:sgpr(s32) = G_SELECT %6, %0, %1
+ S_ENDPGM 0, implicit %5, implicit %7
+...
+
+---
+name: usube_s32_s1_vvv
+legalized: true
+regBankSelected: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1, $vgpr2
+
+ ; WAVE64-LABEL: name: usube_s32_s1_vvv
+ ; WAVE64: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; WAVE64: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; WAVE64: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; WAVE64: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+ ; WAVE64: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[COPY2]], [[V_MOV_B32_e32_]], implicit $exec
+ ; WAVE64: [[V_SUBB_U32_e64_:%[0-9]+]]:vgpr_32, [[V_SUBB_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_SUBB_U32_e64 [[COPY]], [[COPY1]], [[V_CMP_EQ_U32_e64_]], 0, implicit $exec
+ ; WAVE64: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY1]], 0, [[COPY]], [[V_SUBB_U32_e64_1]], implicit $exec
+ ; WAVE64: S_ENDPGM 0, implicit [[V_SUBB_U32_e64_]], implicit [[V_CNDMASK_B32_e64_]]
+ ; WAVE32-LABEL: name: usube_s32_s1_vvv
+ ; WAVE32: $vcc_hi = IMPLICIT_DEF
+ ; WAVE32: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; WAVE32: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; WAVE32: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; WAVE32: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+ ; WAVE32: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U32_e64 [[COPY2]], [[V_MOV_B32_e32_]], implicit $exec
+ ; WAVE32: [[V_SUBB_U32_e64_:%[0-9]+]]:vgpr_32, [[V_SUBB_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_SUBB_U32_e64 [[COPY]], [[COPY1]], [[V_CMP_EQ_U32_e64_]], 0, implicit $exec
+ ; WAVE32: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY1]], 0, [[COPY]], [[V_SUBB_U32_e64_1]], implicit $exec
+ ; WAVE32: S_ENDPGM 0, implicit [[V_SUBB_U32_e64_]], implicit [[V_CNDMASK_B32_e64_]]
+ %0:vgpr(s32) = COPY $vgpr0
+ %1:vgpr(s32) = COPY $vgpr1
+ %2:vgpr(s32) = COPY $vgpr2
+ %3:vgpr(s32) = G_CONSTANT i32 0
+ %4:vcc(s1) = G_ICMP intpred(eq), %2, %3
+ %5:vgpr(s32), %6:vcc(s1) = G_USUBE %0, %1, %4
+ %7:vgpr(s32) = G_SELECT %6, %0, %1
+ S_ENDPGM 0, implicit %5, implicit %7
+...
More information about the llvm-commits
mailing list