[llvm] a751676 - [AMDGPU][GISel] Add llvm.amdgcn.icmp selection
Pierre van Houtryve via llvm-commits
llvm-commits at lists.llvm.org
Tue Nov 22 00:26:56 PST 2022
Author: Pierre van Houtryve
Date: 2022-11-22T08:26:50Z
New Revision: a751676f98e8d1380cedc7c4639704bd41a0f182
URL: https://github.com/llvm/llvm-project/commit/a751676f98e8d1380cedc7c4639704bd41a0f182
DIFF: https://github.com/llvm/llvm-project/commit/a751676f98e8d1380cedc7c4639704bd41a0f182.diff
LOG: [AMDGPU][GISel] Add llvm.amdgcn.icmp selection
Add missing logic to select i16 variants and enable GISel testing.
Reviewed By: arsenm
Differential Revision: https://reviews.llvm.org/D136448
Added:
llvm/test/CodeGen/AMDGPU/llvm.amdgcn.icmp.w32.ll
llvm/test/CodeGen/AMDGPU/llvm.amdgcn.icmp.w64.ll
Modified:
llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-icmp.s16.mir
Removed:
llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.icmp.ll
llvm/test/CodeGen/AMDGPU/llvm.amdgcn.icmp.ll
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index 9957642aafafe..fbc0461b522d2 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -1093,6 +1093,8 @@ bool AMDGPUInstructionSelector::selectG_INTRINSIC(MachineInstr &I) const {
case Intrinsic::amdgcn_div_scale:
return selectDivScale(I);
case Intrinsic::amdgcn_icmp:
+ if (selectImpl(I, *CoverageInfo))
+ return true;
return selectIntrinsicIcmp(I);
case Intrinsic::amdgcn_ballot:
return selectBallot(I);
@@ -1122,32 +1124,56 @@ bool AMDGPUInstructionSelector::selectG_INTRINSIC(MachineInstr &I) const {
}
}
-static int getV_CMPOpcode(CmpInst::Predicate P, unsigned Size) {
- if (Size != 32 && Size != 64)
+static int getV_CMPOpcode(CmpInst::Predicate P, unsigned Size,
+ const GCNSubtarget &ST) {
+ if (Size != 16 && Size != 32 && Size != 64)
+ return -1;
+
+ if (Size == 16 && !ST.has16BitInsts())
return -1;
+
+ const auto Select = [&](unsigned S16Opc, unsigned TrueS16Opc, unsigned S32Opc,
+ unsigned S64Opc) {
+ if (Size == 16)
+ return ST.hasTrue16BitInsts() ? TrueS16Opc : S16Opc;
+ if (Size == 32)
+ return S32Opc;
+ return S64Opc;
+ };
+
switch (P) {
default:
llvm_unreachable("Unknown condition code!");
case CmpInst::ICMP_NE:
- return Size == 32 ? AMDGPU::V_CMP_NE_U32_e64 : AMDGPU::V_CMP_NE_U64_e64;
+ return Select(AMDGPU::V_CMP_NE_U16_e64, AMDGPU::V_CMP_NE_U16_t16_e64,
+ AMDGPU::V_CMP_NE_U32_e64, AMDGPU::V_CMP_NE_U64_e64);
case CmpInst::ICMP_EQ:
- return Size == 32 ? AMDGPU::V_CMP_EQ_U32_e64 : AMDGPU::V_CMP_EQ_U64_e64;
+ return Select(AMDGPU::V_CMP_EQ_U16_e64, AMDGPU::V_CMP_EQ_U16_t16_e64,
+ AMDGPU::V_CMP_EQ_U32_e64, AMDGPU::V_CMP_EQ_U64_e64);
case CmpInst::ICMP_SGT:
- return Size == 32 ? AMDGPU::V_CMP_GT_I32_e64 : AMDGPU::V_CMP_GT_I64_e64;
+ return Select(AMDGPU::V_CMP_GT_I16_e64, AMDGPU::V_CMP_GT_I16_t16_e64,
+ AMDGPU::V_CMP_GT_I32_e64, AMDGPU::V_CMP_GT_I64_e64);
case CmpInst::ICMP_SGE:
- return Size == 32 ? AMDGPU::V_CMP_GE_I32_e64 : AMDGPU::V_CMP_GE_I64_e64;
+ return Select(AMDGPU::V_CMP_GE_I16_e64, AMDGPU::V_CMP_GE_I16_t16_e64,
+ AMDGPU::V_CMP_GE_I32_e64, AMDGPU::V_CMP_GE_I64_e64);
case CmpInst::ICMP_SLT:
- return Size == 32 ? AMDGPU::V_CMP_LT_I32_e64 : AMDGPU::V_CMP_LT_I64_e64;
+ return Select(AMDGPU::V_CMP_LT_I16_e64, AMDGPU::V_CMP_LT_I16_t16_e64,
+ AMDGPU::V_CMP_LT_I32_e64, AMDGPU::V_CMP_LT_I64_e64);
case CmpInst::ICMP_SLE:
- return Size == 32 ? AMDGPU::V_CMP_LE_I32_e64 : AMDGPU::V_CMP_LE_I64_e64;
+ return Select(AMDGPU::V_CMP_LE_I16_e64, AMDGPU::V_CMP_LE_I16_t16_e64,
+ AMDGPU::V_CMP_LE_I32_e64, AMDGPU::V_CMP_LE_I64_e64);
case CmpInst::ICMP_UGT:
- return Size == 32 ? AMDGPU::V_CMP_GT_U32_e64 : AMDGPU::V_CMP_GT_U64_e64;
+ return Select(AMDGPU::V_CMP_GT_U16_e64, AMDGPU::V_CMP_GT_U16_t16_e64,
+ AMDGPU::V_CMP_GT_U32_e64, AMDGPU::V_CMP_GT_U64_e64);
case CmpInst::ICMP_UGE:
- return Size == 32 ? AMDGPU::V_CMP_GE_U32_e64 : AMDGPU::V_CMP_GE_U64_e64;
+ return Select(AMDGPU::V_CMP_GE_U16_e64, AMDGPU::V_CMP_GE_U16_t16_e64,
+ AMDGPU::V_CMP_GE_U32_e64, AMDGPU::V_CMP_GE_U64_e64);
case CmpInst::ICMP_ULT:
- return Size == 32 ? AMDGPU::V_CMP_LT_U32_e64 : AMDGPU::V_CMP_LT_U64_e64;
+ return Select(AMDGPU::V_CMP_LT_U16_e64, AMDGPU::V_CMP_LT_U16_t16_e64,
+ AMDGPU::V_CMP_LT_U32_e64, AMDGPU::V_CMP_LT_U64_e64);
case CmpInst::ICMP_ULE:
- return Size == 32 ? AMDGPU::V_CMP_LE_U32_e64 : AMDGPU::V_CMP_LE_U64_e64;
+ return Select(AMDGPU::V_CMP_LE_U16_e64, AMDGPU::V_CMP_LE_U16_t16_e64,
+ AMDGPU::V_CMP_LE_U32_e64, AMDGPU::V_CMP_LE_U64_e64);
}
}
@@ -1222,7 +1248,7 @@ bool AMDGPUInstructionSelector::selectG_ICMP(MachineInstr &I) const {
return Ret;
}
- int Opcode = getV_CMPOpcode(Pred, Size);
+ int Opcode = getV_CMPOpcode(Pred, Size, *Subtarget);
if (Opcode == -1)
return false;
@@ -1250,30 +1276,27 @@ bool AMDGPUInstructionSelector::selectIntrinsicIcmp(MachineInstr &I) const {
Register SrcReg = I.getOperand(2).getReg();
unsigned Size = RBI.getSizeInBits(SrcReg, *MRI, TRI);
- auto Pred = static_cast<CmpInst::Predicate>(I.getOperand(4).getImm());
- if (!ICmpInst::isIntPredicate(static_cast<ICmpInst::Predicate>(Pred))) {
- MachineInstr *ICmp =
- BuildMI(*BB, &I, DL, TII.get(AMDGPU::IMPLICIT_DEF), Dst);
+ // i1 inputs are not supported in GlobalISel.
+ if (Size == 1)
+ return false;
- if (!RBI.constrainGenericRegister(ICmp->getOperand(0).getReg(),
- *TRI.getBoolRC(), *MRI))
- return false;
+ auto Pred = static_cast<CmpInst::Predicate>(I.getOperand(4).getImm());
+ if (!CmpInst::isIntPredicate(Pred)) {
+ BuildMI(*BB, &I, DL, TII.get(AMDGPU::IMPLICIT_DEF), Dst);
I.eraseFromParent();
- return true;
+ return RBI.constrainGenericRegister(Dst, *TRI.getBoolRC(), *MRI);
}
- int Opcode = getV_CMPOpcode(Pred, Size);
+ int Opcode = getV_CMPOpcode(Pred, Size, *Subtarget);
if (Opcode == -1)
return false;
MachineInstr *ICmp = BuildMI(*BB, &I, DL, TII.get(Opcode), Dst)
.add(I.getOperand(2))
.add(I.getOperand(3));
- RBI.constrainGenericRegister(ICmp->getOperand(0).getReg(), *TRI.getBoolRC(),
- *MRI);
- bool Ret = constrainSelectedInstRegOperands(*ICmp, TII, TRI, RBI);
+ RBI.constrainGenericRegister(Dst, *TRI.getBoolRC(), *MRI);
I.eraseFromParent();
- return Ret;
+ return constrainSelectedInstRegOperands(*ICmp, TII, TRI, RBI);
}
bool AMDGPUInstructionSelector::selectBallot(MachineInstr &I) const {
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-icmp.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-icmp.s16.mir
index 3c2c37513bba5..669721b6dbfec 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-icmp.s16.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-icmp.s16.mir
@@ -18,21 +18,21 @@ body: |
; WAVE64-NEXT: {{ $}}
; WAVE64-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; WAVE64-NEXT: [[V_CMP_EQ_U16_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U16_e64 [[COPY]], [[COPY1]], implicit $exec
+ ; WAVE64-NEXT: [[V_CMP_EQ_U16_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U16_e64 [[COPY]], [[COPY1]], implicit $exec
; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_U16_e64_]]
; WAVE32-LABEL: name: icmp_eq_s16_sv
; WAVE32: liveins: $sgpr0, $vgpr0
; WAVE32-NEXT: {{ $}}
; WAVE32-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; WAVE32-NEXT: [[V_CMP_EQ_U16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U16_e64 [[COPY]], [[COPY1]], implicit $exec
+ ; WAVE32-NEXT: [[V_CMP_EQ_U16_e64_:%[0-9]+]]:sreg_32 = V_CMP_EQ_U16_e64 [[COPY]], [[COPY1]], implicit $exec
; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_U16_e64_]]
; GFX11-LABEL: name: icmp_eq_s16_sv
; GFX11: liveins: $sgpr0, $vgpr0
; GFX11-NEXT: {{ $}}
; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX11-NEXT: [[V_CMP_EQ_U16_t16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U16_t16_e64 [[COPY]], [[COPY1]], implicit $exec
+ ; GFX11-NEXT: [[V_CMP_EQ_U16_t16_e64_:%[0-9]+]]:sreg_32 = V_CMP_EQ_U16_t16_e64 [[COPY]], [[COPY1]], implicit $exec
; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_U16_t16_e64_]]
%0:sgpr(s32) = COPY $sgpr0
%1:vgpr(s32) = COPY $vgpr0
@@ -57,21 +57,21 @@ body: |
; WAVE64-NEXT: {{ $}}
; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; WAVE64-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0
- ; WAVE64-NEXT: [[V_CMP_EQ_U16_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U16_e64 [[COPY]], [[COPY1]], implicit $exec
+ ; WAVE64-NEXT: [[V_CMP_EQ_U16_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U16_e64 [[COPY]], [[COPY1]], implicit $exec
; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_U16_e64_]]
; WAVE32-LABEL: name: icmp_eq_s16_vs
; WAVE32: liveins: $sgpr0, $vgpr0
; WAVE32-NEXT: {{ $}}
; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; WAVE32-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0
- ; WAVE32-NEXT: [[V_CMP_EQ_U16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U16_e64 [[COPY]], [[COPY1]], implicit $exec
+ ; WAVE32-NEXT: [[V_CMP_EQ_U16_e64_:%[0-9]+]]:sreg_32 = V_CMP_EQ_U16_e64 [[COPY]], [[COPY1]], implicit $exec
; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_U16_e64_]]
; GFX11-LABEL: name: icmp_eq_s16_vs
; GFX11: liveins: $sgpr0, $vgpr0
; GFX11-NEXT: {{ $}}
; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX11-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0
- ; GFX11-NEXT: [[V_CMP_EQ_U16_t16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U16_t16_e64 [[COPY]], [[COPY1]], implicit $exec
+ ; GFX11-NEXT: [[V_CMP_EQ_U16_t16_e64_:%[0-9]+]]:sreg_32 = V_CMP_EQ_U16_t16_e64 [[COPY]], [[COPY1]], implicit $exec
; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_U16_t16_e64_]]
%0:vgpr(s32) = COPY $vgpr0
%1:sgpr(s32) = COPY $sgpr0
@@ -96,21 +96,21 @@ body: |
; WAVE64-NEXT: {{ $}}
; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; WAVE64-NEXT: [[V_CMP_EQ_U16_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U16_e64 [[COPY]], [[COPY1]], implicit $exec
+ ; WAVE64-NEXT: [[V_CMP_EQ_U16_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U16_e64 [[COPY]], [[COPY1]], implicit $exec
; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_U16_e64_]]
; WAVE32-LABEL: name: icmp_eq_s16_vv
; WAVE32: liveins: $vgpr0, $vgpr1
; WAVE32-NEXT: {{ $}}
; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; WAVE32-NEXT: [[V_CMP_EQ_U16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U16_e64 [[COPY]], [[COPY1]], implicit $exec
+ ; WAVE32-NEXT: [[V_CMP_EQ_U16_e64_:%[0-9]+]]:sreg_32 = V_CMP_EQ_U16_e64 [[COPY]], [[COPY1]], implicit $exec
; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_U16_e64_]]
; GFX11-LABEL: name: icmp_eq_s16_vv
; GFX11: liveins: $vgpr0, $vgpr1
; GFX11-NEXT: {{ $}}
; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GFX11-NEXT: [[V_CMP_EQ_U16_t16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U16_t16_e64 [[COPY]], [[COPY1]], implicit $exec
+ ; GFX11-NEXT: [[V_CMP_EQ_U16_t16_e64_:%[0-9]+]]:sreg_32 = V_CMP_EQ_U16_t16_e64 [[COPY]], [[COPY1]], implicit $exec
; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_U16_t16_e64_]]
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = COPY $vgpr1
@@ -135,21 +135,21 @@ body: |
; WAVE64-NEXT: {{ $}}
; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; WAVE64-NEXT: [[V_CMP_NE_U16_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U16_e64 [[COPY]], [[COPY1]], implicit $exec
+ ; WAVE64-NEXT: [[V_CMP_NE_U16_e64_:%[0-9]+]]:sreg_64 = V_CMP_NE_U16_e64 [[COPY]], [[COPY1]], implicit $exec
; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_NE_U16_e64_]]
; WAVE32-LABEL: name: icmp_ne_s16_vv
; WAVE32: liveins: $vgpr0, $vgpr1
; WAVE32-NEXT: {{ $}}
; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; WAVE32-NEXT: [[V_CMP_NE_U16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_NE_U16_e64 [[COPY]], [[COPY1]], implicit $exec
+ ; WAVE32-NEXT: [[V_CMP_NE_U16_e64_:%[0-9]+]]:sreg_32 = V_CMP_NE_U16_e64 [[COPY]], [[COPY1]], implicit $exec
; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_NE_U16_e64_]]
; GFX11-LABEL: name: icmp_ne_s16_vv
; GFX11: liveins: $vgpr0, $vgpr1
; GFX11-NEXT: {{ $}}
; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GFX11-NEXT: [[V_CMP_NE_U16_t16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_NE_U16_t16_e64 [[COPY]], [[COPY1]], implicit $exec
+ ; GFX11-NEXT: [[V_CMP_NE_U16_t16_e64_:%[0-9]+]]:sreg_32 = V_CMP_NE_U16_t16_e64 [[COPY]], [[COPY1]], implicit $exec
; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_NE_U16_t16_e64_]]
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = COPY $vgpr1
@@ -174,21 +174,21 @@ body: |
; WAVE64-NEXT: {{ $}}
; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; WAVE64-NEXT: [[V_CMP_LT_I16_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_LT_I16_e64 [[COPY]], [[COPY1]], implicit $exec
+ ; WAVE64-NEXT: [[V_CMP_LT_I16_e64_:%[0-9]+]]:sreg_64 = V_CMP_LT_I16_e64 [[COPY]], [[COPY1]], implicit $exec
; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_LT_I16_e64_]]
; WAVE32-LABEL: name: icmp_slt_s16_vv
; WAVE32: liveins: $vgpr0, $vgpr1
; WAVE32-NEXT: {{ $}}
; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; WAVE32-NEXT: [[V_CMP_LT_I16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_LT_I16_e64 [[COPY]], [[COPY1]], implicit $exec
+ ; WAVE32-NEXT: [[V_CMP_LT_I16_e64_:%[0-9]+]]:sreg_32 = V_CMP_LT_I16_e64 [[COPY]], [[COPY1]], implicit $exec
; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_LT_I16_e64_]]
; GFX11-LABEL: name: icmp_slt_s16_vv
; GFX11: liveins: $vgpr0, $vgpr1
; GFX11-NEXT: {{ $}}
; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GFX11-NEXT: [[V_CMP_LT_I16_t16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_LT_I16_t16_e64 [[COPY]], [[COPY1]], implicit $exec
+ ; GFX11-NEXT: [[V_CMP_LT_I16_t16_e64_:%[0-9]+]]:sreg_32 = V_CMP_LT_I16_t16_e64 [[COPY]], [[COPY1]], implicit $exec
; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_LT_I16_t16_e64_]]
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = COPY $vgpr1
@@ -213,21 +213,21 @@ body: |
; WAVE64-NEXT: {{ $}}
; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; WAVE64-NEXT: [[V_CMP_LE_I16_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_LE_I16_e64 [[COPY]], [[COPY1]], implicit $exec
+ ; WAVE64-NEXT: [[V_CMP_LE_I16_e64_:%[0-9]+]]:sreg_64 = V_CMP_LE_I16_e64 [[COPY]], [[COPY1]], implicit $exec
; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_LE_I16_e64_]]
; WAVE32-LABEL: name: icmp_sle_s16_vv
; WAVE32: liveins: $vgpr0, $vgpr1
; WAVE32-NEXT: {{ $}}
; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; WAVE32-NEXT: [[V_CMP_LE_I16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_LE_I16_e64 [[COPY]], [[COPY1]], implicit $exec
+ ; WAVE32-NEXT: [[V_CMP_LE_I16_e64_:%[0-9]+]]:sreg_32 = V_CMP_LE_I16_e64 [[COPY]], [[COPY1]], implicit $exec
; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_LE_I16_e64_]]
; GFX11-LABEL: name: icmp_sle_s16_vv
; GFX11: liveins: $vgpr0, $vgpr1
; GFX11-NEXT: {{ $}}
; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GFX11-NEXT: [[V_CMP_LE_I16_t16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_LE_I16_t16_e64 [[COPY]], [[COPY1]], implicit $exec
+ ; GFX11-NEXT: [[V_CMP_LE_I16_t16_e64_:%[0-9]+]]:sreg_32 = V_CMP_LE_I16_t16_e64 [[COPY]], [[COPY1]], implicit $exec
; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_LE_I16_t16_e64_]]
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = COPY $vgpr1
@@ -252,21 +252,21 @@ body: |
; WAVE64-NEXT: {{ $}}
; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; WAVE64-NEXT: [[V_CMP_LT_U16_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_LT_U16_e64 [[COPY]], [[COPY1]], implicit $exec
+ ; WAVE64-NEXT: [[V_CMP_LT_U16_e64_:%[0-9]+]]:sreg_64 = V_CMP_LT_U16_e64 [[COPY]], [[COPY1]], implicit $exec
; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_LT_U16_e64_]]
; WAVE32-LABEL: name: icmp_ult_s16_vv
; WAVE32: liveins: $vgpr0, $vgpr1
; WAVE32-NEXT: {{ $}}
; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; WAVE32-NEXT: [[V_CMP_LT_U16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_LT_U16_e64 [[COPY]], [[COPY1]], implicit $exec
+ ; WAVE32-NEXT: [[V_CMP_LT_U16_e64_:%[0-9]+]]:sreg_32 = V_CMP_LT_U16_e64 [[COPY]], [[COPY1]], implicit $exec
; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_LT_U16_e64_]]
; GFX11-LABEL: name: icmp_ult_s16_vv
; GFX11: liveins: $vgpr0, $vgpr1
; GFX11-NEXT: {{ $}}
; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GFX11-NEXT: [[V_CMP_LT_U16_t16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_LT_U16_t16_e64 [[COPY]], [[COPY1]], implicit $exec
+ ; GFX11-NEXT: [[V_CMP_LT_U16_t16_e64_:%[0-9]+]]:sreg_32 = V_CMP_LT_U16_t16_e64 [[COPY]], [[COPY1]], implicit $exec
; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_LT_U16_t16_e64_]]
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = COPY $vgpr1
@@ -291,21 +291,21 @@ body: |
; WAVE64-NEXT: {{ $}}
; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; WAVE64-NEXT: [[V_CMP_LE_U16_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_LE_U16_e64 [[COPY]], [[COPY1]], implicit $exec
+ ; WAVE64-NEXT: [[V_CMP_LE_U16_e64_:%[0-9]+]]:sreg_64 = V_CMP_LE_U16_e64 [[COPY]], [[COPY1]], implicit $exec
; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_LE_U16_e64_]]
; WAVE32-LABEL: name: icmp_ule_s16_vv
; WAVE32: liveins: $vgpr0, $vgpr1
; WAVE32-NEXT: {{ $}}
; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; WAVE32-NEXT: [[V_CMP_LE_U16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_LE_U16_e64 [[COPY]], [[COPY1]], implicit $exec
+ ; WAVE32-NEXT: [[V_CMP_LE_U16_e64_:%[0-9]+]]:sreg_32 = V_CMP_LE_U16_e64 [[COPY]], [[COPY1]], implicit $exec
; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_LE_U16_e64_]]
; GFX11-LABEL: name: icmp_ule_s16_vv
; GFX11: liveins: $vgpr0, $vgpr1
; GFX11-NEXT: {{ $}}
; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GFX11-NEXT: [[V_CMP_LE_U16_t16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_LE_U16_t16_e64 [[COPY]], [[COPY1]], implicit $exec
+ ; GFX11-NEXT: [[V_CMP_LE_U16_t16_e64_:%[0-9]+]]:sreg_32 = V_CMP_LE_U16_t16_e64 [[COPY]], [[COPY1]], implicit $exec
; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_LE_U16_t16_e64_]]
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = COPY $vgpr1
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.icmp.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.icmp.ll
deleted file mode 100644
index 73440ecbbfcaf..0000000000000
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.icmp.ll
+++ /dev/null
@@ -1,66 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -global-isel -global-isel-abort=1 -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX10 %s
-; RUN: llc -global-isel -global-isel-abort=1 -march=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX11 %s
-
-define amdgpu_ps void @test_intr_icmp_eq_i64(i64 addrspace(1)* %out, i32 %src) #0 {
-; GFX10-LABEL: test_intr_icmp_eq_i64:
-; GFX10: ; %bb.0:
-; GFX10-NEXT: v_cmp_eq_u32_e64 s[0:1], 0x64, v2
-; GFX10-NEXT: v_mov_b32_e32 v3, s1
-; GFX10-NEXT: v_mov_b32_e32 v2, s0
-; GFX10-NEXT: global_store_dwordx2 v[0:1], v[2:3], off
-; GFX10-NEXT: s_endpgm
-;
-; GFX11-LABEL: test_intr_icmp_eq_i64:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: v_cmp_eq_u32_e64 s[0:1], 0x64, v2
-; GFX11-NEXT: v_mov_b32_e32 v3, s1
-; GFX11-NEXT: v_mov_b32_e32 v2, s0
-; GFX11-NEXT: global_store_b64 v[0:1], v[2:3], off
-; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
-; GFX11-NEXT: s_endpgm
- %result = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %src, i32 100, i32 32)
- store i64 %result, i64 addrspace(1)* %out
- ret void
-}
-
-define amdgpu_ps void @test_intr_icmp_ne_i32(i32 addrspace(1)* %out, i32 %src) #1 {
-; GFX10-LABEL: test_intr_icmp_ne_i32:
-; GFX10: ; %bb.0:
-; GFX10-NEXT: v_cmp_ne_u32_e64 s0, 0x64, v2
-; GFX10-NEXT: v_mov_b32_e32 v2, s0
-; GFX10-NEXT: global_store_dword v[0:1], v2, off
-; GFX10-NEXT: s_endpgm
-;
-; GFX11-LABEL: test_intr_icmp_ne_i32:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: v_cmp_ne_u32_e64 s0, 0x64, v2
-; GFX11-NEXT: v_mov_b32_e32 v2, s0
-; GFX11-NEXT: global_store_b32 v[0:1], v2, off
-; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
-; GFX11-NEXT: s_endpgm
- %result = call i32 @llvm.amdgcn.icmp.i32.i32(i32 %src, i32 100, i32 33)
- store i32 %result, i32 addrspace(1)* %out
- ret void
-}
-
-define amdgpu_ps void @test_intr_icmp_i32_invalid_cc(i32 addrspace(1)* %out, i32 %src) #1 {
-; GFX10-LABEL: test_intr_icmp_i32_invalid_cc:
-; GFX10: ; %bb.0:
-; GFX10-NEXT: global_store_dword v[0:1], v0, off
-; GFX10-NEXT: s_endpgm
-;
-; GFX11-LABEL: test_intr_icmp_i32_invalid_cc:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: global_store_b32 v[0:1], v0, off
-; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
-; GFX11-NEXT: s_endpgm
- %result = call i32 @llvm.amdgcn.icmp.i32.i32(i32 %src, i32 100, i32 9999)
- store i32 %result, i32 addrspace(1)* %out
- ret void
-}
-
-declare i64 @llvm.amdgcn.icmp.i64.i32(i32, i32, i32)
-declare i32 @llvm.amdgcn.icmp.i32.i32(i32, i32, i32)
-attributes #0 = { "target-features"="+wavefrontsize64" }
-attributes #1 = { "target-features"="+wavefrontsize32" }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.icmp.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.icmp.ll
deleted file mode 100644
index 7c9d063bb7242..0000000000000
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.icmp.ll
+++ /dev/null
@@ -1,1101 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX %s
-; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VI %s
-
-declare i64 @llvm.amdgcn.icmp.i64.i32(i32, i32, i32)
-declare i32 @llvm.amdgcn.icmp.i32.i32(i32, i32, i32)
-declare i64 @llvm.amdgcn.icmp.i32(i32, i32, i32) #0
-declare i64 @llvm.amdgcn.icmp.i64(i64, i64, i32) #0
-declare i64 @llvm.amdgcn.icmp.i16(i16, i16, i32) #0
-declare i64 @llvm.amdgcn.icmp.i1(i1, i1, i32) #0
-
-define amdgpu_kernel void @v_icmp_i32_eq(i64 addrspace(1)* %out, i32 %src) {
-; GFX-LABEL: v_icmp_i32_eq:
-; GFX: ; %bb.0:
-; GFX-NEXT: s_load_dword s2, s[0:1], 0xb
-; GFX-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
-; GFX-NEXT: s_mov_b32 s3, 0xf000
-; GFX-NEXT: v_mov_b32_e32 v0, 0x64
-; GFX-NEXT: s_waitcnt lgkmcnt(0)
-; GFX-NEXT: v_cmp_eq_u32_e64 s[4:5], s2, v0
-; GFX-NEXT: s_mov_b32 s2, -1
-; GFX-NEXT: v_mov_b32_e32 v0, s4
-; GFX-NEXT: v_mov_b32_e32 v1, s5
-; GFX-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
-; GFX-NEXT: s_endpgm
-;
-; VI-LABEL: v_icmp_i32_eq:
-; VI: ; %bb.0:
-; VI-NEXT: s_load_dword s2, s[0:1], 0x2c
-; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
-; VI-NEXT: v_mov_b32_e32 v0, 0x64
-; VI-NEXT: s_waitcnt lgkmcnt(0)
-; VI-NEXT: v_cmp_eq_u32_e64 s[2:3], s2, v0
-; VI-NEXT: v_mov_b32_e32 v0, s0
-; VI-NEXT: v_mov_b32_e32 v2, s2
-; VI-NEXT: v_mov_b32_e32 v1, s1
-; VI-NEXT: v_mov_b32_e32 v3, s3
-; VI-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
-; VI-NEXT: s_endpgm
- %result = call i64 @llvm.amdgcn.icmp.i32(i32 %src, i32 100, i32 32)
- store i64 %result, i64 addrspace(1)* %out
- ret void
-}
-
-define amdgpu_kernel void @v_icmp_i32(i64 addrspace(1)* %out, i32 %src) {
-; GCN-LABEL: v_icmp_i32:
-; GCN: ; %bb.0:
-; GCN-NEXT: s_endpgm
- %result = call i64 @llvm.amdgcn.icmp.i32(i32 %src, i32 100, i32 30)
- store i64 %result, i64 addrspace(1)* %out
- ret void
-}
-
-define amdgpu_kernel void @v_icmp_i32_ne(i64 addrspace(1)* %out, i32 %src) {
-; GFX-LABEL: v_icmp_i32_ne:
-; GFX: ; %bb.0:
-; GFX-NEXT: s_load_dword s2, s[0:1], 0xb
-; GFX-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
-; GFX-NEXT: s_mov_b32 s3, 0xf000
-; GFX-NEXT: v_mov_b32_e32 v0, 0x64
-; GFX-NEXT: s_waitcnt lgkmcnt(0)
-; GFX-NEXT: v_cmp_ne_u32_e64 s[4:5], s2, v0
-; GFX-NEXT: s_mov_b32 s2, -1
-; GFX-NEXT: v_mov_b32_e32 v0, s4
-; GFX-NEXT: v_mov_b32_e32 v1, s5
-; GFX-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
-; GFX-NEXT: s_endpgm
-;
-; VI-LABEL: v_icmp_i32_ne:
-; VI: ; %bb.0:
-; VI-NEXT: s_load_dword s2, s[0:1], 0x2c
-; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
-; VI-NEXT: v_mov_b32_e32 v0, 0x64
-; VI-NEXT: s_waitcnt lgkmcnt(0)
-; VI-NEXT: v_cmp_ne_u32_e64 s[2:3], s2, v0
-; VI-NEXT: v_mov_b32_e32 v0, s0
-; VI-NEXT: v_mov_b32_e32 v2, s2
-; VI-NEXT: v_mov_b32_e32 v1, s1
-; VI-NEXT: v_mov_b32_e32 v3, s3
-; VI-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
-; VI-NEXT: s_endpgm
- %result = call i64 @llvm.amdgcn.icmp.i32(i32 %src, i32 100, i32 33)
- store i64 %result, i64 addrspace(1)* %out
- ret void
-}
-
-define amdgpu_kernel void @v_icmp_i32_ugt(i64 addrspace(1)* %out, i32 %src) {
-; GFX-LABEL: v_icmp_i32_ugt:
-; GFX: ; %bb.0:
-; GFX-NEXT: s_load_dword s2, s[0:1], 0xb
-; GFX-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
-; GFX-NEXT: s_mov_b32 s3, 0xf000
-; GFX-NEXT: v_mov_b32_e32 v0, 0x64
-; GFX-NEXT: s_waitcnt lgkmcnt(0)
-; GFX-NEXT: v_cmp_gt_u32_e64 s[4:5], s2, v0
-; GFX-NEXT: s_mov_b32 s2, -1
-; GFX-NEXT: v_mov_b32_e32 v0, s4
-; GFX-NEXT: v_mov_b32_e32 v1, s5
-; GFX-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
-; GFX-NEXT: s_endpgm
-;
-; VI-LABEL: v_icmp_i32_ugt:
-; VI: ; %bb.0:
-; VI-NEXT: s_load_dword s2, s[0:1], 0x2c
-; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
-; VI-NEXT: v_mov_b32_e32 v0, 0x64
-; VI-NEXT: s_waitcnt lgkmcnt(0)
-; VI-NEXT: v_cmp_gt_u32_e64 s[2:3], s2, v0
-; VI-NEXT: v_mov_b32_e32 v0, s0
-; VI-NEXT: v_mov_b32_e32 v2, s2
-; VI-NEXT: v_mov_b32_e32 v1, s1
-; VI-NEXT: v_mov_b32_e32 v3, s3
-; VI-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
-; VI-NEXT: s_endpgm
- %result = call i64 @llvm.amdgcn.icmp.i32(i32 %src, i32 100, i32 34)
- store i64 %result, i64 addrspace(1)* %out
- ret void
-}
-
-define amdgpu_kernel void @v_icmp_i32_uge(i64 addrspace(1)* %out, i32 %src) {
-; GFX-LABEL: v_icmp_i32_uge:
-; GFX: ; %bb.0:
-; GFX-NEXT: s_load_dword s2, s[0:1], 0xb
-; GFX-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
-; GFX-NEXT: s_mov_b32 s3, 0xf000
-; GFX-NEXT: v_mov_b32_e32 v0, 0x64
-; GFX-NEXT: s_waitcnt lgkmcnt(0)
-; GFX-NEXT: v_cmp_ge_u32_e64 s[4:5], s2, v0
-; GFX-NEXT: s_mov_b32 s2, -1
-; GFX-NEXT: v_mov_b32_e32 v0, s4
-; GFX-NEXT: v_mov_b32_e32 v1, s5
-; GFX-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
-; GFX-NEXT: s_endpgm
-;
-; VI-LABEL: v_icmp_i32_uge:
-; VI: ; %bb.0:
-; VI-NEXT: s_load_dword s2, s[0:1], 0x2c
-; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
-; VI-NEXT: v_mov_b32_e32 v0, 0x64
-; VI-NEXT: s_waitcnt lgkmcnt(0)
-; VI-NEXT: v_cmp_ge_u32_e64 s[2:3], s2, v0
-; VI-NEXT: v_mov_b32_e32 v0, s0
-; VI-NEXT: v_mov_b32_e32 v2, s2
-; VI-NEXT: v_mov_b32_e32 v1, s1
-; VI-NEXT: v_mov_b32_e32 v3, s3
-; VI-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
-; VI-NEXT: s_endpgm
- %result = call i64 @llvm.amdgcn.icmp.i32(i32 %src, i32 100, i32 35)
- store i64 %result, i64 addrspace(1)* %out
- ret void
-}
-
-define amdgpu_kernel void @v_icmp_i32_ult(i64 addrspace(1)* %out, i32 %src) {
-; GFX-LABEL: v_icmp_i32_ult:
-; GFX: ; %bb.0:
-; GFX-NEXT: s_load_dword s2, s[0:1], 0xb
-; GFX-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
-; GFX-NEXT: s_mov_b32 s3, 0xf000
-; GFX-NEXT: v_mov_b32_e32 v0, 0x64
-; GFX-NEXT: s_waitcnt lgkmcnt(0)
-; GFX-NEXT: v_cmp_lt_u32_e64 s[4:5], s2, v0
-; GFX-NEXT: s_mov_b32 s2, -1
-; GFX-NEXT: v_mov_b32_e32 v0, s4
-; GFX-NEXT: v_mov_b32_e32 v1, s5
-; GFX-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
-; GFX-NEXT: s_endpgm
-;
-; VI-LABEL: v_icmp_i32_ult:
-; VI: ; %bb.0:
-; VI-NEXT: s_load_dword s2, s[0:1], 0x2c
-; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
-; VI-NEXT: v_mov_b32_e32 v0, 0x64
-; VI-NEXT: s_waitcnt lgkmcnt(0)
-; VI-NEXT: v_cmp_lt_u32_e64 s[2:3], s2, v0
-; VI-NEXT: v_mov_b32_e32 v0, s0
-; VI-NEXT: v_mov_b32_e32 v2, s2
-; VI-NEXT: v_mov_b32_e32 v1, s1
-; VI-NEXT: v_mov_b32_e32 v3, s3
-; VI-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
-; VI-NEXT: s_endpgm
- %result = call i64 @llvm.amdgcn.icmp.i32(i32 %src, i32 100, i32 36)
- store i64 %result, i64 addrspace(1)* %out
- ret void
-}
-
-define amdgpu_kernel void @v_icmp_i32_ule(i64 addrspace(1)* %out, i32 %src) {
-; GFX-LABEL: v_icmp_i32_ule:
-; GFX: ; %bb.0:
-; GFX-NEXT: s_load_dword s2, s[0:1], 0xb
-; GFX-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
-; GFX-NEXT: s_mov_b32 s3, 0xf000
-; GFX-NEXT: v_mov_b32_e32 v0, 0x64
-; GFX-NEXT: s_waitcnt lgkmcnt(0)
-; GFX-NEXT: v_cmp_le_u32_e64 s[4:5], s2, v0
-; GFX-NEXT: s_mov_b32 s2, -1
-; GFX-NEXT: v_mov_b32_e32 v0, s4
-; GFX-NEXT: v_mov_b32_e32 v1, s5
-; GFX-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
-; GFX-NEXT: s_endpgm
-;
-; VI-LABEL: v_icmp_i32_ule:
-; VI: ; %bb.0:
-; VI-NEXT: s_load_dword s2, s[0:1], 0x2c
-; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
-; VI-NEXT: v_mov_b32_e32 v0, 0x64
-; VI-NEXT: s_waitcnt lgkmcnt(0)
-; VI-NEXT: v_cmp_le_u32_e64 s[2:3], s2, v0
-; VI-NEXT: v_mov_b32_e32 v0, s0
-; VI-NEXT: v_mov_b32_e32 v2, s2
-; VI-NEXT: v_mov_b32_e32 v1, s1
-; VI-NEXT: v_mov_b32_e32 v3, s3
-; VI-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
-; VI-NEXT: s_endpgm
- %result = call i64 @llvm.amdgcn.icmp.i32(i32 %src, i32 100, i32 37)
- store i64 %result, i64 addrspace(1)* %out
- ret void
-}
-
-define amdgpu_kernel void @v_icmp_i32_sgt(i64 addrspace(1)* %out, i32 %src) #1 {
-; GFX-LABEL: v_icmp_i32_sgt:
-; GFX: ; %bb.0:
-; GFX-NEXT: s_load_dword s2, s[0:1], 0xb
-; GFX-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
-; GFX-NEXT: s_mov_b32 s3, 0xf000
-; GFX-NEXT: v_mov_b32_e32 v0, 0x64
-; GFX-NEXT: s_waitcnt lgkmcnt(0)
-; GFX-NEXT: v_cmp_gt_i32_e64 s[4:5], s2, v0
-; GFX-NEXT: s_mov_b32 s2, -1
-; GFX-NEXT: v_mov_b32_e32 v0, s4
-; GFX-NEXT: v_mov_b32_e32 v1, s5
-; GFX-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
-; GFX-NEXT: s_endpgm
-;
-; VI-LABEL: v_icmp_i32_sgt:
-; VI: ; %bb.0:
-; VI-NEXT: s_load_dword s2, s[0:1], 0x2c
-; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
-; VI-NEXT: v_mov_b32_e32 v0, 0x64
-; VI-NEXT: s_waitcnt lgkmcnt(0)
-; VI-NEXT: v_cmp_gt_i32_e64 s[2:3], s2, v0
-; VI-NEXT: v_mov_b32_e32 v0, s0
-; VI-NEXT: v_mov_b32_e32 v2, s2
-; VI-NEXT: v_mov_b32_e32 v1, s1
-; VI-NEXT: v_mov_b32_e32 v3, s3
-; VI-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
-; VI-NEXT: s_endpgm
- %result = call i64 @llvm.amdgcn.icmp.i32(i32 %src, i32 100, i32 38)
- store i64 %result, i64 addrspace(1)* %out
- ret void
-}
-
-define amdgpu_kernel void @v_icmp_i32_sge(i64 addrspace(1)* %out, i32 %src) {
-; GFX-LABEL: v_icmp_i32_sge:
-; GFX: ; %bb.0:
-; GFX-NEXT: s_load_dword s2, s[0:1], 0xb
-; GFX-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
-; GFX-NEXT: s_mov_b32 s3, 0xf000
-; GFX-NEXT: v_mov_b32_e32 v0, 0x64
-; GFX-NEXT: s_waitcnt lgkmcnt(0)
-; GFX-NEXT: v_cmp_ge_i32_e64 s[4:5], s2, v0
-; GFX-NEXT: s_mov_b32 s2, -1
-; GFX-NEXT: v_mov_b32_e32 v0, s4
-; GFX-NEXT: v_mov_b32_e32 v1, s5
-; GFX-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
-; GFX-NEXT: s_endpgm
-;
-; VI-LABEL: v_icmp_i32_sge:
-; VI: ; %bb.0:
-; VI-NEXT: s_load_dword s2, s[0:1], 0x2c
-; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
-; VI-NEXT: v_mov_b32_e32 v0, 0x64
-; VI-NEXT: s_waitcnt lgkmcnt(0)
-; VI-NEXT: v_cmp_ge_i32_e64 s[2:3], s2, v0
-; VI-NEXT: v_mov_b32_e32 v0, s0
-; VI-NEXT: v_mov_b32_e32 v2, s2
-; VI-NEXT: v_mov_b32_e32 v1, s1
-; VI-NEXT: v_mov_b32_e32 v3, s3
-; VI-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
-; VI-NEXT: s_endpgm
- %result = call i64 @llvm.amdgcn.icmp.i32(i32 %src, i32 100, i32 39)
- store i64 %result, i64 addrspace(1)* %out
- ret void
-}
-
-define amdgpu_kernel void @v_icmp_i32_slt(i64 addrspace(1)* %out, i32 %src) {
-; GFX-LABEL: v_icmp_i32_slt:
-; GFX: ; %bb.0:
-; GFX-NEXT: s_load_dword s2, s[0:1], 0xb
-; GFX-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
-; GFX-NEXT: s_mov_b32 s3, 0xf000
-; GFX-NEXT: v_mov_b32_e32 v0, 0x64
-; GFX-NEXT: s_waitcnt lgkmcnt(0)
-; GFX-NEXT: v_cmp_lt_i32_e64 s[4:5], s2, v0
-; GFX-NEXT: s_mov_b32 s2, -1
-; GFX-NEXT: v_mov_b32_e32 v0, s4
-; GFX-NEXT: v_mov_b32_e32 v1, s5
-; GFX-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
-; GFX-NEXT: s_endpgm
-;
-; VI-LABEL: v_icmp_i32_slt:
-; VI: ; %bb.0:
-; VI-NEXT: s_load_dword s2, s[0:1], 0x2c
-; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
-; VI-NEXT: v_mov_b32_e32 v0, 0x64
-; VI-NEXT: s_waitcnt lgkmcnt(0)
-; VI-NEXT: v_cmp_lt_i32_e64 s[2:3], s2, v0
-; VI-NEXT: v_mov_b32_e32 v0, s0
-; VI-NEXT: v_mov_b32_e32 v2, s2
-; VI-NEXT: v_mov_b32_e32 v1, s1
-; VI-NEXT: v_mov_b32_e32 v3, s3
-; VI-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
-; VI-NEXT: s_endpgm
- %result = call i64 @llvm.amdgcn.icmp.i32(i32 %src, i32 100, i32 40)
- store i64 %result, i64 addrspace(1)* %out
- ret void
-}
-
-define amdgpu_kernel void @v_icmp_i32_sle(i64 addrspace(1)* %out, i32 %src) {
-; GFX-LABEL: v_icmp_i32_sle:
-; GFX: ; %bb.0:
-; GFX-NEXT: s_load_dword s2, s[0:1], 0xb
-; GFX-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
-; GFX-NEXT: s_mov_b32 s3, 0xf000
-; GFX-NEXT: v_mov_b32_e32 v0, 0x64
-; GFX-NEXT: s_waitcnt lgkmcnt(0)
-; GFX-NEXT: v_cmp_le_i32_e64 s[4:5], s2, v0
-; GFX-NEXT: s_mov_b32 s2, -1
-; GFX-NEXT: v_mov_b32_e32 v0, s4
-; GFX-NEXT: v_mov_b32_e32 v1, s5
-; GFX-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
-; GFX-NEXT: s_endpgm
-;
-; VI-LABEL: v_icmp_i32_sle:
-; VI: ; %bb.0:
-; VI-NEXT: s_load_dword s2, s[0:1], 0x2c
-; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
-; VI-NEXT: v_mov_b32_e32 v0, 0x64
-; VI-NEXT: s_waitcnt lgkmcnt(0)
-; VI-NEXT: v_cmp_le_i32_e64 s[2:3], s2, v0
-; VI-NEXT: v_mov_b32_e32 v0, s0
-; VI-NEXT: v_mov_b32_e32 v2, s2
-; VI-NEXT: v_mov_b32_e32 v1, s1
-; VI-NEXT: v_mov_b32_e32 v3, s3
-; VI-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
-; VI-NEXT: s_endpgm
- %result = call i64 @llvm.amdgcn.icmp.i32(i32 %src, i32 100, i32 41)
- store i64 %result, i64 addrspace(1)* %out
- ret void
-}
-
-define amdgpu_kernel void @v_icmp_i64_eq(i64 addrspace(1)* %out, i64 %src) {
-; GFX-LABEL: v_icmp_i64_eq:
-; GFX: ; %bb.0:
-; GFX-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
-; GFX-NEXT: s_mov_b32 s7, 0xf000
-; GFX-NEXT: v_mov_b32_e32 v0, 0x64
-; GFX-NEXT: v_mov_b32_e32 v1, 0
-; GFX-NEXT: s_waitcnt lgkmcnt(0)
-; GFX-NEXT: v_cmp_eq_u64_e64 s[2:3], s[2:3], v[0:1]
-; GFX-NEXT: s_mov_b32 s6, -1
-; GFX-NEXT: s_mov_b32 s4, s0
-; GFX-NEXT: s_mov_b32 s5, s1
-; GFX-NEXT: v_mov_b32_e32 v0, s2
-; GFX-NEXT: v_mov_b32_e32 v1, s3
-; GFX-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
-; GFX-NEXT: s_endpgm
-;
-; VI-LABEL: v_icmp_i64_eq:
-; VI: ; %bb.0:
-; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
-; VI-NEXT: v_mov_b32_e32 v0, 0x64
-; VI-NEXT: v_mov_b32_e32 v1, 0
-; VI-NEXT: s_waitcnt lgkmcnt(0)
-; VI-NEXT: v_cmp_eq_u64_e64 s[2:3], s[2:3], v[0:1]
-; VI-NEXT: v_mov_b32_e32 v2, s0
-; VI-NEXT: v_mov_b32_e32 v0, s2
-; VI-NEXT: v_mov_b32_e32 v3, s1
-; VI-NEXT: v_mov_b32_e32 v1, s3
-; VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
-; VI-NEXT: s_endpgm
- %result = call i64 @llvm.amdgcn.icmp.i64(i64 %src, i64 100, i32 32)
- store i64 %result, i64 addrspace(1)* %out
- ret void
-}
-
-define amdgpu_kernel void @v_icmp_i64_ne(i64 addrspace(1)* %out, i64 %src) {
-; GFX-LABEL: v_icmp_i64_ne:
-; GFX: ; %bb.0:
-; GFX-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
-; GFX-NEXT: s_mov_b32 s7, 0xf000
-; GFX-NEXT: v_mov_b32_e32 v0, 0x64
-; GFX-NEXT: v_mov_b32_e32 v1, 0
-; GFX-NEXT: s_waitcnt lgkmcnt(0)
-; GFX-NEXT: v_cmp_ne_u64_e64 s[2:3], s[2:3], v[0:1]
-; GFX-NEXT: s_mov_b32 s6, -1
-; GFX-NEXT: s_mov_b32 s4, s0
-; GFX-NEXT: s_mov_b32 s5, s1
-; GFX-NEXT: v_mov_b32_e32 v0, s2
-; GFX-NEXT: v_mov_b32_e32 v1, s3
-; GFX-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
-; GFX-NEXT: s_endpgm
-;
-; VI-LABEL: v_icmp_i64_ne:
-; VI: ; %bb.0:
-; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
-; VI-NEXT: v_mov_b32_e32 v0, 0x64
-; VI-NEXT: v_mov_b32_e32 v1, 0
-; VI-NEXT: s_waitcnt lgkmcnt(0)
-; VI-NEXT: v_cmp_ne_u64_e64 s[2:3], s[2:3], v[0:1]
-; VI-NEXT: v_mov_b32_e32 v2, s0
-; VI-NEXT: v_mov_b32_e32 v0, s2
-; VI-NEXT: v_mov_b32_e32 v3, s1
-; VI-NEXT: v_mov_b32_e32 v1, s3
-; VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
-; VI-NEXT: s_endpgm
- %result = call i64 @llvm.amdgcn.icmp.i64(i64 %src, i64 100, i32 33)
- store i64 %result, i64 addrspace(1)* %out
- ret void
-}
-
-define amdgpu_kernel void @v_icmp_u64_ugt(i64 addrspace(1)* %out, i64 %src) {
-; GFX-LABEL: v_icmp_u64_ugt:
-; GFX: ; %bb.0:
-; GFX-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
-; GFX-NEXT: s_mov_b32 s7, 0xf000
-; GFX-NEXT: v_mov_b32_e32 v0, 0x64
-; GFX-NEXT: v_mov_b32_e32 v1, 0
-; GFX-NEXT: s_waitcnt lgkmcnt(0)
-; GFX-NEXT: v_cmp_gt_u64_e64 s[2:3], s[2:3], v[0:1]
-; GFX-NEXT: s_mov_b32 s6, -1
-; GFX-NEXT: s_mov_b32 s4, s0
-; GFX-NEXT: s_mov_b32 s5, s1
-; GFX-NEXT: v_mov_b32_e32 v0, s2
-; GFX-NEXT: v_mov_b32_e32 v1, s3
-; GFX-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
-; GFX-NEXT: s_endpgm
-;
-; VI-LABEL: v_icmp_u64_ugt:
-; VI: ; %bb.0:
-; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
-; VI-NEXT: v_mov_b32_e32 v0, 0x64
-; VI-NEXT: v_mov_b32_e32 v1, 0
-; VI-NEXT: s_waitcnt lgkmcnt(0)
-; VI-NEXT: v_cmp_gt_u64_e64 s[2:3], s[2:3], v[0:1]
-; VI-NEXT: v_mov_b32_e32 v2, s0
-; VI-NEXT: v_mov_b32_e32 v0, s2
-; VI-NEXT: v_mov_b32_e32 v3, s1
-; VI-NEXT: v_mov_b32_e32 v1, s3
-; VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
-; VI-NEXT: s_endpgm
- %result = call i64 @llvm.amdgcn.icmp.i64(i64 %src, i64 100, i32 34)
- store i64 %result, i64 addrspace(1)* %out
- ret void
-}
-
-define amdgpu_kernel void @v_icmp_u64_uge(i64 addrspace(1)* %out, i64 %src) {
-; GFX-LABEL: v_icmp_u64_uge:
-; GFX: ; %bb.0:
-; GFX-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
-; GFX-NEXT: s_mov_b32 s7, 0xf000
-; GFX-NEXT: v_mov_b32_e32 v0, 0x64
-; GFX-NEXT: v_mov_b32_e32 v1, 0
-; GFX-NEXT: s_waitcnt lgkmcnt(0)
-; GFX-NEXT: v_cmp_ge_u64_e64 s[2:3], s[2:3], v[0:1]
-; GFX-NEXT: s_mov_b32 s6, -1
-; GFX-NEXT: s_mov_b32 s4, s0
-; GFX-NEXT: s_mov_b32 s5, s1
-; GFX-NEXT: v_mov_b32_e32 v0, s2
-; GFX-NEXT: v_mov_b32_e32 v1, s3
-; GFX-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
-; GFX-NEXT: s_endpgm
-;
-; VI-LABEL: v_icmp_u64_uge:
-; VI: ; %bb.0:
-; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
-; VI-NEXT: v_mov_b32_e32 v0, 0x64
-; VI-NEXT: v_mov_b32_e32 v1, 0
-; VI-NEXT: s_waitcnt lgkmcnt(0)
-; VI-NEXT: v_cmp_ge_u64_e64 s[2:3], s[2:3], v[0:1]
-; VI-NEXT: v_mov_b32_e32 v2, s0
-; VI-NEXT: v_mov_b32_e32 v0, s2
-; VI-NEXT: v_mov_b32_e32 v3, s1
-; VI-NEXT: v_mov_b32_e32 v1, s3
-; VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
-; VI-NEXT: s_endpgm
- %result = call i64 @llvm.amdgcn.icmp.i64(i64 %src, i64 100, i32 35)
- store i64 %result, i64 addrspace(1)* %out
- ret void
-}
-
-define amdgpu_kernel void @v_icmp_u64_ult(i64 addrspace(1)* %out, i64 %src) {
-; GFX-LABEL: v_icmp_u64_ult:
-; GFX: ; %bb.0:
-; GFX-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
-; GFX-NEXT: s_mov_b32 s7, 0xf000
-; GFX-NEXT: v_mov_b32_e32 v0, 0x64
-; GFX-NEXT: v_mov_b32_e32 v1, 0
-; GFX-NEXT: s_waitcnt lgkmcnt(0)
-; GFX-NEXT: v_cmp_lt_u64_e64 s[2:3], s[2:3], v[0:1]
-; GFX-NEXT: s_mov_b32 s6, -1
-; GFX-NEXT: s_mov_b32 s4, s0
-; GFX-NEXT: s_mov_b32 s5, s1
-; GFX-NEXT: v_mov_b32_e32 v0, s2
-; GFX-NEXT: v_mov_b32_e32 v1, s3
-; GFX-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
-; GFX-NEXT: s_endpgm
-;
-; VI-LABEL: v_icmp_u64_ult:
-; VI: ; %bb.0:
-; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
-; VI-NEXT: v_mov_b32_e32 v0, 0x64
-; VI-NEXT: v_mov_b32_e32 v1, 0
-; VI-NEXT: s_waitcnt lgkmcnt(0)
-; VI-NEXT: v_cmp_lt_u64_e64 s[2:3], s[2:3], v[0:1]
-; VI-NEXT: v_mov_b32_e32 v2, s0
-; VI-NEXT: v_mov_b32_e32 v0, s2
-; VI-NEXT: v_mov_b32_e32 v3, s1
-; VI-NEXT: v_mov_b32_e32 v1, s3
-; VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
-; VI-NEXT: s_endpgm
- %result = call i64 @llvm.amdgcn.icmp.i64(i64 %src, i64 100, i32 36)
- store i64 %result, i64 addrspace(1)* %out
- ret void
-}
-
-define amdgpu_kernel void @v_icmp_u64_ule(i64 addrspace(1)* %out, i64 %src) {
-; GFX-LABEL: v_icmp_u64_ule:
-; GFX: ; %bb.0:
-; GFX-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
-; GFX-NEXT: s_mov_b32 s7, 0xf000
-; GFX-NEXT: v_mov_b32_e32 v0, 0x64
-; GFX-NEXT: v_mov_b32_e32 v1, 0
-; GFX-NEXT: s_waitcnt lgkmcnt(0)
-; GFX-NEXT: v_cmp_le_u64_e64 s[2:3], s[2:3], v[0:1]
-; GFX-NEXT: s_mov_b32 s6, -1
-; GFX-NEXT: s_mov_b32 s4, s0
-; GFX-NEXT: s_mov_b32 s5, s1
-; GFX-NEXT: v_mov_b32_e32 v0, s2
-; GFX-NEXT: v_mov_b32_e32 v1, s3
-; GFX-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
-; GFX-NEXT: s_endpgm
-;
-; VI-LABEL: v_icmp_u64_ule:
-; VI: ; %bb.0:
-; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
-; VI-NEXT: v_mov_b32_e32 v0, 0x64
-; VI-NEXT: v_mov_b32_e32 v1, 0
-; VI-NEXT: s_waitcnt lgkmcnt(0)
-; VI-NEXT: v_cmp_le_u64_e64 s[2:3], s[2:3], v[0:1]
-; VI-NEXT: v_mov_b32_e32 v2, s0
-; VI-NEXT: v_mov_b32_e32 v0, s2
-; VI-NEXT: v_mov_b32_e32 v3, s1
-; VI-NEXT: v_mov_b32_e32 v1, s3
-; VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
-; VI-NEXT: s_endpgm
- %result = call i64 @llvm.amdgcn.icmp.i64(i64 %src, i64 100, i32 37)
- store i64 %result, i64 addrspace(1)* %out
- ret void
-}
-
-define amdgpu_kernel void @v_icmp_i64_sgt(i64 addrspace(1)* %out, i64 %src) {
-; GFX-LABEL: v_icmp_i64_sgt:
-; GFX: ; %bb.0:
-; GFX-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
-; GFX-NEXT: s_mov_b32 s7, 0xf000
-; GFX-NEXT: v_mov_b32_e32 v0, 0x64
-; GFX-NEXT: v_mov_b32_e32 v1, 0
-; GFX-NEXT: s_waitcnt lgkmcnt(0)
-; GFX-NEXT: v_cmp_gt_i64_e64 s[2:3], s[2:3], v[0:1]
-; GFX-NEXT: s_mov_b32 s6, -1
-; GFX-NEXT: s_mov_b32 s4, s0
-; GFX-NEXT: s_mov_b32 s5, s1
-; GFX-NEXT: v_mov_b32_e32 v0, s2
-; GFX-NEXT: v_mov_b32_e32 v1, s3
-; GFX-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
-; GFX-NEXT: s_endpgm
-;
-; VI-LABEL: v_icmp_i64_sgt:
-; VI: ; %bb.0:
-; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
-; VI-NEXT: v_mov_b32_e32 v0, 0x64
-; VI-NEXT: v_mov_b32_e32 v1, 0
-; VI-NEXT: s_waitcnt lgkmcnt(0)
-; VI-NEXT: v_cmp_gt_i64_e64 s[2:3], s[2:3], v[0:1]
-; VI-NEXT: v_mov_b32_e32 v2, s0
-; VI-NEXT: v_mov_b32_e32 v0, s2
-; VI-NEXT: v_mov_b32_e32 v3, s1
-; VI-NEXT: v_mov_b32_e32 v1, s3
-; VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
-; VI-NEXT: s_endpgm
- %result = call i64 @llvm.amdgcn.icmp.i64(i64 %src, i64 100, i32 38)
- store i64 %result, i64 addrspace(1)* %out
- ret void
-}
-
-define amdgpu_kernel void @v_icmp_i64_sge(i64 addrspace(1)* %out, i64 %src) {
-; GFX-LABEL: v_icmp_i64_sge:
-; GFX: ; %bb.0:
-; GFX-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
-; GFX-NEXT: s_mov_b32 s7, 0xf000
-; GFX-NEXT: v_mov_b32_e32 v0, 0x64
-; GFX-NEXT: v_mov_b32_e32 v1, 0
-; GFX-NEXT: s_waitcnt lgkmcnt(0)
-; GFX-NEXT: v_cmp_ge_i64_e64 s[2:3], s[2:3], v[0:1]
-; GFX-NEXT: s_mov_b32 s6, -1
-; GFX-NEXT: s_mov_b32 s4, s0
-; GFX-NEXT: s_mov_b32 s5, s1
-; GFX-NEXT: v_mov_b32_e32 v0, s2
-; GFX-NEXT: v_mov_b32_e32 v1, s3
-; GFX-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
-; GFX-NEXT: s_endpgm
-;
-; VI-LABEL: v_icmp_i64_sge:
-; VI: ; %bb.0:
-; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
-; VI-NEXT: v_mov_b32_e32 v0, 0x64
-; VI-NEXT: v_mov_b32_e32 v1, 0
-; VI-NEXT: s_waitcnt lgkmcnt(0)
-; VI-NEXT: v_cmp_ge_i64_e64 s[2:3], s[2:3], v[0:1]
-; VI-NEXT: v_mov_b32_e32 v2, s0
-; VI-NEXT: v_mov_b32_e32 v0, s2
-; VI-NEXT: v_mov_b32_e32 v3, s1
-; VI-NEXT: v_mov_b32_e32 v1, s3
-; VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
-; VI-NEXT: s_endpgm
- %result = call i64 @llvm.amdgcn.icmp.i64(i64 %src, i64 100, i32 39)
- store i64 %result, i64 addrspace(1)* %out
- ret void
-}
-
-define amdgpu_kernel void @v_icmp_i64_slt(i64 addrspace(1)* %out, i64 %src) {
-; GFX-LABEL: v_icmp_i64_slt:
-; GFX: ; %bb.0:
-; GFX-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
-; GFX-NEXT: s_mov_b32 s7, 0xf000
-; GFX-NEXT: v_mov_b32_e32 v0, 0x64
-; GFX-NEXT: v_mov_b32_e32 v1, 0
-; GFX-NEXT: s_waitcnt lgkmcnt(0)
-; GFX-NEXT: v_cmp_lt_i64_e64 s[2:3], s[2:3], v[0:1]
-; GFX-NEXT: s_mov_b32 s6, -1
-; GFX-NEXT: s_mov_b32 s4, s0
-; GFX-NEXT: s_mov_b32 s5, s1
-; GFX-NEXT: v_mov_b32_e32 v0, s2
-; GFX-NEXT: v_mov_b32_e32 v1, s3
-; GFX-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
-; GFX-NEXT: s_endpgm
-;
-; VI-LABEL: v_icmp_i64_slt:
-; VI: ; %bb.0:
-; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
-; VI-NEXT: v_mov_b32_e32 v0, 0x64
-; VI-NEXT: v_mov_b32_e32 v1, 0
-; VI-NEXT: s_waitcnt lgkmcnt(0)
-; VI-NEXT: v_cmp_lt_i64_e64 s[2:3], s[2:3], v[0:1]
-; VI-NEXT: v_mov_b32_e32 v2, s0
-; VI-NEXT: v_mov_b32_e32 v0, s2
-; VI-NEXT: v_mov_b32_e32 v3, s1
-; VI-NEXT: v_mov_b32_e32 v1, s3
-; VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
-; VI-NEXT: s_endpgm
- %result = call i64 @llvm.amdgcn.icmp.i64(i64 %src, i64 100, i32 40)
- store i64 %result, i64 addrspace(1)* %out
- ret void
-}
-
-define amdgpu_kernel void @v_icmp_i64_sle(i64 addrspace(1)* %out, i64 %src) {
-; GFX-LABEL: v_icmp_i64_sle:
-; GFX: ; %bb.0:
-; GFX-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
-; GFX-NEXT: s_mov_b32 s7, 0xf000
-; GFX-NEXT: v_mov_b32_e32 v0, 0x64
-; GFX-NEXT: v_mov_b32_e32 v1, 0
-; GFX-NEXT: s_waitcnt lgkmcnt(0)
-; GFX-NEXT: v_cmp_le_i64_e64 s[2:3], s[2:3], v[0:1]
-; GFX-NEXT: s_mov_b32 s6, -1
-; GFX-NEXT: s_mov_b32 s4, s0
-; GFX-NEXT: s_mov_b32 s5, s1
-; GFX-NEXT: v_mov_b32_e32 v0, s2
-; GFX-NEXT: v_mov_b32_e32 v1, s3
-; GFX-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
-; GFX-NEXT: s_endpgm
-;
-; VI-LABEL: v_icmp_i64_sle:
-; VI: ; %bb.0:
-; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
-; VI-NEXT: v_mov_b32_e32 v0, 0x64
-; VI-NEXT: v_mov_b32_e32 v1, 0
-; VI-NEXT: s_waitcnt lgkmcnt(0)
-; VI-NEXT: v_cmp_le_i64_e64 s[2:3], s[2:3], v[0:1]
-; VI-NEXT: v_mov_b32_e32 v2, s0
-; VI-NEXT: v_mov_b32_e32 v0, s2
-; VI-NEXT: v_mov_b32_e32 v3, s1
-; VI-NEXT: v_mov_b32_e32 v1, s3
-; VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
-; VI-NEXT: s_endpgm
- %result = call i64 @llvm.amdgcn.icmp.i64(i64 %src, i64 100, i32 41)
- store i64 %result, i64 addrspace(1)* %out
- ret void
-}
-
-define amdgpu_kernel void @v_icmp_i16_eq(i64 addrspace(1)* %out, i16 %src) {
-; GFX-LABEL: v_icmp_i16_eq:
-; GFX: ; %bb.0:
-; GFX-NEXT: s_load_dword s2, s[0:1], 0xb
-; GFX-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
-; GFX-NEXT: s_mov_b32 s3, 0xf000
-; GFX-NEXT: v_mov_b32_e32 v0, 0x64
-; GFX-NEXT: s_waitcnt lgkmcnt(0)
-; GFX-NEXT: s_and_b32 s2, s2, 0xffff
-; GFX-NEXT: v_cmp_eq_u32_e64 s[4:5], s2, v0
-; GFX-NEXT: s_mov_b32 s2, -1
-; GFX-NEXT: v_mov_b32_e32 v0, s4
-; GFX-NEXT: v_mov_b32_e32 v1, s5
-; GFX-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
-; GFX-NEXT: s_endpgm
-;
-; VI-LABEL: v_icmp_i16_eq:
-; VI: ; %bb.0:
-; VI-NEXT: s_load_dword s2, s[0:1], 0x2c
-; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
-; VI-NEXT: v_mov_b32_e32 v0, 0x64
-; VI-NEXT: s_waitcnt lgkmcnt(0)
-; VI-NEXT: v_cmp_eq_u16_e64 s[2:3], s2, v0
-; VI-NEXT: v_mov_b32_e32 v0, s0
-; VI-NEXT: v_mov_b32_e32 v2, s2
-; VI-NEXT: v_mov_b32_e32 v1, s1
-; VI-NEXT: v_mov_b32_e32 v3, s3
-; VI-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
-; VI-NEXT: s_endpgm
- %result = call i64 @llvm.amdgcn.icmp.i16(i16 %src, i16 100, i32 32)
- store i64 %result, i64 addrspace(1)* %out
- ret void
-}
-
-define amdgpu_kernel void @v_icmp_i16(i64 addrspace(1)* %out, i16 %src) {
-; GCN-LABEL: v_icmp_i16:
-; GCN: ; %bb.0:
-; GCN-NEXT: s_endpgm
- %result = call i64 @llvm.amdgcn.icmp.i16(i16 %src, i16 100, i32 30)
- store i64 %result, i64 addrspace(1)* %out
- ret void
-}
-
-define amdgpu_kernel void @v_icmp_i16_ne(i64 addrspace(1)* %out, i16 %src) {
-; GFX-LABEL: v_icmp_i16_ne:
-; GFX: ; %bb.0:
-; GFX-NEXT: s_load_dword s2, s[0:1], 0xb
-; GFX-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
-; GFX-NEXT: s_mov_b32 s3, 0xf000
-; GFX-NEXT: v_mov_b32_e32 v0, 0x64
-; GFX-NEXT: s_waitcnt lgkmcnt(0)
-; GFX-NEXT: s_and_b32 s2, s2, 0xffff
-; GFX-NEXT: v_cmp_ne_u32_e64 s[4:5], s2, v0
-; GFX-NEXT: s_mov_b32 s2, -1
-; GFX-NEXT: v_mov_b32_e32 v0, s4
-; GFX-NEXT: v_mov_b32_e32 v1, s5
-; GFX-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
-; GFX-NEXT: s_endpgm
-;
-; VI-LABEL: v_icmp_i16_ne:
-; VI: ; %bb.0:
-; VI-NEXT: s_load_dword s2, s[0:1], 0x2c
-; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
-; VI-NEXT: v_mov_b32_e32 v0, 0x64
-; VI-NEXT: s_waitcnt lgkmcnt(0)
-; VI-NEXT: v_cmp_ne_u16_e64 s[2:3], s2, v0
-; VI-NEXT: v_mov_b32_e32 v0, s0
-; VI-NEXT: v_mov_b32_e32 v2, s2
-; VI-NEXT: v_mov_b32_e32 v1, s1
-; VI-NEXT: v_mov_b32_e32 v3, s3
-; VI-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
-; VI-NEXT: s_endpgm
- %result = call i64 @llvm.amdgcn.icmp.i16(i16 %src, i16 100, i32 33)
- store i64 %result, i64 addrspace(1)* %out
- ret void
-}
-
-define amdgpu_kernel void @v_icmp_i16_ugt(i64 addrspace(1)* %out, i16 %src) {
-; GFX-LABEL: v_icmp_i16_ugt:
-; GFX: ; %bb.0:
-; GFX-NEXT: s_load_dword s2, s[0:1], 0xb
-; GFX-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
-; GFX-NEXT: s_mov_b32 s3, 0xf000
-; GFX-NEXT: v_mov_b32_e32 v0, 0x64
-; GFX-NEXT: s_waitcnt lgkmcnt(0)
-; GFX-NEXT: s_and_b32 s2, s2, 0xffff
-; GFX-NEXT: v_cmp_gt_u32_e64 s[4:5], s2, v0
-; GFX-NEXT: s_mov_b32 s2, -1
-; GFX-NEXT: v_mov_b32_e32 v0, s4
-; GFX-NEXT: v_mov_b32_e32 v1, s5
-; GFX-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
-; GFX-NEXT: s_endpgm
-;
-; VI-LABEL: v_icmp_i16_ugt:
-; VI: ; %bb.0:
-; VI-NEXT: s_load_dword s2, s[0:1], 0x2c
-; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
-; VI-NEXT: v_mov_b32_e32 v0, 0x64
-; VI-NEXT: s_waitcnt lgkmcnt(0)
-; VI-NEXT: v_cmp_gt_u16_e64 s[2:3], s2, v0
-; VI-NEXT: v_mov_b32_e32 v0, s0
-; VI-NEXT: v_mov_b32_e32 v2, s2
-; VI-NEXT: v_mov_b32_e32 v1, s1
-; VI-NEXT: v_mov_b32_e32 v3, s3
-; VI-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
-; VI-NEXT: s_endpgm
- %result = call i64 @llvm.amdgcn.icmp.i16(i16 %src, i16 100, i32 34)
- store i64 %result, i64 addrspace(1)* %out
- ret void
-}
-
-define amdgpu_kernel void @v_icmp_i16_uge(i64 addrspace(1)* %out, i16 %src) {
-; GFX-LABEL: v_icmp_i16_uge:
-; GFX: ; %bb.0:
-; GFX-NEXT: s_load_dword s2, s[0:1], 0xb
-; GFX-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
-; GFX-NEXT: s_mov_b32 s3, 0xf000
-; GFX-NEXT: v_mov_b32_e32 v0, 0x64
-; GFX-NEXT: s_waitcnt lgkmcnt(0)
-; GFX-NEXT: s_and_b32 s2, s2, 0xffff
-; GFX-NEXT: v_cmp_ge_u32_e64 s[4:5], s2, v0
-; GFX-NEXT: s_mov_b32 s2, -1
-; GFX-NEXT: v_mov_b32_e32 v0, s4
-; GFX-NEXT: v_mov_b32_e32 v1, s5
-; GFX-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
-; GFX-NEXT: s_endpgm
-;
-; VI-LABEL: v_icmp_i16_uge:
-; VI: ; %bb.0:
-; VI-NEXT: s_load_dword s2, s[0:1], 0x2c
-; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
-; VI-NEXT: v_mov_b32_e32 v0, 0x64
-; VI-NEXT: s_waitcnt lgkmcnt(0)
-; VI-NEXT: v_cmp_ge_u16_e64 s[2:3], s2, v0
-; VI-NEXT: v_mov_b32_e32 v0, s0
-; VI-NEXT: v_mov_b32_e32 v2, s2
-; VI-NEXT: v_mov_b32_e32 v1, s1
-; VI-NEXT: v_mov_b32_e32 v3, s3
-; VI-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
-; VI-NEXT: s_endpgm
- %result = call i64 @llvm.amdgcn.icmp.i16(i16 %src, i16 100, i32 35)
- store i64 %result, i64 addrspace(1)* %out
- ret void
-}
-
-define amdgpu_kernel void @v_icmp_i16_ult(i64 addrspace(1)* %out, i16 %src) {
-; GFX-LABEL: v_icmp_i16_ult:
-; GFX: ; %bb.0:
-; GFX-NEXT: s_load_dword s2, s[0:1], 0xb
-; GFX-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
-; GFX-NEXT: s_mov_b32 s3, 0xf000
-; GFX-NEXT: v_mov_b32_e32 v0, 0x64
-; GFX-NEXT: s_waitcnt lgkmcnt(0)
-; GFX-NEXT: s_and_b32 s2, s2, 0xffff
-; GFX-NEXT: v_cmp_lt_u32_e64 s[4:5], s2, v0
-; GFX-NEXT: s_mov_b32 s2, -1
-; GFX-NEXT: v_mov_b32_e32 v0, s4
-; GFX-NEXT: v_mov_b32_e32 v1, s5
-; GFX-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
-; GFX-NEXT: s_endpgm
-;
-; VI-LABEL: v_icmp_i16_ult:
-; VI: ; %bb.0:
-; VI-NEXT: s_load_dword s2, s[0:1], 0x2c
-; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
-; VI-NEXT: v_mov_b32_e32 v0, 0x64
-; VI-NEXT: s_waitcnt lgkmcnt(0)
-; VI-NEXT: v_cmp_lt_u16_e64 s[2:3], s2, v0
-; VI-NEXT: v_mov_b32_e32 v0, s0
-; VI-NEXT: v_mov_b32_e32 v2, s2
-; VI-NEXT: v_mov_b32_e32 v1, s1
-; VI-NEXT: v_mov_b32_e32 v3, s3
-; VI-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
-; VI-NEXT: s_endpgm
- %result = call i64 @llvm.amdgcn.icmp.i16(i16 %src, i16 100, i32 36)
- store i64 %result, i64 addrspace(1)* %out
- ret void
-}
-
-define amdgpu_kernel void @v_icmp_i16_ule(i64 addrspace(1)* %out, i16 %src) {
-; GFX-LABEL: v_icmp_i16_ule:
-; GFX: ; %bb.0:
-; GFX-NEXT: s_load_dword s2, s[0:1], 0xb
-; GFX-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
-; GFX-NEXT: s_mov_b32 s3, 0xf000
-; GFX-NEXT: v_mov_b32_e32 v0, 0x64
-; GFX-NEXT: s_waitcnt lgkmcnt(0)
-; GFX-NEXT: s_and_b32 s2, s2, 0xffff
-; GFX-NEXT: v_cmp_le_u32_e64 s[4:5], s2, v0
-; GFX-NEXT: s_mov_b32 s2, -1
-; GFX-NEXT: v_mov_b32_e32 v0, s4
-; GFX-NEXT: v_mov_b32_e32 v1, s5
-; GFX-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
-; GFX-NEXT: s_endpgm
-;
-; VI-LABEL: v_icmp_i16_ule:
-; VI: ; %bb.0:
-; VI-NEXT: s_load_dword s2, s[0:1], 0x2c
-; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
-; VI-NEXT: v_mov_b32_e32 v0, 0x64
-; VI-NEXT: s_waitcnt lgkmcnt(0)
-; VI-NEXT: v_cmp_le_u16_e64 s[2:3], s2, v0
-; VI-NEXT: v_mov_b32_e32 v0, s0
-; VI-NEXT: v_mov_b32_e32 v2, s2
-; VI-NEXT: v_mov_b32_e32 v1, s1
-; VI-NEXT: v_mov_b32_e32 v3, s3
-; VI-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
-; VI-NEXT: s_endpgm
- %result = call i64 @llvm.amdgcn.icmp.i16(i16 %src, i16 100, i32 37)
- store i64 %result, i64 addrspace(1)* %out
- ret void
-}
-
-define amdgpu_kernel void @v_icmp_i16_sgt(i64 addrspace(1)* %out, i16 %src) #1 {
-; GFX-LABEL: v_icmp_i16_sgt:
-; GFX: ; %bb.0:
-; GFX-NEXT: s_load_dword s2, s[0:1], 0xb
-; GFX-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
-; GFX-NEXT: s_mov_b32 s3, 0xf000
-; GFX-NEXT: v_mov_b32_e32 v0, 0x64
-; GFX-NEXT: s_waitcnt lgkmcnt(0)
-; GFX-NEXT: s_sext_i32_i16 s2, s2
-; GFX-NEXT: v_cmp_gt_i32_e64 s[4:5], s2, v0
-; GFX-NEXT: s_mov_b32 s2, -1
-; GFX-NEXT: v_mov_b32_e32 v0, s4
-; GFX-NEXT: v_mov_b32_e32 v1, s5
-; GFX-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
-; GFX-NEXT: s_endpgm
-;
-; VI-LABEL: v_icmp_i16_sgt:
-; VI: ; %bb.0:
-; VI-NEXT: s_load_dword s2, s[0:1], 0x2c
-; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
-; VI-NEXT: v_mov_b32_e32 v0, 0x64
-; VI-NEXT: s_waitcnt lgkmcnt(0)
-; VI-NEXT: v_cmp_gt_i16_e64 s[2:3], s2, v0
-; VI-NEXT: v_mov_b32_e32 v0, s0
-; VI-NEXT: v_mov_b32_e32 v2, s2
-; VI-NEXT: v_mov_b32_e32 v1, s1
-; VI-NEXT: v_mov_b32_e32 v3, s3
-; VI-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
-; VI-NEXT: s_endpgm
- %result = call i64 @llvm.amdgcn.icmp.i16(i16 %src, i16 100, i32 38)
- store i64 %result, i64 addrspace(1)* %out
- ret void
-}
-
-define amdgpu_kernel void @v_icmp_i16_sge(i64 addrspace(1)* %out, i16 %src) {
-; GFX-LABEL: v_icmp_i16_sge:
-; GFX: ; %bb.0:
-; GFX-NEXT: s_load_dword s2, s[0:1], 0xb
-; GFX-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
-; GFX-NEXT: s_mov_b32 s3, 0xf000
-; GFX-NEXT: v_mov_b32_e32 v0, 0x64
-; GFX-NEXT: s_waitcnt lgkmcnt(0)
-; GFX-NEXT: s_sext_i32_i16 s2, s2
-; GFX-NEXT: v_cmp_ge_i32_e64 s[4:5], s2, v0
-; GFX-NEXT: s_mov_b32 s2, -1
-; GFX-NEXT: v_mov_b32_e32 v0, s4
-; GFX-NEXT: v_mov_b32_e32 v1, s5
-; GFX-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
-; GFX-NEXT: s_endpgm
-;
-; VI-LABEL: v_icmp_i16_sge:
-; VI: ; %bb.0:
-; VI-NEXT: s_load_dword s2, s[0:1], 0x2c
-; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
-; VI-NEXT: v_mov_b32_e32 v0, 0x64
-; VI-NEXT: s_waitcnt lgkmcnt(0)
-; VI-NEXT: v_cmp_ge_i16_e64 s[2:3], s2, v0
-; VI-NEXT: v_mov_b32_e32 v0, s0
-; VI-NEXT: v_mov_b32_e32 v2, s2
-; VI-NEXT: v_mov_b32_e32 v1, s1
-; VI-NEXT: v_mov_b32_e32 v3, s3
-; VI-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
-; VI-NEXT: s_endpgm
- %result = call i64 @llvm.amdgcn.icmp.i16(i16 %src, i16 100, i32 39)
- store i64 %result, i64 addrspace(1)* %out
- ret void
-}
-
-define amdgpu_kernel void @v_icmp_i16_slt(i64 addrspace(1)* %out, i16 %src) {
-; GFX-LABEL: v_icmp_i16_slt:
-; GFX: ; %bb.0:
-; GFX-NEXT: s_load_dword s2, s[0:1], 0xb
-; GFX-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
-; GFX-NEXT: s_mov_b32 s3, 0xf000
-; GFX-NEXT: v_mov_b32_e32 v0, 0x64
-; GFX-NEXT: s_waitcnt lgkmcnt(0)
-; GFX-NEXT: s_sext_i32_i16 s2, s2
-; GFX-NEXT: v_cmp_lt_i32_e64 s[4:5], s2, v0
-; GFX-NEXT: s_mov_b32 s2, -1
-; GFX-NEXT: v_mov_b32_e32 v0, s4
-; GFX-NEXT: v_mov_b32_e32 v1, s5
-; GFX-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
-; GFX-NEXT: s_endpgm
-;
-; VI-LABEL: v_icmp_i16_slt:
-; VI: ; %bb.0:
-; VI-NEXT: s_load_dword s2, s[0:1], 0x2c
-; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
-; VI-NEXT: v_mov_b32_e32 v0, 0x64
-; VI-NEXT: s_waitcnt lgkmcnt(0)
-; VI-NEXT: v_cmp_lt_i16_e64 s[2:3], s2, v0
-; VI-NEXT: v_mov_b32_e32 v0, s0
-; VI-NEXT: v_mov_b32_e32 v2, s2
-; VI-NEXT: v_mov_b32_e32 v1, s1
-; VI-NEXT: v_mov_b32_e32 v3, s3
-; VI-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
-; VI-NEXT: s_endpgm
- %result = call i64 @llvm.amdgcn.icmp.i16(i16 %src, i16 100, i32 40)
- store i64 %result, i64 addrspace(1)* %out
- ret void
-}
-
-define amdgpu_kernel void @v_icmp_i16_sle(i64 addrspace(1)* %out, i16 %src) {
-; GFX-LABEL: v_icmp_i16_sle:
-; GFX: ; %bb.0:
-; GFX-NEXT: s_load_dword s2, s[0:1], 0xb
-; GFX-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
-; GFX-NEXT: s_mov_b32 s3, 0xf000
-; GFX-NEXT: v_mov_b32_e32 v0, 0x64
-; GFX-NEXT: s_waitcnt lgkmcnt(0)
-; GFX-NEXT: s_sext_i32_i16 s2, s2
-; GFX-NEXT: v_cmp_le_i32_e64 s[4:5], s2, v0
-; GFX-NEXT: s_mov_b32 s2, -1
-; GFX-NEXT: v_mov_b32_e32 v0, s4
-; GFX-NEXT: v_mov_b32_e32 v1, s5
-; GFX-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
-; GFX-NEXT: s_endpgm
-;
-; VI-LABEL: v_icmp_i16_sle:
-; VI: ; %bb.0:
-; VI-NEXT: s_load_dword s2, s[0:1], 0x2c
-; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
-; VI-NEXT: v_mov_b32_e32 v0, 0x64
-; VI-NEXT: s_waitcnt lgkmcnt(0)
-; VI-NEXT: v_cmp_le_i16_e64 s[2:3], s2, v0
-; VI-NEXT: v_mov_b32_e32 v0, s0
-; VI-NEXT: v_mov_b32_e32 v2, s2
-; VI-NEXT: v_mov_b32_e32 v1, s1
-; VI-NEXT: v_mov_b32_e32 v3, s3
-; VI-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
-; VI-NEXT: s_endpgm
- %result = call i64 @llvm.amdgcn.icmp.i16(i16 %src, i16 100, i32 41)
- store i64 %result, i64 addrspace(1)* %out
- ret void
-}
-
-define amdgpu_kernel void @v_icmp_i1_ne0(i64 addrspace(1)* %out, i32 %a, i32 %b) {
-; GFX-LABEL: v_icmp_i1_ne0:
-; GFX: ; %bb.0:
-; GFX-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
-; GFX-NEXT: s_mov_b32 s7, 0xf000
-; GFX-NEXT: s_waitcnt lgkmcnt(0)
-; GFX-NEXT: s_cmp_gt_u32 s2, 1
-; GFX-NEXT: s_cselect_b64 s[4:5], -1, 0
-; GFX-NEXT: s_cmp_gt_u32 s3, 2
-; GFX-NEXT: s_cselect_b64 s[2:3], -1, 0
-; GFX-NEXT: s_and_b64 s[2:3], s[4:5], s[2:3]
-; GFX-NEXT: s_mov_b32 s6, -1
-; GFX-NEXT: s_mov_b32 s4, s0
-; GFX-NEXT: s_mov_b32 s5, s1
-; GFX-NEXT: v_mov_b32_e32 v0, s2
-; GFX-NEXT: v_mov_b32_e32 v1, s3
-; GFX-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
-; GFX-NEXT: s_endpgm
-;
-; VI-LABEL: v_icmp_i1_ne0:
-; VI: ; %bb.0:
-; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
-; VI-NEXT: s_waitcnt lgkmcnt(0)
-; VI-NEXT: s_cmp_gt_u32 s2, 1
-; VI-NEXT: s_cselect_b64 s[4:5], -1, 0
-; VI-NEXT: s_cmp_gt_u32 s3, 2
-; VI-NEXT: s_cselect_b64 s[2:3], -1, 0
-; VI-NEXT: s_and_b64 s[2:3], s[4:5], s[2:3]
-; VI-NEXT: v_mov_b32_e32 v0, s0
-; VI-NEXT: v_mov_b32_e32 v2, s2
-; VI-NEXT: v_mov_b32_e32 v1, s1
-; VI-NEXT: v_mov_b32_e32 v3, s3
-; VI-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
-; VI-NEXT: s_endpgm
- %c0 = icmp ugt i32 %a, 1
- %c1 = icmp ugt i32 %b, 2
- %src = and i1 %c0, %c1
- %result = call i64 @llvm.amdgcn.icmp.i1(i1 %src, i1 false, i32 33)
- store i64 %result, i64 addrspace(1)* %out
- ret void
-}
-
-define amdgpu_ps void @test_intr_icmp_i32_invalid_cc(i32 addrspace(1)* %out, i32 %src) {
-; GCN-LABEL: test_intr_icmp_i32_invalid_cc:
-; GCN: ; %bb.0:
-; GCN-NEXT: s_endpgm
- %result = call i32 @llvm.amdgcn.icmp.i32.i32(i32 %src, i32 100, i32 9999)
- store i32 %result, i32 addrspace(1)* %out
- ret void
-}
-
-attributes #0 = { nounwind readnone convergent }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.icmp.w32.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.icmp.w32.ll
new file mode 100644
index 0000000000000..33681c3c96b74
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.icmp.w32.ll
@@ -0,0 +1,1784 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -march=amdgcn -mcpu=gfx1100 -mattr="+wavefrontsize32" -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX11,SDAG-GFX11 %s
+; RUN: llc -march=amdgcn -mcpu=gfx1010 -mattr="+wavefrontsize32" -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX10,SDAG-GFX10 %s
+
+; RUN: llc -global-isel -global-isel-abort=2 -march=amdgcn -mcpu=gfx1100 -mattr="+wavefrontsize32" -verify-machineinstrs < %s 2>%t | FileCheck -check-prefixes=GCN,GFX11,GISEL-GFX11 %s
+; RUN: FileCheck --check-prefix=ERR %s < %t
+; RUN: llc -global-isel -global-isel-abort=2 -march=amdgcn -mcpu=gfx1010 -mattr="+wavefrontsize32" -verify-machineinstrs < %s 2>%t | FileCheck -check-prefixes=GCN,GFX10,GISEL-GFX10 %s
+; RUN: FileCheck --check-prefix=ERR %s < %t
+
+; Note: GlobalISel abort is disabled so we don't crash on i1 inputs.
+; They are allowed in DAGISel but we (intentionally) don't support them
+; in GlobalISel.
+
+; ERR: warning: Instruction selection used fallback path for v_icmp_i1_ne0
+
+declare i32 @llvm.amdgcn.icmp.i32(i32, i32, i32) #0
+declare i32 @llvm.amdgcn.icmp.i64(i64, i64, i32) #0
+declare i32 @llvm.amdgcn.icmp.i16(i16, i16, i32) #0
+declare i32 @llvm.amdgcn.icmp.i1(i1, i1, i32) #0
+
+define amdgpu_kernel void @v_icmp_i32_eq(i32 addrspace(1)* %out, i32 %src) {
+; SDAG-GFX11-LABEL: v_icmp_i32_eq:
+; SDAG-GFX11: ; %bb.0:
+; SDAG-GFX11-NEXT: s_clause 0x1
+; SDAG-GFX11-NEXT: s_load_b32 s2, s[0:1], 0x2c
+; SDAG-GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
+; SDAG-GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; SDAG-GFX11-NEXT: v_cmp_eq_u32_e64 s2, 0x64, s2
+; SDAG-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; SDAG-GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
+; SDAG-GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; SDAG-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; SDAG-GFX11-NEXT: s_endpgm
+;
+; SDAG-GFX10-LABEL: v_icmp_i32_eq:
+; SDAG-GFX10: ; %bb.0:
+; SDAG-GFX10-NEXT: s_clause 0x1
+; SDAG-GFX10-NEXT: s_load_dword s4, s[0:1], 0x2c
+; SDAG-GFX10-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
+; SDAG-GFX10-NEXT: v_mov_b32_e32 v0, 0
+; SDAG-GFX10-NEXT: s_waitcnt lgkmcnt(0)
+; SDAG-GFX10-NEXT: v_cmp_eq_u32_e64 s0, 0x64, s4
+; SDAG-GFX10-NEXT: v_mov_b32_e32 v1, s0
+; SDAG-GFX10-NEXT: global_store_dword v0, v1, s[2:3]
+; SDAG-GFX10-NEXT: s_endpgm
+;
+; GISEL-GFX11-LABEL: v_icmp_i32_eq:
+; GISEL-GFX11: ; %bb.0:
+; GISEL-GFX11-NEXT: s_clause 0x1
+; GISEL-GFX11-NEXT: s_load_b32 s2, s[0:1], 0x2c
+; GISEL-GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
+; GISEL-GFX11-NEXT: v_mov_b32_e32 v1, 0
+; GISEL-GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; GISEL-GFX11-NEXT: v_cmp_eq_u32_e64 s2, 0x64, s2
+; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GISEL-GFX11-NEXT: v_mov_b32_e32 v0, s2
+; GISEL-GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
+; GISEL-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GISEL-GFX11-NEXT: s_endpgm
+;
+; GISEL-GFX10-LABEL: v_icmp_i32_eq:
+; GISEL-GFX10: ; %bb.0:
+; GISEL-GFX10-NEXT: s_clause 0x1
+; GISEL-GFX10-NEXT: s_load_dword s4, s[0:1], 0x2c
+; GISEL-GFX10-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
+; GISEL-GFX10-NEXT: v_mov_b32_e32 v1, 0
+; GISEL-GFX10-NEXT: s_waitcnt lgkmcnt(0)
+; GISEL-GFX10-NEXT: v_cmp_eq_u32_e64 s0, 0x64, s4
+; GISEL-GFX10-NEXT: v_mov_b32_e32 v0, s0
+; GISEL-GFX10-NEXT: global_store_dword v1, v0, s[2:3]
+; GISEL-GFX10-NEXT: s_endpgm
+ %result = call i32 @llvm.amdgcn.icmp.i32(i32 %src, i32 100, i32 32)
+ store i32 %result, i32 addrspace(1)* %out
+ ret void
+}
+
+define amdgpu_kernel void @v_icmp_i32(i32 addrspace(1)* %out, i32 %src) {
+; SDAG-GFX11-LABEL: v_icmp_i32:
+; SDAG-GFX11: ; %bb.0:
+; SDAG-GFX11-NEXT: s_endpgm
+;
+; SDAG-GFX10-LABEL: v_icmp_i32:
+; SDAG-GFX10: ; %bb.0:
+; SDAG-GFX10-NEXT: s_endpgm
+;
+; GISEL-GFX11-LABEL: v_icmp_i32:
+; GISEL-GFX11: ; %bb.0:
+; GISEL-GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
+; GISEL-GFX11-NEXT: v_mov_b32_e32 v0, 0
+; GISEL-GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; GISEL-GFX11-NEXT: global_store_b32 v0, v0, s[0:1]
+; GISEL-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GISEL-GFX11-NEXT: s_endpgm
+;
+; GISEL-GFX10-LABEL: v_icmp_i32:
+; GISEL-GFX10: ; %bb.0:
+; GISEL-GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
+; GISEL-GFX10-NEXT: v_mov_b32_e32 v0, 0
+; GISEL-GFX10-NEXT: s_waitcnt lgkmcnt(0)
+; GISEL-GFX10-NEXT: global_store_dword v0, v0, s[0:1]
+; GISEL-GFX10-NEXT: s_endpgm
+ %result = call i32 @llvm.amdgcn.icmp.i32(i32 %src, i32 100, i32 30)
+ store i32 %result, i32 addrspace(1)* %out
+ ret void
+}
+
+define amdgpu_kernel void @v_icmp_i32_ne(i32 addrspace(1)* %out, i32 %src) {
+; SDAG-GFX11-LABEL: v_icmp_i32_ne:
+; SDAG-GFX11: ; %bb.0:
+; SDAG-GFX11-NEXT: s_clause 0x1
+; SDAG-GFX11-NEXT: s_load_b32 s2, s[0:1], 0x2c
+; SDAG-GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
+; SDAG-GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; SDAG-GFX11-NEXT: v_cmp_ne_u32_e64 s2, 0x64, s2
+; SDAG-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; SDAG-GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
+; SDAG-GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; SDAG-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; SDAG-GFX11-NEXT: s_endpgm
+;
+; SDAG-GFX10-LABEL: v_icmp_i32_ne:
+; SDAG-GFX10: ; %bb.0:
+; SDAG-GFX10-NEXT: s_clause 0x1
+; SDAG-GFX10-NEXT: s_load_dword s4, s[0:1], 0x2c
+; SDAG-GFX10-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
+; SDAG-GFX10-NEXT: v_mov_b32_e32 v0, 0
+; SDAG-GFX10-NEXT: s_waitcnt lgkmcnt(0)
+; SDAG-GFX10-NEXT: v_cmp_ne_u32_e64 s0, 0x64, s4
+; SDAG-GFX10-NEXT: v_mov_b32_e32 v1, s0
+; SDAG-GFX10-NEXT: global_store_dword v0, v1, s[2:3]
+; SDAG-GFX10-NEXT: s_endpgm
+;
+; GISEL-GFX11-LABEL: v_icmp_i32_ne:
+; GISEL-GFX11: ; %bb.0:
+; GISEL-GFX11-NEXT: s_clause 0x1
+; GISEL-GFX11-NEXT: s_load_b32 s2, s[0:1], 0x2c
+; GISEL-GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
+; GISEL-GFX11-NEXT: v_mov_b32_e32 v1, 0
+; GISEL-GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; GISEL-GFX11-NEXT: v_cmp_ne_u32_e64 s2, 0x64, s2
+; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GISEL-GFX11-NEXT: v_mov_b32_e32 v0, s2
+; GISEL-GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
+; GISEL-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GISEL-GFX11-NEXT: s_endpgm
+;
+; GISEL-GFX10-LABEL: v_icmp_i32_ne:
+; GISEL-GFX10: ; %bb.0:
+; GISEL-GFX10-NEXT: s_clause 0x1
+; GISEL-GFX10-NEXT: s_load_dword s4, s[0:1], 0x2c
+; GISEL-GFX10-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
+; GISEL-GFX10-NEXT: v_mov_b32_e32 v1, 0
+; GISEL-GFX10-NEXT: s_waitcnt lgkmcnt(0)
+; GISEL-GFX10-NEXT: v_cmp_ne_u32_e64 s0, 0x64, s4
+; GISEL-GFX10-NEXT: v_mov_b32_e32 v0, s0
+; GISEL-GFX10-NEXT: global_store_dword v1, v0, s[2:3]
+; GISEL-GFX10-NEXT: s_endpgm
+ %result = call i32 @llvm.amdgcn.icmp.i32(i32 %src, i32 100, i32 33)
+ store i32 %result, i32 addrspace(1)* %out
+ ret void
+}
+
+define amdgpu_kernel void @v_icmp_i32_ugt(i32 addrspace(1)* %out, i32 %src) {
+; SDAG-GFX11-LABEL: v_icmp_i32_ugt:
+; SDAG-GFX11: ; %bb.0:
+; SDAG-GFX11-NEXT: s_clause 0x1
+; SDAG-GFX11-NEXT: s_load_b32 s2, s[0:1], 0x2c
+; SDAG-GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
+; SDAG-GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; SDAG-GFX11-NEXT: v_cmp_lt_u32_e64 s2, 0x64, s2
+; SDAG-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; SDAG-GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
+; SDAG-GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; SDAG-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; SDAG-GFX11-NEXT: s_endpgm
+;
+; SDAG-GFX10-LABEL: v_icmp_i32_ugt:
+; SDAG-GFX10: ; %bb.0:
+; SDAG-GFX10-NEXT: s_clause 0x1
+; SDAG-GFX10-NEXT: s_load_dword s4, s[0:1], 0x2c
+; SDAG-GFX10-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
+; SDAG-GFX10-NEXT: v_mov_b32_e32 v0, 0
+; SDAG-GFX10-NEXT: s_waitcnt lgkmcnt(0)
+; SDAG-GFX10-NEXT: v_cmp_lt_u32_e64 s0, 0x64, s4
+; SDAG-GFX10-NEXT: v_mov_b32_e32 v1, s0
+; SDAG-GFX10-NEXT: global_store_dword v0, v1, s[2:3]
+; SDAG-GFX10-NEXT: s_endpgm
+;
+; GISEL-GFX11-LABEL: v_icmp_i32_ugt:
+; GISEL-GFX11: ; %bb.0:
+; GISEL-GFX11-NEXT: s_clause 0x1
+; GISEL-GFX11-NEXT: s_load_b32 s2, s[0:1], 0x2c
+; GISEL-GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
+; GISEL-GFX11-NEXT: v_mov_b32_e32 v1, 0
+; GISEL-GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; GISEL-GFX11-NEXT: v_cmp_lt_u32_e64 s2, 0x64, s2
+; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GISEL-GFX11-NEXT: v_mov_b32_e32 v0, s2
+; GISEL-GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
+; GISEL-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GISEL-GFX11-NEXT: s_endpgm
+;
+; GISEL-GFX10-LABEL: v_icmp_i32_ugt:
+; GISEL-GFX10: ; %bb.0:
+; GISEL-GFX10-NEXT: s_clause 0x1
+; GISEL-GFX10-NEXT: s_load_dword s4, s[0:1], 0x2c
+; GISEL-GFX10-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
+; GISEL-GFX10-NEXT: v_mov_b32_e32 v1, 0
+; GISEL-GFX10-NEXT: s_waitcnt lgkmcnt(0)
+; GISEL-GFX10-NEXT: v_cmp_lt_u32_e64 s0, 0x64, s4
+; GISEL-GFX10-NEXT: v_mov_b32_e32 v0, s0
+; GISEL-GFX10-NEXT: global_store_dword v1, v0, s[2:3]
+; GISEL-GFX10-NEXT: s_endpgm
+ %result = call i32 @llvm.amdgcn.icmp.i32(i32 %src, i32 100, i32 34)
+ store i32 %result, i32 addrspace(1)* %out
+ ret void
+}
+
+define amdgpu_kernel void @v_icmp_i32_uge(i32 addrspace(1)* %out, i32 %src) {
+; SDAG-GFX11-LABEL: v_icmp_i32_uge:
+; SDAG-GFX11: ; %bb.0:
+; SDAG-GFX11-NEXT: s_clause 0x1
+; SDAG-GFX11-NEXT: s_load_b32 s2, s[0:1], 0x2c
+; SDAG-GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
+; SDAG-GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; SDAG-GFX11-NEXT: v_cmp_le_u32_e64 s2, 0x64, s2
+; SDAG-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; SDAG-GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
+; SDAG-GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; SDAG-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; SDAG-GFX11-NEXT: s_endpgm
+;
+; SDAG-GFX10-LABEL: v_icmp_i32_uge:
+; SDAG-GFX10: ; %bb.0:
+; SDAG-GFX10-NEXT: s_clause 0x1
+; SDAG-GFX10-NEXT: s_load_dword s4, s[0:1], 0x2c
+; SDAG-GFX10-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
+; SDAG-GFX10-NEXT: v_mov_b32_e32 v0, 0
+; SDAG-GFX10-NEXT: s_waitcnt lgkmcnt(0)
+; SDAG-GFX10-NEXT: v_cmp_le_u32_e64 s0, 0x64, s4
+; SDAG-GFX10-NEXT: v_mov_b32_e32 v1, s0
+; SDAG-GFX10-NEXT: global_store_dword v0, v1, s[2:3]
+; SDAG-GFX10-NEXT: s_endpgm
+;
+; GISEL-GFX11-LABEL: v_icmp_i32_uge:
+; GISEL-GFX11: ; %bb.0:
+; GISEL-GFX11-NEXT: s_clause 0x1
+; GISEL-GFX11-NEXT: s_load_b32 s2, s[0:1], 0x2c
+; GISEL-GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
+; GISEL-GFX11-NEXT: v_mov_b32_e32 v1, 0
+; GISEL-GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; GISEL-GFX11-NEXT: v_cmp_le_u32_e64 s2, 0x64, s2
+; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GISEL-GFX11-NEXT: v_mov_b32_e32 v0, s2
+; GISEL-GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
+; GISEL-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GISEL-GFX11-NEXT: s_endpgm
+;
+; GISEL-GFX10-LABEL: v_icmp_i32_uge:
+; GISEL-GFX10: ; %bb.0:
+; GISEL-GFX10-NEXT: s_clause 0x1
+; GISEL-GFX10-NEXT: s_load_dword s4, s[0:1], 0x2c
+; GISEL-GFX10-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
+; GISEL-GFX10-NEXT: v_mov_b32_e32 v1, 0
+; GISEL-GFX10-NEXT: s_waitcnt lgkmcnt(0)
+; GISEL-GFX10-NEXT: v_cmp_le_u32_e64 s0, 0x64, s4
+; GISEL-GFX10-NEXT: v_mov_b32_e32 v0, s0
+; GISEL-GFX10-NEXT: global_store_dword v1, v0, s[2:3]
+; GISEL-GFX10-NEXT: s_endpgm
+ %result = call i32 @llvm.amdgcn.icmp.i32(i32 %src, i32 100, i32 35)
+ store i32 %result, i32 addrspace(1)* %out
+ ret void
+}
+
+define amdgpu_kernel void @v_icmp_i32_ult(i32 addrspace(1)* %out, i32 %src) {
+; SDAG-GFX11-LABEL: v_icmp_i32_ult:
+; SDAG-GFX11: ; %bb.0:
+; SDAG-GFX11-NEXT: s_clause 0x1
+; SDAG-GFX11-NEXT: s_load_b32 s2, s[0:1], 0x2c
+; SDAG-GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
+; SDAG-GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; SDAG-GFX11-NEXT: v_cmp_gt_u32_e64 s2, 0x64, s2
+; SDAG-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; SDAG-GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
+; SDAG-GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; SDAG-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; SDAG-GFX11-NEXT: s_endpgm
+;
+; SDAG-GFX10-LABEL: v_icmp_i32_ult:
+; SDAG-GFX10: ; %bb.0:
+; SDAG-GFX10-NEXT: s_clause 0x1
+; SDAG-GFX10-NEXT: s_load_dword s4, s[0:1], 0x2c
+; SDAG-GFX10-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
+; SDAG-GFX10-NEXT: v_mov_b32_e32 v0, 0
+; SDAG-GFX10-NEXT: s_waitcnt lgkmcnt(0)
+; SDAG-GFX10-NEXT: v_cmp_gt_u32_e64 s0, 0x64, s4
+; SDAG-GFX10-NEXT: v_mov_b32_e32 v1, s0
+; SDAG-GFX10-NEXT: global_store_dword v0, v1, s[2:3]
+; SDAG-GFX10-NEXT: s_endpgm
+;
+; GISEL-GFX11-LABEL: v_icmp_i32_ult:
+; GISEL-GFX11: ; %bb.0:
+; GISEL-GFX11-NEXT: s_clause 0x1
+; GISEL-GFX11-NEXT: s_load_b32 s2, s[0:1], 0x2c
+; GISEL-GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
+; GISEL-GFX11-NEXT: v_mov_b32_e32 v1, 0
+; GISEL-GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; GISEL-GFX11-NEXT: v_cmp_gt_u32_e64 s2, 0x64, s2
+; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GISEL-GFX11-NEXT: v_mov_b32_e32 v0, s2
+; GISEL-GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
+; GISEL-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GISEL-GFX11-NEXT: s_endpgm
+;
+; GISEL-GFX10-LABEL: v_icmp_i32_ult:
+; GISEL-GFX10: ; %bb.0:
+; GISEL-GFX10-NEXT: s_clause 0x1
+; GISEL-GFX10-NEXT: s_load_dword s4, s[0:1], 0x2c
+; GISEL-GFX10-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
+; GISEL-GFX10-NEXT: v_mov_b32_e32 v1, 0
+; GISEL-GFX10-NEXT: s_waitcnt lgkmcnt(0)
+; GISEL-GFX10-NEXT: v_cmp_gt_u32_e64 s0, 0x64, s4
+; GISEL-GFX10-NEXT: v_mov_b32_e32 v0, s0
+; GISEL-GFX10-NEXT: global_store_dword v1, v0, s[2:3]
+; GISEL-GFX10-NEXT: s_endpgm
+ %result = call i32 @llvm.amdgcn.icmp.i32(i32 %src, i32 100, i32 36)
+ store i32 %result, i32 addrspace(1)* %out
+ ret void
+}
+
+define amdgpu_kernel void @v_icmp_i32_ule(i32 addrspace(1)* %out, i32 %src) {
+; SDAG-GFX11-LABEL: v_icmp_i32_ule:
+; SDAG-GFX11: ; %bb.0:
+; SDAG-GFX11-NEXT: s_clause 0x1
+; SDAG-GFX11-NEXT: s_load_b32 s2, s[0:1], 0x2c
+; SDAG-GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
+; SDAG-GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; SDAG-GFX11-NEXT: v_cmp_ge_u32_e64 s2, 0x64, s2
+; SDAG-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; SDAG-GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
+; SDAG-GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; SDAG-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; SDAG-GFX11-NEXT: s_endpgm
+;
+; SDAG-GFX10-LABEL: v_icmp_i32_ule:
+; SDAG-GFX10: ; %bb.0:
+; SDAG-GFX10-NEXT: s_clause 0x1
+; SDAG-GFX10-NEXT: s_load_dword s4, s[0:1], 0x2c
+; SDAG-GFX10-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
+; SDAG-GFX10-NEXT: v_mov_b32_e32 v0, 0
+; SDAG-GFX10-NEXT: s_waitcnt lgkmcnt(0)
+; SDAG-GFX10-NEXT: v_cmp_ge_u32_e64 s0, 0x64, s4
+; SDAG-GFX10-NEXT: v_mov_b32_e32 v1, s0
+; SDAG-GFX10-NEXT: global_store_dword v0, v1, s[2:3]
+; SDAG-GFX10-NEXT: s_endpgm
+;
+; GISEL-GFX11-LABEL: v_icmp_i32_ule:
+; GISEL-GFX11: ; %bb.0:
+; GISEL-GFX11-NEXT: s_clause 0x1
+; GISEL-GFX11-NEXT: s_load_b32 s2, s[0:1], 0x2c
+; GISEL-GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
+; GISEL-GFX11-NEXT: v_mov_b32_e32 v1, 0
+; GISEL-GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; GISEL-GFX11-NEXT: v_cmp_ge_u32_e64 s2, 0x64, s2
+; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GISEL-GFX11-NEXT: v_mov_b32_e32 v0, s2
+; GISEL-GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
+; GISEL-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GISEL-GFX11-NEXT: s_endpgm
+;
+; GISEL-GFX10-LABEL: v_icmp_i32_ule:
+; GISEL-GFX10: ; %bb.0:
+; GISEL-GFX10-NEXT: s_clause 0x1
+; GISEL-GFX10-NEXT: s_load_dword s4, s[0:1], 0x2c
+; GISEL-GFX10-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
+; GISEL-GFX10-NEXT: v_mov_b32_e32 v1, 0
+; GISEL-GFX10-NEXT: s_waitcnt lgkmcnt(0)
+; GISEL-GFX10-NEXT: v_cmp_ge_u32_e64 s0, 0x64, s4
+; GISEL-GFX10-NEXT: v_mov_b32_e32 v0, s0
+; GISEL-GFX10-NEXT: global_store_dword v1, v0, s[2:3]
+; GISEL-GFX10-NEXT: s_endpgm
+ %result = call i32 @llvm.amdgcn.icmp.i32(i32 %src, i32 100, i32 37)
+ store i32 %result, i32 addrspace(1)* %out
+ ret void
+}
+
+define amdgpu_kernel void @v_icmp_i32_sgt(i32 addrspace(1)* %out, i32 %src) #1 {
+; SDAG-GFX11-LABEL: v_icmp_i32_sgt:
+; SDAG-GFX11: ; %bb.0:
+; SDAG-GFX11-NEXT: s_clause 0x1
+; SDAG-GFX11-NEXT: s_load_b32 s2, s[0:1], 0x2c
+; SDAG-GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
+; SDAG-GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; SDAG-GFX11-NEXT: v_cmp_lt_i32_e64 s2, 0x64, s2
+; SDAG-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; SDAG-GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
+; SDAG-GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; SDAG-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; SDAG-GFX11-NEXT: s_endpgm
+;
+; SDAG-GFX10-LABEL: v_icmp_i32_sgt:
+; SDAG-GFX10: ; %bb.0:
+; SDAG-GFX10-NEXT: s_clause 0x1
+; SDAG-GFX10-NEXT: s_load_dword s4, s[0:1], 0x2c
+; SDAG-GFX10-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
+; SDAG-GFX10-NEXT: v_mov_b32_e32 v0, 0
+; SDAG-GFX10-NEXT: s_waitcnt lgkmcnt(0)
+; SDAG-GFX10-NEXT: v_cmp_lt_i32_e64 s0, 0x64, s4
+; SDAG-GFX10-NEXT: v_mov_b32_e32 v1, s0
+; SDAG-GFX10-NEXT: global_store_dword v0, v1, s[2:3]
+; SDAG-GFX10-NEXT: s_endpgm
+;
+; GISEL-GFX11-LABEL: v_icmp_i32_sgt:
+; GISEL-GFX11: ; %bb.0:
+; GISEL-GFX11-NEXT: s_clause 0x1
+; GISEL-GFX11-NEXT: s_load_b32 s2, s[0:1], 0x2c
+; GISEL-GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
+; GISEL-GFX11-NEXT: v_mov_b32_e32 v1, 0
+; GISEL-GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; GISEL-GFX11-NEXT: v_cmp_lt_i32_e64 s2, 0x64, s2
+; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GISEL-GFX11-NEXT: v_mov_b32_e32 v0, s2
+; GISEL-GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
+; GISEL-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GISEL-GFX11-NEXT: s_endpgm
+;
+; GISEL-GFX10-LABEL: v_icmp_i32_sgt:
+; GISEL-GFX10: ; %bb.0:
+; GISEL-GFX10-NEXT: s_clause 0x1
+; GISEL-GFX10-NEXT: s_load_dword s4, s[0:1], 0x2c
+; GISEL-GFX10-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
+; GISEL-GFX10-NEXT: v_mov_b32_e32 v1, 0
+; GISEL-GFX10-NEXT: s_waitcnt lgkmcnt(0)
+; GISEL-GFX10-NEXT: v_cmp_lt_i32_e64 s0, 0x64, s4
+; GISEL-GFX10-NEXT: v_mov_b32_e32 v0, s0
+; GISEL-GFX10-NEXT: global_store_dword v1, v0, s[2:3]
+; GISEL-GFX10-NEXT: s_endpgm
+ %result = call i32 @llvm.amdgcn.icmp.i32(i32 %src, i32 100, i32 38)
+ store i32 %result, i32 addrspace(1)* %out
+ ret void
+}
+
+define amdgpu_kernel void @v_icmp_i32_sge(i32 addrspace(1)* %out, i32 %src) {
+; SDAG-GFX11-LABEL: v_icmp_i32_sge:
+; SDAG-GFX11: ; %bb.0:
+; SDAG-GFX11-NEXT: s_clause 0x1
+; SDAG-GFX11-NEXT: s_load_b32 s2, s[0:1], 0x2c
+; SDAG-GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
+; SDAG-GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; SDAG-GFX11-NEXT: v_cmp_le_i32_e64 s2, 0x64, s2
+; SDAG-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; SDAG-GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
+; SDAG-GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; SDAG-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; SDAG-GFX11-NEXT: s_endpgm
+;
+; SDAG-GFX10-LABEL: v_icmp_i32_sge:
+; SDAG-GFX10: ; %bb.0:
+; SDAG-GFX10-NEXT: s_clause 0x1
+; SDAG-GFX10-NEXT: s_load_dword s4, s[0:1], 0x2c
+; SDAG-GFX10-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
+; SDAG-GFX10-NEXT: v_mov_b32_e32 v0, 0
+; SDAG-GFX10-NEXT: s_waitcnt lgkmcnt(0)
+; SDAG-GFX10-NEXT: v_cmp_le_i32_e64 s0, 0x64, s4
+; SDAG-GFX10-NEXT: v_mov_b32_e32 v1, s0
+; SDAG-GFX10-NEXT: global_store_dword v0, v1, s[2:3]
+; SDAG-GFX10-NEXT: s_endpgm
+;
+; GISEL-GFX11-LABEL: v_icmp_i32_sge:
+; GISEL-GFX11: ; %bb.0:
+; GISEL-GFX11-NEXT: s_clause 0x1
+; GISEL-GFX11-NEXT: s_load_b32 s2, s[0:1], 0x2c
+; GISEL-GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
+; GISEL-GFX11-NEXT: v_mov_b32_e32 v1, 0
+; GISEL-GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; GISEL-GFX11-NEXT: v_cmp_le_i32_e64 s2, 0x64, s2
+; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GISEL-GFX11-NEXT: v_mov_b32_e32 v0, s2
+; GISEL-GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
+; GISEL-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GISEL-GFX11-NEXT: s_endpgm
+;
+; GISEL-GFX10-LABEL: v_icmp_i32_sge:
+; GISEL-GFX10: ; %bb.0:
+; GISEL-GFX10-NEXT: s_clause 0x1
+; GISEL-GFX10-NEXT: s_load_dword s4, s[0:1], 0x2c
+; GISEL-GFX10-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
+; GISEL-GFX10-NEXT: v_mov_b32_e32 v1, 0
+; GISEL-GFX10-NEXT: s_waitcnt lgkmcnt(0)
+; GISEL-GFX10-NEXT: v_cmp_le_i32_e64 s0, 0x64, s4
+; GISEL-GFX10-NEXT: v_mov_b32_e32 v0, s0
+; GISEL-GFX10-NEXT: global_store_dword v1, v0, s[2:3]
+; GISEL-GFX10-NEXT: s_endpgm
+ %result = call i32 @llvm.amdgcn.icmp.i32(i32 %src, i32 100, i32 39)
+ store i32 %result, i32 addrspace(1)* %out
+ ret void
+}
+
+define amdgpu_kernel void @v_icmp_i32_slt(i32 addrspace(1)* %out, i32 %src) {
+; SDAG-GFX11-LABEL: v_icmp_i32_slt:
+; SDAG-GFX11: ; %bb.0:
+; SDAG-GFX11-NEXT: s_clause 0x1
+; SDAG-GFX11-NEXT: s_load_b32 s2, s[0:1], 0x2c
+; SDAG-GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
+; SDAG-GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; SDAG-GFX11-NEXT: v_cmp_gt_i32_e64 s2, 0x64, s2
+; SDAG-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; SDAG-GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
+; SDAG-GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; SDAG-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; SDAG-GFX11-NEXT: s_endpgm
+;
+; SDAG-GFX10-LABEL: v_icmp_i32_slt:
+; SDAG-GFX10: ; %bb.0:
+; SDAG-GFX10-NEXT: s_clause 0x1
+; SDAG-GFX10-NEXT: s_load_dword s4, s[0:1], 0x2c
+; SDAG-GFX10-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
+; SDAG-GFX10-NEXT: v_mov_b32_e32 v0, 0
+; SDAG-GFX10-NEXT: s_waitcnt lgkmcnt(0)
+; SDAG-GFX10-NEXT: v_cmp_gt_i32_e64 s0, 0x64, s4
+; SDAG-GFX10-NEXT: v_mov_b32_e32 v1, s0
+; SDAG-GFX10-NEXT: global_store_dword v0, v1, s[2:3]
+; SDAG-GFX10-NEXT: s_endpgm
+;
+; GISEL-GFX11-LABEL: v_icmp_i32_slt:
+; GISEL-GFX11: ; %bb.0:
+; GISEL-GFX11-NEXT: s_clause 0x1
+; GISEL-GFX11-NEXT: s_load_b32 s2, s[0:1], 0x2c
+; GISEL-GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
+; GISEL-GFX11-NEXT: v_mov_b32_e32 v1, 0
+; GISEL-GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; GISEL-GFX11-NEXT: v_cmp_gt_i32_e64 s2, 0x64, s2
+; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GISEL-GFX11-NEXT: v_mov_b32_e32 v0, s2
+; GISEL-GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
+; GISEL-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GISEL-GFX11-NEXT: s_endpgm
+;
+; GISEL-GFX10-LABEL: v_icmp_i32_slt:
+; GISEL-GFX10: ; %bb.0:
+; GISEL-GFX10-NEXT: s_clause 0x1
+; GISEL-GFX10-NEXT: s_load_dword s4, s[0:1], 0x2c
+; GISEL-GFX10-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
+; GISEL-GFX10-NEXT: v_mov_b32_e32 v1, 0
+; GISEL-GFX10-NEXT: s_waitcnt lgkmcnt(0)
+; GISEL-GFX10-NEXT: v_cmp_gt_i32_e64 s0, 0x64, s4
+; GISEL-GFX10-NEXT: v_mov_b32_e32 v0, s0
+; GISEL-GFX10-NEXT: global_store_dword v1, v0, s[2:3]
+; GISEL-GFX10-NEXT: s_endpgm
+ %result = call i32 @llvm.amdgcn.icmp.i32(i32 %src, i32 100, i32 40)
+ store i32 %result, i32 addrspace(1)* %out
+ ret void
+}
+
+define amdgpu_kernel void @v_icmp_i32_sle(i32 addrspace(1)* %out, i32 %src) {
+; SDAG-GFX11-LABEL: v_icmp_i32_sle:
+; SDAG-GFX11: ; %bb.0:
+; SDAG-GFX11-NEXT: s_clause 0x1
+; SDAG-GFX11-NEXT: s_load_b32 s2, s[0:1], 0x2c
+; SDAG-GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
+; SDAG-GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; SDAG-GFX11-NEXT: v_cmp_ge_i32_e64 s2, 0x64, s2
+; SDAG-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; SDAG-GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
+; SDAG-GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; SDAG-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; SDAG-GFX11-NEXT: s_endpgm
+;
+; SDAG-GFX10-LABEL: v_icmp_i32_sle:
+; SDAG-GFX10: ; %bb.0:
+; SDAG-GFX10-NEXT: s_clause 0x1
+; SDAG-GFX10-NEXT: s_load_dword s4, s[0:1], 0x2c
+; SDAG-GFX10-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
+; SDAG-GFX10-NEXT: v_mov_b32_e32 v0, 0
+; SDAG-GFX10-NEXT: s_waitcnt lgkmcnt(0)
+; SDAG-GFX10-NEXT: v_cmp_ge_i32_e64 s0, 0x64, s4
+; SDAG-GFX10-NEXT: v_mov_b32_e32 v1, s0
+; SDAG-GFX10-NEXT: global_store_dword v0, v1, s[2:3]
+; SDAG-GFX10-NEXT: s_endpgm
+;
+; GISEL-GFX11-LABEL: v_icmp_i32_sle:
+; GISEL-GFX11: ; %bb.0:
+; GISEL-GFX11-NEXT: s_clause 0x1
+; GISEL-GFX11-NEXT: s_load_b32 s2, s[0:1], 0x2c
+; GISEL-GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
+; GISEL-GFX11-NEXT: v_mov_b32_e32 v1, 0
+; GISEL-GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; GISEL-GFX11-NEXT: v_cmp_ge_i32_e64 s2, 0x64, s2
+; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GISEL-GFX11-NEXT: v_mov_b32_e32 v0, s2
+; GISEL-GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
+; GISEL-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GISEL-GFX11-NEXT: s_endpgm
+;
+; GISEL-GFX10-LABEL: v_icmp_i32_sle:
+; GISEL-GFX10: ; %bb.0:
+; GISEL-GFX10-NEXT: s_clause 0x1
+; GISEL-GFX10-NEXT: s_load_dword s4, s[0:1], 0x2c
+; GISEL-GFX10-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
+; GISEL-GFX10-NEXT: v_mov_b32_e32 v1, 0
+; GISEL-GFX10-NEXT: s_waitcnt lgkmcnt(0)
+; GISEL-GFX10-NEXT: v_cmp_ge_i32_e64 s0, 0x64, s4
+; GISEL-GFX10-NEXT: v_mov_b32_e32 v0, s0
+; GISEL-GFX10-NEXT: global_store_dword v1, v0, s[2:3]
+; GISEL-GFX10-NEXT: s_endpgm
+ %result = call i32 @llvm.amdgcn.icmp.i32(i32 %src, i32 100, i32 41)
+ store i32 %result, i32 addrspace(1)* %out
+ ret void
+}
+
+define amdgpu_kernel void @v_icmp_i64_eq(i32 addrspace(1)* %out, i64 %src) {
+; SDAG-GFX11-LABEL: v_icmp_i64_eq:
+; SDAG-GFX11: ; %bb.0:
+; SDAG-GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
+; SDAG-GFX11-NEXT: s_mov_b64 s[4:5], 0x64
+; SDAG-GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; SDAG-GFX11-NEXT: v_cmp_eq_u64_e64 s2, s[2:3], s[4:5]
+; SDAG-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; SDAG-GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
+; SDAG-GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; SDAG-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; SDAG-GFX11-NEXT: s_endpgm
+;
+; SDAG-GFX10-LABEL: v_icmp_i64_eq:
+; SDAG-GFX10: ; %bb.0:
+; SDAG-GFX10-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
+; SDAG-GFX10-NEXT: s_mov_b64 s[4:5], 0x64
+; SDAG-GFX10-NEXT: v_mov_b32_e32 v0, 0
+; SDAG-GFX10-NEXT: s_waitcnt lgkmcnt(0)
+; SDAG-GFX10-NEXT: v_cmp_eq_u64_e64 s2, s[2:3], s[4:5]
+; SDAG-GFX10-NEXT: v_mov_b32_e32 v1, s2
+; SDAG-GFX10-NEXT: global_store_dword v0, v1, s[0:1]
+; SDAG-GFX10-NEXT: s_endpgm
+;
+; GISEL-GFX11-LABEL: v_icmp_i64_eq:
+; GISEL-GFX11: ; %bb.0:
+; GISEL-GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
+; GISEL-GFX11-NEXT: s_mov_b64 s[4:5], 0x64
+; GISEL-GFX11-NEXT: v_mov_b32_e32 v1, 0
+; GISEL-GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; GISEL-GFX11-NEXT: v_cmp_eq_u64_e64 s2, s[2:3], s[4:5]
+; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GISEL-GFX11-NEXT: v_mov_b32_e32 v0, s2
+; GISEL-GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
+; GISEL-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GISEL-GFX11-NEXT: s_endpgm
+;
+; GISEL-GFX10-LABEL: v_icmp_i64_eq:
+; GISEL-GFX10: ; %bb.0:
+; GISEL-GFX10-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
+; GISEL-GFX10-NEXT: s_mov_b64 s[4:5], 0x64
+; GISEL-GFX10-NEXT: v_mov_b32_e32 v1, 0
+; GISEL-GFX10-NEXT: s_waitcnt lgkmcnt(0)
+; GISEL-GFX10-NEXT: v_cmp_eq_u64_e64 s2, s[2:3], s[4:5]
+; GISEL-GFX10-NEXT: v_mov_b32_e32 v0, s2
+; GISEL-GFX10-NEXT: global_store_dword v1, v0, s[0:1]
+; GISEL-GFX10-NEXT: s_endpgm
+ %result = call i32 @llvm.amdgcn.icmp.i64(i64 %src, i64 100, i32 32)
+ store i32 %result, i32 addrspace(1)* %out
+ ret void
+}
+
+define amdgpu_kernel void @v_icmp_i64_ne(i32 addrspace(1)* %out, i64 %src) {
+; SDAG-GFX11-LABEL: v_icmp_i64_ne:
+; SDAG-GFX11: ; %bb.0:
+; SDAG-GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
+; SDAG-GFX11-NEXT: s_mov_b64 s[4:5], 0x64
+; SDAG-GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; SDAG-GFX11-NEXT: v_cmp_ne_u64_e64 s2, s[2:3], s[4:5]
+; SDAG-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; SDAG-GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
+; SDAG-GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; SDAG-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; SDAG-GFX11-NEXT: s_endpgm
+;
+; SDAG-GFX10-LABEL: v_icmp_i64_ne:
+; SDAG-GFX10: ; %bb.0:
+; SDAG-GFX10-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
+; SDAG-GFX10-NEXT: s_mov_b64 s[4:5], 0x64
+; SDAG-GFX10-NEXT: v_mov_b32_e32 v0, 0
+; SDAG-GFX10-NEXT: s_waitcnt lgkmcnt(0)
+; SDAG-GFX10-NEXT: v_cmp_ne_u64_e64 s2, s[2:3], s[4:5]
+; SDAG-GFX10-NEXT: v_mov_b32_e32 v1, s2
+; SDAG-GFX10-NEXT: global_store_dword v0, v1, s[0:1]
+; SDAG-GFX10-NEXT: s_endpgm
+;
+; GISEL-GFX11-LABEL: v_icmp_i64_ne:
+; GISEL-GFX11: ; %bb.0:
+; GISEL-GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
+; GISEL-GFX11-NEXT: s_mov_b64 s[4:5], 0x64
+; GISEL-GFX11-NEXT: v_mov_b32_e32 v1, 0
+; GISEL-GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; GISEL-GFX11-NEXT: v_cmp_ne_u64_e64 s2, s[2:3], s[4:5]
+; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GISEL-GFX11-NEXT: v_mov_b32_e32 v0, s2
+; GISEL-GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
+; GISEL-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GISEL-GFX11-NEXT: s_endpgm
+;
+; GISEL-GFX10-LABEL: v_icmp_i64_ne:
+; GISEL-GFX10: ; %bb.0:
+; GISEL-GFX10-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
+; GISEL-GFX10-NEXT: s_mov_b64 s[4:5], 0x64
+; GISEL-GFX10-NEXT: v_mov_b32_e32 v1, 0
+; GISEL-GFX10-NEXT: s_waitcnt lgkmcnt(0)
+; GISEL-GFX10-NEXT: v_cmp_ne_u64_e64 s2, s[2:3], s[4:5]
+; GISEL-GFX10-NEXT: v_mov_b32_e32 v0, s2
+; GISEL-GFX10-NEXT: global_store_dword v1, v0, s[0:1]
+; GISEL-GFX10-NEXT: s_endpgm
+ %result = call i32 @llvm.amdgcn.icmp.i64(i64 %src, i64 100, i32 33)
+ store i32 %result, i32 addrspace(1)* %out
+ ret void
+}
+
+define amdgpu_kernel void @v_icmp_u64_ugt(i32 addrspace(1)* %out, i64 %src) {
+; SDAG-GFX11-LABEL: v_icmp_u64_ugt:
+; SDAG-GFX11: ; %bb.0:
+; SDAG-GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
+; SDAG-GFX11-NEXT: s_mov_b64 s[4:5], 0x64
+; SDAG-GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; SDAG-GFX11-NEXT: v_cmp_gt_u64_e64 s2, s[2:3], s[4:5]
+; SDAG-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; SDAG-GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
+; SDAG-GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; SDAG-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; SDAG-GFX11-NEXT: s_endpgm
+;
+; SDAG-GFX10-LABEL: v_icmp_u64_ugt:
+; SDAG-GFX10: ; %bb.0:
+; SDAG-GFX10-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
+; SDAG-GFX10-NEXT: s_mov_b64 s[4:5], 0x64
+; SDAG-GFX10-NEXT: v_mov_b32_e32 v0, 0
+; SDAG-GFX10-NEXT: s_waitcnt lgkmcnt(0)
+; SDAG-GFX10-NEXT: v_cmp_gt_u64_e64 s2, s[2:3], s[4:5]
+; SDAG-GFX10-NEXT: v_mov_b32_e32 v1, s2
+; SDAG-GFX10-NEXT: global_store_dword v0, v1, s[0:1]
+; SDAG-GFX10-NEXT: s_endpgm
+;
+; GISEL-GFX11-LABEL: v_icmp_u64_ugt:
+; GISEL-GFX11: ; %bb.0:
+; GISEL-GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
+; GISEL-GFX11-NEXT: s_mov_b64 s[4:5], 0x64
+; GISEL-GFX11-NEXT: v_mov_b32_e32 v1, 0
+; GISEL-GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; GISEL-GFX11-NEXT: v_cmp_gt_u64_e64 s2, s[2:3], s[4:5]
+; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GISEL-GFX11-NEXT: v_mov_b32_e32 v0, s2
+; GISEL-GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
+; GISEL-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GISEL-GFX11-NEXT: s_endpgm
+;
+; GISEL-GFX10-LABEL: v_icmp_u64_ugt:
+; GISEL-GFX10: ; %bb.0:
+; GISEL-GFX10-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
+; GISEL-GFX10-NEXT: s_mov_b64 s[4:5], 0x64
+; GISEL-GFX10-NEXT: v_mov_b32_e32 v1, 0
+; GISEL-GFX10-NEXT: s_waitcnt lgkmcnt(0)
+; GISEL-GFX10-NEXT: v_cmp_gt_u64_e64 s2, s[2:3], s[4:5]
+; GISEL-GFX10-NEXT: v_mov_b32_e32 v0, s2
+; GISEL-GFX10-NEXT: global_store_dword v1, v0, s[0:1]
+; GISEL-GFX10-NEXT: s_endpgm
+ %result = call i32 @llvm.amdgcn.icmp.i64(i64 %src, i64 100, i32 34)
+ store i32 %result, i32 addrspace(1)* %out
+ ret void
+}
+
+define amdgpu_kernel void @v_icmp_u64_uge(i32 addrspace(1)* %out, i64 %src) {
+; SDAG-GFX11-LABEL: v_icmp_u64_uge:
+; SDAG-GFX11: ; %bb.0:
+; SDAG-GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
+; SDAG-GFX11-NEXT: s_mov_b64 s[4:5], 0x64
+; SDAG-GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; SDAG-GFX11-NEXT: v_cmp_ge_u64_e64 s2, s[2:3], s[4:5]
+; SDAG-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; SDAG-GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
+; SDAG-GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; SDAG-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; SDAG-GFX11-NEXT: s_endpgm
+;
+; SDAG-GFX10-LABEL: v_icmp_u64_uge:
+; SDAG-GFX10: ; %bb.0:
+; SDAG-GFX10-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
+; SDAG-GFX10-NEXT: s_mov_b64 s[4:5], 0x64
+; SDAG-GFX10-NEXT: v_mov_b32_e32 v0, 0
+; SDAG-GFX10-NEXT: s_waitcnt lgkmcnt(0)
+; SDAG-GFX10-NEXT: v_cmp_ge_u64_e64 s2, s[2:3], s[4:5]
+; SDAG-GFX10-NEXT: v_mov_b32_e32 v1, s2
+; SDAG-GFX10-NEXT: global_store_dword v0, v1, s[0:1]
+; SDAG-GFX10-NEXT: s_endpgm
+;
+; GISEL-GFX11-LABEL: v_icmp_u64_uge:
+; GISEL-GFX11: ; %bb.0:
+; GISEL-GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
+; GISEL-GFX11-NEXT: s_mov_b64 s[4:5], 0x64
+; GISEL-GFX11-NEXT: v_mov_b32_e32 v1, 0
+; GISEL-GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; GISEL-GFX11-NEXT: v_cmp_ge_u64_e64 s2, s[2:3], s[4:5]
+; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GISEL-GFX11-NEXT: v_mov_b32_e32 v0, s2
+; GISEL-GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
+; GISEL-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GISEL-GFX11-NEXT: s_endpgm
+;
+; GISEL-GFX10-LABEL: v_icmp_u64_uge:
+; GISEL-GFX10: ; %bb.0:
+; GISEL-GFX10-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
+; GISEL-GFX10-NEXT: s_mov_b64 s[4:5], 0x64
+; GISEL-GFX10-NEXT: v_mov_b32_e32 v1, 0
+; GISEL-GFX10-NEXT: s_waitcnt lgkmcnt(0)
+; GISEL-GFX10-NEXT: v_cmp_ge_u64_e64 s2, s[2:3], s[4:5]
+; GISEL-GFX10-NEXT: v_mov_b32_e32 v0, s2
+; GISEL-GFX10-NEXT: global_store_dword v1, v0, s[0:1]
+; GISEL-GFX10-NEXT: s_endpgm
+ %result = call i32 @llvm.amdgcn.icmp.i64(i64 %src, i64 100, i32 35)
+ store i32 %result, i32 addrspace(1)* %out
+ ret void
+}
+
+define amdgpu_kernel void @v_icmp_u64_ult(i32 addrspace(1)* %out, i64 %src) {
+; SDAG-GFX11-LABEL: v_icmp_u64_ult:
+; SDAG-GFX11: ; %bb.0:
+; SDAG-GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
+; SDAG-GFX11-NEXT: s_mov_b64 s[4:5], 0x64
+; SDAG-GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; SDAG-GFX11-NEXT: v_cmp_lt_u64_e64 s2, s[2:3], s[4:5]
+; SDAG-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; SDAG-GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
+; SDAG-GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; SDAG-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; SDAG-GFX11-NEXT: s_endpgm
+;
+; SDAG-GFX10-LABEL: v_icmp_u64_ult:
+; SDAG-GFX10: ; %bb.0:
+; SDAG-GFX10-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
+; SDAG-GFX10-NEXT: s_mov_b64 s[4:5], 0x64
+; SDAG-GFX10-NEXT: v_mov_b32_e32 v0, 0
+; SDAG-GFX10-NEXT: s_waitcnt lgkmcnt(0)
+; SDAG-GFX10-NEXT: v_cmp_lt_u64_e64 s2, s[2:3], s[4:5]
+; SDAG-GFX10-NEXT: v_mov_b32_e32 v1, s2
+; SDAG-GFX10-NEXT: global_store_dword v0, v1, s[0:1]
+; SDAG-GFX10-NEXT: s_endpgm
+;
+; GISEL-GFX11-LABEL: v_icmp_u64_ult:
+; GISEL-GFX11: ; %bb.0:
+; GISEL-GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
+; GISEL-GFX11-NEXT: s_mov_b64 s[4:5], 0x64
+; GISEL-GFX11-NEXT: v_mov_b32_e32 v1, 0
+; GISEL-GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; GISEL-GFX11-NEXT: v_cmp_lt_u64_e64 s2, s[2:3], s[4:5]
+; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GISEL-GFX11-NEXT: v_mov_b32_e32 v0, s2
+; GISEL-GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
+; GISEL-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GISEL-GFX11-NEXT: s_endpgm
+;
+; GISEL-GFX10-LABEL: v_icmp_u64_ult:
+; GISEL-GFX10: ; %bb.0:
+; GISEL-GFX10-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
+; GISEL-GFX10-NEXT: s_mov_b64 s[4:5], 0x64
+; GISEL-GFX10-NEXT: v_mov_b32_e32 v1, 0
+; GISEL-GFX10-NEXT: s_waitcnt lgkmcnt(0)
+; GISEL-GFX10-NEXT: v_cmp_lt_u64_e64 s2, s[2:3], s[4:5]
+; GISEL-GFX10-NEXT: v_mov_b32_e32 v0, s2
+; GISEL-GFX10-NEXT: global_store_dword v1, v0, s[0:1]
+; GISEL-GFX10-NEXT: s_endpgm
+ %result = call i32 @llvm.amdgcn.icmp.i64(i64 %src, i64 100, i32 36)
+ store i32 %result, i32 addrspace(1)* %out
+ ret void
+}
+
+define amdgpu_kernel void @v_icmp_u64_ule(i32 addrspace(1)* %out, i64 %src) {
+; SDAG-GFX11-LABEL: v_icmp_u64_ule:
+; SDAG-GFX11: ; %bb.0:
+; SDAG-GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
+; SDAG-GFX11-NEXT: s_mov_b64 s[4:5], 0x64
+; SDAG-GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; SDAG-GFX11-NEXT: v_cmp_le_u64_e64 s2, s[2:3], s[4:5]
+; SDAG-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; SDAG-GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
+; SDAG-GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; SDAG-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; SDAG-GFX11-NEXT: s_endpgm
+;
+; SDAG-GFX10-LABEL: v_icmp_u64_ule:
+; SDAG-GFX10: ; %bb.0:
+; SDAG-GFX10-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
+; SDAG-GFX10-NEXT: s_mov_b64 s[4:5], 0x64
+; SDAG-GFX10-NEXT: v_mov_b32_e32 v0, 0
+; SDAG-GFX10-NEXT: s_waitcnt lgkmcnt(0)
+; SDAG-GFX10-NEXT: v_cmp_le_u64_e64 s2, s[2:3], s[4:5]
+; SDAG-GFX10-NEXT: v_mov_b32_e32 v1, s2
+; SDAG-GFX10-NEXT: global_store_dword v0, v1, s[0:1]
+; SDAG-GFX10-NEXT: s_endpgm
+;
+; GISEL-GFX11-LABEL: v_icmp_u64_ule:
+; GISEL-GFX11: ; %bb.0:
+; GISEL-GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
+; GISEL-GFX11-NEXT: s_mov_b64 s[4:5], 0x64
+; GISEL-GFX11-NEXT: v_mov_b32_e32 v1, 0
+; GISEL-GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; GISEL-GFX11-NEXT: v_cmp_le_u64_e64 s2, s[2:3], s[4:5]
+; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GISEL-GFX11-NEXT: v_mov_b32_e32 v0, s2
+; GISEL-GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
+; GISEL-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GISEL-GFX11-NEXT: s_endpgm
+;
+; GISEL-GFX10-LABEL: v_icmp_u64_ule:
+; GISEL-GFX10: ; %bb.0:
+; GISEL-GFX10-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
+; GISEL-GFX10-NEXT: s_mov_b64 s[4:5], 0x64
+; GISEL-GFX10-NEXT: v_mov_b32_e32 v1, 0
+; GISEL-GFX10-NEXT: s_waitcnt lgkmcnt(0)
+; GISEL-GFX10-NEXT: v_cmp_le_u64_e64 s2, s[2:3], s[4:5]
+; GISEL-GFX10-NEXT: v_mov_b32_e32 v0, s2
+; GISEL-GFX10-NEXT: global_store_dword v1, v0, s[0:1]
+; GISEL-GFX10-NEXT: s_endpgm
+ %result = call i32 @llvm.amdgcn.icmp.i64(i64 %src, i64 100, i32 37)
+ store i32 %result, i32 addrspace(1)* %out
+ ret void
+}
+
+define amdgpu_kernel void @v_icmp_i64_sgt(i32 addrspace(1)* %out, i64 %src) {
+; SDAG-GFX11-LABEL: v_icmp_i64_sgt:
+; SDAG-GFX11: ; %bb.0:
+; SDAG-GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
+; SDAG-GFX11-NEXT: s_mov_b64 s[4:5], 0x64
+; SDAG-GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; SDAG-GFX11-NEXT: v_cmp_gt_i64_e64 s2, s[2:3], s[4:5]
+; SDAG-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; SDAG-GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
+; SDAG-GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; SDAG-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; SDAG-GFX11-NEXT: s_endpgm
+;
+; SDAG-GFX10-LABEL: v_icmp_i64_sgt:
+; SDAG-GFX10: ; %bb.0:
+; SDAG-GFX10-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
+; SDAG-GFX10-NEXT: s_mov_b64 s[4:5], 0x64
+; SDAG-GFX10-NEXT: v_mov_b32_e32 v0, 0
+; SDAG-GFX10-NEXT: s_waitcnt lgkmcnt(0)
+; SDAG-GFX10-NEXT: v_cmp_gt_i64_e64 s2, s[2:3], s[4:5]
+; SDAG-GFX10-NEXT: v_mov_b32_e32 v1, s2
+; SDAG-GFX10-NEXT: global_store_dword v0, v1, s[0:1]
+; SDAG-GFX10-NEXT: s_endpgm
+;
+; GISEL-GFX11-LABEL: v_icmp_i64_sgt:
+; GISEL-GFX11: ; %bb.0:
+; GISEL-GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
+; GISEL-GFX11-NEXT: s_mov_b64 s[4:5], 0x64
+; GISEL-GFX11-NEXT: v_mov_b32_e32 v1, 0
+; GISEL-GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; GISEL-GFX11-NEXT: v_cmp_gt_i64_e64 s2, s[2:3], s[4:5]
+; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GISEL-GFX11-NEXT: v_mov_b32_e32 v0, s2
+; GISEL-GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
+; GISEL-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GISEL-GFX11-NEXT: s_endpgm
+;
+; GISEL-GFX10-LABEL: v_icmp_i64_sgt:
+; GISEL-GFX10: ; %bb.0:
+; GISEL-GFX10-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
+; GISEL-GFX10-NEXT: s_mov_b64 s[4:5], 0x64
+; GISEL-GFX10-NEXT: v_mov_b32_e32 v1, 0
+; GISEL-GFX10-NEXT: s_waitcnt lgkmcnt(0)
+; GISEL-GFX10-NEXT: v_cmp_gt_i64_e64 s2, s[2:3], s[4:5]
+; GISEL-GFX10-NEXT: v_mov_b32_e32 v0, s2
+; GISEL-GFX10-NEXT: global_store_dword v1, v0, s[0:1]
+; GISEL-GFX10-NEXT: s_endpgm
+ %result = call i32 @llvm.amdgcn.icmp.i64(i64 %src, i64 100, i32 38)
+ store i32 %result, i32 addrspace(1)* %out
+ ret void
+}
+
+define amdgpu_kernel void @v_icmp_i64_sge(i32 addrspace(1)* %out, i64 %src) {
+; SDAG-GFX11-LABEL: v_icmp_i64_sge:
+; SDAG-GFX11: ; %bb.0:
+; SDAG-GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
+; SDAG-GFX11-NEXT: s_mov_b64 s[4:5], 0x64
+; SDAG-GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; SDAG-GFX11-NEXT: v_cmp_ge_i64_e64 s2, s[2:3], s[4:5]
+; SDAG-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; SDAG-GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
+; SDAG-GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; SDAG-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; SDAG-GFX11-NEXT: s_endpgm
+;
+; SDAG-GFX10-LABEL: v_icmp_i64_sge:
+; SDAG-GFX10: ; %bb.0:
+; SDAG-GFX10-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
+; SDAG-GFX10-NEXT: s_mov_b64 s[4:5], 0x64
+; SDAG-GFX10-NEXT: v_mov_b32_e32 v0, 0
+; SDAG-GFX10-NEXT: s_waitcnt lgkmcnt(0)
+; SDAG-GFX10-NEXT: v_cmp_ge_i64_e64 s2, s[2:3], s[4:5]
+; SDAG-GFX10-NEXT: v_mov_b32_e32 v1, s2
+; SDAG-GFX10-NEXT: global_store_dword v0, v1, s[0:1]
+; SDAG-GFX10-NEXT: s_endpgm
+;
+; GISEL-GFX11-LABEL: v_icmp_i64_sge:
+; GISEL-GFX11: ; %bb.0:
+; GISEL-GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
+; GISEL-GFX11-NEXT: s_mov_b64 s[4:5], 0x64
+; GISEL-GFX11-NEXT: v_mov_b32_e32 v1, 0
+; GISEL-GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; GISEL-GFX11-NEXT: v_cmp_ge_i64_e64 s2, s[2:3], s[4:5]
+; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GISEL-GFX11-NEXT: v_mov_b32_e32 v0, s2
+; GISEL-GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
+; GISEL-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GISEL-GFX11-NEXT: s_endpgm
+;
+; GISEL-GFX10-LABEL: v_icmp_i64_sge:
+; GISEL-GFX10: ; %bb.0:
+; GISEL-GFX10-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
+; GISEL-GFX10-NEXT: s_mov_b64 s[4:5], 0x64
+; GISEL-GFX10-NEXT: v_mov_b32_e32 v1, 0
+; GISEL-GFX10-NEXT: s_waitcnt lgkmcnt(0)
+; GISEL-GFX10-NEXT: v_cmp_ge_i64_e64 s2, s[2:3], s[4:5]
+; GISEL-GFX10-NEXT: v_mov_b32_e32 v0, s2
+; GISEL-GFX10-NEXT: global_store_dword v1, v0, s[0:1]
+; GISEL-GFX10-NEXT: s_endpgm
+ %result = call i32 @llvm.amdgcn.icmp.i64(i64 %src, i64 100, i32 39)
+ store i32 %result, i32 addrspace(1)* %out
+ ret void
+}
+
+define amdgpu_kernel void @v_icmp_i64_slt(i32 addrspace(1)* %out, i64 %src) {
+; SDAG-GFX11-LABEL: v_icmp_i64_slt:
+; SDAG-GFX11: ; %bb.0:
+; SDAG-GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
+; SDAG-GFX11-NEXT: s_mov_b64 s[4:5], 0x64
+; SDAG-GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; SDAG-GFX11-NEXT: v_cmp_lt_i64_e64 s2, s[2:3], s[4:5]
+; SDAG-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; SDAG-GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
+; SDAG-GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; SDAG-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; SDAG-GFX11-NEXT: s_endpgm
+;
+; SDAG-GFX10-LABEL: v_icmp_i64_slt:
+; SDAG-GFX10: ; %bb.0:
+; SDAG-GFX10-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
+; SDAG-GFX10-NEXT: s_mov_b64 s[4:5], 0x64
+; SDAG-GFX10-NEXT: v_mov_b32_e32 v0, 0
+; SDAG-GFX10-NEXT: s_waitcnt lgkmcnt(0)
+; SDAG-GFX10-NEXT: v_cmp_lt_i64_e64 s2, s[2:3], s[4:5]
+; SDAG-GFX10-NEXT: v_mov_b32_e32 v1, s2
+; SDAG-GFX10-NEXT: global_store_dword v0, v1, s[0:1]
+; SDAG-GFX10-NEXT: s_endpgm
+;
+; GISEL-GFX11-LABEL: v_icmp_i64_slt:
+; GISEL-GFX11: ; %bb.0:
+; GISEL-GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
+; GISEL-GFX11-NEXT: s_mov_b64 s[4:5], 0x64
+; GISEL-GFX11-NEXT: v_mov_b32_e32 v1, 0
+; GISEL-GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; GISEL-GFX11-NEXT: v_cmp_lt_i64_e64 s2, s[2:3], s[4:5]
+; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GISEL-GFX11-NEXT: v_mov_b32_e32 v0, s2
+; GISEL-GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
+; GISEL-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GISEL-GFX11-NEXT: s_endpgm
+;
+; GISEL-GFX10-LABEL: v_icmp_i64_slt:
+; GISEL-GFX10: ; %bb.0:
+; GISEL-GFX10-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
+; GISEL-GFX10-NEXT: s_mov_b64 s[4:5], 0x64
+; GISEL-GFX10-NEXT: v_mov_b32_e32 v1, 0
+; GISEL-GFX10-NEXT: s_waitcnt lgkmcnt(0)
+; GISEL-GFX10-NEXT: v_cmp_lt_i64_e64 s2, s[2:3], s[4:5]
+; GISEL-GFX10-NEXT: v_mov_b32_e32 v0, s2
+; GISEL-GFX10-NEXT: global_store_dword v1, v0, s[0:1]
+; GISEL-GFX10-NEXT: s_endpgm
+ %result = call i32 @llvm.amdgcn.icmp.i64(i64 %src, i64 100, i32 40)
+ store i32 %result, i32 addrspace(1)* %out
+ ret void
+}
+
+define amdgpu_kernel void @v_icmp_i64_sle(i32 addrspace(1)* %out, i64 %src) {
+; SDAG-GFX11-LABEL: v_icmp_i64_sle:
+; SDAG-GFX11: ; %bb.0:
+; SDAG-GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
+; SDAG-GFX11-NEXT: s_mov_b64 s[4:5], 0x64
+; SDAG-GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; SDAG-GFX11-NEXT: v_cmp_le_i64_e64 s2, s[2:3], s[4:5]
+; SDAG-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; SDAG-GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
+; SDAG-GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; SDAG-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; SDAG-GFX11-NEXT: s_endpgm
+;
+; SDAG-GFX10-LABEL: v_icmp_i64_sle:
+; SDAG-GFX10: ; %bb.0:
+; SDAG-GFX10-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
+; SDAG-GFX10-NEXT: s_mov_b64 s[4:5], 0x64
+; SDAG-GFX10-NEXT: v_mov_b32_e32 v0, 0
+; SDAG-GFX10-NEXT: s_waitcnt lgkmcnt(0)
+; SDAG-GFX10-NEXT: v_cmp_le_i64_e64 s2, s[2:3], s[4:5]
+; SDAG-GFX10-NEXT: v_mov_b32_e32 v1, s2
+; SDAG-GFX10-NEXT: global_store_dword v0, v1, s[0:1]
+; SDAG-GFX10-NEXT: s_endpgm
+;
+; GISEL-GFX11-LABEL: v_icmp_i64_sle:
+; GISEL-GFX11: ; %bb.0:
+; GISEL-GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
+; GISEL-GFX11-NEXT: s_mov_b64 s[4:5], 0x64
+; GISEL-GFX11-NEXT: v_mov_b32_e32 v1, 0
+; GISEL-GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; GISEL-GFX11-NEXT: v_cmp_le_i64_e64 s2, s[2:3], s[4:5]
+; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GISEL-GFX11-NEXT: v_mov_b32_e32 v0, s2
+; GISEL-GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
+; GISEL-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GISEL-GFX11-NEXT: s_endpgm
+;
+; GISEL-GFX10-LABEL: v_icmp_i64_sle:
+; GISEL-GFX10: ; %bb.0:
+; GISEL-GFX10-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
+; GISEL-GFX10-NEXT: s_mov_b64 s[4:5], 0x64
+; GISEL-GFX10-NEXT: v_mov_b32_e32 v1, 0
+; GISEL-GFX10-NEXT: s_waitcnt lgkmcnt(0)
+; GISEL-GFX10-NEXT: v_cmp_le_i64_e64 s2, s[2:3], s[4:5]
+; GISEL-GFX10-NEXT: v_mov_b32_e32 v0, s2
+; GISEL-GFX10-NEXT: global_store_dword v1, v0, s[0:1]
+; GISEL-GFX10-NEXT: s_endpgm
+ %result = call i32 @llvm.amdgcn.icmp.i64(i64 %src, i64 100, i32 41)
+ store i32 %result, i32 addrspace(1)* %out
+ ret void
+}
+
+define amdgpu_kernel void @v_icmp_i16_eq(i32 addrspace(1)* %out, i16 %src) {
+; SDAG-GFX11-LABEL: v_icmp_i16_eq:
+; SDAG-GFX11: ; %bb.0:
+; SDAG-GFX11-NEXT: s_clause 0x1
+; SDAG-GFX11-NEXT: s_load_b32 s2, s[0:1], 0x2c
+; SDAG-GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
+; SDAG-GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; SDAG-GFX11-NEXT: v_cmp_eq_u16_e64 s2, 0x64, s2
+; SDAG-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; SDAG-GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
+; SDAG-GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; SDAG-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; SDAG-GFX11-NEXT: s_endpgm
+;
+; SDAG-GFX10-LABEL: v_icmp_i16_eq:
+; SDAG-GFX10: ; %bb.0:
+; SDAG-GFX10-NEXT: s_clause 0x1
+; SDAG-GFX10-NEXT: s_load_dword s4, s[0:1], 0x2c
+; SDAG-GFX10-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
+; SDAG-GFX10-NEXT: v_mov_b32_e32 v0, 0
+; SDAG-GFX10-NEXT: s_waitcnt lgkmcnt(0)
+; SDAG-GFX10-NEXT: v_cmp_eq_u16_e64 s0, 0x64, s4
+; SDAG-GFX10-NEXT: v_mov_b32_e32 v1, s0
+; SDAG-GFX10-NEXT: global_store_dword v0, v1, s[2:3]
+; SDAG-GFX10-NEXT: s_endpgm
+;
+; GISEL-GFX11-LABEL: v_icmp_i16_eq:
+; GISEL-GFX11: ; %bb.0:
+; GISEL-GFX11-NEXT: s_clause 0x1
+; GISEL-GFX11-NEXT: s_load_b32 s2, s[0:1], 0x2c
+; GISEL-GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
+; GISEL-GFX11-NEXT: v_mov_b32_e32 v1, 0
+; GISEL-GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; GISEL-GFX11-NEXT: v_cmp_eq_u16_e64 s2, 0x64, s2
+; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GISEL-GFX11-NEXT: v_mov_b32_e32 v0, s2
+; GISEL-GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
+; GISEL-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GISEL-GFX11-NEXT: s_endpgm
+;
+; GISEL-GFX10-LABEL: v_icmp_i16_eq:
+; GISEL-GFX10: ; %bb.0:
+; GISEL-GFX10-NEXT: s_clause 0x1
+; GISEL-GFX10-NEXT: s_load_dword s4, s[0:1], 0x2c
+; GISEL-GFX10-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
+; GISEL-GFX10-NEXT: v_mov_b32_e32 v1, 0
+; GISEL-GFX10-NEXT: s_waitcnt lgkmcnt(0)
+; GISEL-GFX10-NEXT: v_cmp_eq_u16_e64 s0, 0x64, s4
+; GISEL-GFX10-NEXT: v_mov_b32_e32 v0, s0
+; GISEL-GFX10-NEXT: global_store_dword v1, v0, s[2:3]
+; GISEL-GFX10-NEXT: s_endpgm
+ %result = call i32 @llvm.amdgcn.icmp.i16(i16 %src, i16 100, i32 32)
+ store i32 %result, i32 addrspace(1)* %out
+ ret void
+}
+
+define amdgpu_kernel void @v_icmp_i16(i32 addrspace(1)* %out, i16 %src) {
+; SDAG-GFX11-LABEL: v_icmp_i16:
+; SDAG-GFX11: ; %bb.0:
+; SDAG-GFX11-NEXT: s_endpgm
+;
+; SDAG-GFX10-LABEL: v_icmp_i16:
+; SDAG-GFX10: ; %bb.0:
+; SDAG-GFX10-NEXT: s_endpgm
+;
+; GISEL-GFX11-LABEL: v_icmp_i16:
+; GISEL-GFX11: ; %bb.0:
+; GISEL-GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
+; GISEL-GFX11-NEXT: v_mov_b32_e32 v0, 0
+; GISEL-GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; GISEL-GFX11-NEXT: global_store_b32 v0, v0, s[0:1]
+; GISEL-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GISEL-GFX11-NEXT: s_endpgm
+;
+; GISEL-GFX10-LABEL: v_icmp_i16:
+; GISEL-GFX10: ; %bb.0:
+; GISEL-GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
+; GISEL-GFX10-NEXT: v_mov_b32_e32 v0, 0
+; GISEL-GFX10-NEXT: s_waitcnt lgkmcnt(0)
+; GISEL-GFX10-NEXT: global_store_dword v0, v0, s[0:1]
+; GISEL-GFX10-NEXT: s_endpgm
+ %result = call i32 @llvm.amdgcn.icmp.i16(i16 %src, i16 100, i32 30)
+ store i32 %result, i32 addrspace(1)* %out
+ ret void
+}
+
+define amdgpu_kernel void @v_icmp_i16_ne(i32 addrspace(1)* %out, i16 %src) {
+; SDAG-GFX11-LABEL: v_icmp_i16_ne:
+; SDAG-GFX11: ; %bb.0:
+; SDAG-GFX11-NEXT: s_clause 0x1
+; SDAG-GFX11-NEXT: s_load_b32 s2, s[0:1], 0x2c
+; SDAG-GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
+; SDAG-GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; SDAG-GFX11-NEXT: v_cmp_ne_u16_e64 s2, 0x64, s2
+; SDAG-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; SDAG-GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
+; SDAG-GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; SDAG-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; SDAG-GFX11-NEXT: s_endpgm
+;
+; SDAG-GFX10-LABEL: v_icmp_i16_ne:
+; SDAG-GFX10: ; %bb.0:
+; SDAG-GFX10-NEXT: s_clause 0x1
+; SDAG-GFX10-NEXT: s_load_dword s4, s[0:1], 0x2c
+; SDAG-GFX10-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
+; SDAG-GFX10-NEXT: v_mov_b32_e32 v0, 0
+; SDAG-GFX10-NEXT: s_waitcnt lgkmcnt(0)
+; SDAG-GFX10-NEXT: v_cmp_ne_u16_e64 s0, 0x64, s4
+; SDAG-GFX10-NEXT: v_mov_b32_e32 v1, s0
+; SDAG-GFX10-NEXT: global_store_dword v0, v1, s[2:3]
+; SDAG-GFX10-NEXT: s_endpgm
+;
+; GISEL-GFX11-LABEL: v_icmp_i16_ne:
+; GISEL-GFX11: ; %bb.0:
+; GISEL-GFX11-NEXT: s_clause 0x1
+; GISEL-GFX11-NEXT: s_load_b32 s2, s[0:1], 0x2c
+; GISEL-GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
+; GISEL-GFX11-NEXT: v_mov_b32_e32 v1, 0
+; GISEL-GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; GISEL-GFX11-NEXT: v_cmp_ne_u16_e64 s2, 0x64, s2
+; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GISEL-GFX11-NEXT: v_mov_b32_e32 v0, s2
+; GISEL-GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
+; GISEL-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GISEL-GFX11-NEXT: s_endpgm
+;
+; GISEL-GFX10-LABEL: v_icmp_i16_ne:
+; GISEL-GFX10: ; %bb.0:
+; GISEL-GFX10-NEXT: s_clause 0x1
+; GISEL-GFX10-NEXT: s_load_dword s4, s[0:1], 0x2c
+; GISEL-GFX10-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
+; GISEL-GFX10-NEXT: v_mov_b32_e32 v1, 0
+; GISEL-GFX10-NEXT: s_waitcnt lgkmcnt(0)
+; GISEL-GFX10-NEXT: v_cmp_ne_u16_e64 s0, 0x64, s4
+; GISEL-GFX10-NEXT: v_mov_b32_e32 v0, s0
+; GISEL-GFX10-NEXT: global_store_dword v1, v0, s[2:3]
+; GISEL-GFX10-NEXT: s_endpgm
+ %result = call i32 @llvm.amdgcn.icmp.i16(i16 %src, i16 100, i32 33)
+ store i32 %result, i32 addrspace(1)* %out
+ ret void
+}
+
+define amdgpu_kernel void @v_icmp_i16_ugt(i32 addrspace(1)* %out, i16 %src) {
+; SDAG-GFX11-LABEL: v_icmp_i16_ugt:
+; SDAG-GFX11: ; %bb.0:
+; SDAG-GFX11-NEXT: s_clause 0x1
+; SDAG-GFX11-NEXT: s_load_b32 s2, s[0:1], 0x2c
+; SDAG-GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
+; SDAG-GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; SDAG-GFX11-NEXT: v_cmp_lt_u16_e64 s2, 0x64, s2
+; SDAG-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; SDAG-GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
+; SDAG-GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; SDAG-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; SDAG-GFX11-NEXT: s_endpgm
+;
+; SDAG-GFX10-LABEL: v_icmp_i16_ugt:
+; SDAG-GFX10: ; %bb.0:
+; SDAG-GFX10-NEXT: s_clause 0x1
+; SDAG-GFX10-NEXT: s_load_dword s4, s[0:1], 0x2c
+; SDAG-GFX10-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
+; SDAG-GFX10-NEXT: v_mov_b32_e32 v0, 0
+; SDAG-GFX10-NEXT: s_waitcnt lgkmcnt(0)
+; SDAG-GFX10-NEXT: v_cmp_lt_u16_e64 s0, 0x64, s4
+; SDAG-GFX10-NEXT: v_mov_b32_e32 v1, s0
+; SDAG-GFX10-NEXT: global_store_dword v0, v1, s[2:3]
+; SDAG-GFX10-NEXT: s_endpgm
+;
+; GISEL-GFX11-LABEL: v_icmp_i16_ugt:
+; GISEL-GFX11: ; %bb.0:
+; GISEL-GFX11-NEXT: s_clause 0x1
+; GISEL-GFX11-NEXT: s_load_b32 s2, s[0:1], 0x2c
+; GISEL-GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
+; GISEL-GFX11-NEXT: v_mov_b32_e32 v1, 0
+; GISEL-GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; GISEL-GFX11-NEXT: v_cmp_lt_u16_e64 s2, 0x64, s2
+; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GISEL-GFX11-NEXT: v_mov_b32_e32 v0, s2
+; GISEL-GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
+; GISEL-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GISEL-GFX11-NEXT: s_endpgm
+;
+; GISEL-GFX10-LABEL: v_icmp_i16_ugt:
+; GISEL-GFX10: ; %bb.0:
+; GISEL-GFX10-NEXT: s_clause 0x1
+; GISEL-GFX10-NEXT: s_load_dword s4, s[0:1], 0x2c
+; GISEL-GFX10-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
+; GISEL-GFX10-NEXT: v_mov_b32_e32 v1, 0
+; GISEL-GFX10-NEXT: s_waitcnt lgkmcnt(0)
+; GISEL-GFX10-NEXT: v_cmp_lt_u16_e64 s0, 0x64, s4
+; GISEL-GFX10-NEXT: v_mov_b32_e32 v0, s0
+; GISEL-GFX10-NEXT: global_store_dword v1, v0, s[2:3]
+; GISEL-GFX10-NEXT: s_endpgm
+ %result = call i32 @llvm.amdgcn.icmp.i16(i16 %src, i16 100, i32 34)
+ store i32 %result, i32 addrspace(1)* %out
+ ret void
+}
+
+define amdgpu_kernel void @v_icmp_i16_uge(i32 addrspace(1)* %out, i16 %src) {
+; SDAG-GFX11-LABEL: v_icmp_i16_uge:
+; SDAG-GFX11: ; %bb.0:
+; SDAG-GFX11-NEXT: s_clause 0x1
+; SDAG-GFX11-NEXT: s_load_b32 s2, s[0:1], 0x2c
+; SDAG-GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
+; SDAG-GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; SDAG-GFX11-NEXT: v_cmp_le_u16_e64 s2, 0x64, s2
+; SDAG-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; SDAG-GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
+; SDAG-GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; SDAG-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; SDAG-GFX11-NEXT: s_endpgm
+;
+; SDAG-GFX10-LABEL: v_icmp_i16_uge:
+; SDAG-GFX10: ; %bb.0:
+; SDAG-GFX10-NEXT: s_clause 0x1
+; SDAG-GFX10-NEXT: s_load_dword s4, s[0:1], 0x2c
+; SDAG-GFX10-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
+; SDAG-GFX10-NEXT: v_mov_b32_e32 v0, 0
+; SDAG-GFX10-NEXT: s_waitcnt lgkmcnt(0)
+; SDAG-GFX10-NEXT: v_cmp_le_u16_e64 s0, 0x64, s4
+; SDAG-GFX10-NEXT: v_mov_b32_e32 v1, s0
+; SDAG-GFX10-NEXT: global_store_dword v0, v1, s[2:3]
+; SDAG-GFX10-NEXT: s_endpgm
+;
+; GISEL-GFX11-LABEL: v_icmp_i16_uge:
+; GISEL-GFX11: ; %bb.0:
+; GISEL-GFX11-NEXT: s_clause 0x1
+; GISEL-GFX11-NEXT: s_load_b32 s2, s[0:1], 0x2c
+; GISEL-GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
+; GISEL-GFX11-NEXT: v_mov_b32_e32 v1, 0
+; GISEL-GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; GISEL-GFX11-NEXT: v_cmp_le_u16_e64 s2, 0x64, s2
+; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GISEL-GFX11-NEXT: v_mov_b32_e32 v0, s2
+; GISEL-GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
+; GISEL-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GISEL-GFX11-NEXT: s_endpgm
+;
+; GISEL-GFX10-LABEL: v_icmp_i16_uge:
+; GISEL-GFX10: ; %bb.0:
+; GISEL-GFX10-NEXT: s_clause 0x1
+; GISEL-GFX10-NEXT: s_load_dword s4, s[0:1], 0x2c
+; GISEL-GFX10-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
+; GISEL-GFX10-NEXT: v_mov_b32_e32 v1, 0
+; GISEL-GFX10-NEXT: s_waitcnt lgkmcnt(0)
+; GISEL-GFX10-NEXT: v_cmp_le_u16_e64 s0, 0x64, s4
+; GISEL-GFX10-NEXT: v_mov_b32_e32 v0, s0
+; GISEL-GFX10-NEXT: global_store_dword v1, v0, s[2:3]
+; GISEL-GFX10-NEXT: s_endpgm
+ %result = call i32 @llvm.amdgcn.icmp.i16(i16 %src, i16 100, i32 35)
+ store i32 %result, i32 addrspace(1)* %out
+ ret void
+}
+
+define amdgpu_kernel void @v_icmp_i16_ult(i32 addrspace(1)* %out, i16 %src) {
+; SDAG-GFX11-LABEL: v_icmp_i16_ult:
+; SDAG-GFX11: ; %bb.0:
+; SDAG-GFX11-NEXT: s_clause 0x1
+; SDAG-GFX11-NEXT: s_load_b32 s2, s[0:1], 0x2c
+; SDAG-GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
+; SDAG-GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; SDAG-GFX11-NEXT: v_cmp_gt_u16_e64 s2, 0x64, s2
+; SDAG-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; SDAG-GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
+; SDAG-GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; SDAG-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; SDAG-GFX11-NEXT: s_endpgm
+;
+; SDAG-GFX10-LABEL: v_icmp_i16_ult:
+; SDAG-GFX10: ; %bb.0:
+; SDAG-GFX10-NEXT: s_clause 0x1
+; SDAG-GFX10-NEXT: s_load_dword s4, s[0:1], 0x2c
+; SDAG-GFX10-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
+; SDAG-GFX10-NEXT: v_mov_b32_e32 v0, 0
+; SDAG-GFX10-NEXT: s_waitcnt lgkmcnt(0)
+; SDAG-GFX10-NEXT: v_cmp_gt_u16_e64 s0, 0x64, s4
+; SDAG-GFX10-NEXT: v_mov_b32_e32 v1, s0
+; SDAG-GFX10-NEXT: global_store_dword v0, v1, s[2:3]
+; SDAG-GFX10-NEXT: s_endpgm
+;
+; GISEL-GFX11-LABEL: v_icmp_i16_ult:
+; GISEL-GFX11: ; %bb.0:
+; GISEL-GFX11-NEXT: s_clause 0x1
+; GISEL-GFX11-NEXT: s_load_b32 s2, s[0:1], 0x2c
+; GISEL-GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
+; GISEL-GFX11-NEXT: v_mov_b32_e32 v1, 0
+; GISEL-GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; GISEL-GFX11-NEXT: v_cmp_gt_u16_e64 s2, 0x64, s2
+; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GISEL-GFX11-NEXT: v_mov_b32_e32 v0, s2
+; GISEL-GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
+; GISEL-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GISEL-GFX11-NEXT: s_endpgm
+;
+; GISEL-GFX10-LABEL: v_icmp_i16_ult:
+; GISEL-GFX10: ; %bb.0:
+; GISEL-GFX10-NEXT: s_clause 0x1
+; GISEL-GFX10-NEXT: s_load_dword s4, s[0:1], 0x2c
+; GISEL-GFX10-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
+; GISEL-GFX10-NEXT: v_mov_b32_e32 v1, 0
+; GISEL-GFX10-NEXT: s_waitcnt lgkmcnt(0)
+; GISEL-GFX10-NEXT: v_cmp_gt_u16_e64 s0, 0x64, s4
+; GISEL-GFX10-NEXT: v_mov_b32_e32 v0, s0
+; GISEL-GFX10-NEXT: global_store_dword v1, v0, s[2:3]
+; GISEL-GFX10-NEXT: s_endpgm
+ %result = call i32 @llvm.amdgcn.icmp.i16(i16 %src, i16 100, i32 36)
+ store i32 %result, i32 addrspace(1)* %out
+ ret void
+}
+
+define amdgpu_kernel void @v_icmp_i16_ule(i32 addrspace(1)* %out, i16 %src) {
+; SDAG-GFX11-LABEL: v_icmp_i16_ule:
+; SDAG-GFX11: ; %bb.0:
+; SDAG-GFX11-NEXT: s_clause 0x1
+; SDAG-GFX11-NEXT: s_load_b32 s2, s[0:1], 0x2c
+; SDAG-GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
+; SDAG-GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; SDAG-GFX11-NEXT: v_cmp_ge_u16_e64 s2, 0x64, s2
+; SDAG-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; SDAG-GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
+; SDAG-GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; SDAG-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; SDAG-GFX11-NEXT: s_endpgm
+;
+; SDAG-GFX10-LABEL: v_icmp_i16_ule:
+; SDAG-GFX10: ; %bb.0:
+; SDAG-GFX10-NEXT: s_clause 0x1
+; SDAG-GFX10-NEXT: s_load_dword s4, s[0:1], 0x2c
+; SDAG-GFX10-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
+; SDAG-GFX10-NEXT: v_mov_b32_e32 v0, 0
+; SDAG-GFX10-NEXT: s_waitcnt lgkmcnt(0)
+; SDAG-GFX10-NEXT: v_cmp_ge_u16_e64 s0, 0x64, s4
+; SDAG-GFX10-NEXT: v_mov_b32_e32 v1, s0
+; SDAG-GFX10-NEXT: global_store_dword v0, v1, s[2:3]
+; SDAG-GFX10-NEXT: s_endpgm
+;
+; GISEL-GFX11-LABEL: v_icmp_i16_ule:
+; GISEL-GFX11: ; %bb.0:
+; GISEL-GFX11-NEXT: s_clause 0x1
+; GISEL-GFX11-NEXT: s_load_b32 s2, s[0:1], 0x2c
+; GISEL-GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
+; GISEL-GFX11-NEXT: v_mov_b32_e32 v1, 0
+; GISEL-GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; GISEL-GFX11-NEXT: v_cmp_ge_u16_e64 s2, 0x64, s2
+; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GISEL-GFX11-NEXT: v_mov_b32_e32 v0, s2
+; GISEL-GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
+; GISEL-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GISEL-GFX11-NEXT: s_endpgm
+;
+; GISEL-GFX10-LABEL: v_icmp_i16_ule:
+; GISEL-GFX10: ; %bb.0:
+; GISEL-GFX10-NEXT: s_clause 0x1
+; GISEL-GFX10-NEXT: s_load_dword s4, s[0:1], 0x2c
+; GISEL-GFX10-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
+; GISEL-GFX10-NEXT: v_mov_b32_e32 v1, 0
+; GISEL-GFX10-NEXT: s_waitcnt lgkmcnt(0)
+; GISEL-GFX10-NEXT: v_cmp_ge_u16_e64 s0, 0x64, s4
+; GISEL-GFX10-NEXT: v_mov_b32_e32 v0, s0
+; GISEL-GFX10-NEXT: global_store_dword v1, v0, s[2:3]
+; GISEL-GFX10-NEXT: s_endpgm
+ %result = call i32 @llvm.amdgcn.icmp.i16(i16 %src, i16 100, i32 37)
+ store i32 %result, i32 addrspace(1)* %out
+ ret void
+}
+
+define amdgpu_kernel void @v_icmp_i16_sgt(i32 addrspace(1)* %out, i16 %src) #1 {
+; SDAG-GFX11-LABEL: v_icmp_i16_sgt:
+; SDAG-GFX11: ; %bb.0:
+; SDAG-GFX11-NEXT: s_clause 0x1
+; SDAG-GFX11-NEXT: s_load_b32 s2, s[0:1], 0x2c
+; SDAG-GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
+; SDAG-GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; SDAG-GFX11-NEXT: v_cmp_lt_i16_e64 s2, 0x64, s2
+; SDAG-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; SDAG-GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
+; SDAG-GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; SDAG-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; SDAG-GFX11-NEXT: s_endpgm
+;
+; SDAG-GFX10-LABEL: v_icmp_i16_sgt:
+; SDAG-GFX10: ; %bb.0:
+; SDAG-GFX10-NEXT: s_clause 0x1
+; SDAG-GFX10-NEXT: s_load_dword s4, s[0:1], 0x2c
+; SDAG-GFX10-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
+; SDAG-GFX10-NEXT: v_mov_b32_e32 v0, 0
+; SDAG-GFX10-NEXT: s_waitcnt lgkmcnt(0)
+; SDAG-GFX10-NEXT: v_cmp_lt_i16_e64 s0, 0x64, s4
+; SDAG-GFX10-NEXT: v_mov_b32_e32 v1, s0
+; SDAG-GFX10-NEXT: global_store_dword v0, v1, s[2:3]
+; SDAG-GFX10-NEXT: s_endpgm
+;
+; GISEL-GFX11-LABEL: v_icmp_i16_sgt:
+; GISEL-GFX11: ; %bb.0:
+; GISEL-GFX11-NEXT: s_clause 0x1
+; GISEL-GFX11-NEXT: s_load_b32 s2, s[0:1], 0x2c
+; GISEL-GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
+; GISEL-GFX11-NEXT: v_mov_b32_e32 v1, 0
+; GISEL-GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; GISEL-GFX11-NEXT: v_cmp_lt_i16_e64 s2, 0x64, s2
+; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GISEL-GFX11-NEXT: v_mov_b32_e32 v0, s2
+; GISEL-GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
+; GISEL-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GISEL-GFX11-NEXT: s_endpgm
+;
+; GISEL-GFX10-LABEL: v_icmp_i16_sgt:
+; GISEL-GFX10: ; %bb.0:
+; GISEL-GFX10-NEXT: s_clause 0x1
+; GISEL-GFX10-NEXT: s_load_dword s4, s[0:1], 0x2c
+; GISEL-GFX10-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
+; GISEL-GFX10-NEXT: v_mov_b32_e32 v1, 0
+; GISEL-GFX10-NEXT: s_waitcnt lgkmcnt(0)
+; GISEL-GFX10-NEXT: v_cmp_lt_i16_e64 s0, 0x64, s4
+; GISEL-GFX10-NEXT: v_mov_b32_e32 v0, s0
+; GISEL-GFX10-NEXT: global_store_dword v1, v0, s[2:3]
+; GISEL-GFX10-NEXT: s_endpgm
+ %result = call i32 @llvm.amdgcn.icmp.i16(i16 %src, i16 100, i32 38)
+ store i32 %result, i32 addrspace(1)* %out
+ ret void
+}
+
+define amdgpu_kernel void @v_icmp_i16_sge(i32 addrspace(1)* %out, i16 %src) {
+; SDAG-GFX11-LABEL: v_icmp_i16_sge:
+; SDAG-GFX11: ; %bb.0:
+; SDAG-GFX11-NEXT: s_clause 0x1
+; SDAG-GFX11-NEXT: s_load_b32 s2, s[0:1], 0x2c
+; SDAG-GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
+; SDAG-GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; SDAG-GFX11-NEXT: v_cmp_le_i16_e64 s2, 0x64, s2
+; SDAG-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; SDAG-GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
+; SDAG-GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; SDAG-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; SDAG-GFX11-NEXT: s_endpgm
+;
+; SDAG-GFX10-LABEL: v_icmp_i16_sge:
+; SDAG-GFX10: ; %bb.0:
+; SDAG-GFX10-NEXT: s_clause 0x1
+; SDAG-GFX10-NEXT: s_load_dword s4, s[0:1], 0x2c
+; SDAG-GFX10-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
+; SDAG-GFX10-NEXT: v_mov_b32_e32 v0, 0
+; SDAG-GFX10-NEXT: s_waitcnt lgkmcnt(0)
+; SDAG-GFX10-NEXT: v_cmp_le_i16_e64 s0, 0x64, s4
+; SDAG-GFX10-NEXT: v_mov_b32_e32 v1, s0
+; SDAG-GFX10-NEXT: global_store_dword v0, v1, s[2:3]
+; SDAG-GFX10-NEXT: s_endpgm
+;
+; GISEL-GFX11-LABEL: v_icmp_i16_sge:
+; GISEL-GFX11: ; %bb.0:
+; GISEL-GFX11-NEXT: s_clause 0x1
+; GISEL-GFX11-NEXT: s_load_b32 s2, s[0:1], 0x2c
+; GISEL-GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
+; GISEL-GFX11-NEXT: v_mov_b32_e32 v1, 0
+; GISEL-GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; GISEL-GFX11-NEXT: v_cmp_le_i16_e64 s2, 0x64, s2
+; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GISEL-GFX11-NEXT: v_mov_b32_e32 v0, s2
+; GISEL-GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
+; GISEL-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GISEL-GFX11-NEXT: s_endpgm
+;
+; GISEL-GFX10-LABEL: v_icmp_i16_sge:
+; GISEL-GFX10: ; %bb.0:
+; GISEL-GFX10-NEXT: s_clause 0x1
+; GISEL-GFX10-NEXT: s_load_dword s4, s[0:1], 0x2c
+; GISEL-GFX10-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
+; GISEL-GFX10-NEXT: v_mov_b32_e32 v1, 0
+; GISEL-GFX10-NEXT: s_waitcnt lgkmcnt(0)
+; GISEL-GFX10-NEXT: v_cmp_le_i16_e64 s0, 0x64, s4
+; GISEL-GFX10-NEXT: v_mov_b32_e32 v0, s0
+; GISEL-GFX10-NEXT: global_store_dword v1, v0, s[2:3]
+; GISEL-GFX10-NEXT: s_endpgm
+ %result = call i32 @llvm.amdgcn.icmp.i16(i16 %src, i16 100, i32 39)
+ store i32 %result, i32 addrspace(1)* %out
+ ret void
+}
+
+define amdgpu_kernel void @v_icmp_i16_slt(i32 addrspace(1)* %out, i16 %src) {
+; SDAG-GFX11-LABEL: v_icmp_i16_slt:
+; SDAG-GFX11: ; %bb.0:
+; SDAG-GFX11-NEXT: s_clause 0x1
+; SDAG-GFX11-NEXT: s_load_b32 s2, s[0:1], 0x2c
+; SDAG-GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
+; SDAG-GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; SDAG-GFX11-NEXT: v_cmp_gt_i16_e64 s2, 0x64, s2
+; SDAG-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; SDAG-GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
+; SDAG-GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; SDAG-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; SDAG-GFX11-NEXT: s_endpgm
+;
+; SDAG-GFX10-LABEL: v_icmp_i16_slt:
+; SDAG-GFX10: ; %bb.0:
+; SDAG-GFX10-NEXT: s_clause 0x1
+; SDAG-GFX10-NEXT: s_load_dword s4, s[0:1], 0x2c
+; SDAG-GFX10-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
+; SDAG-GFX10-NEXT: v_mov_b32_e32 v0, 0
+; SDAG-GFX10-NEXT: s_waitcnt lgkmcnt(0)
+; SDAG-GFX10-NEXT: v_cmp_gt_i16_e64 s0, 0x64, s4
+; SDAG-GFX10-NEXT: v_mov_b32_e32 v1, s0
+; SDAG-GFX10-NEXT: global_store_dword v0, v1, s[2:3]
+; SDAG-GFX10-NEXT: s_endpgm
+;
+; GISEL-GFX11-LABEL: v_icmp_i16_slt:
+; GISEL-GFX11: ; %bb.0:
+; GISEL-GFX11-NEXT: s_clause 0x1
+; GISEL-GFX11-NEXT: s_load_b32 s2, s[0:1], 0x2c
+; GISEL-GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
+; GISEL-GFX11-NEXT: v_mov_b32_e32 v1, 0
+; GISEL-GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; GISEL-GFX11-NEXT: v_cmp_gt_i16_e64 s2, 0x64, s2
+; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GISEL-GFX11-NEXT: v_mov_b32_e32 v0, s2
+; GISEL-GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
+; GISEL-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GISEL-GFX11-NEXT: s_endpgm
+;
+; GISEL-GFX10-LABEL: v_icmp_i16_slt:
+; GISEL-GFX10: ; %bb.0:
+; GISEL-GFX10-NEXT: s_clause 0x1
+; GISEL-GFX10-NEXT: s_load_dword s4, s[0:1], 0x2c
+; GISEL-GFX10-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
+; GISEL-GFX10-NEXT: v_mov_b32_e32 v1, 0
+; GISEL-GFX10-NEXT: s_waitcnt lgkmcnt(0)
+; GISEL-GFX10-NEXT: v_cmp_gt_i16_e64 s0, 0x64, s4
+; GISEL-GFX10-NEXT: v_mov_b32_e32 v0, s0
+; GISEL-GFX10-NEXT: global_store_dword v1, v0, s[2:3]
+; GISEL-GFX10-NEXT: s_endpgm
+ %result = call i32 @llvm.amdgcn.icmp.i16(i16 %src, i16 100, i32 40)
+ store i32 %result, i32 addrspace(1)* %out
+ ret void
+}
+
+define amdgpu_kernel void @v_icmp_i16_sle(i32 addrspace(1)* %out, i16 %src) {
+; SDAG-GFX11-LABEL: v_icmp_i16_sle:
+; SDAG-GFX11: ; %bb.0:
+; SDAG-GFX11-NEXT: s_clause 0x1
+; SDAG-GFX11-NEXT: s_load_b32 s2, s[0:1], 0x2c
+; SDAG-GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
+; SDAG-GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; SDAG-GFX11-NEXT: v_cmp_ge_i16_e64 s2, 0x64, s2
+; SDAG-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; SDAG-GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
+; SDAG-GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; SDAG-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; SDAG-GFX11-NEXT: s_endpgm
+;
+; SDAG-GFX10-LABEL: v_icmp_i16_sle:
+; SDAG-GFX10: ; %bb.0:
+; SDAG-GFX10-NEXT: s_clause 0x1
+; SDAG-GFX10-NEXT: s_load_dword s4, s[0:1], 0x2c
+; SDAG-GFX10-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
+; SDAG-GFX10-NEXT: v_mov_b32_e32 v0, 0
+; SDAG-GFX10-NEXT: s_waitcnt lgkmcnt(0)
+; SDAG-GFX10-NEXT: v_cmp_ge_i16_e64 s0, 0x64, s4
+; SDAG-GFX10-NEXT: v_mov_b32_e32 v1, s0
+; SDAG-GFX10-NEXT: global_store_dword v0, v1, s[2:3]
+; SDAG-GFX10-NEXT: s_endpgm
+;
+; GISEL-GFX11-LABEL: v_icmp_i16_sle:
+; GISEL-GFX11: ; %bb.0:
+; GISEL-GFX11-NEXT: s_clause 0x1
+; GISEL-GFX11-NEXT: s_load_b32 s2, s[0:1], 0x2c
+; GISEL-GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
+; GISEL-GFX11-NEXT: v_mov_b32_e32 v1, 0
+; GISEL-GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; GISEL-GFX11-NEXT: v_cmp_ge_i16_e64 s2, 0x64, s2
+; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GISEL-GFX11-NEXT: v_mov_b32_e32 v0, s2
+; GISEL-GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
+; GISEL-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GISEL-GFX11-NEXT: s_endpgm
+;
+; GISEL-GFX10-LABEL: v_icmp_i16_sle:
+; GISEL-GFX10: ; %bb.0:
+; GISEL-GFX10-NEXT: s_clause 0x1
+; GISEL-GFX10-NEXT: s_load_dword s4, s[0:1], 0x2c
+; GISEL-GFX10-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
+; GISEL-GFX10-NEXT: v_mov_b32_e32 v1, 0
+; GISEL-GFX10-NEXT: s_waitcnt lgkmcnt(0)
+; GISEL-GFX10-NEXT: v_cmp_ge_i16_e64 s0, 0x64, s4
+; GISEL-GFX10-NEXT: v_mov_b32_e32 v0, s0
+; GISEL-GFX10-NEXT: global_store_dword v1, v0, s[2:3]
+; GISEL-GFX10-NEXT: s_endpgm
+ %result = call i32 @llvm.amdgcn.icmp.i16(i16 %src, i16 100, i32 41)
+ store i32 %result, i32 addrspace(1)* %out
+ ret void
+}
+
+define amdgpu_kernel void @v_icmp_i1_ne0(i32 addrspace(1)* %out, i32 %a, i32 %b) {
+; GFX11-LABEL: v_icmp_i1_ne0:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
+; GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-NEXT: s_cmp_gt_u32 s2, 1
+; GFX11-NEXT: s_cselect_b32 s2, -1, 0
+; GFX11-NEXT: s_cmp_gt_u32 s3, 2
+; GFX11-NEXT: s_cselect_b32 s3, -1, 0
+; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX11-NEXT: s_and_b32 s2, s2, s3
+; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
+; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX11-NEXT: s_endpgm
+;
+; GFX10-LABEL: v_icmp_i1_ne0:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
+; GFX10-NEXT: v_mov_b32_e32 v0, 0
+; GFX10-NEXT: s_waitcnt lgkmcnt(0)
+; GFX10-NEXT: s_cmp_gt_u32 s2, 1
+; GFX10-NEXT: s_cselect_b32 s2, -1, 0
+; GFX10-NEXT: s_cmp_gt_u32 s3, 2
+; GFX10-NEXT: s_cselect_b32 s3, -1, 0
+; GFX10-NEXT: s_and_b32 s2, s2, s3
+; GFX10-NEXT: v_mov_b32_e32 v1, s2
+; GFX10-NEXT: global_store_dword v0, v1, s[0:1]
+; GFX10-NEXT: s_endpgm
+ %c0 = icmp ugt i32 %a, 1
+ %c1 = icmp ugt i32 %b, 2
+ %src = and i1 %c0, %c1
+ %result = call i32 @llvm.amdgcn.icmp.i1(i1 %src, i1 false, i32 33)
+ store i32 %result, i32 addrspace(1)* %out
+ ret void
+}
+
+define amdgpu_ps void @test_intr_icmp_i32_invalid_cc(i32 addrspace(1)* %out, i32 %src) {
+; SDAG-GFX11-LABEL: test_intr_icmp_i32_invalid_cc:
+; SDAG-GFX11: ; %bb.0:
+; SDAG-GFX11-NEXT: s_endpgm
+;
+; SDAG-GFX10-LABEL: test_intr_icmp_i32_invalid_cc:
+; SDAG-GFX10: ; %bb.0:
+; SDAG-GFX10-NEXT: s_endpgm
+;
+; GISEL-GFX11-LABEL: test_intr_icmp_i32_invalid_cc:
+; GISEL-GFX11: ; %bb.0:
+; GISEL-GFX11-NEXT: global_store_b32 v[0:1], v0, off
+; GISEL-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GISEL-GFX11-NEXT: s_endpgm
+;
+; GISEL-GFX10-LABEL: test_intr_icmp_i32_invalid_cc:
+; GISEL-GFX10: ; %bb.0:
+; GISEL-GFX10-NEXT: global_store_dword v[0:1], v0, off
+; GISEL-GFX10-NEXT: s_endpgm
+ %result = call i32 @llvm.amdgcn.icmp.i32(i32 %src, i32 100, i32 9999)
+ store i32 %result, i32 addrspace(1)* %out
+ ret void
+}
+
+attributes #0 = { nounwind readnone convergent }
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; GCN: {{.*}}
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.icmp.w64.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.icmp.w64.ll
new file mode 100644
index 0000000000000..f76973f791b2a
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.icmp.w64.ll
@@ -0,0 +1,2161 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -march=amdgcn -mcpu=gfx1100 -mattr="+wavefrontsize64" -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX11,SDAG-GFX11 %s
+; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VI,SDAG-VI %s
+; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9,SDAG-GFX9 %s
+
+; RUN: llc -global-isel -global-isel-abort=2 -march=amdgcn -mcpu=gfx1100 -mattr="+wavefrontsize64" -verify-machineinstrs < %s 2>%t | FileCheck -check-prefixes=GCN,GFX11,GISEL-GFX11 %s
+; RUN: FileCheck --check-prefix=ERR %s < %t
+; RUN: llc -global-isel -global-isel-abort=2 -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s 2>%t | FileCheck -check-prefixes=GCN,VI,GISEL-VI %s
+; RUN: FileCheck --check-prefix=ERR %s < %t
+; RUN: llc -global-isel -global-isel-abort=2 -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s 2>%t | FileCheck -check-prefixes=GCN,GFX9,GISEL-GFX9 %s
+; RUN: FileCheck --check-prefix=ERR %s < %t
+
+; Note: GlobalISel abort is disabled so we don't crash on i1 inputs.
+; They are allowed in DAGISel but we (intentionally) don't support them
+; in GlobalISel.
+
+; ERR: warning: Instruction selection used fallback path for v_icmp_i1_ne0
+
+declare i64 @llvm.amdgcn.icmp.i32(i32, i32, i32) #0
+declare i64 @llvm.amdgcn.icmp.i64(i64, i64, i32) #0
+declare i64 @llvm.amdgcn.icmp.i16(i16, i16, i32) #0
+declare i64 @llvm.amdgcn.icmp.i1(i1, i1, i32) #0
+
+define amdgpu_kernel void @v_icmp_i32_eq(i64 addrspace(1)* %out, i32 %src) {
+; GFX11-LABEL: v_icmp_i32_eq:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_clause 0x1
+; GFX11-NEXT: s_load_b32 s2, s[0:1], 0x2c
+; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
+; GFX11-NEXT: v_mov_b32_e32 v2, 0
+; GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-NEXT: v_cmp_eq_u32_e64 s[2:3], 0x64, s2
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-NEXT: v_mov_b32_e32 v0, s2
+; GFX11-NEXT: v_mov_b32_e32 v1, s3
+; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX11-NEXT: s_endpgm
+;
+; SDAG-VI-LABEL: v_icmp_i32_eq:
+; SDAG-VI: ; %bb.0:
+; SDAG-VI-NEXT: s_load_dword s2, s[0:1], 0x2c
+; SDAG-VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
+; SDAG-VI-NEXT: v_mov_b32_e32 v0, 0x64
+; SDAG-VI-NEXT: s_waitcnt lgkmcnt(0)
+; SDAG-VI-NEXT: v_cmp_eq_u32_e64 s[2:3], s2, v0
+; SDAG-VI-NEXT: v_mov_b32_e32 v0, s0
+; SDAG-VI-NEXT: v_mov_b32_e32 v2, s2
+; SDAG-VI-NEXT: v_mov_b32_e32 v1, s1
+; SDAG-VI-NEXT: v_mov_b32_e32 v3, s3
+; SDAG-VI-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
+; SDAG-VI-NEXT: s_endpgm
+;
+; GFX9-LABEL: v_icmp_i32_eq:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_load_dword s4, s[0:1], 0x2c
+; GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
+; GFX9-NEXT: v_mov_b32_e32 v0, 0x64
+; GFX9-NEXT: v_mov_b32_e32 v2, 0
+; GFX9-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-NEXT: v_cmp_eq_u32_e64 s[0:1], s4, v0
+; GFX9-NEXT: v_mov_b32_e32 v0, s0
+; GFX9-NEXT: v_mov_b32_e32 v1, s1
+; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[2:3]
+; GFX9-NEXT: s_endpgm
+;
+; GISEL-VI-LABEL: v_icmp_i32_eq:
+; GISEL-VI: ; %bb.0:
+; GISEL-VI-NEXT: s_load_dword s2, s[0:1], 0x2c
+; GISEL-VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
+; GISEL-VI-NEXT: v_mov_b32_e32 v0, 0x64
+; GISEL-VI-NEXT: s_waitcnt lgkmcnt(0)
+; GISEL-VI-NEXT: v_cmp_eq_u32_e64 s[2:3], s2, v0
+; GISEL-VI-NEXT: v_mov_b32_e32 v0, s2
+; GISEL-VI-NEXT: v_mov_b32_e32 v3, s1
+; GISEL-VI-NEXT: v_mov_b32_e32 v1, s3
+; GISEL-VI-NEXT: v_mov_b32_e32 v2, s0
+; GISEL-VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
+; GISEL-VI-NEXT: s_endpgm
+ %result = call i64 @llvm.amdgcn.icmp.i32(i32 %src, i32 100, i32 32)
+ store i64 %result, i64 addrspace(1)* %out
+ ret void
+}
+
+define amdgpu_kernel void @v_icmp_i32(i64 addrspace(1)* %out, i32 %src) {
+; SDAG-GFX11-LABEL: v_icmp_i32:
+; SDAG-GFX11: ; %bb.0:
+; SDAG-GFX11-NEXT: s_endpgm
+;
+; SDAG-VI-LABEL: v_icmp_i32:
+; SDAG-VI: ; %bb.0:
+; SDAG-VI-NEXT: s_endpgm
+;
+; SDAG-GFX9-LABEL: v_icmp_i32:
+; SDAG-GFX9: ; %bb.0:
+; SDAG-GFX9-NEXT: s_endpgm
+;
+; GISEL-GFX11-LABEL: v_icmp_i32:
+; GISEL-GFX11: ; %bb.0:
+; GISEL-GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
+; GISEL-GFX11-NEXT: v_mov_b32_e32 v0, 0
+; GISEL-GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; GISEL-GFX11-NEXT: global_store_b64 v0, v[0:1], s[0:1]
+; GISEL-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GISEL-GFX11-NEXT: s_endpgm
+;
+; GISEL-VI-LABEL: v_icmp_i32:
+; GISEL-VI: ; %bb.0:
+; GISEL-VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
+; GISEL-VI-NEXT: s_waitcnt lgkmcnt(0)
+; GISEL-VI-NEXT: v_mov_b32_e32 v0, s0
+; GISEL-VI-NEXT: v_mov_b32_e32 v1, s1
+; GISEL-VI-NEXT: flat_store_dwordx2 v[0:1], v[0:1]
+; GISEL-VI-NEXT: s_endpgm
+;
+; GISEL-GFX9-LABEL: v_icmp_i32:
+; GISEL-GFX9: ; %bb.0:
+; GISEL-GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
+; GISEL-GFX9-NEXT: v_mov_b32_e32 v0, 0
+; GISEL-GFX9-NEXT: s_waitcnt lgkmcnt(0)
+; GISEL-GFX9-NEXT: global_store_dwordx2 v0, v[0:1], s[0:1]
+; GISEL-GFX9-NEXT: s_endpgm
+ %result = call i64 @llvm.amdgcn.icmp.i32(i32 %src, i32 100, i32 30)
+ store i64 %result, i64 addrspace(1)* %out
+ ret void
+}
+
+define amdgpu_kernel void @v_icmp_i32_ne(i64 addrspace(1)* %out, i32 %src) {
+; GFX11-LABEL: v_icmp_i32_ne:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_clause 0x1
+; GFX11-NEXT: s_load_b32 s2, s[0:1], 0x2c
+; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
+; GFX11-NEXT: v_mov_b32_e32 v2, 0
+; GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-NEXT: v_cmp_ne_u32_e64 s[2:3], 0x64, s2
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-NEXT: v_mov_b32_e32 v0, s2
+; GFX11-NEXT: v_mov_b32_e32 v1, s3
+; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX11-NEXT: s_endpgm
+;
+; SDAG-VI-LABEL: v_icmp_i32_ne:
+; SDAG-VI: ; %bb.0:
+; SDAG-VI-NEXT: s_load_dword s2, s[0:1], 0x2c
+; SDAG-VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
+; SDAG-VI-NEXT: v_mov_b32_e32 v0, 0x64
+; SDAG-VI-NEXT: s_waitcnt lgkmcnt(0)
+; SDAG-VI-NEXT: v_cmp_ne_u32_e64 s[2:3], s2, v0
+; SDAG-VI-NEXT: v_mov_b32_e32 v0, s0
+; SDAG-VI-NEXT: v_mov_b32_e32 v2, s2
+; SDAG-VI-NEXT: v_mov_b32_e32 v1, s1
+; SDAG-VI-NEXT: v_mov_b32_e32 v3, s3
+; SDAG-VI-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
+; SDAG-VI-NEXT: s_endpgm
+;
+; GFX9-LABEL: v_icmp_i32_ne:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_load_dword s4, s[0:1], 0x2c
+; GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
+; GFX9-NEXT: v_mov_b32_e32 v0, 0x64
+; GFX9-NEXT: v_mov_b32_e32 v2, 0
+; GFX9-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-NEXT: v_cmp_ne_u32_e64 s[0:1], s4, v0
+; GFX9-NEXT: v_mov_b32_e32 v0, s0
+; GFX9-NEXT: v_mov_b32_e32 v1, s1
+; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[2:3]
+; GFX9-NEXT: s_endpgm
+;
+; GISEL-VI-LABEL: v_icmp_i32_ne:
+; GISEL-VI: ; %bb.0:
+; GISEL-VI-NEXT: s_load_dword s2, s[0:1], 0x2c
+; GISEL-VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
+; GISEL-VI-NEXT: v_mov_b32_e32 v0, 0x64
+; GISEL-VI-NEXT: s_waitcnt lgkmcnt(0)
+; GISEL-VI-NEXT: v_cmp_ne_u32_e64 s[2:3], s2, v0
+; GISEL-VI-NEXT: v_mov_b32_e32 v0, s2
+; GISEL-VI-NEXT: v_mov_b32_e32 v3, s1
+; GISEL-VI-NEXT: v_mov_b32_e32 v1, s3
+; GISEL-VI-NEXT: v_mov_b32_e32 v2, s0
+; GISEL-VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
+; GISEL-VI-NEXT: s_endpgm
+ %result = call i64 @llvm.amdgcn.icmp.i32(i32 %src, i32 100, i32 33)
+ store i64 %result, i64 addrspace(1)* %out
+ ret void
+}
+
+define amdgpu_kernel void @v_icmp_i32_ugt(i64 addrspace(1)* %out, i32 %src) {
+; GFX11-LABEL: v_icmp_i32_ugt:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_clause 0x1
+; GFX11-NEXT: s_load_b32 s2, s[0:1], 0x2c
+; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
+; GFX11-NEXT: v_mov_b32_e32 v2, 0
+; GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-NEXT: v_cmp_lt_u32_e64 s[2:3], 0x64, s2
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-NEXT: v_mov_b32_e32 v0, s2
+; GFX11-NEXT: v_mov_b32_e32 v1, s3
+; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX11-NEXT: s_endpgm
+;
+; SDAG-VI-LABEL: v_icmp_i32_ugt:
+; SDAG-VI: ; %bb.0:
+; SDAG-VI-NEXT: s_load_dword s2, s[0:1], 0x2c
+; SDAG-VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
+; SDAG-VI-NEXT: v_mov_b32_e32 v0, 0x64
+; SDAG-VI-NEXT: s_waitcnt lgkmcnt(0)
+; SDAG-VI-NEXT: v_cmp_gt_u32_e64 s[2:3], s2, v0
+; SDAG-VI-NEXT: v_mov_b32_e32 v0, s0
+; SDAG-VI-NEXT: v_mov_b32_e32 v2, s2
+; SDAG-VI-NEXT: v_mov_b32_e32 v1, s1
+; SDAG-VI-NEXT: v_mov_b32_e32 v3, s3
+; SDAG-VI-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
+; SDAG-VI-NEXT: s_endpgm
+;
+; GFX9-LABEL: v_icmp_i32_ugt:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_load_dword s4, s[0:1], 0x2c
+; GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
+; GFX9-NEXT: v_mov_b32_e32 v0, 0x64
+; GFX9-NEXT: v_mov_b32_e32 v2, 0
+; GFX9-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-NEXT: v_cmp_gt_u32_e64 s[0:1], s4, v0
+; GFX9-NEXT: v_mov_b32_e32 v0, s0
+; GFX9-NEXT: v_mov_b32_e32 v1, s1
+; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[2:3]
+; GFX9-NEXT: s_endpgm
+;
+; GISEL-VI-LABEL: v_icmp_i32_ugt:
+; GISEL-VI: ; %bb.0:
+; GISEL-VI-NEXT: s_load_dword s2, s[0:1], 0x2c
+; GISEL-VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
+; GISEL-VI-NEXT: v_mov_b32_e32 v0, 0x64
+; GISEL-VI-NEXT: s_waitcnt lgkmcnt(0)
+; GISEL-VI-NEXT: v_cmp_gt_u32_e64 s[2:3], s2, v0
+; GISEL-VI-NEXT: v_mov_b32_e32 v0, s2
+; GISEL-VI-NEXT: v_mov_b32_e32 v3, s1
+; GISEL-VI-NEXT: v_mov_b32_e32 v1, s3
+; GISEL-VI-NEXT: v_mov_b32_e32 v2, s0
+; GISEL-VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
+; GISEL-VI-NEXT: s_endpgm
+ %result = call i64 @llvm.amdgcn.icmp.i32(i32 %src, i32 100, i32 34)
+ store i64 %result, i64 addrspace(1)* %out
+ ret void
+}
+
+define amdgpu_kernel void @v_icmp_i32_uge(i64 addrspace(1)* %out, i32 %src) {
+; GFX11-LABEL: v_icmp_i32_uge:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_clause 0x1
+; GFX11-NEXT: s_load_b32 s2, s[0:1], 0x2c
+; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
+; GFX11-NEXT: v_mov_b32_e32 v2, 0
+; GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-NEXT: v_cmp_le_u32_e64 s[2:3], 0x64, s2
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-NEXT: v_mov_b32_e32 v0, s2
+; GFX11-NEXT: v_mov_b32_e32 v1, s3
+; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX11-NEXT: s_endpgm
+;
+; SDAG-VI-LABEL: v_icmp_i32_uge:
+; SDAG-VI: ; %bb.0:
+; SDAG-VI-NEXT: s_load_dword s2, s[0:1], 0x2c
+; SDAG-VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
+; SDAG-VI-NEXT: v_mov_b32_e32 v0, 0x64
+; SDAG-VI-NEXT: s_waitcnt lgkmcnt(0)
+; SDAG-VI-NEXT: v_cmp_ge_u32_e64 s[2:3], s2, v0
+; SDAG-VI-NEXT: v_mov_b32_e32 v0, s0
+; SDAG-VI-NEXT: v_mov_b32_e32 v2, s2
+; SDAG-VI-NEXT: v_mov_b32_e32 v1, s1
+; SDAG-VI-NEXT: v_mov_b32_e32 v3, s3
+; SDAG-VI-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
+; SDAG-VI-NEXT: s_endpgm
+;
+; GFX9-LABEL: v_icmp_i32_uge:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_load_dword s4, s[0:1], 0x2c
+; GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
+; GFX9-NEXT: v_mov_b32_e32 v0, 0x64
+; GFX9-NEXT: v_mov_b32_e32 v2, 0
+; GFX9-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-NEXT: v_cmp_ge_u32_e64 s[0:1], s4, v0
+; GFX9-NEXT: v_mov_b32_e32 v0, s0
+; GFX9-NEXT: v_mov_b32_e32 v1, s1
+; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[2:3]
+; GFX9-NEXT: s_endpgm
+;
+; GISEL-VI-LABEL: v_icmp_i32_uge:
+; GISEL-VI: ; %bb.0:
+; GISEL-VI-NEXT: s_load_dword s2, s[0:1], 0x2c
+; GISEL-VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
+; GISEL-VI-NEXT: v_mov_b32_e32 v0, 0x64
+; GISEL-VI-NEXT: s_waitcnt lgkmcnt(0)
+; GISEL-VI-NEXT: v_cmp_ge_u32_e64 s[2:3], s2, v0
+; GISEL-VI-NEXT: v_mov_b32_e32 v0, s2
+; GISEL-VI-NEXT: v_mov_b32_e32 v3, s1
+; GISEL-VI-NEXT: v_mov_b32_e32 v1, s3
+; GISEL-VI-NEXT: v_mov_b32_e32 v2, s0
+; GISEL-VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
+; GISEL-VI-NEXT: s_endpgm
+ %result = call i64 @llvm.amdgcn.icmp.i32(i32 %src, i32 100, i32 35)
+ store i64 %result, i64 addrspace(1)* %out
+ ret void
+}
+
+define amdgpu_kernel void @v_icmp_i32_ult(i64 addrspace(1)* %out, i32 %src) {
+; GFX11-LABEL: v_icmp_i32_ult:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_clause 0x1
+; GFX11-NEXT: s_load_b32 s2, s[0:1], 0x2c
+; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
+; GFX11-NEXT: v_mov_b32_e32 v2, 0
+; GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-NEXT: v_cmp_gt_u32_e64 s[2:3], 0x64, s2
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-NEXT: v_mov_b32_e32 v0, s2
+; GFX11-NEXT: v_mov_b32_e32 v1, s3
+; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX11-NEXT: s_endpgm
+;
+; SDAG-VI-LABEL: v_icmp_i32_ult:
+; SDAG-VI: ; %bb.0:
+; SDAG-VI-NEXT: s_load_dword s2, s[0:1], 0x2c
+; SDAG-VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
+; SDAG-VI-NEXT: v_mov_b32_e32 v0, 0x64
+; SDAG-VI-NEXT: s_waitcnt lgkmcnt(0)
+; SDAG-VI-NEXT: v_cmp_lt_u32_e64 s[2:3], s2, v0
+; SDAG-VI-NEXT: v_mov_b32_e32 v0, s0
+; SDAG-VI-NEXT: v_mov_b32_e32 v2, s2
+; SDAG-VI-NEXT: v_mov_b32_e32 v1, s1
+; SDAG-VI-NEXT: v_mov_b32_e32 v3, s3
+; SDAG-VI-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
+; SDAG-VI-NEXT: s_endpgm
+;
+; GFX9-LABEL: v_icmp_i32_ult:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_load_dword s4, s[0:1], 0x2c
+; GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
+; GFX9-NEXT: v_mov_b32_e32 v0, 0x64
+; GFX9-NEXT: v_mov_b32_e32 v2, 0
+; GFX9-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-NEXT: v_cmp_lt_u32_e64 s[0:1], s4, v0
+; GFX9-NEXT: v_mov_b32_e32 v0, s0
+; GFX9-NEXT: v_mov_b32_e32 v1, s1
+; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[2:3]
+; GFX9-NEXT: s_endpgm
+;
+; GISEL-VI-LABEL: v_icmp_i32_ult:
+; GISEL-VI: ; %bb.0:
+; GISEL-VI-NEXT: s_load_dword s2, s[0:1], 0x2c
+; GISEL-VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
+; GISEL-VI-NEXT: v_mov_b32_e32 v0, 0x64
+; GISEL-VI-NEXT: s_waitcnt lgkmcnt(0)
+; GISEL-VI-NEXT: v_cmp_lt_u32_e64 s[2:3], s2, v0
+; GISEL-VI-NEXT: v_mov_b32_e32 v0, s2
+; GISEL-VI-NEXT: v_mov_b32_e32 v3, s1
+; GISEL-VI-NEXT: v_mov_b32_e32 v1, s3
+; GISEL-VI-NEXT: v_mov_b32_e32 v2, s0
+; GISEL-VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
+; GISEL-VI-NEXT: s_endpgm
+ %result = call i64 @llvm.amdgcn.icmp.i32(i32 %src, i32 100, i32 36)
+ store i64 %result, i64 addrspace(1)* %out
+ ret void
+}
+
+define amdgpu_kernel void @v_icmp_i32_ule(i64 addrspace(1)* %out, i32 %src) {
+; GFX11-LABEL: v_icmp_i32_ule:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_clause 0x1
+; GFX11-NEXT: s_load_b32 s2, s[0:1], 0x2c
+; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
+; GFX11-NEXT: v_mov_b32_e32 v2, 0
+; GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-NEXT: v_cmp_ge_u32_e64 s[2:3], 0x64, s2
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-NEXT: v_mov_b32_e32 v0, s2
+; GFX11-NEXT: v_mov_b32_e32 v1, s3
+; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX11-NEXT: s_endpgm
+;
+; SDAG-VI-LABEL: v_icmp_i32_ule:
+; SDAG-VI: ; %bb.0:
+; SDAG-VI-NEXT: s_load_dword s2, s[0:1], 0x2c
+; SDAG-VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
+; SDAG-VI-NEXT: v_mov_b32_e32 v0, 0x64
+; SDAG-VI-NEXT: s_waitcnt lgkmcnt(0)
+; SDAG-VI-NEXT: v_cmp_le_u32_e64 s[2:3], s2, v0
+; SDAG-VI-NEXT: v_mov_b32_e32 v0, s0
+; SDAG-VI-NEXT: v_mov_b32_e32 v2, s2
+; SDAG-VI-NEXT: v_mov_b32_e32 v1, s1
+; SDAG-VI-NEXT: v_mov_b32_e32 v3, s3
+; SDAG-VI-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
+; SDAG-VI-NEXT: s_endpgm
+;
+; GFX9-LABEL: v_icmp_i32_ule:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_load_dword s4, s[0:1], 0x2c
+; GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
+; GFX9-NEXT: v_mov_b32_e32 v0, 0x64
+; GFX9-NEXT: v_mov_b32_e32 v2, 0
+; GFX9-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-NEXT: v_cmp_le_u32_e64 s[0:1], s4, v0
+; GFX9-NEXT: v_mov_b32_e32 v0, s0
+; GFX9-NEXT: v_mov_b32_e32 v1, s1
+; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[2:3]
+; GFX9-NEXT: s_endpgm
+;
+; GISEL-VI-LABEL: v_icmp_i32_ule:
+; GISEL-VI: ; %bb.0:
+; GISEL-VI-NEXT: s_load_dword s2, s[0:1], 0x2c
+; GISEL-VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
+; GISEL-VI-NEXT: v_mov_b32_e32 v0, 0x64
+; GISEL-VI-NEXT: s_waitcnt lgkmcnt(0)
+; GISEL-VI-NEXT: v_cmp_le_u32_e64 s[2:3], s2, v0
+; GISEL-VI-NEXT: v_mov_b32_e32 v0, s2
+; GISEL-VI-NEXT: v_mov_b32_e32 v3, s1
+; GISEL-VI-NEXT: v_mov_b32_e32 v1, s3
+; GISEL-VI-NEXT: v_mov_b32_e32 v2, s0
+; GISEL-VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
+; GISEL-VI-NEXT: s_endpgm
+ %result = call i64 @llvm.amdgcn.icmp.i32(i32 %src, i32 100, i32 37)
+ store i64 %result, i64 addrspace(1)* %out
+ ret void
+}
+
+define amdgpu_kernel void @v_icmp_i32_sgt(i64 addrspace(1)* %out, i32 %src) #1 {
+; GFX11-LABEL: v_icmp_i32_sgt:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_clause 0x1
+; GFX11-NEXT: s_load_b32 s2, s[0:1], 0x2c
+; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
+; GFX11-NEXT: v_mov_b32_e32 v2, 0
+; GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-NEXT: v_cmp_lt_i32_e64 s[2:3], 0x64, s2
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-NEXT: v_mov_b32_e32 v0, s2
+; GFX11-NEXT: v_mov_b32_e32 v1, s3
+; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX11-NEXT: s_endpgm
+;
+; SDAG-VI-LABEL: v_icmp_i32_sgt:
+; SDAG-VI: ; %bb.0:
+; SDAG-VI-NEXT: s_load_dword s2, s[0:1], 0x2c
+; SDAG-VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
+; SDAG-VI-NEXT: v_mov_b32_e32 v0, 0x64
+; SDAG-VI-NEXT: s_waitcnt lgkmcnt(0)
+; SDAG-VI-NEXT: v_cmp_gt_i32_e64 s[2:3], s2, v0
+; SDAG-VI-NEXT: v_mov_b32_e32 v0, s0
+; SDAG-VI-NEXT: v_mov_b32_e32 v2, s2
+; SDAG-VI-NEXT: v_mov_b32_e32 v1, s1
+; SDAG-VI-NEXT: v_mov_b32_e32 v3, s3
+; SDAG-VI-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
+; SDAG-VI-NEXT: s_endpgm
+;
+; GFX9-LABEL: v_icmp_i32_sgt:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_load_dword s4, s[0:1], 0x2c
+; GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
+; GFX9-NEXT: v_mov_b32_e32 v0, 0x64
+; GFX9-NEXT: v_mov_b32_e32 v2, 0
+; GFX9-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-NEXT: v_cmp_gt_i32_e64 s[0:1], s4, v0
+; GFX9-NEXT: v_mov_b32_e32 v0, s0
+; GFX9-NEXT: v_mov_b32_e32 v1, s1
+; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[2:3]
+; GFX9-NEXT: s_endpgm
+;
+; GISEL-VI-LABEL: v_icmp_i32_sgt:
+; GISEL-VI: ; %bb.0:
+; GISEL-VI-NEXT: s_load_dword s2, s[0:1], 0x2c
+; GISEL-VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
+; GISEL-VI-NEXT: v_mov_b32_e32 v0, 0x64
+; GISEL-VI-NEXT: s_waitcnt lgkmcnt(0)
+; GISEL-VI-NEXT: v_cmp_gt_i32_e64 s[2:3], s2, v0
+; GISEL-VI-NEXT: v_mov_b32_e32 v0, s2
+; GISEL-VI-NEXT: v_mov_b32_e32 v3, s1
+; GISEL-VI-NEXT: v_mov_b32_e32 v1, s3
+; GISEL-VI-NEXT: v_mov_b32_e32 v2, s0
+; GISEL-VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
+; GISEL-VI-NEXT: s_endpgm
+ %result = call i64 @llvm.amdgcn.icmp.i32(i32 %src, i32 100, i32 38)
+ store i64 %result, i64 addrspace(1)* %out
+ ret void
+}
+
+define amdgpu_kernel void @v_icmp_i32_sge(i64 addrspace(1)* %out, i32 %src) {
+; GFX11-LABEL: v_icmp_i32_sge:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_clause 0x1
+; GFX11-NEXT: s_load_b32 s2, s[0:1], 0x2c
+; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
+; GFX11-NEXT: v_mov_b32_e32 v2, 0
+; GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-NEXT: v_cmp_le_i32_e64 s[2:3], 0x64, s2
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-NEXT: v_mov_b32_e32 v0, s2
+; GFX11-NEXT: v_mov_b32_e32 v1, s3
+; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX11-NEXT: s_endpgm
+;
+; SDAG-VI-LABEL: v_icmp_i32_sge:
+; SDAG-VI: ; %bb.0:
+; SDAG-VI-NEXT: s_load_dword s2, s[0:1], 0x2c
+; SDAG-VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
+; SDAG-VI-NEXT: v_mov_b32_e32 v0, 0x64
+; SDAG-VI-NEXT: s_waitcnt lgkmcnt(0)
+; SDAG-VI-NEXT: v_cmp_ge_i32_e64 s[2:3], s2, v0
+; SDAG-VI-NEXT: v_mov_b32_e32 v0, s0
+; SDAG-VI-NEXT: v_mov_b32_e32 v2, s2
+; SDAG-VI-NEXT: v_mov_b32_e32 v1, s1
+; SDAG-VI-NEXT: v_mov_b32_e32 v3, s3
+; SDAG-VI-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
+; SDAG-VI-NEXT: s_endpgm
+;
+; GFX9-LABEL: v_icmp_i32_sge:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_load_dword s4, s[0:1], 0x2c
+; GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
+; GFX9-NEXT: v_mov_b32_e32 v0, 0x64
+; GFX9-NEXT: v_mov_b32_e32 v2, 0
+; GFX9-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-NEXT: v_cmp_ge_i32_e64 s[0:1], s4, v0
+; GFX9-NEXT: v_mov_b32_e32 v0, s0
+; GFX9-NEXT: v_mov_b32_e32 v1, s1
+; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[2:3]
+; GFX9-NEXT: s_endpgm
+;
+; GISEL-VI-LABEL: v_icmp_i32_sge:
+; GISEL-VI: ; %bb.0:
+; GISEL-VI-NEXT: s_load_dword s2, s[0:1], 0x2c
+; GISEL-VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
+; GISEL-VI-NEXT: v_mov_b32_e32 v0, 0x64
+; GISEL-VI-NEXT: s_waitcnt lgkmcnt(0)
+; GISEL-VI-NEXT: v_cmp_ge_i32_e64 s[2:3], s2, v0
+; GISEL-VI-NEXT: v_mov_b32_e32 v0, s2
+; GISEL-VI-NEXT: v_mov_b32_e32 v3, s1
+; GISEL-VI-NEXT: v_mov_b32_e32 v1, s3
+; GISEL-VI-NEXT: v_mov_b32_e32 v2, s0
+; GISEL-VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
+; GISEL-VI-NEXT: s_endpgm
+ %result = call i64 @llvm.amdgcn.icmp.i32(i32 %src, i32 100, i32 39)
+ store i64 %result, i64 addrspace(1)* %out
+ ret void
+}
+
+define amdgpu_kernel void @v_icmp_i32_slt(i64 addrspace(1)* %out, i32 %src) {
+; GFX11-LABEL: v_icmp_i32_slt:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_clause 0x1
+; GFX11-NEXT: s_load_b32 s2, s[0:1], 0x2c
+; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
+; GFX11-NEXT: v_mov_b32_e32 v2, 0
+; GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-NEXT: v_cmp_gt_i32_e64 s[2:3], 0x64, s2
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-NEXT: v_mov_b32_e32 v0, s2
+; GFX11-NEXT: v_mov_b32_e32 v1, s3
+; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX11-NEXT: s_endpgm
+;
+; SDAG-VI-LABEL: v_icmp_i32_slt:
+; SDAG-VI: ; %bb.0:
+; SDAG-VI-NEXT: s_load_dword s2, s[0:1], 0x2c
+; SDAG-VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
+; SDAG-VI-NEXT: v_mov_b32_e32 v0, 0x64
+; SDAG-VI-NEXT: s_waitcnt lgkmcnt(0)
+; SDAG-VI-NEXT: v_cmp_lt_i32_e64 s[2:3], s2, v0
+; SDAG-VI-NEXT: v_mov_b32_e32 v0, s0
+; SDAG-VI-NEXT: v_mov_b32_e32 v2, s2
+; SDAG-VI-NEXT: v_mov_b32_e32 v1, s1
+; SDAG-VI-NEXT: v_mov_b32_e32 v3, s3
+; SDAG-VI-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
+; SDAG-VI-NEXT: s_endpgm
+;
+; GFX9-LABEL: v_icmp_i32_slt:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_load_dword s4, s[0:1], 0x2c
+; GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
+; GFX9-NEXT: v_mov_b32_e32 v0, 0x64
+; GFX9-NEXT: v_mov_b32_e32 v2, 0
+; GFX9-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-NEXT: v_cmp_lt_i32_e64 s[0:1], s4, v0
+; GFX9-NEXT: v_mov_b32_e32 v0, s0
+; GFX9-NEXT: v_mov_b32_e32 v1, s1
+; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[2:3]
+; GFX9-NEXT: s_endpgm
+;
+; GISEL-VI-LABEL: v_icmp_i32_slt:
+; GISEL-VI: ; %bb.0:
+; GISEL-VI-NEXT: s_load_dword s2, s[0:1], 0x2c
+; GISEL-VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
+; GISEL-VI-NEXT: v_mov_b32_e32 v0, 0x64
+; GISEL-VI-NEXT: s_waitcnt lgkmcnt(0)
+; GISEL-VI-NEXT: v_cmp_lt_i32_e64 s[2:3], s2, v0
+; GISEL-VI-NEXT: v_mov_b32_e32 v0, s2
+; GISEL-VI-NEXT: v_mov_b32_e32 v3, s1
+; GISEL-VI-NEXT: v_mov_b32_e32 v1, s3
+; GISEL-VI-NEXT: v_mov_b32_e32 v2, s0
+; GISEL-VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
+; GISEL-VI-NEXT: s_endpgm
+ %result = call i64 @llvm.amdgcn.icmp.i32(i32 %src, i32 100, i32 40)
+ store i64 %result, i64 addrspace(1)* %out
+ ret void
+}
+
+define amdgpu_kernel void @v_icmp_i32_sle(i64 addrspace(1)* %out, i32 %src) {
+; GFX11-LABEL: v_icmp_i32_sle:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_clause 0x1
+; GFX11-NEXT: s_load_b32 s2, s[0:1], 0x2c
+; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
+; GFX11-NEXT: v_mov_b32_e32 v2, 0
+; GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-NEXT: v_cmp_ge_i32_e64 s[2:3], 0x64, s2
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-NEXT: v_mov_b32_e32 v0, s2
+; GFX11-NEXT: v_mov_b32_e32 v1, s3
+; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX11-NEXT: s_endpgm
+;
+; SDAG-VI-LABEL: v_icmp_i32_sle:
+; SDAG-VI: ; %bb.0:
+; SDAG-VI-NEXT: s_load_dword s2, s[0:1], 0x2c
+; SDAG-VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
+; SDAG-VI-NEXT: v_mov_b32_e32 v0, 0x64
+; SDAG-VI-NEXT: s_waitcnt lgkmcnt(0)
+; SDAG-VI-NEXT: v_cmp_le_i32_e64 s[2:3], s2, v0
+; SDAG-VI-NEXT: v_mov_b32_e32 v0, s0
+; SDAG-VI-NEXT: v_mov_b32_e32 v2, s2
+; SDAG-VI-NEXT: v_mov_b32_e32 v1, s1
+; SDAG-VI-NEXT: v_mov_b32_e32 v3, s3
+; SDAG-VI-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
+; SDAG-VI-NEXT: s_endpgm
+;
+; GFX9-LABEL: v_icmp_i32_sle:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_load_dword s4, s[0:1], 0x2c
+; GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
+; GFX9-NEXT: v_mov_b32_e32 v0, 0x64
+; GFX9-NEXT: v_mov_b32_e32 v2, 0
+; GFX9-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-NEXT: v_cmp_le_i32_e64 s[0:1], s4, v0
+; GFX9-NEXT: v_mov_b32_e32 v0, s0
+; GFX9-NEXT: v_mov_b32_e32 v1, s1
+; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[2:3]
+; GFX9-NEXT: s_endpgm
+;
+; GISEL-VI-LABEL: v_icmp_i32_sle:
+; GISEL-VI: ; %bb.0:
+; GISEL-VI-NEXT: s_load_dword s2, s[0:1], 0x2c
+; GISEL-VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
+; GISEL-VI-NEXT: v_mov_b32_e32 v0, 0x64
+; GISEL-VI-NEXT: s_waitcnt lgkmcnt(0)
+; GISEL-VI-NEXT: v_cmp_le_i32_e64 s[2:3], s2, v0
+; GISEL-VI-NEXT: v_mov_b32_e32 v0, s2
+; GISEL-VI-NEXT: v_mov_b32_e32 v3, s1
+; GISEL-VI-NEXT: v_mov_b32_e32 v1, s3
+; GISEL-VI-NEXT: v_mov_b32_e32 v2, s0
+; GISEL-VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
+; GISEL-VI-NEXT: s_endpgm
+ %result = call i64 @llvm.amdgcn.icmp.i32(i32 %src, i32 100, i32 41)
+ store i64 %result, i64 addrspace(1)* %out
+ ret void
+}
+
+define amdgpu_kernel void @v_icmp_i64_eq(i64 addrspace(1)* %out, i64 %src) {
+; GFX11-LABEL: v_icmp_i64_eq:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
+; GFX11-NEXT: s_mov_b64 s[4:5], 0x64
+; GFX11-NEXT: v_mov_b32_e32 v2, 0
+; GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-NEXT: v_cmp_eq_u64_e64 s[2:3], s[2:3], s[4:5]
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-NEXT: v_mov_b32_e32 v0, s2
+; GFX11-NEXT: v_mov_b32_e32 v1, s3
+; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX11-NEXT: s_endpgm
+;
+; SDAG-VI-LABEL: v_icmp_i64_eq:
+; SDAG-VI: ; %bb.0:
+; SDAG-VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
+; SDAG-VI-NEXT: v_mov_b32_e32 v0, 0x64
+; SDAG-VI-NEXT: v_mov_b32_e32 v1, 0
+; SDAG-VI-NEXT: s_waitcnt lgkmcnt(0)
+; SDAG-VI-NEXT: v_cmp_eq_u64_e64 s[2:3], s[2:3], v[0:1]
+; SDAG-VI-NEXT: v_mov_b32_e32 v2, s0
+; SDAG-VI-NEXT: v_mov_b32_e32 v0, s2
+; SDAG-VI-NEXT: v_mov_b32_e32 v3, s1
+; SDAG-VI-NEXT: v_mov_b32_e32 v1, s3
+; SDAG-VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
+; SDAG-VI-NEXT: s_endpgm
+;
+; SDAG-GFX9-LABEL: v_icmp_i64_eq:
+; SDAG-GFX9: ; %bb.0:
+; SDAG-GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
+; SDAG-GFX9-NEXT: v_mov_b32_e32 v1, 0
+; SDAG-GFX9-NEXT: v_mov_b32_e32 v0, 0x64
+; SDAG-GFX9-NEXT: s_waitcnt lgkmcnt(0)
+; SDAG-GFX9-NEXT: v_cmp_eq_u64_e64 s[2:3], s[2:3], v[0:1]
+; SDAG-GFX9-NEXT: v_mov_b32_e32 v2, s2
+; SDAG-GFX9-NEXT: v_mov_b32_e32 v3, s3
+; SDAG-GFX9-NEXT: global_store_dwordx2 v1, v[2:3], s[0:1]
+; SDAG-GFX9-NEXT: s_endpgm
+;
+; GISEL-VI-LABEL: v_icmp_i64_eq:
+; GISEL-VI: ; %bb.0:
+; GISEL-VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
+; GISEL-VI-NEXT: s_mov_b64 s[4:5], 0x64
+; GISEL-VI-NEXT: v_mov_b32_e32 v0, s4
+; GISEL-VI-NEXT: v_mov_b32_e32 v1, s5
+; GISEL-VI-NEXT: s_waitcnt lgkmcnt(0)
+; GISEL-VI-NEXT: v_cmp_eq_u64_e64 s[2:3], s[2:3], v[0:1]
+; GISEL-VI-NEXT: v_mov_b32_e32 v3, s1
+; GISEL-VI-NEXT: v_mov_b32_e32 v0, s2
+; GISEL-VI-NEXT: v_mov_b32_e32 v1, s3
+; GISEL-VI-NEXT: v_mov_b32_e32 v2, s0
+; GISEL-VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
+; GISEL-VI-NEXT: s_endpgm
+;
+; GISEL-GFX9-LABEL: v_icmp_i64_eq:
+; GISEL-GFX9: ; %bb.0:
+; GISEL-GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
+; GISEL-GFX9-NEXT: s_mov_b64 s[4:5], 0x64
+; GISEL-GFX9-NEXT: v_mov_b32_e32 v0, s4
+; GISEL-GFX9-NEXT: v_mov_b32_e32 v1, s5
+; GISEL-GFX9-NEXT: v_mov_b32_e32 v2, 0
+; GISEL-GFX9-NEXT: s_waitcnt lgkmcnt(0)
+; GISEL-GFX9-NEXT: v_cmp_eq_u64_e64 s[2:3], s[2:3], v[0:1]
+; GISEL-GFX9-NEXT: v_mov_b32_e32 v0, s2
+; GISEL-GFX9-NEXT: v_mov_b32_e32 v1, s3
+; GISEL-GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
+; GISEL-GFX9-NEXT: s_endpgm
+ %result = call i64 @llvm.amdgcn.icmp.i64(i64 %src, i64 100, i32 32)
+ store i64 %result, i64 addrspace(1)* %out
+ ret void
+}
+
+define amdgpu_kernel void @v_icmp_i64_ne(i64 addrspace(1)* %out, i64 %src) {
+; GFX11-LABEL: v_icmp_i64_ne:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
+; GFX11-NEXT: s_mov_b64 s[4:5], 0x64
+; GFX11-NEXT: v_mov_b32_e32 v2, 0
+; GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-NEXT: v_cmp_ne_u64_e64 s[2:3], s[2:3], s[4:5]
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-NEXT: v_mov_b32_e32 v0, s2
+; GFX11-NEXT: v_mov_b32_e32 v1, s3
+; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX11-NEXT: s_endpgm
+;
+; SDAG-VI-LABEL: v_icmp_i64_ne:
+; SDAG-VI: ; %bb.0:
+; SDAG-VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
+; SDAG-VI-NEXT: v_mov_b32_e32 v0, 0x64
+; SDAG-VI-NEXT: v_mov_b32_e32 v1, 0
+; SDAG-VI-NEXT: s_waitcnt lgkmcnt(0)
+; SDAG-VI-NEXT: v_cmp_ne_u64_e64 s[2:3], s[2:3], v[0:1]
+; SDAG-VI-NEXT: v_mov_b32_e32 v2, s0
+; SDAG-VI-NEXT: v_mov_b32_e32 v0, s2
+; SDAG-VI-NEXT: v_mov_b32_e32 v3, s1
+; SDAG-VI-NEXT: v_mov_b32_e32 v1, s3
+; SDAG-VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
+; SDAG-VI-NEXT: s_endpgm
+;
+; SDAG-GFX9-LABEL: v_icmp_i64_ne:
+; SDAG-GFX9: ; %bb.0:
+; SDAG-GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
+; SDAG-GFX9-NEXT: v_mov_b32_e32 v1, 0
+; SDAG-GFX9-NEXT: v_mov_b32_e32 v0, 0x64
+; SDAG-GFX9-NEXT: s_waitcnt lgkmcnt(0)
+; SDAG-GFX9-NEXT: v_cmp_ne_u64_e64 s[2:3], s[2:3], v[0:1]
+; SDAG-GFX9-NEXT: v_mov_b32_e32 v2, s2
+; SDAG-GFX9-NEXT: v_mov_b32_e32 v3, s3
+; SDAG-GFX9-NEXT: global_store_dwordx2 v1, v[2:3], s[0:1]
+; SDAG-GFX9-NEXT: s_endpgm
+;
+; GISEL-VI-LABEL: v_icmp_i64_ne:
+; GISEL-VI: ; %bb.0:
+; GISEL-VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
+; GISEL-VI-NEXT: s_mov_b64 s[4:5], 0x64
+; GISEL-VI-NEXT: v_mov_b32_e32 v0, s4
+; GISEL-VI-NEXT: v_mov_b32_e32 v1, s5
+; GISEL-VI-NEXT: s_waitcnt lgkmcnt(0)
+; GISEL-VI-NEXT: v_cmp_ne_u64_e64 s[2:3], s[2:3], v[0:1]
+; GISEL-VI-NEXT: v_mov_b32_e32 v3, s1
+; GISEL-VI-NEXT: v_mov_b32_e32 v0, s2
+; GISEL-VI-NEXT: v_mov_b32_e32 v1, s3
+; GISEL-VI-NEXT: v_mov_b32_e32 v2, s0
+; GISEL-VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
+; GISEL-VI-NEXT: s_endpgm
+;
+; GISEL-GFX9-LABEL: v_icmp_i64_ne:
+; GISEL-GFX9: ; %bb.0:
+; GISEL-GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
+; GISEL-GFX9-NEXT: s_mov_b64 s[4:5], 0x64
+; GISEL-GFX9-NEXT: v_mov_b32_e32 v0, s4
+; GISEL-GFX9-NEXT: v_mov_b32_e32 v1, s5
+; GISEL-GFX9-NEXT: v_mov_b32_e32 v2, 0
+; GISEL-GFX9-NEXT: s_waitcnt lgkmcnt(0)
+; GISEL-GFX9-NEXT: v_cmp_ne_u64_e64 s[2:3], s[2:3], v[0:1]
+; GISEL-GFX9-NEXT: v_mov_b32_e32 v0, s2
+; GISEL-GFX9-NEXT: v_mov_b32_e32 v1, s3
+; GISEL-GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
+; GISEL-GFX9-NEXT: s_endpgm
+ %result = call i64 @llvm.amdgcn.icmp.i64(i64 %src, i64 100, i32 33)
+ store i64 %result, i64 addrspace(1)* %out
+ ret void
+}
+
+define amdgpu_kernel void @v_icmp_u64_ugt(i64 addrspace(1)* %out, i64 %src) {
+; GFX11-LABEL: v_icmp_u64_ugt:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
+; GFX11-NEXT: s_mov_b64 s[4:5], 0x64
+; GFX11-NEXT: v_mov_b32_e32 v2, 0
+; GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-NEXT: v_cmp_gt_u64_e64 s[2:3], s[2:3], s[4:5]
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-NEXT: v_mov_b32_e32 v0, s2
+; GFX11-NEXT: v_mov_b32_e32 v1, s3
+; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX11-NEXT: s_endpgm
+;
+; SDAG-VI-LABEL: v_icmp_u64_ugt:
+; SDAG-VI: ; %bb.0:
+; SDAG-VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
+; SDAG-VI-NEXT: v_mov_b32_e32 v0, 0x64
+; SDAG-VI-NEXT: v_mov_b32_e32 v1, 0
+; SDAG-VI-NEXT: s_waitcnt lgkmcnt(0)
+; SDAG-VI-NEXT: v_cmp_gt_u64_e64 s[2:3], s[2:3], v[0:1]
+; SDAG-VI-NEXT: v_mov_b32_e32 v2, s0
+; SDAG-VI-NEXT: v_mov_b32_e32 v0, s2
+; SDAG-VI-NEXT: v_mov_b32_e32 v3, s1
+; SDAG-VI-NEXT: v_mov_b32_e32 v1, s3
+; SDAG-VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
+; SDAG-VI-NEXT: s_endpgm
+;
+; SDAG-GFX9-LABEL: v_icmp_u64_ugt:
+; SDAG-GFX9: ; %bb.0:
+; SDAG-GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
+; SDAG-GFX9-NEXT: v_mov_b32_e32 v1, 0
+; SDAG-GFX9-NEXT: v_mov_b32_e32 v0, 0x64
+; SDAG-GFX9-NEXT: s_waitcnt lgkmcnt(0)
+; SDAG-GFX9-NEXT: v_cmp_gt_u64_e64 s[2:3], s[2:3], v[0:1]
+; SDAG-GFX9-NEXT: v_mov_b32_e32 v2, s2
+; SDAG-GFX9-NEXT: v_mov_b32_e32 v3, s3
+; SDAG-GFX9-NEXT: global_store_dwordx2 v1, v[2:3], s[0:1]
+; SDAG-GFX9-NEXT: s_endpgm
+;
+; GISEL-VI-LABEL: v_icmp_u64_ugt:
+; GISEL-VI: ; %bb.0:
+; GISEL-VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
+; GISEL-VI-NEXT: s_mov_b64 s[4:5], 0x64
+; GISEL-VI-NEXT: v_mov_b32_e32 v0, s4
+; GISEL-VI-NEXT: v_mov_b32_e32 v1, s5
+; GISEL-VI-NEXT: s_waitcnt lgkmcnt(0)
+; GISEL-VI-NEXT: v_cmp_gt_u64_e64 s[2:3], s[2:3], v[0:1]
+; GISEL-VI-NEXT: v_mov_b32_e32 v3, s1
+; GISEL-VI-NEXT: v_mov_b32_e32 v0, s2
+; GISEL-VI-NEXT: v_mov_b32_e32 v1, s3
+; GISEL-VI-NEXT: v_mov_b32_e32 v2, s0
+; GISEL-VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
+; GISEL-VI-NEXT: s_endpgm
+;
+; GISEL-GFX9-LABEL: v_icmp_u64_ugt:
+; GISEL-GFX9: ; %bb.0:
+; GISEL-GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
+; GISEL-GFX9-NEXT: s_mov_b64 s[4:5], 0x64
+; GISEL-GFX9-NEXT: v_mov_b32_e32 v0, s4
+; GISEL-GFX9-NEXT: v_mov_b32_e32 v1, s5
+; GISEL-GFX9-NEXT: v_mov_b32_e32 v2, 0
+; GISEL-GFX9-NEXT: s_waitcnt lgkmcnt(0)
+; GISEL-GFX9-NEXT: v_cmp_gt_u64_e64 s[2:3], s[2:3], v[0:1]
+; GISEL-GFX9-NEXT: v_mov_b32_e32 v0, s2
+; GISEL-GFX9-NEXT: v_mov_b32_e32 v1, s3
+; GISEL-GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
+; GISEL-GFX9-NEXT: s_endpgm
+ %result = call i64 @llvm.amdgcn.icmp.i64(i64 %src, i64 100, i32 34)
+ store i64 %result, i64 addrspace(1)* %out
+ ret void
+}
+
+define amdgpu_kernel void @v_icmp_u64_uge(i64 addrspace(1)* %out, i64 %src) {
+; GFX11-LABEL: v_icmp_u64_uge:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
+; GFX11-NEXT: s_mov_b64 s[4:5], 0x64
+; GFX11-NEXT: v_mov_b32_e32 v2, 0
+; GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-NEXT: v_cmp_ge_u64_e64 s[2:3], s[2:3], s[4:5]
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-NEXT: v_mov_b32_e32 v0, s2
+; GFX11-NEXT: v_mov_b32_e32 v1, s3
+; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX11-NEXT: s_endpgm
+;
+; SDAG-VI-LABEL: v_icmp_u64_uge:
+; SDAG-VI: ; %bb.0:
+; SDAG-VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
+; SDAG-VI-NEXT: v_mov_b32_e32 v0, 0x64
+; SDAG-VI-NEXT: v_mov_b32_e32 v1, 0
+; SDAG-VI-NEXT: s_waitcnt lgkmcnt(0)
+; SDAG-VI-NEXT: v_cmp_ge_u64_e64 s[2:3], s[2:3], v[0:1]
+; SDAG-VI-NEXT: v_mov_b32_e32 v2, s0
+; SDAG-VI-NEXT: v_mov_b32_e32 v0, s2
+; SDAG-VI-NEXT: v_mov_b32_e32 v3, s1
+; SDAG-VI-NEXT: v_mov_b32_e32 v1, s3
+; SDAG-VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
+; SDAG-VI-NEXT: s_endpgm
+;
+; SDAG-GFX9-LABEL: v_icmp_u64_uge:
+; SDAG-GFX9: ; %bb.0:
+; SDAG-GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
+; SDAG-GFX9-NEXT: v_mov_b32_e32 v1, 0
+; SDAG-GFX9-NEXT: v_mov_b32_e32 v0, 0x64
+; SDAG-GFX9-NEXT: s_waitcnt lgkmcnt(0)
+; SDAG-GFX9-NEXT: v_cmp_ge_u64_e64 s[2:3], s[2:3], v[0:1]
+; SDAG-GFX9-NEXT: v_mov_b32_e32 v2, s2
+; SDAG-GFX9-NEXT: v_mov_b32_e32 v3, s3
+; SDAG-GFX9-NEXT: global_store_dwordx2 v1, v[2:3], s[0:1]
+; SDAG-GFX9-NEXT: s_endpgm
+;
+; GISEL-VI-LABEL: v_icmp_u64_uge:
+; GISEL-VI: ; %bb.0:
+; GISEL-VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
+; GISEL-VI-NEXT: s_mov_b64 s[4:5], 0x64
+; GISEL-VI-NEXT: v_mov_b32_e32 v0, s4
+; GISEL-VI-NEXT: v_mov_b32_e32 v1, s5
+; GISEL-VI-NEXT: s_waitcnt lgkmcnt(0)
+; GISEL-VI-NEXT: v_cmp_ge_u64_e64 s[2:3], s[2:3], v[0:1]
+; GISEL-VI-NEXT: v_mov_b32_e32 v3, s1
+; GISEL-VI-NEXT: v_mov_b32_e32 v0, s2
+; GISEL-VI-NEXT: v_mov_b32_e32 v1, s3
+; GISEL-VI-NEXT: v_mov_b32_e32 v2, s0
+; GISEL-VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
+; GISEL-VI-NEXT: s_endpgm
+;
+; GISEL-GFX9-LABEL: v_icmp_u64_uge:
+; GISEL-GFX9: ; %bb.0:
+; GISEL-GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
+; GISEL-GFX9-NEXT: s_mov_b64 s[4:5], 0x64
+; GISEL-GFX9-NEXT: v_mov_b32_e32 v0, s4
+; GISEL-GFX9-NEXT: v_mov_b32_e32 v1, s5
+; GISEL-GFX9-NEXT: v_mov_b32_e32 v2, 0
+; GISEL-GFX9-NEXT: s_waitcnt lgkmcnt(0)
+; GISEL-GFX9-NEXT: v_cmp_ge_u64_e64 s[2:3], s[2:3], v[0:1]
+; GISEL-GFX9-NEXT: v_mov_b32_e32 v0, s2
+; GISEL-GFX9-NEXT: v_mov_b32_e32 v1, s3
+; GISEL-GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
+; GISEL-GFX9-NEXT: s_endpgm
+ %result = call i64 @llvm.amdgcn.icmp.i64(i64 %src, i64 100, i32 35)
+ store i64 %result, i64 addrspace(1)* %out
+ ret void
+}
+
+define amdgpu_kernel void @v_icmp_u64_ult(i64 addrspace(1)* %out, i64 %src) {
+; GFX11-LABEL: v_icmp_u64_ult:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
+; GFX11-NEXT: s_mov_b64 s[4:5], 0x64
+; GFX11-NEXT: v_mov_b32_e32 v2, 0
+; GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-NEXT: v_cmp_lt_u64_e64 s[2:3], s[2:3], s[4:5]
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-NEXT: v_mov_b32_e32 v0, s2
+; GFX11-NEXT: v_mov_b32_e32 v1, s3
+; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX11-NEXT: s_endpgm
+;
+; SDAG-VI-LABEL: v_icmp_u64_ult:
+; SDAG-VI: ; %bb.0:
+; SDAG-VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
+; SDAG-VI-NEXT: v_mov_b32_e32 v0, 0x64
+; SDAG-VI-NEXT: v_mov_b32_e32 v1, 0
+; SDAG-VI-NEXT: s_waitcnt lgkmcnt(0)
+; SDAG-VI-NEXT: v_cmp_lt_u64_e64 s[2:3], s[2:3], v[0:1]
+; SDAG-VI-NEXT: v_mov_b32_e32 v2, s0
+; SDAG-VI-NEXT: v_mov_b32_e32 v0, s2
+; SDAG-VI-NEXT: v_mov_b32_e32 v3, s1
+; SDAG-VI-NEXT: v_mov_b32_e32 v1, s3
+; SDAG-VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
+; SDAG-VI-NEXT: s_endpgm
+;
+; SDAG-GFX9-LABEL: v_icmp_u64_ult:
+; SDAG-GFX9: ; %bb.0:
+; SDAG-GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
+; SDAG-GFX9-NEXT: v_mov_b32_e32 v1, 0
+; SDAG-GFX9-NEXT: v_mov_b32_e32 v0, 0x64
+; SDAG-GFX9-NEXT: s_waitcnt lgkmcnt(0)
+; SDAG-GFX9-NEXT: v_cmp_lt_u64_e64 s[2:3], s[2:3], v[0:1]
+; SDAG-GFX9-NEXT: v_mov_b32_e32 v2, s2
+; SDAG-GFX9-NEXT: v_mov_b32_e32 v3, s3
+; SDAG-GFX9-NEXT: global_store_dwordx2 v1, v[2:3], s[0:1]
+; SDAG-GFX9-NEXT: s_endpgm
+;
+; GISEL-VI-LABEL: v_icmp_u64_ult:
+; GISEL-VI: ; %bb.0:
+; GISEL-VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
+; GISEL-VI-NEXT: s_mov_b64 s[4:5], 0x64
+; GISEL-VI-NEXT: v_mov_b32_e32 v0, s4
+; GISEL-VI-NEXT: v_mov_b32_e32 v1, s5
+; GISEL-VI-NEXT: s_waitcnt lgkmcnt(0)
+; GISEL-VI-NEXT: v_cmp_lt_u64_e64 s[2:3], s[2:3], v[0:1]
+; GISEL-VI-NEXT: v_mov_b32_e32 v3, s1
+; GISEL-VI-NEXT: v_mov_b32_e32 v0, s2
+; GISEL-VI-NEXT: v_mov_b32_e32 v1, s3
+; GISEL-VI-NEXT: v_mov_b32_e32 v2, s0
+; GISEL-VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
+; GISEL-VI-NEXT: s_endpgm
+;
+; GISEL-GFX9-LABEL: v_icmp_u64_ult:
+; GISEL-GFX9: ; %bb.0:
+; GISEL-GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
+; GISEL-GFX9-NEXT: s_mov_b64 s[4:5], 0x64
+; GISEL-GFX9-NEXT: v_mov_b32_e32 v0, s4
+; GISEL-GFX9-NEXT: v_mov_b32_e32 v1, s5
+; GISEL-GFX9-NEXT: v_mov_b32_e32 v2, 0
+; GISEL-GFX9-NEXT: s_waitcnt lgkmcnt(0)
+; GISEL-GFX9-NEXT: v_cmp_lt_u64_e64 s[2:3], s[2:3], v[0:1]
+; GISEL-GFX9-NEXT: v_mov_b32_e32 v0, s2
+; GISEL-GFX9-NEXT: v_mov_b32_e32 v1, s3
+; GISEL-GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
+; GISEL-GFX9-NEXT: s_endpgm
+ %result = call i64 @llvm.amdgcn.icmp.i64(i64 %src, i64 100, i32 36)
+ store i64 %result, i64 addrspace(1)* %out
+ ret void
+}
+
+define amdgpu_kernel void @v_icmp_u64_ule(i64 addrspace(1)* %out, i64 %src) {
+; GFX11-LABEL: v_icmp_u64_ule:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
+; GFX11-NEXT: s_mov_b64 s[4:5], 0x64
+; GFX11-NEXT: v_mov_b32_e32 v2, 0
+; GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-NEXT: v_cmp_le_u64_e64 s[2:3], s[2:3], s[4:5]
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-NEXT: v_mov_b32_e32 v0, s2
+; GFX11-NEXT: v_mov_b32_e32 v1, s3
+; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX11-NEXT: s_endpgm
+;
+; SDAG-VI-LABEL: v_icmp_u64_ule:
+; SDAG-VI: ; %bb.0:
+; SDAG-VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
+; SDAG-VI-NEXT: v_mov_b32_e32 v0, 0x64
+; SDAG-VI-NEXT: v_mov_b32_e32 v1, 0
+; SDAG-VI-NEXT: s_waitcnt lgkmcnt(0)
+; SDAG-VI-NEXT: v_cmp_le_u64_e64 s[2:3], s[2:3], v[0:1]
+; SDAG-VI-NEXT: v_mov_b32_e32 v2, s0
+; SDAG-VI-NEXT: v_mov_b32_e32 v0, s2
+; SDAG-VI-NEXT: v_mov_b32_e32 v3, s1
+; SDAG-VI-NEXT: v_mov_b32_e32 v1, s3
+; SDAG-VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
+; SDAG-VI-NEXT: s_endpgm
+;
+; SDAG-GFX9-LABEL: v_icmp_u64_ule:
+; SDAG-GFX9: ; %bb.0:
+; SDAG-GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
+; SDAG-GFX9-NEXT: v_mov_b32_e32 v1, 0
+; SDAG-GFX9-NEXT: v_mov_b32_e32 v0, 0x64
+; SDAG-GFX9-NEXT: s_waitcnt lgkmcnt(0)
+; SDAG-GFX9-NEXT: v_cmp_le_u64_e64 s[2:3], s[2:3], v[0:1]
+; SDAG-GFX9-NEXT: v_mov_b32_e32 v2, s2
+; SDAG-GFX9-NEXT: v_mov_b32_e32 v3, s3
+; SDAG-GFX9-NEXT: global_store_dwordx2 v1, v[2:3], s[0:1]
+; SDAG-GFX9-NEXT: s_endpgm
+;
+; GISEL-VI-LABEL: v_icmp_u64_ule:
+; GISEL-VI: ; %bb.0:
+; GISEL-VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
+; GISEL-VI-NEXT: s_mov_b64 s[4:5], 0x64
+; GISEL-VI-NEXT: v_mov_b32_e32 v0, s4
+; GISEL-VI-NEXT: v_mov_b32_e32 v1, s5
+; GISEL-VI-NEXT: s_waitcnt lgkmcnt(0)
+; GISEL-VI-NEXT: v_cmp_le_u64_e64 s[2:3], s[2:3], v[0:1]
+; GISEL-VI-NEXT: v_mov_b32_e32 v3, s1
+; GISEL-VI-NEXT: v_mov_b32_e32 v0, s2
+; GISEL-VI-NEXT: v_mov_b32_e32 v1, s3
+; GISEL-VI-NEXT: v_mov_b32_e32 v2, s0
+; GISEL-VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
+; GISEL-VI-NEXT: s_endpgm
+;
+; GISEL-GFX9-LABEL: v_icmp_u64_ule:
+; GISEL-GFX9: ; %bb.0:
+; GISEL-GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
+; GISEL-GFX9-NEXT: s_mov_b64 s[4:5], 0x64
+; GISEL-GFX9-NEXT: v_mov_b32_e32 v0, s4
+; GISEL-GFX9-NEXT: v_mov_b32_e32 v1, s5
+; GISEL-GFX9-NEXT: v_mov_b32_e32 v2, 0
+; GISEL-GFX9-NEXT: s_waitcnt lgkmcnt(0)
+; GISEL-GFX9-NEXT: v_cmp_le_u64_e64 s[2:3], s[2:3], v[0:1]
+; GISEL-GFX9-NEXT: v_mov_b32_e32 v0, s2
+; GISEL-GFX9-NEXT: v_mov_b32_e32 v1, s3
+; GISEL-GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
+; GISEL-GFX9-NEXT: s_endpgm
+ %result = call i64 @llvm.amdgcn.icmp.i64(i64 %src, i64 100, i32 37)
+ store i64 %result, i64 addrspace(1)* %out
+ ret void
+}
+
+define amdgpu_kernel void @v_icmp_i64_sgt(i64 addrspace(1)* %out, i64 %src) {
+; GFX11-LABEL: v_icmp_i64_sgt:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
+; GFX11-NEXT: s_mov_b64 s[4:5], 0x64
+; GFX11-NEXT: v_mov_b32_e32 v2, 0
+; GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-NEXT: v_cmp_gt_i64_e64 s[2:3], s[2:3], s[4:5]
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-NEXT: v_mov_b32_e32 v0, s2
+; GFX11-NEXT: v_mov_b32_e32 v1, s3
+; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX11-NEXT: s_endpgm
+;
+; SDAG-VI-LABEL: v_icmp_i64_sgt:
+; SDAG-VI: ; %bb.0:
+; SDAG-VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
+; SDAG-VI-NEXT: v_mov_b32_e32 v0, 0x64
+; SDAG-VI-NEXT: v_mov_b32_e32 v1, 0
+; SDAG-VI-NEXT: s_waitcnt lgkmcnt(0)
+; SDAG-VI-NEXT: v_cmp_gt_i64_e64 s[2:3], s[2:3], v[0:1]
+; SDAG-VI-NEXT: v_mov_b32_e32 v2, s0
+; SDAG-VI-NEXT: v_mov_b32_e32 v0, s2
+; SDAG-VI-NEXT: v_mov_b32_e32 v3, s1
+; SDAG-VI-NEXT: v_mov_b32_e32 v1, s3
+; SDAG-VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
+; SDAG-VI-NEXT: s_endpgm
+;
+; SDAG-GFX9-LABEL: v_icmp_i64_sgt:
+; SDAG-GFX9: ; %bb.0:
+; SDAG-GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
+; SDAG-GFX9-NEXT: v_mov_b32_e32 v1, 0
+; SDAG-GFX9-NEXT: v_mov_b32_e32 v0, 0x64
+; SDAG-GFX9-NEXT: s_waitcnt lgkmcnt(0)
+; SDAG-GFX9-NEXT: v_cmp_gt_i64_e64 s[2:3], s[2:3], v[0:1]
+; SDAG-GFX9-NEXT: v_mov_b32_e32 v2, s2
+; SDAG-GFX9-NEXT: v_mov_b32_e32 v3, s3
+; SDAG-GFX9-NEXT: global_store_dwordx2 v1, v[2:3], s[0:1]
+; SDAG-GFX9-NEXT: s_endpgm
+;
+; GISEL-VI-LABEL: v_icmp_i64_sgt:
+; GISEL-VI: ; %bb.0:
+; GISEL-VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
+; GISEL-VI-NEXT: s_mov_b64 s[4:5], 0x64
+; GISEL-VI-NEXT: v_mov_b32_e32 v0, s4
+; GISEL-VI-NEXT: v_mov_b32_e32 v1, s5
+; GISEL-VI-NEXT: s_waitcnt lgkmcnt(0)
+; GISEL-VI-NEXT: v_cmp_gt_i64_e64 s[2:3], s[2:3], v[0:1]
+; GISEL-VI-NEXT: v_mov_b32_e32 v3, s1
+; GISEL-VI-NEXT: v_mov_b32_e32 v0, s2
+; GISEL-VI-NEXT: v_mov_b32_e32 v1, s3
+; GISEL-VI-NEXT: v_mov_b32_e32 v2, s0
+; GISEL-VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
+; GISEL-VI-NEXT: s_endpgm
+;
+; GISEL-GFX9-LABEL: v_icmp_i64_sgt:
+; GISEL-GFX9: ; %bb.0:
+; GISEL-GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
+; GISEL-GFX9-NEXT: s_mov_b64 s[4:5], 0x64
+; GISEL-GFX9-NEXT: v_mov_b32_e32 v0, s4
+; GISEL-GFX9-NEXT: v_mov_b32_e32 v1, s5
+; GISEL-GFX9-NEXT: v_mov_b32_e32 v2, 0
+; GISEL-GFX9-NEXT: s_waitcnt lgkmcnt(0)
+; GISEL-GFX9-NEXT: v_cmp_gt_i64_e64 s[2:3], s[2:3], v[0:1]
+; GISEL-GFX9-NEXT: v_mov_b32_e32 v0, s2
+; GISEL-GFX9-NEXT: v_mov_b32_e32 v1, s3
+; GISEL-GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
+; GISEL-GFX9-NEXT: s_endpgm
+ %result = call i64 @llvm.amdgcn.icmp.i64(i64 %src, i64 100, i32 38)
+ store i64 %result, i64 addrspace(1)* %out
+ ret void
+}
+
+define amdgpu_kernel void @v_icmp_i64_sge(i64 addrspace(1)* %out, i64 %src) {
+; GFX11-LABEL: v_icmp_i64_sge:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
+; GFX11-NEXT: s_mov_b64 s[4:5], 0x64
+; GFX11-NEXT: v_mov_b32_e32 v2, 0
+; GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-NEXT: v_cmp_ge_i64_e64 s[2:3], s[2:3], s[4:5]
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-NEXT: v_mov_b32_e32 v0, s2
+; GFX11-NEXT: v_mov_b32_e32 v1, s3
+; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX11-NEXT: s_endpgm
+;
+; SDAG-VI-LABEL: v_icmp_i64_sge:
+; SDAG-VI: ; %bb.0:
+; SDAG-VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
+; SDAG-VI-NEXT: v_mov_b32_e32 v0, 0x64
+; SDAG-VI-NEXT: v_mov_b32_e32 v1, 0
+; SDAG-VI-NEXT: s_waitcnt lgkmcnt(0)
+; SDAG-VI-NEXT: v_cmp_ge_i64_e64 s[2:3], s[2:3], v[0:1]
+; SDAG-VI-NEXT: v_mov_b32_e32 v2, s0
+; SDAG-VI-NEXT: v_mov_b32_e32 v0, s2
+; SDAG-VI-NEXT: v_mov_b32_e32 v3, s1
+; SDAG-VI-NEXT: v_mov_b32_e32 v1, s3
+; SDAG-VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
+; SDAG-VI-NEXT: s_endpgm
+;
+; SDAG-GFX9-LABEL: v_icmp_i64_sge:
+; SDAG-GFX9: ; %bb.0:
+; SDAG-GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
+; SDAG-GFX9-NEXT: v_mov_b32_e32 v1, 0
+; SDAG-GFX9-NEXT: v_mov_b32_e32 v0, 0x64
+; SDAG-GFX9-NEXT: s_waitcnt lgkmcnt(0)
+; SDAG-GFX9-NEXT: v_cmp_ge_i64_e64 s[2:3], s[2:3], v[0:1]
+; SDAG-GFX9-NEXT: v_mov_b32_e32 v2, s2
+; SDAG-GFX9-NEXT: v_mov_b32_e32 v3, s3
+; SDAG-GFX9-NEXT: global_store_dwordx2 v1, v[2:3], s[0:1]
+; SDAG-GFX9-NEXT: s_endpgm
+;
+; GISEL-VI-LABEL: v_icmp_i64_sge:
+; GISEL-VI: ; %bb.0:
+; GISEL-VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
+; GISEL-VI-NEXT: s_mov_b64 s[4:5], 0x64
+; GISEL-VI-NEXT: v_mov_b32_e32 v0, s4
+; GISEL-VI-NEXT: v_mov_b32_e32 v1, s5
+; GISEL-VI-NEXT: s_waitcnt lgkmcnt(0)
+; GISEL-VI-NEXT: v_cmp_ge_i64_e64 s[2:3], s[2:3], v[0:1]
+; GISEL-VI-NEXT: v_mov_b32_e32 v3, s1
+; GISEL-VI-NEXT: v_mov_b32_e32 v0, s2
+; GISEL-VI-NEXT: v_mov_b32_e32 v1, s3
+; GISEL-VI-NEXT: v_mov_b32_e32 v2, s0
+; GISEL-VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
+; GISEL-VI-NEXT: s_endpgm
+;
+; GISEL-GFX9-LABEL: v_icmp_i64_sge:
+; GISEL-GFX9: ; %bb.0:
+; GISEL-GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
+; GISEL-GFX9-NEXT: s_mov_b64 s[4:5], 0x64
+; GISEL-GFX9-NEXT: v_mov_b32_e32 v0, s4
+; GISEL-GFX9-NEXT: v_mov_b32_e32 v1, s5
+; GISEL-GFX9-NEXT: v_mov_b32_e32 v2, 0
+; GISEL-GFX9-NEXT: s_waitcnt lgkmcnt(0)
+; GISEL-GFX9-NEXT: v_cmp_ge_i64_e64 s[2:3], s[2:3], v[0:1]
+; GISEL-GFX9-NEXT: v_mov_b32_e32 v0, s2
+; GISEL-GFX9-NEXT: v_mov_b32_e32 v1, s3
+; GISEL-GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
+; GISEL-GFX9-NEXT: s_endpgm
+ %result = call i64 @llvm.amdgcn.icmp.i64(i64 %src, i64 100, i32 39)
+ store i64 %result, i64 addrspace(1)* %out
+ ret void
+}
+
+define amdgpu_kernel void @v_icmp_i64_slt(i64 addrspace(1)* %out, i64 %src) {
+; GFX11-LABEL: v_icmp_i64_slt:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
+; GFX11-NEXT: s_mov_b64 s[4:5], 0x64
+; GFX11-NEXT: v_mov_b32_e32 v2, 0
+; GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-NEXT: v_cmp_lt_i64_e64 s[2:3], s[2:3], s[4:5]
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-NEXT: v_mov_b32_e32 v0, s2
+; GFX11-NEXT: v_mov_b32_e32 v1, s3
+; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX11-NEXT: s_endpgm
+;
+; SDAG-VI-LABEL: v_icmp_i64_slt:
+; SDAG-VI: ; %bb.0:
+; SDAG-VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
+; SDAG-VI-NEXT: v_mov_b32_e32 v0, 0x64
+; SDAG-VI-NEXT: v_mov_b32_e32 v1, 0
+; SDAG-VI-NEXT: s_waitcnt lgkmcnt(0)
+; SDAG-VI-NEXT: v_cmp_lt_i64_e64 s[2:3], s[2:3], v[0:1]
+; SDAG-VI-NEXT: v_mov_b32_e32 v2, s0
+; SDAG-VI-NEXT: v_mov_b32_e32 v0, s2
+; SDAG-VI-NEXT: v_mov_b32_e32 v3, s1
+; SDAG-VI-NEXT: v_mov_b32_e32 v1, s3
+; SDAG-VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
+; SDAG-VI-NEXT: s_endpgm
+;
+; SDAG-GFX9-LABEL: v_icmp_i64_slt:
+; SDAG-GFX9: ; %bb.0:
+; SDAG-GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
+; SDAG-GFX9-NEXT: v_mov_b32_e32 v1, 0
+; SDAG-GFX9-NEXT: v_mov_b32_e32 v0, 0x64
+; SDAG-GFX9-NEXT: s_waitcnt lgkmcnt(0)
+; SDAG-GFX9-NEXT: v_cmp_lt_i64_e64 s[2:3], s[2:3], v[0:1]
+; SDAG-GFX9-NEXT: v_mov_b32_e32 v2, s2
+; SDAG-GFX9-NEXT: v_mov_b32_e32 v3, s3
+; SDAG-GFX9-NEXT: global_store_dwordx2 v1, v[2:3], s[0:1]
+; SDAG-GFX9-NEXT: s_endpgm
+;
+; GISEL-VI-LABEL: v_icmp_i64_slt:
+; GISEL-VI: ; %bb.0:
+; GISEL-VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
+; GISEL-VI-NEXT: s_mov_b64 s[4:5], 0x64
+; GISEL-VI-NEXT: v_mov_b32_e32 v0, s4
+; GISEL-VI-NEXT: v_mov_b32_e32 v1, s5
+; GISEL-VI-NEXT: s_waitcnt lgkmcnt(0)
+; GISEL-VI-NEXT: v_cmp_lt_i64_e64 s[2:3], s[2:3], v[0:1]
+; GISEL-VI-NEXT: v_mov_b32_e32 v3, s1
+; GISEL-VI-NEXT: v_mov_b32_e32 v0, s2
+; GISEL-VI-NEXT: v_mov_b32_e32 v1, s3
+; GISEL-VI-NEXT: v_mov_b32_e32 v2, s0
+; GISEL-VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
+; GISEL-VI-NEXT: s_endpgm
+;
+; GISEL-GFX9-LABEL: v_icmp_i64_slt:
+; GISEL-GFX9: ; %bb.0:
+; GISEL-GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
+; GISEL-GFX9-NEXT: s_mov_b64 s[4:5], 0x64
+; GISEL-GFX9-NEXT: v_mov_b32_e32 v0, s4
+; GISEL-GFX9-NEXT: v_mov_b32_e32 v1, s5
+; GISEL-GFX9-NEXT: v_mov_b32_e32 v2, 0
+; GISEL-GFX9-NEXT: s_waitcnt lgkmcnt(0)
+; GISEL-GFX9-NEXT: v_cmp_lt_i64_e64 s[2:3], s[2:3], v[0:1]
+; GISEL-GFX9-NEXT: v_mov_b32_e32 v0, s2
+; GISEL-GFX9-NEXT: v_mov_b32_e32 v1, s3
+; GISEL-GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
+; GISEL-GFX9-NEXT: s_endpgm
+ %result = call i64 @llvm.amdgcn.icmp.i64(i64 %src, i64 100, i32 40)
+ store i64 %result, i64 addrspace(1)* %out
+ ret void
+}
+
+define amdgpu_kernel void @v_icmp_i64_sle(i64 addrspace(1)* %out, i64 %src) {
+; GFX11-LABEL: v_icmp_i64_sle:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
+; GFX11-NEXT: s_mov_b64 s[4:5], 0x64
+; GFX11-NEXT: v_mov_b32_e32 v2, 0
+; GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-NEXT: v_cmp_le_i64_e64 s[2:3], s[2:3], s[4:5]
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-NEXT: v_mov_b32_e32 v0, s2
+; GFX11-NEXT: v_mov_b32_e32 v1, s3
+; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX11-NEXT: s_endpgm
+;
+; SDAG-VI-LABEL: v_icmp_i64_sle:
+; SDAG-VI: ; %bb.0:
+; SDAG-VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
+; SDAG-VI-NEXT: v_mov_b32_e32 v0, 0x64
+; SDAG-VI-NEXT: v_mov_b32_e32 v1, 0
+; SDAG-VI-NEXT: s_waitcnt lgkmcnt(0)
+; SDAG-VI-NEXT: v_cmp_le_i64_e64 s[2:3], s[2:3], v[0:1]
+; SDAG-VI-NEXT: v_mov_b32_e32 v2, s0
+; SDAG-VI-NEXT: v_mov_b32_e32 v0, s2
+; SDAG-VI-NEXT: v_mov_b32_e32 v3, s1
+; SDAG-VI-NEXT: v_mov_b32_e32 v1, s3
+; SDAG-VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
+; SDAG-VI-NEXT: s_endpgm
+;
+; SDAG-GFX9-LABEL: v_icmp_i64_sle:
+; SDAG-GFX9: ; %bb.0:
+; SDAG-GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
+; SDAG-GFX9-NEXT: v_mov_b32_e32 v1, 0
+; SDAG-GFX9-NEXT: v_mov_b32_e32 v0, 0x64
+; SDAG-GFX9-NEXT: s_waitcnt lgkmcnt(0)
+; SDAG-GFX9-NEXT: v_cmp_le_i64_e64 s[2:3], s[2:3], v[0:1]
+; SDAG-GFX9-NEXT: v_mov_b32_e32 v2, s2
+; SDAG-GFX9-NEXT: v_mov_b32_e32 v3, s3
+; SDAG-GFX9-NEXT: global_store_dwordx2 v1, v[2:3], s[0:1]
+; SDAG-GFX9-NEXT: s_endpgm
+;
+; GISEL-VI-LABEL: v_icmp_i64_sle:
+; GISEL-VI: ; %bb.0:
+; GISEL-VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
+; GISEL-VI-NEXT: s_mov_b64 s[4:5], 0x64
+; GISEL-VI-NEXT: v_mov_b32_e32 v0, s4
+; GISEL-VI-NEXT: v_mov_b32_e32 v1, s5
+; GISEL-VI-NEXT: s_waitcnt lgkmcnt(0)
+; GISEL-VI-NEXT: v_cmp_le_i64_e64 s[2:3], s[2:3], v[0:1]
+; GISEL-VI-NEXT: v_mov_b32_e32 v3, s1
+; GISEL-VI-NEXT: v_mov_b32_e32 v0, s2
+; GISEL-VI-NEXT: v_mov_b32_e32 v1, s3
+; GISEL-VI-NEXT: v_mov_b32_e32 v2, s0
+; GISEL-VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
+; GISEL-VI-NEXT: s_endpgm
+;
+; GISEL-GFX9-LABEL: v_icmp_i64_sle:
+; GISEL-GFX9: ; %bb.0:
+; GISEL-GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
+; GISEL-GFX9-NEXT: s_mov_b64 s[4:5], 0x64
+; GISEL-GFX9-NEXT: v_mov_b32_e32 v0, s4
+; GISEL-GFX9-NEXT: v_mov_b32_e32 v1, s5
+; GISEL-GFX9-NEXT: v_mov_b32_e32 v2, 0
+; GISEL-GFX9-NEXT: s_waitcnt lgkmcnt(0)
+; GISEL-GFX9-NEXT: v_cmp_le_i64_e64 s[2:3], s[2:3], v[0:1]
+; GISEL-GFX9-NEXT: v_mov_b32_e32 v0, s2
+; GISEL-GFX9-NEXT: v_mov_b32_e32 v1, s3
+; GISEL-GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
+; GISEL-GFX9-NEXT: s_endpgm
+ %result = call i64 @llvm.amdgcn.icmp.i64(i64 %src, i64 100, i32 41)
+ store i64 %result, i64 addrspace(1)* %out
+ ret void
+}
+
+define amdgpu_kernel void @v_icmp_i16_eq(i64 addrspace(1)* %out, i16 %src) { ; i16 "eq" case: compares %src with the constant 100 and stores the i64 result to %out
+; GFX11-LABEL: v_icmp_i16_eq:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_clause 0x1
+; GFX11-NEXT: s_load_b32 s2, s[0:1], 0x2c
+; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
+; GFX11-NEXT: v_mov_b32_e32 v2, 0
+; GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-NEXT: v_cmp_eq_u16_e64 s[2:3], 0x64, s2
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-NEXT: v_mov_b32_e32 v0, s2
+; GFX11-NEXT: v_mov_b32_e32 v1, s3
+; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX11-NEXT: s_endpgm
+;
+; SDAG-VI-LABEL: v_icmp_i16_eq:
+; SDAG-VI: ; %bb.0:
+; SDAG-VI-NEXT: s_load_dword s2, s[0:1], 0x2c
+; SDAG-VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
+; SDAG-VI-NEXT: v_mov_b32_e32 v0, 0x64
+; SDAG-VI-NEXT: s_waitcnt lgkmcnt(0)
+; SDAG-VI-NEXT: v_cmp_eq_u16_e64 s[2:3], s2, v0
+; SDAG-VI-NEXT: v_mov_b32_e32 v0, s0
+; SDAG-VI-NEXT: v_mov_b32_e32 v2, s2
+; SDAG-VI-NEXT: v_mov_b32_e32 v1, s1
+; SDAG-VI-NEXT: v_mov_b32_e32 v3, s3
+; SDAG-VI-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
+; SDAG-VI-NEXT: s_endpgm
+;
+; GFX9-LABEL: v_icmp_i16_eq:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_load_dword s4, s[0:1], 0x2c
+; GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
+; GFX9-NEXT: v_mov_b32_e32 v0, 0x64
+; GFX9-NEXT: v_mov_b32_e32 v2, 0
+; GFX9-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-NEXT: v_cmp_eq_u16_e64 s[0:1], s4, v0
+; GFX9-NEXT: v_mov_b32_e32 v0, s0
+; GFX9-NEXT: v_mov_b32_e32 v1, s1
+; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[2:3]
+; GFX9-NEXT: s_endpgm
+;
+; GISEL-VI-LABEL: v_icmp_i16_eq:
+; GISEL-VI: ; %bb.0:
+; GISEL-VI-NEXT: s_load_dword s2, s[0:1], 0x2c
+; GISEL-VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
+; GISEL-VI-NEXT: v_mov_b32_e32 v0, 0x64
+; GISEL-VI-NEXT: s_waitcnt lgkmcnt(0)
+; GISEL-VI-NEXT: v_cmp_eq_u16_e64 s[2:3], s2, v0
+; GISEL-VI-NEXT: v_mov_b32_e32 v0, s2
+; GISEL-VI-NEXT: v_mov_b32_e32 v3, s1
+; GISEL-VI-NEXT: v_mov_b32_e32 v1, s3
+; GISEL-VI-NEXT: v_mov_b32_e32 v2, s0
+; GISEL-VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
+; GISEL-VI-NEXT: s_endpgm
+ %result = call i64 @llvm.amdgcn.icmp.i16(i16 %src, i16 100, i32 32) ; last operand is the condition code; 32 is the "eq" case (the checks expect v_cmp_eq_u16)
+ store i64 %result, i64 addrspace(1)* %out
+ ret void
+}
+
+define amdgpu_kernel void @v_icmp_i16(i64 addrspace(1)* %out, i16 %src) { ; i16 case using condition code 30, which is outside the 32..41 codes used by the named i16 tests around it
+; SDAG-GFX11-LABEL: v_icmp_i16:
+; SDAG-GFX11: ; %bb.0:
+; SDAG-GFX11-NEXT: s_endpgm
+;
+; SDAG-VI-LABEL: v_icmp_i16:
+; SDAG-VI: ; %bb.0:
+; SDAG-VI-NEXT: s_endpgm
+;
+; SDAG-GFX9-LABEL: v_icmp_i16:
+; SDAG-GFX9: ; %bb.0:
+; SDAG-GFX9-NEXT: s_endpgm
+;
+; GISEL-GFX11-LABEL: v_icmp_i16:
+; GISEL-GFX11: ; %bb.0:
+; GISEL-GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
+; GISEL-GFX11-NEXT: v_mov_b32_e32 v0, 0
+; GISEL-GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; GISEL-GFX11-NEXT: global_store_b64 v0, v[0:1], s[0:1]
+; GISEL-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GISEL-GFX11-NEXT: s_endpgm
+;
+; GISEL-VI-LABEL: v_icmp_i16:
+; GISEL-VI: ; %bb.0:
+; GISEL-VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
+; GISEL-VI-NEXT: s_waitcnt lgkmcnt(0)
+; GISEL-VI-NEXT: v_mov_b32_e32 v0, s0
+; GISEL-VI-NEXT: v_mov_b32_e32 v1, s1
+; GISEL-VI-NEXT: flat_store_dwordx2 v[0:1], v[0:1]
+; GISEL-VI-NEXT: s_endpgm
+;
+; GISEL-GFX9-LABEL: v_icmp_i16:
+; GISEL-GFX9: ; %bb.0:
+; GISEL-GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
+; GISEL-GFX9-NEXT: v_mov_b32_e32 v0, 0
+; GISEL-GFX9-NEXT: s_waitcnt lgkmcnt(0)
+; GISEL-GFX9-NEXT: global_store_dwordx2 v0, v[0:1], s[0:1]
+; GISEL-GFX9-NEXT: s_endpgm
+ %result = call i64 @llvm.amdgcn.icmp.i16(i16 %src, i16 100, i32 30) ; code 30: no variant's checks contain a v_cmp; the SDAG variants expect only s_endpgm, the GISel variants still expect the store
+ store i64 %result, i64 addrspace(1)* %out
+ ret void
+}
+
+define amdgpu_kernel void @v_icmp_i16_ne(i64 addrspace(1)* %out, i16 %src) { ; i16 "ne" case: compares %src with the constant 100 and stores the i64 result to %out
+; GFX11-LABEL: v_icmp_i16_ne:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_clause 0x1
+; GFX11-NEXT: s_load_b32 s2, s[0:1], 0x2c
+; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
+; GFX11-NEXT: v_mov_b32_e32 v2, 0
+; GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-NEXT: v_cmp_ne_u16_e64 s[2:3], 0x64, s2
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-NEXT: v_mov_b32_e32 v0, s2
+; GFX11-NEXT: v_mov_b32_e32 v1, s3
+; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX11-NEXT: s_endpgm
+;
+; SDAG-VI-LABEL: v_icmp_i16_ne:
+; SDAG-VI: ; %bb.0:
+; SDAG-VI-NEXT: s_load_dword s2, s[0:1], 0x2c
+; SDAG-VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
+; SDAG-VI-NEXT: v_mov_b32_e32 v0, 0x64
+; SDAG-VI-NEXT: s_waitcnt lgkmcnt(0)
+; SDAG-VI-NEXT: v_cmp_ne_u16_e64 s[2:3], s2, v0
+; SDAG-VI-NEXT: v_mov_b32_e32 v0, s0
+; SDAG-VI-NEXT: v_mov_b32_e32 v2, s2
+; SDAG-VI-NEXT: v_mov_b32_e32 v1, s1
+; SDAG-VI-NEXT: v_mov_b32_e32 v3, s3
+; SDAG-VI-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
+; SDAG-VI-NEXT: s_endpgm
+;
+; GFX9-LABEL: v_icmp_i16_ne:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_load_dword s4, s[0:1], 0x2c
+; GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
+; GFX9-NEXT: v_mov_b32_e32 v0, 0x64
+; GFX9-NEXT: v_mov_b32_e32 v2, 0
+; GFX9-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-NEXT: v_cmp_ne_u16_e64 s[0:1], s4, v0
+; GFX9-NEXT: v_mov_b32_e32 v0, s0
+; GFX9-NEXT: v_mov_b32_e32 v1, s1
+; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[2:3]
+; GFX9-NEXT: s_endpgm
+;
+; GISEL-VI-LABEL: v_icmp_i16_ne:
+; GISEL-VI: ; %bb.0:
+; GISEL-VI-NEXT: s_load_dword s2, s[0:1], 0x2c
+; GISEL-VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
+; GISEL-VI-NEXT: v_mov_b32_e32 v0, 0x64
+; GISEL-VI-NEXT: s_waitcnt lgkmcnt(0)
+; GISEL-VI-NEXT: v_cmp_ne_u16_e64 s[2:3], s2, v0
+; GISEL-VI-NEXT: v_mov_b32_e32 v0, s2
+; GISEL-VI-NEXT: v_mov_b32_e32 v3, s1
+; GISEL-VI-NEXT: v_mov_b32_e32 v1, s3
+; GISEL-VI-NEXT: v_mov_b32_e32 v2, s0
+; GISEL-VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
+; GISEL-VI-NEXT: s_endpgm
+ %result = call i64 @llvm.amdgcn.icmp.i16(i16 %src, i16 100, i32 33) ; last operand is the condition code; 33 is the "ne" case (the checks expect v_cmp_ne_u16)
+ store i64 %result, i64 addrspace(1)* %out
+ ret void
+}
+
+define amdgpu_kernel void @v_icmp_i16_ugt(i64 addrspace(1)* %out, i16 %src) { ; i16 unsigned "ugt" case: compares %src with the constant 100 and stores the i64 result to %out
+; GFX11-LABEL: v_icmp_i16_ugt:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_clause 0x1
+; GFX11-NEXT: s_load_b32 s2, s[0:1], 0x2c
+; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
+; GFX11-NEXT: v_mov_b32_e32 v2, 0
+; GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-NEXT: v_cmp_lt_u16_e64 s[2:3], 0x64, s2
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-NEXT: v_mov_b32_e32 v0, s2
+; GFX11-NEXT: v_mov_b32_e32 v1, s3
+; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX11-NEXT: s_endpgm
+;
+; SDAG-VI-LABEL: v_icmp_i16_ugt:
+; SDAG-VI: ; %bb.0:
+; SDAG-VI-NEXT: s_load_dword s2, s[0:1], 0x2c
+; SDAG-VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
+; SDAG-VI-NEXT: v_mov_b32_e32 v0, 0x64
+; SDAG-VI-NEXT: s_waitcnt lgkmcnt(0)
+; SDAG-VI-NEXT: v_cmp_gt_u16_e64 s[2:3], s2, v0
+; SDAG-VI-NEXT: v_mov_b32_e32 v0, s0
+; SDAG-VI-NEXT: v_mov_b32_e32 v2, s2
+; SDAG-VI-NEXT: v_mov_b32_e32 v1, s1
+; SDAG-VI-NEXT: v_mov_b32_e32 v3, s3
+; SDAG-VI-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
+; SDAG-VI-NEXT: s_endpgm
+;
+; GFX9-LABEL: v_icmp_i16_ugt:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_load_dword s4, s[0:1], 0x2c
+; GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
+; GFX9-NEXT: v_mov_b32_e32 v0, 0x64
+; GFX9-NEXT: v_mov_b32_e32 v2, 0
+; GFX9-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-NEXT: v_cmp_gt_u16_e64 s[0:1], s4, v0
+; GFX9-NEXT: v_mov_b32_e32 v0, s0
+; GFX9-NEXT: v_mov_b32_e32 v1, s1
+; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[2:3]
+; GFX9-NEXT: s_endpgm
+;
+; GISEL-VI-LABEL: v_icmp_i16_ugt:
+; GISEL-VI: ; %bb.0:
+; GISEL-VI-NEXT: s_load_dword s2, s[0:1], 0x2c
+; GISEL-VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
+; GISEL-VI-NEXT: v_mov_b32_e32 v0, 0x64
+; GISEL-VI-NEXT: s_waitcnt lgkmcnt(0)
+; GISEL-VI-NEXT: v_cmp_gt_u16_e64 s[2:3], s2, v0
+; GISEL-VI-NEXT: v_mov_b32_e32 v0, s2
+; GISEL-VI-NEXT: v_mov_b32_e32 v3, s1
+; GISEL-VI-NEXT: v_mov_b32_e32 v1, s3
+; GISEL-VI-NEXT: v_mov_b32_e32 v2, s0
+; GISEL-VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
+; GISEL-VI-NEXT: s_endpgm
+ %result = call i64 @llvm.amdgcn.icmp.i16(i16 %src, i16 100, i32 34) ; condition code 34 is the "ugt" case: v_cmp_gt_u16 with %src first, or on GFX11 the commuted v_cmp_lt_u16 with the constant first
+ store i64 %result, i64 addrspace(1)* %out
+ ret void
+}
+
+define amdgpu_kernel void @v_icmp_i16_uge(i64 addrspace(1)* %out, i16 %src) { ; i16 unsigned "uge" case: compares %src with the constant 100 and stores the i64 result to %out
+; GFX11-LABEL: v_icmp_i16_uge:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_clause 0x1
+; GFX11-NEXT: s_load_b32 s2, s[0:1], 0x2c
+; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
+; GFX11-NEXT: v_mov_b32_e32 v2, 0
+; GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-NEXT: v_cmp_le_u16_e64 s[2:3], 0x64, s2
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-NEXT: v_mov_b32_e32 v0, s2
+; GFX11-NEXT: v_mov_b32_e32 v1, s3
+; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX11-NEXT: s_endpgm
+;
+; SDAG-VI-LABEL: v_icmp_i16_uge:
+; SDAG-VI: ; %bb.0:
+; SDAG-VI-NEXT: s_load_dword s2, s[0:1], 0x2c
+; SDAG-VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
+; SDAG-VI-NEXT: v_mov_b32_e32 v0, 0x64
+; SDAG-VI-NEXT: s_waitcnt lgkmcnt(0)
+; SDAG-VI-NEXT: v_cmp_ge_u16_e64 s[2:3], s2, v0
+; SDAG-VI-NEXT: v_mov_b32_e32 v0, s0
+; SDAG-VI-NEXT: v_mov_b32_e32 v2, s2
+; SDAG-VI-NEXT: v_mov_b32_e32 v1, s1
+; SDAG-VI-NEXT: v_mov_b32_e32 v3, s3
+; SDAG-VI-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
+; SDAG-VI-NEXT: s_endpgm
+;
+; GFX9-LABEL: v_icmp_i16_uge:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_load_dword s4, s[0:1], 0x2c
+; GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
+; GFX9-NEXT: v_mov_b32_e32 v0, 0x64
+; GFX9-NEXT: v_mov_b32_e32 v2, 0
+; GFX9-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-NEXT: v_cmp_ge_u16_e64 s[0:1], s4, v0
+; GFX9-NEXT: v_mov_b32_e32 v0, s0
+; GFX9-NEXT: v_mov_b32_e32 v1, s1
+; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[2:3]
+; GFX9-NEXT: s_endpgm
+;
+; GISEL-VI-LABEL: v_icmp_i16_uge:
+; GISEL-VI: ; %bb.0:
+; GISEL-VI-NEXT: s_load_dword s2, s[0:1], 0x2c
+; GISEL-VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
+; GISEL-VI-NEXT: v_mov_b32_e32 v0, 0x64
+; GISEL-VI-NEXT: s_waitcnt lgkmcnt(0)
+; GISEL-VI-NEXT: v_cmp_ge_u16_e64 s[2:3], s2, v0
+; GISEL-VI-NEXT: v_mov_b32_e32 v0, s2
+; GISEL-VI-NEXT: v_mov_b32_e32 v3, s1
+; GISEL-VI-NEXT: v_mov_b32_e32 v1, s3
+; GISEL-VI-NEXT: v_mov_b32_e32 v2, s0
+; GISEL-VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
+; GISEL-VI-NEXT: s_endpgm
+ %result = call i64 @llvm.amdgcn.icmp.i16(i16 %src, i16 100, i32 35) ; condition code 35 is the "uge" case: v_cmp_ge_u16 with %src first, or on GFX11 the commuted v_cmp_le_u16 with the constant first
+ store i64 %result, i64 addrspace(1)* %out
+ ret void
+}
+
+define amdgpu_kernel void @v_icmp_i16_ult(i64 addrspace(1)* %out, i16 %src) { ; i16 unsigned "ult" case: compares %src with the constant 100 and stores the i64 result to %out
+; GFX11-LABEL: v_icmp_i16_ult:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_clause 0x1
+; GFX11-NEXT: s_load_b32 s2, s[0:1], 0x2c
+; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
+; GFX11-NEXT: v_mov_b32_e32 v2, 0
+; GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-NEXT: v_cmp_gt_u16_e64 s[2:3], 0x64, s2
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-NEXT: v_mov_b32_e32 v0, s2
+; GFX11-NEXT: v_mov_b32_e32 v1, s3
+; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX11-NEXT: s_endpgm
+;
+; SDAG-VI-LABEL: v_icmp_i16_ult:
+; SDAG-VI: ; %bb.0:
+; SDAG-VI-NEXT: s_load_dword s2, s[0:1], 0x2c
+; SDAG-VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
+; SDAG-VI-NEXT: v_mov_b32_e32 v0, 0x64
+; SDAG-VI-NEXT: s_waitcnt lgkmcnt(0)
+; SDAG-VI-NEXT: v_cmp_lt_u16_e64 s[2:3], s2, v0
+; SDAG-VI-NEXT: v_mov_b32_e32 v0, s0
+; SDAG-VI-NEXT: v_mov_b32_e32 v2, s2
+; SDAG-VI-NEXT: v_mov_b32_e32 v1, s1
+; SDAG-VI-NEXT: v_mov_b32_e32 v3, s3
+; SDAG-VI-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
+; SDAG-VI-NEXT: s_endpgm
+;
+; GFX9-LABEL: v_icmp_i16_ult:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_load_dword s4, s[0:1], 0x2c
+; GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
+; GFX9-NEXT: v_mov_b32_e32 v0, 0x64
+; GFX9-NEXT: v_mov_b32_e32 v2, 0
+; GFX9-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-NEXT: v_cmp_lt_u16_e64 s[0:1], s4, v0
+; GFX9-NEXT: v_mov_b32_e32 v0, s0
+; GFX9-NEXT: v_mov_b32_e32 v1, s1
+; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[2:3]
+; GFX9-NEXT: s_endpgm
+;
+; GISEL-VI-LABEL: v_icmp_i16_ult:
+; GISEL-VI: ; %bb.0:
+; GISEL-VI-NEXT: s_load_dword s2, s[0:1], 0x2c
+; GISEL-VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
+; GISEL-VI-NEXT: v_mov_b32_e32 v0, 0x64
+; GISEL-VI-NEXT: s_waitcnt lgkmcnt(0)
+; GISEL-VI-NEXT: v_cmp_lt_u16_e64 s[2:3], s2, v0
+; GISEL-VI-NEXT: v_mov_b32_e32 v0, s2
+; GISEL-VI-NEXT: v_mov_b32_e32 v3, s1
+; GISEL-VI-NEXT: v_mov_b32_e32 v1, s3
+; GISEL-VI-NEXT: v_mov_b32_e32 v2, s0
+; GISEL-VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
+; GISEL-VI-NEXT: s_endpgm
+ %result = call i64 @llvm.amdgcn.icmp.i16(i16 %src, i16 100, i32 36) ; condition code 36 is the "ult" case: v_cmp_lt_u16 with %src first, or on GFX11 the commuted v_cmp_gt_u16 with the constant first
+ store i64 %result, i64 addrspace(1)* %out
+ ret void
+}
+
+define amdgpu_kernel void @v_icmp_i16_ule(i64 addrspace(1)* %out, i16 %src) { ; i16 unsigned "ule" case: compares %src with the constant 100 and stores the i64 result to %out
+; GFX11-LABEL: v_icmp_i16_ule:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_clause 0x1
+; GFX11-NEXT: s_load_b32 s2, s[0:1], 0x2c
+; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
+; GFX11-NEXT: v_mov_b32_e32 v2, 0
+; GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-NEXT: v_cmp_ge_u16_e64 s[2:3], 0x64, s2
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-NEXT: v_mov_b32_e32 v0, s2
+; GFX11-NEXT: v_mov_b32_e32 v1, s3
+; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX11-NEXT: s_endpgm
+;
+; SDAG-VI-LABEL: v_icmp_i16_ule:
+; SDAG-VI: ; %bb.0:
+; SDAG-VI-NEXT: s_load_dword s2, s[0:1], 0x2c
+; SDAG-VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
+; SDAG-VI-NEXT: v_mov_b32_e32 v0, 0x64
+; SDAG-VI-NEXT: s_waitcnt lgkmcnt(0)
+; SDAG-VI-NEXT: v_cmp_le_u16_e64 s[2:3], s2, v0
+; SDAG-VI-NEXT: v_mov_b32_e32 v0, s0
+; SDAG-VI-NEXT: v_mov_b32_e32 v2, s2
+; SDAG-VI-NEXT: v_mov_b32_e32 v1, s1
+; SDAG-VI-NEXT: v_mov_b32_e32 v3, s3
+; SDAG-VI-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
+; SDAG-VI-NEXT: s_endpgm
+;
+; GFX9-LABEL: v_icmp_i16_ule:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_load_dword s4, s[0:1], 0x2c
+; GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
+; GFX9-NEXT: v_mov_b32_e32 v0, 0x64
+; GFX9-NEXT: v_mov_b32_e32 v2, 0
+; GFX9-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-NEXT: v_cmp_le_u16_e64 s[0:1], s4, v0
+; GFX9-NEXT: v_mov_b32_e32 v0, s0
+; GFX9-NEXT: v_mov_b32_e32 v1, s1
+; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[2:3]
+; GFX9-NEXT: s_endpgm
+;
+; GISEL-VI-LABEL: v_icmp_i16_ule:
+; GISEL-VI: ; %bb.0:
+; GISEL-VI-NEXT: s_load_dword s2, s[0:1], 0x2c
+; GISEL-VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
+; GISEL-VI-NEXT: v_mov_b32_e32 v0, 0x64
+; GISEL-VI-NEXT: s_waitcnt lgkmcnt(0)
+; GISEL-VI-NEXT: v_cmp_le_u16_e64 s[2:3], s2, v0
+; GISEL-VI-NEXT: v_mov_b32_e32 v0, s2
+; GISEL-VI-NEXT: v_mov_b32_e32 v3, s1
+; GISEL-VI-NEXT: v_mov_b32_e32 v1, s3
+; GISEL-VI-NEXT: v_mov_b32_e32 v2, s0
+; GISEL-VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
+; GISEL-VI-NEXT: s_endpgm
+ %result = call i64 @llvm.amdgcn.icmp.i16(i16 %src, i16 100, i32 37) ; condition code 37 is the "ule" case: v_cmp_le_u16 with %src first, or on GFX11 the commuted v_cmp_ge_u16 with the constant first
+ store i64 %result, i64 addrspace(1)* %out
+ ret void
+}
+
+define amdgpu_kernel void @v_icmp_i16_sgt(i64 addrspace(1)* %out, i16 %src) #1 { ; i16 signed "sgt" case: compares %src with 100 and stores the i64 result to %out. NOTE(review): attribute group #1 is not defined in the visible part of this file (only #0 is) - confirm this is intentional
+; GFX11-LABEL: v_icmp_i16_sgt:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_clause 0x1
+; GFX11-NEXT: s_load_b32 s2, s[0:1], 0x2c
+; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
+; GFX11-NEXT: v_mov_b32_e32 v2, 0
+; GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-NEXT: v_cmp_lt_i16_e64 s[2:3], 0x64, s2
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-NEXT: v_mov_b32_e32 v0, s2
+; GFX11-NEXT: v_mov_b32_e32 v1, s3
+; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX11-NEXT: s_endpgm
+;
+; SDAG-VI-LABEL: v_icmp_i16_sgt:
+; SDAG-VI: ; %bb.0:
+; SDAG-VI-NEXT: s_load_dword s2, s[0:1], 0x2c
+; SDAG-VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
+; SDAG-VI-NEXT: v_mov_b32_e32 v0, 0x64
+; SDAG-VI-NEXT: s_waitcnt lgkmcnt(0)
+; SDAG-VI-NEXT: v_cmp_gt_i16_e64 s[2:3], s2, v0
+; SDAG-VI-NEXT: v_mov_b32_e32 v0, s0
+; SDAG-VI-NEXT: v_mov_b32_e32 v2, s2
+; SDAG-VI-NEXT: v_mov_b32_e32 v1, s1
+; SDAG-VI-NEXT: v_mov_b32_e32 v3, s3
+; SDAG-VI-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
+; SDAG-VI-NEXT: s_endpgm
+;
+; GFX9-LABEL: v_icmp_i16_sgt:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_load_dword s4, s[0:1], 0x2c
+; GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
+; GFX9-NEXT: v_mov_b32_e32 v0, 0x64
+; GFX9-NEXT: v_mov_b32_e32 v2, 0
+; GFX9-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-NEXT: v_cmp_gt_i16_e64 s[0:1], s4, v0
+; GFX9-NEXT: v_mov_b32_e32 v0, s0
+; GFX9-NEXT: v_mov_b32_e32 v1, s1
+; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[2:3]
+; GFX9-NEXT: s_endpgm
+;
+; GISEL-VI-LABEL: v_icmp_i16_sgt:
+; GISEL-VI: ; %bb.0:
+; GISEL-VI-NEXT: s_load_dword s2, s[0:1], 0x2c
+; GISEL-VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
+; GISEL-VI-NEXT: v_mov_b32_e32 v0, 0x64
+; GISEL-VI-NEXT: s_waitcnt lgkmcnt(0)
+; GISEL-VI-NEXT: v_cmp_gt_i16_e64 s[2:3], s2, v0
+; GISEL-VI-NEXT: v_mov_b32_e32 v0, s2
+; GISEL-VI-NEXT: v_mov_b32_e32 v3, s1
+; GISEL-VI-NEXT: v_mov_b32_e32 v1, s3
+; GISEL-VI-NEXT: v_mov_b32_e32 v2, s0
+; GISEL-VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
+; GISEL-VI-NEXT: s_endpgm
+ %result = call i64 @llvm.amdgcn.icmp.i16(i16 %src, i16 100, i32 38) ; condition code 38 is the "sgt" case: v_cmp_gt_i16 with %src first, or on GFX11 the commuted v_cmp_lt_i16 with the constant first
+ store i64 %result, i64 addrspace(1)* %out
+ ret void
+}
+
+define amdgpu_kernel void @v_icmp_i16_sge(i64 addrspace(1)* %out, i16 %src) { ; i16 signed "sge" case: compares %src with the constant 100 and stores the i64 result to %out
+; GFX11-LABEL: v_icmp_i16_sge:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_clause 0x1
+; GFX11-NEXT: s_load_b32 s2, s[0:1], 0x2c
+; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
+; GFX11-NEXT: v_mov_b32_e32 v2, 0
+; GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-NEXT: v_cmp_le_i16_e64 s[2:3], 0x64, s2
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-NEXT: v_mov_b32_e32 v0, s2
+; GFX11-NEXT: v_mov_b32_e32 v1, s3
+; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX11-NEXT: s_endpgm
+;
+; SDAG-VI-LABEL: v_icmp_i16_sge:
+; SDAG-VI: ; %bb.0:
+; SDAG-VI-NEXT: s_load_dword s2, s[0:1], 0x2c
+; SDAG-VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
+; SDAG-VI-NEXT: v_mov_b32_e32 v0, 0x64
+; SDAG-VI-NEXT: s_waitcnt lgkmcnt(0)
+; SDAG-VI-NEXT: v_cmp_ge_i16_e64 s[2:3], s2, v0
+; SDAG-VI-NEXT: v_mov_b32_e32 v0, s0
+; SDAG-VI-NEXT: v_mov_b32_e32 v2, s2
+; SDAG-VI-NEXT: v_mov_b32_e32 v1, s1
+; SDAG-VI-NEXT: v_mov_b32_e32 v3, s3
+; SDAG-VI-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
+; SDAG-VI-NEXT: s_endpgm
+;
+; GFX9-LABEL: v_icmp_i16_sge:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_load_dword s4, s[0:1], 0x2c
+; GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
+; GFX9-NEXT: v_mov_b32_e32 v0, 0x64
+; GFX9-NEXT: v_mov_b32_e32 v2, 0
+; GFX9-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-NEXT: v_cmp_ge_i16_e64 s[0:1], s4, v0
+; GFX9-NEXT: v_mov_b32_e32 v0, s0
+; GFX9-NEXT: v_mov_b32_e32 v1, s1
+; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[2:3]
+; GFX9-NEXT: s_endpgm
+;
+; GISEL-VI-LABEL: v_icmp_i16_sge:
+; GISEL-VI: ; %bb.0:
+; GISEL-VI-NEXT: s_load_dword s2, s[0:1], 0x2c
+; GISEL-VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
+; GISEL-VI-NEXT: v_mov_b32_e32 v0, 0x64
+; GISEL-VI-NEXT: s_waitcnt lgkmcnt(0)
+; GISEL-VI-NEXT: v_cmp_ge_i16_e64 s[2:3], s2, v0
+; GISEL-VI-NEXT: v_mov_b32_e32 v0, s2
+; GISEL-VI-NEXT: v_mov_b32_e32 v3, s1
+; GISEL-VI-NEXT: v_mov_b32_e32 v1, s3
+; GISEL-VI-NEXT: v_mov_b32_e32 v2, s0
+; GISEL-VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
+; GISEL-VI-NEXT: s_endpgm
+ %result = call i64 @llvm.amdgcn.icmp.i16(i16 %src, i16 100, i32 39) ; condition code 39 is the "sge" case: v_cmp_ge_i16 with %src first, or on GFX11 the commuted v_cmp_le_i16 with the constant first
+ store i64 %result, i64 addrspace(1)* %out
+ ret void
+}
+
+define amdgpu_kernel void @v_icmp_i16_slt(i64 addrspace(1)* %out, i16 %src) { ; i16 signed "slt" case: compares %src with the constant 100 and stores the i64 result to %out
+; GFX11-LABEL: v_icmp_i16_slt:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_clause 0x1
+; GFX11-NEXT: s_load_b32 s2, s[0:1], 0x2c
+; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
+; GFX11-NEXT: v_mov_b32_e32 v2, 0
+; GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-NEXT: v_cmp_gt_i16_e64 s[2:3], 0x64, s2
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-NEXT: v_mov_b32_e32 v0, s2
+; GFX11-NEXT: v_mov_b32_e32 v1, s3
+; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX11-NEXT: s_endpgm
+;
+; SDAG-VI-LABEL: v_icmp_i16_slt:
+; SDAG-VI: ; %bb.0:
+; SDAG-VI-NEXT: s_load_dword s2, s[0:1], 0x2c
+; SDAG-VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
+; SDAG-VI-NEXT: v_mov_b32_e32 v0, 0x64
+; SDAG-VI-NEXT: s_waitcnt lgkmcnt(0)
+; SDAG-VI-NEXT: v_cmp_lt_i16_e64 s[2:3], s2, v0
+; SDAG-VI-NEXT: v_mov_b32_e32 v0, s0
+; SDAG-VI-NEXT: v_mov_b32_e32 v2, s2
+; SDAG-VI-NEXT: v_mov_b32_e32 v1, s1
+; SDAG-VI-NEXT: v_mov_b32_e32 v3, s3
+; SDAG-VI-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
+; SDAG-VI-NEXT: s_endpgm
+;
+; GFX9-LABEL: v_icmp_i16_slt:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_load_dword s4, s[0:1], 0x2c
+; GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
+; GFX9-NEXT: v_mov_b32_e32 v0, 0x64
+; GFX9-NEXT: v_mov_b32_e32 v2, 0
+; GFX9-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-NEXT: v_cmp_lt_i16_e64 s[0:1], s4, v0
+; GFX9-NEXT: v_mov_b32_e32 v0, s0
+; GFX9-NEXT: v_mov_b32_e32 v1, s1
+; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[2:3]
+; GFX9-NEXT: s_endpgm
+;
+; GISEL-VI-LABEL: v_icmp_i16_slt:
+; GISEL-VI: ; %bb.0:
+; GISEL-VI-NEXT: s_load_dword s2, s[0:1], 0x2c
+; GISEL-VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
+; GISEL-VI-NEXT: v_mov_b32_e32 v0, 0x64
+; GISEL-VI-NEXT: s_waitcnt lgkmcnt(0)
+; GISEL-VI-NEXT: v_cmp_lt_i16_e64 s[2:3], s2, v0
+; GISEL-VI-NEXT: v_mov_b32_e32 v0, s2
+; GISEL-VI-NEXT: v_mov_b32_e32 v3, s1
+; GISEL-VI-NEXT: v_mov_b32_e32 v1, s3
+; GISEL-VI-NEXT: v_mov_b32_e32 v2, s0
+; GISEL-VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
+; GISEL-VI-NEXT: s_endpgm
+ %result = call i64 @llvm.amdgcn.icmp.i16(i16 %src, i16 100, i32 40) ; condition code 40 is the "slt" case: v_cmp_lt_i16 with %src first, or on GFX11 the commuted v_cmp_gt_i16 with the constant first
+ store i64 %result, i64 addrspace(1)* %out
+ ret void
+}
+
+define amdgpu_kernel void @v_icmp_i16_sle(i64 addrspace(1)* %out, i16 %src) { ; i16 signed "sle" case: compares %src with the constant 100 and stores the i64 result to %out
+; GFX11-LABEL: v_icmp_i16_sle:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_clause 0x1
+; GFX11-NEXT: s_load_b32 s2, s[0:1], 0x2c
+; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
+; GFX11-NEXT: v_mov_b32_e32 v2, 0
+; GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-NEXT: v_cmp_ge_i16_e64 s[2:3], 0x64, s2
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-NEXT: v_mov_b32_e32 v0, s2
+; GFX11-NEXT: v_mov_b32_e32 v1, s3
+; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX11-NEXT: s_endpgm
+;
+; SDAG-VI-LABEL: v_icmp_i16_sle:
+; SDAG-VI: ; %bb.0:
+; SDAG-VI-NEXT: s_load_dword s2, s[0:1], 0x2c
+; SDAG-VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
+; SDAG-VI-NEXT: v_mov_b32_e32 v0, 0x64
+; SDAG-VI-NEXT: s_waitcnt lgkmcnt(0)
+; SDAG-VI-NEXT: v_cmp_le_i16_e64 s[2:3], s2, v0
+; SDAG-VI-NEXT: v_mov_b32_e32 v0, s0
+; SDAG-VI-NEXT: v_mov_b32_e32 v2, s2
+; SDAG-VI-NEXT: v_mov_b32_e32 v1, s1
+; SDAG-VI-NEXT: v_mov_b32_e32 v3, s3
+; SDAG-VI-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
+; SDAG-VI-NEXT: s_endpgm
+;
+; GFX9-LABEL: v_icmp_i16_sle:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_load_dword s4, s[0:1], 0x2c
+; GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
+; GFX9-NEXT: v_mov_b32_e32 v0, 0x64
+; GFX9-NEXT: v_mov_b32_e32 v2, 0
+; GFX9-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-NEXT: v_cmp_le_i16_e64 s[0:1], s4, v0
+; GFX9-NEXT: v_mov_b32_e32 v0, s0
+; GFX9-NEXT: v_mov_b32_e32 v1, s1
+; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[2:3]
+; GFX9-NEXT: s_endpgm
+;
+; GISEL-VI-LABEL: v_icmp_i16_sle:
+; GISEL-VI: ; %bb.0:
+; GISEL-VI-NEXT: s_load_dword s2, s[0:1], 0x2c
+; GISEL-VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
+; GISEL-VI-NEXT: v_mov_b32_e32 v0, 0x64
+; GISEL-VI-NEXT: s_waitcnt lgkmcnt(0)
+; GISEL-VI-NEXT: v_cmp_le_i16_e64 s[2:3], s2, v0
+; GISEL-VI-NEXT: v_mov_b32_e32 v0, s2
+; GISEL-VI-NEXT: v_mov_b32_e32 v3, s1
+; GISEL-VI-NEXT: v_mov_b32_e32 v1, s3
+; GISEL-VI-NEXT: v_mov_b32_e32 v2, s0
+; GISEL-VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
+; GISEL-VI-NEXT: s_endpgm
+ %result = call i64 @llvm.amdgcn.icmp.i16(i16 %src, i16 100, i32 41) ; condition code 41 is the "sle" case: v_cmp_le_i16 with %src first, or on GFX11 the commuted v_cmp_ge_i16 with the constant first
+ store i64 %result, i64 addrspace(1)* %out
+ ret void
+}
+
+define amdgpu_kernel void @v_icmp_i1_ne0(i64 addrspace(1)* %out, i32 %a, i32 %b) { ; i1 case: the intrinsic's source is the AND of two i32 "ugt" compares, tested "ne" against false; the i64 result is stored to %out
+; GFX11-LABEL: v_icmp_i1_ne0:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
+; GFX11-NEXT: v_mov_b32_e32 v2, 0
+; GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-NEXT: s_cmp_gt_u32 s2, 1
+; GFX11-NEXT: s_cselect_b64 s[4:5], -1, 0
+; GFX11-NEXT: s_cmp_gt_u32 s3, 2
+; GFX11-NEXT: s_cselect_b64 s[2:3], -1, 0
+; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX11-NEXT: s_and_b64 s[2:3], s[4:5], s[2:3]
+; GFX11-NEXT: v_mov_b32_e32 v0, s2
+; GFX11-NEXT: v_mov_b32_e32 v1, s3
+; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX11-NEXT: s_endpgm
+;
+; VI-LABEL: v_icmp_i1_ne0:
+; VI: ; %bb.0:
+; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
+; VI-NEXT: s_waitcnt lgkmcnt(0)
+; VI-NEXT: s_cmp_gt_u32 s2, 1
+; VI-NEXT: s_cselect_b64 s[4:5], -1, 0
+; VI-NEXT: s_cmp_gt_u32 s3, 2
+; VI-NEXT: s_cselect_b64 s[2:3], -1, 0
+; VI-NEXT: s_and_b64 s[2:3], s[4:5], s[2:3]
+; VI-NEXT: v_mov_b32_e32 v0, s0
+; VI-NEXT: v_mov_b32_e32 v2, s2
+; VI-NEXT: v_mov_b32_e32 v1, s1
+; VI-NEXT: v_mov_b32_e32 v3, s3
+; VI-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
+; VI-NEXT: s_endpgm
+;
+; GFX9-LABEL: v_icmp_i1_ne0:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
+; GFX9-NEXT: v_mov_b32_e32 v2, 0
+; GFX9-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-NEXT: s_cmp_gt_u32 s2, 1
+; GFX9-NEXT: s_cselect_b64 s[4:5], -1, 0
+; GFX9-NEXT: s_cmp_gt_u32 s3, 2
+; GFX9-NEXT: s_cselect_b64 s[2:3], -1, 0
+; GFX9-NEXT: s_and_b64 s[2:3], s[4:5], s[2:3]
+; GFX9-NEXT: v_mov_b32_e32 v0, s2
+; GFX9-NEXT: v_mov_b32_e32 v1, s3
+; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
+; GFX9-NEXT: s_endpgm
+ %c0 = icmp ugt i32 %a, 1
+ %c1 = icmp ugt i32 %b, 2
+ %src = and i1 %c0, %c1
+ %result = call i64 @llvm.amdgcn.icmp.i1(i1 %src, i1 false, i32 33) ; condition code 33 ("ne") against false; the checks expect only scalar s_cmp/s_cselect/s_and_b64, with no v_cmp for this call
+ store i64 %result, i64 addrspace(1)* %out
+ ret void
+}
+
+define amdgpu_ps void @test_intr_icmp_i32_invalid_cc(i64 addrspace(1)* %out, i32 %src) { ; i32 case with condition code 9999 (the "invalid_cc" of the name); records what each selector emits for it
+; SDAG-GFX11-LABEL: test_intr_icmp_i32_invalid_cc:
+; SDAG-GFX11: ; %bb.0:
+; SDAG-GFX11-NEXT: s_endpgm
+;
+; SDAG-VI-LABEL: test_intr_icmp_i32_invalid_cc:
+; SDAG-VI: ; %bb.0:
+; SDAG-VI-NEXT: s_endpgm
+;
+; SDAG-GFX9-LABEL: test_intr_icmp_i32_invalid_cc:
+; SDAG-GFX9: ; %bb.0:
+; SDAG-GFX9-NEXT: s_endpgm
+;
+; GISEL-GFX11-LABEL: test_intr_icmp_i32_invalid_cc:
+; GISEL-GFX11: ; %bb.0:
+; GISEL-GFX11-NEXT: global_store_b64 v[0:1], v[0:1], off
+; GISEL-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GISEL-GFX11-NEXT: s_endpgm
+;
+; GISEL-VI-LABEL: test_intr_icmp_i32_invalid_cc:
+; GISEL-VI: ; %bb.0:
+; GISEL-VI-NEXT: flat_store_dwordx2 v[0:1], v[0:1]
+; GISEL-VI-NEXT: s_endpgm
+;
+; GISEL-GFX9-LABEL: test_intr_icmp_i32_invalid_cc:
+; GISEL-GFX9: ; %bb.0:
+; GISEL-GFX9-NEXT: global_store_dwordx2 v[0:1], v[0:1], off
+; GISEL-GFX9-NEXT: s_endpgm
+ %result = call i64 @llvm.amdgcn.icmp.i32(i32 %src, i32 100, i32 9999) ; code 9999: no variant's checks contain a compare; the SDAG variants expect only s_endpgm, the GISel variants still expect the store
+ store i64 %result, i64 addrspace(1)* %out
+ ret void
+}
+
+attributes #0 = { nounwind readnone convergent } ; attribute group #0; NOTE(review): nothing in the visible part of this file references #0, and @v_icmp_i16_sgt references an undefined #1 - confirm against the declarations earlier in the file
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; GCN: {{.*}}
More information about the llvm-commits
mailing list