[llvm] a751676 - [AMDGPU][GISel] Add llvm.amdgcn.icmp selection

Pierre van Houtryve via llvm-commits llvm-commits at lists.llvm.org
Tue Nov 22 00:26:56 PST 2022


Author: Pierre van Houtryve
Date: 2022-11-22T08:26:50Z
New Revision: a751676f98e8d1380cedc7c4639704bd41a0f182

URL: https://github.com/llvm/llvm-project/commit/a751676f98e8d1380cedc7c4639704bd41a0f182
DIFF: https://github.com/llvm/llvm-project/commit/a751676f98e8d1380cedc7c4639704bd41a0f182.diff

LOG: [AMDGPU][GISel] Add llvm.amdgcn.icmp selection

Add missing logic to select i16 variants and enable GISel testing.

Reviewed By: arsenm

Differential Revision: https://reviews.llvm.org/D136448

Added: 
    llvm/test/CodeGen/AMDGPU/llvm.amdgcn.icmp.w32.ll
    llvm/test/CodeGen/AMDGPU/llvm.amdgcn.icmp.w64.ll

Modified: 
    llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
    llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-icmp.s16.mir

Removed: 
    llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.icmp.ll
    llvm/test/CodeGen/AMDGPU/llvm.amdgcn.icmp.ll


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index 9957642aafafe..fbc0461b522d2 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -1093,6 +1093,8 @@ bool AMDGPUInstructionSelector::selectG_INTRINSIC(MachineInstr &I) const {
   case Intrinsic::amdgcn_div_scale:
     return selectDivScale(I);
   case Intrinsic::amdgcn_icmp:
+    if (selectImpl(I, *CoverageInfo))
+      return true;
     return selectIntrinsicIcmp(I);
   case Intrinsic::amdgcn_ballot:
     return selectBallot(I);
@@ -1122,32 +1124,56 @@ bool AMDGPUInstructionSelector::selectG_INTRINSIC(MachineInstr &I) const {
   }
 }
 
-static int getV_CMPOpcode(CmpInst::Predicate P, unsigned Size) {
-  if (Size != 32 && Size != 64)
+static int getV_CMPOpcode(CmpInst::Predicate P, unsigned Size,
+                          const GCNSubtarget &ST) {
+  if (Size != 16 && Size != 32 && Size != 64)
+    return -1;
+
+  if (Size == 16 && !ST.has16BitInsts())
     return -1;
+
+  const auto Select = [&](unsigned S16Opc, unsigned TrueS16Opc, unsigned S32Opc,
+                          unsigned S64Opc) {
+    if (Size == 16)
+      return ST.hasTrue16BitInsts() ? TrueS16Opc : S16Opc;
+    if (Size == 32)
+      return S32Opc;
+    return S64Opc;
+  };
+
   switch (P) {
   default:
     llvm_unreachable("Unknown condition code!");
   case CmpInst::ICMP_NE:
-    return Size == 32 ? AMDGPU::V_CMP_NE_U32_e64 : AMDGPU::V_CMP_NE_U64_e64;
+    return Select(AMDGPU::V_CMP_NE_U16_e64, AMDGPU::V_CMP_NE_U16_t16_e64,
+                  AMDGPU::V_CMP_NE_U32_e64, AMDGPU::V_CMP_NE_U64_e64);
   case CmpInst::ICMP_EQ:
-    return Size == 32 ? AMDGPU::V_CMP_EQ_U32_e64 : AMDGPU::V_CMP_EQ_U64_e64;
+    return Select(AMDGPU::V_CMP_EQ_U16_e64, AMDGPU::V_CMP_EQ_U16_t16_e64,
+                  AMDGPU::V_CMP_EQ_U32_e64, AMDGPU::V_CMP_EQ_U64_e64);
   case CmpInst::ICMP_SGT:
-    return Size == 32 ? AMDGPU::V_CMP_GT_I32_e64 : AMDGPU::V_CMP_GT_I64_e64;
+    return Select(AMDGPU::V_CMP_GT_I16_e64, AMDGPU::V_CMP_GT_I16_t16_e64,
+                  AMDGPU::V_CMP_GT_I32_e64, AMDGPU::V_CMP_GT_I64_e64);
   case CmpInst::ICMP_SGE:
-    return Size == 32 ? AMDGPU::V_CMP_GE_I32_e64 : AMDGPU::V_CMP_GE_I64_e64;
+    return Select(AMDGPU::V_CMP_GE_I16_e64, AMDGPU::V_CMP_GE_I16_t16_e64,
+                  AMDGPU::V_CMP_GE_I32_e64, AMDGPU::V_CMP_GE_I64_e64);
   case CmpInst::ICMP_SLT:
-    return Size == 32 ? AMDGPU::V_CMP_LT_I32_e64 : AMDGPU::V_CMP_LT_I64_e64;
+    return Select(AMDGPU::V_CMP_LT_I16_e64, AMDGPU::V_CMP_LT_I16_t16_e64,
+                  AMDGPU::V_CMP_LT_I32_e64, AMDGPU::V_CMP_LT_I64_e64);
   case CmpInst::ICMP_SLE:
-    return Size == 32 ? AMDGPU::V_CMP_LE_I32_e64 : AMDGPU::V_CMP_LE_I64_e64;
+    return Select(AMDGPU::V_CMP_LE_I16_e64, AMDGPU::V_CMP_LE_I16_t16_e64,
+                  AMDGPU::V_CMP_LE_I32_e64, AMDGPU::V_CMP_LE_I64_e64);
   case CmpInst::ICMP_UGT:
-    return Size == 32 ? AMDGPU::V_CMP_GT_U32_e64 : AMDGPU::V_CMP_GT_U64_e64;
+    return Select(AMDGPU::V_CMP_GT_U16_e64, AMDGPU::V_CMP_GT_U16_t16_e64,
+                  AMDGPU::V_CMP_GT_U32_e64, AMDGPU::V_CMP_GT_U64_e64);
   case CmpInst::ICMP_UGE:
-    return Size == 32 ? AMDGPU::V_CMP_GE_U32_e64 : AMDGPU::V_CMP_GE_U64_e64;
+    return Select(AMDGPU::V_CMP_GE_U16_e64, AMDGPU::V_CMP_GE_U16_t16_e64,
+                  AMDGPU::V_CMP_GE_U32_e64, AMDGPU::V_CMP_GE_U64_e64);
   case CmpInst::ICMP_ULT:
-    return Size == 32 ? AMDGPU::V_CMP_LT_U32_e64 : AMDGPU::V_CMP_LT_U64_e64;
+    return Select(AMDGPU::V_CMP_LT_U16_e64, AMDGPU::V_CMP_LT_U16_t16_e64,
+                  AMDGPU::V_CMP_LT_U32_e64, AMDGPU::V_CMP_LT_U64_e64);
   case CmpInst::ICMP_ULE:
-    return Size == 32 ? AMDGPU::V_CMP_LE_U32_e64 : AMDGPU::V_CMP_LE_U64_e64;
+    return Select(AMDGPU::V_CMP_LE_U16_e64, AMDGPU::V_CMP_LE_U16_t16_e64,
+                  AMDGPU::V_CMP_LE_U32_e64, AMDGPU::V_CMP_LE_U64_e64);
   }
 }
 
@@ -1222,7 +1248,7 @@ bool AMDGPUInstructionSelector::selectG_ICMP(MachineInstr &I) const {
     return Ret;
   }
 
-  int Opcode = getV_CMPOpcode(Pred, Size);
+  int Opcode = getV_CMPOpcode(Pred, Size, *Subtarget);
   if (Opcode == -1)
     return false;
 
@@ -1250,30 +1276,27 @@ bool AMDGPUInstructionSelector::selectIntrinsicIcmp(MachineInstr &I) const {
   Register SrcReg = I.getOperand(2).getReg();
   unsigned Size = RBI.getSizeInBits(SrcReg, *MRI, TRI);
 
-  auto Pred = static_cast<CmpInst::Predicate>(I.getOperand(4).getImm());
-  if (!ICmpInst::isIntPredicate(static_cast<ICmpInst::Predicate>(Pred))) {
-    MachineInstr *ICmp =
-        BuildMI(*BB, &I, DL, TII.get(AMDGPU::IMPLICIT_DEF), Dst);
+  // i1 inputs are not supported in GlobalISel.
+  if (Size == 1)
+    return false;
 
-    if (!RBI.constrainGenericRegister(ICmp->getOperand(0).getReg(),
-                                      *TRI.getBoolRC(), *MRI))
-      return false;
+  auto Pred = static_cast<CmpInst::Predicate>(I.getOperand(4).getImm());
+  if (!CmpInst::isIntPredicate(Pred)) {
+    BuildMI(*BB, &I, DL, TII.get(AMDGPU::IMPLICIT_DEF), Dst);
     I.eraseFromParent();
-    return true;
+    return RBI.constrainGenericRegister(Dst, *TRI.getBoolRC(), *MRI);
   }
 
-  int Opcode = getV_CMPOpcode(Pred, Size);
+  int Opcode = getV_CMPOpcode(Pred, Size, *Subtarget);
   if (Opcode == -1)
     return false;
 
   MachineInstr *ICmp = BuildMI(*BB, &I, DL, TII.get(Opcode), Dst)
                            .add(I.getOperand(2))
                            .add(I.getOperand(3));
-  RBI.constrainGenericRegister(ICmp->getOperand(0).getReg(), *TRI.getBoolRC(),
-                               *MRI);
-  bool Ret = constrainSelectedInstRegOperands(*ICmp, TII, TRI, RBI);
+  RBI.constrainGenericRegister(Dst, *TRI.getBoolRC(), *MRI);
   I.eraseFromParent();
-  return Ret;
+  return constrainSelectedInstRegOperands(*ICmp, TII, TRI, RBI);
 }
 
 bool AMDGPUInstructionSelector::selectBallot(MachineInstr &I) const {

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-icmp.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-icmp.s16.mir
index 3c2c37513bba5..669721b6dbfec 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-icmp.s16.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-icmp.s16.mir
@@ -18,21 +18,21 @@ body: |
     ; WAVE64-NEXT: {{  $}}
     ; WAVE64-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
     ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-    ; WAVE64-NEXT: [[V_CMP_EQ_U16_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U16_e64 [[COPY]], [[COPY1]], implicit $exec
+    ; WAVE64-NEXT: [[V_CMP_EQ_U16_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U16_e64 [[COPY]], [[COPY1]], implicit $exec
     ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_U16_e64_]]
     ; WAVE32-LABEL: name: icmp_eq_s16_sv
     ; WAVE32: liveins: $sgpr0, $vgpr0
     ; WAVE32-NEXT: {{  $}}
     ; WAVE32-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
     ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-    ; WAVE32-NEXT: [[V_CMP_EQ_U16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U16_e64 [[COPY]], [[COPY1]], implicit $exec
+    ; WAVE32-NEXT: [[V_CMP_EQ_U16_e64_:%[0-9]+]]:sreg_32 = V_CMP_EQ_U16_e64 [[COPY]], [[COPY1]], implicit $exec
     ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_U16_e64_]]
     ; GFX11-LABEL: name: icmp_eq_s16_sv
     ; GFX11: liveins: $sgpr0, $vgpr0
     ; GFX11-NEXT: {{  $}}
     ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
     ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-    ; GFX11-NEXT: [[V_CMP_EQ_U16_t16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U16_t16_e64 [[COPY]], [[COPY1]], implicit $exec
+    ; GFX11-NEXT: [[V_CMP_EQ_U16_t16_e64_:%[0-9]+]]:sreg_32 = V_CMP_EQ_U16_t16_e64 [[COPY]], [[COPY1]], implicit $exec
     ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_U16_t16_e64_]]
     %0:sgpr(s32) = COPY $sgpr0
     %1:vgpr(s32) = COPY $vgpr0
@@ -57,21 +57,21 @@ body: |
     ; WAVE64-NEXT: {{  $}}
     ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
     ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0
-    ; WAVE64-NEXT: [[V_CMP_EQ_U16_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U16_e64 [[COPY]], [[COPY1]], implicit $exec
+    ; WAVE64-NEXT: [[V_CMP_EQ_U16_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U16_e64 [[COPY]], [[COPY1]], implicit $exec
     ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_U16_e64_]]
     ; WAVE32-LABEL: name: icmp_eq_s16_vs
     ; WAVE32: liveins: $sgpr0, $vgpr0
     ; WAVE32-NEXT: {{  $}}
     ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
     ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0
-    ; WAVE32-NEXT: [[V_CMP_EQ_U16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U16_e64 [[COPY]], [[COPY1]], implicit $exec
+    ; WAVE32-NEXT: [[V_CMP_EQ_U16_e64_:%[0-9]+]]:sreg_32 = V_CMP_EQ_U16_e64 [[COPY]], [[COPY1]], implicit $exec
     ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_U16_e64_]]
     ; GFX11-LABEL: name: icmp_eq_s16_vs
     ; GFX11: liveins: $sgpr0, $vgpr0
     ; GFX11-NEXT: {{  $}}
     ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
     ; GFX11-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0
-    ; GFX11-NEXT: [[V_CMP_EQ_U16_t16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U16_t16_e64 [[COPY]], [[COPY1]], implicit $exec
+    ; GFX11-NEXT: [[V_CMP_EQ_U16_t16_e64_:%[0-9]+]]:sreg_32 = V_CMP_EQ_U16_t16_e64 [[COPY]], [[COPY1]], implicit $exec
     ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_U16_t16_e64_]]
     %0:vgpr(s32) = COPY $vgpr0
     %1:sgpr(s32) = COPY $sgpr0
@@ -96,21 +96,21 @@ body: |
     ; WAVE64-NEXT: {{  $}}
     ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
     ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
-    ; WAVE64-NEXT: [[V_CMP_EQ_U16_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U16_e64 [[COPY]], [[COPY1]], implicit $exec
+    ; WAVE64-NEXT: [[V_CMP_EQ_U16_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U16_e64 [[COPY]], [[COPY1]], implicit $exec
     ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_U16_e64_]]
     ; WAVE32-LABEL: name: icmp_eq_s16_vv
     ; WAVE32: liveins: $vgpr0, $vgpr1
     ; WAVE32-NEXT: {{  $}}
     ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
     ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
-    ; WAVE32-NEXT: [[V_CMP_EQ_U16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U16_e64 [[COPY]], [[COPY1]], implicit $exec
+    ; WAVE32-NEXT: [[V_CMP_EQ_U16_e64_:%[0-9]+]]:sreg_32 = V_CMP_EQ_U16_e64 [[COPY]], [[COPY1]], implicit $exec
     ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_U16_e64_]]
     ; GFX11-LABEL: name: icmp_eq_s16_vv
     ; GFX11: liveins: $vgpr0, $vgpr1
     ; GFX11-NEXT: {{  $}}
     ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
     ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
-    ; GFX11-NEXT: [[V_CMP_EQ_U16_t16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U16_t16_e64 [[COPY]], [[COPY1]], implicit $exec
+    ; GFX11-NEXT: [[V_CMP_EQ_U16_t16_e64_:%[0-9]+]]:sreg_32 = V_CMP_EQ_U16_t16_e64 [[COPY]], [[COPY1]], implicit $exec
     ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_U16_t16_e64_]]
     %0:vgpr(s32) = COPY $vgpr0
     %1:vgpr(s32) = COPY $vgpr1
@@ -135,21 +135,21 @@ body: |
     ; WAVE64-NEXT: {{  $}}
     ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
     ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
-    ; WAVE64-NEXT: [[V_CMP_NE_U16_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U16_e64 [[COPY]], [[COPY1]], implicit $exec
+    ; WAVE64-NEXT: [[V_CMP_NE_U16_e64_:%[0-9]+]]:sreg_64 = V_CMP_NE_U16_e64 [[COPY]], [[COPY1]], implicit $exec
     ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_NE_U16_e64_]]
     ; WAVE32-LABEL: name: icmp_ne_s16_vv
     ; WAVE32: liveins: $vgpr0, $vgpr1
     ; WAVE32-NEXT: {{  $}}
     ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
     ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
-    ; WAVE32-NEXT: [[V_CMP_NE_U16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_NE_U16_e64 [[COPY]], [[COPY1]], implicit $exec
+    ; WAVE32-NEXT: [[V_CMP_NE_U16_e64_:%[0-9]+]]:sreg_32 = V_CMP_NE_U16_e64 [[COPY]], [[COPY1]], implicit $exec
     ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_NE_U16_e64_]]
     ; GFX11-LABEL: name: icmp_ne_s16_vv
     ; GFX11: liveins: $vgpr0, $vgpr1
     ; GFX11-NEXT: {{  $}}
     ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
     ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
-    ; GFX11-NEXT: [[V_CMP_NE_U16_t16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_NE_U16_t16_e64 [[COPY]], [[COPY1]], implicit $exec
+    ; GFX11-NEXT: [[V_CMP_NE_U16_t16_e64_:%[0-9]+]]:sreg_32 = V_CMP_NE_U16_t16_e64 [[COPY]], [[COPY1]], implicit $exec
     ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_NE_U16_t16_e64_]]
     %0:vgpr(s32) = COPY $vgpr0
     %1:vgpr(s32) = COPY $vgpr1
@@ -174,21 +174,21 @@ body: |
     ; WAVE64-NEXT: {{  $}}
     ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
     ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
-    ; WAVE64-NEXT: [[V_CMP_LT_I16_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_LT_I16_e64 [[COPY]], [[COPY1]], implicit $exec
+    ; WAVE64-NEXT: [[V_CMP_LT_I16_e64_:%[0-9]+]]:sreg_64 = V_CMP_LT_I16_e64 [[COPY]], [[COPY1]], implicit $exec
     ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_LT_I16_e64_]]
     ; WAVE32-LABEL: name: icmp_slt_s16_vv
     ; WAVE32: liveins: $vgpr0, $vgpr1
     ; WAVE32-NEXT: {{  $}}
     ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
     ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
-    ; WAVE32-NEXT: [[V_CMP_LT_I16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_LT_I16_e64 [[COPY]], [[COPY1]], implicit $exec
+    ; WAVE32-NEXT: [[V_CMP_LT_I16_e64_:%[0-9]+]]:sreg_32 = V_CMP_LT_I16_e64 [[COPY]], [[COPY1]], implicit $exec
     ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_LT_I16_e64_]]
     ; GFX11-LABEL: name: icmp_slt_s16_vv
     ; GFX11: liveins: $vgpr0, $vgpr1
     ; GFX11-NEXT: {{  $}}
     ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
     ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
-    ; GFX11-NEXT: [[V_CMP_LT_I16_t16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_LT_I16_t16_e64 [[COPY]], [[COPY1]], implicit $exec
+    ; GFX11-NEXT: [[V_CMP_LT_I16_t16_e64_:%[0-9]+]]:sreg_32 = V_CMP_LT_I16_t16_e64 [[COPY]], [[COPY1]], implicit $exec
     ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_LT_I16_t16_e64_]]
     %0:vgpr(s32) = COPY $vgpr0
     %1:vgpr(s32) = COPY $vgpr1
@@ -213,21 +213,21 @@ body: |
     ; WAVE64-NEXT: {{  $}}
     ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
     ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
-    ; WAVE64-NEXT: [[V_CMP_LE_I16_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_LE_I16_e64 [[COPY]], [[COPY1]], implicit $exec
+    ; WAVE64-NEXT: [[V_CMP_LE_I16_e64_:%[0-9]+]]:sreg_64 = V_CMP_LE_I16_e64 [[COPY]], [[COPY1]], implicit $exec
     ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_LE_I16_e64_]]
     ; WAVE32-LABEL: name: icmp_sle_s16_vv
     ; WAVE32: liveins: $vgpr0, $vgpr1
     ; WAVE32-NEXT: {{  $}}
     ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
     ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
-    ; WAVE32-NEXT: [[V_CMP_LE_I16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_LE_I16_e64 [[COPY]], [[COPY1]], implicit $exec
+    ; WAVE32-NEXT: [[V_CMP_LE_I16_e64_:%[0-9]+]]:sreg_32 = V_CMP_LE_I16_e64 [[COPY]], [[COPY1]], implicit $exec
     ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_LE_I16_e64_]]
     ; GFX11-LABEL: name: icmp_sle_s16_vv
     ; GFX11: liveins: $vgpr0, $vgpr1
     ; GFX11-NEXT: {{  $}}
     ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
     ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
-    ; GFX11-NEXT: [[V_CMP_LE_I16_t16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_LE_I16_t16_e64 [[COPY]], [[COPY1]], implicit $exec
+    ; GFX11-NEXT: [[V_CMP_LE_I16_t16_e64_:%[0-9]+]]:sreg_32 = V_CMP_LE_I16_t16_e64 [[COPY]], [[COPY1]], implicit $exec
     ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_LE_I16_t16_e64_]]
     %0:vgpr(s32) = COPY $vgpr0
     %1:vgpr(s32) = COPY $vgpr1
@@ -252,21 +252,21 @@ body: |
     ; WAVE64-NEXT: {{  $}}
     ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
     ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
-    ; WAVE64-NEXT: [[V_CMP_LT_U16_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_LT_U16_e64 [[COPY]], [[COPY1]], implicit $exec
+    ; WAVE64-NEXT: [[V_CMP_LT_U16_e64_:%[0-9]+]]:sreg_64 = V_CMP_LT_U16_e64 [[COPY]], [[COPY1]], implicit $exec
     ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_LT_U16_e64_]]
     ; WAVE32-LABEL: name: icmp_ult_s16_vv
     ; WAVE32: liveins: $vgpr0, $vgpr1
     ; WAVE32-NEXT: {{  $}}
     ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
     ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
-    ; WAVE32-NEXT: [[V_CMP_LT_U16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_LT_U16_e64 [[COPY]], [[COPY1]], implicit $exec
+    ; WAVE32-NEXT: [[V_CMP_LT_U16_e64_:%[0-9]+]]:sreg_32 = V_CMP_LT_U16_e64 [[COPY]], [[COPY1]], implicit $exec
     ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_LT_U16_e64_]]
     ; GFX11-LABEL: name: icmp_ult_s16_vv
     ; GFX11: liveins: $vgpr0, $vgpr1
     ; GFX11-NEXT: {{  $}}
     ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
     ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
-    ; GFX11-NEXT: [[V_CMP_LT_U16_t16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_LT_U16_t16_e64 [[COPY]], [[COPY1]], implicit $exec
+    ; GFX11-NEXT: [[V_CMP_LT_U16_t16_e64_:%[0-9]+]]:sreg_32 = V_CMP_LT_U16_t16_e64 [[COPY]], [[COPY1]], implicit $exec
     ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_LT_U16_t16_e64_]]
     %0:vgpr(s32) = COPY $vgpr0
     %1:vgpr(s32) = COPY $vgpr1
@@ -291,21 +291,21 @@ body: |
     ; WAVE64-NEXT: {{  $}}
     ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
     ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
-    ; WAVE64-NEXT: [[V_CMP_LE_U16_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_LE_U16_e64 [[COPY]], [[COPY1]], implicit $exec
+    ; WAVE64-NEXT: [[V_CMP_LE_U16_e64_:%[0-9]+]]:sreg_64 = V_CMP_LE_U16_e64 [[COPY]], [[COPY1]], implicit $exec
     ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_LE_U16_e64_]]
     ; WAVE32-LABEL: name: icmp_ule_s16_vv
     ; WAVE32: liveins: $vgpr0, $vgpr1
     ; WAVE32-NEXT: {{  $}}
     ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
     ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
-    ; WAVE32-NEXT: [[V_CMP_LE_U16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_LE_U16_e64 [[COPY]], [[COPY1]], implicit $exec
+    ; WAVE32-NEXT: [[V_CMP_LE_U16_e64_:%[0-9]+]]:sreg_32 = V_CMP_LE_U16_e64 [[COPY]], [[COPY1]], implicit $exec
     ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_LE_U16_e64_]]
     ; GFX11-LABEL: name: icmp_ule_s16_vv
     ; GFX11: liveins: $vgpr0, $vgpr1
     ; GFX11-NEXT: {{  $}}
     ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
     ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
-    ; GFX11-NEXT: [[V_CMP_LE_U16_t16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_LE_U16_t16_e64 [[COPY]], [[COPY1]], implicit $exec
+    ; GFX11-NEXT: [[V_CMP_LE_U16_t16_e64_:%[0-9]+]]:sreg_32 = V_CMP_LE_U16_t16_e64 [[COPY]], [[COPY1]], implicit $exec
     ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_LE_U16_t16_e64_]]
     %0:vgpr(s32) = COPY $vgpr0
     %1:vgpr(s32) = COPY $vgpr1

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.icmp.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.icmp.ll
deleted file mode 100644
index 73440ecbbfcaf..0000000000000
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.icmp.ll
+++ /dev/null
@@ -1,66 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -global-isel -global-isel-abort=1 -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX10 %s
-; RUN: llc -global-isel -global-isel-abort=1 -march=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX11 %s
-
-define amdgpu_ps void @test_intr_icmp_eq_i64(i64 addrspace(1)* %out, i32 %src) #0 {
-; GFX10-LABEL: test_intr_icmp_eq_i64:
-; GFX10:       ; %bb.0:
-; GFX10-NEXT:    v_cmp_eq_u32_e64 s[0:1], 0x64, v2
-; GFX10-NEXT:    v_mov_b32_e32 v3, s1
-; GFX10-NEXT:    v_mov_b32_e32 v2, s0
-; GFX10-NEXT:    global_store_dwordx2 v[0:1], v[2:3], off
-; GFX10-NEXT:    s_endpgm
-;
-; GFX11-LABEL: test_intr_icmp_eq_i64:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    v_cmp_eq_u32_e64 s[0:1], 0x64, v2
-; GFX11-NEXT:    v_mov_b32_e32 v3, s1
-; GFX11-NEXT:    v_mov_b32_e32 v2, s0
-; GFX11-NEXT:    global_store_b64 v[0:1], v[2:3], off
-; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
-; GFX11-NEXT:    s_endpgm
-  %result = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %src, i32 100, i32 32)
-  store i64 %result, i64 addrspace(1)* %out
-  ret void
-}
-
-define amdgpu_ps void @test_intr_icmp_ne_i32(i32 addrspace(1)* %out, i32 %src) #1 {
-; GFX10-LABEL: test_intr_icmp_ne_i32:
-; GFX10:       ; %bb.0:
-; GFX10-NEXT:    v_cmp_ne_u32_e64 s0, 0x64, v2
-; GFX10-NEXT:    v_mov_b32_e32 v2, s0
-; GFX10-NEXT:    global_store_dword v[0:1], v2, off
-; GFX10-NEXT:    s_endpgm
-;
-; GFX11-LABEL: test_intr_icmp_ne_i32:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    v_cmp_ne_u32_e64 s0, 0x64, v2
-; GFX11-NEXT:    v_mov_b32_e32 v2, s0
-; GFX11-NEXT:    global_store_b32 v[0:1], v2, off
-; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
-; GFX11-NEXT:    s_endpgm
-  %result = call i32 @llvm.amdgcn.icmp.i32.i32(i32 %src, i32 100, i32 33)
-  store i32 %result, i32 addrspace(1)* %out
-  ret void
-}
-
-define amdgpu_ps void @test_intr_icmp_i32_invalid_cc(i32 addrspace(1)* %out, i32 %src) #1 {
-; GFX10-LABEL: test_intr_icmp_i32_invalid_cc:
-; GFX10:       ; %bb.0:
-; GFX10-NEXT:    global_store_dword v[0:1], v0, off
-; GFX10-NEXT:    s_endpgm
-;
-; GFX11-LABEL: test_intr_icmp_i32_invalid_cc:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    global_store_b32 v[0:1], v0, off
-; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
-; GFX11-NEXT:    s_endpgm
-  %result = call i32 @llvm.amdgcn.icmp.i32.i32(i32 %src, i32 100, i32 9999)
-  store i32 %result, i32 addrspace(1)* %out
-  ret void
-}
-
-declare i64 @llvm.amdgcn.icmp.i64.i32(i32, i32, i32)
-declare i32 @llvm.amdgcn.icmp.i32.i32(i32, i32, i32)
-attributes #0 = { "target-features"="+wavefrontsize64" }
-attributes #1 = { "target-features"="+wavefrontsize32" }

diff  --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.icmp.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.icmp.ll
deleted file mode 100644
index 7c9d063bb7242..0000000000000
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.icmp.ll
+++ /dev/null
@@ -1,1101 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX %s
-; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VI %s
-
-declare i64 @llvm.amdgcn.icmp.i64.i32(i32, i32, i32)
-declare i32 @llvm.amdgcn.icmp.i32.i32(i32, i32, i32)
-declare i64 @llvm.amdgcn.icmp.i32(i32, i32, i32) #0
-declare i64 @llvm.amdgcn.icmp.i64(i64, i64, i32) #0
-declare i64 @llvm.amdgcn.icmp.i16(i16, i16, i32) #0
-declare i64 @llvm.amdgcn.icmp.i1(i1, i1, i32) #0
-
-define amdgpu_kernel void @v_icmp_i32_eq(i64 addrspace(1)* %out, i32 %src) {
-; GFX-LABEL: v_icmp_i32_eq:
-; GFX:       ; %bb.0:
-; GFX-NEXT:    s_load_dword s2, s[0:1], 0xb
-; GFX-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
-; GFX-NEXT:    s_mov_b32 s3, 0xf000
-; GFX-NEXT:    v_mov_b32_e32 v0, 0x64
-; GFX-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX-NEXT:    v_cmp_eq_u32_e64 s[4:5], s2, v0
-; GFX-NEXT:    s_mov_b32 s2, -1
-; GFX-NEXT:    v_mov_b32_e32 v0, s4
-; GFX-NEXT:    v_mov_b32_e32 v1, s5
-; GFX-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
-; GFX-NEXT:    s_endpgm
-;
-; VI-LABEL: v_icmp_i32_eq:
-; VI:       ; %bb.0:
-; VI-NEXT:    s_load_dword s2, s[0:1], 0x2c
-; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
-; VI-NEXT:    v_mov_b32_e32 v0, 0x64
-; VI-NEXT:    s_waitcnt lgkmcnt(0)
-; VI-NEXT:    v_cmp_eq_u32_e64 s[2:3], s2, v0
-; VI-NEXT:    v_mov_b32_e32 v0, s0
-; VI-NEXT:    v_mov_b32_e32 v2, s2
-; VI-NEXT:    v_mov_b32_e32 v1, s1
-; VI-NEXT:    v_mov_b32_e32 v3, s3
-; VI-NEXT:    flat_store_dwordx2 v[0:1], v[2:3]
-; VI-NEXT:    s_endpgm
-  %result = call i64 @llvm.amdgcn.icmp.i32(i32 %src, i32 100, i32 32)
-  store i64 %result, i64 addrspace(1)* %out
-  ret void
-}
-
-define amdgpu_kernel void @v_icmp_i32(i64 addrspace(1)* %out, i32 %src) {
-; GCN-LABEL: v_icmp_i32:
-; GCN:       ; %bb.0:
-; GCN-NEXT:    s_endpgm
-  %result = call i64 @llvm.amdgcn.icmp.i32(i32 %src, i32 100, i32 30)
-  store i64 %result, i64 addrspace(1)* %out
-  ret void
-}
-
-define amdgpu_kernel void @v_icmp_i32_ne(i64 addrspace(1)* %out, i32 %src) {
-; GFX-LABEL: v_icmp_i32_ne:
-; GFX:       ; %bb.0:
-; GFX-NEXT:    s_load_dword s2, s[0:1], 0xb
-; GFX-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
-; GFX-NEXT:    s_mov_b32 s3, 0xf000
-; GFX-NEXT:    v_mov_b32_e32 v0, 0x64
-; GFX-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX-NEXT:    v_cmp_ne_u32_e64 s[4:5], s2, v0
-; GFX-NEXT:    s_mov_b32 s2, -1
-; GFX-NEXT:    v_mov_b32_e32 v0, s4
-; GFX-NEXT:    v_mov_b32_e32 v1, s5
-; GFX-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
-; GFX-NEXT:    s_endpgm
-;
-; VI-LABEL: v_icmp_i32_ne:
-; VI:       ; %bb.0:
-; VI-NEXT:    s_load_dword s2, s[0:1], 0x2c
-; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
-; VI-NEXT:    v_mov_b32_e32 v0, 0x64
-; VI-NEXT:    s_waitcnt lgkmcnt(0)
-; VI-NEXT:    v_cmp_ne_u32_e64 s[2:3], s2, v0
-; VI-NEXT:    v_mov_b32_e32 v0, s0
-; VI-NEXT:    v_mov_b32_e32 v2, s2
-; VI-NEXT:    v_mov_b32_e32 v1, s1
-; VI-NEXT:    v_mov_b32_e32 v3, s3
-; VI-NEXT:    flat_store_dwordx2 v[0:1], v[2:3]
-; VI-NEXT:    s_endpgm
-  %result = call i64 @llvm.amdgcn.icmp.i32(i32 %src, i32 100, i32 33)
-  store i64 %result, i64 addrspace(1)* %out
-  ret void
-}
-
-define amdgpu_kernel void @v_icmp_i32_ugt(i64 addrspace(1)* %out, i32 %src) {
-; GFX-LABEL: v_icmp_i32_ugt:
-; GFX:       ; %bb.0:
-; GFX-NEXT:    s_load_dword s2, s[0:1], 0xb
-; GFX-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
-; GFX-NEXT:    s_mov_b32 s3, 0xf000
-; GFX-NEXT:    v_mov_b32_e32 v0, 0x64
-; GFX-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX-NEXT:    v_cmp_gt_u32_e64 s[4:5], s2, v0
-; GFX-NEXT:    s_mov_b32 s2, -1
-; GFX-NEXT:    v_mov_b32_e32 v0, s4
-; GFX-NEXT:    v_mov_b32_e32 v1, s5
-; GFX-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
-; GFX-NEXT:    s_endpgm
-;
-; VI-LABEL: v_icmp_i32_ugt:
-; VI:       ; %bb.0:
-; VI-NEXT:    s_load_dword s2, s[0:1], 0x2c
-; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
-; VI-NEXT:    v_mov_b32_e32 v0, 0x64
-; VI-NEXT:    s_waitcnt lgkmcnt(0)
-; VI-NEXT:    v_cmp_gt_u32_e64 s[2:3], s2, v0
-; VI-NEXT:    v_mov_b32_e32 v0, s0
-; VI-NEXT:    v_mov_b32_e32 v2, s2
-; VI-NEXT:    v_mov_b32_e32 v1, s1
-; VI-NEXT:    v_mov_b32_e32 v3, s3
-; VI-NEXT:    flat_store_dwordx2 v[0:1], v[2:3]
-; VI-NEXT:    s_endpgm
-  %result = call i64 @llvm.amdgcn.icmp.i32(i32 %src, i32 100, i32 34)
-  store i64 %result, i64 addrspace(1)* %out
-  ret void
-}
-
-define amdgpu_kernel void @v_icmp_i32_uge(i64 addrspace(1)* %out, i32 %src) {
-; GFX-LABEL: v_icmp_i32_uge:
-; GFX:       ; %bb.0:
-; GFX-NEXT:    s_load_dword s2, s[0:1], 0xb
-; GFX-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
-; GFX-NEXT:    s_mov_b32 s3, 0xf000
-; GFX-NEXT:    v_mov_b32_e32 v0, 0x64
-; GFX-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX-NEXT:    v_cmp_ge_u32_e64 s[4:5], s2, v0
-; GFX-NEXT:    s_mov_b32 s2, -1
-; GFX-NEXT:    v_mov_b32_e32 v0, s4
-; GFX-NEXT:    v_mov_b32_e32 v1, s5
-; GFX-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
-; GFX-NEXT:    s_endpgm
-;
-; VI-LABEL: v_icmp_i32_uge:
-; VI:       ; %bb.0:
-; VI-NEXT:    s_load_dword s2, s[0:1], 0x2c
-; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
-; VI-NEXT:    v_mov_b32_e32 v0, 0x64
-; VI-NEXT:    s_waitcnt lgkmcnt(0)
-; VI-NEXT:    v_cmp_ge_u32_e64 s[2:3], s2, v0
-; VI-NEXT:    v_mov_b32_e32 v0, s0
-; VI-NEXT:    v_mov_b32_e32 v2, s2
-; VI-NEXT:    v_mov_b32_e32 v1, s1
-; VI-NEXT:    v_mov_b32_e32 v3, s3
-; VI-NEXT:    flat_store_dwordx2 v[0:1], v[2:3]
-; VI-NEXT:    s_endpgm
-  %result = call i64 @llvm.amdgcn.icmp.i32(i32 %src, i32 100, i32 35)
-  store i64 %result, i64 addrspace(1)* %out
-  ret void
-}
-
-define amdgpu_kernel void @v_icmp_i32_ult(i64 addrspace(1)* %out, i32 %src) {
-; GFX-LABEL: v_icmp_i32_ult:
-; GFX:       ; %bb.0:
-; GFX-NEXT:    s_load_dword s2, s[0:1], 0xb
-; GFX-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
-; GFX-NEXT:    s_mov_b32 s3, 0xf000
-; GFX-NEXT:    v_mov_b32_e32 v0, 0x64
-; GFX-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX-NEXT:    v_cmp_lt_u32_e64 s[4:5], s2, v0
-; GFX-NEXT:    s_mov_b32 s2, -1
-; GFX-NEXT:    v_mov_b32_e32 v0, s4
-; GFX-NEXT:    v_mov_b32_e32 v1, s5
-; GFX-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
-; GFX-NEXT:    s_endpgm
-;
-; VI-LABEL: v_icmp_i32_ult:
-; VI:       ; %bb.0:
-; VI-NEXT:    s_load_dword s2, s[0:1], 0x2c
-; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
-; VI-NEXT:    v_mov_b32_e32 v0, 0x64
-; VI-NEXT:    s_waitcnt lgkmcnt(0)
-; VI-NEXT:    v_cmp_lt_u32_e64 s[2:3], s2, v0
-; VI-NEXT:    v_mov_b32_e32 v0, s0
-; VI-NEXT:    v_mov_b32_e32 v2, s2
-; VI-NEXT:    v_mov_b32_e32 v1, s1
-; VI-NEXT:    v_mov_b32_e32 v3, s3
-; VI-NEXT:    flat_store_dwordx2 v[0:1], v[2:3]
-; VI-NEXT:    s_endpgm
-  %result = call i64 @llvm.amdgcn.icmp.i32(i32 %src, i32 100, i32 36)
-  store i64 %result, i64 addrspace(1)* %out
-  ret void
-}
-
-define amdgpu_kernel void @v_icmp_i32_ule(i64 addrspace(1)* %out, i32 %src) {
-; GFX-LABEL: v_icmp_i32_ule:
-; GFX:       ; %bb.0:
-; GFX-NEXT:    s_load_dword s2, s[0:1], 0xb
-; GFX-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
-; GFX-NEXT:    s_mov_b32 s3, 0xf000
-; GFX-NEXT:    v_mov_b32_e32 v0, 0x64
-; GFX-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX-NEXT:    v_cmp_le_u32_e64 s[4:5], s2, v0
-; GFX-NEXT:    s_mov_b32 s2, -1
-; GFX-NEXT:    v_mov_b32_e32 v0, s4
-; GFX-NEXT:    v_mov_b32_e32 v1, s5
-; GFX-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
-; GFX-NEXT:    s_endpgm
-;
-; VI-LABEL: v_icmp_i32_ule:
-; VI:       ; %bb.0:
-; VI-NEXT:    s_load_dword s2, s[0:1], 0x2c
-; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
-; VI-NEXT:    v_mov_b32_e32 v0, 0x64
-; VI-NEXT:    s_waitcnt lgkmcnt(0)
-; VI-NEXT:    v_cmp_le_u32_e64 s[2:3], s2, v0
-; VI-NEXT:    v_mov_b32_e32 v0, s0
-; VI-NEXT:    v_mov_b32_e32 v2, s2
-; VI-NEXT:    v_mov_b32_e32 v1, s1
-; VI-NEXT:    v_mov_b32_e32 v3, s3
-; VI-NEXT:    flat_store_dwordx2 v[0:1], v[2:3]
-; VI-NEXT:    s_endpgm
-  %result = call i64 @llvm.amdgcn.icmp.i32(i32 %src, i32 100, i32 37)
-  store i64 %result, i64 addrspace(1)* %out
-  ret void
-}
-
-define amdgpu_kernel void @v_icmp_i32_sgt(i64 addrspace(1)* %out, i32 %src) #1 {
-; GFX-LABEL: v_icmp_i32_sgt:
-; GFX:       ; %bb.0:
-; GFX-NEXT:    s_load_dword s2, s[0:1], 0xb
-; GFX-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
-; GFX-NEXT:    s_mov_b32 s3, 0xf000
-; GFX-NEXT:    v_mov_b32_e32 v0, 0x64
-; GFX-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX-NEXT:    v_cmp_gt_i32_e64 s[4:5], s2, v0
-; GFX-NEXT:    s_mov_b32 s2, -1
-; GFX-NEXT:    v_mov_b32_e32 v0, s4
-; GFX-NEXT:    v_mov_b32_e32 v1, s5
-; GFX-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
-; GFX-NEXT:    s_endpgm
-;
-; VI-LABEL: v_icmp_i32_sgt:
-; VI:       ; %bb.0:
-; VI-NEXT:    s_load_dword s2, s[0:1], 0x2c
-; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
-; VI-NEXT:    v_mov_b32_e32 v0, 0x64
-; VI-NEXT:    s_waitcnt lgkmcnt(0)
-; VI-NEXT:    v_cmp_gt_i32_e64 s[2:3], s2, v0
-; VI-NEXT:    v_mov_b32_e32 v0, s0
-; VI-NEXT:    v_mov_b32_e32 v2, s2
-; VI-NEXT:    v_mov_b32_e32 v1, s1
-; VI-NEXT:    v_mov_b32_e32 v3, s3
-; VI-NEXT:    flat_store_dwordx2 v[0:1], v[2:3]
-; VI-NEXT:    s_endpgm
-  %result = call i64 @llvm.amdgcn.icmp.i32(i32 %src, i32 100, i32 38)
-  store i64 %result, i64 addrspace(1)* %out
-  ret void
-}
-
-define amdgpu_kernel void @v_icmp_i32_sge(i64 addrspace(1)* %out, i32 %src) {
-; GFX-LABEL: v_icmp_i32_sge:
-; GFX:       ; %bb.0:
-; GFX-NEXT:    s_load_dword s2, s[0:1], 0xb
-; GFX-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
-; GFX-NEXT:    s_mov_b32 s3, 0xf000
-; GFX-NEXT:    v_mov_b32_e32 v0, 0x64
-; GFX-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX-NEXT:    v_cmp_ge_i32_e64 s[4:5], s2, v0
-; GFX-NEXT:    s_mov_b32 s2, -1
-; GFX-NEXT:    v_mov_b32_e32 v0, s4
-; GFX-NEXT:    v_mov_b32_e32 v1, s5
-; GFX-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
-; GFX-NEXT:    s_endpgm
-;
-; VI-LABEL: v_icmp_i32_sge:
-; VI:       ; %bb.0:
-; VI-NEXT:    s_load_dword s2, s[0:1], 0x2c
-; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
-; VI-NEXT:    v_mov_b32_e32 v0, 0x64
-; VI-NEXT:    s_waitcnt lgkmcnt(0)
-; VI-NEXT:    v_cmp_ge_i32_e64 s[2:3], s2, v0
-; VI-NEXT:    v_mov_b32_e32 v0, s0
-; VI-NEXT:    v_mov_b32_e32 v2, s2
-; VI-NEXT:    v_mov_b32_e32 v1, s1
-; VI-NEXT:    v_mov_b32_e32 v3, s3
-; VI-NEXT:    flat_store_dwordx2 v[0:1], v[2:3]
-; VI-NEXT:    s_endpgm
-  %result = call i64 @llvm.amdgcn.icmp.i32(i32 %src, i32 100, i32 39)
-  store i64 %result, i64 addrspace(1)* %out
-  ret void
-}
-
-define amdgpu_kernel void @v_icmp_i32_slt(i64 addrspace(1)* %out, i32 %src) {
-; GFX-LABEL: v_icmp_i32_slt:
-; GFX:       ; %bb.0:
-; GFX-NEXT:    s_load_dword s2, s[0:1], 0xb
-; GFX-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
-; GFX-NEXT:    s_mov_b32 s3, 0xf000
-; GFX-NEXT:    v_mov_b32_e32 v0, 0x64
-; GFX-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX-NEXT:    v_cmp_lt_i32_e64 s[4:5], s2, v0
-; GFX-NEXT:    s_mov_b32 s2, -1
-; GFX-NEXT:    v_mov_b32_e32 v0, s4
-; GFX-NEXT:    v_mov_b32_e32 v1, s5
-; GFX-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
-; GFX-NEXT:    s_endpgm
-;
-; VI-LABEL: v_icmp_i32_slt:
-; VI:       ; %bb.0:
-; VI-NEXT:    s_load_dword s2, s[0:1], 0x2c
-; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
-; VI-NEXT:    v_mov_b32_e32 v0, 0x64
-; VI-NEXT:    s_waitcnt lgkmcnt(0)
-; VI-NEXT:    v_cmp_lt_i32_e64 s[2:3], s2, v0
-; VI-NEXT:    v_mov_b32_e32 v0, s0
-; VI-NEXT:    v_mov_b32_e32 v2, s2
-; VI-NEXT:    v_mov_b32_e32 v1, s1
-; VI-NEXT:    v_mov_b32_e32 v3, s3
-; VI-NEXT:    flat_store_dwordx2 v[0:1], v[2:3]
-; VI-NEXT:    s_endpgm
-  %result = call i64 @llvm.amdgcn.icmp.i32(i32 %src, i32 100, i32 40)
-  store i64 %result, i64 addrspace(1)* %out
-  ret void
-}
-
-define amdgpu_kernel void @v_icmp_i32_sle(i64 addrspace(1)* %out, i32 %src) {
-; GFX-LABEL: v_icmp_i32_sle:
-; GFX:       ; %bb.0:
-; GFX-NEXT:    s_load_dword s2, s[0:1], 0xb
-; GFX-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
-; GFX-NEXT:    s_mov_b32 s3, 0xf000
-; GFX-NEXT:    v_mov_b32_e32 v0, 0x64
-; GFX-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX-NEXT:    v_cmp_le_i32_e64 s[4:5], s2, v0
-; GFX-NEXT:    s_mov_b32 s2, -1
-; GFX-NEXT:    v_mov_b32_e32 v0, s4
-; GFX-NEXT:    v_mov_b32_e32 v1, s5
-; GFX-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
-; GFX-NEXT:    s_endpgm
-;
-; VI-LABEL: v_icmp_i32_sle:
-; VI:       ; %bb.0:
-; VI-NEXT:    s_load_dword s2, s[0:1], 0x2c
-; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
-; VI-NEXT:    v_mov_b32_e32 v0, 0x64
-; VI-NEXT:    s_waitcnt lgkmcnt(0)
-; VI-NEXT:    v_cmp_le_i32_e64 s[2:3], s2, v0
-; VI-NEXT:    v_mov_b32_e32 v0, s0
-; VI-NEXT:    v_mov_b32_e32 v2, s2
-; VI-NEXT:    v_mov_b32_e32 v1, s1
-; VI-NEXT:    v_mov_b32_e32 v3, s3
-; VI-NEXT:    flat_store_dwordx2 v[0:1], v[2:3]
-; VI-NEXT:    s_endpgm
-  %result = call i64 @llvm.amdgcn.icmp.i32(i32 %src, i32 100, i32 41)
-  store i64 %result, i64 addrspace(1)* %out
-  ret void
-}
-
-define amdgpu_kernel void @v_icmp_i64_eq(i64 addrspace(1)* %out, i64 %src) {
-; GFX-LABEL: v_icmp_i64_eq:
-; GFX:       ; %bb.0:
-; GFX-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
-; GFX-NEXT:    s_mov_b32 s7, 0xf000
-; GFX-NEXT:    v_mov_b32_e32 v0, 0x64
-; GFX-NEXT:    v_mov_b32_e32 v1, 0
-; GFX-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX-NEXT:    v_cmp_eq_u64_e64 s[2:3], s[2:3], v[0:1]
-; GFX-NEXT:    s_mov_b32 s6, -1
-; GFX-NEXT:    s_mov_b32 s4, s0
-; GFX-NEXT:    s_mov_b32 s5, s1
-; GFX-NEXT:    v_mov_b32_e32 v0, s2
-; GFX-NEXT:    v_mov_b32_e32 v1, s3
-; GFX-NEXT:    buffer_store_dwordx2 v[0:1], off, s[4:7], 0
-; GFX-NEXT:    s_endpgm
-;
-; VI-LABEL: v_icmp_i64_eq:
-; VI:       ; %bb.0:
-; VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
-; VI-NEXT:    v_mov_b32_e32 v0, 0x64
-; VI-NEXT:    v_mov_b32_e32 v1, 0
-; VI-NEXT:    s_waitcnt lgkmcnt(0)
-; VI-NEXT:    v_cmp_eq_u64_e64 s[2:3], s[2:3], v[0:1]
-; VI-NEXT:    v_mov_b32_e32 v2, s0
-; VI-NEXT:    v_mov_b32_e32 v0, s2
-; VI-NEXT:    v_mov_b32_e32 v3, s1
-; VI-NEXT:    v_mov_b32_e32 v1, s3
-; VI-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
-; VI-NEXT:    s_endpgm
-  %result = call i64 @llvm.amdgcn.icmp.i64(i64 %src, i64 100, i32 32)
-  store i64 %result, i64 addrspace(1)* %out
-  ret void
-}
-
-define amdgpu_kernel void @v_icmp_i64_ne(i64 addrspace(1)* %out, i64 %src) {
-; GFX-LABEL: v_icmp_i64_ne:
-; GFX:       ; %bb.0:
-; GFX-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
-; GFX-NEXT:    s_mov_b32 s7, 0xf000
-; GFX-NEXT:    v_mov_b32_e32 v0, 0x64
-; GFX-NEXT:    v_mov_b32_e32 v1, 0
-; GFX-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX-NEXT:    v_cmp_ne_u64_e64 s[2:3], s[2:3], v[0:1]
-; GFX-NEXT:    s_mov_b32 s6, -1
-; GFX-NEXT:    s_mov_b32 s4, s0
-; GFX-NEXT:    s_mov_b32 s5, s1
-; GFX-NEXT:    v_mov_b32_e32 v0, s2
-; GFX-NEXT:    v_mov_b32_e32 v1, s3
-; GFX-NEXT:    buffer_store_dwordx2 v[0:1], off, s[4:7], 0
-; GFX-NEXT:    s_endpgm
-;
-; VI-LABEL: v_icmp_i64_ne:
-; VI:       ; %bb.0:
-; VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
-; VI-NEXT:    v_mov_b32_e32 v0, 0x64
-; VI-NEXT:    v_mov_b32_e32 v1, 0
-; VI-NEXT:    s_waitcnt lgkmcnt(0)
-; VI-NEXT:    v_cmp_ne_u64_e64 s[2:3], s[2:3], v[0:1]
-; VI-NEXT:    v_mov_b32_e32 v2, s0
-; VI-NEXT:    v_mov_b32_e32 v0, s2
-; VI-NEXT:    v_mov_b32_e32 v3, s1
-; VI-NEXT:    v_mov_b32_e32 v1, s3
-; VI-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
-; VI-NEXT:    s_endpgm
-  %result = call i64 @llvm.amdgcn.icmp.i64(i64 %src, i64 100, i32 33)
-  store i64 %result, i64 addrspace(1)* %out
-  ret void
-}
-
-define amdgpu_kernel void @v_icmp_u64_ugt(i64 addrspace(1)* %out, i64 %src) {
-; GFX-LABEL: v_icmp_u64_ugt:
-; GFX:       ; %bb.0:
-; GFX-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
-; GFX-NEXT:    s_mov_b32 s7, 0xf000
-; GFX-NEXT:    v_mov_b32_e32 v0, 0x64
-; GFX-NEXT:    v_mov_b32_e32 v1, 0
-; GFX-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX-NEXT:    v_cmp_gt_u64_e64 s[2:3], s[2:3], v[0:1]
-; GFX-NEXT:    s_mov_b32 s6, -1
-; GFX-NEXT:    s_mov_b32 s4, s0
-; GFX-NEXT:    s_mov_b32 s5, s1
-; GFX-NEXT:    v_mov_b32_e32 v0, s2
-; GFX-NEXT:    v_mov_b32_e32 v1, s3
-; GFX-NEXT:    buffer_store_dwordx2 v[0:1], off, s[4:7], 0
-; GFX-NEXT:    s_endpgm
-;
-; VI-LABEL: v_icmp_u64_ugt:
-; VI:       ; %bb.0:
-; VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
-; VI-NEXT:    v_mov_b32_e32 v0, 0x64
-; VI-NEXT:    v_mov_b32_e32 v1, 0
-; VI-NEXT:    s_waitcnt lgkmcnt(0)
-; VI-NEXT:    v_cmp_gt_u64_e64 s[2:3], s[2:3], v[0:1]
-; VI-NEXT:    v_mov_b32_e32 v2, s0
-; VI-NEXT:    v_mov_b32_e32 v0, s2
-; VI-NEXT:    v_mov_b32_e32 v3, s1
-; VI-NEXT:    v_mov_b32_e32 v1, s3
-; VI-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
-; VI-NEXT:    s_endpgm
-  %result = call i64 @llvm.amdgcn.icmp.i64(i64 %src, i64 100, i32 34)
-  store i64 %result, i64 addrspace(1)* %out
-  ret void
-}
-
-define amdgpu_kernel void @v_icmp_u64_uge(i64 addrspace(1)* %out, i64 %src) {
-; GFX-LABEL: v_icmp_u64_uge:
-; GFX:       ; %bb.0:
-; GFX-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
-; GFX-NEXT:    s_mov_b32 s7, 0xf000
-; GFX-NEXT:    v_mov_b32_e32 v0, 0x64
-; GFX-NEXT:    v_mov_b32_e32 v1, 0
-; GFX-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX-NEXT:    v_cmp_ge_u64_e64 s[2:3], s[2:3], v[0:1]
-; GFX-NEXT:    s_mov_b32 s6, -1
-; GFX-NEXT:    s_mov_b32 s4, s0
-; GFX-NEXT:    s_mov_b32 s5, s1
-; GFX-NEXT:    v_mov_b32_e32 v0, s2
-; GFX-NEXT:    v_mov_b32_e32 v1, s3
-; GFX-NEXT:    buffer_store_dwordx2 v[0:1], off, s[4:7], 0
-; GFX-NEXT:    s_endpgm
-;
-; VI-LABEL: v_icmp_u64_uge:
-; VI:       ; %bb.0:
-; VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
-; VI-NEXT:    v_mov_b32_e32 v0, 0x64
-; VI-NEXT:    v_mov_b32_e32 v1, 0
-; VI-NEXT:    s_waitcnt lgkmcnt(0)
-; VI-NEXT:    v_cmp_ge_u64_e64 s[2:3], s[2:3], v[0:1]
-; VI-NEXT:    v_mov_b32_e32 v2, s0
-; VI-NEXT:    v_mov_b32_e32 v0, s2
-; VI-NEXT:    v_mov_b32_e32 v3, s1
-; VI-NEXT:    v_mov_b32_e32 v1, s3
-; VI-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
-; VI-NEXT:    s_endpgm
-  %result = call i64 @llvm.amdgcn.icmp.i64(i64 %src, i64 100, i32 35)
-  store i64 %result, i64 addrspace(1)* %out
-  ret void
-}
-
-define amdgpu_kernel void @v_icmp_u64_ult(i64 addrspace(1)* %out, i64 %src) {
-; GFX-LABEL: v_icmp_u64_ult:
-; GFX:       ; %bb.0:
-; GFX-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
-; GFX-NEXT:    s_mov_b32 s7, 0xf000
-; GFX-NEXT:    v_mov_b32_e32 v0, 0x64
-; GFX-NEXT:    v_mov_b32_e32 v1, 0
-; GFX-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX-NEXT:    v_cmp_lt_u64_e64 s[2:3], s[2:3], v[0:1]
-; GFX-NEXT:    s_mov_b32 s6, -1
-; GFX-NEXT:    s_mov_b32 s4, s0
-; GFX-NEXT:    s_mov_b32 s5, s1
-; GFX-NEXT:    v_mov_b32_e32 v0, s2
-; GFX-NEXT:    v_mov_b32_e32 v1, s3
-; GFX-NEXT:    buffer_store_dwordx2 v[0:1], off, s[4:7], 0
-; GFX-NEXT:    s_endpgm
-;
-; VI-LABEL: v_icmp_u64_ult:
-; VI:       ; %bb.0:
-; VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
-; VI-NEXT:    v_mov_b32_e32 v0, 0x64
-; VI-NEXT:    v_mov_b32_e32 v1, 0
-; VI-NEXT:    s_waitcnt lgkmcnt(0)
-; VI-NEXT:    v_cmp_lt_u64_e64 s[2:3], s[2:3], v[0:1]
-; VI-NEXT:    v_mov_b32_e32 v2, s0
-; VI-NEXT:    v_mov_b32_e32 v0, s2
-; VI-NEXT:    v_mov_b32_e32 v3, s1
-; VI-NEXT:    v_mov_b32_e32 v1, s3
-; VI-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
-; VI-NEXT:    s_endpgm
-  %result = call i64 @llvm.amdgcn.icmp.i64(i64 %src, i64 100, i32 36)
-  store i64 %result, i64 addrspace(1)* %out
-  ret void
-}
-
-define amdgpu_kernel void @v_icmp_u64_ule(i64 addrspace(1)* %out, i64 %src) {
-; GFX-LABEL: v_icmp_u64_ule:
-; GFX:       ; %bb.0:
-; GFX-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
-; GFX-NEXT:    s_mov_b32 s7, 0xf000
-; GFX-NEXT:    v_mov_b32_e32 v0, 0x64
-; GFX-NEXT:    v_mov_b32_e32 v1, 0
-; GFX-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX-NEXT:    v_cmp_le_u64_e64 s[2:3], s[2:3], v[0:1]
-; GFX-NEXT:    s_mov_b32 s6, -1
-; GFX-NEXT:    s_mov_b32 s4, s0
-; GFX-NEXT:    s_mov_b32 s5, s1
-; GFX-NEXT:    v_mov_b32_e32 v0, s2
-; GFX-NEXT:    v_mov_b32_e32 v1, s3
-; GFX-NEXT:    buffer_store_dwordx2 v[0:1], off, s[4:7], 0
-; GFX-NEXT:    s_endpgm
-;
-; VI-LABEL: v_icmp_u64_ule:
-; VI:       ; %bb.0:
-; VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
-; VI-NEXT:    v_mov_b32_e32 v0, 0x64
-; VI-NEXT:    v_mov_b32_e32 v1, 0
-; VI-NEXT:    s_waitcnt lgkmcnt(0)
-; VI-NEXT:    v_cmp_le_u64_e64 s[2:3], s[2:3], v[0:1]
-; VI-NEXT:    v_mov_b32_e32 v2, s0
-; VI-NEXT:    v_mov_b32_e32 v0, s2
-; VI-NEXT:    v_mov_b32_e32 v3, s1
-; VI-NEXT:    v_mov_b32_e32 v1, s3
-; VI-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
-; VI-NEXT:    s_endpgm
-  %result = call i64 @llvm.amdgcn.icmp.i64(i64 %src, i64 100, i32 37)
-  store i64 %result, i64 addrspace(1)* %out
-  ret void
-}
-
-define amdgpu_kernel void @v_icmp_i64_sgt(i64 addrspace(1)* %out, i64 %src) {
-; GFX-LABEL: v_icmp_i64_sgt:
-; GFX:       ; %bb.0:
-; GFX-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
-; GFX-NEXT:    s_mov_b32 s7, 0xf000
-; GFX-NEXT:    v_mov_b32_e32 v0, 0x64
-; GFX-NEXT:    v_mov_b32_e32 v1, 0
-; GFX-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX-NEXT:    v_cmp_gt_i64_e64 s[2:3], s[2:3], v[0:1]
-; GFX-NEXT:    s_mov_b32 s6, -1
-; GFX-NEXT:    s_mov_b32 s4, s0
-; GFX-NEXT:    s_mov_b32 s5, s1
-; GFX-NEXT:    v_mov_b32_e32 v0, s2
-; GFX-NEXT:    v_mov_b32_e32 v1, s3
-; GFX-NEXT:    buffer_store_dwordx2 v[0:1], off, s[4:7], 0
-; GFX-NEXT:    s_endpgm
-;
-; VI-LABEL: v_icmp_i64_sgt:
-; VI:       ; %bb.0:
-; VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
-; VI-NEXT:    v_mov_b32_e32 v0, 0x64
-; VI-NEXT:    v_mov_b32_e32 v1, 0
-; VI-NEXT:    s_waitcnt lgkmcnt(0)
-; VI-NEXT:    v_cmp_gt_i64_e64 s[2:3], s[2:3], v[0:1]
-; VI-NEXT:    v_mov_b32_e32 v2, s0
-; VI-NEXT:    v_mov_b32_e32 v0, s2
-; VI-NEXT:    v_mov_b32_e32 v3, s1
-; VI-NEXT:    v_mov_b32_e32 v1, s3
-; VI-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
-; VI-NEXT:    s_endpgm
-  %result = call i64 @llvm.amdgcn.icmp.i64(i64 %src, i64 100, i32 38)
-  store i64 %result, i64 addrspace(1)* %out
-  ret void
-}
-
-define amdgpu_kernel void @v_icmp_i64_sge(i64 addrspace(1)* %out, i64 %src) {
-; GFX-LABEL: v_icmp_i64_sge:
-; GFX:       ; %bb.0:
-; GFX-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
-; GFX-NEXT:    s_mov_b32 s7, 0xf000
-; GFX-NEXT:    v_mov_b32_e32 v0, 0x64
-; GFX-NEXT:    v_mov_b32_e32 v1, 0
-; GFX-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX-NEXT:    v_cmp_ge_i64_e64 s[2:3], s[2:3], v[0:1]
-; GFX-NEXT:    s_mov_b32 s6, -1
-; GFX-NEXT:    s_mov_b32 s4, s0
-; GFX-NEXT:    s_mov_b32 s5, s1
-; GFX-NEXT:    v_mov_b32_e32 v0, s2
-; GFX-NEXT:    v_mov_b32_e32 v1, s3
-; GFX-NEXT:    buffer_store_dwordx2 v[0:1], off, s[4:7], 0
-; GFX-NEXT:    s_endpgm
-;
-; VI-LABEL: v_icmp_i64_sge:
-; VI:       ; %bb.0:
-; VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
-; VI-NEXT:    v_mov_b32_e32 v0, 0x64
-; VI-NEXT:    v_mov_b32_e32 v1, 0
-; VI-NEXT:    s_waitcnt lgkmcnt(0)
-; VI-NEXT:    v_cmp_ge_i64_e64 s[2:3], s[2:3], v[0:1]
-; VI-NEXT:    v_mov_b32_e32 v2, s0
-; VI-NEXT:    v_mov_b32_e32 v0, s2
-; VI-NEXT:    v_mov_b32_e32 v3, s1
-; VI-NEXT:    v_mov_b32_e32 v1, s3
-; VI-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
-; VI-NEXT:    s_endpgm
-  %result = call i64 @llvm.amdgcn.icmp.i64(i64 %src, i64 100, i32 39)
-  store i64 %result, i64 addrspace(1)* %out
-  ret void
-}
-
-define amdgpu_kernel void @v_icmp_i64_slt(i64 addrspace(1)* %out, i64 %src) {
-; GFX-LABEL: v_icmp_i64_slt:
-; GFX:       ; %bb.0:
-; GFX-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
-; GFX-NEXT:    s_mov_b32 s7, 0xf000
-; GFX-NEXT:    v_mov_b32_e32 v0, 0x64
-; GFX-NEXT:    v_mov_b32_e32 v1, 0
-; GFX-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX-NEXT:    v_cmp_lt_i64_e64 s[2:3], s[2:3], v[0:1]
-; GFX-NEXT:    s_mov_b32 s6, -1
-; GFX-NEXT:    s_mov_b32 s4, s0
-; GFX-NEXT:    s_mov_b32 s5, s1
-; GFX-NEXT:    v_mov_b32_e32 v0, s2
-; GFX-NEXT:    v_mov_b32_e32 v1, s3
-; GFX-NEXT:    buffer_store_dwordx2 v[0:1], off, s[4:7], 0
-; GFX-NEXT:    s_endpgm
-;
-; VI-LABEL: v_icmp_i64_slt:
-; VI:       ; %bb.0:
-; VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
-; VI-NEXT:    v_mov_b32_e32 v0, 0x64
-; VI-NEXT:    v_mov_b32_e32 v1, 0
-; VI-NEXT:    s_waitcnt lgkmcnt(0)
-; VI-NEXT:    v_cmp_lt_i64_e64 s[2:3], s[2:3], v[0:1]
-; VI-NEXT:    v_mov_b32_e32 v2, s0
-; VI-NEXT:    v_mov_b32_e32 v0, s2
-; VI-NEXT:    v_mov_b32_e32 v3, s1
-; VI-NEXT:    v_mov_b32_e32 v1, s3
-; VI-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
-; VI-NEXT:    s_endpgm
-  %result = call i64 @llvm.amdgcn.icmp.i64(i64 %src, i64 100, i32 40)
-  store i64 %result, i64 addrspace(1)* %out
-  ret void
-}
-
-define amdgpu_kernel void @v_icmp_i64_sle(i64 addrspace(1)* %out, i64 %src) {
-; GFX-LABEL: v_icmp_i64_sle:
-; GFX:       ; %bb.0:
-; GFX-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
-; GFX-NEXT:    s_mov_b32 s7, 0xf000
-; GFX-NEXT:    v_mov_b32_e32 v0, 0x64
-; GFX-NEXT:    v_mov_b32_e32 v1, 0
-; GFX-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX-NEXT:    v_cmp_le_i64_e64 s[2:3], s[2:3], v[0:1]
-; GFX-NEXT:    s_mov_b32 s6, -1
-; GFX-NEXT:    s_mov_b32 s4, s0
-; GFX-NEXT:    s_mov_b32 s5, s1
-; GFX-NEXT:    v_mov_b32_e32 v0, s2
-; GFX-NEXT:    v_mov_b32_e32 v1, s3
-; GFX-NEXT:    buffer_store_dwordx2 v[0:1], off, s[4:7], 0
-; GFX-NEXT:    s_endpgm
-;
-; VI-LABEL: v_icmp_i64_sle:
-; VI:       ; %bb.0:
-; VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
-; VI-NEXT:    v_mov_b32_e32 v0, 0x64
-; VI-NEXT:    v_mov_b32_e32 v1, 0
-; VI-NEXT:    s_waitcnt lgkmcnt(0)
-; VI-NEXT:    v_cmp_le_i64_e64 s[2:3], s[2:3], v[0:1]
-; VI-NEXT:    v_mov_b32_e32 v2, s0
-; VI-NEXT:    v_mov_b32_e32 v0, s2
-; VI-NEXT:    v_mov_b32_e32 v3, s1
-; VI-NEXT:    v_mov_b32_e32 v1, s3
-; VI-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
-; VI-NEXT:    s_endpgm
-  %result = call i64 @llvm.amdgcn.icmp.i64(i64 %src, i64 100, i32 41)
-  store i64 %result, i64 addrspace(1)* %out
-  ret void
-}
-
-define amdgpu_kernel void @v_icmp_i16_eq(i64 addrspace(1)* %out, i16 %src) {
-; GFX-LABEL: v_icmp_i16_eq:
-; GFX:       ; %bb.0:
-; GFX-NEXT:    s_load_dword s2, s[0:1], 0xb
-; GFX-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
-; GFX-NEXT:    s_mov_b32 s3, 0xf000
-; GFX-NEXT:    v_mov_b32_e32 v0, 0x64
-; GFX-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX-NEXT:    s_and_b32 s2, s2, 0xffff
-; GFX-NEXT:    v_cmp_eq_u32_e64 s[4:5], s2, v0
-; GFX-NEXT:    s_mov_b32 s2, -1
-; GFX-NEXT:    v_mov_b32_e32 v0, s4
-; GFX-NEXT:    v_mov_b32_e32 v1, s5
-; GFX-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
-; GFX-NEXT:    s_endpgm
-;
-; VI-LABEL: v_icmp_i16_eq:
-; VI:       ; %bb.0:
-; VI-NEXT:    s_load_dword s2, s[0:1], 0x2c
-; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
-; VI-NEXT:    v_mov_b32_e32 v0, 0x64
-; VI-NEXT:    s_waitcnt lgkmcnt(0)
-; VI-NEXT:    v_cmp_eq_u16_e64 s[2:3], s2, v0
-; VI-NEXT:    v_mov_b32_e32 v0, s0
-; VI-NEXT:    v_mov_b32_e32 v2, s2
-; VI-NEXT:    v_mov_b32_e32 v1, s1
-; VI-NEXT:    v_mov_b32_e32 v3, s3
-; VI-NEXT:    flat_store_dwordx2 v[0:1], v[2:3]
-; VI-NEXT:    s_endpgm
-  %result = call i64 @llvm.amdgcn.icmp.i16(i16 %src, i16 100, i32 32)
-  store i64 %result, i64 addrspace(1)* %out
-  ret void
-}
-
-define amdgpu_kernel void @v_icmp_i16(i64 addrspace(1)* %out, i16 %src) {
-; GCN-LABEL: v_icmp_i16:
-; GCN:       ; %bb.0:
-; GCN-NEXT:    s_endpgm
-  %result = call i64 @llvm.amdgcn.icmp.i16(i16 %src, i16 100, i32 30)
-  store i64 %result, i64 addrspace(1)* %out
-  ret void
-}
-
-define amdgpu_kernel void @v_icmp_i16_ne(i64 addrspace(1)* %out, i16 %src) {
-; GFX-LABEL: v_icmp_i16_ne:
-; GFX:       ; %bb.0:
-; GFX-NEXT:    s_load_dword s2, s[0:1], 0xb
-; GFX-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
-; GFX-NEXT:    s_mov_b32 s3, 0xf000
-; GFX-NEXT:    v_mov_b32_e32 v0, 0x64
-; GFX-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX-NEXT:    s_and_b32 s2, s2, 0xffff
-; GFX-NEXT:    v_cmp_ne_u32_e64 s[4:5], s2, v0
-; GFX-NEXT:    s_mov_b32 s2, -1
-; GFX-NEXT:    v_mov_b32_e32 v0, s4
-; GFX-NEXT:    v_mov_b32_e32 v1, s5
-; GFX-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
-; GFX-NEXT:    s_endpgm
-;
-; VI-LABEL: v_icmp_i16_ne:
-; VI:       ; %bb.0:
-; VI-NEXT:    s_load_dword s2, s[0:1], 0x2c
-; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
-; VI-NEXT:    v_mov_b32_e32 v0, 0x64
-; VI-NEXT:    s_waitcnt lgkmcnt(0)
-; VI-NEXT:    v_cmp_ne_u16_e64 s[2:3], s2, v0
-; VI-NEXT:    v_mov_b32_e32 v0, s0
-; VI-NEXT:    v_mov_b32_e32 v2, s2
-; VI-NEXT:    v_mov_b32_e32 v1, s1
-; VI-NEXT:    v_mov_b32_e32 v3, s3
-; VI-NEXT:    flat_store_dwordx2 v[0:1], v[2:3]
-; VI-NEXT:    s_endpgm
-  %result = call i64 @llvm.amdgcn.icmp.i16(i16 %src, i16 100, i32 33)
-  store i64 %result, i64 addrspace(1)* %out
-  ret void
-}
-
-define amdgpu_kernel void @v_icmp_i16_ugt(i64 addrspace(1)* %out, i16 %src) {
-; GFX-LABEL: v_icmp_i16_ugt:
-; GFX:       ; %bb.0:
-; GFX-NEXT:    s_load_dword s2, s[0:1], 0xb
-; GFX-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
-; GFX-NEXT:    s_mov_b32 s3, 0xf000
-; GFX-NEXT:    v_mov_b32_e32 v0, 0x64
-; GFX-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX-NEXT:    s_and_b32 s2, s2, 0xffff
-; GFX-NEXT:    v_cmp_gt_u32_e64 s[4:5], s2, v0
-; GFX-NEXT:    s_mov_b32 s2, -1
-; GFX-NEXT:    v_mov_b32_e32 v0, s4
-; GFX-NEXT:    v_mov_b32_e32 v1, s5
-; GFX-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
-; GFX-NEXT:    s_endpgm
-;
-; VI-LABEL: v_icmp_i16_ugt:
-; VI:       ; %bb.0:
-; VI-NEXT:    s_load_dword s2, s[0:1], 0x2c
-; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
-; VI-NEXT:    v_mov_b32_e32 v0, 0x64
-; VI-NEXT:    s_waitcnt lgkmcnt(0)
-; VI-NEXT:    v_cmp_gt_u16_e64 s[2:3], s2, v0
-; VI-NEXT:    v_mov_b32_e32 v0, s0
-; VI-NEXT:    v_mov_b32_e32 v2, s2
-; VI-NEXT:    v_mov_b32_e32 v1, s1
-; VI-NEXT:    v_mov_b32_e32 v3, s3
-; VI-NEXT:    flat_store_dwordx2 v[0:1], v[2:3]
-; VI-NEXT:    s_endpgm
-  %result = call i64 @llvm.amdgcn.icmp.i16(i16 %src, i16 100, i32 34)
-  store i64 %result, i64 addrspace(1)* %out
-  ret void
-}
-
-define amdgpu_kernel void @v_icmp_i16_uge(i64 addrspace(1)* %out, i16 %src) {
-; GFX-LABEL: v_icmp_i16_uge:
-; GFX:       ; %bb.0:
-; GFX-NEXT:    s_load_dword s2, s[0:1], 0xb
-; GFX-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
-; GFX-NEXT:    s_mov_b32 s3, 0xf000
-; GFX-NEXT:    v_mov_b32_e32 v0, 0x64
-; GFX-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX-NEXT:    s_and_b32 s2, s2, 0xffff
-; GFX-NEXT:    v_cmp_ge_u32_e64 s[4:5], s2, v0
-; GFX-NEXT:    s_mov_b32 s2, -1
-; GFX-NEXT:    v_mov_b32_e32 v0, s4
-; GFX-NEXT:    v_mov_b32_e32 v1, s5
-; GFX-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
-; GFX-NEXT:    s_endpgm
-;
-; VI-LABEL: v_icmp_i16_uge:
-; VI:       ; %bb.0:
-; VI-NEXT:    s_load_dword s2, s[0:1], 0x2c
-; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
-; VI-NEXT:    v_mov_b32_e32 v0, 0x64
-; VI-NEXT:    s_waitcnt lgkmcnt(0)
-; VI-NEXT:    v_cmp_ge_u16_e64 s[2:3], s2, v0
-; VI-NEXT:    v_mov_b32_e32 v0, s0
-; VI-NEXT:    v_mov_b32_e32 v2, s2
-; VI-NEXT:    v_mov_b32_e32 v1, s1
-; VI-NEXT:    v_mov_b32_e32 v3, s3
-; VI-NEXT:    flat_store_dwordx2 v[0:1], v[2:3]
-; VI-NEXT:    s_endpgm
-  %result = call i64 @llvm.amdgcn.icmp.i16(i16 %src, i16 100, i32 35)
-  store i64 %result, i64 addrspace(1)* %out
-  ret void
-}
-
-define amdgpu_kernel void @v_icmp_i16_ult(i64 addrspace(1)* %out, i16 %src) {
-; GFX-LABEL: v_icmp_i16_ult:
-; GFX:       ; %bb.0:
-; GFX-NEXT:    s_load_dword s2, s[0:1], 0xb
-; GFX-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
-; GFX-NEXT:    s_mov_b32 s3, 0xf000
-; GFX-NEXT:    v_mov_b32_e32 v0, 0x64
-; GFX-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX-NEXT:    s_and_b32 s2, s2, 0xffff
-; GFX-NEXT:    v_cmp_lt_u32_e64 s[4:5], s2, v0
-; GFX-NEXT:    s_mov_b32 s2, -1
-; GFX-NEXT:    v_mov_b32_e32 v0, s4
-; GFX-NEXT:    v_mov_b32_e32 v1, s5
-; GFX-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
-; GFX-NEXT:    s_endpgm
-;
-; VI-LABEL: v_icmp_i16_ult:
-; VI:       ; %bb.0:
-; VI-NEXT:    s_load_dword s2, s[0:1], 0x2c
-; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
-; VI-NEXT:    v_mov_b32_e32 v0, 0x64
-; VI-NEXT:    s_waitcnt lgkmcnt(0)
-; VI-NEXT:    v_cmp_lt_u16_e64 s[2:3], s2, v0
-; VI-NEXT:    v_mov_b32_e32 v0, s0
-; VI-NEXT:    v_mov_b32_e32 v2, s2
-; VI-NEXT:    v_mov_b32_e32 v1, s1
-; VI-NEXT:    v_mov_b32_e32 v3, s3
-; VI-NEXT:    flat_store_dwordx2 v[0:1], v[2:3]
-; VI-NEXT:    s_endpgm
-  %result = call i64 @llvm.amdgcn.icmp.i16(i16 %src, i16 100, i32 36)
-  store i64 %result, i64 addrspace(1)* %out
-  ret void
-}
-
-define amdgpu_kernel void @v_icmp_i16_ule(i64 addrspace(1)* %out, i16 %src) {
-; GFX-LABEL: v_icmp_i16_ule:
-; GFX:       ; %bb.0:
-; GFX-NEXT:    s_load_dword s2, s[0:1], 0xb
-; GFX-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
-; GFX-NEXT:    s_mov_b32 s3, 0xf000
-; GFX-NEXT:    v_mov_b32_e32 v0, 0x64
-; GFX-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX-NEXT:    s_and_b32 s2, s2, 0xffff
-; GFX-NEXT:    v_cmp_le_u32_e64 s[4:5], s2, v0
-; GFX-NEXT:    s_mov_b32 s2, -1
-; GFX-NEXT:    v_mov_b32_e32 v0, s4
-; GFX-NEXT:    v_mov_b32_e32 v1, s5
-; GFX-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
-; GFX-NEXT:    s_endpgm
-;
-; VI-LABEL: v_icmp_i16_ule:
-; VI:       ; %bb.0:
-; VI-NEXT:    s_load_dword s2, s[0:1], 0x2c
-; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
-; VI-NEXT:    v_mov_b32_e32 v0, 0x64
-; VI-NEXT:    s_waitcnt lgkmcnt(0)
-; VI-NEXT:    v_cmp_le_u16_e64 s[2:3], s2, v0
-; VI-NEXT:    v_mov_b32_e32 v0, s0
-; VI-NEXT:    v_mov_b32_e32 v2, s2
-; VI-NEXT:    v_mov_b32_e32 v1, s1
-; VI-NEXT:    v_mov_b32_e32 v3, s3
-; VI-NEXT:    flat_store_dwordx2 v[0:1], v[2:3]
-; VI-NEXT:    s_endpgm
-  %result = call i64 @llvm.amdgcn.icmp.i16(i16 %src, i16 100, i32 37)
-  store i64 %result, i64 addrspace(1)* %out
-  ret void
-}
-
-define amdgpu_kernel void @v_icmp_i16_sgt(i64 addrspace(1)* %out, i16 %src) #1 {
-; GFX-LABEL: v_icmp_i16_sgt:
-; GFX:       ; %bb.0:
-; GFX-NEXT:    s_load_dword s2, s[0:1], 0xb
-; GFX-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
-; GFX-NEXT:    s_mov_b32 s3, 0xf000
-; GFX-NEXT:    v_mov_b32_e32 v0, 0x64
-; GFX-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX-NEXT:    s_sext_i32_i16 s2, s2
-; GFX-NEXT:    v_cmp_gt_i32_e64 s[4:5], s2, v0
-; GFX-NEXT:    s_mov_b32 s2, -1
-; GFX-NEXT:    v_mov_b32_e32 v0, s4
-; GFX-NEXT:    v_mov_b32_e32 v1, s5
-; GFX-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
-; GFX-NEXT:    s_endpgm
-;
-; VI-LABEL: v_icmp_i16_sgt:
-; VI:       ; %bb.0:
-; VI-NEXT:    s_load_dword s2, s[0:1], 0x2c
-; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
-; VI-NEXT:    v_mov_b32_e32 v0, 0x64
-; VI-NEXT:    s_waitcnt lgkmcnt(0)
-; VI-NEXT:    v_cmp_gt_i16_e64 s[2:3], s2, v0
-; VI-NEXT:    v_mov_b32_e32 v0, s0
-; VI-NEXT:    v_mov_b32_e32 v2, s2
-; VI-NEXT:    v_mov_b32_e32 v1, s1
-; VI-NEXT:    v_mov_b32_e32 v3, s3
-; VI-NEXT:    flat_store_dwordx2 v[0:1], v[2:3]
-; VI-NEXT:    s_endpgm
-  %result = call i64 @llvm.amdgcn.icmp.i16(i16 %src, i16 100, i32 38)
-  store i64 %result, i64 addrspace(1)* %out
-  ret void
-}
-
-define amdgpu_kernel void @v_icmp_i16_sge(i64 addrspace(1)* %out, i16 %src) {
-; GFX-LABEL: v_icmp_i16_sge:
-; GFX:       ; %bb.0:
-; GFX-NEXT:    s_load_dword s2, s[0:1], 0xb
-; GFX-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
-; GFX-NEXT:    s_mov_b32 s3, 0xf000
-; GFX-NEXT:    v_mov_b32_e32 v0, 0x64
-; GFX-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX-NEXT:    s_sext_i32_i16 s2, s2
-; GFX-NEXT:    v_cmp_ge_i32_e64 s[4:5], s2, v0
-; GFX-NEXT:    s_mov_b32 s2, -1
-; GFX-NEXT:    v_mov_b32_e32 v0, s4
-; GFX-NEXT:    v_mov_b32_e32 v1, s5
-; GFX-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
-; GFX-NEXT:    s_endpgm
-;
-; VI-LABEL: v_icmp_i16_sge:
-; VI:       ; %bb.0:
-; VI-NEXT:    s_load_dword s2, s[0:1], 0x2c
-; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
-; VI-NEXT:    v_mov_b32_e32 v0, 0x64
-; VI-NEXT:    s_waitcnt lgkmcnt(0)
-; VI-NEXT:    v_cmp_ge_i16_e64 s[2:3], s2, v0
-; VI-NEXT:    v_mov_b32_e32 v0, s0
-; VI-NEXT:    v_mov_b32_e32 v2, s2
-; VI-NEXT:    v_mov_b32_e32 v1, s1
-; VI-NEXT:    v_mov_b32_e32 v3, s3
-; VI-NEXT:    flat_store_dwordx2 v[0:1], v[2:3]
-; VI-NEXT:    s_endpgm
-  %result = call i64 @llvm.amdgcn.icmp.i16(i16 %src, i16 100, i32 39)
-  store i64 %result, i64 addrspace(1)* %out
-  ret void
-}
-
-define amdgpu_kernel void @v_icmp_i16_slt(i64 addrspace(1)* %out, i16 %src) {
-; GFX-LABEL: v_icmp_i16_slt:
-; GFX:       ; %bb.0:
-; GFX-NEXT:    s_load_dword s2, s[0:1], 0xb
-; GFX-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
-; GFX-NEXT:    s_mov_b32 s3, 0xf000
-; GFX-NEXT:    v_mov_b32_e32 v0, 0x64
-; GFX-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX-NEXT:    s_sext_i32_i16 s2, s2
-; GFX-NEXT:    v_cmp_lt_i32_e64 s[4:5], s2, v0
-; GFX-NEXT:    s_mov_b32 s2, -1
-; GFX-NEXT:    v_mov_b32_e32 v0, s4
-; GFX-NEXT:    v_mov_b32_e32 v1, s5
-; GFX-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
-; GFX-NEXT:    s_endpgm
-;
-; VI-LABEL: v_icmp_i16_slt:
-; VI:       ; %bb.0:
-; VI-NEXT:    s_load_dword s2, s[0:1], 0x2c
-; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
-; VI-NEXT:    v_mov_b32_e32 v0, 0x64
-; VI-NEXT:    s_waitcnt lgkmcnt(0)
-; VI-NEXT:    v_cmp_lt_i16_e64 s[2:3], s2, v0
-; VI-NEXT:    v_mov_b32_e32 v0, s0
-; VI-NEXT:    v_mov_b32_e32 v2, s2
-; VI-NEXT:    v_mov_b32_e32 v1, s1
-; VI-NEXT:    v_mov_b32_e32 v3, s3
-; VI-NEXT:    flat_store_dwordx2 v[0:1], v[2:3]
-; VI-NEXT:    s_endpgm
-  %result = call i64 @llvm.amdgcn.icmp.i16(i16 %src, i16 100, i32 40)
-  store i64 %result, i64 addrspace(1)* %out
-  ret void
-}
-
-define amdgpu_kernel void @v_icmp_i16_sle(i64 addrspace(1)* %out, i16 %src) {
-; GFX-LABEL: v_icmp_i16_sle:
-; GFX:       ; %bb.0:
-; GFX-NEXT:    s_load_dword s2, s[0:1], 0xb
-; GFX-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
-; GFX-NEXT:    s_mov_b32 s3, 0xf000
-; GFX-NEXT:    v_mov_b32_e32 v0, 0x64
-; GFX-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX-NEXT:    s_sext_i32_i16 s2, s2
-; GFX-NEXT:    v_cmp_le_i32_e64 s[4:5], s2, v0
-; GFX-NEXT:    s_mov_b32 s2, -1
-; GFX-NEXT:    v_mov_b32_e32 v0, s4
-; GFX-NEXT:    v_mov_b32_e32 v1, s5
-; GFX-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
-; GFX-NEXT:    s_endpgm
-;
-; VI-LABEL: v_icmp_i16_sle:
-; VI:       ; %bb.0:
-; VI-NEXT:    s_load_dword s2, s[0:1], 0x2c
-; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
-; VI-NEXT:    v_mov_b32_e32 v0, 0x64
-; VI-NEXT:    s_waitcnt lgkmcnt(0)
-; VI-NEXT:    v_cmp_le_i16_e64 s[2:3], s2, v0
-; VI-NEXT:    v_mov_b32_e32 v0, s0
-; VI-NEXT:    v_mov_b32_e32 v2, s2
-; VI-NEXT:    v_mov_b32_e32 v1, s1
-; VI-NEXT:    v_mov_b32_e32 v3, s3
-; VI-NEXT:    flat_store_dwordx2 v[0:1], v[2:3]
-; VI-NEXT:    s_endpgm
-  %result = call i64 @llvm.amdgcn.icmp.i16(i16 %src, i16 100, i32 41)
-  store i64 %result, i64 addrspace(1)* %out
-  ret void
-}
-
-define amdgpu_kernel void @v_icmp_i1_ne0(i64 addrspace(1)* %out, i32 %a, i32 %b) {
-; GFX-LABEL: v_icmp_i1_ne0:
-; GFX:       ; %bb.0:
-; GFX-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
-; GFX-NEXT:    s_mov_b32 s7, 0xf000
-; GFX-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX-NEXT:    s_cmp_gt_u32 s2, 1
-; GFX-NEXT:    s_cselect_b64 s[4:5], -1, 0
-; GFX-NEXT:    s_cmp_gt_u32 s3, 2
-; GFX-NEXT:    s_cselect_b64 s[2:3], -1, 0
-; GFX-NEXT:    s_and_b64 s[2:3], s[4:5], s[2:3]
-; GFX-NEXT:    s_mov_b32 s6, -1
-; GFX-NEXT:    s_mov_b32 s4, s0
-; GFX-NEXT:    s_mov_b32 s5, s1
-; GFX-NEXT:    v_mov_b32_e32 v0, s2
-; GFX-NEXT:    v_mov_b32_e32 v1, s3
-; GFX-NEXT:    buffer_store_dwordx2 v[0:1], off, s[4:7], 0
-; GFX-NEXT:    s_endpgm
-;
-; VI-LABEL: v_icmp_i1_ne0:
-; VI:       ; %bb.0:
-; VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
-; VI-NEXT:    s_waitcnt lgkmcnt(0)
-; VI-NEXT:    s_cmp_gt_u32 s2, 1
-; VI-NEXT:    s_cselect_b64 s[4:5], -1, 0
-; VI-NEXT:    s_cmp_gt_u32 s3, 2
-; VI-NEXT:    s_cselect_b64 s[2:3], -1, 0
-; VI-NEXT:    s_and_b64 s[2:3], s[4:5], s[2:3]
-; VI-NEXT:    v_mov_b32_e32 v0, s0
-; VI-NEXT:    v_mov_b32_e32 v2, s2
-; VI-NEXT:    v_mov_b32_e32 v1, s1
-; VI-NEXT:    v_mov_b32_e32 v3, s3
-; VI-NEXT:    flat_store_dwordx2 v[0:1], v[2:3]
-; VI-NEXT:    s_endpgm
-  %c0 = icmp ugt i32 %a, 1
-  %c1 = icmp ugt i32 %b, 2
-  %src = and i1 %c0, %c1
-  %result = call i64 @llvm.amdgcn.icmp.i1(i1 %src, i1 false, i32 33)
-  store i64 %result, i64 addrspace(1)* %out
-  ret void
-}
-
-define amdgpu_ps void @test_intr_icmp_i32_invalid_cc(i32 addrspace(1)* %out, i32 %src) {
-; GCN-LABEL: test_intr_icmp_i32_invalid_cc:
-; GCN:       ; %bb.0:
-; GCN-NEXT:    s_endpgm
-  %result = call i32 @llvm.amdgcn.icmp.i32.i32(i32 %src, i32 100, i32 9999)
-  store i32 %result, i32 addrspace(1)* %out
-  ret void
-}
-
-attributes #0 = { nounwind readnone convergent }

diff  --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.icmp.w32.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.icmp.w32.ll
new file mode 100644
index 0000000000000..33681c3c96b74
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.icmp.w32.ll
@@ -0,0 +1,1784 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -march=amdgcn -mcpu=gfx1100 -mattr="+wavefrontsize32" -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX11,SDAG-GFX11 %s
+; RUN: llc -march=amdgcn -mcpu=gfx1010 -mattr="+wavefrontsize32" -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX10,SDAG-GFX10 %s
+
+; RUN: llc -global-isel -global-isel-abort=2 -march=amdgcn -mcpu=gfx1100 -mattr="+wavefrontsize32" -verify-machineinstrs < %s 2>%t | FileCheck -check-prefixes=GCN,GFX11,GISEL-GFX11 %s
+; RUN: FileCheck --check-prefix=ERR %s < %t
+; RUN: llc -global-isel -global-isel-abort=2 -march=amdgcn -mcpu=gfx1010 -mattr="+wavefrontsize32" -verify-machineinstrs < %s 2>%t | FileCheck -check-prefixes=GCN,GFX10,GISEL-GFX10 %s
+; RUN: FileCheck --check-prefix=ERR %s < %t
+
+; Note: -global-isel-abort=2 is used so GlobalISel falls back (with a warning,
+;  checked by ERR below) instead of aborting on i1 inputs. They are allowed in
+;  DAGISel but we (intentionally) don't support them in GlobalISel.
+
+; ERR: warning: Instruction selection used fallback path for v_icmp_i1_ne0
+
+declare i32 @llvm.amdgcn.icmp.i32(i32, i32, i32) #0
+declare i32 @llvm.amdgcn.icmp.i64(i64, i64, i32) #0
+declare i32 @llvm.amdgcn.icmp.i16(i16, i16, i32) #0
+declare i32 @llvm.amdgcn.icmp.i1(i1, i1, i32) #0
+
+define amdgpu_kernel void @v_icmp_i32_eq(i32 addrspace(1)* %out, i32 %src) {
+; SDAG-GFX11-LABEL: v_icmp_i32_eq:
+; SDAG-GFX11:       ; %bb.0:
+; SDAG-GFX11-NEXT:    s_clause 0x1
+; SDAG-GFX11-NEXT:    s_load_b32 s2, s[0:1], 0x2c
+; SDAG-GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x24
+; SDAG-GFX11-NEXT:    s_waitcnt lgkmcnt(0)
+; SDAG-GFX11-NEXT:    v_cmp_eq_u32_e64 s2, 0x64, s2
+; SDAG-GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; SDAG-GFX11-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
+; SDAG-GFX11-NEXT:    global_store_b32 v0, v1, s[0:1]
+; SDAG-GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; SDAG-GFX11-NEXT:    s_endpgm
+;
+; SDAG-GFX10-LABEL: v_icmp_i32_eq:
+; SDAG-GFX10:       ; %bb.0:
+; SDAG-GFX10-NEXT:    s_clause 0x1
+; SDAG-GFX10-NEXT:    s_load_dword s4, s[0:1], 0x2c
+; SDAG-GFX10-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x24
+; SDAG-GFX10-NEXT:    v_mov_b32_e32 v0, 0
+; SDAG-GFX10-NEXT:    s_waitcnt lgkmcnt(0)
+; SDAG-GFX10-NEXT:    v_cmp_eq_u32_e64 s0, 0x64, s4
+; SDAG-GFX10-NEXT:    v_mov_b32_e32 v1, s0
+; SDAG-GFX10-NEXT:    global_store_dword v0, v1, s[2:3]
+; SDAG-GFX10-NEXT:    s_endpgm
+;
+; GISEL-GFX11-LABEL: v_icmp_i32_eq:
+; GISEL-GFX11:       ; %bb.0:
+; GISEL-GFX11-NEXT:    s_clause 0x1
+; GISEL-GFX11-NEXT:    s_load_b32 s2, s[0:1], 0x2c
+; GISEL-GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x24
+; GISEL-GFX11-NEXT:    v_mov_b32_e32 v1, 0
+; GISEL-GFX11-NEXT:    s_waitcnt lgkmcnt(0)
+; GISEL-GFX11-NEXT:    v_cmp_eq_u32_e64 s2, 0x64, s2
+; GISEL-GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GISEL-GFX11-NEXT:    v_mov_b32_e32 v0, s2
+; GISEL-GFX11-NEXT:    global_store_b32 v1, v0, s[0:1]
+; GISEL-GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GISEL-GFX11-NEXT:    s_endpgm
+;
+; GISEL-GFX10-LABEL: v_icmp_i32_eq:
+; GISEL-GFX10:       ; %bb.0:
+; GISEL-GFX10-NEXT:    s_clause 0x1
+; GISEL-GFX10-NEXT:    s_load_dword s4, s[0:1], 0x2c
+; GISEL-GFX10-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x24
+; GISEL-GFX10-NEXT:    v_mov_b32_e32 v1, 0
+; GISEL-GFX10-NEXT:    s_waitcnt lgkmcnt(0)
+; GISEL-GFX10-NEXT:    v_cmp_eq_u32_e64 s0, 0x64, s4
+; GISEL-GFX10-NEXT:    v_mov_b32_e32 v0, s0
+; GISEL-GFX10-NEXT:    global_store_dword v1, v0, s[2:3]
+; GISEL-GFX10-NEXT:    s_endpgm
+  %result = call i32 @llvm.amdgcn.icmp.i32(i32 %src, i32 100, i32 32)
+  store i32 %result, i32 addrspace(1)* %out
+  ret void
+}
+
+define amdgpu_kernel void @v_icmp_i32(i32 addrspace(1)* %out, i32 %src) {
+; SDAG-GFX11-LABEL: v_icmp_i32:
+; SDAG-GFX11:       ; %bb.0:
+; SDAG-GFX11-NEXT:    s_endpgm
+;
+; SDAG-GFX10-LABEL: v_icmp_i32:
+; SDAG-GFX10:       ; %bb.0:
+; SDAG-GFX10-NEXT:    s_endpgm
+;
+; GISEL-GFX11-LABEL: v_icmp_i32:
+; GISEL-GFX11:       ; %bb.0:
+; GISEL-GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x24
+; GISEL-GFX11-NEXT:    v_mov_b32_e32 v0, 0
+; GISEL-GFX11-NEXT:    s_waitcnt lgkmcnt(0)
+; GISEL-GFX11-NEXT:    global_store_b32 v0, v0, s[0:1]
+; GISEL-GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GISEL-GFX11-NEXT:    s_endpgm
+;
+; GISEL-GFX10-LABEL: v_icmp_i32:
+; GISEL-GFX10:       ; %bb.0:
+; GISEL-GFX10-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
+; GISEL-GFX10-NEXT:    v_mov_b32_e32 v0, 0
+; GISEL-GFX10-NEXT:    s_waitcnt lgkmcnt(0)
+; GISEL-GFX10-NEXT:    global_store_dword v0, v0, s[0:1]
+; GISEL-GFX10-NEXT:    s_endpgm
+  %result = call i32 @llvm.amdgcn.icmp.i32(i32 %src, i32 100, i32 30)
+  store i32 %result, i32 addrspace(1)* %out
+  ret void
+}
+
+define amdgpu_kernel void @v_icmp_i32_ne(i32 addrspace(1)* %out, i32 %src) {
+; SDAG-GFX11-LABEL: v_icmp_i32_ne:
+; SDAG-GFX11:       ; %bb.0:
+; SDAG-GFX11-NEXT:    s_clause 0x1
+; SDAG-GFX11-NEXT:    s_load_b32 s2, s[0:1], 0x2c
+; SDAG-GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x24
+; SDAG-GFX11-NEXT:    s_waitcnt lgkmcnt(0)
+; SDAG-GFX11-NEXT:    v_cmp_ne_u32_e64 s2, 0x64, s2
+; SDAG-GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; SDAG-GFX11-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
+; SDAG-GFX11-NEXT:    global_store_b32 v0, v1, s[0:1]
+; SDAG-GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; SDAG-GFX11-NEXT:    s_endpgm
+;
+; SDAG-GFX10-LABEL: v_icmp_i32_ne:
+; SDAG-GFX10:       ; %bb.0:
+; SDAG-GFX10-NEXT:    s_clause 0x1
+; SDAG-GFX10-NEXT:    s_load_dword s4, s[0:1], 0x2c
+; SDAG-GFX10-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x24
+; SDAG-GFX10-NEXT:    v_mov_b32_e32 v0, 0
+; SDAG-GFX10-NEXT:    s_waitcnt lgkmcnt(0)
+; SDAG-GFX10-NEXT:    v_cmp_ne_u32_e64 s0, 0x64, s4
+; SDAG-GFX10-NEXT:    v_mov_b32_e32 v1, s0
+; SDAG-GFX10-NEXT:    global_store_dword v0, v1, s[2:3]
+; SDAG-GFX10-NEXT:    s_endpgm
+;
+; GISEL-GFX11-LABEL: v_icmp_i32_ne:
+; GISEL-GFX11:       ; %bb.0:
+; GISEL-GFX11-NEXT:    s_clause 0x1
+; GISEL-GFX11-NEXT:    s_load_b32 s2, s[0:1], 0x2c
+; GISEL-GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x24
+; GISEL-GFX11-NEXT:    v_mov_b32_e32 v1, 0
+; GISEL-GFX11-NEXT:    s_waitcnt lgkmcnt(0)
+; GISEL-GFX11-NEXT:    v_cmp_ne_u32_e64 s2, 0x64, s2
+; GISEL-GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GISEL-GFX11-NEXT:    v_mov_b32_e32 v0, s2
+; GISEL-GFX11-NEXT:    global_store_b32 v1, v0, s[0:1]
+; GISEL-GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GISEL-GFX11-NEXT:    s_endpgm
+;
+; GISEL-GFX10-LABEL: v_icmp_i32_ne:
+; GISEL-GFX10:       ; %bb.0:
+; GISEL-GFX10-NEXT:    s_clause 0x1
+; GISEL-GFX10-NEXT:    s_load_dword s4, s[0:1], 0x2c
+; GISEL-GFX10-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x24
+; GISEL-GFX10-NEXT:    v_mov_b32_e32 v1, 0
+; GISEL-GFX10-NEXT:    s_waitcnt lgkmcnt(0)
+; GISEL-GFX10-NEXT:    v_cmp_ne_u32_e64 s0, 0x64, s4
+; GISEL-GFX10-NEXT:    v_mov_b32_e32 v0, s0
+; GISEL-GFX10-NEXT:    global_store_dword v1, v0, s[2:3]
+; GISEL-GFX10-NEXT:    s_endpgm
+  %result = call i32 @llvm.amdgcn.icmp.i32(i32 %src, i32 100, i32 33)
+  store i32 %result, i32 addrspace(1)* %out
+  ret void
+}
+
+define amdgpu_kernel void @v_icmp_i32_ugt(i32 addrspace(1)* %out, i32 %src) {
+; SDAG-GFX11-LABEL: v_icmp_i32_ugt:
+; SDAG-GFX11:       ; %bb.0:
+; SDAG-GFX11-NEXT:    s_clause 0x1
+; SDAG-GFX11-NEXT:    s_load_b32 s2, s[0:1], 0x2c
+; SDAG-GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x24
+; SDAG-GFX11-NEXT:    s_waitcnt lgkmcnt(0)
+; SDAG-GFX11-NEXT:    v_cmp_lt_u32_e64 s2, 0x64, s2
+; SDAG-GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; SDAG-GFX11-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
+; SDAG-GFX11-NEXT:    global_store_b32 v0, v1, s[0:1]
+; SDAG-GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; SDAG-GFX11-NEXT:    s_endpgm
+;
+; SDAG-GFX10-LABEL: v_icmp_i32_ugt:
+; SDAG-GFX10:       ; %bb.0:
+; SDAG-GFX10-NEXT:    s_clause 0x1
+; SDAG-GFX10-NEXT:    s_load_dword s4, s[0:1], 0x2c
+; SDAG-GFX10-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x24
+; SDAG-GFX10-NEXT:    v_mov_b32_e32 v0, 0
+; SDAG-GFX10-NEXT:    s_waitcnt lgkmcnt(0)
+; SDAG-GFX10-NEXT:    v_cmp_lt_u32_e64 s0, 0x64, s4
+; SDAG-GFX10-NEXT:    v_mov_b32_e32 v1, s0
+; SDAG-GFX10-NEXT:    global_store_dword v0, v1, s[2:3]
+; SDAG-GFX10-NEXT:    s_endpgm
+;
+; GISEL-GFX11-LABEL: v_icmp_i32_ugt:
+; GISEL-GFX11:       ; %bb.0:
+; GISEL-GFX11-NEXT:    s_clause 0x1
+; GISEL-GFX11-NEXT:    s_load_b32 s2, s[0:1], 0x2c
+; GISEL-GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x24
+; GISEL-GFX11-NEXT:    v_mov_b32_e32 v1, 0
+; GISEL-GFX11-NEXT:    s_waitcnt lgkmcnt(0)
+; GISEL-GFX11-NEXT:    v_cmp_lt_u32_e64 s2, 0x64, s2
+; GISEL-GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GISEL-GFX11-NEXT:    v_mov_b32_e32 v0, s2
+; GISEL-GFX11-NEXT:    global_store_b32 v1, v0, s[0:1]
+; GISEL-GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GISEL-GFX11-NEXT:    s_endpgm
+;
+; GISEL-GFX10-LABEL: v_icmp_i32_ugt:
+; GISEL-GFX10:       ; %bb.0:
+; GISEL-GFX10-NEXT:    s_clause 0x1
+; GISEL-GFX10-NEXT:    s_load_dword s4, s[0:1], 0x2c
+; GISEL-GFX10-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x24
+; GISEL-GFX10-NEXT:    v_mov_b32_e32 v1, 0
+; GISEL-GFX10-NEXT:    s_waitcnt lgkmcnt(0)
+; GISEL-GFX10-NEXT:    v_cmp_lt_u32_e64 s0, 0x64, s4
+; GISEL-GFX10-NEXT:    v_mov_b32_e32 v0, s0
+; GISEL-GFX10-NEXT:    global_store_dword v1, v0, s[2:3]
+; GISEL-GFX10-NEXT:    s_endpgm
+  %result = call i32 @llvm.amdgcn.icmp.i32(i32 %src, i32 100, i32 34)
+  store i32 %result, i32 addrspace(1)* %out
+  ret void
+}
+
+define amdgpu_kernel void @v_icmp_i32_uge(i32 addrspace(1)* %out, i32 %src) {
+; SDAG-GFX11-LABEL: v_icmp_i32_uge:
+; SDAG-GFX11:       ; %bb.0:
+; SDAG-GFX11-NEXT:    s_clause 0x1
+; SDAG-GFX11-NEXT:    s_load_b32 s2, s[0:1], 0x2c
+; SDAG-GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x24
+; SDAG-GFX11-NEXT:    s_waitcnt lgkmcnt(0)
+; SDAG-GFX11-NEXT:    v_cmp_le_u32_e64 s2, 0x64, s2
+; SDAG-GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; SDAG-GFX11-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
+; SDAG-GFX11-NEXT:    global_store_b32 v0, v1, s[0:1]
+; SDAG-GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; SDAG-GFX11-NEXT:    s_endpgm
+;
+; SDAG-GFX10-LABEL: v_icmp_i32_uge:
+; SDAG-GFX10:       ; %bb.0:
+; SDAG-GFX10-NEXT:    s_clause 0x1
+; SDAG-GFX10-NEXT:    s_load_dword s4, s[0:1], 0x2c
+; SDAG-GFX10-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x24
+; SDAG-GFX10-NEXT:    v_mov_b32_e32 v0, 0
+; SDAG-GFX10-NEXT:    s_waitcnt lgkmcnt(0)
+; SDAG-GFX10-NEXT:    v_cmp_le_u32_e64 s0, 0x64, s4
+; SDAG-GFX10-NEXT:    v_mov_b32_e32 v1, s0
+; SDAG-GFX10-NEXT:    global_store_dword v0, v1, s[2:3]
+; SDAG-GFX10-NEXT:    s_endpgm
+;
+; GISEL-GFX11-LABEL: v_icmp_i32_uge:
+; GISEL-GFX11:       ; %bb.0:
+; GISEL-GFX11-NEXT:    s_clause 0x1
+; GISEL-GFX11-NEXT:    s_load_b32 s2, s[0:1], 0x2c
+; GISEL-GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x24
+; GISEL-GFX11-NEXT:    v_mov_b32_e32 v1, 0
+; GISEL-GFX11-NEXT:    s_waitcnt lgkmcnt(0)
+; GISEL-GFX11-NEXT:    v_cmp_le_u32_e64 s2, 0x64, s2
+; GISEL-GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GISEL-GFX11-NEXT:    v_mov_b32_e32 v0, s2
+; GISEL-GFX11-NEXT:    global_store_b32 v1, v0, s[0:1]
+; GISEL-GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GISEL-GFX11-NEXT:    s_endpgm
+;
+; GISEL-GFX10-LABEL: v_icmp_i32_uge:
+; GISEL-GFX10:       ; %bb.0:
+; GISEL-GFX10-NEXT:    s_clause 0x1
+; GISEL-GFX10-NEXT:    s_load_dword s4, s[0:1], 0x2c
+; GISEL-GFX10-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x24
+; GISEL-GFX10-NEXT:    v_mov_b32_e32 v1, 0
+; GISEL-GFX10-NEXT:    s_waitcnt lgkmcnt(0)
+; GISEL-GFX10-NEXT:    v_cmp_le_u32_e64 s0, 0x64, s4
+; GISEL-GFX10-NEXT:    v_mov_b32_e32 v0, s0
+; GISEL-GFX10-NEXT:    global_store_dword v1, v0, s[2:3]
+; GISEL-GFX10-NEXT:    s_endpgm
+  %result = call i32 @llvm.amdgcn.icmp.i32(i32 %src, i32 100, i32 35)
+  store i32 %result, i32 addrspace(1)* %out
+  ret void
+}
+
+define amdgpu_kernel void @v_icmp_i32_ult(i32 addrspace(1)* %out, i32 %src) {
+; SDAG-GFX11-LABEL: v_icmp_i32_ult:
+; SDAG-GFX11:       ; %bb.0:
+; SDAG-GFX11-NEXT:    s_clause 0x1
+; SDAG-GFX11-NEXT:    s_load_b32 s2, s[0:1], 0x2c
+; SDAG-GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x24
+; SDAG-GFX11-NEXT:    s_waitcnt lgkmcnt(0)
+; SDAG-GFX11-NEXT:    v_cmp_gt_u32_e64 s2, 0x64, s2
+; SDAG-GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; SDAG-GFX11-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
+; SDAG-GFX11-NEXT:    global_store_b32 v0, v1, s[0:1]
+; SDAG-GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; SDAG-GFX11-NEXT:    s_endpgm
+;
+; SDAG-GFX10-LABEL: v_icmp_i32_ult:
+; SDAG-GFX10:       ; %bb.0:
+; SDAG-GFX10-NEXT:    s_clause 0x1
+; SDAG-GFX10-NEXT:    s_load_dword s4, s[0:1], 0x2c
+; SDAG-GFX10-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x24
+; SDAG-GFX10-NEXT:    v_mov_b32_e32 v0, 0
+; SDAG-GFX10-NEXT:    s_waitcnt lgkmcnt(0)
+; SDAG-GFX10-NEXT:    v_cmp_gt_u32_e64 s0, 0x64, s4
+; SDAG-GFX10-NEXT:    v_mov_b32_e32 v1, s0
+; SDAG-GFX10-NEXT:    global_store_dword v0, v1, s[2:3]
+; SDAG-GFX10-NEXT:    s_endpgm
+;
+; GISEL-GFX11-LABEL: v_icmp_i32_ult:
+; GISEL-GFX11:       ; %bb.0:
+; GISEL-GFX11-NEXT:    s_clause 0x1
+; GISEL-GFX11-NEXT:    s_load_b32 s2, s[0:1], 0x2c
+; GISEL-GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x24
+; GISEL-GFX11-NEXT:    v_mov_b32_e32 v1, 0
+; GISEL-GFX11-NEXT:    s_waitcnt lgkmcnt(0)
+; GISEL-GFX11-NEXT:    v_cmp_gt_u32_e64 s2, 0x64, s2
+; GISEL-GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GISEL-GFX11-NEXT:    v_mov_b32_e32 v0, s2
+; GISEL-GFX11-NEXT:    global_store_b32 v1, v0, s[0:1]
+; GISEL-GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GISEL-GFX11-NEXT:    s_endpgm
+;
+; GISEL-GFX10-LABEL: v_icmp_i32_ult:
+; GISEL-GFX10:       ; %bb.0:
+; GISEL-GFX10-NEXT:    s_clause 0x1
+; GISEL-GFX10-NEXT:    s_load_dword s4, s[0:1], 0x2c
+; GISEL-GFX10-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x24
+; GISEL-GFX10-NEXT:    v_mov_b32_e32 v1, 0
+; GISEL-GFX10-NEXT:    s_waitcnt lgkmcnt(0)
+; GISEL-GFX10-NEXT:    v_cmp_gt_u32_e64 s0, 0x64, s4
+; GISEL-GFX10-NEXT:    v_mov_b32_e32 v0, s0
+; GISEL-GFX10-NEXT:    global_store_dword v1, v0, s[2:3]
+; GISEL-GFX10-NEXT:    s_endpgm
+  %result = call i32 @llvm.amdgcn.icmp.i32(i32 %src, i32 100, i32 36)
+  store i32 %result, i32 addrspace(1)* %out
+  ret void
+}
+
+define amdgpu_kernel void @v_icmp_i32_ule(i32 addrspace(1)* %out, i32 %src) {
+; SDAG-GFX11-LABEL: v_icmp_i32_ule:
+; SDAG-GFX11:       ; %bb.0:
+; SDAG-GFX11-NEXT:    s_clause 0x1
+; SDAG-GFX11-NEXT:    s_load_b32 s2, s[0:1], 0x2c
+; SDAG-GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x24
+; SDAG-GFX11-NEXT:    s_waitcnt lgkmcnt(0)
+; SDAG-GFX11-NEXT:    v_cmp_ge_u32_e64 s2, 0x64, s2
+; SDAG-GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; SDAG-GFX11-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
+; SDAG-GFX11-NEXT:    global_store_b32 v0, v1, s[0:1]
+; SDAG-GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; SDAG-GFX11-NEXT:    s_endpgm
+;
+; SDAG-GFX10-LABEL: v_icmp_i32_ule:
+; SDAG-GFX10:       ; %bb.0:
+; SDAG-GFX10-NEXT:    s_clause 0x1
+; SDAG-GFX10-NEXT:    s_load_dword s4, s[0:1], 0x2c
+; SDAG-GFX10-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x24
+; SDAG-GFX10-NEXT:    v_mov_b32_e32 v0, 0
+; SDAG-GFX10-NEXT:    s_waitcnt lgkmcnt(0)
+; SDAG-GFX10-NEXT:    v_cmp_ge_u32_e64 s0, 0x64, s4
+; SDAG-GFX10-NEXT:    v_mov_b32_e32 v1, s0
+; SDAG-GFX10-NEXT:    global_store_dword v0, v1, s[2:3]
+; SDAG-GFX10-NEXT:    s_endpgm
+;
+; GISEL-GFX11-LABEL: v_icmp_i32_ule:
+; GISEL-GFX11:       ; %bb.0:
+; GISEL-GFX11-NEXT:    s_clause 0x1
+; GISEL-GFX11-NEXT:    s_load_b32 s2, s[0:1], 0x2c
+; GISEL-GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x24
+; GISEL-GFX11-NEXT:    v_mov_b32_e32 v1, 0
+; GISEL-GFX11-NEXT:    s_waitcnt lgkmcnt(0)
+; GISEL-GFX11-NEXT:    v_cmp_ge_u32_e64 s2, 0x64, s2
+; GISEL-GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GISEL-GFX11-NEXT:    v_mov_b32_e32 v0, s2
+; GISEL-GFX11-NEXT:    global_store_b32 v1, v0, s[0:1]
+; GISEL-GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GISEL-GFX11-NEXT:    s_endpgm
+;
+; GISEL-GFX10-LABEL: v_icmp_i32_ule:
+; GISEL-GFX10:       ; %bb.0:
+; GISEL-GFX10-NEXT:    s_clause 0x1
+; GISEL-GFX10-NEXT:    s_load_dword s4, s[0:1], 0x2c
+; GISEL-GFX10-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x24
+; GISEL-GFX10-NEXT:    v_mov_b32_e32 v1, 0
+; GISEL-GFX10-NEXT:    s_waitcnt lgkmcnt(0)
+; GISEL-GFX10-NEXT:    v_cmp_ge_u32_e64 s0, 0x64, s4
+; GISEL-GFX10-NEXT:    v_mov_b32_e32 v0, s0
+; GISEL-GFX10-NEXT:    global_store_dword v1, v0, s[2:3]
+; GISEL-GFX10-NEXT:    s_endpgm
+  %result = call i32 @llvm.amdgcn.icmp.i32(i32 %src, i32 100, i32 37)
+  store i32 %result, i32 addrspace(1)* %out
+  ret void
+}
+
+define amdgpu_kernel void @v_icmp_i32_sgt(i32 addrspace(1)* %out, i32 %src) #1 {
+; SDAG-GFX11-LABEL: v_icmp_i32_sgt:
+; SDAG-GFX11:       ; %bb.0:
+; SDAG-GFX11-NEXT:    s_clause 0x1
+; SDAG-GFX11-NEXT:    s_load_b32 s2, s[0:1], 0x2c
+; SDAG-GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x24
+; SDAG-GFX11-NEXT:    s_waitcnt lgkmcnt(0)
+; SDAG-GFX11-NEXT:    v_cmp_lt_i32_e64 s2, 0x64, s2
+; SDAG-GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; SDAG-GFX11-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
+; SDAG-GFX11-NEXT:    global_store_b32 v0, v1, s[0:1]
+; SDAG-GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; SDAG-GFX11-NEXT:    s_endpgm
+;
+; SDAG-GFX10-LABEL: v_icmp_i32_sgt:
+; SDAG-GFX10:       ; %bb.0:
+; SDAG-GFX10-NEXT:    s_clause 0x1
+; SDAG-GFX10-NEXT:    s_load_dword s4, s[0:1], 0x2c
+; SDAG-GFX10-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x24
+; SDAG-GFX10-NEXT:    v_mov_b32_e32 v0, 0
+; SDAG-GFX10-NEXT:    s_waitcnt lgkmcnt(0)
+; SDAG-GFX10-NEXT:    v_cmp_lt_i32_e64 s0, 0x64, s4
+; SDAG-GFX10-NEXT:    v_mov_b32_e32 v1, s0
+; SDAG-GFX10-NEXT:    global_store_dword v0, v1, s[2:3]
+; SDAG-GFX10-NEXT:    s_endpgm
+;
+; GISEL-GFX11-LABEL: v_icmp_i32_sgt:
+; GISEL-GFX11:       ; %bb.0:
+; GISEL-GFX11-NEXT:    s_clause 0x1
+; GISEL-GFX11-NEXT:    s_load_b32 s2, s[0:1], 0x2c
+; GISEL-GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x24
+; GISEL-GFX11-NEXT:    v_mov_b32_e32 v1, 0
+; GISEL-GFX11-NEXT:    s_waitcnt lgkmcnt(0)
+; GISEL-GFX11-NEXT:    v_cmp_lt_i32_e64 s2, 0x64, s2
+; GISEL-GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GISEL-GFX11-NEXT:    v_mov_b32_e32 v0, s2
+; GISEL-GFX11-NEXT:    global_store_b32 v1, v0, s[0:1]
+; GISEL-GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GISEL-GFX11-NEXT:    s_endpgm
+;
+; GISEL-GFX10-LABEL: v_icmp_i32_sgt:
+; GISEL-GFX10:       ; %bb.0:
+; GISEL-GFX10-NEXT:    s_clause 0x1
+; GISEL-GFX10-NEXT:    s_load_dword s4, s[0:1], 0x2c
+; GISEL-GFX10-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x24
+; GISEL-GFX10-NEXT:    v_mov_b32_e32 v1, 0
+; GISEL-GFX10-NEXT:    s_waitcnt lgkmcnt(0)
+; GISEL-GFX10-NEXT:    v_cmp_lt_i32_e64 s0, 0x64, s4
+; GISEL-GFX10-NEXT:    v_mov_b32_e32 v0, s0
+; GISEL-GFX10-NEXT:    global_store_dword v1, v0, s[2:3]
+; GISEL-GFX10-NEXT:    s_endpgm
+  %result = call i32 @llvm.amdgcn.icmp.i32(i32 %src, i32 100, i32 38)
+  store i32 %result, i32 addrspace(1)* %out
+  ret void
+}
+
+define amdgpu_kernel void @v_icmp_i32_sge(i32 addrspace(1)* %out, i32 %src) {
+; SDAG-GFX11-LABEL: v_icmp_i32_sge:
+; SDAG-GFX11:       ; %bb.0:
+; SDAG-GFX11-NEXT:    s_clause 0x1
+; SDAG-GFX11-NEXT:    s_load_b32 s2, s[0:1], 0x2c
+; SDAG-GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x24
+; SDAG-GFX11-NEXT:    s_waitcnt lgkmcnt(0)
+; SDAG-GFX11-NEXT:    v_cmp_le_i32_e64 s2, 0x64, s2
+; SDAG-GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; SDAG-GFX11-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
+; SDAG-GFX11-NEXT:    global_store_b32 v0, v1, s[0:1]
+; SDAG-GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; SDAG-GFX11-NEXT:    s_endpgm
+;
+; SDAG-GFX10-LABEL: v_icmp_i32_sge:
+; SDAG-GFX10:       ; %bb.0:
+; SDAG-GFX10-NEXT:    s_clause 0x1
+; SDAG-GFX10-NEXT:    s_load_dword s4, s[0:1], 0x2c
+; SDAG-GFX10-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x24
+; SDAG-GFX10-NEXT:    v_mov_b32_e32 v0, 0
+; SDAG-GFX10-NEXT:    s_waitcnt lgkmcnt(0)
+; SDAG-GFX10-NEXT:    v_cmp_le_i32_e64 s0, 0x64, s4
+; SDAG-GFX10-NEXT:    v_mov_b32_e32 v1, s0
+; SDAG-GFX10-NEXT:    global_store_dword v0, v1, s[2:3]
+; SDAG-GFX10-NEXT:    s_endpgm
+;
+; GISEL-GFX11-LABEL: v_icmp_i32_sge:
+; GISEL-GFX11:       ; %bb.0:
+; GISEL-GFX11-NEXT:    s_clause 0x1
+; GISEL-GFX11-NEXT:    s_load_b32 s2, s[0:1], 0x2c
+; GISEL-GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x24
+; GISEL-GFX11-NEXT:    v_mov_b32_e32 v1, 0
+; GISEL-GFX11-NEXT:    s_waitcnt lgkmcnt(0)
+; GISEL-GFX11-NEXT:    v_cmp_le_i32_e64 s2, 0x64, s2
+; GISEL-GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GISEL-GFX11-NEXT:    v_mov_b32_e32 v0, s2
+; GISEL-GFX11-NEXT:    global_store_b32 v1, v0, s[0:1]
+; GISEL-GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GISEL-GFX11-NEXT:    s_endpgm
+;
+; GISEL-GFX10-LABEL: v_icmp_i32_sge:
+; GISEL-GFX10:       ; %bb.0:
+; GISEL-GFX10-NEXT:    s_clause 0x1
+; GISEL-GFX10-NEXT:    s_load_dword s4, s[0:1], 0x2c
+; GISEL-GFX10-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x24
+; GISEL-GFX10-NEXT:    v_mov_b32_e32 v1, 0
+; GISEL-GFX10-NEXT:    s_waitcnt lgkmcnt(0)
+; GISEL-GFX10-NEXT:    v_cmp_le_i32_e64 s0, 0x64, s4
+; GISEL-GFX10-NEXT:    v_mov_b32_e32 v0, s0
+; GISEL-GFX10-NEXT:    global_store_dword v1, v0, s[2:3]
+; GISEL-GFX10-NEXT:    s_endpgm
+  %result = call i32 @llvm.amdgcn.icmp.i32(i32 %src, i32 100, i32 39)
+  store i32 %result, i32 addrspace(1)* %out
+  ret void
+}
+
+define amdgpu_kernel void @v_icmp_i32_slt(i32 addrspace(1)* %out, i32 %src) {
+; SDAG-GFX11-LABEL: v_icmp_i32_slt:
+; SDAG-GFX11:       ; %bb.0:
+; SDAG-GFX11-NEXT:    s_clause 0x1
+; SDAG-GFX11-NEXT:    s_load_b32 s2, s[0:1], 0x2c
+; SDAG-GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x24
+; SDAG-GFX11-NEXT:    s_waitcnt lgkmcnt(0)
+; SDAG-GFX11-NEXT:    v_cmp_gt_i32_e64 s2, 0x64, s2
+; SDAG-GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; SDAG-GFX11-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
+; SDAG-GFX11-NEXT:    global_store_b32 v0, v1, s[0:1]
+; SDAG-GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; SDAG-GFX11-NEXT:    s_endpgm
+;
+; SDAG-GFX10-LABEL: v_icmp_i32_slt:
+; SDAG-GFX10:       ; %bb.0:
+; SDAG-GFX10-NEXT:    s_clause 0x1
+; SDAG-GFX10-NEXT:    s_load_dword s4, s[0:1], 0x2c
+; SDAG-GFX10-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x24
+; SDAG-GFX10-NEXT:    v_mov_b32_e32 v0, 0
+; SDAG-GFX10-NEXT:    s_waitcnt lgkmcnt(0)
+; SDAG-GFX10-NEXT:    v_cmp_gt_i32_e64 s0, 0x64, s4
+; SDAG-GFX10-NEXT:    v_mov_b32_e32 v1, s0
+; SDAG-GFX10-NEXT:    global_store_dword v0, v1, s[2:3]
+; SDAG-GFX10-NEXT:    s_endpgm
+;
+; GISEL-GFX11-LABEL: v_icmp_i32_slt:
+; GISEL-GFX11:       ; %bb.0:
+; GISEL-GFX11-NEXT:    s_clause 0x1
+; GISEL-GFX11-NEXT:    s_load_b32 s2, s[0:1], 0x2c
+; GISEL-GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x24
+; GISEL-GFX11-NEXT:    v_mov_b32_e32 v1, 0
+; GISEL-GFX11-NEXT:    s_waitcnt lgkmcnt(0)
+; GISEL-GFX11-NEXT:    v_cmp_gt_i32_e64 s2, 0x64, s2
+; GISEL-GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GISEL-GFX11-NEXT:    v_mov_b32_e32 v0, s2
+; GISEL-GFX11-NEXT:    global_store_b32 v1, v0, s[0:1]
+; GISEL-GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GISEL-GFX11-NEXT:    s_endpgm
+;
+; GISEL-GFX10-LABEL: v_icmp_i32_slt:
+; GISEL-GFX10:       ; %bb.0:
+; GISEL-GFX10-NEXT:    s_clause 0x1
+; GISEL-GFX10-NEXT:    s_load_dword s4, s[0:1], 0x2c
+; GISEL-GFX10-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x24
+; GISEL-GFX10-NEXT:    v_mov_b32_e32 v1, 0
+; GISEL-GFX10-NEXT:    s_waitcnt lgkmcnt(0)
+; GISEL-GFX10-NEXT:    v_cmp_gt_i32_e64 s0, 0x64, s4
+; GISEL-GFX10-NEXT:    v_mov_b32_e32 v0, s0
+; GISEL-GFX10-NEXT:    global_store_dword v1, v0, s[2:3]
+; GISEL-GFX10-NEXT:    s_endpgm
+  %result = call i32 @llvm.amdgcn.icmp.i32(i32 %src, i32 100, i32 40)
+  store i32 %result, i32 addrspace(1)* %out
+  ret void
+}
+
+define amdgpu_kernel void @v_icmp_i32_sle(i32 addrspace(1)* %out, i32 %src) {
+; SDAG-GFX11-LABEL: v_icmp_i32_sle:
+; SDAG-GFX11:       ; %bb.0:
+; SDAG-GFX11-NEXT:    s_clause 0x1
+; SDAG-GFX11-NEXT:    s_load_b32 s2, s[0:1], 0x2c
+; SDAG-GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x24
+; SDAG-GFX11-NEXT:    s_waitcnt lgkmcnt(0)
+; SDAG-GFX11-NEXT:    v_cmp_ge_i32_e64 s2, 0x64, s2
+; SDAG-GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; SDAG-GFX11-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
+; SDAG-GFX11-NEXT:    global_store_b32 v0, v1, s[0:1]
+; SDAG-GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; SDAG-GFX11-NEXT:    s_endpgm
+;
+; SDAG-GFX10-LABEL: v_icmp_i32_sle:
+; SDAG-GFX10:       ; %bb.0:
+; SDAG-GFX10-NEXT:    s_clause 0x1
+; SDAG-GFX10-NEXT:    s_load_dword s4, s[0:1], 0x2c
+; SDAG-GFX10-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x24
+; SDAG-GFX10-NEXT:    v_mov_b32_e32 v0, 0
+; SDAG-GFX10-NEXT:    s_waitcnt lgkmcnt(0)
+; SDAG-GFX10-NEXT:    v_cmp_ge_i32_e64 s0, 0x64, s4
+; SDAG-GFX10-NEXT:    v_mov_b32_e32 v1, s0
+; SDAG-GFX10-NEXT:    global_store_dword v0, v1, s[2:3]
+; SDAG-GFX10-NEXT:    s_endpgm
+;
+; GISEL-GFX11-LABEL: v_icmp_i32_sle:
+; GISEL-GFX11:       ; %bb.0:
+; GISEL-GFX11-NEXT:    s_clause 0x1
+; GISEL-GFX11-NEXT:    s_load_b32 s2, s[0:1], 0x2c
+; GISEL-GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x24
+; GISEL-GFX11-NEXT:    v_mov_b32_e32 v1, 0
+; GISEL-GFX11-NEXT:    s_waitcnt lgkmcnt(0)
+; GISEL-GFX11-NEXT:    v_cmp_ge_i32_e64 s2, 0x64, s2
+; GISEL-GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GISEL-GFX11-NEXT:    v_mov_b32_e32 v0, s2
+; GISEL-GFX11-NEXT:    global_store_b32 v1, v0, s[0:1]
+; GISEL-GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GISEL-GFX11-NEXT:    s_endpgm
+;
+; GISEL-GFX10-LABEL: v_icmp_i32_sle:
+; GISEL-GFX10:       ; %bb.0:
+; GISEL-GFX10-NEXT:    s_clause 0x1
+; GISEL-GFX10-NEXT:    s_load_dword s4, s[0:1], 0x2c
+; GISEL-GFX10-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x24
+; GISEL-GFX10-NEXT:    v_mov_b32_e32 v1, 0
+; GISEL-GFX10-NEXT:    s_waitcnt lgkmcnt(0)
+; GISEL-GFX10-NEXT:    v_cmp_ge_i32_e64 s0, 0x64, s4
+; GISEL-GFX10-NEXT:    v_mov_b32_e32 v0, s0
+; GISEL-GFX10-NEXT:    global_store_dword v1, v0, s[2:3]
+; GISEL-GFX10-NEXT:    s_endpgm
+  %result = call i32 @llvm.amdgcn.icmp.i32(i32 %src, i32 100, i32 41)
+  store i32 %result, i32 addrspace(1)* %out
+  ret void
+}
+
+define amdgpu_kernel void @v_icmp_i64_eq(i32 addrspace(1)* %out, i64 %src) {
+; SDAG-GFX11-LABEL: v_icmp_i64_eq:
+; SDAG-GFX11:       ; %bb.0:
+; SDAG-GFX11-NEXT:    s_load_b128 s[0:3], s[0:1], 0x24
+; SDAG-GFX11-NEXT:    s_mov_b64 s[4:5], 0x64
+; SDAG-GFX11-NEXT:    s_waitcnt lgkmcnt(0)
+; SDAG-GFX11-NEXT:    v_cmp_eq_u64_e64 s2, s[2:3], s[4:5]
+; SDAG-GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; SDAG-GFX11-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
+; SDAG-GFX11-NEXT:    global_store_b32 v0, v1, s[0:1]
+; SDAG-GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; SDAG-GFX11-NEXT:    s_endpgm
+;
+; SDAG-GFX10-LABEL: v_icmp_i64_eq:
+; SDAG-GFX10:       ; %bb.0:
+; SDAG-GFX10-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
+; SDAG-GFX10-NEXT:    s_mov_b64 s[4:5], 0x64
+; SDAG-GFX10-NEXT:    v_mov_b32_e32 v0, 0
+; SDAG-GFX10-NEXT:    s_waitcnt lgkmcnt(0)
+; SDAG-GFX10-NEXT:    v_cmp_eq_u64_e64 s2, s[2:3], s[4:5]
+; SDAG-GFX10-NEXT:    v_mov_b32_e32 v1, s2
+; SDAG-GFX10-NEXT:    global_store_dword v0, v1, s[0:1]
+; SDAG-GFX10-NEXT:    s_endpgm
+;
+; GISEL-GFX11-LABEL: v_icmp_i64_eq:
+; GISEL-GFX11:       ; %bb.0:
+; GISEL-GFX11-NEXT:    s_load_b128 s[0:3], s[0:1], 0x24
+; GISEL-GFX11-NEXT:    s_mov_b64 s[4:5], 0x64
+; GISEL-GFX11-NEXT:    v_mov_b32_e32 v1, 0
+; GISEL-GFX11-NEXT:    s_waitcnt lgkmcnt(0)
+; GISEL-GFX11-NEXT:    v_cmp_eq_u64_e64 s2, s[2:3], s[4:5]
+; GISEL-GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GISEL-GFX11-NEXT:    v_mov_b32_e32 v0, s2
+; GISEL-GFX11-NEXT:    global_store_b32 v1, v0, s[0:1]
+; GISEL-GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GISEL-GFX11-NEXT:    s_endpgm
+;
+; GISEL-GFX10-LABEL: v_icmp_i64_eq:
+; GISEL-GFX10:       ; %bb.0:
+; GISEL-GFX10-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
+; GISEL-GFX10-NEXT:    s_mov_b64 s[4:5], 0x64
+; GISEL-GFX10-NEXT:    v_mov_b32_e32 v1, 0
+; GISEL-GFX10-NEXT:    s_waitcnt lgkmcnt(0)
+; GISEL-GFX10-NEXT:    v_cmp_eq_u64_e64 s2, s[2:3], s[4:5]
+; GISEL-GFX10-NEXT:    v_mov_b32_e32 v0, s2
+; GISEL-GFX10-NEXT:    global_store_dword v1, v0, s[0:1]
+; GISEL-GFX10-NEXT:    s_endpgm
+  %result = call i32 @llvm.amdgcn.icmp.i64(i64 %src, i64 100, i32 32)
+  store i32 %result, i32 addrspace(1)* %out
+  ret void
+}
+
+define amdgpu_kernel void @v_icmp_i64_ne(i32 addrspace(1)* %out, i64 %src) {
+; SDAG-GFX11-LABEL: v_icmp_i64_ne:
+; SDAG-GFX11:       ; %bb.0:
+; SDAG-GFX11-NEXT:    s_load_b128 s[0:3], s[0:1], 0x24
+; SDAG-GFX11-NEXT:    s_mov_b64 s[4:5], 0x64
+; SDAG-GFX11-NEXT:    s_waitcnt lgkmcnt(0)
+; SDAG-GFX11-NEXT:    v_cmp_ne_u64_e64 s2, s[2:3], s[4:5]
+; SDAG-GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; SDAG-GFX11-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
+; SDAG-GFX11-NEXT:    global_store_b32 v0, v1, s[0:1]
+; SDAG-GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; SDAG-GFX11-NEXT:    s_endpgm
+;
+; SDAG-GFX10-LABEL: v_icmp_i64_ne:
+; SDAG-GFX10:       ; %bb.0:
+; SDAG-GFX10-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
+; SDAG-GFX10-NEXT:    s_mov_b64 s[4:5], 0x64
+; SDAG-GFX10-NEXT:    v_mov_b32_e32 v0, 0
+; SDAG-GFX10-NEXT:    s_waitcnt lgkmcnt(0)
+; SDAG-GFX10-NEXT:    v_cmp_ne_u64_e64 s2, s[2:3], s[4:5]
+; SDAG-GFX10-NEXT:    v_mov_b32_e32 v1, s2
+; SDAG-GFX10-NEXT:    global_store_dword v0, v1, s[0:1]
+; SDAG-GFX10-NEXT:    s_endpgm
+;
+; GISEL-GFX11-LABEL: v_icmp_i64_ne:
+; GISEL-GFX11:       ; %bb.0:
+; GISEL-GFX11-NEXT:    s_load_b128 s[0:3], s[0:1], 0x24
+; GISEL-GFX11-NEXT:    s_mov_b64 s[4:5], 0x64
+; GISEL-GFX11-NEXT:    v_mov_b32_e32 v1, 0
+; GISEL-GFX11-NEXT:    s_waitcnt lgkmcnt(0)
+; GISEL-GFX11-NEXT:    v_cmp_ne_u64_e64 s2, s[2:3], s[4:5]
+; GISEL-GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GISEL-GFX11-NEXT:    v_mov_b32_e32 v0, s2
+; GISEL-GFX11-NEXT:    global_store_b32 v1, v0, s[0:1]
+; GISEL-GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GISEL-GFX11-NEXT:    s_endpgm
+;
+; GISEL-GFX10-LABEL: v_icmp_i64_ne:
+; GISEL-GFX10:       ; %bb.0:
+; GISEL-GFX10-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
+; GISEL-GFX10-NEXT:    s_mov_b64 s[4:5], 0x64
+; GISEL-GFX10-NEXT:    v_mov_b32_e32 v1, 0
+; GISEL-GFX10-NEXT:    s_waitcnt lgkmcnt(0)
+; GISEL-GFX10-NEXT:    v_cmp_ne_u64_e64 s2, s[2:3], s[4:5]
+; GISEL-GFX10-NEXT:    v_mov_b32_e32 v0, s2
+; GISEL-GFX10-NEXT:    global_store_dword v1, v0, s[0:1]
+; GISEL-GFX10-NEXT:    s_endpgm
+  %result = call i32 @llvm.amdgcn.icmp.i64(i64 %src, i64 100, i32 33)
+  store i32 %result, i32 addrspace(1)* %out
+  ret void
+}
+
+define amdgpu_kernel void @v_icmp_u64_ugt(i32 addrspace(1)* %out, i64 %src) {
+; SDAG-GFX11-LABEL: v_icmp_u64_ugt:
+; SDAG-GFX11:       ; %bb.0:
+; SDAG-GFX11-NEXT:    s_load_b128 s[0:3], s[0:1], 0x24
+; SDAG-GFX11-NEXT:    s_mov_b64 s[4:5], 0x64
+; SDAG-GFX11-NEXT:    s_waitcnt lgkmcnt(0)
+; SDAG-GFX11-NEXT:    v_cmp_gt_u64_e64 s2, s[2:3], s[4:5]
+; SDAG-GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; SDAG-GFX11-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
+; SDAG-GFX11-NEXT:    global_store_b32 v0, v1, s[0:1]
+; SDAG-GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; SDAG-GFX11-NEXT:    s_endpgm
+;
+; SDAG-GFX10-LABEL: v_icmp_u64_ugt:
+; SDAG-GFX10:       ; %bb.0:
+; SDAG-GFX10-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
+; SDAG-GFX10-NEXT:    s_mov_b64 s[4:5], 0x64
+; SDAG-GFX10-NEXT:    v_mov_b32_e32 v0, 0
+; SDAG-GFX10-NEXT:    s_waitcnt lgkmcnt(0)
+; SDAG-GFX10-NEXT:    v_cmp_gt_u64_e64 s2, s[2:3], s[4:5]
+; SDAG-GFX10-NEXT:    v_mov_b32_e32 v1, s2
+; SDAG-GFX10-NEXT:    global_store_dword v0, v1, s[0:1]
+; SDAG-GFX10-NEXT:    s_endpgm
+;
+; GISEL-GFX11-LABEL: v_icmp_u64_ugt:
+; GISEL-GFX11:       ; %bb.0:
+; GISEL-GFX11-NEXT:    s_load_b128 s[0:3], s[0:1], 0x24
+; GISEL-GFX11-NEXT:    s_mov_b64 s[4:5], 0x64
+; GISEL-GFX11-NEXT:    v_mov_b32_e32 v1, 0
+; GISEL-GFX11-NEXT:    s_waitcnt lgkmcnt(0)
+; GISEL-GFX11-NEXT:    v_cmp_gt_u64_e64 s2, s[2:3], s[4:5]
+; GISEL-GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GISEL-GFX11-NEXT:    v_mov_b32_e32 v0, s2
+; GISEL-GFX11-NEXT:    global_store_b32 v1, v0, s[0:1]
+; GISEL-GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GISEL-GFX11-NEXT:    s_endpgm
+;
+; GISEL-GFX10-LABEL: v_icmp_u64_ugt:
+; GISEL-GFX10:       ; %bb.0:
+; GISEL-GFX10-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
+; GISEL-GFX10-NEXT:    s_mov_b64 s[4:5], 0x64
+; GISEL-GFX10-NEXT:    v_mov_b32_e32 v1, 0
+; GISEL-GFX10-NEXT:    s_waitcnt lgkmcnt(0)
+; GISEL-GFX10-NEXT:    v_cmp_gt_u64_e64 s2, s[2:3], s[4:5]
+; GISEL-GFX10-NEXT:    v_mov_b32_e32 v0, s2
+; GISEL-GFX10-NEXT:    global_store_dword v1, v0, s[0:1]
+; GISEL-GFX10-NEXT:    s_endpgm
+  %result = call i32 @llvm.amdgcn.icmp.i64(i64 %src, i64 100, i32 34) ; cond 34 is ICMP_UGT (unsigned greater-than); every run expects v_cmp_gt_u64 of %src against 0x64
+  store i32 %result, i32 addrspace(1)* %out
+  ret void
+}
+
+define amdgpu_kernel void @v_icmp_u64_uge(i32 addrspace(1)* %out, i64 %src) {
+; SDAG-GFX11-LABEL: v_icmp_u64_uge:
+; SDAG-GFX11:       ; %bb.0:
+; SDAG-GFX11-NEXT:    s_load_b128 s[0:3], s[0:1], 0x24
+; SDAG-GFX11-NEXT:    s_mov_b64 s[4:5], 0x64
+; SDAG-GFX11-NEXT:    s_waitcnt lgkmcnt(0)
+; SDAG-GFX11-NEXT:    v_cmp_ge_u64_e64 s2, s[2:3], s[4:5]
+; SDAG-GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; SDAG-GFX11-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
+; SDAG-GFX11-NEXT:    global_store_b32 v0, v1, s[0:1]
+; SDAG-GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; SDAG-GFX11-NEXT:    s_endpgm
+;
+; SDAG-GFX10-LABEL: v_icmp_u64_uge:
+; SDAG-GFX10:       ; %bb.0:
+; SDAG-GFX10-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
+; SDAG-GFX10-NEXT:    s_mov_b64 s[4:5], 0x64
+; SDAG-GFX10-NEXT:    v_mov_b32_e32 v0, 0
+; SDAG-GFX10-NEXT:    s_waitcnt lgkmcnt(0)
+; SDAG-GFX10-NEXT:    v_cmp_ge_u64_e64 s2, s[2:3], s[4:5]
+; SDAG-GFX10-NEXT:    v_mov_b32_e32 v1, s2
+; SDAG-GFX10-NEXT:    global_store_dword v0, v1, s[0:1]
+; SDAG-GFX10-NEXT:    s_endpgm
+;
+; GISEL-GFX11-LABEL: v_icmp_u64_uge:
+; GISEL-GFX11:       ; %bb.0:
+; GISEL-GFX11-NEXT:    s_load_b128 s[0:3], s[0:1], 0x24
+; GISEL-GFX11-NEXT:    s_mov_b64 s[4:5], 0x64
+; GISEL-GFX11-NEXT:    v_mov_b32_e32 v1, 0
+; GISEL-GFX11-NEXT:    s_waitcnt lgkmcnt(0)
+; GISEL-GFX11-NEXT:    v_cmp_ge_u64_e64 s2, s[2:3], s[4:5]
+; GISEL-GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GISEL-GFX11-NEXT:    v_mov_b32_e32 v0, s2
+; GISEL-GFX11-NEXT:    global_store_b32 v1, v0, s[0:1]
+; GISEL-GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GISEL-GFX11-NEXT:    s_endpgm
+;
+; GISEL-GFX10-LABEL: v_icmp_u64_uge:
+; GISEL-GFX10:       ; %bb.0:
+; GISEL-GFX10-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
+; GISEL-GFX10-NEXT:    s_mov_b64 s[4:5], 0x64
+; GISEL-GFX10-NEXT:    v_mov_b32_e32 v1, 0
+; GISEL-GFX10-NEXT:    s_waitcnt lgkmcnt(0)
+; GISEL-GFX10-NEXT:    v_cmp_ge_u64_e64 s2, s[2:3], s[4:5]
+; GISEL-GFX10-NEXT:    v_mov_b32_e32 v0, s2
+; GISEL-GFX10-NEXT:    global_store_dword v1, v0, s[0:1]
+; GISEL-GFX10-NEXT:    s_endpgm
+  %result = call i32 @llvm.amdgcn.icmp.i64(i64 %src, i64 100, i32 35) ; cond 35 is ICMP_UGE (unsigned greater-or-equal); every run expects v_cmp_ge_u64 of %src against 0x64
+  store i32 %result, i32 addrspace(1)* %out
+  ret void
+}
+
+define amdgpu_kernel void @v_icmp_u64_ult(i32 addrspace(1)* %out, i64 %src) {
+; SDAG-GFX11-LABEL: v_icmp_u64_ult:
+; SDAG-GFX11:       ; %bb.0:
+; SDAG-GFX11-NEXT:    s_load_b128 s[0:3], s[0:1], 0x24
+; SDAG-GFX11-NEXT:    s_mov_b64 s[4:5], 0x64
+; SDAG-GFX11-NEXT:    s_waitcnt lgkmcnt(0)
+; SDAG-GFX11-NEXT:    v_cmp_lt_u64_e64 s2, s[2:3], s[4:5]
+; SDAG-GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; SDAG-GFX11-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
+; SDAG-GFX11-NEXT:    global_store_b32 v0, v1, s[0:1]
+; SDAG-GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; SDAG-GFX11-NEXT:    s_endpgm
+;
+; SDAG-GFX10-LABEL: v_icmp_u64_ult:
+; SDAG-GFX10:       ; %bb.0:
+; SDAG-GFX10-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
+; SDAG-GFX10-NEXT:    s_mov_b64 s[4:5], 0x64
+; SDAG-GFX10-NEXT:    v_mov_b32_e32 v0, 0
+; SDAG-GFX10-NEXT:    s_waitcnt lgkmcnt(0)
+; SDAG-GFX10-NEXT:    v_cmp_lt_u64_e64 s2, s[2:3], s[4:5]
+; SDAG-GFX10-NEXT:    v_mov_b32_e32 v1, s2
+; SDAG-GFX10-NEXT:    global_store_dword v0, v1, s[0:1]
+; SDAG-GFX10-NEXT:    s_endpgm
+;
+; GISEL-GFX11-LABEL: v_icmp_u64_ult:
+; GISEL-GFX11:       ; %bb.0:
+; GISEL-GFX11-NEXT:    s_load_b128 s[0:3], s[0:1], 0x24
+; GISEL-GFX11-NEXT:    s_mov_b64 s[4:5], 0x64
+; GISEL-GFX11-NEXT:    v_mov_b32_e32 v1, 0
+; GISEL-GFX11-NEXT:    s_waitcnt lgkmcnt(0)
+; GISEL-GFX11-NEXT:    v_cmp_lt_u64_e64 s2, s[2:3], s[4:5]
+; GISEL-GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GISEL-GFX11-NEXT:    v_mov_b32_e32 v0, s2
+; GISEL-GFX11-NEXT:    global_store_b32 v1, v0, s[0:1]
+; GISEL-GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GISEL-GFX11-NEXT:    s_endpgm
+;
+; GISEL-GFX10-LABEL: v_icmp_u64_ult:
+; GISEL-GFX10:       ; %bb.0:
+; GISEL-GFX10-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
+; GISEL-GFX10-NEXT:    s_mov_b64 s[4:5], 0x64
+; GISEL-GFX10-NEXT:    v_mov_b32_e32 v1, 0
+; GISEL-GFX10-NEXT:    s_waitcnt lgkmcnt(0)
+; GISEL-GFX10-NEXT:    v_cmp_lt_u64_e64 s2, s[2:3], s[4:5]
+; GISEL-GFX10-NEXT:    v_mov_b32_e32 v0, s2
+; GISEL-GFX10-NEXT:    global_store_dword v1, v0, s[0:1]
+; GISEL-GFX10-NEXT:    s_endpgm
+  %result = call i32 @llvm.amdgcn.icmp.i64(i64 %src, i64 100, i32 36) ; cond 36 is ICMP_ULT (unsigned less-than); every run expects v_cmp_lt_u64 of %src against 0x64
+  store i32 %result, i32 addrspace(1)* %out
+  ret void
+}
+
+define amdgpu_kernel void @v_icmp_u64_ule(i32 addrspace(1)* %out, i64 %src) {
+; SDAG-GFX11-LABEL: v_icmp_u64_ule:
+; SDAG-GFX11:       ; %bb.0:
+; SDAG-GFX11-NEXT:    s_load_b128 s[0:3], s[0:1], 0x24
+; SDAG-GFX11-NEXT:    s_mov_b64 s[4:5], 0x64
+; SDAG-GFX11-NEXT:    s_waitcnt lgkmcnt(0)
+; SDAG-GFX11-NEXT:    v_cmp_le_u64_e64 s2, s[2:3], s[4:5]
+; SDAG-GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; SDAG-GFX11-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
+; SDAG-GFX11-NEXT:    global_store_b32 v0, v1, s[0:1]
+; SDAG-GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; SDAG-GFX11-NEXT:    s_endpgm
+;
+; SDAG-GFX10-LABEL: v_icmp_u64_ule:
+; SDAG-GFX10:       ; %bb.0:
+; SDAG-GFX10-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
+; SDAG-GFX10-NEXT:    s_mov_b64 s[4:5], 0x64
+; SDAG-GFX10-NEXT:    v_mov_b32_e32 v0, 0
+; SDAG-GFX10-NEXT:    s_waitcnt lgkmcnt(0)
+; SDAG-GFX10-NEXT:    v_cmp_le_u64_e64 s2, s[2:3], s[4:5]
+; SDAG-GFX10-NEXT:    v_mov_b32_e32 v1, s2
+; SDAG-GFX10-NEXT:    global_store_dword v0, v1, s[0:1]
+; SDAG-GFX10-NEXT:    s_endpgm
+;
+; GISEL-GFX11-LABEL: v_icmp_u64_ule:
+; GISEL-GFX11:       ; %bb.0:
+; GISEL-GFX11-NEXT:    s_load_b128 s[0:3], s[0:1], 0x24
+; GISEL-GFX11-NEXT:    s_mov_b64 s[4:5], 0x64
+; GISEL-GFX11-NEXT:    v_mov_b32_e32 v1, 0
+; GISEL-GFX11-NEXT:    s_waitcnt lgkmcnt(0)
+; GISEL-GFX11-NEXT:    v_cmp_le_u64_e64 s2, s[2:3], s[4:5]
+; GISEL-GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GISEL-GFX11-NEXT:    v_mov_b32_e32 v0, s2
+; GISEL-GFX11-NEXT:    global_store_b32 v1, v0, s[0:1]
+; GISEL-GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GISEL-GFX11-NEXT:    s_endpgm
+;
+; GISEL-GFX10-LABEL: v_icmp_u64_ule:
+; GISEL-GFX10:       ; %bb.0:
+; GISEL-GFX10-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
+; GISEL-GFX10-NEXT:    s_mov_b64 s[4:5], 0x64
+; GISEL-GFX10-NEXT:    v_mov_b32_e32 v1, 0
+; GISEL-GFX10-NEXT:    s_waitcnt lgkmcnt(0)
+; GISEL-GFX10-NEXT:    v_cmp_le_u64_e64 s2, s[2:3], s[4:5]
+; GISEL-GFX10-NEXT:    v_mov_b32_e32 v0, s2
+; GISEL-GFX10-NEXT:    global_store_dword v1, v0, s[0:1]
+; GISEL-GFX10-NEXT:    s_endpgm
+  %result = call i32 @llvm.amdgcn.icmp.i64(i64 %src, i64 100, i32 37) ; cond 37 is ICMP_ULE (unsigned less-or-equal); every run expects v_cmp_le_u64 of %src against 0x64
+  store i32 %result, i32 addrspace(1)* %out
+  ret void
+}
+
+define amdgpu_kernel void @v_icmp_i64_sgt(i32 addrspace(1)* %out, i64 %src) {
+; SDAG-GFX11-LABEL: v_icmp_i64_sgt:
+; SDAG-GFX11:       ; %bb.0:
+; SDAG-GFX11-NEXT:    s_load_b128 s[0:3], s[0:1], 0x24
+; SDAG-GFX11-NEXT:    s_mov_b64 s[4:5], 0x64
+; SDAG-GFX11-NEXT:    s_waitcnt lgkmcnt(0)
+; SDAG-GFX11-NEXT:    v_cmp_gt_i64_e64 s2, s[2:3], s[4:5]
+; SDAG-GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; SDAG-GFX11-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
+; SDAG-GFX11-NEXT:    global_store_b32 v0, v1, s[0:1]
+; SDAG-GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; SDAG-GFX11-NEXT:    s_endpgm
+;
+; SDAG-GFX10-LABEL: v_icmp_i64_sgt:
+; SDAG-GFX10:       ; %bb.0:
+; SDAG-GFX10-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
+; SDAG-GFX10-NEXT:    s_mov_b64 s[4:5], 0x64
+; SDAG-GFX10-NEXT:    v_mov_b32_e32 v0, 0
+; SDAG-GFX10-NEXT:    s_waitcnt lgkmcnt(0)
+; SDAG-GFX10-NEXT:    v_cmp_gt_i64_e64 s2, s[2:3], s[4:5]
+; SDAG-GFX10-NEXT:    v_mov_b32_e32 v1, s2
+; SDAG-GFX10-NEXT:    global_store_dword v0, v1, s[0:1]
+; SDAG-GFX10-NEXT:    s_endpgm
+;
+; GISEL-GFX11-LABEL: v_icmp_i64_sgt:
+; GISEL-GFX11:       ; %bb.0:
+; GISEL-GFX11-NEXT:    s_load_b128 s[0:3], s[0:1], 0x24
+; GISEL-GFX11-NEXT:    s_mov_b64 s[4:5], 0x64
+; GISEL-GFX11-NEXT:    v_mov_b32_e32 v1, 0
+; GISEL-GFX11-NEXT:    s_waitcnt lgkmcnt(0)
+; GISEL-GFX11-NEXT:    v_cmp_gt_i64_e64 s2, s[2:3], s[4:5]
+; GISEL-GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GISEL-GFX11-NEXT:    v_mov_b32_e32 v0, s2
+; GISEL-GFX11-NEXT:    global_store_b32 v1, v0, s[0:1]
+; GISEL-GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GISEL-GFX11-NEXT:    s_endpgm
+;
+; GISEL-GFX10-LABEL: v_icmp_i64_sgt:
+; GISEL-GFX10:       ; %bb.0:
+; GISEL-GFX10-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
+; GISEL-GFX10-NEXT:    s_mov_b64 s[4:5], 0x64
+; GISEL-GFX10-NEXT:    v_mov_b32_e32 v1, 0
+; GISEL-GFX10-NEXT:    s_waitcnt lgkmcnt(0)
+; GISEL-GFX10-NEXT:    v_cmp_gt_i64_e64 s2, s[2:3], s[4:5]
+; GISEL-GFX10-NEXT:    v_mov_b32_e32 v0, s2
+; GISEL-GFX10-NEXT:    global_store_dword v1, v0, s[0:1]
+; GISEL-GFX10-NEXT:    s_endpgm
+  %result = call i32 @llvm.amdgcn.icmp.i64(i64 %src, i64 100, i32 38) ; cond 38 is ICMP_SGT (signed greater-than); every run expects v_cmp_gt_i64 of %src against 0x64
+  store i32 %result, i32 addrspace(1)* %out
+  ret void
+}
+
+define amdgpu_kernel void @v_icmp_i64_sge(i32 addrspace(1)* %out, i64 %src) {
+; SDAG-GFX11-LABEL: v_icmp_i64_sge:
+; SDAG-GFX11:       ; %bb.0:
+; SDAG-GFX11-NEXT:    s_load_b128 s[0:3], s[0:1], 0x24
+; SDAG-GFX11-NEXT:    s_mov_b64 s[4:5], 0x64
+; SDAG-GFX11-NEXT:    s_waitcnt lgkmcnt(0)
+; SDAG-GFX11-NEXT:    v_cmp_ge_i64_e64 s2, s[2:3], s[4:5]
+; SDAG-GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; SDAG-GFX11-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
+; SDAG-GFX11-NEXT:    global_store_b32 v0, v1, s[0:1]
+; SDAG-GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; SDAG-GFX11-NEXT:    s_endpgm
+;
+; SDAG-GFX10-LABEL: v_icmp_i64_sge:
+; SDAG-GFX10:       ; %bb.0:
+; SDAG-GFX10-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
+; SDAG-GFX10-NEXT:    s_mov_b64 s[4:5], 0x64
+; SDAG-GFX10-NEXT:    v_mov_b32_e32 v0, 0
+; SDAG-GFX10-NEXT:    s_waitcnt lgkmcnt(0)
+; SDAG-GFX10-NEXT:    v_cmp_ge_i64_e64 s2, s[2:3], s[4:5]
+; SDAG-GFX10-NEXT:    v_mov_b32_e32 v1, s2
+; SDAG-GFX10-NEXT:    global_store_dword v0, v1, s[0:1]
+; SDAG-GFX10-NEXT:    s_endpgm
+;
+; GISEL-GFX11-LABEL: v_icmp_i64_sge:
+; GISEL-GFX11:       ; %bb.0:
+; GISEL-GFX11-NEXT:    s_load_b128 s[0:3], s[0:1], 0x24
+; GISEL-GFX11-NEXT:    s_mov_b64 s[4:5], 0x64
+; GISEL-GFX11-NEXT:    v_mov_b32_e32 v1, 0
+; GISEL-GFX11-NEXT:    s_waitcnt lgkmcnt(0)
+; GISEL-GFX11-NEXT:    v_cmp_ge_i64_e64 s2, s[2:3], s[4:5]
+; GISEL-GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GISEL-GFX11-NEXT:    v_mov_b32_e32 v0, s2
+; GISEL-GFX11-NEXT:    global_store_b32 v1, v0, s[0:1]
+; GISEL-GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GISEL-GFX11-NEXT:    s_endpgm
+;
+; GISEL-GFX10-LABEL: v_icmp_i64_sge:
+; GISEL-GFX10:       ; %bb.0:
+; GISEL-GFX10-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
+; GISEL-GFX10-NEXT:    s_mov_b64 s[4:5], 0x64
+; GISEL-GFX10-NEXT:    v_mov_b32_e32 v1, 0
+; GISEL-GFX10-NEXT:    s_waitcnt lgkmcnt(0)
+; GISEL-GFX10-NEXT:    v_cmp_ge_i64_e64 s2, s[2:3], s[4:5]
+; GISEL-GFX10-NEXT:    v_mov_b32_e32 v0, s2
+; GISEL-GFX10-NEXT:    global_store_dword v1, v0, s[0:1]
+; GISEL-GFX10-NEXT:    s_endpgm
+  %result = call i32 @llvm.amdgcn.icmp.i64(i64 %src, i64 100, i32 39) ; cond 39 is ICMP_SGE (signed greater-or-equal); every run expects v_cmp_ge_i64 of %src against 0x64
+  store i32 %result, i32 addrspace(1)* %out
+  ret void
+}
+
+define amdgpu_kernel void @v_icmp_i64_slt(i32 addrspace(1)* %out, i64 %src) {
+; SDAG-GFX11-LABEL: v_icmp_i64_slt:
+; SDAG-GFX11:       ; %bb.0:
+; SDAG-GFX11-NEXT:    s_load_b128 s[0:3], s[0:1], 0x24
+; SDAG-GFX11-NEXT:    s_mov_b64 s[4:5], 0x64
+; SDAG-GFX11-NEXT:    s_waitcnt lgkmcnt(0)
+; SDAG-GFX11-NEXT:    v_cmp_lt_i64_e64 s2, s[2:3], s[4:5]
+; SDAG-GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; SDAG-GFX11-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
+; SDAG-GFX11-NEXT:    global_store_b32 v0, v1, s[0:1]
+; SDAG-GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; SDAG-GFX11-NEXT:    s_endpgm
+;
+; SDAG-GFX10-LABEL: v_icmp_i64_slt:
+; SDAG-GFX10:       ; %bb.0:
+; SDAG-GFX10-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
+; SDAG-GFX10-NEXT:    s_mov_b64 s[4:5], 0x64
+; SDAG-GFX10-NEXT:    v_mov_b32_e32 v0, 0
+; SDAG-GFX10-NEXT:    s_waitcnt lgkmcnt(0)
+; SDAG-GFX10-NEXT:    v_cmp_lt_i64_e64 s2, s[2:3], s[4:5]
+; SDAG-GFX10-NEXT:    v_mov_b32_e32 v1, s2
+; SDAG-GFX10-NEXT:    global_store_dword v0, v1, s[0:1]
+; SDAG-GFX10-NEXT:    s_endpgm
+;
+; GISEL-GFX11-LABEL: v_icmp_i64_slt:
+; GISEL-GFX11:       ; %bb.0:
+; GISEL-GFX11-NEXT:    s_load_b128 s[0:3], s[0:1], 0x24
+; GISEL-GFX11-NEXT:    s_mov_b64 s[4:5], 0x64
+; GISEL-GFX11-NEXT:    v_mov_b32_e32 v1, 0
+; GISEL-GFX11-NEXT:    s_waitcnt lgkmcnt(0)
+; GISEL-GFX11-NEXT:    v_cmp_lt_i64_e64 s2, s[2:3], s[4:5]
+; GISEL-GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GISEL-GFX11-NEXT:    v_mov_b32_e32 v0, s2
+; GISEL-GFX11-NEXT:    global_store_b32 v1, v0, s[0:1]
+; GISEL-GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GISEL-GFX11-NEXT:    s_endpgm
+;
+; GISEL-GFX10-LABEL: v_icmp_i64_slt:
+; GISEL-GFX10:       ; %bb.0:
+; GISEL-GFX10-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
+; GISEL-GFX10-NEXT:    s_mov_b64 s[4:5], 0x64
+; GISEL-GFX10-NEXT:    v_mov_b32_e32 v1, 0
+; GISEL-GFX10-NEXT:    s_waitcnt lgkmcnt(0)
+; GISEL-GFX10-NEXT:    v_cmp_lt_i64_e64 s2, s[2:3], s[4:5]
+; GISEL-GFX10-NEXT:    v_mov_b32_e32 v0, s2
+; GISEL-GFX10-NEXT:    global_store_dword v1, v0, s[0:1]
+; GISEL-GFX10-NEXT:    s_endpgm
+  %result = call i32 @llvm.amdgcn.icmp.i64(i64 %src, i64 100, i32 40) ; cond 40 is ICMP_SLT (signed less-than); every run expects v_cmp_lt_i64 of %src against 0x64
+  store i32 %result, i32 addrspace(1)* %out
+  ret void
+}
+
+define amdgpu_kernel void @v_icmp_i64_sle(i32 addrspace(1)* %out, i64 %src) {
+; SDAG-GFX11-LABEL: v_icmp_i64_sle:
+; SDAG-GFX11:       ; %bb.0:
+; SDAG-GFX11-NEXT:    s_load_b128 s[0:3], s[0:1], 0x24
+; SDAG-GFX11-NEXT:    s_mov_b64 s[4:5], 0x64
+; SDAG-GFX11-NEXT:    s_waitcnt lgkmcnt(0)
+; SDAG-GFX11-NEXT:    v_cmp_le_i64_e64 s2, s[2:3], s[4:5]
+; SDAG-GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; SDAG-GFX11-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
+; SDAG-GFX11-NEXT:    global_store_b32 v0, v1, s[0:1]
+; SDAG-GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; SDAG-GFX11-NEXT:    s_endpgm
+;
+; SDAG-GFX10-LABEL: v_icmp_i64_sle:
+; SDAG-GFX10:       ; %bb.0:
+; SDAG-GFX10-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
+; SDAG-GFX10-NEXT:    s_mov_b64 s[4:5], 0x64
+; SDAG-GFX10-NEXT:    v_mov_b32_e32 v0, 0
+; SDAG-GFX10-NEXT:    s_waitcnt lgkmcnt(0)
+; SDAG-GFX10-NEXT:    v_cmp_le_i64_e64 s2, s[2:3], s[4:5]
+; SDAG-GFX10-NEXT:    v_mov_b32_e32 v1, s2
+; SDAG-GFX10-NEXT:    global_store_dword v0, v1, s[0:1]
+; SDAG-GFX10-NEXT:    s_endpgm
+;
+; GISEL-GFX11-LABEL: v_icmp_i64_sle:
+; GISEL-GFX11:       ; %bb.0:
+; GISEL-GFX11-NEXT:    s_load_b128 s[0:3], s[0:1], 0x24
+; GISEL-GFX11-NEXT:    s_mov_b64 s[4:5], 0x64
+; GISEL-GFX11-NEXT:    v_mov_b32_e32 v1, 0
+; GISEL-GFX11-NEXT:    s_waitcnt lgkmcnt(0)
+; GISEL-GFX11-NEXT:    v_cmp_le_i64_e64 s2, s[2:3], s[4:5]
+; GISEL-GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GISEL-GFX11-NEXT:    v_mov_b32_e32 v0, s2
+; GISEL-GFX11-NEXT:    global_store_b32 v1, v0, s[0:1]
+; GISEL-GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GISEL-GFX11-NEXT:    s_endpgm
+;
+; GISEL-GFX10-LABEL: v_icmp_i64_sle:
+; GISEL-GFX10:       ; %bb.0:
+; GISEL-GFX10-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
+; GISEL-GFX10-NEXT:    s_mov_b64 s[4:5], 0x64
+; GISEL-GFX10-NEXT:    v_mov_b32_e32 v1, 0
+; GISEL-GFX10-NEXT:    s_waitcnt lgkmcnt(0)
+; GISEL-GFX10-NEXT:    v_cmp_le_i64_e64 s2, s[2:3], s[4:5]
+; GISEL-GFX10-NEXT:    v_mov_b32_e32 v0, s2
+; GISEL-GFX10-NEXT:    global_store_dword v1, v0, s[0:1]
+; GISEL-GFX10-NEXT:    s_endpgm
+  %result = call i32 @llvm.amdgcn.icmp.i64(i64 %src, i64 100, i32 41) ; cond 41 is ICMP_SLE (signed less-or-equal); every run expects v_cmp_le_i64 of %src against 0x64
+  store i32 %result, i32 addrspace(1)* %out
+  ret void
+}
+
+define amdgpu_kernel void @v_icmp_i16_eq(i32 addrspace(1)* %out, i16 %src) {
+; SDAG-GFX11-LABEL: v_icmp_i16_eq:
+; SDAG-GFX11:       ; %bb.0:
+; SDAG-GFX11-NEXT:    s_clause 0x1
+; SDAG-GFX11-NEXT:    s_load_b32 s2, s[0:1], 0x2c
+; SDAG-GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x24
+; SDAG-GFX11-NEXT:    s_waitcnt lgkmcnt(0)
+; SDAG-GFX11-NEXT:    v_cmp_eq_u16_e64 s2, 0x64, s2
+; SDAG-GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; SDAG-GFX11-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
+; SDAG-GFX11-NEXT:    global_store_b32 v0, v1, s[0:1]
+; SDAG-GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; SDAG-GFX11-NEXT:    s_endpgm
+;
+; SDAG-GFX10-LABEL: v_icmp_i16_eq:
+; SDAG-GFX10:       ; %bb.0:
+; SDAG-GFX10-NEXT:    s_clause 0x1
+; SDAG-GFX10-NEXT:    s_load_dword s4, s[0:1], 0x2c
+; SDAG-GFX10-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x24
+; SDAG-GFX10-NEXT:    v_mov_b32_e32 v0, 0
+; SDAG-GFX10-NEXT:    s_waitcnt lgkmcnt(0)
+; SDAG-GFX10-NEXT:    v_cmp_eq_u16_e64 s0, 0x64, s4
+; SDAG-GFX10-NEXT:    v_mov_b32_e32 v1, s0
+; SDAG-GFX10-NEXT:    global_store_dword v0, v1, s[2:3]
+; SDAG-GFX10-NEXT:    s_endpgm
+;
+; GISEL-GFX11-LABEL: v_icmp_i16_eq:
+; GISEL-GFX11:       ; %bb.0:
+; GISEL-GFX11-NEXT:    s_clause 0x1
+; GISEL-GFX11-NEXT:    s_load_b32 s2, s[0:1], 0x2c
+; GISEL-GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x24
+; GISEL-GFX11-NEXT:    v_mov_b32_e32 v1, 0
+; GISEL-GFX11-NEXT:    s_waitcnt lgkmcnt(0)
+; GISEL-GFX11-NEXT:    v_cmp_eq_u16_e64 s2, 0x64, s2
+; GISEL-GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GISEL-GFX11-NEXT:    v_mov_b32_e32 v0, s2
+; GISEL-GFX11-NEXT:    global_store_b32 v1, v0, s[0:1]
+; GISEL-GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GISEL-GFX11-NEXT:    s_endpgm
+;
+; GISEL-GFX10-LABEL: v_icmp_i16_eq:
+; GISEL-GFX10:       ; %bb.0:
+; GISEL-GFX10-NEXT:    s_clause 0x1
+; GISEL-GFX10-NEXT:    s_load_dword s4, s[0:1], 0x2c
+; GISEL-GFX10-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x24
+; GISEL-GFX10-NEXT:    v_mov_b32_e32 v1, 0
+; GISEL-GFX10-NEXT:    s_waitcnt lgkmcnt(0)
+; GISEL-GFX10-NEXT:    v_cmp_eq_u16_e64 s0, 0x64, s4
+; GISEL-GFX10-NEXT:    v_mov_b32_e32 v0, s0
+; GISEL-GFX10-NEXT:    global_store_dword v1, v0, s[2:3]
+; GISEL-GFX10-NEXT:    s_endpgm
+  %result = call i32 @llvm.amdgcn.icmp.i16(i16 %src, i16 100, i32 32) ; cond 32 is ICMP_EQ; every run expects v_cmp_eq_u16 with the constant 0x64 as the first source operand
+  store i32 %result, i32 addrspace(1)* %out
+  ret void
+}
+
+define amdgpu_kernel void @v_icmp_i16(i32 addrspace(1)* %out, i16 %src) {
+; SDAG-GFX11-LABEL: v_icmp_i16:
+; SDAG-GFX11:       ; %bb.0:
+; SDAG-GFX11-NEXT:    s_endpgm
+;
+; SDAG-GFX10-LABEL: v_icmp_i16:
+; SDAG-GFX10:       ; %bb.0:
+; SDAG-GFX10-NEXT:    s_endpgm
+;
+; GISEL-GFX11-LABEL: v_icmp_i16:
+; GISEL-GFX11:       ; %bb.0:
+; GISEL-GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x24
+; GISEL-GFX11-NEXT:    v_mov_b32_e32 v0, 0
+; GISEL-GFX11-NEXT:    s_waitcnt lgkmcnt(0)
+; GISEL-GFX11-NEXT:    global_store_b32 v0, v0, s[0:1]
+; GISEL-GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GISEL-GFX11-NEXT:    s_endpgm
+;
+; GISEL-GFX10-LABEL: v_icmp_i16:
+; GISEL-GFX10:       ; %bb.0:
+; GISEL-GFX10-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
+; GISEL-GFX10-NEXT:    v_mov_b32_e32 v0, 0
+; GISEL-GFX10-NEXT:    s_waitcnt lgkmcnt(0)
+; GISEL-GFX10-NEXT:    global_store_dword v0, v0, s[0:1]
+; GISEL-GFX10-NEXT:    s_endpgm
+  %result = call i32 @llvm.amdgcn.icmp.i16(i16 %src, i16 100, i32 30) ; cond 30 is outside the 32-41 codes used by the other icmp tests (presumably the invalid-predicate case, confirm); no run expects a compare, SelectionDAG expects only s_endpgm while GlobalISel still stores 0
+  store i32 %result, i32 addrspace(1)* %out
+  ret void
+}
+
+define amdgpu_kernel void @v_icmp_i16_ne(i32 addrspace(1)* %out, i16 %src) {
+; SDAG-GFX11-LABEL: v_icmp_i16_ne:
+; SDAG-GFX11:       ; %bb.0:
+; SDAG-GFX11-NEXT:    s_clause 0x1
+; SDAG-GFX11-NEXT:    s_load_b32 s2, s[0:1], 0x2c
+; SDAG-GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x24
+; SDAG-GFX11-NEXT:    s_waitcnt lgkmcnt(0)
+; SDAG-GFX11-NEXT:    v_cmp_ne_u16_e64 s2, 0x64, s2
+; SDAG-GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; SDAG-GFX11-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
+; SDAG-GFX11-NEXT:    global_store_b32 v0, v1, s[0:1]
+; SDAG-GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; SDAG-GFX11-NEXT:    s_endpgm
+;
+; SDAG-GFX10-LABEL: v_icmp_i16_ne:
+; SDAG-GFX10:       ; %bb.0:
+; SDAG-GFX10-NEXT:    s_clause 0x1
+; SDAG-GFX10-NEXT:    s_load_dword s4, s[0:1], 0x2c
+; SDAG-GFX10-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x24
+; SDAG-GFX10-NEXT:    v_mov_b32_e32 v0, 0
+; SDAG-GFX10-NEXT:    s_waitcnt lgkmcnt(0)
+; SDAG-GFX10-NEXT:    v_cmp_ne_u16_e64 s0, 0x64, s4
+; SDAG-GFX10-NEXT:    v_mov_b32_e32 v1, s0
+; SDAG-GFX10-NEXT:    global_store_dword v0, v1, s[2:3]
+; SDAG-GFX10-NEXT:    s_endpgm
+;
+; GISEL-GFX11-LABEL: v_icmp_i16_ne:
+; GISEL-GFX11:       ; %bb.0:
+; GISEL-GFX11-NEXT:    s_clause 0x1
+; GISEL-GFX11-NEXT:    s_load_b32 s2, s[0:1], 0x2c
+; GISEL-GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x24
+; GISEL-GFX11-NEXT:    v_mov_b32_e32 v1, 0
+; GISEL-GFX11-NEXT:    s_waitcnt lgkmcnt(0)
+; GISEL-GFX11-NEXT:    v_cmp_ne_u16_e64 s2, 0x64, s2
+; GISEL-GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GISEL-GFX11-NEXT:    v_mov_b32_e32 v0, s2
+; GISEL-GFX11-NEXT:    global_store_b32 v1, v0, s[0:1]
+; GISEL-GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GISEL-GFX11-NEXT:    s_endpgm
+;
+; GISEL-GFX10-LABEL: v_icmp_i16_ne:
+; GISEL-GFX10:       ; %bb.0:
+; GISEL-GFX10-NEXT:    s_clause 0x1
+; GISEL-GFX10-NEXT:    s_load_dword s4, s[0:1], 0x2c
+; GISEL-GFX10-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x24
+; GISEL-GFX10-NEXT:    v_mov_b32_e32 v1, 0
+; GISEL-GFX10-NEXT:    s_waitcnt lgkmcnt(0)
+; GISEL-GFX10-NEXT:    v_cmp_ne_u16_e64 s0, 0x64, s4
+; GISEL-GFX10-NEXT:    v_mov_b32_e32 v0, s0
+; GISEL-GFX10-NEXT:    global_store_dword v1, v0, s[2:3]
+; GISEL-GFX10-NEXT:    s_endpgm
+  %result = call i32 @llvm.amdgcn.icmp.i16(i16 %src, i16 100, i32 33) ; cond 33 is ICMP_NE; every run expects v_cmp_ne_u16 with the constant 0x64 as the first source operand
+  store i32 %result, i32 addrspace(1)* %out
+  ret void
+}
+
+define amdgpu_kernel void @v_icmp_i16_ugt(i32 addrspace(1)* %out, i16 %src) {
+; SDAG-GFX11-LABEL: v_icmp_i16_ugt:
+; SDAG-GFX11:       ; %bb.0:
+; SDAG-GFX11-NEXT:    s_clause 0x1
+; SDAG-GFX11-NEXT:    s_load_b32 s2, s[0:1], 0x2c
+; SDAG-GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x24
+; SDAG-GFX11-NEXT:    s_waitcnt lgkmcnt(0)
+; SDAG-GFX11-NEXT:    v_cmp_lt_u16_e64 s2, 0x64, s2
+; SDAG-GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; SDAG-GFX11-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
+; SDAG-GFX11-NEXT:    global_store_b32 v0, v1, s[0:1]
+; SDAG-GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; SDAG-GFX11-NEXT:    s_endpgm
+;
+; SDAG-GFX10-LABEL: v_icmp_i16_ugt:
+; SDAG-GFX10:       ; %bb.0:
+; SDAG-GFX10-NEXT:    s_clause 0x1
+; SDAG-GFX10-NEXT:    s_load_dword s4, s[0:1], 0x2c
+; SDAG-GFX10-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x24
+; SDAG-GFX10-NEXT:    v_mov_b32_e32 v0, 0
+; SDAG-GFX10-NEXT:    s_waitcnt lgkmcnt(0)
+; SDAG-GFX10-NEXT:    v_cmp_lt_u16_e64 s0, 0x64, s4
+; SDAG-GFX10-NEXT:    v_mov_b32_e32 v1, s0
+; SDAG-GFX10-NEXT:    global_store_dword v0, v1, s[2:3]
+; SDAG-GFX10-NEXT:    s_endpgm
+;
+; GISEL-GFX11-LABEL: v_icmp_i16_ugt:
+; GISEL-GFX11:       ; %bb.0:
+; GISEL-GFX11-NEXT:    s_clause 0x1
+; GISEL-GFX11-NEXT:    s_load_b32 s2, s[0:1], 0x2c
+; GISEL-GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x24
+; GISEL-GFX11-NEXT:    v_mov_b32_e32 v1, 0
+; GISEL-GFX11-NEXT:    s_waitcnt lgkmcnt(0)
+; GISEL-GFX11-NEXT:    v_cmp_lt_u16_e64 s2, 0x64, s2
+; GISEL-GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GISEL-GFX11-NEXT:    v_mov_b32_e32 v0, s2
+; GISEL-GFX11-NEXT:    global_store_b32 v1, v0, s[0:1]
+; GISEL-GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GISEL-GFX11-NEXT:    s_endpgm
+;
+; GISEL-GFX10-LABEL: v_icmp_i16_ugt:
+; GISEL-GFX10:       ; %bb.0:
+; GISEL-GFX10-NEXT:    s_clause 0x1
+; GISEL-GFX10-NEXT:    s_load_dword s4, s[0:1], 0x2c
+; GISEL-GFX10-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x24
+; GISEL-GFX10-NEXT:    v_mov_b32_e32 v1, 0
+; GISEL-GFX10-NEXT:    s_waitcnt lgkmcnt(0)
+; GISEL-GFX10-NEXT:    v_cmp_lt_u16_e64 s0, 0x64, s4
+; GISEL-GFX10-NEXT:    v_mov_b32_e32 v0, s0
+; GISEL-GFX10-NEXT:    global_store_dword v1, v0, s[2:3]
+; GISEL-GFX10-NEXT:    s_endpgm
+  %result = call i32 @llvm.amdgcn.icmp.i16(i16 %src, i16 100, i32 34) ; cond 34 is ICMP_UGT (unsigned greater-than); every run expects the commuted form v_cmp_lt_u16 with 0x64 as the first source operand
+  store i32 %result, i32 addrspace(1)* %out
+  ret void
+}
+
+define amdgpu_kernel void @v_icmp_i16_uge(i32 addrspace(1)* %out, i16 %src) {
+; SDAG-GFX11-LABEL: v_icmp_i16_uge:
+; SDAG-GFX11:       ; %bb.0:
+; SDAG-GFX11-NEXT:    s_clause 0x1
+; SDAG-GFX11-NEXT:    s_load_b32 s2, s[0:1], 0x2c
+; SDAG-GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x24
+; SDAG-GFX11-NEXT:    s_waitcnt lgkmcnt(0)
+; SDAG-GFX11-NEXT:    v_cmp_le_u16_e64 s2, 0x64, s2
+; SDAG-GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; SDAG-GFX11-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
+; SDAG-GFX11-NEXT:    global_store_b32 v0, v1, s[0:1]
+; SDAG-GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; SDAG-GFX11-NEXT:    s_endpgm
+;
+; SDAG-GFX10-LABEL: v_icmp_i16_uge:
+; SDAG-GFX10:       ; %bb.0:
+; SDAG-GFX10-NEXT:    s_clause 0x1
+; SDAG-GFX10-NEXT:    s_load_dword s4, s[0:1], 0x2c
+; SDAG-GFX10-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x24
+; SDAG-GFX10-NEXT:    v_mov_b32_e32 v0, 0
+; SDAG-GFX10-NEXT:    s_waitcnt lgkmcnt(0)
+; SDAG-GFX10-NEXT:    v_cmp_le_u16_e64 s0, 0x64, s4
+; SDAG-GFX10-NEXT:    v_mov_b32_e32 v1, s0
+; SDAG-GFX10-NEXT:    global_store_dword v0, v1, s[2:3]
+; SDAG-GFX10-NEXT:    s_endpgm
+;
+; GISEL-GFX11-LABEL: v_icmp_i16_uge:
+; GISEL-GFX11:       ; %bb.0:
+; GISEL-GFX11-NEXT:    s_clause 0x1
+; GISEL-GFX11-NEXT:    s_load_b32 s2, s[0:1], 0x2c
+; GISEL-GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x24
+; GISEL-GFX11-NEXT:    v_mov_b32_e32 v1, 0
+; GISEL-GFX11-NEXT:    s_waitcnt lgkmcnt(0)
+; GISEL-GFX11-NEXT:    v_cmp_le_u16_e64 s2, 0x64, s2
+; GISEL-GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GISEL-GFX11-NEXT:    v_mov_b32_e32 v0, s2
+; GISEL-GFX11-NEXT:    global_store_b32 v1, v0, s[0:1]
+; GISEL-GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GISEL-GFX11-NEXT:    s_endpgm
+;
+; GISEL-GFX10-LABEL: v_icmp_i16_uge:
+; GISEL-GFX10:       ; %bb.0:
+; GISEL-GFX10-NEXT:    s_clause 0x1
+; GISEL-GFX10-NEXT:    s_load_dword s4, s[0:1], 0x2c
+; GISEL-GFX10-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x24
+; GISEL-GFX10-NEXT:    v_mov_b32_e32 v1, 0
+; GISEL-GFX10-NEXT:    s_waitcnt lgkmcnt(0)
+; GISEL-GFX10-NEXT:    v_cmp_le_u16_e64 s0, 0x64, s4
+; GISEL-GFX10-NEXT:    v_mov_b32_e32 v0, s0
+; GISEL-GFX10-NEXT:    global_store_dword v1, v0, s[2:3]
+; GISEL-GFX10-NEXT:    s_endpgm
+  %result = call i32 @llvm.amdgcn.icmp.i16(i16 %src, i16 100, i32 35) ; cond 35 is ICMP_UGE (unsigned greater-or-equal); every run expects the commuted form v_cmp_le_u16 with 0x64 as the first source operand
+  store i32 %result, i32 addrspace(1)* %out
+  ret void
+}
+
+define amdgpu_kernel void @v_icmp_i16_ult(i32 addrspace(1)* %out, i16 %src) {
+; SDAG-GFX11-LABEL: v_icmp_i16_ult:
+; SDAG-GFX11:       ; %bb.0:
+; SDAG-GFX11-NEXT:    s_clause 0x1
+; SDAG-GFX11-NEXT:    s_load_b32 s2, s[0:1], 0x2c
+; SDAG-GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x24
+; SDAG-GFX11-NEXT:    s_waitcnt lgkmcnt(0)
+; SDAG-GFX11-NEXT:    v_cmp_gt_u16_e64 s2, 0x64, s2
+; SDAG-GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; SDAG-GFX11-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
+; SDAG-GFX11-NEXT:    global_store_b32 v0, v1, s[0:1]
+; SDAG-GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; SDAG-GFX11-NEXT:    s_endpgm
+;
+; SDAG-GFX10-LABEL: v_icmp_i16_ult:
+; SDAG-GFX10:       ; %bb.0:
+; SDAG-GFX10-NEXT:    s_clause 0x1
+; SDAG-GFX10-NEXT:    s_load_dword s4, s[0:1], 0x2c
+; SDAG-GFX10-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x24
+; SDAG-GFX10-NEXT:    v_mov_b32_e32 v0, 0
+; SDAG-GFX10-NEXT:    s_waitcnt lgkmcnt(0)
+; SDAG-GFX10-NEXT:    v_cmp_gt_u16_e64 s0, 0x64, s4
+; SDAG-GFX10-NEXT:    v_mov_b32_e32 v1, s0
+; SDAG-GFX10-NEXT:    global_store_dword v0, v1, s[2:3]
+; SDAG-GFX10-NEXT:    s_endpgm
+;
+; GISEL-GFX11-LABEL: v_icmp_i16_ult:
+; GISEL-GFX11:       ; %bb.0:
+; GISEL-GFX11-NEXT:    s_clause 0x1
+; GISEL-GFX11-NEXT:    s_load_b32 s2, s[0:1], 0x2c
+; GISEL-GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x24
+; GISEL-GFX11-NEXT:    v_mov_b32_e32 v1, 0
+; GISEL-GFX11-NEXT:    s_waitcnt lgkmcnt(0)
+; GISEL-GFX11-NEXT:    v_cmp_gt_u16_e64 s2, 0x64, s2
+; GISEL-GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GISEL-GFX11-NEXT:    v_mov_b32_e32 v0, s2
+; GISEL-GFX11-NEXT:    global_store_b32 v1, v0, s[0:1]
+; GISEL-GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GISEL-GFX11-NEXT:    s_endpgm
+;
+; GISEL-GFX10-LABEL: v_icmp_i16_ult:
+; GISEL-GFX10:       ; %bb.0:
+; GISEL-GFX10-NEXT:    s_clause 0x1
+; GISEL-GFX10-NEXT:    s_load_dword s4, s[0:1], 0x2c
+; GISEL-GFX10-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x24
+; GISEL-GFX10-NEXT:    v_mov_b32_e32 v1, 0
+; GISEL-GFX10-NEXT:    s_waitcnt lgkmcnt(0)
+; GISEL-GFX10-NEXT:    v_cmp_gt_u16_e64 s0, 0x64, s4
+; GISEL-GFX10-NEXT:    v_mov_b32_e32 v0, s0
+; GISEL-GFX10-NEXT:    global_store_dword v1, v0, s[2:3]
+; GISEL-GFX10-NEXT:    s_endpgm
+  %result = call i32 @llvm.amdgcn.icmp.i16(i16 %src, i16 100, i32 36) ; cond 36 is ICMP_ULT (unsigned less-than); every run expects the commuted form v_cmp_gt_u16 with 0x64 as the first source operand
+  store i32 %result, i32 addrspace(1)* %out
+  ret void
+}
+
+define amdgpu_kernel void @v_icmp_i16_ule(i32 addrspace(1)* %out, i16 %src) {
+; SDAG-GFX11-LABEL: v_icmp_i16_ule:
+; SDAG-GFX11:       ; %bb.0:
+; SDAG-GFX11-NEXT:    s_clause 0x1
+; SDAG-GFX11-NEXT:    s_load_b32 s2, s[0:1], 0x2c
+; SDAG-GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x24
+; SDAG-GFX11-NEXT:    s_waitcnt lgkmcnt(0)
+; SDAG-GFX11-NEXT:    v_cmp_ge_u16_e64 s2, 0x64, s2
+; SDAG-GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; SDAG-GFX11-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
+; SDAG-GFX11-NEXT:    global_store_b32 v0, v1, s[0:1]
+; SDAG-GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; SDAG-GFX11-NEXT:    s_endpgm
+;
+; SDAG-GFX10-LABEL: v_icmp_i16_ule:
+; SDAG-GFX10:       ; %bb.0:
+; SDAG-GFX10-NEXT:    s_clause 0x1
+; SDAG-GFX10-NEXT:    s_load_dword s4, s[0:1], 0x2c
+; SDAG-GFX10-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x24
+; SDAG-GFX10-NEXT:    v_mov_b32_e32 v0, 0
+; SDAG-GFX10-NEXT:    s_waitcnt lgkmcnt(0)
+; SDAG-GFX10-NEXT:    v_cmp_ge_u16_e64 s0, 0x64, s4
+; SDAG-GFX10-NEXT:    v_mov_b32_e32 v1, s0
+; SDAG-GFX10-NEXT:    global_store_dword v0, v1, s[2:3]
+; SDAG-GFX10-NEXT:    s_endpgm
+;
+; GISEL-GFX11-LABEL: v_icmp_i16_ule:
+; GISEL-GFX11:       ; %bb.0:
+; GISEL-GFX11-NEXT:    s_clause 0x1
+; GISEL-GFX11-NEXT:    s_load_b32 s2, s[0:1], 0x2c
+; GISEL-GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x24
+; GISEL-GFX11-NEXT:    v_mov_b32_e32 v1, 0
+; GISEL-GFX11-NEXT:    s_waitcnt lgkmcnt(0)
+; GISEL-GFX11-NEXT:    v_cmp_ge_u16_e64 s2, 0x64, s2
+; GISEL-GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GISEL-GFX11-NEXT:    v_mov_b32_e32 v0, s2
+; GISEL-GFX11-NEXT:    global_store_b32 v1, v0, s[0:1]
+; GISEL-GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GISEL-GFX11-NEXT:    s_endpgm
+;
+; GISEL-GFX10-LABEL: v_icmp_i16_ule:
+; GISEL-GFX10:       ; %bb.0:
+; GISEL-GFX10-NEXT:    s_clause 0x1
+; GISEL-GFX10-NEXT:    s_load_dword s4, s[0:1], 0x2c
+; GISEL-GFX10-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x24
+; GISEL-GFX10-NEXT:    v_mov_b32_e32 v1, 0
+; GISEL-GFX10-NEXT:    s_waitcnt lgkmcnt(0)
+; GISEL-GFX10-NEXT:    v_cmp_ge_u16_e64 s0, 0x64, s4
+; GISEL-GFX10-NEXT:    v_mov_b32_e32 v0, s0
+; GISEL-GFX10-NEXT:    global_store_dword v1, v0, s[2:3]
+; GISEL-GFX10-NEXT:    s_endpgm
+  %result = call i32 @llvm.amdgcn.icmp.i16(i16 %src, i16 100, i32 37) ; cond 37 is ICMP_ULE (unsigned less-or-equal); every run expects the commuted form v_cmp_ge_u16 with 0x64 as the first source operand
+  store i32 %result, i32 addrspace(1)* %out
+  ret void
+}
+
+define amdgpu_kernel void @v_icmp_i16_sgt(i32 addrspace(1)* %out, i16 %src) {
+; SDAG-GFX11-LABEL: v_icmp_i16_sgt:
+; SDAG-GFX11:       ; %bb.0:
+; SDAG-GFX11-NEXT:    s_clause 0x1
+; SDAG-GFX11-NEXT:    s_load_b32 s2, s[0:1], 0x2c
+; SDAG-GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x24
+; SDAG-GFX11-NEXT:    s_waitcnt lgkmcnt(0)
+; SDAG-GFX11-NEXT:    v_cmp_lt_i16_e64 s2, 0x64, s2
+; SDAG-GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; SDAG-GFX11-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
+; SDAG-GFX11-NEXT:    global_store_b32 v0, v1, s[0:1]
+; SDAG-GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; SDAG-GFX11-NEXT:    s_endpgm
+;
+; SDAG-GFX10-LABEL: v_icmp_i16_sgt:
+; SDAG-GFX10:       ; %bb.0:
+; SDAG-GFX10-NEXT:    s_clause 0x1
+; SDAG-GFX10-NEXT:    s_load_dword s4, s[0:1], 0x2c
+; SDAG-GFX10-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x24
+; SDAG-GFX10-NEXT:    v_mov_b32_e32 v0, 0
+; SDAG-GFX10-NEXT:    s_waitcnt lgkmcnt(0)
+; SDAG-GFX10-NEXT:    v_cmp_lt_i16_e64 s0, 0x64, s4
+; SDAG-GFX10-NEXT:    v_mov_b32_e32 v1, s0
+; SDAG-GFX10-NEXT:    global_store_dword v0, v1, s[2:3]
+; SDAG-GFX10-NEXT:    s_endpgm
+;
+; GISEL-GFX11-LABEL: v_icmp_i16_sgt:
+; GISEL-GFX11:       ; %bb.0:
+; GISEL-GFX11-NEXT:    s_clause 0x1
+; GISEL-GFX11-NEXT:    s_load_b32 s2, s[0:1], 0x2c
+; GISEL-GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x24
+; GISEL-GFX11-NEXT:    v_mov_b32_e32 v1, 0
+; GISEL-GFX11-NEXT:    s_waitcnt lgkmcnt(0)
+; GISEL-GFX11-NEXT:    v_cmp_lt_i16_e64 s2, 0x64, s2
+; GISEL-GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GISEL-GFX11-NEXT:    v_mov_b32_e32 v0, s2
+; GISEL-GFX11-NEXT:    global_store_b32 v1, v0, s[0:1]
+; GISEL-GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GISEL-GFX11-NEXT:    s_endpgm
+;
+; GISEL-GFX10-LABEL: v_icmp_i16_sgt:
+; GISEL-GFX10:       ; %bb.0:
+; GISEL-GFX10-NEXT:    s_clause 0x1
+; GISEL-GFX10-NEXT:    s_load_dword s4, s[0:1], 0x2c
+; GISEL-GFX10-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x24
+; GISEL-GFX10-NEXT:    v_mov_b32_e32 v1, 0
+; GISEL-GFX10-NEXT:    s_waitcnt lgkmcnt(0)
+; GISEL-GFX10-NEXT:    v_cmp_lt_i16_e64 s0, 0x64, s4
+; GISEL-GFX10-NEXT:    v_mov_b32_e32 v0, s0
+; GISEL-GFX10-NEXT:    global_store_dword v1, v0, s[2:3]
+; GISEL-GFX10-NEXT:    s_endpgm
+  %result = call i32 @llvm.amdgcn.icmp.i16(i16 %src, i16 100, i32 38) ; cc 38 = signed greater-than (src > 100)
+  store i32 %result, i32 addrspace(1)* %out
+  ret void
+}
+
+define amdgpu_kernel void @v_icmp_i16_sge(i32 addrspace(1)* %out, i16 %src) {
+; SDAG-GFX11-LABEL: v_icmp_i16_sge:
+; SDAG-GFX11:       ; %bb.0:
+; SDAG-GFX11-NEXT:    s_clause 0x1
+; SDAG-GFX11-NEXT:    s_load_b32 s2, s[0:1], 0x2c
+; SDAG-GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x24
+; SDAG-GFX11-NEXT:    s_waitcnt lgkmcnt(0)
+; SDAG-GFX11-NEXT:    v_cmp_le_i16_e64 s2, 0x64, s2
+; SDAG-GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; SDAG-GFX11-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
+; SDAG-GFX11-NEXT:    global_store_b32 v0, v1, s[0:1]
+; SDAG-GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; SDAG-GFX11-NEXT:    s_endpgm
+;
+; SDAG-GFX10-LABEL: v_icmp_i16_sge:
+; SDAG-GFX10:       ; %bb.0:
+; SDAG-GFX10-NEXT:    s_clause 0x1
+; SDAG-GFX10-NEXT:    s_load_dword s4, s[0:1], 0x2c
+; SDAG-GFX10-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x24
+; SDAG-GFX10-NEXT:    v_mov_b32_e32 v0, 0
+; SDAG-GFX10-NEXT:    s_waitcnt lgkmcnt(0)
+; SDAG-GFX10-NEXT:    v_cmp_le_i16_e64 s0, 0x64, s4
+; SDAG-GFX10-NEXT:    v_mov_b32_e32 v1, s0
+; SDAG-GFX10-NEXT:    global_store_dword v0, v1, s[2:3]
+; SDAG-GFX10-NEXT:    s_endpgm
+;
+; GISEL-GFX11-LABEL: v_icmp_i16_sge:
+; GISEL-GFX11:       ; %bb.0:
+; GISEL-GFX11-NEXT:    s_clause 0x1
+; GISEL-GFX11-NEXT:    s_load_b32 s2, s[0:1], 0x2c
+; GISEL-GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x24
+; GISEL-GFX11-NEXT:    v_mov_b32_e32 v1, 0
+; GISEL-GFX11-NEXT:    s_waitcnt lgkmcnt(0)
+; GISEL-GFX11-NEXT:    v_cmp_le_i16_e64 s2, 0x64, s2
+; GISEL-GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GISEL-GFX11-NEXT:    v_mov_b32_e32 v0, s2
+; GISEL-GFX11-NEXT:    global_store_b32 v1, v0, s[0:1]
+; GISEL-GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GISEL-GFX11-NEXT:    s_endpgm
+;
+; GISEL-GFX10-LABEL: v_icmp_i16_sge:
+; GISEL-GFX10:       ; %bb.0:
+; GISEL-GFX10-NEXT:    s_clause 0x1
+; GISEL-GFX10-NEXT:    s_load_dword s4, s[0:1], 0x2c
+; GISEL-GFX10-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x24
+; GISEL-GFX10-NEXT:    v_mov_b32_e32 v1, 0
+; GISEL-GFX10-NEXT:    s_waitcnt lgkmcnt(0)
+; GISEL-GFX10-NEXT:    v_cmp_le_i16_e64 s0, 0x64, s4
+; GISEL-GFX10-NEXT:    v_mov_b32_e32 v0, s0
+; GISEL-GFX10-NEXT:    global_store_dword v1, v0, s[2:3]
+; GISEL-GFX10-NEXT:    s_endpgm
+  %result = call i32 @llvm.amdgcn.icmp.i16(i16 %src, i16 100, i32 39) ; cc 39 = signed greater-or-equal (src >= 100)
+  store i32 %result, i32 addrspace(1)* %out
+  ret void
+}
+
+define amdgpu_kernel void @v_icmp_i16_slt(i32 addrspace(1)* %out, i16 %src) {
+; SDAG-GFX11-LABEL: v_icmp_i16_slt:
+; SDAG-GFX11:       ; %bb.0:
+; SDAG-GFX11-NEXT:    s_clause 0x1
+; SDAG-GFX11-NEXT:    s_load_b32 s2, s[0:1], 0x2c
+; SDAG-GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x24
+; SDAG-GFX11-NEXT:    s_waitcnt lgkmcnt(0)
+; SDAG-GFX11-NEXT:    v_cmp_gt_i16_e64 s2, 0x64, s2
+; SDAG-GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; SDAG-GFX11-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
+; SDAG-GFX11-NEXT:    global_store_b32 v0, v1, s[0:1]
+; SDAG-GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; SDAG-GFX11-NEXT:    s_endpgm
+;
+; SDAG-GFX10-LABEL: v_icmp_i16_slt:
+; SDAG-GFX10:       ; %bb.0:
+; SDAG-GFX10-NEXT:    s_clause 0x1
+; SDAG-GFX10-NEXT:    s_load_dword s4, s[0:1], 0x2c
+; SDAG-GFX10-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x24
+; SDAG-GFX10-NEXT:    v_mov_b32_e32 v0, 0
+; SDAG-GFX10-NEXT:    s_waitcnt lgkmcnt(0)
+; SDAG-GFX10-NEXT:    v_cmp_gt_i16_e64 s0, 0x64, s4
+; SDAG-GFX10-NEXT:    v_mov_b32_e32 v1, s0
+; SDAG-GFX10-NEXT:    global_store_dword v0, v1, s[2:3]
+; SDAG-GFX10-NEXT:    s_endpgm
+;
+; GISEL-GFX11-LABEL: v_icmp_i16_slt:
+; GISEL-GFX11:       ; %bb.0:
+; GISEL-GFX11-NEXT:    s_clause 0x1
+; GISEL-GFX11-NEXT:    s_load_b32 s2, s[0:1], 0x2c
+; GISEL-GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x24
+; GISEL-GFX11-NEXT:    v_mov_b32_e32 v1, 0
+; GISEL-GFX11-NEXT:    s_waitcnt lgkmcnt(0)
+; GISEL-GFX11-NEXT:    v_cmp_gt_i16_e64 s2, 0x64, s2
+; GISEL-GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GISEL-GFX11-NEXT:    v_mov_b32_e32 v0, s2
+; GISEL-GFX11-NEXT:    global_store_b32 v1, v0, s[0:1]
+; GISEL-GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GISEL-GFX11-NEXT:    s_endpgm
+;
+; GISEL-GFX10-LABEL: v_icmp_i16_slt:
+; GISEL-GFX10:       ; %bb.0:
+; GISEL-GFX10-NEXT:    s_clause 0x1
+; GISEL-GFX10-NEXT:    s_load_dword s4, s[0:1], 0x2c
+; GISEL-GFX10-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x24
+; GISEL-GFX10-NEXT:    v_mov_b32_e32 v1, 0
+; GISEL-GFX10-NEXT:    s_waitcnt lgkmcnt(0)
+; GISEL-GFX10-NEXT:    v_cmp_gt_i16_e64 s0, 0x64, s4
+; GISEL-GFX10-NEXT:    v_mov_b32_e32 v0, s0
+; GISEL-GFX10-NEXT:    global_store_dword v1, v0, s[2:3]
+; GISEL-GFX10-NEXT:    s_endpgm
+  %result = call i32 @llvm.amdgcn.icmp.i16(i16 %src, i16 100, i32 40) ; cc 40 = signed less-than (src < 100)
+  store i32 %result, i32 addrspace(1)* %out
+  ret void
+}
+
+define amdgpu_kernel void @v_icmp_i16_sle(i32 addrspace(1)* %out, i16 %src) {
+; SDAG-GFX11-LABEL: v_icmp_i16_sle:
+; SDAG-GFX11:       ; %bb.0:
+; SDAG-GFX11-NEXT:    s_clause 0x1
+; SDAG-GFX11-NEXT:    s_load_b32 s2, s[0:1], 0x2c
+; SDAG-GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x24
+; SDAG-GFX11-NEXT:    s_waitcnt lgkmcnt(0)
+; SDAG-GFX11-NEXT:    v_cmp_ge_i16_e64 s2, 0x64, s2
+; SDAG-GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; SDAG-GFX11-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
+; SDAG-GFX11-NEXT:    global_store_b32 v0, v1, s[0:1]
+; SDAG-GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; SDAG-GFX11-NEXT:    s_endpgm
+;
+; SDAG-GFX10-LABEL: v_icmp_i16_sle:
+; SDAG-GFX10:       ; %bb.0:
+; SDAG-GFX10-NEXT:    s_clause 0x1
+; SDAG-GFX10-NEXT:    s_load_dword s4, s[0:1], 0x2c
+; SDAG-GFX10-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x24
+; SDAG-GFX10-NEXT:    v_mov_b32_e32 v0, 0
+; SDAG-GFX10-NEXT:    s_waitcnt lgkmcnt(0)
+; SDAG-GFX10-NEXT:    v_cmp_ge_i16_e64 s0, 0x64, s4
+; SDAG-GFX10-NEXT:    v_mov_b32_e32 v1, s0
+; SDAG-GFX10-NEXT:    global_store_dword v0, v1, s[2:3]
+; SDAG-GFX10-NEXT:    s_endpgm
+;
+; GISEL-GFX11-LABEL: v_icmp_i16_sle:
+; GISEL-GFX11:       ; %bb.0:
+; GISEL-GFX11-NEXT:    s_clause 0x1
+; GISEL-GFX11-NEXT:    s_load_b32 s2, s[0:1], 0x2c
+; GISEL-GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x24
+; GISEL-GFX11-NEXT:    v_mov_b32_e32 v1, 0
+; GISEL-GFX11-NEXT:    s_waitcnt lgkmcnt(0)
+; GISEL-GFX11-NEXT:    v_cmp_ge_i16_e64 s2, 0x64, s2
+; GISEL-GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GISEL-GFX11-NEXT:    v_mov_b32_e32 v0, s2
+; GISEL-GFX11-NEXT:    global_store_b32 v1, v0, s[0:1]
+; GISEL-GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GISEL-GFX11-NEXT:    s_endpgm
+;
+; GISEL-GFX10-LABEL: v_icmp_i16_sle:
+; GISEL-GFX10:       ; %bb.0:
+; GISEL-GFX10-NEXT:    s_clause 0x1
+; GISEL-GFX10-NEXT:    s_load_dword s4, s[0:1], 0x2c
+; GISEL-GFX10-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x24
+; GISEL-GFX10-NEXT:    v_mov_b32_e32 v1, 0
+; GISEL-GFX10-NEXT:    s_waitcnt lgkmcnt(0)
+; GISEL-GFX10-NEXT:    v_cmp_ge_i16_e64 s0, 0x64, s4
+; GISEL-GFX10-NEXT:    v_mov_b32_e32 v0, s0
+; GISEL-GFX10-NEXT:    global_store_dword v1, v0, s[2:3]
+; GISEL-GFX10-NEXT:    s_endpgm
+  %result = call i32 @llvm.amdgcn.icmp.i16(i16 %src, i16 100, i32 41) ; cc 41 = signed less-or-equal (src <= 100)
+  store i32 %result, i32 addrspace(1)* %out
+  ret void
+}
+
+define amdgpu_kernel void @v_icmp_i1_ne0(i32 addrspace(1)* %out, i32 %a, i32 %b) {
+; GFX11-LABEL: v_icmp_i1_ne0:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    s_load_b128 s[0:3], s[0:1], 0x24
+; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-NEXT:    s_cmp_gt_u32 s2, 1
+; GFX11-NEXT:    s_cselect_b32 s2, -1, 0
+; GFX11-NEXT:    s_cmp_gt_u32 s3, 2
+; GFX11-NEXT:    s_cselect_b32 s3, -1, 0
+; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX11-NEXT:    s_and_b32 s2, s2, s3
+; GFX11-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
+; GFX11-NEXT:    global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX11-NEXT:    s_endpgm
+;
+; GFX10-LABEL: v_icmp_i1_ne0:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
+; GFX10-NEXT:    v_mov_b32_e32 v0, 0
+; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX10-NEXT:    s_cmp_gt_u32 s2, 1
+; GFX10-NEXT:    s_cselect_b32 s2, -1, 0
+; GFX10-NEXT:    s_cmp_gt_u32 s3, 2
+; GFX10-NEXT:    s_cselect_b32 s3, -1, 0
+; GFX10-NEXT:    s_and_b32 s2, s2, s3
+; GFX10-NEXT:    v_mov_b32_e32 v1, s2
+; GFX10-NEXT:    global_store_dword v0, v1, s[0:1]
+; GFX10-NEXT:    s_endpgm
+  %c0 = icmp ugt i32 %a, 1
+  %c1 = icmp ugt i32 %b, 2
+  %src = and i1 %c0, %c1
+  %result = call i32 @llvm.amdgcn.icmp.i1(i1 %src, i1 false, i32 33) ; cc 33 = not-equal, i.e. %src != false; the checks store the s_and_b32 result directly
+  store i32 %result, i32 addrspace(1)* %out
+  ret void
+}
+
+define amdgpu_ps void @test_intr_icmp_i32_invalid_cc(i32 addrspace(1)* %out, i32 %src) {
+; SDAG-GFX11-LABEL: test_intr_icmp_i32_invalid_cc:
+; SDAG-GFX11:       ; %bb.0:
+; SDAG-GFX11-NEXT:    s_endpgm
+;
+; SDAG-GFX10-LABEL: test_intr_icmp_i32_invalid_cc:
+; SDAG-GFX10:       ; %bb.0:
+; SDAG-GFX10-NEXT:    s_endpgm
+;
+; GISEL-GFX11-LABEL: test_intr_icmp_i32_invalid_cc:
+; GISEL-GFX11:       ; %bb.0:
+; GISEL-GFX11-NEXT:    global_store_b32 v[0:1], v0, off
+; GISEL-GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GISEL-GFX11-NEXT:    s_endpgm
+;
+; GISEL-GFX10-LABEL: test_intr_icmp_i32_invalid_cc:
+; GISEL-GFX10:       ; %bb.0:
+; GISEL-GFX10-NEXT:    global_store_dword v[0:1], v0, off
+; GISEL-GFX10-NEXT:    s_endpgm
+  %result = call i32 @llvm.amdgcn.icmp.i32(i32 %src, i32 100, i32 9999) ; 9999 is the invalid cc under test; no compare appears in the checks above
+  store i32 %result, i32 addrspace(1)* %out
+  ret void
+}
+
+attributes #0 = { nounwind readnone convergent }
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; GCN: {{.*}}

diff  --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.icmp.w64.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.icmp.w64.ll
new file mode 100644
index 0000000000000..f76973f791b2a
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.icmp.w64.ll
@@ -0,0 +1,2161 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -march=amdgcn -mcpu=gfx1100 -mattr="+wavefrontsize64" -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX11,SDAG-GFX11 %s
+; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VI,SDAG-VI %s
+; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9,SDAG-GFX9 %s
+
+; RUN: llc -global-isel -global-isel-abort=2 -march=amdgcn -mcpu=gfx1100 -mattr="+wavefrontsize64" -verify-machineinstrs < %s 2>%t | FileCheck -check-prefixes=GCN,GFX11,GISEL-GFX11 %s
+; RUN: FileCheck --check-prefix=ERR %s < %t
+; RUN: llc -global-isel -global-isel-abort=2 -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s 2>%t | FileCheck -check-prefixes=GCN,VI,GISEL-VI %s
+; RUN: FileCheck --check-prefix=ERR %s < %t
+; RUN: llc -global-isel -global-isel-abort=2 -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s 2>%t | FileCheck -check-prefixes=GCN,GFX9,GISEL-GFX9 %s
+; RUN: FileCheck --check-prefix=ERR %s < %t
+
+; Note: -global-isel-abort=2 makes GlobalISel fall back to SelectionDAG with a
+;  warning instead of aborting on i1 inputs. They are allowed in DAGISel but we
+;  (intentionally) don't support them in GlobalISel.
+
+; ERR: warning: Instruction selection used fallback path for v_icmp_i1_ne0
+
+declare i64 @llvm.amdgcn.icmp.i32(i32, i32, i32) #0
+declare i64 @llvm.amdgcn.icmp.i64(i64, i64, i32) #0
+declare i64 @llvm.amdgcn.icmp.i16(i16, i16, i32) #0
+declare i64 @llvm.amdgcn.icmp.i1(i1, i1, i32) #0
+
+define amdgpu_kernel void @v_icmp_i32_eq(i64 addrspace(1)* %out, i32 %src) {
+; GFX11-LABEL: v_icmp_i32_eq:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    s_clause 0x1
+; GFX11-NEXT:    s_load_b32 s2, s[0:1], 0x2c
+; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x24
+; GFX11-NEXT:    v_mov_b32_e32 v2, 0
+; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-NEXT:    v_cmp_eq_u32_e64 s[2:3], 0x64, s2
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-NEXT:    v_mov_b32_e32 v0, s2
+; GFX11-NEXT:    v_mov_b32_e32 v1, s3
+; GFX11-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX11-NEXT:    s_endpgm
+;
+; SDAG-VI-LABEL: v_icmp_i32_eq:
+; SDAG-VI:       ; %bb.0:
+; SDAG-VI-NEXT:    s_load_dword s2, s[0:1], 0x2c
+; SDAG-VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
+; SDAG-VI-NEXT:    v_mov_b32_e32 v0, 0x64
+; SDAG-VI-NEXT:    s_waitcnt lgkmcnt(0)
+; SDAG-VI-NEXT:    v_cmp_eq_u32_e64 s[2:3], s2, v0
+; SDAG-VI-NEXT:    v_mov_b32_e32 v0, s0
+; SDAG-VI-NEXT:    v_mov_b32_e32 v2, s2
+; SDAG-VI-NEXT:    v_mov_b32_e32 v1, s1
+; SDAG-VI-NEXT:    v_mov_b32_e32 v3, s3
+; SDAG-VI-NEXT:    flat_store_dwordx2 v[0:1], v[2:3]
+; SDAG-VI-NEXT:    s_endpgm
+;
+; GFX9-LABEL: v_icmp_i32_eq:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_load_dword s4, s[0:1], 0x2c
+; GFX9-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x24
+; GFX9-NEXT:    v_mov_b32_e32 v0, 0x64
+; GFX9-NEXT:    v_mov_b32_e32 v2, 0
+; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-NEXT:    v_cmp_eq_u32_e64 s[0:1], s4, v0
+; GFX9-NEXT:    v_mov_b32_e32 v0, s0
+; GFX9-NEXT:    v_mov_b32_e32 v1, s1
+; GFX9-NEXT:    global_store_dwordx2 v2, v[0:1], s[2:3]
+; GFX9-NEXT:    s_endpgm
+;
+; GISEL-VI-LABEL: v_icmp_i32_eq:
+; GISEL-VI:       ; %bb.0:
+; GISEL-VI-NEXT:    s_load_dword s2, s[0:1], 0x2c
+; GISEL-VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
+; GISEL-VI-NEXT:    v_mov_b32_e32 v0, 0x64
+; GISEL-VI-NEXT:    s_waitcnt lgkmcnt(0)
+; GISEL-VI-NEXT:    v_cmp_eq_u32_e64 s[2:3], s2, v0
+; GISEL-VI-NEXT:    v_mov_b32_e32 v0, s2
+; GISEL-VI-NEXT:    v_mov_b32_e32 v3, s1
+; GISEL-VI-NEXT:    v_mov_b32_e32 v1, s3
+; GISEL-VI-NEXT:    v_mov_b32_e32 v2, s0
+; GISEL-VI-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
+; GISEL-VI-NEXT:    s_endpgm
+  %result = call i64 @llvm.amdgcn.icmp.i32(i32 %src, i32 100, i32 32) ; cc 32 = equal (src == 100)
+  store i64 %result, i64 addrspace(1)* %out
+  ret void
+}
+
+define amdgpu_kernel void @v_icmp_i32(i64 addrspace(1)* %out, i32 %src) {
+; SDAG-GFX11-LABEL: v_icmp_i32:
+; SDAG-GFX11:       ; %bb.0:
+; SDAG-GFX11-NEXT:    s_endpgm
+;
+; SDAG-VI-LABEL: v_icmp_i32:
+; SDAG-VI:       ; %bb.0:
+; SDAG-VI-NEXT:    s_endpgm
+;
+; SDAG-GFX9-LABEL: v_icmp_i32:
+; SDAG-GFX9:       ; %bb.0:
+; SDAG-GFX9-NEXT:    s_endpgm
+;
+; GISEL-GFX11-LABEL: v_icmp_i32:
+; GISEL-GFX11:       ; %bb.0:
+; GISEL-GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x24
+; GISEL-GFX11-NEXT:    v_mov_b32_e32 v0, 0
+; GISEL-GFX11-NEXT:    s_waitcnt lgkmcnt(0)
+; GISEL-GFX11-NEXT:    global_store_b64 v0, v[0:1], s[0:1]
+; GISEL-GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GISEL-GFX11-NEXT:    s_endpgm
+;
+; GISEL-VI-LABEL: v_icmp_i32:
+; GISEL-VI:       ; %bb.0:
+; GISEL-VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
+; GISEL-VI-NEXT:    s_waitcnt lgkmcnt(0)
+; GISEL-VI-NEXT:    v_mov_b32_e32 v0, s0
+; GISEL-VI-NEXT:    v_mov_b32_e32 v1, s1
+; GISEL-VI-NEXT:    flat_store_dwordx2 v[0:1], v[0:1]
+; GISEL-VI-NEXT:    s_endpgm
+;
+; GISEL-GFX9-LABEL: v_icmp_i32:
+; GISEL-GFX9:       ; %bb.0:
+; GISEL-GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
+; GISEL-GFX9-NEXT:    v_mov_b32_e32 v0, 0
+; GISEL-GFX9-NEXT:    s_waitcnt lgkmcnt(0)
+; GISEL-GFX9-NEXT:    global_store_dwordx2 v0, v[0:1], s[0:1]
+; GISEL-GFX9-NEXT:    s_endpgm
+  %result = call i64 @llvm.amdgcn.icmp.i32(i32 %src, i32 100, i32 30) ; cc 30 yields no v_cmp in the checks above (presumably treated as an invalid code - confirm)
+  store i64 %result, i64 addrspace(1)* %out
+  ret void
+}
+
+define amdgpu_kernel void @v_icmp_i32_ne(i64 addrspace(1)* %out, i32 %src) {
+; GFX11-LABEL: v_icmp_i32_ne:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    s_clause 0x1
+; GFX11-NEXT:    s_load_b32 s2, s[0:1], 0x2c
+; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x24
+; GFX11-NEXT:    v_mov_b32_e32 v2, 0
+; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-NEXT:    v_cmp_ne_u32_e64 s[2:3], 0x64, s2
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-NEXT:    v_mov_b32_e32 v0, s2
+; GFX11-NEXT:    v_mov_b32_e32 v1, s3
+; GFX11-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX11-NEXT:    s_endpgm
+;
+; SDAG-VI-LABEL: v_icmp_i32_ne:
+; SDAG-VI:       ; %bb.0:
+; SDAG-VI-NEXT:    s_load_dword s2, s[0:1], 0x2c
+; SDAG-VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
+; SDAG-VI-NEXT:    v_mov_b32_e32 v0, 0x64
+; SDAG-VI-NEXT:    s_waitcnt lgkmcnt(0)
+; SDAG-VI-NEXT:    v_cmp_ne_u32_e64 s[2:3], s2, v0
+; SDAG-VI-NEXT:    v_mov_b32_e32 v0, s0
+; SDAG-VI-NEXT:    v_mov_b32_e32 v2, s2
+; SDAG-VI-NEXT:    v_mov_b32_e32 v1, s1
+; SDAG-VI-NEXT:    v_mov_b32_e32 v3, s3
+; SDAG-VI-NEXT:    flat_store_dwordx2 v[0:1], v[2:3]
+; SDAG-VI-NEXT:    s_endpgm
+;
+; GFX9-LABEL: v_icmp_i32_ne:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_load_dword s4, s[0:1], 0x2c
+; GFX9-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x24
+; GFX9-NEXT:    v_mov_b32_e32 v0, 0x64
+; GFX9-NEXT:    v_mov_b32_e32 v2, 0
+; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-NEXT:    v_cmp_ne_u32_e64 s[0:1], s4, v0
+; GFX9-NEXT:    v_mov_b32_e32 v0, s0
+; GFX9-NEXT:    v_mov_b32_e32 v1, s1
+; GFX9-NEXT:    global_store_dwordx2 v2, v[0:1], s[2:3]
+; GFX9-NEXT:    s_endpgm
+;
+; GISEL-VI-LABEL: v_icmp_i32_ne:
+; GISEL-VI:       ; %bb.0:
+; GISEL-VI-NEXT:    s_load_dword s2, s[0:1], 0x2c
+; GISEL-VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
+; GISEL-VI-NEXT:    v_mov_b32_e32 v0, 0x64
+; GISEL-VI-NEXT:    s_waitcnt lgkmcnt(0)
+; GISEL-VI-NEXT:    v_cmp_ne_u32_e64 s[2:3], s2, v0
+; GISEL-VI-NEXT:    v_mov_b32_e32 v0, s2
+; GISEL-VI-NEXT:    v_mov_b32_e32 v3, s1
+; GISEL-VI-NEXT:    v_mov_b32_e32 v1, s3
+; GISEL-VI-NEXT:    v_mov_b32_e32 v2, s0
+; GISEL-VI-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
+; GISEL-VI-NEXT:    s_endpgm
+  %result = call i64 @llvm.amdgcn.icmp.i32(i32 %src, i32 100, i32 33) ; cc 33 = not-equal (src != 100)
+  store i64 %result, i64 addrspace(1)* %out
+  ret void
+}
+
+define amdgpu_kernel void @v_icmp_i32_ugt(i64 addrspace(1)* %out, i32 %src) {
+; GFX11-LABEL: v_icmp_i32_ugt:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    s_clause 0x1
+; GFX11-NEXT:    s_load_b32 s2, s[0:1], 0x2c
+; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x24
+; GFX11-NEXT:    v_mov_b32_e32 v2, 0
+; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-NEXT:    v_cmp_lt_u32_e64 s[2:3], 0x64, s2
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-NEXT:    v_mov_b32_e32 v0, s2
+; GFX11-NEXT:    v_mov_b32_e32 v1, s3
+; GFX11-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX11-NEXT:    s_endpgm
+;
+; SDAG-VI-LABEL: v_icmp_i32_ugt:
+; SDAG-VI:       ; %bb.0:
+; SDAG-VI-NEXT:    s_load_dword s2, s[0:1], 0x2c
+; SDAG-VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
+; SDAG-VI-NEXT:    v_mov_b32_e32 v0, 0x64
+; SDAG-VI-NEXT:    s_waitcnt lgkmcnt(0)
+; SDAG-VI-NEXT:    v_cmp_gt_u32_e64 s[2:3], s2, v0
+; SDAG-VI-NEXT:    v_mov_b32_e32 v0, s0
+; SDAG-VI-NEXT:    v_mov_b32_e32 v2, s2
+; SDAG-VI-NEXT:    v_mov_b32_e32 v1, s1
+; SDAG-VI-NEXT:    v_mov_b32_e32 v3, s3
+; SDAG-VI-NEXT:    flat_store_dwordx2 v[0:1], v[2:3]
+; SDAG-VI-NEXT:    s_endpgm
+;
+; GFX9-LABEL: v_icmp_i32_ugt:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_load_dword s4, s[0:1], 0x2c
+; GFX9-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x24
+; GFX9-NEXT:    v_mov_b32_e32 v0, 0x64
+; GFX9-NEXT:    v_mov_b32_e32 v2, 0
+; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-NEXT:    v_cmp_gt_u32_e64 s[0:1], s4, v0
+; GFX9-NEXT:    v_mov_b32_e32 v0, s0
+; GFX9-NEXT:    v_mov_b32_e32 v1, s1
+; GFX9-NEXT:    global_store_dwordx2 v2, v[0:1], s[2:3]
+; GFX9-NEXT:    s_endpgm
+;
+; GISEL-VI-LABEL: v_icmp_i32_ugt:
+; GISEL-VI:       ; %bb.0:
+; GISEL-VI-NEXT:    s_load_dword s2, s[0:1], 0x2c
+; GISEL-VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
+; GISEL-VI-NEXT:    v_mov_b32_e32 v0, 0x64
+; GISEL-VI-NEXT:    s_waitcnt lgkmcnt(0)
+; GISEL-VI-NEXT:    v_cmp_gt_u32_e64 s[2:3], s2, v0
+; GISEL-VI-NEXT:    v_mov_b32_e32 v0, s2
+; GISEL-VI-NEXT:    v_mov_b32_e32 v3, s1
+; GISEL-VI-NEXT:    v_mov_b32_e32 v1, s3
+; GISEL-VI-NEXT:    v_mov_b32_e32 v2, s0
+; GISEL-VI-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
+; GISEL-VI-NEXT:    s_endpgm
+  %result = call i64 @llvm.amdgcn.icmp.i32(i32 %src, i32 100, i32 34) ; cc 34 = unsigned greater-than (src > 100)
+  store i64 %result, i64 addrspace(1)* %out
+  ret void
+}
+
+define amdgpu_kernel void @v_icmp_i32_uge(i64 addrspace(1)* %out, i32 %src) {
+; GFX11-LABEL: v_icmp_i32_uge:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    s_clause 0x1
+; GFX11-NEXT:    s_load_b32 s2, s[0:1], 0x2c
+; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x24
+; GFX11-NEXT:    v_mov_b32_e32 v2, 0
+; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-NEXT:    v_cmp_le_u32_e64 s[2:3], 0x64, s2
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-NEXT:    v_mov_b32_e32 v0, s2
+; GFX11-NEXT:    v_mov_b32_e32 v1, s3
+; GFX11-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX11-NEXT:    s_endpgm
+;
+; SDAG-VI-LABEL: v_icmp_i32_uge:
+; SDAG-VI:       ; %bb.0:
+; SDAG-VI-NEXT:    s_load_dword s2, s[0:1], 0x2c
+; SDAG-VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
+; SDAG-VI-NEXT:    v_mov_b32_e32 v0, 0x64
+; SDAG-VI-NEXT:    s_waitcnt lgkmcnt(0)
+; SDAG-VI-NEXT:    v_cmp_ge_u32_e64 s[2:3], s2, v0
+; SDAG-VI-NEXT:    v_mov_b32_e32 v0, s0
+; SDAG-VI-NEXT:    v_mov_b32_e32 v2, s2
+; SDAG-VI-NEXT:    v_mov_b32_e32 v1, s1
+; SDAG-VI-NEXT:    v_mov_b32_e32 v3, s3
+; SDAG-VI-NEXT:    flat_store_dwordx2 v[0:1], v[2:3]
+; SDAG-VI-NEXT:    s_endpgm
+;
+; GFX9-LABEL: v_icmp_i32_uge:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_load_dword s4, s[0:1], 0x2c
+; GFX9-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x24
+; GFX9-NEXT:    v_mov_b32_e32 v0, 0x64
+; GFX9-NEXT:    v_mov_b32_e32 v2, 0
+; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-NEXT:    v_cmp_ge_u32_e64 s[0:1], s4, v0
+; GFX9-NEXT:    v_mov_b32_e32 v0, s0
+; GFX9-NEXT:    v_mov_b32_e32 v1, s1
+; GFX9-NEXT:    global_store_dwordx2 v2, v[0:1], s[2:3]
+; GFX9-NEXT:    s_endpgm
+;
+; GISEL-VI-LABEL: v_icmp_i32_uge:
+; GISEL-VI:       ; %bb.0:
+; GISEL-VI-NEXT:    s_load_dword s2, s[0:1], 0x2c
+; GISEL-VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
+; GISEL-VI-NEXT:    v_mov_b32_e32 v0, 0x64
+; GISEL-VI-NEXT:    s_waitcnt lgkmcnt(0)
+; GISEL-VI-NEXT:    v_cmp_ge_u32_e64 s[2:3], s2, v0
+; GISEL-VI-NEXT:    v_mov_b32_e32 v0, s2
+; GISEL-VI-NEXT:    v_mov_b32_e32 v3, s1
+; GISEL-VI-NEXT:    v_mov_b32_e32 v1, s3
+; GISEL-VI-NEXT:    v_mov_b32_e32 v2, s0
+; GISEL-VI-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
+; GISEL-VI-NEXT:    s_endpgm
+  %result = call i64 @llvm.amdgcn.icmp.i32(i32 %src, i32 100, i32 35) ; cc 35 = unsigned greater-or-equal (src >= 100)
+  store i64 %result, i64 addrspace(1)* %out
+  ret void
+}
+
+define amdgpu_kernel void @v_icmp_i32_ult(i64 addrspace(1)* %out, i32 %src) {
+; GFX11-LABEL: v_icmp_i32_ult:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    s_clause 0x1
+; GFX11-NEXT:    s_load_b32 s2, s[0:1], 0x2c
+; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x24
+; GFX11-NEXT:    v_mov_b32_e32 v2, 0
+; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-NEXT:    v_cmp_gt_u32_e64 s[2:3], 0x64, s2
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-NEXT:    v_mov_b32_e32 v0, s2
+; GFX11-NEXT:    v_mov_b32_e32 v1, s3
+; GFX11-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX11-NEXT:    s_endpgm
+;
+; SDAG-VI-LABEL: v_icmp_i32_ult:
+; SDAG-VI:       ; %bb.0:
+; SDAG-VI-NEXT:    s_load_dword s2, s[0:1], 0x2c
+; SDAG-VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
+; SDAG-VI-NEXT:    v_mov_b32_e32 v0, 0x64
+; SDAG-VI-NEXT:    s_waitcnt lgkmcnt(0)
+; SDAG-VI-NEXT:    v_cmp_lt_u32_e64 s[2:3], s2, v0
+; SDAG-VI-NEXT:    v_mov_b32_e32 v0, s0
+; SDAG-VI-NEXT:    v_mov_b32_e32 v2, s2
+; SDAG-VI-NEXT:    v_mov_b32_e32 v1, s1
+; SDAG-VI-NEXT:    v_mov_b32_e32 v3, s3
+; SDAG-VI-NEXT:    flat_store_dwordx2 v[0:1], v[2:3]
+; SDAG-VI-NEXT:    s_endpgm
+;
+; GFX9-LABEL: v_icmp_i32_ult:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_load_dword s4, s[0:1], 0x2c
+; GFX9-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x24
+; GFX9-NEXT:    v_mov_b32_e32 v0, 0x64
+; GFX9-NEXT:    v_mov_b32_e32 v2, 0
+; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-NEXT:    v_cmp_lt_u32_e64 s[0:1], s4, v0
+; GFX9-NEXT:    v_mov_b32_e32 v0, s0
+; GFX9-NEXT:    v_mov_b32_e32 v1, s1
+; GFX9-NEXT:    global_store_dwordx2 v2, v[0:1], s[2:3]
+; GFX9-NEXT:    s_endpgm
+;
+; GISEL-VI-LABEL: v_icmp_i32_ult:
+; GISEL-VI:       ; %bb.0:
+; GISEL-VI-NEXT:    s_load_dword s2, s[0:1], 0x2c
+; GISEL-VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
+; GISEL-VI-NEXT:    v_mov_b32_e32 v0, 0x64
+; GISEL-VI-NEXT:    s_waitcnt lgkmcnt(0)
+; GISEL-VI-NEXT:    v_cmp_lt_u32_e64 s[2:3], s2, v0
+; GISEL-VI-NEXT:    v_mov_b32_e32 v0, s2
+; GISEL-VI-NEXT:    v_mov_b32_e32 v3, s1
+; GISEL-VI-NEXT:    v_mov_b32_e32 v1, s3
+; GISEL-VI-NEXT:    v_mov_b32_e32 v2, s0
+; GISEL-VI-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
+; GISEL-VI-NEXT:    s_endpgm
+  %result = call i64 @llvm.amdgcn.icmp.i32(i32 %src, i32 100, i32 36) ; cc 36 = unsigned less-than (src < 100)
+  store i64 %result, i64 addrspace(1)* %out
+  ret void
+}
+
+define amdgpu_kernel void @v_icmp_i32_ule(i64 addrspace(1)* %out, i32 %src) {
+; GFX11-LABEL: v_icmp_i32_ule:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    s_clause 0x1
+; GFX11-NEXT:    s_load_b32 s2, s[0:1], 0x2c
+; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x24
+; GFX11-NEXT:    v_mov_b32_e32 v2, 0
+; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-NEXT:    v_cmp_ge_u32_e64 s[2:3], 0x64, s2
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-NEXT:    v_mov_b32_e32 v0, s2
+; GFX11-NEXT:    v_mov_b32_e32 v1, s3
+; GFX11-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX11-NEXT:    s_endpgm
+;
+; SDAG-VI-LABEL: v_icmp_i32_ule:
+; SDAG-VI:       ; %bb.0:
+; SDAG-VI-NEXT:    s_load_dword s2, s[0:1], 0x2c
+; SDAG-VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
+; SDAG-VI-NEXT:    v_mov_b32_e32 v0, 0x64
+; SDAG-VI-NEXT:    s_waitcnt lgkmcnt(0)
+; SDAG-VI-NEXT:    v_cmp_le_u32_e64 s[2:3], s2, v0
+; SDAG-VI-NEXT:    v_mov_b32_e32 v0, s0
+; SDAG-VI-NEXT:    v_mov_b32_e32 v2, s2
+; SDAG-VI-NEXT:    v_mov_b32_e32 v1, s1
+; SDAG-VI-NEXT:    v_mov_b32_e32 v3, s3
+; SDAG-VI-NEXT:    flat_store_dwordx2 v[0:1], v[2:3]
+; SDAG-VI-NEXT:    s_endpgm
+;
+; GFX9-LABEL: v_icmp_i32_ule:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_load_dword s4, s[0:1], 0x2c
+; GFX9-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x24
+; GFX9-NEXT:    v_mov_b32_e32 v0, 0x64
+; GFX9-NEXT:    v_mov_b32_e32 v2, 0
+; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-NEXT:    v_cmp_le_u32_e64 s[0:1], s4, v0
+; GFX9-NEXT:    v_mov_b32_e32 v0, s0
+; GFX9-NEXT:    v_mov_b32_e32 v1, s1
+; GFX9-NEXT:    global_store_dwordx2 v2, v[0:1], s[2:3]
+; GFX9-NEXT:    s_endpgm
+;
+; GISEL-VI-LABEL: v_icmp_i32_ule:
+; GISEL-VI:       ; %bb.0:
+; GISEL-VI-NEXT:    s_load_dword s2, s[0:1], 0x2c
+; GISEL-VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
+; GISEL-VI-NEXT:    v_mov_b32_e32 v0, 0x64
+; GISEL-VI-NEXT:    s_waitcnt lgkmcnt(0)
+; GISEL-VI-NEXT:    v_cmp_le_u32_e64 s[2:3], s2, v0
+; GISEL-VI-NEXT:    v_mov_b32_e32 v0, s2
+; GISEL-VI-NEXT:    v_mov_b32_e32 v3, s1
+; GISEL-VI-NEXT:    v_mov_b32_e32 v1, s3
+; GISEL-VI-NEXT:    v_mov_b32_e32 v2, s0
+; GISEL-VI-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
+; GISEL-VI-NEXT:    s_endpgm
+  %result = call i64 @llvm.amdgcn.icmp.i32(i32 %src, i32 100, i32 37) ; cc 37 = unsigned less-or-equal (src <= 100)
+  store i64 %result, i64 addrspace(1)* %out
+  ret void
+}
+
+define amdgpu_kernel void @v_icmp_i32_sgt(i64 addrspace(1)* %out, i32 %src) #1 {
+; GFX11-LABEL: v_icmp_i32_sgt:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    s_clause 0x1
+; GFX11-NEXT:    s_load_b32 s2, s[0:1], 0x2c
+; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x24
+; GFX11-NEXT:    v_mov_b32_e32 v2, 0
+; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-NEXT:    v_cmp_lt_i32_e64 s[2:3], 0x64, s2
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-NEXT:    v_mov_b32_e32 v0, s2
+; GFX11-NEXT:    v_mov_b32_e32 v1, s3
+; GFX11-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX11-NEXT:    s_endpgm
+;
+; SDAG-VI-LABEL: v_icmp_i32_sgt:
+; SDAG-VI:       ; %bb.0:
+; SDAG-VI-NEXT:    s_load_dword s2, s[0:1], 0x2c
+; SDAG-VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
+; SDAG-VI-NEXT:    v_mov_b32_e32 v0, 0x64
+; SDAG-VI-NEXT:    s_waitcnt lgkmcnt(0)
+; SDAG-VI-NEXT:    v_cmp_gt_i32_e64 s[2:3], s2, v0
+; SDAG-VI-NEXT:    v_mov_b32_e32 v0, s0
+; SDAG-VI-NEXT:    v_mov_b32_e32 v2, s2
+; SDAG-VI-NEXT:    v_mov_b32_e32 v1, s1
+; SDAG-VI-NEXT:    v_mov_b32_e32 v3, s3
+; SDAG-VI-NEXT:    flat_store_dwordx2 v[0:1], v[2:3]
+; SDAG-VI-NEXT:    s_endpgm
+;
+; GFX9-LABEL: v_icmp_i32_sgt:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_load_dword s4, s[0:1], 0x2c
+; GFX9-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x24
+; GFX9-NEXT:    v_mov_b32_e32 v0, 0x64
+; GFX9-NEXT:    v_mov_b32_e32 v2, 0
+; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-NEXT:    v_cmp_gt_i32_e64 s[0:1], s4, v0
+; GFX9-NEXT:    v_mov_b32_e32 v0, s0
+; GFX9-NEXT:    v_mov_b32_e32 v1, s1
+; GFX9-NEXT:    global_store_dwordx2 v2, v[0:1], s[2:3]
+; GFX9-NEXT:    s_endpgm
+;
+; GISEL-VI-LABEL: v_icmp_i32_sgt:
+; GISEL-VI:       ; %bb.0:
+; GISEL-VI-NEXT:    s_load_dword s2, s[0:1], 0x2c
+; GISEL-VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
+; GISEL-VI-NEXT:    v_mov_b32_e32 v0, 0x64
+; GISEL-VI-NEXT:    s_waitcnt lgkmcnt(0)
+; GISEL-VI-NEXT:    v_cmp_gt_i32_e64 s[2:3], s2, v0
+; GISEL-VI-NEXT:    v_mov_b32_e32 v0, s2
+; GISEL-VI-NEXT:    v_mov_b32_e32 v3, s1
+; GISEL-VI-NEXT:    v_mov_b32_e32 v1, s3
+; GISEL-VI-NEXT:    v_mov_b32_e32 v2, s0
+; GISEL-VI-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
+; GISEL-VI-NEXT:    s_endpgm
+  %result = call i64 @llvm.amdgcn.icmp.i32(i32 %src, i32 100, i32 38) ; cc 38 = signed greater-than (src > 100)
+  store i64 %result, i64 addrspace(1)* %out
+  ret void
+}
+
+define amdgpu_kernel void @v_icmp_i32_sge(i64 addrspace(1)* %out, i32 %src) {
+; GFX11-LABEL: v_icmp_i32_sge:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    s_clause 0x1
+; GFX11-NEXT:    s_load_b32 s2, s[0:1], 0x2c
+; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x24
+; GFX11-NEXT:    v_mov_b32_e32 v2, 0
+; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-NEXT:    v_cmp_le_i32_e64 s[2:3], 0x64, s2
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-NEXT:    v_mov_b32_e32 v0, s2
+; GFX11-NEXT:    v_mov_b32_e32 v1, s3
+; GFX11-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX11-NEXT:    s_endpgm
+;
+; SDAG-VI-LABEL: v_icmp_i32_sge:
+; SDAG-VI:       ; %bb.0:
+; SDAG-VI-NEXT:    s_load_dword s2, s[0:1], 0x2c
+; SDAG-VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
+; SDAG-VI-NEXT:    v_mov_b32_e32 v0, 0x64
+; SDAG-VI-NEXT:    s_waitcnt lgkmcnt(0)
+; SDAG-VI-NEXT:    v_cmp_ge_i32_e64 s[2:3], s2, v0
+; SDAG-VI-NEXT:    v_mov_b32_e32 v0, s0
+; SDAG-VI-NEXT:    v_mov_b32_e32 v2, s2
+; SDAG-VI-NEXT:    v_mov_b32_e32 v1, s1
+; SDAG-VI-NEXT:    v_mov_b32_e32 v3, s3
+; SDAG-VI-NEXT:    flat_store_dwordx2 v[0:1], v[2:3]
+; SDAG-VI-NEXT:    s_endpgm
+;
+; GFX9-LABEL: v_icmp_i32_sge:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_load_dword s4, s[0:1], 0x2c
+; GFX9-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x24
+; GFX9-NEXT:    v_mov_b32_e32 v0, 0x64
+; GFX9-NEXT:    v_mov_b32_e32 v2, 0
+; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-NEXT:    v_cmp_ge_i32_e64 s[0:1], s4, v0
+; GFX9-NEXT:    v_mov_b32_e32 v0, s0
+; GFX9-NEXT:    v_mov_b32_e32 v1, s1
+; GFX9-NEXT:    global_store_dwordx2 v2, v[0:1], s[2:3]
+; GFX9-NEXT:    s_endpgm
+;
+; GISEL-VI-LABEL: v_icmp_i32_sge:
+; GISEL-VI:       ; %bb.0:
+; GISEL-VI-NEXT:    s_load_dword s2, s[0:1], 0x2c
+; GISEL-VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
+; GISEL-VI-NEXT:    v_mov_b32_e32 v0, 0x64
+; GISEL-VI-NEXT:    s_waitcnt lgkmcnt(0)
+; GISEL-VI-NEXT:    v_cmp_ge_i32_e64 s[2:3], s2, v0
+; GISEL-VI-NEXT:    v_mov_b32_e32 v0, s2
+; GISEL-VI-NEXT:    v_mov_b32_e32 v3, s1
+; GISEL-VI-NEXT:    v_mov_b32_e32 v1, s3
+; GISEL-VI-NEXT:    v_mov_b32_e32 v2, s0
+; GISEL-VI-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
+; GISEL-VI-NEXT:    s_endpgm
+  %result = call i64 @llvm.amdgcn.icmp.i32(i32 %src, i32 100, i32 39)
+  store i64 %result, i64 addrspace(1)* %out
+  ret void
+}
+
+define amdgpu_kernel void @v_icmp_i32_slt(i64 addrspace(1)* %out, i32 %src) {
+; GFX11-LABEL: v_icmp_i32_slt:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    s_clause 0x1
+; GFX11-NEXT:    s_load_b32 s2, s[0:1], 0x2c
+; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x24
+; GFX11-NEXT:    v_mov_b32_e32 v2, 0
+; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-NEXT:    v_cmp_gt_i32_e64 s[2:3], 0x64, s2
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-NEXT:    v_mov_b32_e32 v0, s2
+; GFX11-NEXT:    v_mov_b32_e32 v1, s3
+; GFX11-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX11-NEXT:    s_endpgm
+;
+; SDAG-VI-LABEL: v_icmp_i32_slt:
+; SDAG-VI:       ; %bb.0:
+; SDAG-VI-NEXT:    s_load_dword s2, s[0:1], 0x2c
+; SDAG-VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
+; SDAG-VI-NEXT:    v_mov_b32_e32 v0, 0x64
+; SDAG-VI-NEXT:    s_waitcnt lgkmcnt(0)
+; SDAG-VI-NEXT:    v_cmp_lt_i32_e64 s[2:3], s2, v0
+; SDAG-VI-NEXT:    v_mov_b32_e32 v0, s0
+; SDAG-VI-NEXT:    v_mov_b32_e32 v2, s2
+; SDAG-VI-NEXT:    v_mov_b32_e32 v1, s1
+; SDAG-VI-NEXT:    v_mov_b32_e32 v3, s3
+; SDAG-VI-NEXT:    flat_store_dwordx2 v[0:1], v[2:3]
+; SDAG-VI-NEXT:    s_endpgm
+;
+; GFX9-LABEL: v_icmp_i32_slt:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_load_dword s4, s[0:1], 0x2c
+; GFX9-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x24
+; GFX9-NEXT:    v_mov_b32_e32 v0, 0x64
+; GFX9-NEXT:    v_mov_b32_e32 v2, 0
+; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-NEXT:    v_cmp_lt_i32_e64 s[0:1], s4, v0
+; GFX9-NEXT:    v_mov_b32_e32 v0, s0
+; GFX9-NEXT:    v_mov_b32_e32 v1, s1
+; GFX9-NEXT:    global_store_dwordx2 v2, v[0:1], s[2:3]
+; GFX9-NEXT:    s_endpgm
+;
+; GISEL-VI-LABEL: v_icmp_i32_slt:
+; GISEL-VI:       ; %bb.0:
+; GISEL-VI-NEXT:    s_load_dword s2, s[0:1], 0x2c
+; GISEL-VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
+; GISEL-VI-NEXT:    v_mov_b32_e32 v0, 0x64
+; GISEL-VI-NEXT:    s_waitcnt lgkmcnt(0)
+; GISEL-VI-NEXT:    v_cmp_lt_i32_e64 s[2:3], s2, v0
+; GISEL-VI-NEXT:    v_mov_b32_e32 v0, s2
+; GISEL-VI-NEXT:    v_mov_b32_e32 v3, s1
+; GISEL-VI-NEXT:    v_mov_b32_e32 v1, s3
+; GISEL-VI-NEXT:    v_mov_b32_e32 v2, s0
+; GISEL-VI-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
+; GISEL-VI-NEXT:    s_endpgm
+  %result = call i64 @llvm.amdgcn.icmp.i32(i32 %src, i32 100, i32 40)
+  store i64 %result, i64 addrspace(1)* %out
+  ret void
+}
+
+define amdgpu_kernel void @v_icmp_i32_sle(i64 addrspace(1)* %out, i32 %src) {
+; GFX11-LABEL: v_icmp_i32_sle:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    s_clause 0x1
+; GFX11-NEXT:    s_load_b32 s2, s[0:1], 0x2c
+; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x24
+; GFX11-NEXT:    v_mov_b32_e32 v2, 0
+; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-NEXT:    v_cmp_ge_i32_e64 s[2:3], 0x64, s2
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-NEXT:    v_mov_b32_e32 v0, s2
+; GFX11-NEXT:    v_mov_b32_e32 v1, s3
+; GFX11-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX11-NEXT:    s_endpgm
+;
+; SDAG-VI-LABEL: v_icmp_i32_sle:
+; SDAG-VI:       ; %bb.0:
+; SDAG-VI-NEXT:    s_load_dword s2, s[0:1], 0x2c
+; SDAG-VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
+; SDAG-VI-NEXT:    v_mov_b32_e32 v0, 0x64
+; SDAG-VI-NEXT:    s_waitcnt lgkmcnt(0)
+; SDAG-VI-NEXT:    v_cmp_le_i32_e64 s[2:3], s2, v0
+; SDAG-VI-NEXT:    v_mov_b32_e32 v0, s0
+; SDAG-VI-NEXT:    v_mov_b32_e32 v2, s2
+; SDAG-VI-NEXT:    v_mov_b32_e32 v1, s1
+; SDAG-VI-NEXT:    v_mov_b32_e32 v3, s3
+; SDAG-VI-NEXT:    flat_store_dwordx2 v[0:1], v[2:3]
+; SDAG-VI-NEXT:    s_endpgm
+;
+; GFX9-LABEL: v_icmp_i32_sle:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_load_dword s4, s[0:1], 0x2c
+; GFX9-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x24
+; GFX9-NEXT:    v_mov_b32_e32 v0, 0x64
+; GFX9-NEXT:    v_mov_b32_e32 v2, 0
+; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-NEXT:    v_cmp_le_i32_e64 s[0:1], s4, v0
+; GFX9-NEXT:    v_mov_b32_e32 v0, s0
+; GFX9-NEXT:    v_mov_b32_e32 v1, s1
+; GFX9-NEXT:    global_store_dwordx2 v2, v[0:1], s[2:3]
+; GFX9-NEXT:    s_endpgm
+;
+; GISEL-VI-LABEL: v_icmp_i32_sle:
+; GISEL-VI:       ; %bb.0:
+; GISEL-VI-NEXT:    s_load_dword s2, s[0:1], 0x2c
+; GISEL-VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
+; GISEL-VI-NEXT:    v_mov_b32_e32 v0, 0x64
+; GISEL-VI-NEXT:    s_waitcnt lgkmcnt(0)
+; GISEL-VI-NEXT:    v_cmp_le_i32_e64 s[2:3], s2, v0
+; GISEL-VI-NEXT:    v_mov_b32_e32 v0, s2
+; GISEL-VI-NEXT:    v_mov_b32_e32 v3, s1
+; GISEL-VI-NEXT:    v_mov_b32_e32 v1, s3
+; GISEL-VI-NEXT:    v_mov_b32_e32 v2, s0
+; GISEL-VI-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
+; GISEL-VI-NEXT:    s_endpgm
+  %result = call i64 @llvm.amdgcn.icmp.i32(i32 %src, i32 100, i32 41)
+  store i64 %result, i64 addrspace(1)* %out
+  ret void
+}
+
+define amdgpu_kernel void @v_icmp_i64_eq(i64 addrspace(1)* %out, i64 %src) {
+; GFX11-LABEL: v_icmp_i64_eq:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    s_load_b128 s[0:3], s[0:1], 0x24
+; GFX11-NEXT:    s_mov_b64 s[4:5], 0x64
+; GFX11-NEXT:    v_mov_b32_e32 v2, 0
+; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-NEXT:    v_cmp_eq_u64_e64 s[2:3], s[2:3], s[4:5]
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-NEXT:    v_mov_b32_e32 v0, s2
+; GFX11-NEXT:    v_mov_b32_e32 v1, s3
+; GFX11-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX11-NEXT:    s_endpgm
+;
+; SDAG-VI-LABEL: v_icmp_i64_eq:
+; SDAG-VI:       ; %bb.0:
+; SDAG-VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
+; SDAG-VI-NEXT:    v_mov_b32_e32 v0, 0x64
+; SDAG-VI-NEXT:    v_mov_b32_e32 v1, 0
+; SDAG-VI-NEXT:    s_waitcnt lgkmcnt(0)
+; SDAG-VI-NEXT:    v_cmp_eq_u64_e64 s[2:3], s[2:3], v[0:1]
+; SDAG-VI-NEXT:    v_mov_b32_e32 v2, s0
+; SDAG-VI-NEXT:    v_mov_b32_e32 v0, s2
+; SDAG-VI-NEXT:    v_mov_b32_e32 v3, s1
+; SDAG-VI-NEXT:    v_mov_b32_e32 v1, s3
+; SDAG-VI-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
+; SDAG-VI-NEXT:    s_endpgm
+;
+; SDAG-GFX9-LABEL: v_icmp_i64_eq:
+; SDAG-GFX9:       ; %bb.0:
+; SDAG-GFX9-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
+; SDAG-GFX9-NEXT:    v_mov_b32_e32 v1, 0
+; SDAG-GFX9-NEXT:    v_mov_b32_e32 v0, 0x64
+; SDAG-GFX9-NEXT:    s_waitcnt lgkmcnt(0)
+; SDAG-GFX9-NEXT:    v_cmp_eq_u64_e64 s[2:3], s[2:3], v[0:1]
+; SDAG-GFX9-NEXT:    v_mov_b32_e32 v2, s2
+; SDAG-GFX9-NEXT:    v_mov_b32_e32 v3, s3
+; SDAG-GFX9-NEXT:    global_store_dwordx2 v1, v[2:3], s[0:1]
+; SDAG-GFX9-NEXT:    s_endpgm
+;
+; GISEL-VI-LABEL: v_icmp_i64_eq:
+; GISEL-VI:       ; %bb.0:
+; GISEL-VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
+; GISEL-VI-NEXT:    s_mov_b64 s[4:5], 0x64
+; GISEL-VI-NEXT:    v_mov_b32_e32 v0, s4
+; GISEL-VI-NEXT:    v_mov_b32_e32 v1, s5
+; GISEL-VI-NEXT:    s_waitcnt lgkmcnt(0)
+; GISEL-VI-NEXT:    v_cmp_eq_u64_e64 s[2:3], s[2:3], v[0:1]
+; GISEL-VI-NEXT:    v_mov_b32_e32 v3, s1
+; GISEL-VI-NEXT:    v_mov_b32_e32 v0, s2
+; GISEL-VI-NEXT:    v_mov_b32_e32 v1, s3
+; GISEL-VI-NEXT:    v_mov_b32_e32 v2, s0
+; GISEL-VI-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
+; GISEL-VI-NEXT:    s_endpgm
+;
+; GISEL-GFX9-LABEL: v_icmp_i64_eq:
+; GISEL-GFX9:       ; %bb.0:
+; GISEL-GFX9-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
+; GISEL-GFX9-NEXT:    s_mov_b64 s[4:5], 0x64
+; GISEL-GFX9-NEXT:    v_mov_b32_e32 v0, s4
+; GISEL-GFX9-NEXT:    v_mov_b32_e32 v1, s5
+; GISEL-GFX9-NEXT:    v_mov_b32_e32 v2, 0
+; GISEL-GFX9-NEXT:    s_waitcnt lgkmcnt(0)
+; GISEL-GFX9-NEXT:    v_cmp_eq_u64_e64 s[2:3], s[2:3], v[0:1]
+; GISEL-GFX9-NEXT:    v_mov_b32_e32 v0, s2
+; GISEL-GFX9-NEXT:    v_mov_b32_e32 v1, s3
+; GISEL-GFX9-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
+; GISEL-GFX9-NEXT:    s_endpgm
+  %result = call i64 @llvm.amdgcn.icmp.i64(i64 %src, i64 100, i32 32)
+  store i64 %result, i64 addrspace(1)* %out
+  ret void
+}
+
+define amdgpu_kernel void @v_icmp_i64_ne(i64 addrspace(1)* %out, i64 %src) {
+; GFX11-LABEL: v_icmp_i64_ne:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    s_load_b128 s[0:3], s[0:1], 0x24
+; GFX11-NEXT:    s_mov_b64 s[4:5], 0x64
+; GFX11-NEXT:    v_mov_b32_e32 v2, 0
+; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-NEXT:    v_cmp_ne_u64_e64 s[2:3], s[2:3], s[4:5]
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-NEXT:    v_mov_b32_e32 v0, s2
+; GFX11-NEXT:    v_mov_b32_e32 v1, s3
+; GFX11-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX11-NEXT:    s_endpgm
+;
+; SDAG-VI-LABEL: v_icmp_i64_ne:
+; SDAG-VI:       ; %bb.0:
+; SDAG-VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
+; SDAG-VI-NEXT:    v_mov_b32_e32 v0, 0x64
+; SDAG-VI-NEXT:    v_mov_b32_e32 v1, 0
+; SDAG-VI-NEXT:    s_waitcnt lgkmcnt(0)
+; SDAG-VI-NEXT:    v_cmp_ne_u64_e64 s[2:3], s[2:3], v[0:1]
+; SDAG-VI-NEXT:    v_mov_b32_e32 v2, s0
+; SDAG-VI-NEXT:    v_mov_b32_e32 v0, s2
+; SDAG-VI-NEXT:    v_mov_b32_e32 v3, s1
+; SDAG-VI-NEXT:    v_mov_b32_e32 v1, s3
+; SDAG-VI-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
+; SDAG-VI-NEXT:    s_endpgm
+;
+; SDAG-GFX9-LABEL: v_icmp_i64_ne:
+; SDAG-GFX9:       ; %bb.0:
+; SDAG-GFX9-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
+; SDAG-GFX9-NEXT:    v_mov_b32_e32 v1, 0
+; SDAG-GFX9-NEXT:    v_mov_b32_e32 v0, 0x64
+; SDAG-GFX9-NEXT:    s_waitcnt lgkmcnt(0)
+; SDAG-GFX9-NEXT:    v_cmp_ne_u64_e64 s[2:3], s[2:3], v[0:1]
+; SDAG-GFX9-NEXT:    v_mov_b32_e32 v2, s2
+; SDAG-GFX9-NEXT:    v_mov_b32_e32 v3, s3
+; SDAG-GFX9-NEXT:    global_store_dwordx2 v1, v[2:3], s[0:1]
+; SDAG-GFX9-NEXT:    s_endpgm
+;
+; GISEL-VI-LABEL: v_icmp_i64_ne:
+; GISEL-VI:       ; %bb.0:
+; GISEL-VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
+; GISEL-VI-NEXT:    s_mov_b64 s[4:5], 0x64
+; GISEL-VI-NEXT:    v_mov_b32_e32 v0, s4
+; GISEL-VI-NEXT:    v_mov_b32_e32 v1, s5
+; GISEL-VI-NEXT:    s_waitcnt lgkmcnt(0)
+; GISEL-VI-NEXT:    v_cmp_ne_u64_e64 s[2:3], s[2:3], v[0:1]
+; GISEL-VI-NEXT:    v_mov_b32_e32 v3, s1
+; GISEL-VI-NEXT:    v_mov_b32_e32 v0, s2
+; GISEL-VI-NEXT:    v_mov_b32_e32 v1, s3
+; GISEL-VI-NEXT:    v_mov_b32_e32 v2, s0
+; GISEL-VI-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
+; GISEL-VI-NEXT:    s_endpgm
+;
+; GISEL-GFX9-LABEL: v_icmp_i64_ne:
+; GISEL-GFX9:       ; %bb.0:
+; GISEL-GFX9-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
+; GISEL-GFX9-NEXT:    s_mov_b64 s[4:5], 0x64
+; GISEL-GFX9-NEXT:    v_mov_b32_e32 v0, s4
+; GISEL-GFX9-NEXT:    v_mov_b32_e32 v1, s5
+; GISEL-GFX9-NEXT:    v_mov_b32_e32 v2, 0
+; GISEL-GFX9-NEXT:    s_waitcnt lgkmcnt(0)
+; GISEL-GFX9-NEXT:    v_cmp_ne_u64_e64 s[2:3], s[2:3], v[0:1]
+; GISEL-GFX9-NEXT:    v_mov_b32_e32 v0, s2
+; GISEL-GFX9-NEXT:    v_mov_b32_e32 v1, s3
+; GISEL-GFX9-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
+; GISEL-GFX9-NEXT:    s_endpgm
+  %result = call i64 @llvm.amdgcn.icmp.i64(i64 %src, i64 100, i32 33)
+  store i64 %result, i64 addrspace(1)* %out
+  ret void
+}
+
+define amdgpu_kernel void @v_icmp_u64_ugt(i64 addrspace(1)* %out, i64 %src) {
+; GFX11-LABEL: v_icmp_u64_ugt:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    s_load_b128 s[0:3], s[0:1], 0x24
+; GFX11-NEXT:    s_mov_b64 s[4:5], 0x64
+; GFX11-NEXT:    v_mov_b32_e32 v2, 0
+; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-NEXT:    v_cmp_gt_u64_e64 s[2:3], s[2:3], s[4:5]
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-NEXT:    v_mov_b32_e32 v0, s2
+; GFX11-NEXT:    v_mov_b32_e32 v1, s3
+; GFX11-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX11-NEXT:    s_endpgm
+;
+; SDAG-VI-LABEL: v_icmp_u64_ugt:
+; SDAG-VI:       ; %bb.0:
+; SDAG-VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
+; SDAG-VI-NEXT:    v_mov_b32_e32 v0, 0x64
+; SDAG-VI-NEXT:    v_mov_b32_e32 v1, 0
+; SDAG-VI-NEXT:    s_waitcnt lgkmcnt(0)
+; SDAG-VI-NEXT:    v_cmp_gt_u64_e64 s[2:3], s[2:3], v[0:1]
+; SDAG-VI-NEXT:    v_mov_b32_e32 v2, s0
+; SDAG-VI-NEXT:    v_mov_b32_e32 v0, s2
+; SDAG-VI-NEXT:    v_mov_b32_e32 v3, s1
+; SDAG-VI-NEXT:    v_mov_b32_e32 v1, s3
+; SDAG-VI-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
+; SDAG-VI-NEXT:    s_endpgm
+;
+; SDAG-GFX9-LABEL: v_icmp_u64_ugt:
+; SDAG-GFX9:       ; %bb.0:
+; SDAG-GFX9-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
+; SDAG-GFX9-NEXT:    v_mov_b32_e32 v1, 0
+; SDAG-GFX9-NEXT:    v_mov_b32_e32 v0, 0x64
+; SDAG-GFX9-NEXT:    s_waitcnt lgkmcnt(0)
+; SDAG-GFX9-NEXT:    v_cmp_gt_u64_e64 s[2:3], s[2:3], v[0:1]
+; SDAG-GFX9-NEXT:    v_mov_b32_e32 v2, s2
+; SDAG-GFX9-NEXT:    v_mov_b32_e32 v3, s3
+; SDAG-GFX9-NEXT:    global_store_dwordx2 v1, v[2:3], s[0:1]
+; SDAG-GFX9-NEXT:    s_endpgm
+;
+; GISEL-VI-LABEL: v_icmp_u64_ugt:
+; GISEL-VI:       ; %bb.0:
+; GISEL-VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
+; GISEL-VI-NEXT:    s_mov_b64 s[4:5], 0x64
+; GISEL-VI-NEXT:    v_mov_b32_e32 v0, s4
+; GISEL-VI-NEXT:    v_mov_b32_e32 v1, s5
+; GISEL-VI-NEXT:    s_waitcnt lgkmcnt(0)
+; GISEL-VI-NEXT:    v_cmp_gt_u64_e64 s[2:3], s[2:3], v[0:1]
+; GISEL-VI-NEXT:    v_mov_b32_e32 v3, s1
+; GISEL-VI-NEXT:    v_mov_b32_e32 v0, s2
+; GISEL-VI-NEXT:    v_mov_b32_e32 v1, s3
+; GISEL-VI-NEXT:    v_mov_b32_e32 v2, s0
+; GISEL-VI-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
+; GISEL-VI-NEXT:    s_endpgm
+;
+; GISEL-GFX9-LABEL: v_icmp_u64_ugt:
+; GISEL-GFX9:       ; %bb.0:
+; GISEL-GFX9-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
+; GISEL-GFX9-NEXT:    s_mov_b64 s[4:5], 0x64
+; GISEL-GFX9-NEXT:    v_mov_b32_e32 v0, s4
+; GISEL-GFX9-NEXT:    v_mov_b32_e32 v1, s5
+; GISEL-GFX9-NEXT:    v_mov_b32_e32 v2, 0
+; GISEL-GFX9-NEXT:    s_waitcnt lgkmcnt(0)
+; GISEL-GFX9-NEXT:    v_cmp_gt_u64_e64 s[2:3], s[2:3], v[0:1]
+; GISEL-GFX9-NEXT:    v_mov_b32_e32 v0, s2
+; GISEL-GFX9-NEXT:    v_mov_b32_e32 v1, s3
+; GISEL-GFX9-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
+; GISEL-GFX9-NEXT:    s_endpgm
+  %result = call i64 @llvm.amdgcn.icmp.i64(i64 %src, i64 100, i32 34)
+  store i64 %result, i64 addrspace(1)* %out
+  ret void
+}
+
+define amdgpu_kernel void @v_icmp_u64_uge(i64 addrspace(1)* %out, i64 %src) {
+; GFX11-LABEL: v_icmp_u64_uge:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    s_load_b128 s[0:3], s[0:1], 0x24
+; GFX11-NEXT:    s_mov_b64 s[4:5], 0x64
+; GFX11-NEXT:    v_mov_b32_e32 v2, 0
+; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-NEXT:    v_cmp_ge_u64_e64 s[2:3], s[2:3], s[4:5]
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-NEXT:    v_mov_b32_e32 v0, s2
+; GFX11-NEXT:    v_mov_b32_e32 v1, s3
+; GFX11-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX11-NEXT:    s_endpgm
+;
+; SDAG-VI-LABEL: v_icmp_u64_uge:
+; SDAG-VI:       ; %bb.0:
+; SDAG-VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
+; SDAG-VI-NEXT:    v_mov_b32_e32 v0, 0x64
+; SDAG-VI-NEXT:    v_mov_b32_e32 v1, 0
+; SDAG-VI-NEXT:    s_waitcnt lgkmcnt(0)
+; SDAG-VI-NEXT:    v_cmp_ge_u64_e64 s[2:3], s[2:3], v[0:1]
+; SDAG-VI-NEXT:    v_mov_b32_e32 v2, s0
+; SDAG-VI-NEXT:    v_mov_b32_e32 v0, s2
+; SDAG-VI-NEXT:    v_mov_b32_e32 v3, s1
+; SDAG-VI-NEXT:    v_mov_b32_e32 v1, s3
+; SDAG-VI-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
+; SDAG-VI-NEXT:    s_endpgm
+;
+; SDAG-GFX9-LABEL: v_icmp_u64_uge:
+; SDAG-GFX9:       ; %bb.0:
+; SDAG-GFX9-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
+; SDAG-GFX9-NEXT:    v_mov_b32_e32 v1, 0
+; SDAG-GFX9-NEXT:    v_mov_b32_e32 v0, 0x64
+; SDAG-GFX9-NEXT:    s_waitcnt lgkmcnt(0)
+; SDAG-GFX9-NEXT:    v_cmp_ge_u64_e64 s[2:3], s[2:3], v[0:1]
+; SDAG-GFX9-NEXT:    v_mov_b32_e32 v2, s2
+; SDAG-GFX9-NEXT:    v_mov_b32_e32 v3, s3
+; SDAG-GFX9-NEXT:    global_store_dwordx2 v1, v[2:3], s[0:1]
+; SDAG-GFX9-NEXT:    s_endpgm
+;
+; GISEL-VI-LABEL: v_icmp_u64_uge:
+; GISEL-VI:       ; %bb.0:
+; GISEL-VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
+; GISEL-VI-NEXT:    s_mov_b64 s[4:5], 0x64
+; GISEL-VI-NEXT:    v_mov_b32_e32 v0, s4
+; GISEL-VI-NEXT:    v_mov_b32_e32 v1, s5
+; GISEL-VI-NEXT:    s_waitcnt lgkmcnt(0)
+; GISEL-VI-NEXT:    v_cmp_ge_u64_e64 s[2:3], s[2:3], v[0:1]
+; GISEL-VI-NEXT:    v_mov_b32_e32 v3, s1
+; GISEL-VI-NEXT:    v_mov_b32_e32 v0, s2
+; GISEL-VI-NEXT:    v_mov_b32_e32 v1, s3
+; GISEL-VI-NEXT:    v_mov_b32_e32 v2, s0
+; GISEL-VI-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
+; GISEL-VI-NEXT:    s_endpgm
+;
+; GISEL-GFX9-LABEL: v_icmp_u64_uge:
+; GISEL-GFX9:       ; %bb.0:
+; GISEL-GFX9-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
+; GISEL-GFX9-NEXT:    s_mov_b64 s[4:5], 0x64
+; GISEL-GFX9-NEXT:    v_mov_b32_e32 v0, s4
+; GISEL-GFX9-NEXT:    v_mov_b32_e32 v1, s5
+; GISEL-GFX9-NEXT:    v_mov_b32_e32 v2, 0
+; GISEL-GFX9-NEXT:    s_waitcnt lgkmcnt(0)
+; GISEL-GFX9-NEXT:    v_cmp_ge_u64_e64 s[2:3], s[2:3], v[0:1]
+; GISEL-GFX9-NEXT:    v_mov_b32_e32 v0, s2
+; GISEL-GFX9-NEXT:    v_mov_b32_e32 v1, s3
+; GISEL-GFX9-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
+; GISEL-GFX9-NEXT:    s_endpgm
+  %result = call i64 @llvm.amdgcn.icmp.i64(i64 %src, i64 100, i32 35)
+  store i64 %result, i64 addrspace(1)* %out
+  ret void
+}
+
+define amdgpu_kernel void @v_icmp_u64_ult(i64 addrspace(1)* %out, i64 %src) {
+; GFX11-LABEL: v_icmp_u64_ult:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    s_load_b128 s[0:3], s[0:1], 0x24
+; GFX11-NEXT:    s_mov_b64 s[4:5], 0x64
+; GFX11-NEXT:    v_mov_b32_e32 v2, 0
+; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-NEXT:    v_cmp_lt_u64_e64 s[2:3], s[2:3], s[4:5]
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-NEXT:    v_mov_b32_e32 v0, s2
+; GFX11-NEXT:    v_mov_b32_e32 v1, s3
+; GFX11-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX11-NEXT:    s_endpgm
+;
+; SDAG-VI-LABEL: v_icmp_u64_ult:
+; SDAG-VI:       ; %bb.0:
+; SDAG-VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
+; SDAG-VI-NEXT:    v_mov_b32_e32 v0, 0x64
+; SDAG-VI-NEXT:    v_mov_b32_e32 v1, 0
+; SDAG-VI-NEXT:    s_waitcnt lgkmcnt(0)
+; SDAG-VI-NEXT:    v_cmp_lt_u64_e64 s[2:3], s[2:3], v[0:1]
+; SDAG-VI-NEXT:    v_mov_b32_e32 v2, s0
+; SDAG-VI-NEXT:    v_mov_b32_e32 v0, s2
+; SDAG-VI-NEXT:    v_mov_b32_e32 v3, s1
+; SDAG-VI-NEXT:    v_mov_b32_e32 v1, s3
+; SDAG-VI-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
+; SDAG-VI-NEXT:    s_endpgm
+;
+; SDAG-GFX9-LABEL: v_icmp_u64_ult:
+; SDAG-GFX9:       ; %bb.0:
+; SDAG-GFX9-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
+; SDAG-GFX9-NEXT:    v_mov_b32_e32 v1, 0
+; SDAG-GFX9-NEXT:    v_mov_b32_e32 v0, 0x64
+; SDAG-GFX9-NEXT:    s_waitcnt lgkmcnt(0)
+; SDAG-GFX9-NEXT:    v_cmp_lt_u64_e64 s[2:3], s[2:3], v[0:1]
+; SDAG-GFX9-NEXT:    v_mov_b32_e32 v2, s2
+; SDAG-GFX9-NEXT:    v_mov_b32_e32 v3, s3
+; SDAG-GFX9-NEXT:    global_store_dwordx2 v1, v[2:3], s[0:1]
+; SDAG-GFX9-NEXT:    s_endpgm
+;
+; GISEL-VI-LABEL: v_icmp_u64_ult:
+; GISEL-VI:       ; %bb.0:
+; GISEL-VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
+; GISEL-VI-NEXT:    s_mov_b64 s[4:5], 0x64
+; GISEL-VI-NEXT:    v_mov_b32_e32 v0, s4
+; GISEL-VI-NEXT:    v_mov_b32_e32 v1, s5
+; GISEL-VI-NEXT:    s_waitcnt lgkmcnt(0)
+; GISEL-VI-NEXT:    v_cmp_lt_u64_e64 s[2:3], s[2:3], v[0:1]
+; GISEL-VI-NEXT:    v_mov_b32_e32 v3, s1
+; GISEL-VI-NEXT:    v_mov_b32_e32 v0, s2
+; GISEL-VI-NEXT:    v_mov_b32_e32 v1, s3
+; GISEL-VI-NEXT:    v_mov_b32_e32 v2, s0
+; GISEL-VI-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
+; GISEL-VI-NEXT:    s_endpgm
+;
+; GISEL-GFX9-LABEL: v_icmp_u64_ult:
+; GISEL-GFX9:       ; %bb.0:
+; GISEL-GFX9-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
+; GISEL-GFX9-NEXT:    s_mov_b64 s[4:5], 0x64
+; GISEL-GFX9-NEXT:    v_mov_b32_e32 v0, s4
+; GISEL-GFX9-NEXT:    v_mov_b32_e32 v1, s5
+; GISEL-GFX9-NEXT:    v_mov_b32_e32 v2, 0
+; GISEL-GFX9-NEXT:    s_waitcnt lgkmcnt(0)
+; GISEL-GFX9-NEXT:    v_cmp_lt_u64_e64 s[2:3], s[2:3], v[0:1]
+; GISEL-GFX9-NEXT:    v_mov_b32_e32 v0, s2
+; GISEL-GFX9-NEXT:    v_mov_b32_e32 v1, s3
+; GISEL-GFX9-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
+; GISEL-GFX9-NEXT:    s_endpgm
+  %result = call i64 @llvm.amdgcn.icmp.i64(i64 %src, i64 100, i32 36)
+  store i64 %result, i64 addrspace(1)* %out
+  ret void
+}
+
+define amdgpu_kernel void @v_icmp_u64_ule(i64 addrspace(1)* %out, i64 %src) {
+; GFX11-LABEL: v_icmp_u64_ule:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    s_load_b128 s[0:3], s[0:1], 0x24
+; GFX11-NEXT:    s_mov_b64 s[4:5], 0x64
+; GFX11-NEXT:    v_mov_b32_e32 v2, 0
+; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-NEXT:    v_cmp_le_u64_e64 s[2:3], s[2:3], s[4:5]
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-NEXT:    v_mov_b32_e32 v0, s2
+; GFX11-NEXT:    v_mov_b32_e32 v1, s3
+; GFX11-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX11-NEXT:    s_endpgm
+;
+; SDAG-VI-LABEL: v_icmp_u64_ule:
+; SDAG-VI:       ; %bb.0:
+; SDAG-VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
+; SDAG-VI-NEXT:    v_mov_b32_e32 v0, 0x64
+; SDAG-VI-NEXT:    v_mov_b32_e32 v1, 0
+; SDAG-VI-NEXT:    s_waitcnt lgkmcnt(0)
+; SDAG-VI-NEXT:    v_cmp_le_u64_e64 s[2:3], s[2:3], v[0:1]
+; SDAG-VI-NEXT:    v_mov_b32_e32 v2, s0
+; SDAG-VI-NEXT:    v_mov_b32_e32 v0, s2
+; SDAG-VI-NEXT:    v_mov_b32_e32 v3, s1
+; SDAG-VI-NEXT:    v_mov_b32_e32 v1, s3
+; SDAG-VI-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
+; SDAG-VI-NEXT:    s_endpgm
+;
+; SDAG-GFX9-LABEL: v_icmp_u64_ule:
+; SDAG-GFX9:       ; %bb.0:
+; SDAG-GFX9-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
+; SDAG-GFX9-NEXT:    v_mov_b32_e32 v1, 0
+; SDAG-GFX9-NEXT:    v_mov_b32_e32 v0, 0x64
+; SDAG-GFX9-NEXT:    s_waitcnt lgkmcnt(0)
+; SDAG-GFX9-NEXT:    v_cmp_le_u64_e64 s[2:3], s[2:3], v[0:1]
+; SDAG-GFX9-NEXT:    v_mov_b32_e32 v2, s2
+; SDAG-GFX9-NEXT:    v_mov_b32_e32 v3, s3
+; SDAG-GFX9-NEXT:    global_store_dwordx2 v1, v[2:3], s[0:1]
+; SDAG-GFX9-NEXT:    s_endpgm
+;
+; GISEL-VI-LABEL: v_icmp_u64_ule:
+; GISEL-VI:       ; %bb.0:
+; GISEL-VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
+; GISEL-VI-NEXT:    s_mov_b64 s[4:5], 0x64
+; GISEL-VI-NEXT:    v_mov_b32_e32 v0, s4
+; GISEL-VI-NEXT:    v_mov_b32_e32 v1, s5
+; GISEL-VI-NEXT:    s_waitcnt lgkmcnt(0)
+; GISEL-VI-NEXT:    v_cmp_le_u64_e64 s[2:3], s[2:3], v[0:1]
+; GISEL-VI-NEXT:    v_mov_b32_e32 v3, s1
+; GISEL-VI-NEXT:    v_mov_b32_e32 v0, s2
+; GISEL-VI-NEXT:    v_mov_b32_e32 v1, s3
+; GISEL-VI-NEXT:    v_mov_b32_e32 v2, s0
+; GISEL-VI-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
+; GISEL-VI-NEXT:    s_endpgm
+;
+; GISEL-GFX9-LABEL: v_icmp_u64_ule:
+; GISEL-GFX9:       ; %bb.0:
+; GISEL-GFX9-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
+; GISEL-GFX9-NEXT:    s_mov_b64 s[4:5], 0x64
+; GISEL-GFX9-NEXT:    v_mov_b32_e32 v0, s4
+; GISEL-GFX9-NEXT:    v_mov_b32_e32 v1, s5
+; GISEL-GFX9-NEXT:    v_mov_b32_e32 v2, 0
+; GISEL-GFX9-NEXT:    s_waitcnt lgkmcnt(0)
+; GISEL-GFX9-NEXT:    v_cmp_le_u64_e64 s[2:3], s[2:3], v[0:1]
+; GISEL-GFX9-NEXT:    v_mov_b32_e32 v0, s2
+; GISEL-GFX9-NEXT:    v_mov_b32_e32 v1, s3
+; GISEL-GFX9-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
+; GISEL-GFX9-NEXT:    s_endpgm
+  %result = call i64 @llvm.amdgcn.icmp.i64(i64 %src, i64 100, i32 37)
+  store i64 %result, i64 addrspace(1)* %out
+  ret void
+}
+
+define amdgpu_kernel void @v_icmp_i64_sgt(i64 addrspace(1)* %out, i64 %src) {
+; GFX11-LABEL: v_icmp_i64_sgt:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    s_load_b128 s[0:3], s[0:1], 0x24
+; GFX11-NEXT:    s_mov_b64 s[4:5], 0x64
+; GFX11-NEXT:    v_mov_b32_e32 v2, 0
+; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-NEXT:    v_cmp_gt_i64_e64 s[2:3], s[2:3], s[4:5]
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-NEXT:    v_mov_b32_e32 v0, s2
+; GFX11-NEXT:    v_mov_b32_e32 v1, s3
+; GFX11-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX11-NEXT:    s_endpgm
+;
+; SDAG-VI-LABEL: v_icmp_i64_sgt:
+; SDAG-VI:       ; %bb.0:
+; SDAG-VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
+; SDAG-VI-NEXT:    v_mov_b32_e32 v0, 0x64
+; SDAG-VI-NEXT:    v_mov_b32_e32 v1, 0
+; SDAG-VI-NEXT:    s_waitcnt lgkmcnt(0)
+; SDAG-VI-NEXT:    v_cmp_gt_i64_e64 s[2:3], s[2:3], v[0:1]
+; SDAG-VI-NEXT:    v_mov_b32_e32 v2, s0
+; SDAG-VI-NEXT:    v_mov_b32_e32 v0, s2
+; SDAG-VI-NEXT:    v_mov_b32_e32 v3, s1
+; SDAG-VI-NEXT:    v_mov_b32_e32 v1, s3
+; SDAG-VI-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
+; SDAG-VI-NEXT:    s_endpgm
+;
+; SDAG-GFX9-LABEL: v_icmp_i64_sgt:
+; SDAG-GFX9:       ; %bb.0:
+; SDAG-GFX9-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
+; SDAG-GFX9-NEXT:    v_mov_b32_e32 v1, 0
+; SDAG-GFX9-NEXT:    v_mov_b32_e32 v0, 0x64
+; SDAG-GFX9-NEXT:    s_waitcnt lgkmcnt(0)
+; SDAG-GFX9-NEXT:    v_cmp_gt_i64_e64 s[2:3], s[2:3], v[0:1]
+; SDAG-GFX9-NEXT:    v_mov_b32_e32 v2, s2
+; SDAG-GFX9-NEXT:    v_mov_b32_e32 v3, s3
+; SDAG-GFX9-NEXT:    global_store_dwordx2 v1, v[2:3], s[0:1]
+; SDAG-GFX9-NEXT:    s_endpgm
+;
+; GISEL-VI-LABEL: v_icmp_i64_sgt:
+; GISEL-VI:       ; %bb.0:
+; GISEL-VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
+; GISEL-VI-NEXT:    s_mov_b64 s[4:5], 0x64
+; GISEL-VI-NEXT:    v_mov_b32_e32 v0, s4
+; GISEL-VI-NEXT:    v_mov_b32_e32 v1, s5
+; GISEL-VI-NEXT:    s_waitcnt lgkmcnt(0)
+; GISEL-VI-NEXT:    v_cmp_gt_i64_e64 s[2:3], s[2:3], v[0:1]
+; GISEL-VI-NEXT:    v_mov_b32_e32 v3, s1
+; GISEL-VI-NEXT:    v_mov_b32_e32 v0, s2
+; GISEL-VI-NEXT:    v_mov_b32_e32 v1, s3
+; GISEL-VI-NEXT:    v_mov_b32_e32 v2, s0
+; GISEL-VI-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
+; GISEL-VI-NEXT:    s_endpgm
+;
+; GISEL-GFX9-LABEL: v_icmp_i64_sgt:
+; GISEL-GFX9:       ; %bb.0:
+; GISEL-GFX9-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
+; GISEL-GFX9-NEXT:    s_mov_b64 s[4:5], 0x64
+; GISEL-GFX9-NEXT:    v_mov_b32_e32 v0, s4
+; GISEL-GFX9-NEXT:    v_mov_b32_e32 v1, s5
+; GISEL-GFX9-NEXT:    v_mov_b32_e32 v2, 0
+; GISEL-GFX9-NEXT:    s_waitcnt lgkmcnt(0)
+; GISEL-GFX9-NEXT:    v_cmp_gt_i64_e64 s[2:3], s[2:3], v[0:1]
+; GISEL-GFX9-NEXT:    v_mov_b32_e32 v0, s2
+; GISEL-GFX9-NEXT:    v_mov_b32_e32 v1, s3
+; GISEL-GFX9-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
+; GISEL-GFX9-NEXT:    s_endpgm
+  %result = call i64 @llvm.amdgcn.icmp.i64(i64 %src, i64 100, i32 38)
+  store i64 %result, i64 addrspace(1)* %out
+  ret void
+}
+
+define amdgpu_kernel void @v_icmp_i64_sge(i64 addrspace(1)* %out, i64 %src) {
+; GFX11-LABEL: v_icmp_i64_sge:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    s_load_b128 s[0:3], s[0:1], 0x24
+; GFX11-NEXT:    s_mov_b64 s[4:5], 0x64
+; GFX11-NEXT:    v_mov_b32_e32 v2, 0
+; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-NEXT:    v_cmp_ge_i64_e64 s[2:3], s[2:3], s[4:5]
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-NEXT:    v_mov_b32_e32 v0, s2
+; GFX11-NEXT:    v_mov_b32_e32 v1, s3
+; GFX11-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX11-NEXT:    s_endpgm
+;
+; SDAG-VI-LABEL: v_icmp_i64_sge:
+; SDAG-VI:       ; %bb.0:
+; SDAG-VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
+; SDAG-VI-NEXT:    v_mov_b32_e32 v0, 0x64
+; SDAG-VI-NEXT:    v_mov_b32_e32 v1, 0
+; SDAG-VI-NEXT:    s_waitcnt lgkmcnt(0)
+; SDAG-VI-NEXT:    v_cmp_ge_i64_e64 s[2:3], s[2:3], v[0:1]
+; SDAG-VI-NEXT:    v_mov_b32_e32 v2, s0
+; SDAG-VI-NEXT:    v_mov_b32_e32 v0, s2
+; SDAG-VI-NEXT:    v_mov_b32_e32 v3, s1
+; SDAG-VI-NEXT:    v_mov_b32_e32 v1, s3
+; SDAG-VI-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
+; SDAG-VI-NEXT:    s_endpgm
+;
+; SDAG-GFX9-LABEL: v_icmp_i64_sge:
+; SDAG-GFX9:       ; %bb.0:
+; SDAG-GFX9-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
+; SDAG-GFX9-NEXT:    v_mov_b32_e32 v1, 0
+; SDAG-GFX9-NEXT:    v_mov_b32_e32 v0, 0x64
+; SDAG-GFX9-NEXT:    s_waitcnt lgkmcnt(0)
+; SDAG-GFX9-NEXT:    v_cmp_ge_i64_e64 s[2:3], s[2:3], v[0:1]
+; SDAG-GFX9-NEXT:    v_mov_b32_e32 v2, s2
+; SDAG-GFX9-NEXT:    v_mov_b32_e32 v3, s3
+; SDAG-GFX9-NEXT:    global_store_dwordx2 v1, v[2:3], s[0:1]
+; SDAG-GFX9-NEXT:    s_endpgm
+;
+; GISEL-VI-LABEL: v_icmp_i64_sge:
+; GISEL-VI:       ; %bb.0:
+; GISEL-VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
+; GISEL-VI-NEXT:    s_mov_b64 s[4:5], 0x64
+; GISEL-VI-NEXT:    v_mov_b32_e32 v0, s4
+; GISEL-VI-NEXT:    v_mov_b32_e32 v1, s5
+; GISEL-VI-NEXT:    s_waitcnt lgkmcnt(0)
+; GISEL-VI-NEXT:    v_cmp_ge_i64_e64 s[2:3], s[2:3], v[0:1]
+; GISEL-VI-NEXT:    v_mov_b32_e32 v3, s1
+; GISEL-VI-NEXT:    v_mov_b32_e32 v0, s2
+; GISEL-VI-NEXT:    v_mov_b32_e32 v1, s3
+; GISEL-VI-NEXT:    v_mov_b32_e32 v2, s0
+; GISEL-VI-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
+; GISEL-VI-NEXT:    s_endpgm
+;
+; GISEL-GFX9-LABEL: v_icmp_i64_sge:
+; GISEL-GFX9:       ; %bb.0:
+; GISEL-GFX9-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
+; GISEL-GFX9-NEXT:    s_mov_b64 s[4:5], 0x64
+; GISEL-GFX9-NEXT:    v_mov_b32_e32 v0, s4
+; GISEL-GFX9-NEXT:    v_mov_b32_e32 v1, s5
+; GISEL-GFX9-NEXT:    v_mov_b32_e32 v2, 0
+; GISEL-GFX9-NEXT:    s_waitcnt lgkmcnt(0)
+; GISEL-GFX9-NEXT:    v_cmp_ge_i64_e64 s[2:3], s[2:3], v[0:1]
+; GISEL-GFX9-NEXT:    v_mov_b32_e32 v0, s2
+; GISEL-GFX9-NEXT:    v_mov_b32_e32 v1, s3
+; GISEL-GFX9-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
+; GISEL-GFX9-NEXT:    s_endpgm
+  %result = call i64 @llvm.amdgcn.icmp.i64(i64 %src, i64 100, i32 39)
+  store i64 %result, i64 addrspace(1)* %out
+  ret void
+}
+
+define amdgpu_kernel void @v_icmp_i64_slt(i64 addrspace(1)* %out, i64 %src) { ; i64 signed less-than case of llvm.amdgcn.icmp
+; GFX11-LABEL: v_icmp_i64_slt:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    s_load_b128 s[0:3], s[0:1], 0x24
+; GFX11-NEXT:    s_mov_b64 s[4:5], 0x64
+; GFX11-NEXT:    v_mov_b32_e32 v2, 0
+; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-NEXT:    v_cmp_lt_i64_e64 s[2:3], s[2:3], s[4:5]
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-NEXT:    v_mov_b32_e32 v0, s2
+; GFX11-NEXT:    v_mov_b32_e32 v1, s3
+; GFX11-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX11-NEXT:    s_endpgm
+;
+; SDAG-VI-LABEL: v_icmp_i64_slt:
+; SDAG-VI:       ; %bb.0:
+; SDAG-VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
+; SDAG-VI-NEXT:    v_mov_b32_e32 v0, 0x64
+; SDAG-VI-NEXT:    v_mov_b32_e32 v1, 0
+; SDAG-VI-NEXT:    s_waitcnt lgkmcnt(0)
+; SDAG-VI-NEXT:    v_cmp_lt_i64_e64 s[2:3], s[2:3], v[0:1]
+; SDAG-VI-NEXT:    v_mov_b32_e32 v2, s0
+; SDAG-VI-NEXT:    v_mov_b32_e32 v0, s2
+; SDAG-VI-NEXT:    v_mov_b32_e32 v3, s1
+; SDAG-VI-NEXT:    v_mov_b32_e32 v1, s3
+; SDAG-VI-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
+; SDAG-VI-NEXT:    s_endpgm
+;
+; SDAG-GFX9-LABEL: v_icmp_i64_slt:
+; SDAG-GFX9:       ; %bb.0:
+; SDAG-GFX9-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
+; SDAG-GFX9-NEXT:    v_mov_b32_e32 v1, 0
+; SDAG-GFX9-NEXT:    v_mov_b32_e32 v0, 0x64
+; SDAG-GFX9-NEXT:    s_waitcnt lgkmcnt(0)
+; SDAG-GFX9-NEXT:    v_cmp_lt_i64_e64 s[2:3], s[2:3], v[0:1]
+; SDAG-GFX9-NEXT:    v_mov_b32_e32 v2, s2
+; SDAG-GFX9-NEXT:    v_mov_b32_e32 v3, s3
+; SDAG-GFX9-NEXT:    global_store_dwordx2 v1, v[2:3], s[0:1]
+; SDAG-GFX9-NEXT:    s_endpgm
+;
+; GISEL-VI-LABEL: v_icmp_i64_slt:
+; GISEL-VI:       ; %bb.0:
+; GISEL-VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
+; GISEL-VI-NEXT:    s_mov_b64 s[4:5], 0x64
+; GISEL-VI-NEXT:    v_mov_b32_e32 v0, s4
+; GISEL-VI-NEXT:    v_mov_b32_e32 v1, s5
+; GISEL-VI-NEXT:    s_waitcnt lgkmcnt(0)
+; GISEL-VI-NEXT:    v_cmp_lt_i64_e64 s[2:3], s[2:3], v[0:1]
+; GISEL-VI-NEXT:    v_mov_b32_e32 v3, s1
+; GISEL-VI-NEXT:    v_mov_b32_e32 v0, s2
+; GISEL-VI-NEXT:    v_mov_b32_e32 v1, s3
+; GISEL-VI-NEXT:    v_mov_b32_e32 v2, s0
+; GISEL-VI-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
+; GISEL-VI-NEXT:    s_endpgm
+;
+; GISEL-GFX9-LABEL: v_icmp_i64_slt:
+; GISEL-GFX9:       ; %bb.0:
+; GISEL-GFX9-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
+; GISEL-GFX9-NEXT:    s_mov_b64 s[4:5], 0x64
+; GISEL-GFX9-NEXT:    v_mov_b32_e32 v0, s4
+; GISEL-GFX9-NEXT:    v_mov_b32_e32 v1, s5
+; GISEL-GFX9-NEXT:    v_mov_b32_e32 v2, 0
+; GISEL-GFX9-NEXT:    s_waitcnt lgkmcnt(0)
+; GISEL-GFX9-NEXT:    v_cmp_lt_i64_e64 s[2:3], s[2:3], v[0:1]
+; GISEL-GFX9-NEXT:    v_mov_b32_e32 v0, s2
+; GISEL-GFX9-NEXT:    v_mov_b32_e32 v1, s3
+; GISEL-GFX9-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
+; GISEL-GFX9-NEXT:    s_endpgm
+  %result = call i64 @llvm.amdgcn.icmp.i64(i64 %src, i64 100, i32 40) ; i32 40 is the slt predicate code; every check block expects v_cmp_lt_i64_e64 of %src against 100 (0x64)
+  store i64 %result, i64 addrspace(1)* %out
+  ret void
+}
+
+define amdgpu_kernel void @v_icmp_i64_sle(i64 addrspace(1)* %out, i64 %src) { ; i64 signed less-or-equal case of llvm.amdgcn.icmp
+; GFX11-LABEL: v_icmp_i64_sle:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    s_load_b128 s[0:3], s[0:1], 0x24
+; GFX11-NEXT:    s_mov_b64 s[4:5], 0x64
+; GFX11-NEXT:    v_mov_b32_e32 v2, 0
+; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-NEXT:    v_cmp_le_i64_e64 s[2:3], s[2:3], s[4:5]
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-NEXT:    v_mov_b32_e32 v0, s2
+; GFX11-NEXT:    v_mov_b32_e32 v1, s3
+; GFX11-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX11-NEXT:    s_endpgm
+;
+; SDAG-VI-LABEL: v_icmp_i64_sle:
+; SDAG-VI:       ; %bb.0:
+; SDAG-VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
+; SDAG-VI-NEXT:    v_mov_b32_e32 v0, 0x64
+; SDAG-VI-NEXT:    v_mov_b32_e32 v1, 0
+; SDAG-VI-NEXT:    s_waitcnt lgkmcnt(0)
+; SDAG-VI-NEXT:    v_cmp_le_i64_e64 s[2:3], s[2:3], v[0:1]
+; SDAG-VI-NEXT:    v_mov_b32_e32 v2, s0
+; SDAG-VI-NEXT:    v_mov_b32_e32 v0, s2
+; SDAG-VI-NEXT:    v_mov_b32_e32 v3, s1
+; SDAG-VI-NEXT:    v_mov_b32_e32 v1, s3
+; SDAG-VI-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
+; SDAG-VI-NEXT:    s_endpgm
+;
+; SDAG-GFX9-LABEL: v_icmp_i64_sle:
+; SDAG-GFX9:       ; %bb.0:
+; SDAG-GFX9-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
+; SDAG-GFX9-NEXT:    v_mov_b32_e32 v1, 0
+; SDAG-GFX9-NEXT:    v_mov_b32_e32 v0, 0x64
+; SDAG-GFX9-NEXT:    s_waitcnt lgkmcnt(0)
+; SDAG-GFX9-NEXT:    v_cmp_le_i64_e64 s[2:3], s[2:3], v[0:1]
+; SDAG-GFX9-NEXT:    v_mov_b32_e32 v2, s2
+; SDAG-GFX9-NEXT:    v_mov_b32_e32 v3, s3
+; SDAG-GFX9-NEXT:    global_store_dwordx2 v1, v[2:3], s[0:1]
+; SDAG-GFX9-NEXT:    s_endpgm
+;
+; GISEL-VI-LABEL: v_icmp_i64_sle:
+; GISEL-VI:       ; %bb.0:
+; GISEL-VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
+; GISEL-VI-NEXT:    s_mov_b64 s[4:5], 0x64
+; GISEL-VI-NEXT:    v_mov_b32_e32 v0, s4
+; GISEL-VI-NEXT:    v_mov_b32_e32 v1, s5
+; GISEL-VI-NEXT:    s_waitcnt lgkmcnt(0)
+; GISEL-VI-NEXT:    v_cmp_le_i64_e64 s[2:3], s[2:3], v[0:1]
+; GISEL-VI-NEXT:    v_mov_b32_e32 v3, s1
+; GISEL-VI-NEXT:    v_mov_b32_e32 v0, s2
+; GISEL-VI-NEXT:    v_mov_b32_e32 v1, s3
+; GISEL-VI-NEXT:    v_mov_b32_e32 v2, s0
+; GISEL-VI-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
+; GISEL-VI-NEXT:    s_endpgm
+;
+; GISEL-GFX9-LABEL: v_icmp_i64_sle:
+; GISEL-GFX9:       ; %bb.0:
+; GISEL-GFX9-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
+; GISEL-GFX9-NEXT:    s_mov_b64 s[4:5], 0x64
+; GISEL-GFX9-NEXT:    v_mov_b32_e32 v0, s4
+; GISEL-GFX9-NEXT:    v_mov_b32_e32 v1, s5
+; GISEL-GFX9-NEXT:    v_mov_b32_e32 v2, 0
+; GISEL-GFX9-NEXT:    s_waitcnt lgkmcnt(0)
+; GISEL-GFX9-NEXT:    v_cmp_le_i64_e64 s[2:3], s[2:3], v[0:1]
+; GISEL-GFX9-NEXT:    v_mov_b32_e32 v0, s2
+; GISEL-GFX9-NEXT:    v_mov_b32_e32 v1, s3
+; GISEL-GFX9-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
+; GISEL-GFX9-NEXT:    s_endpgm
+  %result = call i64 @llvm.amdgcn.icmp.i64(i64 %src, i64 100, i32 41) ; i32 41 is the sle predicate code; every check block expects v_cmp_le_i64_e64 of %src against 100 (0x64)
+  store i64 %result, i64 addrspace(1)* %out
+  ret void
+}
+
+define amdgpu_kernel void @v_icmp_i16_eq(i64 addrspace(1)* %out, i16 %src) { ; i16 equality case of llvm.amdgcn.icmp
+; GFX11-LABEL: v_icmp_i16_eq:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    s_clause 0x1
+; GFX11-NEXT:    s_load_b32 s2, s[0:1], 0x2c
+; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x24
+; GFX11-NEXT:    v_mov_b32_e32 v2, 0
+; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-NEXT:    v_cmp_eq_u16_e64 s[2:3], 0x64, s2
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-NEXT:    v_mov_b32_e32 v0, s2
+; GFX11-NEXT:    v_mov_b32_e32 v1, s3
+; GFX11-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX11-NEXT:    s_endpgm
+;
+; SDAG-VI-LABEL: v_icmp_i16_eq:
+; SDAG-VI:       ; %bb.0:
+; SDAG-VI-NEXT:    s_load_dword s2, s[0:1], 0x2c
+; SDAG-VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
+; SDAG-VI-NEXT:    v_mov_b32_e32 v0, 0x64
+; SDAG-VI-NEXT:    s_waitcnt lgkmcnt(0)
+; SDAG-VI-NEXT:    v_cmp_eq_u16_e64 s[2:3], s2, v0
+; SDAG-VI-NEXT:    v_mov_b32_e32 v0, s0
+; SDAG-VI-NEXT:    v_mov_b32_e32 v2, s2
+; SDAG-VI-NEXT:    v_mov_b32_e32 v1, s1
+; SDAG-VI-NEXT:    v_mov_b32_e32 v3, s3
+; SDAG-VI-NEXT:    flat_store_dwordx2 v[0:1], v[2:3]
+; SDAG-VI-NEXT:    s_endpgm
+;
+; GFX9-LABEL: v_icmp_i16_eq:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_load_dword s4, s[0:1], 0x2c
+; GFX9-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x24
+; GFX9-NEXT:    v_mov_b32_e32 v0, 0x64
+; GFX9-NEXT:    v_mov_b32_e32 v2, 0
+; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-NEXT:    v_cmp_eq_u16_e64 s[0:1], s4, v0
+; GFX9-NEXT:    v_mov_b32_e32 v0, s0
+; GFX9-NEXT:    v_mov_b32_e32 v1, s1
+; GFX9-NEXT:    global_store_dwordx2 v2, v[0:1], s[2:3]
+; GFX9-NEXT:    s_endpgm
+;
+; GISEL-VI-LABEL: v_icmp_i16_eq:
+; GISEL-VI:       ; %bb.0:
+; GISEL-VI-NEXT:    s_load_dword s2, s[0:1], 0x2c
+; GISEL-VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
+; GISEL-VI-NEXT:    v_mov_b32_e32 v0, 0x64
+; GISEL-VI-NEXT:    s_waitcnt lgkmcnt(0)
+; GISEL-VI-NEXT:    v_cmp_eq_u16_e64 s[2:3], s2, v0
+; GISEL-VI-NEXT:    v_mov_b32_e32 v0, s2
+; GISEL-VI-NEXT:    v_mov_b32_e32 v3, s1
+; GISEL-VI-NEXT:    v_mov_b32_e32 v1, s3
+; GISEL-VI-NEXT:    v_mov_b32_e32 v2, s0
+; GISEL-VI-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
+; GISEL-VI-NEXT:    s_endpgm
+  %result = call i64 @llvm.amdgcn.icmp.i16(i16 %src, i16 100, i32 32) ; i32 32 is the eq predicate code; checks expect v_cmp_eq_u16_e64 (the GFX11 checks place the 0x64 constant first)
+  store i64 %result, i64 addrspace(1)* %out
+  ret void
+}
+
+define amdgpu_kernel void @v_icmp_i16(i64 addrspace(1)* %out, i16 %src) { ; llvm.amdgcn.icmp.i16 called with a predicate code outside the 32-41 range used by the sibling tests
+; SDAG-GFX11-LABEL: v_icmp_i16:
+; SDAG-GFX11:       ; %bb.0:
+; SDAG-GFX11-NEXT:    s_endpgm
+;
+; SDAG-VI-LABEL: v_icmp_i16:
+; SDAG-VI:       ; %bb.0:
+; SDAG-VI-NEXT:    s_endpgm
+;
+; SDAG-GFX9-LABEL: v_icmp_i16:
+; SDAG-GFX9:       ; %bb.0:
+; SDAG-GFX9-NEXT:    s_endpgm
+;
+; GISEL-GFX11-LABEL: v_icmp_i16:
+; GISEL-GFX11:       ; %bb.0:
+; GISEL-GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x24
+; GISEL-GFX11-NEXT:    v_mov_b32_e32 v0, 0
+; GISEL-GFX11-NEXT:    s_waitcnt lgkmcnt(0)
+; GISEL-GFX11-NEXT:    global_store_b64 v0, v[0:1], s[0:1]
+; GISEL-GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GISEL-GFX11-NEXT:    s_endpgm
+;
+; GISEL-VI-LABEL: v_icmp_i16:
+; GISEL-VI:       ; %bb.0:
+; GISEL-VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
+; GISEL-VI-NEXT:    s_waitcnt lgkmcnt(0)
+; GISEL-VI-NEXT:    v_mov_b32_e32 v0, s0
+; GISEL-VI-NEXT:    v_mov_b32_e32 v1, s1
+; GISEL-VI-NEXT:    flat_store_dwordx2 v[0:1], v[0:1]
+; GISEL-VI-NEXT:    s_endpgm
+;
+; GISEL-GFX9-LABEL: v_icmp_i16:
+; GISEL-GFX9:       ; %bb.0:
+; GISEL-GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
+; GISEL-GFX9-NEXT:    v_mov_b32_e32 v0, 0
+; GISEL-GFX9-NEXT:    s_waitcnt lgkmcnt(0)
+; GISEL-GFX9-NEXT:    global_store_dwordx2 v0, v[0:1], s[0:1]
+; GISEL-GFX9-NEXT:    s_endpgm
+  %result = call i64 @llvm.amdgcn.icmp.i16(i16 %src, i16 100, i32 30) ; i32 30: no check block expects a v_cmp; the SDAG checks are just s_endpgm, the GISEL checks store without a preceding compare
+  store i64 %result, i64 addrspace(1)* %out
+  ret void
+}
+
+define amdgpu_kernel void @v_icmp_i16_ne(i64 addrspace(1)* %out, i16 %src) { ; i16 inequality case of llvm.amdgcn.icmp
+; GFX11-LABEL: v_icmp_i16_ne:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    s_clause 0x1
+; GFX11-NEXT:    s_load_b32 s2, s[0:1], 0x2c
+; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x24
+; GFX11-NEXT:    v_mov_b32_e32 v2, 0
+; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-NEXT:    v_cmp_ne_u16_e64 s[2:3], 0x64, s2
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-NEXT:    v_mov_b32_e32 v0, s2
+; GFX11-NEXT:    v_mov_b32_e32 v1, s3
+; GFX11-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX11-NEXT:    s_endpgm
+;
+; SDAG-VI-LABEL: v_icmp_i16_ne:
+; SDAG-VI:       ; %bb.0:
+; SDAG-VI-NEXT:    s_load_dword s2, s[0:1], 0x2c
+; SDAG-VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
+; SDAG-VI-NEXT:    v_mov_b32_e32 v0, 0x64
+; SDAG-VI-NEXT:    s_waitcnt lgkmcnt(0)
+; SDAG-VI-NEXT:    v_cmp_ne_u16_e64 s[2:3], s2, v0
+; SDAG-VI-NEXT:    v_mov_b32_e32 v0, s0
+; SDAG-VI-NEXT:    v_mov_b32_e32 v2, s2
+; SDAG-VI-NEXT:    v_mov_b32_e32 v1, s1
+; SDAG-VI-NEXT:    v_mov_b32_e32 v3, s3
+; SDAG-VI-NEXT:    flat_store_dwordx2 v[0:1], v[2:3]
+; SDAG-VI-NEXT:    s_endpgm
+;
+; GFX9-LABEL: v_icmp_i16_ne:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_load_dword s4, s[0:1], 0x2c
+; GFX9-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x24
+; GFX9-NEXT:    v_mov_b32_e32 v0, 0x64
+; GFX9-NEXT:    v_mov_b32_e32 v2, 0
+; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-NEXT:    v_cmp_ne_u16_e64 s[0:1], s4, v0
+; GFX9-NEXT:    v_mov_b32_e32 v0, s0
+; GFX9-NEXT:    v_mov_b32_e32 v1, s1
+; GFX9-NEXT:    global_store_dwordx2 v2, v[0:1], s[2:3]
+; GFX9-NEXT:    s_endpgm
+;
+; GISEL-VI-LABEL: v_icmp_i16_ne:
+; GISEL-VI:       ; %bb.0:
+; GISEL-VI-NEXT:    s_load_dword s2, s[0:1], 0x2c
+; GISEL-VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
+; GISEL-VI-NEXT:    v_mov_b32_e32 v0, 0x64
+; GISEL-VI-NEXT:    s_waitcnt lgkmcnt(0)
+; GISEL-VI-NEXT:    v_cmp_ne_u16_e64 s[2:3], s2, v0
+; GISEL-VI-NEXT:    v_mov_b32_e32 v0, s2
+; GISEL-VI-NEXT:    v_mov_b32_e32 v3, s1
+; GISEL-VI-NEXT:    v_mov_b32_e32 v1, s3
+; GISEL-VI-NEXT:    v_mov_b32_e32 v2, s0
+; GISEL-VI-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
+; GISEL-VI-NEXT:    s_endpgm
+  %result = call i64 @llvm.amdgcn.icmp.i16(i16 %src, i16 100, i32 33) ; i32 33 is the ne predicate code; checks expect v_cmp_ne_u16_e64 (the GFX11 checks place the 0x64 constant first)
+  store i64 %result, i64 addrspace(1)* %out
+  ret void
+}
+
+define amdgpu_kernel void @v_icmp_i16_ugt(i64 addrspace(1)* %out, i16 %src) { ; i16 unsigned greater-than case of llvm.amdgcn.icmp
+; GFX11-LABEL: v_icmp_i16_ugt:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    s_clause 0x1
+; GFX11-NEXT:    s_load_b32 s2, s[0:1], 0x2c
+; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x24
+; GFX11-NEXT:    v_mov_b32_e32 v2, 0
+; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-NEXT:    v_cmp_lt_u16_e64 s[2:3], 0x64, s2
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-NEXT:    v_mov_b32_e32 v0, s2
+; GFX11-NEXT:    v_mov_b32_e32 v1, s3
+; GFX11-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX11-NEXT:    s_endpgm
+;
+; SDAG-VI-LABEL: v_icmp_i16_ugt:
+; SDAG-VI:       ; %bb.0:
+; SDAG-VI-NEXT:    s_load_dword s2, s[0:1], 0x2c
+; SDAG-VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
+; SDAG-VI-NEXT:    v_mov_b32_e32 v0, 0x64
+; SDAG-VI-NEXT:    s_waitcnt lgkmcnt(0)
+; SDAG-VI-NEXT:    v_cmp_gt_u16_e64 s[2:3], s2, v0
+; SDAG-VI-NEXT:    v_mov_b32_e32 v0, s0
+; SDAG-VI-NEXT:    v_mov_b32_e32 v2, s2
+; SDAG-VI-NEXT:    v_mov_b32_e32 v1, s1
+; SDAG-VI-NEXT:    v_mov_b32_e32 v3, s3
+; SDAG-VI-NEXT:    flat_store_dwordx2 v[0:1], v[2:3]
+; SDAG-VI-NEXT:    s_endpgm
+;
+; GFX9-LABEL: v_icmp_i16_ugt:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_load_dword s4, s[0:1], 0x2c
+; GFX9-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x24
+; GFX9-NEXT:    v_mov_b32_e32 v0, 0x64
+; GFX9-NEXT:    v_mov_b32_e32 v2, 0
+; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-NEXT:    v_cmp_gt_u16_e64 s[0:1], s4, v0
+; GFX9-NEXT:    v_mov_b32_e32 v0, s0
+; GFX9-NEXT:    v_mov_b32_e32 v1, s1
+; GFX9-NEXT:    global_store_dwordx2 v2, v[0:1], s[2:3]
+; GFX9-NEXT:    s_endpgm
+;
+; GISEL-VI-LABEL: v_icmp_i16_ugt:
+; GISEL-VI:       ; %bb.0:
+; GISEL-VI-NEXT:    s_load_dword s2, s[0:1], 0x2c
+; GISEL-VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
+; GISEL-VI-NEXT:    v_mov_b32_e32 v0, 0x64
+; GISEL-VI-NEXT:    s_waitcnt lgkmcnt(0)
+; GISEL-VI-NEXT:    v_cmp_gt_u16_e64 s[2:3], s2, v0
+; GISEL-VI-NEXT:    v_mov_b32_e32 v0, s2
+; GISEL-VI-NEXT:    v_mov_b32_e32 v3, s1
+; GISEL-VI-NEXT:    v_mov_b32_e32 v1, s3
+; GISEL-VI-NEXT:    v_mov_b32_e32 v2, s0
+; GISEL-VI-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
+; GISEL-VI-NEXT:    s_endpgm
+  %result = call i64 @llvm.amdgcn.icmp.i16(i16 %src, i16 100, i32 34) ; i32 34 is the ugt predicate code; checks expect v_cmp_gt_u16_e64, while the GFX11 checks use the mirrored v_cmp_lt_u16_e64 with 0x64 as first operand
+  store i64 %result, i64 addrspace(1)* %out
+  ret void
+}
+
+define amdgpu_kernel void @v_icmp_i16_uge(i64 addrspace(1)* %out, i16 %src) { ; i16 unsigned greater-or-equal case of llvm.amdgcn.icmp
+; GFX11-LABEL: v_icmp_i16_uge:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    s_clause 0x1
+; GFX11-NEXT:    s_load_b32 s2, s[0:1], 0x2c
+; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x24
+; GFX11-NEXT:    v_mov_b32_e32 v2, 0
+; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-NEXT:    v_cmp_le_u16_e64 s[2:3], 0x64, s2
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-NEXT:    v_mov_b32_e32 v0, s2
+; GFX11-NEXT:    v_mov_b32_e32 v1, s3
+; GFX11-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX11-NEXT:    s_endpgm
+;
+; SDAG-VI-LABEL: v_icmp_i16_uge:
+; SDAG-VI:       ; %bb.0:
+; SDAG-VI-NEXT:    s_load_dword s2, s[0:1], 0x2c
+; SDAG-VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
+; SDAG-VI-NEXT:    v_mov_b32_e32 v0, 0x64
+; SDAG-VI-NEXT:    s_waitcnt lgkmcnt(0)
+; SDAG-VI-NEXT:    v_cmp_ge_u16_e64 s[2:3], s2, v0
+; SDAG-VI-NEXT:    v_mov_b32_e32 v0, s0
+; SDAG-VI-NEXT:    v_mov_b32_e32 v2, s2
+; SDAG-VI-NEXT:    v_mov_b32_e32 v1, s1
+; SDAG-VI-NEXT:    v_mov_b32_e32 v3, s3
+; SDAG-VI-NEXT:    flat_store_dwordx2 v[0:1], v[2:3]
+; SDAG-VI-NEXT:    s_endpgm
+;
+; GFX9-LABEL: v_icmp_i16_uge:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_load_dword s4, s[0:1], 0x2c
+; GFX9-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x24
+; GFX9-NEXT:    v_mov_b32_e32 v0, 0x64
+; GFX9-NEXT:    v_mov_b32_e32 v2, 0
+; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-NEXT:    v_cmp_ge_u16_e64 s[0:1], s4, v0
+; GFX9-NEXT:    v_mov_b32_e32 v0, s0
+; GFX9-NEXT:    v_mov_b32_e32 v1, s1
+; GFX9-NEXT:    global_store_dwordx2 v2, v[0:1], s[2:3]
+; GFX9-NEXT:    s_endpgm
+;
+; GISEL-VI-LABEL: v_icmp_i16_uge:
+; GISEL-VI:       ; %bb.0:
+; GISEL-VI-NEXT:    s_load_dword s2, s[0:1], 0x2c
+; GISEL-VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
+; GISEL-VI-NEXT:    v_mov_b32_e32 v0, 0x64
+; GISEL-VI-NEXT:    s_waitcnt lgkmcnt(0)
+; GISEL-VI-NEXT:    v_cmp_ge_u16_e64 s[2:3], s2, v0
+; GISEL-VI-NEXT:    v_mov_b32_e32 v0, s2
+; GISEL-VI-NEXT:    v_mov_b32_e32 v3, s1
+; GISEL-VI-NEXT:    v_mov_b32_e32 v1, s3
+; GISEL-VI-NEXT:    v_mov_b32_e32 v2, s0
+; GISEL-VI-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
+; GISEL-VI-NEXT:    s_endpgm
+  %result = call i64 @llvm.amdgcn.icmp.i16(i16 %src, i16 100, i32 35) ; i32 35 is the uge predicate code; checks expect v_cmp_ge_u16_e64, while the GFX11 checks use the mirrored v_cmp_le_u16_e64 with 0x64 as first operand
+  store i64 %result, i64 addrspace(1)* %out
+  ret void
+}
+
+define amdgpu_kernel void @v_icmp_i16_ult(i64 addrspace(1)* %out, i16 %src) { ; i16 unsigned less-than case of llvm.amdgcn.icmp
+; GFX11-LABEL: v_icmp_i16_ult:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    s_clause 0x1
+; GFX11-NEXT:    s_load_b32 s2, s[0:1], 0x2c
+; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x24
+; GFX11-NEXT:    v_mov_b32_e32 v2, 0
+; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-NEXT:    v_cmp_gt_u16_e64 s[2:3], 0x64, s2
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-NEXT:    v_mov_b32_e32 v0, s2
+; GFX11-NEXT:    v_mov_b32_e32 v1, s3
+; GFX11-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX11-NEXT:    s_endpgm
+;
+; SDAG-VI-LABEL: v_icmp_i16_ult:
+; SDAG-VI:       ; %bb.0:
+; SDAG-VI-NEXT:    s_load_dword s2, s[0:1], 0x2c
+; SDAG-VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
+; SDAG-VI-NEXT:    v_mov_b32_e32 v0, 0x64
+; SDAG-VI-NEXT:    s_waitcnt lgkmcnt(0)
+; SDAG-VI-NEXT:    v_cmp_lt_u16_e64 s[2:3], s2, v0
+; SDAG-VI-NEXT:    v_mov_b32_e32 v0, s0
+; SDAG-VI-NEXT:    v_mov_b32_e32 v2, s2
+; SDAG-VI-NEXT:    v_mov_b32_e32 v1, s1
+; SDAG-VI-NEXT:    v_mov_b32_e32 v3, s3
+; SDAG-VI-NEXT:    flat_store_dwordx2 v[0:1], v[2:3]
+; SDAG-VI-NEXT:    s_endpgm
+;
+; GFX9-LABEL: v_icmp_i16_ult:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_load_dword s4, s[0:1], 0x2c
+; GFX9-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x24
+; GFX9-NEXT:    v_mov_b32_e32 v0, 0x64
+; GFX9-NEXT:    v_mov_b32_e32 v2, 0
+; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-NEXT:    v_cmp_lt_u16_e64 s[0:1], s4, v0
+; GFX9-NEXT:    v_mov_b32_e32 v0, s0
+; GFX9-NEXT:    v_mov_b32_e32 v1, s1
+; GFX9-NEXT:    global_store_dwordx2 v2, v[0:1], s[2:3]
+; GFX9-NEXT:    s_endpgm
+;
+; GISEL-VI-LABEL: v_icmp_i16_ult:
+; GISEL-VI:       ; %bb.0:
+; GISEL-VI-NEXT:    s_load_dword s2, s[0:1], 0x2c
+; GISEL-VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
+; GISEL-VI-NEXT:    v_mov_b32_e32 v0, 0x64
+; GISEL-VI-NEXT:    s_waitcnt lgkmcnt(0)
+; GISEL-VI-NEXT:    v_cmp_lt_u16_e64 s[2:3], s2, v0
+; GISEL-VI-NEXT:    v_mov_b32_e32 v0, s2
+; GISEL-VI-NEXT:    v_mov_b32_e32 v3, s1
+; GISEL-VI-NEXT:    v_mov_b32_e32 v1, s3
+; GISEL-VI-NEXT:    v_mov_b32_e32 v2, s0
+; GISEL-VI-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
+; GISEL-VI-NEXT:    s_endpgm
+  %result = call i64 @llvm.amdgcn.icmp.i16(i16 %src, i16 100, i32 36) ; i32 36 is the ult predicate code; checks expect v_cmp_lt_u16_e64, while the GFX11 checks use the mirrored v_cmp_gt_u16_e64 with 0x64 as first operand
+  store i64 %result, i64 addrspace(1)* %out
+  ret void
+}
+
+define amdgpu_kernel void @v_icmp_i16_ule(i64 addrspace(1)* %out, i16 %src) { ; i16 unsigned less-or-equal case of llvm.amdgcn.icmp
+; GFX11-LABEL: v_icmp_i16_ule:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    s_clause 0x1
+; GFX11-NEXT:    s_load_b32 s2, s[0:1], 0x2c
+; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x24
+; GFX11-NEXT:    v_mov_b32_e32 v2, 0
+; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-NEXT:    v_cmp_ge_u16_e64 s[2:3], 0x64, s2
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-NEXT:    v_mov_b32_e32 v0, s2
+; GFX11-NEXT:    v_mov_b32_e32 v1, s3
+; GFX11-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX11-NEXT:    s_endpgm
+;
+; SDAG-VI-LABEL: v_icmp_i16_ule:
+; SDAG-VI:       ; %bb.0:
+; SDAG-VI-NEXT:    s_load_dword s2, s[0:1], 0x2c
+; SDAG-VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
+; SDAG-VI-NEXT:    v_mov_b32_e32 v0, 0x64
+; SDAG-VI-NEXT:    s_waitcnt lgkmcnt(0)
+; SDAG-VI-NEXT:    v_cmp_le_u16_e64 s[2:3], s2, v0
+; SDAG-VI-NEXT:    v_mov_b32_e32 v0, s0
+; SDAG-VI-NEXT:    v_mov_b32_e32 v2, s2
+; SDAG-VI-NEXT:    v_mov_b32_e32 v1, s1
+; SDAG-VI-NEXT:    v_mov_b32_e32 v3, s3
+; SDAG-VI-NEXT:    flat_store_dwordx2 v[0:1], v[2:3]
+; SDAG-VI-NEXT:    s_endpgm
+;
+; GFX9-LABEL: v_icmp_i16_ule:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_load_dword s4, s[0:1], 0x2c
+; GFX9-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x24
+; GFX9-NEXT:    v_mov_b32_e32 v0, 0x64
+; GFX9-NEXT:    v_mov_b32_e32 v2, 0
+; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-NEXT:    v_cmp_le_u16_e64 s[0:1], s4, v0
+; GFX9-NEXT:    v_mov_b32_e32 v0, s0
+; GFX9-NEXT:    v_mov_b32_e32 v1, s1
+; GFX9-NEXT:    global_store_dwordx2 v2, v[0:1], s[2:3]
+; GFX9-NEXT:    s_endpgm
+;
+; GISEL-VI-LABEL: v_icmp_i16_ule:
+; GISEL-VI:       ; %bb.0:
+; GISEL-VI-NEXT:    s_load_dword s2, s[0:1], 0x2c
+; GISEL-VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
+; GISEL-VI-NEXT:    v_mov_b32_e32 v0, 0x64
+; GISEL-VI-NEXT:    s_waitcnt lgkmcnt(0)
+; GISEL-VI-NEXT:    v_cmp_le_u16_e64 s[2:3], s2, v0
+; GISEL-VI-NEXT:    v_mov_b32_e32 v0, s2
+; GISEL-VI-NEXT:    v_mov_b32_e32 v3, s1
+; GISEL-VI-NEXT:    v_mov_b32_e32 v1, s3
+; GISEL-VI-NEXT:    v_mov_b32_e32 v2, s0
+; GISEL-VI-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
+; GISEL-VI-NEXT:    s_endpgm
+  %result = call i64 @llvm.amdgcn.icmp.i16(i16 %src, i16 100, i32 37) ; i32 37 is the ule predicate code; checks expect v_cmp_le_u16_e64, while the GFX11 checks use the mirrored v_cmp_ge_u16_e64 with 0x64 as first operand
+  store i64 %result, i64 addrspace(1)* %out
+  ret void
+}
+
+define amdgpu_kernel void @v_icmp_i16_sgt(i64 addrspace(1)* %out, i16 %src) #1 { ; i16 signed greater-than case of llvm.amdgcn.icmp
+; GFX11-LABEL: v_icmp_i16_sgt:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    s_clause 0x1
+; GFX11-NEXT:    s_load_b32 s2, s[0:1], 0x2c
+; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x24
+; GFX11-NEXT:    v_mov_b32_e32 v2, 0
+; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-NEXT:    v_cmp_lt_i16_e64 s[2:3], 0x64, s2
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-NEXT:    v_mov_b32_e32 v0, s2
+; GFX11-NEXT:    v_mov_b32_e32 v1, s3
+; GFX11-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX11-NEXT:    s_endpgm
+;
+; SDAG-VI-LABEL: v_icmp_i16_sgt:
+; SDAG-VI:       ; %bb.0:
+; SDAG-VI-NEXT:    s_load_dword s2, s[0:1], 0x2c
+; SDAG-VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
+; SDAG-VI-NEXT:    v_mov_b32_e32 v0, 0x64
+; SDAG-VI-NEXT:    s_waitcnt lgkmcnt(0)
+; SDAG-VI-NEXT:    v_cmp_gt_i16_e64 s[2:3], s2, v0
+; SDAG-VI-NEXT:    v_mov_b32_e32 v0, s0
+; SDAG-VI-NEXT:    v_mov_b32_e32 v2, s2
+; SDAG-VI-NEXT:    v_mov_b32_e32 v1, s1
+; SDAG-VI-NEXT:    v_mov_b32_e32 v3, s3
+; SDAG-VI-NEXT:    flat_store_dwordx2 v[0:1], v[2:3]
+; SDAG-VI-NEXT:    s_endpgm
+;
+; GFX9-LABEL: v_icmp_i16_sgt:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_load_dword s4, s[0:1], 0x2c
+; GFX9-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x24
+; GFX9-NEXT:    v_mov_b32_e32 v0, 0x64
+; GFX9-NEXT:    v_mov_b32_e32 v2, 0
+; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-NEXT:    v_cmp_gt_i16_e64 s[0:1], s4, v0
+; GFX9-NEXT:    v_mov_b32_e32 v0, s0
+; GFX9-NEXT:    v_mov_b32_e32 v1, s1
+; GFX9-NEXT:    global_store_dwordx2 v2, v[0:1], s[2:3]
+; GFX9-NEXT:    s_endpgm
+;
+; GISEL-VI-LABEL: v_icmp_i16_sgt:
+; GISEL-VI:       ; %bb.0:
+; GISEL-VI-NEXT:    s_load_dword s2, s[0:1], 0x2c
+; GISEL-VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
+; GISEL-VI-NEXT:    v_mov_b32_e32 v0, 0x64
+; GISEL-VI-NEXT:    s_waitcnt lgkmcnt(0)
+; GISEL-VI-NEXT:    v_cmp_gt_i16_e64 s[2:3], s2, v0
+; GISEL-VI-NEXT:    v_mov_b32_e32 v0, s2
+; GISEL-VI-NEXT:    v_mov_b32_e32 v3, s1
+; GISEL-VI-NEXT:    v_mov_b32_e32 v1, s3
+; GISEL-VI-NEXT:    v_mov_b32_e32 v2, s0
+; GISEL-VI-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
+; GISEL-VI-NEXT:    s_endpgm
+  %result = call i64 @llvm.amdgcn.icmp.i16(i16 %src, i16 100, i32 38) ; i32 38 is the sgt predicate code; checks expect v_cmp_gt_i16_e64, while the GFX11 checks use the mirrored v_cmp_lt_i16_e64 with 0x64 as first operand
+  store i64 %result, i64 addrspace(1)* %out
+  ret void
+}
+
+define amdgpu_kernel void @v_icmp_i16_sge(i64 addrspace(1)* %out, i16 %src) { ; i16 signed greater-or-equal case of llvm.amdgcn.icmp
+; GFX11-LABEL: v_icmp_i16_sge:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    s_clause 0x1
+; GFX11-NEXT:    s_load_b32 s2, s[0:1], 0x2c
+; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x24
+; GFX11-NEXT:    v_mov_b32_e32 v2, 0
+; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-NEXT:    v_cmp_le_i16_e64 s[2:3], 0x64, s2
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-NEXT:    v_mov_b32_e32 v0, s2
+; GFX11-NEXT:    v_mov_b32_e32 v1, s3
+; GFX11-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX11-NEXT:    s_endpgm
+;
+; SDAG-VI-LABEL: v_icmp_i16_sge:
+; SDAG-VI:       ; %bb.0:
+; SDAG-VI-NEXT:    s_load_dword s2, s[0:1], 0x2c
+; SDAG-VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
+; SDAG-VI-NEXT:    v_mov_b32_e32 v0, 0x64
+; SDAG-VI-NEXT:    s_waitcnt lgkmcnt(0)
+; SDAG-VI-NEXT:    v_cmp_ge_i16_e64 s[2:3], s2, v0
+; SDAG-VI-NEXT:    v_mov_b32_e32 v0, s0
+; SDAG-VI-NEXT:    v_mov_b32_e32 v2, s2
+; SDAG-VI-NEXT:    v_mov_b32_e32 v1, s1
+; SDAG-VI-NEXT:    v_mov_b32_e32 v3, s3
+; SDAG-VI-NEXT:    flat_store_dwordx2 v[0:1], v[2:3]
+; SDAG-VI-NEXT:    s_endpgm
+;
+; GFX9-LABEL: v_icmp_i16_sge:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_load_dword s4, s[0:1], 0x2c
+; GFX9-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x24
+; GFX9-NEXT:    v_mov_b32_e32 v0, 0x64
+; GFX9-NEXT:    v_mov_b32_e32 v2, 0
+; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-NEXT:    v_cmp_ge_i16_e64 s[0:1], s4, v0
+; GFX9-NEXT:    v_mov_b32_e32 v0, s0
+; GFX9-NEXT:    v_mov_b32_e32 v1, s1
+; GFX9-NEXT:    global_store_dwordx2 v2, v[0:1], s[2:3]
+; GFX9-NEXT:    s_endpgm
+;
+; GISEL-VI-LABEL: v_icmp_i16_sge:
+; GISEL-VI:       ; %bb.0:
+; GISEL-VI-NEXT:    s_load_dword s2, s[0:1], 0x2c
+; GISEL-VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
+; GISEL-VI-NEXT:    v_mov_b32_e32 v0, 0x64
+; GISEL-VI-NEXT:    s_waitcnt lgkmcnt(0)
+; GISEL-VI-NEXT:    v_cmp_ge_i16_e64 s[2:3], s2, v0
+; GISEL-VI-NEXT:    v_mov_b32_e32 v0, s2
+; GISEL-VI-NEXT:    v_mov_b32_e32 v3, s1
+; GISEL-VI-NEXT:    v_mov_b32_e32 v1, s3
+; GISEL-VI-NEXT:    v_mov_b32_e32 v2, s0
+; GISEL-VI-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
+; GISEL-VI-NEXT:    s_endpgm
+  %result = call i64 @llvm.amdgcn.icmp.i16(i16 %src, i16 100, i32 39) ; i32 39 is the sge predicate code; checks expect v_cmp_ge_i16_e64, while the GFX11 checks use the mirrored v_cmp_le_i16_e64 with 0x64 as first operand
+  store i64 %result, i64 addrspace(1)* %out
+  ret void
+}
+
+define amdgpu_kernel void @v_icmp_i16_slt(i64 addrspace(1)* %out, i16 %src) { ; i16 signed less-than case of llvm.amdgcn.icmp
+; GFX11-LABEL: v_icmp_i16_slt:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    s_clause 0x1
+; GFX11-NEXT:    s_load_b32 s2, s[0:1], 0x2c
+; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x24
+; GFX11-NEXT:    v_mov_b32_e32 v2, 0
+; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-NEXT:    v_cmp_gt_i16_e64 s[2:3], 0x64, s2
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-NEXT:    v_mov_b32_e32 v0, s2
+; GFX11-NEXT:    v_mov_b32_e32 v1, s3
+; GFX11-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX11-NEXT:    s_endpgm
+;
+; SDAG-VI-LABEL: v_icmp_i16_slt:
+; SDAG-VI:       ; %bb.0:
+; SDAG-VI-NEXT:    s_load_dword s2, s[0:1], 0x2c
+; SDAG-VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
+; SDAG-VI-NEXT:    v_mov_b32_e32 v0, 0x64
+; SDAG-VI-NEXT:    s_waitcnt lgkmcnt(0)
+; SDAG-VI-NEXT:    v_cmp_lt_i16_e64 s[2:3], s2, v0
+; SDAG-VI-NEXT:    v_mov_b32_e32 v0, s0
+; SDAG-VI-NEXT:    v_mov_b32_e32 v2, s2
+; SDAG-VI-NEXT:    v_mov_b32_e32 v1, s1
+; SDAG-VI-NEXT:    v_mov_b32_e32 v3, s3
+; SDAG-VI-NEXT:    flat_store_dwordx2 v[0:1], v[2:3]
+; SDAG-VI-NEXT:    s_endpgm
+;
+; GFX9-LABEL: v_icmp_i16_slt:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_load_dword s4, s[0:1], 0x2c
+; GFX9-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x24
+; GFX9-NEXT:    v_mov_b32_e32 v0, 0x64
+; GFX9-NEXT:    v_mov_b32_e32 v2, 0
+; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-NEXT:    v_cmp_lt_i16_e64 s[0:1], s4, v0
+; GFX9-NEXT:    v_mov_b32_e32 v0, s0
+; GFX9-NEXT:    v_mov_b32_e32 v1, s1
+; GFX9-NEXT:    global_store_dwordx2 v2, v[0:1], s[2:3]
+; GFX9-NEXT:    s_endpgm
+;
+; GISEL-VI-LABEL: v_icmp_i16_slt:
+; GISEL-VI:       ; %bb.0:
+; GISEL-VI-NEXT:    s_load_dword s2, s[0:1], 0x2c
+; GISEL-VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
+; GISEL-VI-NEXT:    v_mov_b32_e32 v0, 0x64
+; GISEL-VI-NEXT:    s_waitcnt lgkmcnt(0)
+; GISEL-VI-NEXT:    v_cmp_lt_i16_e64 s[2:3], s2, v0
+; GISEL-VI-NEXT:    v_mov_b32_e32 v0, s2
+; GISEL-VI-NEXT:    v_mov_b32_e32 v3, s1
+; GISEL-VI-NEXT:    v_mov_b32_e32 v1, s3
+; GISEL-VI-NEXT:    v_mov_b32_e32 v2, s0
+; GISEL-VI-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
+; GISEL-VI-NEXT:    s_endpgm
+  %result = call i64 @llvm.amdgcn.icmp.i16(i16 %src, i16 100, i32 40) ; i32 40 is the slt predicate code; checks expect v_cmp_lt_i16_e64, while the GFX11 checks use the mirrored v_cmp_gt_i16_e64 with 0x64 as first operand
+  store i64 %result, i64 addrspace(1)* %out
+  ret void
+}
+
+define amdgpu_kernel void @v_icmp_i16_sle(i64 addrspace(1)* %out, i16 %src) { ; i16 signed less-or-equal case of llvm.amdgcn.icmp
+; GFX11-LABEL: v_icmp_i16_sle:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    s_clause 0x1
+; GFX11-NEXT:    s_load_b32 s2, s[0:1], 0x2c
+; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x24
+; GFX11-NEXT:    v_mov_b32_e32 v2, 0
+; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-NEXT:    v_cmp_ge_i16_e64 s[2:3], 0x64, s2
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-NEXT:    v_mov_b32_e32 v0, s2
+; GFX11-NEXT:    v_mov_b32_e32 v1, s3
+; GFX11-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX11-NEXT:    s_endpgm
+;
+; SDAG-VI-LABEL: v_icmp_i16_sle:
+; SDAG-VI:       ; %bb.0:
+; SDAG-VI-NEXT:    s_load_dword s2, s[0:1], 0x2c
+; SDAG-VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
+; SDAG-VI-NEXT:    v_mov_b32_e32 v0, 0x64
+; SDAG-VI-NEXT:    s_waitcnt lgkmcnt(0)
+; SDAG-VI-NEXT:    v_cmp_le_i16_e64 s[2:3], s2, v0
+; SDAG-VI-NEXT:    v_mov_b32_e32 v0, s0
+; SDAG-VI-NEXT:    v_mov_b32_e32 v2, s2
+; SDAG-VI-NEXT:    v_mov_b32_e32 v1, s1
+; SDAG-VI-NEXT:    v_mov_b32_e32 v3, s3
+; SDAG-VI-NEXT:    flat_store_dwordx2 v[0:1], v[2:3]
+; SDAG-VI-NEXT:    s_endpgm
+;
+; GFX9-LABEL: v_icmp_i16_sle:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_load_dword s4, s[0:1], 0x2c
+; GFX9-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x24
+; GFX9-NEXT:    v_mov_b32_e32 v0, 0x64
+; GFX9-NEXT:    v_mov_b32_e32 v2, 0
+; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-NEXT:    v_cmp_le_i16_e64 s[0:1], s4, v0
+; GFX9-NEXT:    v_mov_b32_e32 v0, s0
+; GFX9-NEXT:    v_mov_b32_e32 v1, s1
+; GFX9-NEXT:    global_store_dwordx2 v2, v[0:1], s[2:3]
+; GFX9-NEXT:    s_endpgm
+;
+; GISEL-VI-LABEL: v_icmp_i16_sle:
+; GISEL-VI:       ; %bb.0:
+; GISEL-VI-NEXT:    s_load_dword s2, s[0:1], 0x2c
+; GISEL-VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
+; GISEL-VI-NEXT:    v_mov_b32_e32 v0, 0x64
+; GISEL-VI-NEXT:    s_waitcnt lgkmcnt(0)
+; GISEL-VI-NEXT:    v_cmp_le_i16_e64 s[2:3], s2, v0
+; GISEL-VI-NEXT:    v_mov_b32_e32 v0, s2
+; GISEL-VI-NEXT:    v_mov_b32_e32 v3, s1
+; GISEL-VI-NEXT:    v_mov_b32_e32 v1, s3
+; GISEL-VI-NEXT:    v_mov_b32_e32 v2, s0
+; GISEL-VI-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
+; GISEL-VI-NEXT:    s_endpgm
+  %result = call i64 @llvm.amdgcn.icmp.i16(i16 %src, i16 100, i32 41) ; i32 41 is the sle predicate code; checks expect v_cmp_le_i16_e64, while the GFX11 checks use the mirrored v_cmp_ge_i16_e64 with 0x64 as first operand
+  store i64 %result, i64 addrspace(1)* %out
+  ret void
+}
+
+define amdgpu_kernel void @v_icmp_i1_ne0(i64 addrspace(1)* %out, i32 %a, i32 %b) {
+; GFX11-LABEL: v_icmp_i1_ne0:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    s_load_b128 s[0:3], s[0:1], 0x24
+; GFX11-NEXT:    v_mov_b32_e32 v2, 0
+; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-NEXT:    s_cmp_gt_u32 s2, 1
+; GFX11-NEXT:    s_cselect_b64 s[4:5], -1, 0
+; GFX11-NEXT:    s_cmp_gt_u32 s3, 2
+; GFX11-NEXT:    s_cselect_b64 s[2:3], -1, 0
+; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX11-NEXT:    s_and_b64 s[2:3], s[4:5], s[2:3]
+; GFX11-NEXT:    v_mov_b32_e32 v0, s2
+; GFX11-NEXT:    v_mov_b32_e32 v1, s3
+; GFX11-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX11-NEXT:    s_endpgm
+;
+; VI-LABEL: v_icmp_i1_ne0:
+; VI:       ; %bb.0:
+; VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
+; VI-NEXT:    s_waitcnt lgkmcnt(0)
+; VI-NEXT:    s_cmp_gt_u32 s2, 1
+; VI-NEXT:    s_cselect_b64 s[4:5], -1, 0
+; VI-NEXT:    s_cmp_gt_u32 s3, 2
+; VI-NEXT:    s_cselect_b64 s[2:3], -1, 0
+; VI-NEXT:    s_and_b64 s[2:3], s[4:5], s[2:3]
+; VI-NEXT:    v_mov_b32_e32 v0, s0
+; VI-NEXT:    v_mov_b32_e32 v2, s2
+; VI-NEXT:    v_mov_b32_e32 v1, s1
+; VI-NEXT:    v_mov_b32_e32 v3, s3
+; VI-NEXT:    flat_store_dwordx2 v[0:1], v[2:3]
+; VI-NEXT:    s_endpgm
+;
+; GFX9-LABEL: v_icmp_i1_ne0:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
+; GFX9-NEXT:    v_mov_b32_e32 v2, 0
+; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-NEXT:    s_cmp_gt_u32 s2, 1
+; GFX9-NEXT:    s_cselect_b64 s[4:5], -1, 0
+; GFX9-NEXT:    s_cmp_gt_u32 s3, 2
+; GFX9-NEXT:    s_cselect_b64 s[2:3], -1, 0
+; GFX9-NEXT:    s_and_b64 s[2:3], s[4:5], s[2:3]
+; GFX9-NEXT:    v_mov_b32_e32 v0, s2
+; GFX9-NEXT:    v_mov_b32_e32 v1, s3
+; GFX9-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
+; GFX9-NEXT:    s_endpgm
+  %c0 = icmp ugt i32 %a, 1
+  %c1 = icmp ugt i32 %b, 2
+  %src = and i1 %c0, %c1 ; i1 value built from two scalar compares, used as the intrinsic's first operand
+  %result = call i64 @llvm.amdgcn.icmp.i1(i1 %src, i1 false, i32 33) ; cc 33 is ICMP_NE in llvm CmpInst Predicate, so this is "%src != false"; every check block above stores the s_and_b64 result directly with no v_cmp
+  store i64 %result, i64 addrspace(1)* %out
+  ret void
+}
+
+define amdgpu_ps void @test_intr_icmp_i32_invalid_cc(i64 addrspace(1)* %out, i32 %src) {
+; SDAG-GFX11-LABEL: test_intr_icmp_i32_invalid_cc:
+; SDAG-GFX11:       ; %bb.0:
+; SDAG-GFX11-NEXT:    s_endpgm
+;
+; SDAG-VI-LABEL: test_intr_icmp_i32_invalid_cc:
+; SDAG-VI:       ; %bb.0:
+; SDAG-VI-NEXT:    s_endpgm
+;
+; SDAG-GFX9-LABEL: test_intr_icmp_i32_invalid_cc:
+; SDAG-GFX9:       ; %bb.0:
+; SDAG-GFX9-NEXT:    s_endpgm
+;
+; GISEL-GFX11-LABEL: test_intr_icmp_i32_invalid_cc:
+; GISEL-GFX11:       ; %bb.0:
+; GISEL-GFX11-NEXT:    global_store_b64 v[0:1], v[0:1], off
+; GISEL-GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GISEL-GFX11-NEXT:    s_endpgm
+;
+; GISEL-VI-LABEL: test_intr_icmp_i32_invalid_cc:
+; GISEL-VI:       ; %bb.0:
+; GISEL-VI-NEXT:    flat_store_dwordx2 v[0:1], v[0:1]
+; GISEL-VI-NEXT:    s_endpgm
+;
+; GISEL-GFX9-LABEL: test_intr_icmp_i32_invalid_cc:
+; GISEL-GFX9:       ; %bb.0:
+; GISEL-GFX9-NEXT:    global_store_dwordx2 v[0:1], v[0:1], off
+; GISEL-GFX9-NEXT:    s_endpgm
+  %result = call i64 @llvm.amdgcn.icmp.i32(i32 %src, i32 100, i32 9999) ; 9999 is not a valid CmpInst predicate value; no compare is emitted on either path, and per the checks above SelectionDAG emits only s_endpgm while GlobalISel still emits the store
+  store i64 %result, i64 addrspace(1)* %out
+  ret void
+}
+
+attributes #0 = { nounwind readnone convergent }
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; GCN: {{.*}}


        


More information about the llvm-commits mailing list