[llvm] [AMDGPU] Merge two V_CNDMASK instructions into V_DUAL_CNDMASK (PR #135007)
Ana Mihajlovic via llvm-commits
llvm-commits at lists.llvm.org
Wed Apr 9 08:49:28 PDT 2025
https://github.com/mihajlovicana updated https://github.com/llvm/llvm-project/pull/135007
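The series below teaches the SIFoldOperands pass to invert a VOPC compare (e.g. v_cmp_eq_u32 -> v_cmp_ne_u32) and swap the operands of the V_CNDMASK_B32_e64 selects fed by it, moving an inline constant from src1 into src0. In that form each select fits the shorter VOP2 encoding (v_cndmask_b32_e32), and on gfx1100/gfx1200 adjacent pairs can then be dual-issued as v_dual_cndmask_b32; in the short-select-cndmask.ll checks of patch 2, for instance, v_cndmask_b32_e64 v1, v3, 0, vcc_lo becomes one half of v_dual_cndmask_b32 v1, 0, v3 once the compare is inverted.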
From 51b12c3be2bad4925742b0bd9833b5a77c335a62 Mon Sep 17 00:00:00 2001
From: Ana Mihajlovic <Ana.Mihajlovic at amd.com>
Date: Wed, 9 Apr 2025 15:24:55 +0200
Subject: [PATCH 1/4] precommit
---
.../CodeGen/AMDGPU/short-select-cndmask.ll | 51 +++++++++++++++++++
1 file changed, 51 insertions(+)
create mode 100644 llvm/test/CodeGen/AMDGPU/short-select-cndmask.ll
diff --git a/llvm/test/CodeGen/AMDGPU/short-select-cndmask.ll b/llvm/test/CodeGen/AMDGPU/short-select-cndmask.ll
new file mode 100644
index 0000000000000..2d6810a34afb2
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/short-select-cndmask.ll
@@ -0,0 +1,51 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck %s -check-prefix=GCN
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck %s -check-prefix=GCN
+
+define amdgpu_cs void @test(i32 %a, i32 %x, i32 %y, i32 %p, i32 %q, i32 %r, i32 %s, ptr addrspace(1) %out) {
+; GCN-LABEL: test:
+; GCN: ; %bb.0: ; %.entry
+; GCN-NEXT: v_cmp_eq_u32_e32 vcc_lo, -1, v0
+; GCN-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc_lo
+; GCN-NEXT: v_cndmask_b32_e64 v1, v3, 0, vcc_lo
+; GCN-NEXT: v_cndmask_b32_e64 v2, v4, 0, vcc_lo
+; GCN-NEXT: v_cndmask_b32_e32 v3, v6, v5, vcc_lo
+; GCN-NEXT: global_store_b128 v[7:8], v[0:3], off
+; GCN-NEXT: s_endpgm
+.entry:
+ %vcc = icmp eq i32 %a, -1
+ %val1 = select i1 %vcc, i32 %x, i32 %y
+ %val2 = select i1 %vcc, i32 0, i32 %p
+ %val3 = select i1 %vcc, i32 0, i32 %q
+ %val4 = select i1 %vcc, i32 %r, i32 %s
+ %ret0 = insertelement <4 x i32> poison, i32 %val1, i32 0
+ %ret1 = insertelement <4 x i32> %ret0, i32 %val2, i32 1
+ %ret2 = insertelement <4 x i32> %ret1, i32 %val3, i32 2
+ %ret3 = insertelement <4 x i32> %ret2, i32 %val4, i32 3
+ store <4 x i32> %ret3, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_cs void @test_negative_case(i32 %a, i32 %x, i32 %y, i32 %p, i32 %q, i32 %r, i32 %s, ptr addrspace(1) %out) {
+; GCN-LABEL: test_negative_case:
+; GCN: ; %bb.0: ; %.entry
+; GCN-NEXT: v_cmp_eq_u32_e32 vcc_lo, -1, v0
+; GCN-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc_lo
+; GCN-NEXT: v_cndmask_b32_e64 v1, v3, 0, vcc_lo
+; GCN-NEXT: v_cndmask_b32_e64 v2, v4, 0, vcc_lo
+; GCN-NEXT: v_cndmask_b32_e32 v3, v6, v5, vcc_lo
+; GCN-NEXT: global_store_b128 v[7:8], v[0:3], off
+; GCN-NEXT: s_endpgm
+.entry:
+ %vcc = icmp eq i32 %a, -1
+ %val1 = select i1 %vcc, i32 %x, i32 %y
+ %val2 = select i1 %vcc, i32 0, i32 %p
+ %val3 = select i1 %vcc, i32 0, i32 %q
+ %val4 = select i1 %vcc, i32 %r, i32 %s
+ %ret0 = insertelement <4 x i32> poison, i32 %val1, i32 0
+ %ret1 = insertelement <4 x i32> %ret0, i32 %val2, i32 1
+ %ret2 = insertelement <4 x i32> %ret1, i32 %val3, i32 2
+ %ret3 = insertelement <4 x i32> %ret2, i32 %val4, i32 3
+ store <4 x i32> %ret3, ptr addrspace(1) %out
+ ret void
+}
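Patch 2 adds the rewrite itself: when a compare feeds at least two V_CNDMASK_B32_e64 users whose src1 is an immediate while src0 is not, the pass builds the inverse compare and swaps each user's operand pair. The rewrite leans on the identity select(c, a, b) == select(!c, b, a); here is a minimal standalone C++ sketch of that identity (plain C++ as an illustration, not the LLVM API; cndmask is a hypothetical stand-in for the v_cndmask_b32 semantics dst = vcc ? src1 : src0):

  #include <cassert>

  // dst = cond ? src1 : src0, mirroring v_cndmask_b32.
  static int cndmask(int src0, int src1, bool cond) { return cond ? src1 : src0; }

  int main() {
    for (bool cond : {false, true})
      for (int x : {-5, 0, 7})
        // The short VOP2 encoding only accepts a constant in src0, so
        // "cond ? 0 : x" (constant in src1) is rewritten with the inverted
        // condition as "!cond ? x : 0" (constant in src0).
        assert(cndmask(x, 0, cond) == cndmask(0, x, !cond));
    return 0;
  }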
From b2f1080ae2dbbd94fda0902feba9bac89a5a6385 Mon Sep 17 00:00:00 2001
From: Ana Mihajlovic <Ana.Mihajlovic at amd.com>
Date: Wed, 9 Apr 2025 15:37:22 +0200
Subject: [PATCH 2/4] [AMDGPU] Merge V_CNDMASKS into V_DUAL_CNDMASK
---
llvm/lib/Target/AMDGPU/SIFoldOperands.cpp | 91 ++++++++++++++++++-
.../test/CodeGen/AMDGPU/GlobalISel/uaddsat.ll | 18 ++--
.../test/CodeGen/AMDGPU/GlobalISel/usubsat.ll | 18 ++--
llvm/test/CodeGen/AMDGPU/div_i128.ll | 20 ++--
llvm/test/CodeGen/AMDGPU/div_v2i128.ll | 80 ++++++++--------
.../CodeGen/AMDGPU/extract_vector_dynelt.ll | 6 +-
llvm/test/CodeGen/AMDGPU/fneg-combines.new.ll | 69 +++++++-------
llvm/test/CodeGen/AMDGPU/fptoi.i128.ll | 36 ++++----
.../CodeGen/AMDGPU/insert_vector_dynelt.ll | 47 +++++-----
llvm/test/CodeGen/AMDGPU/itofp.i128.ll | 36 ++++----
.../CodeGen/AMDGPU/short-select-cndmask.ll | 16 ++--
11 files changed, 260 insertions(+), 177 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
index d6acf9e081b9f..4ad538e0b1e5f 100644
--- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -105,6 +105,25 @@ class SIFoldOperandsImpl {
}
}
+ unsigned getInverseCompareOpcode(MachineInstr &MI) const {
+ switch (MI.getOpcode()) {
+ case AMDGPU::V_CMP_EQ_U32_e64:
+ return AMDGPU::V_CMP_NE_U32_e64;
+ case AMDGPU::V_CMP_NE_U32_e64:
+ return AMDGPU::V_CMP_EQ_U32_e64;
+ case AMDGPU::V_CMP_GE_U32_e64:
+ return AMDGPU::V_CMP_LT_U32_e64;
+ case AMDGPU::V_CMP_LE_U32_e64:
+ return AMDGPU::V_CMP_GT_U32_e64;
+ case AMDGPU::V_CMP_GT_U32_e64:
+ return AMDGPU::V_CMP_LE_U32_e64;
+ case AMDGPU::V_CMP_LT_U32_e64:
+ return AMDGPU::V_CMP_GE_U32_e64;
+ default:
+ return 0;
+ }
+ }
+
bool foldCopyToVGPROfScalarAddOfFrameIndex(Register DstReg, Register SrcReg,
MachineInstr &MI) const;
@@ -133,7 +152,8 @@ class SIFoldOperandsImpl {
std::optional<int64_t> getImmOrMaterializedImm(MachineOperand &Op) const;
bool tryConstantFoldOp(MachineInstr *MI) const;
- bool tryFoldCndMask(MachineInstr &MI) const;
+ bool tryFoldCndMask(MachineInstr &MI, Register *RegVCC,
+ Register *NewVCC) const;
bool tryFoldZeroHighBits(MachineInstr &MI) const;
bool foldInstOperand(MachineInstr &MI, MachineOperand &OpToFold) const;
@@ -152,6 +172,9 @@ class SIFoldOperandsImpl {
bool tryOptimizeAGPRPhis(MachineBasicBlock &MBB);
+ bool shouldSwitchOperands(MachineRegisterInfo &MRI, MachineInstr &MI,
+ const SIInstrInfo &TII) const;
+
public:
SIFoldOperandsImpl() = default;
@@ -1459,13 +1482,73 @@ bool SIFoldOperandsImpl::tryConstantFoldOp(MachineInstr *MI) const {
return false;
}
+bool SIFoldOperandsImpl::shouldSwitchOperands(MachineRegisterInfo &MRI,
+ MachineInstr &MI,
+ const SIInstrInfo &TII) const {
+ auto allUses = MRI.use_nodbg_operands(MI.getOperand(5).getReg());
+ unsigned count = 0;
+
+ for (auto &Use : allUses) {
+ if (Use.getParent()->getOpcode() != AMDGPU::V_CNDMASK_B32_e64)
+ return false;
+ MachineOperand *Src0 =
+ TII.getNamedOperand(*Use.getParent(), AMDGPU::OpName::src0);
+ MachineOperand *Src1 =
+ TII.getNamedOperand(*Use.getParent(), AMDGPU::OpName::src1);
+
+ auto src0Imm = getImmOrMaterializedImm(*Src0);
+ auto src1Imm = getImmOrMaterializedImm(*Src1);
+
+ if (!src1Imm && src0Imm)
+ return false;
+ if (src1Imm && !src0Imm)
+ count++;
+ }
+ return (count >= 2);
+}
+
// Try to fold an instruction into a simpler one
-bool SIFoldOperandsImpl::tryFoldCndMask(MachineInstr &MI) const {
+bool SIFoldOperandsImpl::tryFoldCndMask(MachineInstr &MI, Register *RegVCC,
+ Register *NewVCC) const {
unsigned Opc = MI.getOpcode();
if (Opc != AMDGPU::V_CNDMASK_B32_e32 && Opc != AMDGPU::V_CNDMASK_B32_e64 &&
Opc != AMDGPU::V_CNDMASK_B64_PSEUDO)
return false;
+ if (Opc == AMDGPU::V_CNDMASK_B32_e64) {
+ const DebugLoc &DL = MI.getDebugLoc();
+ auto Reg = MI.getOperand(5).getReg();
+
+ if (*RegVCC != Reg) {
+ MachineInstr *DefMI = MRI->getVRegDef(Reg);
+ if (DefMI) {
+ unsigned Opcode = getInverseCompareOpcode(*DefMI);
+ if (Opcode &&
+ SIFoldOperandsImpl::shouldSwitchOperands(*MRI, MI, *TII)) {
+ auto cmpDL = DefMI->getDebugLoc();
+ *NewVCC = MRI->createVirtualRegister(MRI->getRegClass(Reg));
+ *RegVCC = Reg;
+ MachineInstrBuilder inverseCompare = BuildMI(
+ *DefMI->getParent(), DefMI, cmpDL, TII->get(Opcode), *NewVCC);
+
+ inverseCompare.add(DefMI->getOperand(1));
+ inverseCompare.add(DefMI->getOperand(2));
+ }
+ }
+ }
+ if (*RegVCC == Reg) {
+ BuildMI(*MI.getParent(), MI, DL, TII->get(AMDGPU::V_CNDMASK_B32_e64),
+ MI.getOperand(0).getReg())
+ .add(MI.getOperand(3))
+ .add(MI.getOperand(4))
+ .add(MI.getOperand(1))
+ .add(MI.getOperand(2))
+ .addReg(*NewVCC);
+ MI.eraseFromParent();
+ return true;
+ }
+ }
+
MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
if (!Src1->isIdenticalTo(*Src0)) {
@@ -2533,10 +2616,12 @@ bool SIFoldOperandsImpl::run(MachineFunction &MF) {
bool HasNSZ = MFI->hasNoSignedZerosFPMath();
bool Changed = false;
+ Register Reg = 0;
+ Register newVCC = 0;
for (MachineBasicBlock *MBB : depth_first(&MF)) {
MachineOperand *CurrentKnownM0Val = nullptr;
for (auto &MI : make_early_inc_range(*MBB)) {
- Changed |= tryFoldCndMask(MI);
+ Changed |= tryFoldCndMask(MI, &Reg, &newVCC);
if (tryFoldZeroHighBits(MI)) {
Changed = true;
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/uaddsat.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/uaddsat.ll
index d9158e3558395..536504747c971 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/uaddsat.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/uaddsat.ll
@@ -2835,9 +2835,9 @@ define i48 @v_uaddsat_i48(i48 %lhs, i48 %rhs) {
; GFX6-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX6-NEXT: v_lshlrev_b32_e32 v3, 16, v3
; GFX6-NEXT: v_or_b32_e32 v0, v0, v3
-; GFX6-NEXT: v_cmp_ne_u32_e32 vcc, v1, v2
-; GFX6-NEXT: v_cndmask_b32_e64 v0, v0, -1, vcc
-; GFX6-NEXT: v_cndmask_b32_e64 v1, v2, -1, vcc
+; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
+; GFX6-NEXT: v_cndmask_b32_e32 v0, -1, v0, vcc
+; GFX6-NEXT: v_cndmask_b32_e32 v1, -1, v2, vcc
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_uaddsat_i48:
@@ -2944,10 +2944,10 @@ define amdgpu_ps <2 x float> @uaddsat_i48_sv(i48 inreg %lhs, i48 %rhs) {
; GFX6-NEXT: v_lshrrev_b32_e32 v3, 16, v0
; GFX6-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX6-NEXT: v_lshlrev_b32_e32 v3, 16, v3
-; GFX6-NEXT: v_cmp_ne_u32_e32 vcc, v1, v2
+; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
; GFX6-NEXT: v_or_b32_e32 v0, v0, v3
-; GFX6-NEXT: v_cndmask_b32_e64 v1, v2, -1, vcc
-; GFX6-NEXT: v_cndmask_b32_e64 v0, v0, -1, vcc
+; GFX6-NEXT: v_cndmask_b32_e32 v1, -1, v2, vcc
+; GFX6-NEXT: v_cndmask_b32_e32 v0, -1, v0, vcc
; GFX6-NEXT: v_and_b32_e32 v1, 0xffff, v1
; GFX6-NEXT: ; return to shader part epilog
;
@@ -3003,10 +3003,10 @@ define amdgpu_ps <2 x float> @uaddsat_i48_vs(i48 %lhs, i48 inreg %rhs) {
; GFX6-NEXT: v_lshrrev_b32_e32 v3, 16, v0
; GFX6-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX6-NEXT: v_lshlrev_b32_e32 v3, 16, v3
-; GFX6-NEXT: v_cmp_ne_u32_e32 vcc, v1, v2
+; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
; GFX6-NEXT: v_or_b32_e32 v0, v0, v3
-; GFX6-NEXT: v_cndmask_b32_e64 v1, v2, -1, vcc
-; GFX6-NEXT: v_cndmask_b32_e64 v0, v0, -1, vcc
+; GFX6-NEXT: v_cndmask_b32_e32 v1, -1, v2, vcc
+; GFX6-NEXT: v_cndmask_b32_e32 v0, -1, v0, vcc
; GFX6-NEXT: v_and_b32_e32 v1, 0xffff, v1
; GFX6-NEXT: ; return to shader part epilog
;
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/usubsat.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/usubsat.ll
index 1fd139b06417f..1944d1577ae29 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/usubsat.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/usubsat.ll
@@ -2705,9 +2705,9 @@ define i48 @v_usubsat_i48(i48 %lhs, i48 %rhs) {
; GFX6-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX6-NEXT: v_lshlrev_b32_e32 v3, 16, v3
; GFX6-NEXT: v_or_b32_e32 v0, v0, v3
-; GFX6-NEXT: v_cmp_ne_u32_e32 vcc, v1, v2
-; GFX6-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc
-; GFX6-NEXT: v_cndmask_b32_e64 v1, v2, 0, vcc
+; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
+; GFX6-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc
+; GFX6-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_usubsat_i48:
@@ -2815,9 +2815,9 @@ define amdgpu_ps <2 x float> @usubsat_i48_sv(i48 inreg %lhs, i48 %rhs) {
; GFX6-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX6-NEXT: v_lshlrev_b32_e32 v3, 16, v3
; GFX6-NEXT: v_or_b32_e32 v0, v0, v3
-; GFX6-NEXT: v_cmp_ne_u32_e32 vcc, v1, v2
-; GFX6-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc
-; GFX6-NEXT: v_cndmask_b32_e64 v1, v2, 0, vcc
+; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
+; GFX6-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc
+; GFX6-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
; GFX6-NEXT: ; return to shader part epilog
;
; GFX8-LABEL: usubsat_i48_sv:
@@ -2873,9 +2873,9 @@ define amdgpu_ps <2 x float> @usubsat_i48_vs(i48 %lhs, i48 inreg %rhs) {
; GFX6-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX6-NEXT: v_lshlrev_b32_e32 v3, 16, v3
; GFX6-NEXT: v_or_b32_e32 v0, v0, v3
-; GFX6-NEXT: v_cmp_ne_u32_e32 vcc, v1, v2
-; GFX6-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc
-; GFX6-NEXT: v_cndmask_b32_e64 v1, v2, 0, vcc
+; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
+; GFX6-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc
+; GFX6-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
; GFX6-NEXT: ; return to shader part epilog
;
; GFX8-LABEL: usubsat_i48_vs:
diff --git a/llvm/test/CodeGen/AMDGPU/div_i128.ll b/llvm/test/CodeGen/AMDGPU/div_i128.ll
index 06c0417211809..efd633d21dba1 100644
--- a/llvm/test/CodeGen/AMDGPU/div_i128.ll
+++ b/llvm/test/CodeGen/AMDGPU/div_i128.ll
@@ -1287,11 +1287,11 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
; GFX9-G-NEXT: v_xor_b32_e32 v6, 0x7f, v0
; GFX9-G-NEXT: v_or_b32_e32 v14, v6, v2
; GFX9-G-NEXT: v_and_b32_e32 v6, 1, v20
-; GFX9-G-NEXT: v_cmp_ne_u32_e32 vcc, 0, v6
-; GFX9-G-NEXT: v_cndmask_b32_e64 v6, v8, 0, vcc
-; GFX9-G-NEXT: v_cndmask_b32_e64 v7, v9, 0, vcc
-; GFX9-G-NEXT: v_cndmask_b32_e64 v12, v10, 0, vcc
-; GFX9-G-NEXT: v_cndmask_b32_e64 v13, v11, 0, vcc
+; GFX9-G-NEXT: v_cmp_eq_u32_e32 vcc, 0, v6
+; GFX9-G-NEXT: v_cndmask_b32_e32 v6, 0, v8, vcc
+; GFX9-G-NEXT: v_cndmask_b32_e32 v7, 0, v9, vcc
+; GFX9-G-NEXT: v_cndmask_b32_e32 v12, 0, v10, vcc
+; GFX9-G-NEXT: v_cndmask_b32_e32 v13, 0, v11, vcc
; GFX9-G-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[14:15]
; GFX9-G-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc
; GFX9-G-NEXT: v_or_b32_e32 v14, v20, v14
@@ -3414,11 +3414,11 @@ define i128 @v_udiv_i128_vv(i128 %lhs, i128 %rhs) {
; GFX9-G-NEXT: v_xor_b32_e32 v8, 0x7f, v12
; GFX9-G-NEXT: v_or_b32_e32 v16, v8, v14
; GFX9-G-NEXT: v_and_b32_e32 v8, 1, v18
-; GFX9-G-NEXT: v_cmp_ne_u32_e32 vcc, 0, v8
-; GFX9-G-NEXT: v_cndmask_b32_e64 v10, v0, 0, vcc
-; GFX9-G-NEXT: v_cndmask_b32_e64 v11, v1, 0, vcc
-; GFX9-G-NEXT: v_cndmask_b32_e64 v8, v2, 0, vcc
-; GFX9-G-NEXT: v_cndmask_b32_e64 v9, v3, 0, vcc
+; GFX9-G-NEXT: v_cmp_eq_u32_e32 vcc, 0, v8
+; GFX9-G-NEXT: v_cndmask_b32_e32 v10, 0, v0, vcc
+; GFX9-G-NEXT: v_cndmask_b32_e32 v11, 0, v1, vcc
+; GFX9-G-NEXT: v_cndmask_b32_e32 v8, 0, v2, vcc
+; GFX9-G-NEXT: v_cndmask_b32_e32 v9, 0, v3, vcc
; GFX9-G-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[16:17]
; GFX9-G-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc
; GFX9-G-NEXT: v_or_b32_e32 v16, v18, v16
diff --git a/llvm/test/CodeGen/AMDGPU/div_v2i128.ll b/llvm/test/CodeGen/AMDGPU/div_v2i128.ll
index 77b78f1f8a333..07d7276e3b944 100644
--- a/llvm/test/CodeGen/AMDGPU/div_v2i128.ll
+++ b/llvm/test/CodeGen/AMDGPU/div_v2i128.ll
@@ -495,13 +495,13 @@ define <2 x i128> @v_sdiv_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
; GISEL-NEXT: v_or_b32_e32 v8, v9, v8
; GISEL-NEXT: v_and_b32_e32 v9, 1, v9
; GISEL-NEXT: v_and_b32_e32 v8, 1, v8
-; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v9
-; GISEL-NEXT: v_cndmask_b32_e64 v22, v18, 0, vcc
+; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v9
+; GISEL-NEXT: v_cndmask_b32_e32 v22, 0, v18, vcc
; GISEL-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v8
-; GISEL-NEXT: v_cndmask_b32_e64 v8, v20, 0, vcc
-; GISEL-NEXT: v_cndmask_b32_e64 v9, v21, 0, vcc
+; GISEL-NEXT: v_cndmask_b32_e32 v8, 0, v20, vcc
+; GISEL-NEXT: v_cndmask_b32_e32 v9, 0, v21, vcc
; GISEL-NEXT: s_xor_b64 s[4:5], s[4:5], -1
-; GISEL-NEXT: v_cndmask_b32_e64 v23, v19, 0, vcc
+; GISEL-NEXT: v_cndmask_b32_e32 v23, 0, v19, vcc
; GISEL-NEXT: s_and_saveexec_b64 s[6:7], s[4:5]
; GISEL-NEXT: s_cbranch_execz .LBB0_6
; GISEL-NEXT: ; %bb.1: ; %udiv-bb15
@@ -685,12 +685,12 @@ define <2 x i128> @v_sdiv_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
; GISEL-NEXT: v_or_b32_e32 v11, v14, v15
; GISEL-NEXT: v_and_b32_e32 v14, 1, v11
; GISEL-NEXT: v_or_b32_e32 v10, v11, v10
-; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v14
-; GISEL-NEXT: v_cndmask_b32_e64 v14, v6, 0, vcc
+; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v14
+; GISEL-NEXT: v_cndmask_b32_e32 v14, 0, v6, vcc
; GISEL-NEXT: v_and_b32_e32 v16, 1, v10
-; GISEL-NEXT: v_cndmask_b32_e64 v15, v7, 0, vcc
-; GISEL-NEXT: v_cndmask_b32_e64 v10, v12, 0, vcc
-; GISEL-NEXT: v_cndmask_b32_e64 v11, v13, 0, vcc
+; GISEL-NEXT: v_cndmask_b32_e32 v15, 0, v7, vcc
+; GISEL-NEXT: v_cndmask_b32_e32 v10, 0, v12, vcc
+; GISEL-NEXT: v_cndmask_b32_e32 v11, 0, v13, vcc
; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v16
; GISEL-NEXT: s_xor_b64 s[4:5], vcc, -1
; GISEL-NEXT: s_and_saveexec_b64 s[12:13], s[4:5]
@@ -1251,13 +1251,13 @@ define <2 x i128> @v_udiv_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
; GISEL-NEXT: v_or_b32_e32 v2, v3, v2
; GISEL-NEXT: v_and_b32_e32 v3, 1, v3
; GISEL-NEXT: v_and_b32_e32 v2, 1, v2
-; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v3
-; GISEL-NEXT: v_cndmask_b32_e64 v18, v0, 0, vcc
+; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3
+; GISEL-NEXT: v_cndmask_b32_e32 v18, 0, v0, vcc
; GISEL-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v2
-; GISEL-NEXT: v_cndmask_b32_e64 v2, v16, 0, vcc
-; GISEL-NEXT: v_cndmask_b32_e64 v3, v17, 0, vcc
+; GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v16, vcc
+; GISEL-NEXT: v_cndmask_b32_e32 v3, 0, v17, vcc
; GISEL-NEXT: s_xor_b64 s[4:5], s[4:5], -1
-; GISEL-NEXT: v_cndmask_b32_e64 v19, v1, 0, vcc
+; GISEL-NEXT: v_cndmask_b32_e32 v19, 0, v1, vcc
; GISEL-NEXT: s_and_saveexec_b64 s[12:13], s[4:5]
; GISEL-NEXT: s_cbranch_execz .LBB1_6
; GISEL-NEXT: ; %bb.1: ; %udiv-bb15
@@ -1423,12 +1423,12 @@ define <2 x i128> @v_udiv_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
; GISEL-NEXT: v_or_b32_e32 v9, v20, v10
; GISEL-NEXT: v_and_b32_e32 v10, 1, v9
; GISEL-NEXT: v_or_b32_e32 v8, v9, v8
-; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v10
-; GISEL-NEXT: v_cndmask_b32_e64 v10, v4, 0, vcc
+; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v10
+; GISEL-NEXT: v_cndmask_b32_e32 v10, 0, v4, vcc
; GISEL-NEXT: v_and_b32_e32 v20, 1, v8
-; GISEL-NEXT: v_cndmask_b32_e64 v11, v5, 0, vcc
-; GISEL-NEXT: v_cndmask_b32_e64 v8, v6, 0, vcc
-; GISEL-NEXT: v_cndmask_b32_e64 v9, v7, 0, vcc
+; GISEL-NEXT: v_cndmask_b32_e32 v11, 0, v5, vcc
+; GISEL-NEXT: v_cndmask_b32_e32 v8, 0, v6, vcc
+; GISEL-NEXT: v_cndmask_b32_e32 v9, 0, v7, vcc
; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v20
; GISEL-NEXT: s_xor_b64 s[4:5], vcc, -1
; GISEL-NEXT: s_and_saveexec_b64 s[12:13], s[4:5]
@@ -2093,13 +2093,13 @@ define <2 x i128> @v_srem_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
; GISEL-NEXT: v_or_b32_e32 v18, v19, v18
; GISEL-NEXT: v_and_b32_e32 v19, 1, v19
; GISEL-NEXT: v_and_b32_e32 v18, 1, v18
-; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v19
-; GISEL-NEXT: v_cndmask_b32_e64 v31, v16, 0, vcc
+; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v19
+; GISEL-NEXT: v_cndmask_b32_e32 v31, 0, v16, vcc
; GISEL-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v18
-; GISEL-NEXT: v_cndmask_b32_e64 v18, v8, 0, vcc
-; GISEL-NEXT: v_cndmask_b32_e64 v19, v9, 0, vcc
+; GISEL-NEXT: v_cndmask_b32_e32 v18, 0, v8, vcc
+; GISEL-NEXT: v_cndmask_b32_e32 v19, 0, v9, vcc
; GISEL-NEXT: s_xor_b64 s[4:5], s[4:5], -1
-; GISEL-NEXT: v_cndmask_b32_e64 v32, v17, 0, vcc
+; GISEL-NEXT: v_cndmask_b32_e32 v32, 0, v17, vcc
; GISEL-NEXT: s_and_saveexec_b64 s[12:13], s[4:5]
; GISEL-NEXT: s_cbranch_execz .LBB2_6
; GISEL-NEXT: ; %bb.1: ; %udiv-bb15
@@ -2283,12 +2283,12 @@ define <2 x i128> @v_srem_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
; GISEL-NEXT: v_or_b32_e32 v3, v20, v21
; GISEL-NEXT: v_and_b32_e32 v20, 1, v3
; GISEL-NEXT: v_or_b32_e32 v2, v3, v2
-; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v20
-; GISEL-NEXT: v_cndmask_b32_e64 v20, v12, 0, vcc
+; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v20
+; GISEL-NEXT: v_cndmask_b32_e32 v20, 0, v12, vcc
; GISEL-NEXT: v_and_b32_e32 v22, 1, v2
-; GISEL-NEXT: v_cndmask_b32_e64 v21, v13, 0, vcc
-; GISEL-NEXT: v_cndmask_b32_e64 v2, v6, 0, vcc
-; GISEL-NEXT: v_cndmask_b32_e64 v3, v7, 0, vcc
+; GISEL-NEXT: v_cndmask_b32_e32 v21, 0, v13, vcc
+; GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v6, vcc
+; GISEL-NEXT: v_cndmask_b32_e32 v3, 0, v7, vcc
; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v22
; GISEL-NEXT: s_xor_b64 s[4:5], vcc, -1
; GISEL-NEXT: s_and_saveexec_b64 s[12:13], s[4:5]
@@ -2920,13 +2920,13 @@ define <2 x i128> @v_urem_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
; GISEL-NEXT: v_or_b32_e32 v20, v21, v20
; GISEL-NEXT: v_and_b32_e32 v21, 1, v21
; GISEL-NEXT: v_and_b32_e32 v20, 1, v20
-; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v21
-; GISEL-NEXT: v_cndmask_b32_e64 v32, v0, 0, vcc
+; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v21
+; GISEL-NEXT: v_cndmask_b32_e32 v32, 0, v0, vcc
; GISEL-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v20
-; GISEL-NEXT: v_cndmask_b32_e64 v20, v2, 0, vcc
-; GISEL-NEXT: v_cndmask_b32_e64 v21, v3, 0, vcc
+; GISEL-NEXT: v_cndmask_b32_e32 v20, 0, v2, vcc
+; GISEL-NEXT: v_cndmask_b32_e32 v21, 0, v3, vcc
; GISEL-NEXT: s_xor_b64 s[4:5], s[4:5], -1
-; GISEL-NEXT: v_cndmask_b32_e64 v33, v1, 0, vcc
+; GISEL-NEXT: v_cndmask_b32_e32 v33, 0, v1, vcc
; GISEL-NEXT: s_and_saveexec_b64 s[12:13], s[4:5]
; GISEL-NEXT: s_cbranch_execz .LBB3_6
; GISEL-NEXT: ; %bb.1: ; %udiv-bb15
@@ -3092,12 +3092,12 @@ define <2 x i128> @v_urem_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
; GISEL-NEXT: v_or_b32_e32 v19, v26, v24
; GISEL-NEXT: v_and_b32_e32 v24, 1, v19
; GISEL-NEXT: v_or_b32_e32 v18, v19, v18
-; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v24
-; GISEL-NEXT: v_cndmask_b32_e64 v24, v4, 0, vcc
+; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v24
+; GISEL-NEXT: v_cndmask_b32_e32 v24, 0, v4, vcc
; GISEL-NEXT: v_and_b32_e32 v26, 1, v18
-; GISEL-NEXT: v_cndmask_b32_e64 v25, v5, 0, vcc
-; GISEL-NEXT: v_cndmask_b32_e64 v18, v6, 0, vcc
-; GISEL-NEXT: v_cndmask_b32_e64 v19, v7, 0, vcc
+; GISEL-NEXT: v_cndmask_b32_e32 v25, 0, v5, vcc
+; GISEL-NEXT: v_cndmask_b32_e32 v18, 0, v6, vcc
+; GISEL-NEXT: v_cndmask_b32_e32 v19, 0, v7, vcc
; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v26
; GISEL-NEXT: s_xor_b64 s[4:5], vcc, -1
; GISEL-NEXT: s_and_saveexec_b64 s[12:13], s[4:5]
diff --git a/llvm/test/CodeGen/AMDGPU/extract_vector_dynelt.ll b/llvm/test/CodeGen/AMDGPU/extract_vector_dynelt.ll
index 10de973dac0c5..cd1426f868bce 100644
--- a/llvm/test/CodeGen/AMDGPU/extract_vector_dynelt.ll
+++ b/llvm/test/CodeGen/AMDGPU/extract_vector_dynelt.ll
@@ -1282,10 +1282,10 @@ define double @double16_extelt_vec(i32 %sel) {
; GCN-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc
; GCN-NEXT: s_or_b64 vcc, vcc, s[4:5]
; GCN-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
-; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 15, v0
-; GCN-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
+; GCN-NEXT: v_cmp_ne_u32_e32 vcc, 15, v0
+; GCN-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc
; GCN-NEXT: v_mov_b32_e32 v1, 0x40301999
-; GCN-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
+; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
; GCN-NEXT: s_setpc_b64 s[30:31]
entry:
%ext = extractelement <16 x double> <double 1.1, double 2.1, double 3.1, double 4.1, double 5.1, double 6.1, double 7.1, double 8.1, double 9.1, double 10.1, double 11.1, double 12.1, double 13.1, double 14.1, double 15.1, double 16.1>, i32 %sel
diff --git a/llvm/test/CodeGen/AMDGPU/fneg-combines.new.ll b/llvm/test/CodeGen/AMDGPU/fneg-combines.new.ll
index 14f7cbcd0f438..1b471166b5d29 100644
--- a/llvm/test/CodeGen/AMDGPU/fneg-combines.new.ll
+++ b/llvm/test/CodeGen/AMDGPU/fneg-combines.new.ll
@@ -2836,9 +2836,9 @@ define float @v_fneg_select_infloop_regression_f32(float %arg, i1 %arg1) {
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_and_b32_e32 v1, 1, v1
-; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v1
-; GCN-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc
-; GCN-NEXT: v_cndmask_b32_e64 v0, -v0, 0, vcc
+; GCN-NEXT: v_cmp_ne_u32_e32 vcc, 1, v1
+; GCN-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc
+; GCN-NEXT: v_cndmask_b32_e64 v0, 0, -v0, vcc
; GCN-NEXT: s_setpc_b64 s[30:31]
%i = select i1 %arg1, float 0.0, float %arg
%i2 = fneg float %i
@@ -2897,9 +2897,9 @@ define float @v_fneg_select_infloop_regression_inline_imm_f32(float %arg, i1 %ar
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_and_b32_e32 v1, 1, v1
-; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v1
-; GCN-NEXT: v_cndmask_b32_e64 v0, v0, 2.0, vcc
-; GCN-NEXT: v_cndmask_b32_e64 v0, -v0, 2.0, vcc
+; GCN-NEXT: v_cmp_ne_u32_e32 vcc, 1, v1
+; GCN-NEXT: v_cndmask_b32_e32 v0, 2.0, v0, vcc
+; GCN-NEXT: v_cndmask_b32_e64 v0, 2.0, -v0, vcc
; GCN-NEXT: s_setpc_b64 s[30:31]
%i = select i1 %arg1, float 2.0, float %arg
%i2 = fneg float %i
@@ -2958,9 +2958,9 @@ define float @v_fneg_select_infloop_regression_neg_inline_imm_f32(float %arg, i1
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_and_b32_e32 v1, 1, v1
-; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v1
-; GCN-NEXT: v_cndmask_b32_e64 v0, v0, -2.0, vcc
-; GCN-NEXT: v_cndmask_b32_e64 v0, -v0, -2.0, vcc
+; GCN-NEXT: v_cmp_ne_u32_e32 vcc, 1, v1
+; GCN-NEXT: v_cndmask_b32_e32 v0, -2.0, v0, vcc
+; GCN-NEXT: v_cndmask_b32_e64 v0, -2.0, -v0, vcc
; GCN-NEXT: s_setpc_b64 s[30:31]
%i = select i1 %arg1, float -2.0, float %arg
%i2 = fneg float %i
@@ -3068,8 +3068,9 @@ define double @v_fneg_select_infloop_regression_f64(double %arg, i1 %arg1) {
; GCN-NEXT: v_bfrev_b32_e32 v3, 1
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2
; GCN-NEXT: v_cndmask_b32_e64 v1, -v1, v3, vcc
-; GCN-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc
-; GCN-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
+; GCN-NEXT: v_cmp_ne_u32_e32 vcc, 1, v2
+; GCN-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc
+; GCN-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
; GCN-NEXT: s_setpc_b64 s[30:31]
%i = select i1 %arg1, double 0.0, double %arg
%i2 = fneg double %i
@@ -3122,20 +3123,20 @@ define half @v_fneg_select_infloop_regression_f16(half %arg, i1 %arg1) {
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NEXT: v_cvt_f16_f32_e32 v0, v0
; SI-NEXT: v_and_b32_e32 v1, 1, v1
-; SI-NEXT: v_cmp_eq_u32_e32 vcc, 1, v1
+; SI-NEXT: v_cmp_ne_u32_e32 vcc, 1, v1
; SI-NEXT: v_cvt_f32_f16_e32 v0, v0
-; SI-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc
-; SI-NEXT: v_cndmask_b32_e64 v0, -v0, 0, vcc
+; SI-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc
+; SI-NEXT: v_cndmask_b32_e64 v0, 0, -v0, vcc
; SI-NEXT: s_setpc_b64 s[30:31]
;
; VI-LABEL: v_fneg_select_infloop_regression_f16:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT: v_and_b32_e32 v1, 1, v1
-; VI-NEXT: v_cmp_eq_u32_e32 vcc, 1, v1
-; VI-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc
+; VI-NEXT: v_cmp_ne_u32_e32 vcc, 1, v1
+; VI-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc
; VI-NEXT: v_xor_b32_e32 v0, 0x8000, v0
-; VI-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc
+; VI-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc
; VI-NEXT: s_setpc_b64 s[30:31]
%i = select i1 %arg1, half 0.0, half %arg
%i2 = fneg half %i
@@ -3189,10 +3190,10 @@ define <2 x half> @v_fneg_select_infloop_regression_v2f16(<2 x half> %arg, i1 %a
; SI-NEXT: v_lshlrev_b32_e32 v1, 16, v1
; SI-NEXT: v_or_b32_e32 v0, v0, v1
; SI-NEXT: v_and_b32_e32 v1, 1, v2
-; SI-NEXT: v_cmp_eq_u32_e32 vcc, 1, v1
-; SI-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc
+; SI-NEXT: v_cmp_ne_u32_e32 vcc, 1, v1
+; SI-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc
; SI-NEXT: v_xor_b32_e32 v0, 0x80008000, v0
-; SI-NEXT: v_cndmask_b32_e64 v1, v0, 0, vcc
+; SI-NEXT: v_cndmask_b32_e32 v1, 0, v0, vcc
; SI-NEXT: v_cvt_f32_f16_e32 v0, v1
; SI-NEXT: v_lshrrev_b32_e32 v1, 16, v1
; SI-NEXT: v_cvt_f32_f16_e32 v1, v1
@@ -3202,10 +3203,10 @@ define <2 x half> @v_fneg_select_infloop_regression_v2f16(<2 x half> %arg, i1 %a
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT: v_and_b32_e32 v1, 1, v1
-; VI-NEXT: v_cmp_eq_u32_e32 vcc, 1, v1
-; VI-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc
+; VI-NEXT: v_cmp_ne_u32_e32 vcc, 1, v1
+; VI-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc
; VI-NEXT: v_xor_b32_e32 v0, 0x80008000, v0
-; VI-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc
+; VI-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc
; VI-NEXT: s_setpc_b64 s[30:31]
%i = select i1 %arg1, <2 x half> zeroinitializer, <2 x half> %arg
%i2 = fneg <2 x half> %i
@@ -3264,11 +3265,11 @@ define <2 x float> @v_fneg_select_infloop_regression_v2f32(<2 x float> %arg, i1
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_and_b32_e32 v2, 1, v2
; GCN-NEXT: v_bfrev_b32_e32 v3, 1
-; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2
-; GCN-NEXT: v_cndmask_b32_e64 v1, -v1, v3, vcc
-; GCN-NEXT: v_cndmask_b32_e64 v0, -v0, v3, vcc
-; GCN-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc
-; GCN-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
+; GCN-NEXT: v_cmp_ne_u32_e32 vcc, 1, v2
+; GCN-NEXT: v_cndmask_b32_e64 v1, v3, -v1, vcc
+; GCN-NEXT: v_cndmask_b32_e64 v0, v3, -v0, vcc
+; GCN-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc
+; GCN-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
; GCN-NEXT: s_setpc_b64 s[30:31]
%i = select i1 %arg1, <2 x float> zeroinitializer, <2 x float> %arg
%i2 = fneg <2 x float> %i
@@ -3316,9 +3317,9 @@ define float @v_fabs_select_infloop_regression_f32(float %arg, i1 %arg1) {
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_and_b32_e32 v1, 1, v1
-; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v1
-; GCN-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc
-; GCN-NEXT: v_cndmask_b32_e64 v0, |v0|, 0, vcc
+; GCN-NEXT: v_cmp_ne_u32_e32 vcc, 1, v1
+; GCN-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc
+; GCN-NEXT: v_cndmask_b32_e64 v0, 0, |v0|, vcc
; GCN-NEXT: s_setpc_b64 s[30:31]
%i = select i1 %arg1, float 0.0, float %arg
%i2 = call float @llvm.fabs.f32(float %i)
@@ -3367,9 +3368,9 @@ define float @v_fneg_fabs_select_infloop_regression(float %arg, i1 %arg1) {
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_and_b32_e32 v1, 1, v1
-; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v1
-; GCN-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc
-; GCN-NEXT: v_cndmask_b32_e64 v0, -|v0|, 0, vcc
+; GCN-NEXT: v_cmp_ne_u32_e32 vcc, 1, v1
+; GCN-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc
+; GCN-NEXT: v_cndmask_b32_e64 v0, 0, -|v0|, vcc
; GCN-NEXT: s_setpc_b64 s[30:31]
%i = select i1 %arg1, float 0.0, float %arg
%i2 = call float @llvm.fabs.f32(float %i)
diff --git a/llvm/test/CodeGen/AMDGPU/fptoi.i128.ll b/llvm/test/CodeGen/AMDGPU/fptoi.i128.ll
index 3465c782bd700..0ff60af86135b 100644
--- a/llvm/test/CodeGen/AMDGPU/fptoi.i128.ll
+++ b/llvm/test/CodeGen/AMDGPU/fptoi.i128.ll
@@ -241,10 +241,10 @@ define i128 @fptosi_f64_to_i128(double %x) {
; GISEL-NEXT: v_sub_u32_e32 v2, 64, v7
; GISEL-NEXT: v_lshrrev_b64 v[2:3], v2, v[4:5]
; GISEL-NEXT: v_lshlrev_b64 v[4:5], v6, v[4:5]
-; GISEL-NEXT: v_cmp_eq_u32_e64 s[6:7], 0, v7
+; GISEL-NEXT: v_cmp_ne_u32_e64 s[6:7], 0, v7
; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[8:9], v11, v9, v[0:1]
; GISEL-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc
-; GISEL-NEXT: v_cndmask_b32_e64 v12, v2, 0, s[6:7]
+; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, v2, s[6:7]
; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[8:9], v12, v8, v[6:7]
; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[8:9], v10, v8, 0
; GISEL-NEXT: v_mov_b32_e32 v2, v6
@@ -256,7 +256,7 @@ define i128 @fptosi_f64_to_i128(double %x) {
; GISEL-NEXT: v_addc_co_u32_e64 v6, s[10:11], v7, v6, s[10:11]
; GISEL-NEXT: v_addc_co_u32_e64 v4, s[8:9], v6, v4, s[8:9]
; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[8:9], v12, v9, v[4:5]
-; GISEL-NEXT: v_cndmask_b32_e64 v3, v3, 0, s[6:7]
+; GISEL-NEXT: v_cndmask_b32_e64 v3, 0, v3, s[6:7]
; GISEL-NEXT: ; implicit-def: $vgpr9
; GISEL-NEXT: v_mad_u64_u32 v[3:4], s[6:7], v3, v8, v[6:7]
; GISEL-NEXT: ; implicit-def: $vgpr6
@@ -605,10 +605,10 @@ define i128 @fptoui_f64_to_i128(double %x) {
; GISEL-NEXT: v_sub_u32_e32 v2, 64, v7
; GISEL-NEXT: v_lshrrev_b64 v[2:3], v2, v[4:5]
; GISEL-NEXT: v_lshlrev_b64 v[4:5], v6, v[4:5]
-; GISEL-NEXT: v_cmp_eq_u32_e64 s[6:7], 0, v7
+; GISEL-NEXT: v_cmp_ne_u32_e64 s[6:7], 0, v7
; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[8:9], v11, v9, v[0:1]
; GISEL-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc
-; GISEL-NEXT: v_cndmask_b32_e64 v12, v2, 0, s[6:7]
+; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, v2, s[6:7]
; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[8:9], v12, v8, v[6:7]
; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[8:9], v10, v8, 0
; GISEL-NEXT: v_mov_b32_e32 v2, v6
@@ -620,7 +620,7 @@ define i128 @fptoui_f64_to_i128(double %x) {
; GISEL-NEXT: v_addc_co_u32_e64 v6, s[10:11], v7, v6, s[10:11]
; GISEL-NEXT: v_addc_co_u32_e64 v4, s[8:9], v6, v4, s[8:9]
; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[8:9], v12, v9, v[4:5]
-; GISEL-NEXT: v_cndmask_b32_e64 v3, v3, 0, s[6:7]
+; GISEL-NEXT: v_cndmask_b32_e64 v3, 0, v3, s[6:7]
; GISEL-NEXT: ; implicit-def: $vgpr9
; GISEL-NEXT: v_mad_u64_u32 v[3:4], s[6:7], v3, v8, v[6:7]
; GISEL-NEXT: ; implicit-def: $vgpr6
@@ -962,10 +962,10 @@ define i128 @fptosi_f32_to_i128(float %x) {
; GISEL-NEXT: v_sub_u32_e32 v2, 64, v7
; GISEL-NEXT: v_lshrrev_b64 v[2:3], v2, v[4:5]
; GISEL-NEXT: v_lshlrev_b64 v[4:5], v6, v[4:5]
-; GISEL-NEXT: v_cmp_eq_u32_e64 s[6:7], 0, v7
+; GISEL-NEXT: v_cmp_ne_u32_e64 s[6:7], 0, v7
; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[8:9], v11, v8, v[0:1]
; GISEL-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc
-; GISEL-NEXT: v_cndmask_b32_e64 v12, v2, 0, s[6:7]
+; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, v2, s[6:7]
; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[8:9], v12, v9, v[6:7]
; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[8:9], v10, v9, 0
; GISEL-NEXT: v_mov_b32_e32 v2, v6
@@ -977,7 +977,7 @@ define i128 @fptosi_f32_to_i128(float %x) {
; GISEL-NEXT: v_addc_co_u32_e64 v6, s[10:11], v7, v6, s[10:11]
; GISEL-NEXT: v_addc_co_u32_e64 v4, s[8:9], v6, v4, s[8:9]
; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[8:9], v12, v8, v[4:5]
-; GISEL-NEXT: v_cndmask_b32_e64 v3, v3, 0, s[6:7]
+; GISEL-NEXT: v_cndmask_b32_e64 v3, 0, v3, s[6:7]
; GISEL-NEXT: ; implicit-def: $vgpr8
; GISEL-NEXT: v_mad_u64_u32 v[3:4], s[6:7], v3, v9, v[6:7]
; GISEL-NEXT: ; implicit-def: $vgpr6
@@ -1313,10 +1313,10 @@ define i128 @fptoui_f32_to_i128(float %x) {
; GISEL-NEXT: v_sub_u32_e32 v2, 64, v7
; GISEL-NEXT: v_lshrrev_b64 v[2:3], v2, v[4:5]
; GISEL-NEXT: v_lshlrev_b64 v[4:5], v6, v[4:5]
-; GISEL-NEXT: v_cmp_eq_u32_e64 s[6:7], 0, v7
+; GISEL-NEXT: v_cmp_ne_u32_e64 s[6:7], 0, v7
; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[8:9], v11, v8, v[0:1]
; GISEL-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc
-; GISEL-NEXT: v_cndmask_b32_e64 v12, v2, 0, s[6:7]
+; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, v2, s[6:7]
; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[8:9], v12, v9, v[6:7]
; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[8:9], v10, v9, 0
; GISEL-NEXT: v_mov_b32_e32 v2, v6
@@ -1328,7 +1328,7 @@ define i128 @fptoui_f32_to_i128(float %x) {
; GISEL-NEXT: v_addc_co_u32_e64 v6, s[10:11], v7, v6, s[10:11]
; GISEL-NEXT: v_addc_co_u32_e64 v4, s[8:9], v6, v4, s[8:9]
; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[8:9], v12, v8, v[4:5]
-; GISEL-NEXT: v_cndmask_b32_e64 v3, v3, 0, s[6:7]
+; GISEL-NEXT: v_cndmask_b32_e64 v3, 0, v3, s[6:7]
; GISEL-NEXT: ; implicit-def: $vgpr8
; GISEL-NEXT: v_mad_u64_u32 v[3:4], s[6:7], v3, v9, v[6:7]
; GISEL-NEXT: ; implicit-def: $vgpr6
@@ -1692,8 +1692,8 @@ define i128 @fptosi_bf16_to_i128(bfloat %x) {
; GISEL-NEXT: v_lshlrev_b64 v[4:5], v4, v[6:7]
; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[8:9], v12, v9, v[0:1]
; GISEL-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc
-; GISEL-NEXT: v_cmp_eq_u32_e64 s[6:7], 0, v10
-; GISEL-NEXT: v_cndmask_b32_e64 v10, v2, 0, s[6:7]
+; GISEL-NEXT: v_cmp_ne_u32_e64 s[6:7], 0, v10
+; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, v2, s[6:7]
; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[8:9], v10, v8, v[6:7]
; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[8:9], v11, v8, 0
; GISEL-NEXT: v_mov_b32_e32 v2, v6
@@ -1705,7 +1705,7 @@ define i128 @fptosi_bf16_to_i128(bfloat %x) {
; GISEL-NEXT: v_addc_co_u32_e64 v6, s[10:11], v7, v6, s[10:11]
; GISEL-NEXT: v_addc_co_u32_e64 v4, s[8:9], v6, v4, s[8:9]
; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[8:9], v10, v9, v[4:5]
-; GISEL-NEXT: v_cndmask_b32_e64 v3, v3, 0, s[6:7]
+; GISEL-NEXT: v_cndmask_b32_e64 v3, 0, v3, s[6:7]
; GISEL-NEXT: ; implicit-def: $vgpr5
; GISEL-NEXT: v_mad_u64_u32 v[3:4], s[6:7], v3, v8, v[6:7]
; GISEL-NEXT: ; implicit-def: $vgpr6_vgpr7
@@ -2039,8 +2039,8 @@ define i128 @fptoui_bf16_to_i128(bfloat %x) {
; GISEL-NEXT: v_lshlrev_b64 v[4:5], v4, v[6:7]
; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[8:9], v12, v9, v[0:1]
; GISEL-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc
-; GISEL-NEXT: v_cmp_eq_u32_e64 s[6:7], 0, v10
-; GISEL-NEXT: v_cndmask_b32_e64 v10, v2, 0, s[6:7]
+; GISEL-NEXT: v_cmp_ne_u32_e64 s[6:7], 0, v10
+; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, v2, s[6:7]
; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[8:9], v10, v8, v[6:7]
; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[8:9], v11, v8, 0
; GISEL-NEXT: v_mov_b32_e32 v2, v6
@@ -2052,7 +2052,7 @@ define i128 @fptoui_bf16_to_i128(bfloat %x) {
; GISEL-NEXT: v_addc_co_u32_e64 v6, s[10:11], v7, v6, s[10:11]
; GISEL-NEXT: v_addc_co_u32_e64 v4, s[8:9], v6, v4, s[8:9]
; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[8:9], v10, v9, v[4:5]
-; GISEL-NEXT: v_cndmask_b32_e64 v3, v3, 0, s[6:7]
+; GISEL-NEXT: v_cndmask_b32_e64 v3, 0, v3, s[6:7]
; GISEL-NEXT: ; implicit-def: $vgpr5
; GISEL-NEXT: v_mad_u64_u32 v[3:4], s[6:7], v3, v8, v[6:7]
; GISEL-NEXT: ; implicit-def: $vgpr6_vgpr7
diff --git a/llvm/test/CodeGen/AMDGPU/insert_vector_dynelt.ll b/llvm/test/CodeGen/AMDGPU/insert_vector_dynelt.ll
index 4b9da7b49e997..e649c3034f35b 100644
--- a/llvm/test/CodeGen/AMDGPU/insert_vector_dynelt.ll
+++ b/llvm/test/CodeGen/AMDGPU/insert_vector_dynelt.ll
@@ -1921,30 +1921,31 @@ define <8 x double> @double8_inselt_vec(<8 x double> %vec, i32 %sel) {
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 0, v16
-; GCN-NEXT: v_mov_b32_e32 v17, 0x3ff00000
; GCN-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc
-; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v17, vcc
-; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v16
-; GCN-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc
-; GCN-NEXT: v_cndmask_b32_e32 v3, v3, v17, vcc
-; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 2, v16
-; GCN-NEXT: v_cndmask_b32_e64 v4, v4, 0, vcc
-; GCN-NEXT: v_cndmask_b32_e32 v5, v5, v17, vcc
-; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 3, v16
-; GCN-NEXT: v_cndmask_b32_e64 v6, v6, 0, vcc
-; GCN-NEXT: v_cndmask_b32_e32 v7, v7, v17, vcc
-; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 4, v16
-; GCN-NEXT: v_cndmask_b32_e64 v8, v8, 0, vcc
-; GCN-NEXT: v_cndmask_b32_e32 v9, v9, v17, vcc
-; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 5, v16
-; GCN-NEXT: v_cndmask_b32_e64 v10, v10, 0, vcc
-; GCN-NEXT: v_cndmask_b32_e32 v11, v11, v17, vcc
-; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 6, v16
-; GCN-NEXT: v_cndmask_b32_e64 v12, v12, 0, vcc
-; GCN-NEXT: v_cndmask_b32_e32 v13, v13, v17, vcc
-; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 7, v16
-; GCN-NEXT: v_cndmask_b32_e64 v14, v14, 0, vcc
-; GCN-NEXT: v_cndmask_b32_e32 v15, v15, v17, vcc
+; GCN-NEXT: v_mov_b32_e32 v17, 0x3ff00000
+; GCN-NEXT: v_cmp_ne_u32_e32 vcc, 0, v16
+; GCN-NEXT: v_cndmask_b32_e32 v1, v17, v1, vcc
+; GCN-NEXT: v_cmp_ne_u32_e32 vcc, 1, v16
+; GCN-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
+; GCN-NEXT: v_cndmask_b32_e32 v3, v17, v3, vcc
+; GCN-NEXT: v_cmp_ne_u32_e32 vcc, 2, v16
+; GCN-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
+; GCN-NEXT: v_cndmask_b32_e32 v5, v17, v5, vcc
+; GCN-NEXT: v_cmp_ne_u32_e32 vcc, 3, v16
+; GCN-NEXT: v_cndmask_b32_e32 v6, 0, v6, vcc
+; GCN-NEXT: v_cndmask_b32_e32 v7, v17, v7, vcc
+; GCN-NEXT: v_cmp_ne_u32_e32 vcc, 4, v16
+; GCN-NEXT: v_cndmask_b32_e32 v8, 0, v8, vcc
+; GCN-NEXT: v_cndmask_b32_e32 v9, v17, v9, vcc
+; GCN-NEXT: v_cmp_ne_u32_e32 vcc, 5, v16
+; GCN-NEXT: v_cndmask_b32_e32 v10, 0, v10, vcc
+; GCN-NEXT: v_cndmask_b32_e32 v11, v17, v11, vcc
+; GCN-NEXT: v_cmp_ne_u32_e32 vcc, 6, v16
+; GCN-NEXT: v_cndmask_b32_e32 v12, 0, v12, vcc
+; GCN-NEXT: v_cndmask_b32_e32 v13, v17, v13, vcc
+; GCN-NEXT: v_cmp_ne_u32_e32 vcc, 7, v16
+; GCN-NEXT: v_cndmask_b32_e32 v14, 0, v14, vcc
+; GCN-NEXT: v_cndmask_b32_e32 v15, v17, v15, vcc
; GCN-NEXT: s_setpc_b64 s[30:31]
entry:
%v = insertelement <8 x double> %vec, double 1.000000e+00, i32 %sel
diff --git a/llvm/test/CodeGen/AMDGPU/itofp.i128.ll b/llvm/test/CodeGen/AMDGPU/itofp.i128.ll
index c316ec71863d0..6bfeda6a1a9e5 100644
--- a/llvm/test/CodeGen/AMDGPU/itofp.i128.ll
+++ b/llvm/test/CodeGen/AMDGPU/itofp.i128.ll
@@ -214,11 +214,11 @@ define float @sitofp_i128_to_f32(i128 %x) {
; GISEL-NEXT: v_cmp_gt_u32_e32 vcc, 64, v14
; GISEL-NEXT: v_cndmask_b32_e32 v5, v11, v15, vcc
; GISEL-NEXT: v_cndmask_b32_e32 v11, v12, v16, vcc
-; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v14
+; GISEL-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v14
; GISEL-NEXT: v_cndmask_b32_e32 v9, 0, v9, vcc
; GISEL-NEXT: v_cndmask_b32_e32 v10, 0, v10, vcc
-; GISEL-NEXT: v_cndmask_b32_e64 v5, v5, -1, s[4:5]
-; GISEL-NEXT: v_cndmask_b32_e64 v11, v11, -1, s[4:5]
+; GISEL-NEXT: v_cndmask_b32_e64 v5, -1, v5, s[4:5]
+; GISEL-NEXT: v_cndmask_b32_e64 v11, -1, v11, s[4:5]
; GISEL-NEXT: v_and_b32_e32 v2, v9, v2
; GISEL-NEXT: v_and_b32_e32 v3, v10, v3
; GISEL-NEXT: v_and_or_b32 v0, v5, v0, v2
@@ -459,11 +459,11 @@ define float @uitofp_i128_to_f32(i128 %x) {
; GISEL-NEXT: v_cmp_gt_u32_e32 vcc, 64, v13
; GISEL-NEXT: v_cndmask_b32_e32 v5, v10, v14, vcc
; GISEL-NEXT: v_cndmask_b32_e32 v10, v11, v15, vcc
-; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v13
+; GISEL-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v13
; GISEL-NEXT: v_cndmask_b32_e32 v8, 0, v8, vcc
; GISEL-NEXT: v_cndmask_b32_e32 v9, 0, v9, vcc
-; GISEL-NEXT: v_cndmask_b32_e64 v5, v5, -1, s[4:5]
-; GISEL-NEXT: v_cndmask_b32_e64 v10, v10, -1, s[4:5]
+; GISEL-NEXT: v_cndmask_b32_e64 v5, -1, v5, s[4:5]
+; GISEL-NEXT: v_cndmask_b32_e64 v10, -1, v10, s[4:5]
; GISEL-NEXT: v_and_b32_e32 v2, v8, v2
; GISEL-NEXT: v_and_b32_e32 v3, v9, v3
; GISEL-NEXT: v_and_or_b32 v0, v5, v0, v2
@@ -746,11 +746,11 @@ define double @sitofp_i128_to_f64(i128 %x) {
; GISEL-NEXT: v_cmp_gt_u32_e32 vcc, 64, v15
; GISEL-NEXT: v_cndmask_b32_e32 v9, v12, v16, vcc
; GISEL-NEXT: v_cndmask_b32_e32 v12, v13, v17, vcc
-; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v15
+; GISEL-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v15
; GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc
; GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
-; GISEL-NEXT: v_cndmask_b32_e64 v9, v9, -1, s[4:5]
-; GISEL-NEXT: v_cndmask_b32_e64 v12, v12, -1, s[4:5]
+; GISEL-NEXT: v_cndmask_b32_e64 v9, -1, v9, s[4:5]
+; GISEL-NEXT: v_cndmask_b32_e64 v12, -1, v12, s[4:5]
; GISEL-NEXT: v_and_b32_e32 v0, v0, v4
; GISEL-NEXT: v_and_b32_e32 v1, v1, v5
; GISEL-NEXT: v_and_or_b32 v0, v9, v2, v0
@@ -1023,11 +1023,11 @@ define double @uitofp_i128_to_f64(i128 %x) {
; GISEL-NEXT: v_cmp_gt_u32_e32 vcc, 64, v15
; GISEL-NEXT: v_cndmask_b32_e32 v8, v12, v16, vcc
; GISEL-NEXT: v_cndmask_b32_e32 v12, v13, v17, vcc
-; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v15
+; GISEL-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v15
; GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
; GISEL-NEXT: v_cndmask_b32_e32 v5, 0, v5, vcc
-; GISEL-NEXT: v_cndmask_b32_e64 v8, v8, -1, s[4:5]
-; GISEL-NEXT: v_cndmask_b32_e64 v12, v12, -1, s[4:5]
+; GISEL-NEXT: v_cndmask_b32_e64 v8, -1, v8, s[4:5]
+; GISEL-NEXT: v_cndmask_b32_e64 v12, -1, v12, s[4:5]
; GISEL-NEXT: v_and_b32_e32 v2, v4, v2
; GISEL-NEXT: v_and_b32_e32 v3, v5, v3
; GISEL-NEXT: v_and_or_b32 v0, v8, v0, v2
@@ -1305,11 +1305,11 @@ define half @sitofp_i128_to_f16(i128 %x) {
; GISEL-NEXT: v_cmp_gt_u32_e32 vcc, 64, v14
; GISEL-NEXT: v_cndmask_b32_e32 v5, v11, v15, vcc
; GISEL-NEXT: v_cndmask_b32_e32 v11, v12, v16, vcc
-; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v14
+; GISEL-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v14
; GISEL-NEXT: v_cndmask_b32_e32 v9, 0, v9, vcc
; GISEL-NEXT: v_cndmask_b32_e32 v10, 0, v10, vcc
-; GISEL-NEXT: v_cndmask_b32_e64 v5, v5, -1, s[4:5]
-; GISEL-NEXT: v_cndmask_b32_e64 v11, v11, -1, s[4:5]
+; GISEL-NEXT: v_cndmask_b32_e64 v5, -1, v5, s[4:5]
+; GISEL-NEXT: v_cndmask_b32_e64 v11, -1, v11, s[4:5]
; GISEL-NEXT: v_and_b32_e32 v2, v9, v2
; GISEL-NEXT: v_and_b32_e32 v3, v10, v3
; GISEL-NEXT: v_and_or_b32 v0, v5, v0, v2
@@ -1552,11 +1552,11 @@ define half @uitofp_i128_to_f16(i128 %x) {
; GISEL-NEXT: v_cmp_gt_u32_e32 vcc, 64, v13
; GISEL-NEXT: v_cndmask_b32_e32 v5, v10, v14, vcc
; GISEL-NEXT: v_cndmask_b32_e32 v10, v11, v15, vcc
-; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v13
+; GISEL-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v13
; GISEL-NEXT: v_cndmask_b32_e32 v8, 0, v8, vcc
; GISEL-NEXT: v_cndmask_b32_e32 v9, 0, v9, vcc
-; GISEL-NEXT: v_cndmask_b32_e64 v5, v5, -1, s[4:5]
-; GISEL-NEXT: v_cndmask_b32_e64 v10, v10, -1, s[4:5]
+; GISEL-NEXT: v_cndmask_b32_e64 v5, -1, v5, s[4:5]
+; GISEL-NEXT: v_cndmask_b32_e64 v10, -1, v10, s[4:5]
; GISEL-NEXT: v_and_b32_e32 v2, v8, v2
; GISEL-NEXT: v_and_b32_e32 v3, v9, v3
; GISEL-NEXT: v_and_or_b32 v0, v5, v0, v2
diff --git a/llvm/test/CodeGen/AMDGPU/short-select-cndmask.ll b/llvm/test/CodeGen/AMDGPU/short-select-cndmask.ll
index 2d6810a34afb2..8f9b56c42de64 100644
--- a/llvm/test/CodeGen/AMDGPU/short-select-cndmask.ll
+++ b/llvm/test/CodeGen/AMDGPU/short-select-cndmask.ll
@@ -5,11 +5,9 @@
define amdgpu_cs void @test(i32 %a, i32 %x, i32 %y, i32 %p, i32 %q, i32 %r, i32 %s, ptr addrspace(1) %out) {
; GCN-LABEL: test:
; GCN: ; %bb.0: ; %.entry
-; GCN-NEXT: v_cmp_eq_u32_e32 vcc_lo, -1, v0
-; GCN-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc_lo
-; GCN-NEXT: v_cndmask_b32_e64 v1, v3, 0, vcc_lo
-; GCN-NEXT: v_cndmask_b32_e64 v2, v4, 0, vcc_lo
-; GCN-NEXT: v_cndmask_b32_e32 v3, v6, v5, vcc_lo
+; GCN-NEXT: v_cmp_ne_u32_e32 vcc_lo, -1, v0
+; GCN-NEXT: v_dual_cndmask_b32 v0, v1, v2 :: v_dual_cndmask_b32 v1, 0, v3
+; GCN-NEXT: v_dual_cndmask_b32 v2, 0, v4 :: v_dual_cndmask_b32 v3, v5, v6
; GCN-NEXT: global_store_b128 v[7:8], v[0:3], off
; GCN-NEXT: s_endpgm
.entry:
@@ -29,11 +27,9 @@ define amdgpu_cs void @test(i32 %a, i32 %x, i32 %y, i32 %p, i32 %q, i32 %r, i32
define amdgpu_cs void @test_negative_case(i32 %a, i32 %x, i32 %y, i32 %p, i32 %q, i32 %r, i32 %s, ptr addrspace(1) %out) {
; GCN-LABEL: test_negative_case:
; GCN: ; %bb.0: ; %.entry
-; GCN-NEXT: v_cmp_eq_u32_e32 vcc_lo, -1, v0
-; GCN-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc_lo
-; GCN-NEXT: v_cndmask_b32_e64 v1, v3, 0, vcc_lo
-; GCN-NEXT: v_cndmask_b32_e64 v2, v4, 0, vcc_lo
-; GCN-NEXT: v_cndmask_b32_e32 v3, v6, v5, vcc_lo
+; GCN-NEXT: v_cmp_ne_u32_e32 vcc_lo, -1, v0
+; GCN-NEXT: v_dual_cndmask_b32 v0, v1, v2 :: v_dual_cndmask_b32 v1, 0, v3
+; GCN-NEXT: v_dual_cndmask_b32 v2, 0, v4 :: v_dual_cndmask_b32 v3, v5, v6
; GCN-NEXT: global_store_b128 v[7:8], v[0:3], off
; GCN-NEXT: s_endpgm
.entry:
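Patch 3 relaxes the profitability threshold from two qualifying V_CNDMASK users to one (count >= 1): even without a partner select to pair into a v_dual_cndmask_b32, switching a lone select to the VOP2 encoding still shrinks the instruction, which is the code-size reduction the commit subject refers to.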
From e7e34626dc8355b30a07ea2fc3c84d02d8f56bfb Mon Sep 17 00:00:00 2001
From: Ana Mihajlovic <Ana.Mihajlovic at amd.com>
Date: Wed, 9 Apr 2025 17:08:40 +0200
Subject: [PATCH 3/4] cover case for single v_cndmask to reduce code size
---
llvm/lib/Target/AMDGPU/SIFoldOperands.cpp | 15 +-
.../AMDGPU/GlobalISel/extractelement.ll | 204 +++++++++---------
.../test/CodeGen/AMDGPU/GlobalISel/saddsat.ll | 137 ++++++------
...amdgpu-codegenprepare-fold-binop-select.ll | 8 +-
llvm/test/CodeGen/AMDGPU/ctlz.ll | 20 +-
llvm/test/CodeGen/AMDGPU/ctlz_zero_undef.ll | 22 +-
llvm/test/CodeGen/AMDGPU/cttz.ll | 20 +-
llvm/test/CodeGen/AMDGPU/cttz_zero_undef.ll | 12 +-
llvm/test/CodeGen/AMDGPU/fneg-combines.new.ll | 3 +-
.../CodeGen/AMDGPU/insert_vector_dynelt.ll | 5 +-
.../CodeGen/AMDGPU/private-memory-atomics.ll | 6 +-
11 files changed, 231 insertions(+), 221 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
index 4ad538e0b1e5f..1ec0ab9b5bd9c 100644
--- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -119,6 +119,19 @@ class SIFoldOperandsImpl {
return AMDGPU::V_CMP_LE_U32_e64;
case AMDGPU::V_CMP_LT_U32_e64:
return AMDGPU::V_CMP_GE_U32_e64;
+
+ // case AMDGPU::V_CMP_EQ_U32_e64:
+ // return AMDGPU::V_CMP_NE_U32_e64;
+ // case AMDGPU::V_CMP_NE_U32_e64:
+ // return AMDGPU::V_CMP_EQ_U32_e64;
+ // case AMDGPU::V_CMP_GE_U32_e64:
+ // return AMDGPU::V_CMP_LT_U32_e64;
+ // case AMDGPU::V_CMP_LE_U32_e64:
+ // return AMDGPU::V_CMP_GT_U32_e64;
+ // case AMDGPU::V_CMP_GT_U32_e64:
+ // return AMDGPU::V_CMP_LE_U32_e64;
+ // case AMDGPU::V_CMP_LT_U32_e64:
+ // return AMDGPU::V_CMP_GE_U32_e64;
default:
return 0;
}
@@ -1504,7 +1517,7 @@ bool SIFoldOperandsImpl::shouldSwitchOperands(MachineRegisterInfo &MRI,
if (src1Imm && !src0Imm)
count++;
}
- return (count >= 2);
+ return (count >= 1);
}
// Try to fold an instruction into a simpler one
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.ll
index 31a229a908142..772b72ac5efa2 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.ll
@@ -11,22 +11,22 @@ define float @dyn_extract_v8f32_const_s_v(i32 %sel) {
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
; GCN-NEXT: v_mov_b32_e32 v1, 0x40400000
; GCN-NEXT: v_cndmask_b32_e64 v6, 1.0, 2.0, vcc
-; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 2, v0
-; GCN-NEXT: v_cndmask_b32_e32 v1, v6, v1, vcc
-; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 3, v0
+; GCN-NEXT: v_cmp_ne_u32_e32 vcc, 2, v0
+; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v6, vcc
+; GCN-NEXT: v_cmp_ne_u32_e32 vcc, 3, v0
; GCN-NEXT: v_mov_b32_e32 v2, 0x40a00000
-; GCN-NEXT: v_cndmask_b32_e64 v1, v1, 4.0, vcc
-; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 4, v0
+; GCN-NEXT: v_cndmask_b32_e32 v1, 4.0, v1, vcc
+; GCN-NEXT: v_cmp_ne_u32_e32 vcc, 4, v0
; GCN-NEXT: v_mov_b32_e32 v3, 0x40c00000
-; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
-; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 5, v0
+; GCN-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
+; GCN-NEXT: v_cmp_ne_u32_e32 vcc, 5, v0
; GCN-NEXT: v_mov_b32_e32 v4, 0x40e00000
-; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
-; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 6, v0
+; GCN-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
+; GCN-NEXT: v_cmp_ne_u32_e32 vcc, 6, v0
; GCN-NEXT: v_mov_b32_e32 v5, 0x41000000
-; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc
-; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 7, v0
-; GCN-NEXT: v_cndmask_b32_e32 v0, v1, v5, vcc
+; GCN-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc
+; GCN-NEXT: v_cmp_ne_u32_e32 vcc, 7, v0
+; GCN-NEXT: v_cndmask_b32_e32 v0, v5, v1, vcc
; GCN-NEXT: s_setpc_b64 s[30:31]
;
; GFX10PLUS-LABEL: dyn_extract_v8f32_const_s_v:
@@ -34,18 +34,18 @@ define float @dyn_extract_v8f32_const_s_v(i32 %sel) {
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, 1.0, 2.0, vcc_lo
-; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v0
-; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, 0x40400000, vcc_lo
-; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v0
-; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, 4.0, vcc_lo
-; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v0
-; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, 0x40a00000, vcc_lo
-; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v0
-; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, 0x40c00000, vcc_lo
-; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v0
-; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, 0x40e00000, vcc_lo
-; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v0
-; GFX10PLUS-NEXT: v_cndmask_b32_e64 v0, v1, 0x41000000, vcc_lo
+; GFX10PLUS-NEXT: v_cmp_ne_u32_e32 vcc_lo, 2, v0
+; GFX10PLUS-NEXT: v_cndmask_b32_e32 v1, 0x40400000, v1, vcc_lo
+; GFX10PLUS-NEXT: v_cmp_ne_u32_e32 vcc_lo, 3, v0
+; GFX10PLUS-NEXT: v_cndmask_b32_e32 v1, 4.0, v1, vcc_lo
+; GFX10PLUS-NEXT: v_cmp_ne_u32_e32 vcc_lo, 4, v0
+; GFX10PLUS-NEXT: v_cndmask_b32_e32 v1, 0x40a00000, v1, vcc_lo
+; GFX10PLUS-NEXT: v_cmp_ne_u32_e32 vcc_lo, 5, v0
+; GFX10PLUS-NEXT: v_cndmask_b32_e32 v1, 0x40c00000, v1, vcc_lo
+; GFX10PLUS-NEXT: v_cmp_ne_u32_e32 vcc_lo, 6, v0
+; GFX10PLUS-NEXT: v_cndmask_b32_e32 v1, 0x40e00000, v1, vcc_lo
+; GFX10PLUS-NEXT: v_cmp_ne_u32_e32 vcc_lo, 7, v0
+; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, 0x41000000, v1, vcc_lo
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
entry:
%ext = extractelement <8 x float> <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0>, i32 %sel
@@ -3383,43 +3383,43 @@ define float @dyn_extract_v15f32_const_s_v(i32 %sel) {
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
; GCN-NEXT: v_mov_b32_e32 v1, 0x40400000
; GCN-NEXT: v_cndmask_b32_e64 v13, 1.0, 2.0, vcc
-; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 2, v0
-; GCN-NEXT: v_cndmask_b32_e32 v1, v13, v1, vcc
-; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 3, v0
+; GCN-NEXT: v_cmp_ne_u32_e32 vcc, 2, v0
+; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v13, vcc
+; GCN-NEXT: v_cmp_ne_u32_e32 vcc, 3, v0
; GCN-NEXT: v_mov_b32_e32 v2, 0x40a00000
-; GCN-NEXT: v_cndmask_b32_e64 v1, v1, 4.0, vcc
-; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 4, v0
+; GCN-NEXT: v_cndmask_b32_e32 v1, 4.0, v1, vcc
+; GCN-NEXT: v_cmp_ne_u32_e32 vcc, 4, v0
; GCN-NEXT: v_mov_b32_e32 v3, 0x40c00000
-; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
-; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 5, v0
+; GCN-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
+; GCN-NEXT: v_cmp_ne_u32_e32 vcc, 5, v0
; GCN-NEXT: v_mov_b32_e32 v4, 0x40e00000
-; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
-; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 6, v0
+; GCN-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
+; GCN-NEXT: v_cmp_ne_u32_e32 vcc, 6, v0
; GCN-NEXT: v_mov_b32_e32 v5, 0x41000000
-; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc
-; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 7, v0
+; GCN-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc
+; GCN-NEXT: v_cmp_ne_u32_e32 vcc, 7, v0
; GCN-NEXT: v_mov_b32_e32 v6, 0x41100000
-; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc
-; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 8, v0
+; GCN-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
+; GCN-NEXT: v_cmp_ne_u32_e32 vcc, 8, v0
; GCN-NEXT: v_mov_b32_e32 v7, 0x41200000
-; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v6, vcc
-; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 9, v0
+; GCN-NEXT: v_cndmask_b32_e32 v1, v6, v1, vcc
+; GCN-NEXT: v_cmp_ne_u32_e32 vcc, 9, v0
; GCN-NEXT: v_mov_b32_e32 v8, 0x41300000
-; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc
-; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 10, v0
+; GCN-NEXT: v_cndmask_b32_e32 v1, v7, v1, vcc
+; GCN-NEXT: v_cmp_ne_u32_e32 vcc, 10, v0
; GCN-NEXT: v_mov_b32_e32 v9, 0x41400000
-; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v8, vcc
-; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 11, v0
+; GCN-NEXT: v_cndmask_b32_e32 v1, v8, v1, vcc
+; GCN-NEXT: v_cmp_ne_u32_e32 vcc, 11, v0
; GCN-NEXT: v_mov_b32_e32 v10, 0x41500000
-; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v9, vcc
-; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 12, v0
+; GCN-NEXT: v_cndmask_b32_e32 v1, v9, v1, vcc
+; GCN-NEXT: v_cmp_ne_u32_e32 vcc, 12, v0
; GCN-NEXT: v_mov_b32_e32 v11, 0x41600000
-; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v10, vcc
-; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 13, v0
+; GCN-NEXT: v_cndmask_b32_e32 v1, v10, v1, vcc
+; GCN-NEXT: v_cmp_ne_u32_e32 vcc, 13, v0
; GCN-NEXT: v_mov_b32_e32 v12, 0x41700000
-; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v11, vcc
-; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 14, v0
-; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v12, vcc
+; GCN-NEXT: v_cndmask_b32_e32 v1, v11, v1, vcc
+; GCN-NEXT: v_cmp_ne_u32_e32 vcc, 14, v0
+; GCN-NEXT: v_cndmask_b32_e32 v1, v12, v1, vcc
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 15, v0
; GCN-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
; GCN-NEXT: s_setpc_b64 s[30:31]
@@ -3429,32 +3429,32 @@ define float @dyn_extract_v15f32_const_s_v(i32 %sel) {
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
; GFX10-NEXT: v_cndmask_b32_e64 v1, 1.0, 2.0, vcc_lo
-; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v0
-; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, 0x40400000, vcc_lo
-; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v0
-; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, 4.0, vcc_lo
-; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v0
-; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, 0x40a00000, vcc_lo
-; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v0
-; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, 0x40c00000, vcc_lo
-; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v0
-; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, 0x40e00000, vcc_lo
-; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v0
-; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, 0x41000000, vcc_lo
-; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 8, v0
-; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, 0x41100000, vcc_lo
-; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 9, v0
-; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, 0x41200000, vcc_lo
-; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 10, v0
-; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, 0x41300000, vcc_lo
-; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 11, v0
-; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, 0x41400000, vcc_lo
-; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 12, v0
-; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, 0x41500000, vcc_lo
-; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 13, v0
-; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, 0x41600000, vcc_lo
-; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 14, v0
-; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, 0x41700000, vcc_lo
+; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 2, v0
+; GFX10-NEXT: v_cndmask_b32_e32 v1, 0x40400000, v1, vcc_lo
+; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 3, v0
+; GFX10-NEXT: v_cndmask_b32_e32 v1, 4.0, v1, vcc_lo
+; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 4, v0
+; GFX10-NEXT: v_cndmask_b32_e32 v1, 0x40a00000, v1, vcc_lo
+; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 5, v0
+; GFX10-NEXT: v_cndmask_b32_e32 v1, 0x40c00000, v1, vcc_lo
+; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 6, v0
+; GFX10-NEXT: v_cndmask_b32_e32 v1, 0x40e00000, v1, vcc_lo
+; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 7, v0
+; GFX10-NEXT: v_cndmask_b32_e32 v1, 0x41000000, v1, vcc_lo
+; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 8, v0
+; GFX10-NEXT: v_cndmask_b32_e32 v1, 0x41100000, v1, vcc_lo
+; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 9, v0
+; GFX10-NEXT: v_cndmask_b32_e32 v1, 0x41200000, v1, vcc_lo
+; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 10, v0
+; GFX10-NEXT: v_cndmask_b32_e32 v1, 0x41300000, v1, vcc_lo
+; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 11, v0
+; GFX10-NEXT: v_cndmask_b32_e32 v1, 0x41400000, v1, vcc_lo
+; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 12, v0
+; GFX10-NEXT: v_cndmask_b32_e32 v1, 0x41500000, v1, vcc_lo
+; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 13, v0
+; GFX10-NEXT: v_cndmask_b32_e32 v1, 0x41600000, v1, vcc_lo
+; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 14, v0
+; GFX10-NEXT: v_cndmask_b32_e32 v1, 0x41700000, v1, vcc_lo
; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 15, v0
; GFX10-NEXT: v_cndmask_b32_e64 v0, v1, s4, vcc_lo
; GFX10-NEXT: s_setpc_b64 s[30:31]
@@ -3464,32 +3464,32 @@ define float @dyn_extract_v15f32_const_s_v(i32 %sel) {
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
; GFX11-NEXT: v_cndmask_b32_e64 v1, 1.0, 2.0, vcc_lo
-; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v0
-; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, 0x40400000, vcc_lo
-; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v0
-; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, 4.0, vcc_lo
-; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v0
-; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, 0x40a00000, vcc_lo
-; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v0
-; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, 0x40c00000, vcc_lo
-; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v0
-; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, 0x40e00000, vcc_lo
-; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v0
-; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, 0x41000000, vcc_lo
-; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 8, v0
-; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, 0x41100000, vcc_lo
-; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 9, v0
-; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, 0x41200000, vcc_lo
-; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 10, v0
-; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, 0x41300000, vcc_lo
-; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 11, v0
-; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, 0x41400000, vcc_lo
-; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 12, v0
-; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, 0x41500000, vcc_lo
-; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 13, v0
-; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, 0x41600000, vcc_lo
-; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 14, v0
-; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, 0x41700000, vcc_lo
+; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 2, v0
+; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x40400000, v1, vcc_lo
+; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 3, v0
+; GFX11-NEXT: v_cndmask_b32_e32 v1, 4.0, v1, vcc_lo
+; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 4, v0
+; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x40a00000, v1, vcc_lo
+; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 5, v0
+; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x40c00000, v1, vcc_lo
+; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 6, v0
+; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x40e00000, v1, vcc_lo
+; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 7, v0
+; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x41000000, v1, vcc_lo
+; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 8, v0
+; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x41100000, v1, vcc_lo
+; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 9, v0
+; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x41200000, v1, vcc_lo
+; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 10, v0
+; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x41300000, v1, vcc_lo
+; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 11, v0
+; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x41400000, v1, vcc_lo
+; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 12, v0
+; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x41500000, v1, vcc_lo
+; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 13, v0
+; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x41600000, v1, vcc_lo
+; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 14, v0
+; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x41700000, v1, vcc_lo
; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 15, v0
; GFX11-NEXT: v_cndmask_b32_e64 v0, v1, s0, vcc_lo
; GFX11-NEXT: s_setpc_b64 s[30:31]
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/saddsat.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/saddsat.ll
index 723ad5646c0a3..03b713f6866a0 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/saddsat.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/saddsat.ll
@@ -5155,8 +5155,8 @@ define amdgpu_ps i128 @s_saddsat_i128(i128 inreg %lhs, i128 inreg %rhs) {
; GFX8-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
; GFX8-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[0:1]
; GFX8-NEXT: s_and_b32 s0, 1, s2
-; GFX8-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, s0
-; GFX8-NEXT: v_cndmask_b32_e64 v1, v1, 0, s[0:1]
+; GFX8-NEXT: v_cmp_eq_u32_e64 vcc, 0, s0
+; GFX8-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
; GFX8-NEXT: v_xor_b32_e32 v0, v1, v0
; GFX8-NEXT: s_ashr_i32 s0, s9, 31
; GFX8-NEXT: v_and_b32_e32 v0, 1, v0
@@ -5202,8 +5202,8 @@ define amdgpu_ps i128 @s_saddsat_i128(i128 inreg %lhs, i128 inreg %rhs) {
; GFX9-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
; GFX9-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[0:1]
; GFX9-NEXT: s_and_b32 s0, 1, s2
-; GFX9-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, s0
-; GFX9-NEXT: v_cndmask_b32_e64 v1, v1, 0, s[0:1]
+; GFX9-NEXT: v_cmp_eq_u32_e64 vcc, 0, s0
+; GFX9-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
; GFX9-NEXT: v_xor_b32_e32 v0, v1, v0
; GFX9-NEXT: s_ashr_i32 s0, s9, 31
; GFX9-NEXT: v_and_b32_e32 v0, 1, v0
@@ -5241,16 +5241,16 @@ define amdgpu_ps i128 @s_saddsat_i128(i128 inreg %lhs, i128 inreg %rhs) {
; GFX10-NEXT: v_cndmask_b32_e64 v1, 0, 1, s0
; GFX10-NEXT: s_and_b32 s0, 1, s10
; GFX10-NEXT: s_cmp_eq_u64 s[6:7], 0
-; GFX10-NEXT: v_cndmask_b32_e64 v2, 0, 1, s2
-; GFX10-NEXT: s_cselect_b32 s1, 1, 0
; GFX10-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s0
+; GFX10-NEXT: s_cselect_b32 s1, 1, 0
+; GFX10-NEXT: v_cndmask_b32_e64 v2, 0, 1, s2
; GFX10-NEXT: s_and_b32 s1, 1, s1
-; GFX10-NEXT: v_cmp_ne_u32_e64 s0, 0, s1
-; GFX10-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e64 v1, v2, 0, s0
-; GFX10-NEXT: v_mov_b32_e32 v2, s5
; GFX10-NEXT: s_ashr_i32 s0, s9, 31
+; GFX10-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo
+; GFX10-NEXT: v_cmp_eq_u32_e64 vcc_lo, 0, s1
; GFX10-NEXT: s_add_i32 s1, s0, 0x80000000
+; GFX10-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc_lo
+; GFX10-NEXT: v_mov_b32_e32 v2, s5
; GFX10-NEXT: v_xor_b32_e32 v0, v1, v0
; GFX10-NEXT: v_mov_b32_e32 v1, s4
; GFX10-NEXT: v_and_b32_e32 v0, 1, v0
@@ -5282,16 +5282,15 @@ define amdgpu_ps i128 @s_saddsat_i128(i128 inreg %lhs, i128 inreg %rhs) {
; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, 1, s0
; GFX11-NEXT: s_and_b32 s0, 1, s10
; GFX11-NEXT: s_cmp_eq_u64 s[6:7], 0
-; GFX11-NEXT: v_cndmask_b32_e64 v2, 0, 1, s2
-; GFX11-NEXT: s_cselect_b32 s1, 1, 0
; GFX11-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s0
+; GFX11-NEXT: s_cselect_b32 s1, 1, 0
+; GFX11-NEXT: v_cndmask_b32_e64 v2, 0, 1, s2
; GFX11-NEXT: s_and_b32 s1, 1, s1
-; GFX11-NEXT: v_cmp_ne_u32_e64 s0, 0, s1
-; GFX11-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo
-; GFX11-NEXT: v_cndmask_b32_e64 v1, v2, 0, s0
-; GFX11-NEXT: v_mov_b32_e32 v2, s5
; GFX11-NEXT: s_ashr_i32 s0, s9, 31
+; GFX11-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo
+; GFX11-NEXT: v_cmp_eq_u32_e64 vcc_lo, 0, s1
; GFX11-NEXT: s_add_i32 s1, s0, 0x80000000
+; GFX11-NEXT: v_dual_cndmask_b32 v1, 0, v2 :: v_dual_mov_b32 v2, s5
; GFX11-NEXT: v_xor_b32_e32 v0, v1, v0
; GFX11-NEXT: v_dual_mov_b32 v1, s4 :: v_dual_and_b32 v0, 1, v0
; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
@@ -5511,8 +5510,8 @@ define amdgpu_ps <4 x float> @saddsat_i128_vs(i128 %lhs, i128 inreg %rhs) {
; GFX8-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
; GFX8-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[0:1]
; GFX8-NEXT: s_and_b32 s0, 1, s4
-; GFX8-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, s0
-; GFX8-NEXT: v_cndmask_b32_e64 v1, v1, 0, s[0:1]
+; GFX8-NEXT: v_cmp_eq_u32_e64 vcc, 0, s0
+; GFX8-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
; GFX8-NEXT: v_xor_b32_e32 v0, v1, v0
; GFX8-NEXT: v_ashrrev_i32_e32 v2, 31, v7
; GFX8-NEXT: v_bfrev_b32_e32 v1, 1
@@ -5545,8 +5544,8 @@ define amdgpu_ps <4 x float> @saddsat_i128_vs(i128 %lhs, i128 inreg %rhs) {
; GFX9-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
; GFX9-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[0:1]
; GFX9-NEXT: s_and_b32 s0, 1, s4
-; GFX9-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, s0
-; GFX9-NEXT: v_cndmask_b32_e64 v1, v1, 0, s[0:1]
+; GFX9-NEXT: v_cmp_eq_u32_e64 vcc, 0, s0
+; GFX9-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
; GFX9-NEXT: v_xor_b32_e32 v0, v1, v0
; GFX9-NEXT: v_ashrrev_i32_e32 v2, 31, v7
; GFX9-NEXT: v_and_b32_e32 v0, 1, v0
@@ -5572,13 +5571,13 @@ define amdgpu_ps <4 x float> @saddsat_i128_vs(i128 %lhs, i128 inreg %rhs) {
; GFX10-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
; GFX10-NEXT: v_cmp_lt_i64_e32 vcc_lo, v[6:7], v[2:3]
; GFX10-NEXT: v_cndmask_b32_e64 v8, 0, 1, s1
-; GFX10-NEXT: v_cmp_ne_u32_e64 s0, 0, s0
; GFX10-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo
; GFX10-NEXT: v_cmp_eq_u64_e32 vcc_lo, v[6:7], v[2:3]
; GFX10-NEXT: v_ashrrev_i32_e32 v2, 31, v7
; GFX10-NEXT: v_add_nc_u32_e32 v3, 0x80000000, v2
; GFX10-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e64 v1, v8, 0, s0
+; GFX10-NEXT: v_cmp_eq_u32_e64 vcc_lo, 0, s0
+; GFX10-NEXT: v_cndmask_b32_e32 v1, 0, v8, vcc_lo
; GFX10-NEXT: v_xor_b32_e32 v0, v1, v0
; GFX10-NEXT: v_and_b32_e32 v0, 1, v0
; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
@@ -5602,18 +5601,18 @@ define amdgpu_ps <4 x float> @saddsat_i128_vs(i128 %lhs, i128 inreg %rhs) {
; GFX11-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
; GFX11-NEXT: v_cmp_lt_i64_e32 vcc_lo, v[6:7], v[2:3]
; GFX11-NEXT: v_cndmask_b32_e64 v8, 0, 1, s1
-; GFX11-NEXT: v_cmp_ne_u32_e64 s0, 0, s0
; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo
; GFX11-NEXT: v_cmp_eq_u64_e32 vcc_lo, v[6:7], v[2:3]
; GFX11-NEXT: v_ashrrev_i32_e32 v2, 31, v7
; GFX11-NEXT: v_dual_cndmask_b32 v0, v1, v0 :: v_dual_add_nc_u32 v3, 0x80000000, v2
-; GFX11-NEXT: v_cndmask_b32_e64 v1, v8, 0, s0
+; GFX11-NEXT: v_cmp_eq_u32_e64 vcc_lo, 0, s0
+; GFX11-NEXT: v_cndmask_b32_e32 v1, 0, v8, vcc_lo
; GFX11-NEXT: v_xor_b32_e32 v0, v1, v0
; GFX11-NEXT: v_and_b32_e32 v0, 1, v0
; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
; GFX11-NEXT: v_cndmask_b32_e32 v1, v5, v2, vcc_lo
-; GFX11-NEXT: v_dual_cndmask_b32 v0, v4, v2 :: v_dual_cndmask_b32 v3, v7, v3
-; GFX11-NEXT: v_cndmask_b32_e32 v2, v6, v2, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e32 v0, v4, v2, vcc_lo
+; GFX11-NEXT: v_dual_cndmask_b32 v2, v6, v2 :: v_dual_cndmask_b32 v3, v7, v3
; GFX11-NEXT: ; return to shader part epilog
%result = call i128 @llvm.sadd.sat.i128(i128 %lhs, i128 %rhs)
%cast = bitcast i128 %result to <4 x float>
@@ -5982,8 +5981,8 @@ define amdgpu_ps <2 x i128> @s_saddsat_v2i128(<2 x i128> inreg %lhs, <2 x i128>
; GFX8-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
; GFX8-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[0:1]
; GFX8-NEXT: s_and_b32 s0, 1, s2
-; GFX8-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, s0
-; GFX8-NEXT: v_cndmask_b32_e64 v1, v1, 0, s[0:1]
+; GFX8-NEXT: v_cmp_eq_u32_e64 vcc, 0, s0
+; GFX8-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
; GFX8-NEXT: v_xor_b32_e32 v0, v1, v0
; GFX8-NEXT: s_ashr_i32 s0, s17, 31
; GFX8-NEXT: v_and_b32_e32 v0, 1, v0
@@ -6021,8 +6020,8 @@ define amdgpu_ps <2 x i128> @s_saddsat_v2i128(<2 x i128> inreg %lhs, <2 x i128>
; GFX8-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
; GFX8-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[4:5]
; GFX8-NEXT: s_and_b32 s4, 1, s6
-; GFX8-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, s4
-; GFX8-NEXT: v_cndmask_b32_e64 v1, v1, 0, s[4:5]
+; GFX8-NEXT: v_cmp_eq_u32_e64 vcc, 0, s4
+; GFX8-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
; GFX8-NEXT: v_xor_b32_e32 v0, v1, v0
; GFX8-NEXT: s_ashr_i32 s4, s3, 31
; GFX8-NEXT: v_and_b32_e32 v0, 1, v0
@@ -6072,8 +6071,8 @@ define amdgpu_ps <2 x i128> @s_saddsat_v2i128(<2 x i128> inreg %lhs, <2 x i128>
; GFX9-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
; GFX9-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[0:1]
; GFX9-NEXT: s_and_b32 s0, 1, s2
-; GFX9-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, s0
-; GFX9-NEXT: v_cndmask_b32_e64 v1, v1, 0, s[0:1]
+; GFX9-NEXT: v_cmp_eq_u32_e64 vcc, 0, s0
+; GFX9-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
; GFX9-NEXT: v_xor_b32_e32 v0, v1, v0
; GFX9-NEXT: s_ashr_i32 s0, s17, 31
; GFX9-NEXT: v_and_b32_e32 v0, 1, v0
@@ -6111,8 +6110,8 @@ define amdgpu_ps <2 x i128> @s_saddsat_v2i128(<2 x i128> inreg %lhs, <2 x i128>
; GFX9-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
; GFX9-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[4:5]
; GFX9-NEXT: s_and_b32 s4, 1, s6
-; GFX9-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, s4
-; GFX9-NEXT: v_cndmask_b32_e64 v1, v1, 0, s[4:5]
+; GFX9-NEXT: v_cmp_eq_u32_e64 vcc, 0, s4
+; GFX9-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
; GFX9-NEXT: v_xor_b32_e32 v0, v1, v0
; GFX9-NEXT: s_ashr_i32 s4, s3, 31
; GFX9-NEXT: v_and_b32_e32 v0, 1, v0
@@ -6154,53 +6153,53 @@ define amdgpu_ps <2 x i128> @s_saddsat_v2i128(<2 x i128> inreg %lhs, <2 x i128>
; GFX10-NEXT: v_cndmask_b32_e64 v1, 0, 1, s0
; GFX10-NEXT: s_and_b32 s0, 1, s18
; GFX10-NEXT: s_cmp_eq_u64 s[10:11], 0
-; GFX10-NEXT: v_cndmask_b32_e64 v2, 0, 1, s2
-; GFX10-NEXT: s_cselect_b32 s1, 1, 0
; GFX10-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s0
-; GFX10-NEXT: s_and_b32 s1, 1, s1
+; GFX10-NEXT: s_cselect_b32 s1, 1, 0
; GFX10-NEXT: s_ashr_i32 s10, s17, 31
-; GFX10-NEXT: v_cmp_ne_u32_e64 s0, 0, s1
-; GFX10-NEXT: s_add_i32 s11, s10, 0x80000000
+; GFX10-NEXT: s_and_b32 s1, 1, s1
+; GFX10-NEXT: v_cndmask_b32_e64 v2, 0, 1, s2
; GFX10-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e64 v1, v2, 0, s0
+; GFX10-NEXT: v_cmp_eq_u32_e64 vcc_lo, 0, s1
+; GFX10-NEXT: s_add_i32 s11, s10, 0x80000000
; GFX10-NEXT: s_add_u32 s0, s4, s12
; GFX10-NEXT: s_addc_u32 s1, s5, s13
; GFX10-NEXT: s_addc_u32 s2, s6, s14
; GFX10-NEXT: v_cmp_lt_u64_e64 s4, s[0:1], s[4:5]
+; GFX10-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc_lo
; GFX10-NEXT: s_addc_u32 s3, s7, s15
-; GFX10-NEXT: v_xor_b32_e32 v0, v1, v0
-; GFX10-NEXT: s_cmp_eq_u64 s[2:3], s[6:7]
; GFX10-NEXT: v_mov_b32_e32 v5, s0
-; GFX10-NEXT: s_cselect_b32 s12, 1, 0
+; GFX10-NEXT: s_cmp_eq_u64 s[2:3], s[6:7]
+; GFX10-NEXT: v_mov_b32_e32 v6, s1
+; GFX10-NEXT: v_xor_b32_e32 v0, v1, v0
; GFX10-NEXT: v_cndmask_b32_e64 v1, 0, 1, s4
; GFX10-NEXT: v_cmp_lt_i64_e64 s4, s[2:3], s[6:7]
+; GFX10-NEXT: s_cselect_b32 s12, 1, 0
; GFX10-NEXT: v_cmp_lt_i64_e64 s6, s[14:15], 0
; GFX10-NEXT: v_and_b32_e32 v0, 1, v0
-; GFX10-NEXT: v_mov_b32_e32 v6, s1
; GFX10-NEXT: v_mov_b32_e32 v7, s3
; GFX10-NEXT: v_cndmask_b32_e64 v2, 0, 1, s4
; GFX10-NEXT: s_and_b32 s4, 1, s12
; GFX10-NEXT: s_cmp_eq_u64 s[14:15], 0
-; GFX10-NEXT: v_cndmask_b32_e64 v3, 0, 1, s6
-; GFX10-NEXT: s_cselect_b32 s5, 1, 0
; GFX10-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s4
+; GFX10-NEXT: s_cselect_b32 s5, 1, 0
+; GFX10-NEXT: v_cndmask_b32_e64 v3, 0, 1, s6
; GFX10-NEXT: s_and_b32 s5, 1, s5
-; GFX10-NEXT: v_cmp_ne_u32_e64 s4, 0, s5
+; GFX10-NEXT: s_ashr_i32 s4, s3, 31
; GFX10-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc_lo
+; GFX10-NEXT: v_cmp_eq_u32_e64 vcc_lo, 0, s5
+; GFX10-NEXT: s_add_i32 s0, s4, 0x80000000
+; GFX10-NEXT: v_cndmask_b32_e32 v2, 0, v3, vcc_lo
+; GFX10-NEXT: v_mov_b32_e32 v3, s8
; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
; GFX10-NEXT: v_mov_b32_e32 v0, s16
-; GFX10-NEXT: v_cndmask_b32_e64 v2, v3, 0, s4
-; GFX10-NEXT: v_mov_b32_e32 v3, s8
-; GFX10-NEXT: s_ashr_i32 s4, s3, 31
-; GFX10-NEXT: v_cndmask_b32_e64 v4, v4, s10, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e64 v0, v0, s10, vcc_lo
; GFX10-NEXT: v_xor_b32_e32 v1, v2, v1
; GFX10-NEXT: v_mov_b32_e32 v2, s17
; GFX10-NEXT: v_cndmask_b32_e64 v3, v3, s10, vcc_lo
-; GFX10-NEXT: s_add_i32 s0, s4, 0x80000000
-; GFX10-NEXT: v_readfirstlane_b32 s1, v4
+; GFX10-NEXT: v_cndmask_b32_e64 v4, v4, s10, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e64 v0, v0, s10, vcc_lo
; GFX10-NEXT: v_and_b32_e32 v1, 1, v1
; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, s11, vcc_lo
+; GFX10-NEXT: v_readfirstlane_b32 s1, v4
; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v1
; GFX10-NEXT: v_mov_b32_e32 v1, s2
; GFX10-NEXT: v_readfirstlane_b32 s2, v0
@@ -6231,42 +6230,43 @@ define amdgpu_ps <2 x i128> @s_saddsat_v2i128(<2 x i128> inreg %lhs, <2 x i128>
; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, 1, s0
; GFX11-NEXT: s_and_b32 s0, 1, s18
; GFX11-NEXT: s_cmp_eq_u64 s[10:11], 0
-; GFX11-NEXT: v_cndmask_b32_e64 v2, 0, 1, s2
-; GFX11-NEXT: s_cselect_b32 s1, 1, 0
; GFX11-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s0
-; GFX11-NEXT: s_and_b32 s1, 1, s1
+; GFX11-NEXT: s_cselect_b32 s1, 1, 0
; GFX11-NEXT: s_ashr_i32 s10, s17, 31
-; GFX11-NEXT: v_cmp_ne_u32_e64 s0, 0, s1
-; GFX11-NEXT: s_add_i32 s11, s10, 0x80000000
+; GFX11-NEXT: s_and_b32 s1, 1, s1
+; GFX11-NEXT: v_cndmask_b32_e64 v2, 0, 1, s2
; GFX11-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo
-; GFX11-NEXT: v_cndmask_b32_e64 v1, v2, 0, s0
+; GFX11-NEXT: v_cmp_eq_u32_e64 vcc_lo, 0, s1
+; GFX11-NEXT: s_add_i32 s11, s10, 0x80000000
; GFX11-NEXT: s_add_u32 s0, s4, s12
; GFX11-NEXT: s_addc_u32 s1, s5, s13
; GFX11-NEXT: s_addc_u32 s2, s6, s14
+; GFX11-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc_lo
; GFX11-NEXT: v_cmp_lt_u64_e64 s4, s[0:1], s[4:5]
; GFX11-NEXT: s_addc_u32 s3, s7, s15
+; GFX11-NEXT: v_dual_mov_b32 v6, s1 :: v_dual_mov_b32 v7, s3
; GFX11-NEXT: v_xor_b32_e32 v0, v1, v0
; GFX11-NEXT: s_cmp_eq_u64 s[2:3], s[6:7]
-; GFX11-NEXT: v_dual_mov_b32 v6, s1 :: v_dual_mov_b32 v7, s3
; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, 1, s4
; GFX11-NEXT: v_cmp_lt_i64_e64 s4, s[2:3], s[6:7]
; GFX11-NEXT: s_cselect_b32 s12, 1, 0
; GFX11-NEXT: v_cmp_lt_i64_e64 s6, s[14:15], 0
-; GFX11-NEXT: v_dual_mov_b32 v5, s0 :: v_dual_and_b32 v0, 1, v0
+; GFX11-NEXT: v_mov_b32_e32 v5, s0
; GFX11-NEXT: v_cndmask_b32_e64 v2, 0, 1, s4
; GFX11-NEXT: s_and_b32 s4, 1, s12
; GFX11-NEXT: s_cmp_eq_u64 s[14:15], 0
-; GFX11-NEXT: v_cndmask_b32_e64 v3, 0, 1, s6
-; GFX11-NEXT: s_cselect_b32 s5, 1, 0
; GFX11-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s4
+; GFX11-NEXT: s_cselect_b32 s5, 1, 0
+; GFX11-NEXT: v_and_b32_e32 v0, 1, v0
; GFX11-NEXT: s_and_b32 s5, 1, s5
-; GFX11-NEXT: v_cmp_ne_u32_e64 s4, 0, s5
+; GFX11-NEXT: v_cndmask_b32_e64 v3, 0, 1, s6
; GFX11-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc_lo
-; GFX11-NEXT: v_cndmask_b32_e64 v2, v3, 0, s4
-; GFX11-NEXT: v_mov_b32_e32 v3, s8
+; GFX11-NEXT: v_cmp_eq_u32_e64 vcc_lo, 0, s5
+; GFX11-NEXT: s_ashr_i32 s4, s3, 31
+; GFX11-NEXT: s_add_i32 s0, s4, 0x80000000
+; GFX11-NEXT: v_dual_cndmask_b32 v2, 0, v3 :: v_dual_mov_b32 v3, s8
; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
; GFX11-NEXT: v_mov_b32_e32 v0, s16
-; GFX11-NEXT: s_ashr_i32 s4, s3, 31
; GFX11-NEXT: v_xor_b32_e32 v1, v2, v1
; GFX11-NEXT: v_mov_b32_e32 v4, s9
; GFX11-NEXT: v_mov_b32_e32 v2, s17
@@ -6275,7 +6275,6 @@ define amdgpu_ps <2 x i128> @s_saddsat_v2i128(<2 x i128> inreg %lhs, <2 x i128>
; GFX11-NEXT: v_and_b32_e32 v1, 1, v1
; GFX11-NEXT: v_cndmask_b32_e64 v4, v4, s10, vcc_lo
; GFX11-NEXT: v_cndmask_b32_e64 v2, v2, s11, vcc_lo
-; GFX11-NEXT: s_add_i32 s0, s4, 0x80000000
; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v1
; GFX11-NEXT: v_mov_b32_e32 v1, s2
; GFX11-NEXT: v_readfirstlane_b32 s1, v4
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-fold-binop-select.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-fold-binop-select.ll
index 7fdc012d4f1b5..4d62e9bcc3bc3 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-fold-binop-select.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-fold-binop-select.ll
@@ -213,10 +213,10 @@ define i32 @select_sdiv_lhs_opaque_const1_i32(i1 %cond) {
; GCN-NEXT: s_addc_u32 s5, s5, gv@gotpcrel32@hi+12
; GCN-NEXT: s_load_dword s4, s[4:5], 0x0
; GCN-NEXT: v_and_b32_e32 v0, 1, v0
-; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
+; GCN-NEXT: v_cmp_ne_u32_e32 vcc, 1, v0
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: v_mov_b32_e32 v1, s4
-; GCN-NEXT: v_cndmask_b32_e64 v0, v1, 5, vcc
+; GCN-NEXT: v_cndmask_b32_e32 v0, 5, v1, vcc
; GCN-NEXT: v_sub_u32_e32 v1, vcc, 0, v0
; GCN-NEXT: v_max_i32_e32 v1, v0, v1
; GCN-NEXT: v_cvt_f32_u32_e32 v2, v1
@@ -293,10 +293,10 @@ define i32 @select_sdiv_rhs_opaque_const1_i32(i1 %cond) {
; GCN-NEXT: s_load_dword s4, s[4:5], 0x0
; GCN-NEXT: v_and_b32_e32 v0, 1, v0
; GCN-NEXT: v_mov_b32_e32 v1, 0xa410
-; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
+; GCN-NEXT: v_cmp_ne_u32_e32 vcc, 1, v0
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: v_mov_b32_e32 v2, s4
-; GCN-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
+; GCN-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc
; GCN-NEXT: s_mov_b32 s4, 0x30c30c31
; GCN-NEXT: v_mul_hi_i32 v0, v0, s4
; GCN-NEXT: v_lshrrev_b32_e32 v1, 31, v0
diff --git a/llvm/test/CodeGen/AMDGPU/ctlz.ll b/llvm/test/CodeGen/AMDGPU/ctlz.ll
index b4d450a90d595..179c0bd1fdfc8 100644
--- a/llvm/test/CodeGen/AMDGPU/ctlz.ll
+++ b/llvm/test/CodeGen/AMDGPU/ctlz.ll
@@ -1099,9 +1099,9 @@ define amdgpu_kernel void @v_ctlz_i32_sel_eq_neg1(ptr addrspace(1) noalias %out,
; GFX10-GISEL-NEXT: global_load_dword v0, v0, s[2:3]
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX10-GISEL-NEXT: v_ffbh_u32_e32 v1, v0
-; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX10-GISEL-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
; GFX10-GISEL-NEXT: v_min_u32_e32 v1, 32, v1
-; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, -1, vcc_lo
+; GFX10-GISEL-NEXT: v_cndmask_b32_e32 v0, -1, v1, vcc_lo
; GFX10-GISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX10-GISEL-NEXT: global_store_dword v1, v0, s[0:1]
; GFX10-GISEL-NEXT: s_endpgm
@@ -1327,8 +1327,8 @@ define amdgpu_kernel void @v_ctlz_i32_sel_eq_bitwidth(ptr addrspace(1) noalias %
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX10-GISEL-NEXT: v_ffbh_u32_e32 v0, v0
; GFX10-GISEL-NEXT: v_min_u32_e32 v0, 32, v0
-; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 32, v0
-; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, -1, vcc_lo
+; GFX10-GISEL-NEXT: v_cmp_ne_u32_e32 vcc_lo, 32, v0
+; GFX10-GISEL-NEXT: v_cndmask_b32_e32 v0, -1, v0, vcc_lo
; GFX10-GISEL-NEXT: global_store_dword v1, v0, s[0:1]
; GFX10-GISEL-NEXT: s_endpgm
;
@@ -1564,10 +1564,10 @@ define amdgpu_kernel void @v_ctlz_i32_sel_ne_bitwidth(ptr addrspace(1) noalias %
; GFX10-GISEL-NEXT: global_load_ubyte v0, v[0:1], off
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX10-GISEL-NEXT: v_ffbh_u32_e32 v1, v0
-; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX10-GISEL-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
; GFX10-GISEL-NEXT: v_min_u32_e32 v1, 32, v1
; GFX10-GISEL-NEXT: v_add_nc_u16 v1, 0xffe8, v1
-; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0xffff, vcc_lo
+; GFX10-GISEL-NEXT: v_cndmask_b32_e32 v0, 0xffff, v1, vcc_lo
; GFX10-GISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX10-GISEL-NEXT: global_store_byte v1, v0, s[0:1]
; GFX10-GISEL-NEXT: s_endpgm
@@ -1684,11 +1684,11 @@ define amdgpu_kernel void @v_ctlz_i32_sel_ne_bitwidth(ptr addrspace(1) noalias %
; GFX10-GISEL-NEXT: global_load_ushort v1, v0, s[2:3]
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX10-GISEL-NEXT: v_ffbh_u32_e32 v2, v1
-; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX10-GISEL-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v1
; GFX10-GISEL-NEXT: v_min_u32_e32 v2, 32, v2
; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v2, -16, v2
; GFX10-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
-; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v1, v2, 0xffff, vcc_lo
+; GFX10-GISEL-NEXT: v_cndmask_b32_e32 v1, 0xffff, v2, vcc_lo
; GFX10-GISEL-NEXT: global_store_short v0, v1, s[0:1]
; GFX10-GISEL-NEXT: s_endpgm
;
@@ -1805,10 +1805,10 @@ define amdgpu_kernel void @v_ctlz_i7_sel_eq_neg1(ptr addrspace(1) noalias %out,
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX10-GISEL-NEXT: v_and_b32_e32 v0, 0x7f, v0
; GFX10-GISEL-NEXT: v_ffbh_u32_e32 v1, v0
-; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX10-GISEL-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
; GFX10-GISEL-NEXT: v_min_u32_e32 v1, 32, v1
; GFX10-GISEL-NEXT: v_add_nc_u16 v1, 0xffe7, v1
-; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0x7f, vcc_lo
+; GFX10-GISEL-NEXT: v_cndmask_b32_e32 v0, 0x7f, v1, vcc_lo
; GFX10-GISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX10-GISEL-NEXT: v_and_b32_e32 v0, 0x7f, v0
; GFX10-GISEL-NEXT: global_store_byte v1, v0, s[0:1]
diff --git a/llvm/test/CodeGen/AMDGPU/ctlz_zero_undef.ll b/llvm/test/CodeGen/AMDGPU/ctlz_zero_undef.ll
index 65ee228b64c6a..f4bbba6246b65 100644
--- a/llvm/test/CodeGen/AMDGPU/ctlz_zero_undef.ll
+++ b/llvm/test/CodeGen/AMDGPU/ctlz_zero_undef.ll
@@ -1542,8 +1542,8 @@ define amdgpu_kernel void @v_ctlz_zero_undef_i32_sel_eq_neg1(ptr addrspace(1) no
; GFX9-GISEL-NEXT: global_load_dword v0, v0, s[2:3]
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX9-GISEL-NEXT: v_ffbh_u32_e32 v1, v0
-; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
-; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, -1, vcc
+; GFX9-GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
+; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v0, -1, v1, vcc
; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX9-GISEL-NEXT: global_store_dword v1, v0, s[0:1]
; GFX9-GISEL-NEXT: s_endpgm
@@ -1712,8 +1712,8 @@ define amdgpu_kernel void @v_ctlz_zero_undef_i8_sel_eq_neg1(ptr addrspace(1) noa
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX9-GISEL-NEXT: v_lshlrev_b32_e32 v3, 24, v0
; GFX9-GISEL-NEXT: v_ffbh_u32_e32 v3, v3
-; GFX9-GISEL-NEXT: v_cmp_eq_u32_sdwa vcc, v0, v1 src0_sel:BYTE_0 src1_sel:DWORD
-; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
+; GFX9-GISEL-NEXT: v_cmp_ne_u32_sdwa vcc, v0, v1 src0_sel:BYTE_0 src1_sel:DWORD
+; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc
; GFX9-GISEL-NEXT: global_store_byte v1, v0, s[0:1]
; GFX9-GISEL-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
@@ -1809,9 +1809,9 @@ define amdgpu_kernel void @v_ctlz_zero_undef_i32_sel_eq_neg1_two_use(ptr addrspa
; GFX9-GISEL-NEXT: global_load_dword v0, v0, s[2:3]
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX9-GISEL-NEXT: v_ffbh_u32_e32 v2, v0
-; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
-; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v0, v2, -1, vcc
-; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; GFX9-GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
+; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v0, -1, v2, vcc
+; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v2, 1, 0, vcc
; GFX9-GISEL-NEXT: global_store_dword v1, v0, s[0:1]
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX9-GISEL-NEXT: global_store_byte v[0:1], v2, off
@@ -1897,8 +1897,8 @@ define amdgpu_kernel void @v_ctlz_zero_undef_i32_sel_eq_0(ptr addrspace(1) noali
; GFX9-GISEL-NEXT: global_load_dword v0, v0, s[2:3]
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX9-GISEL-NEXT: v_ffbh_u32_e32 v1, v0
-; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
-; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
+; GFX9-GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
+; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX9-GISEL-NEXT: global_store_dword v1, v0, s[0:1]
; GFX9-GISEL-NEXT: s_endpgm
@@ -2066,8 +2066,8 @@ define amdgpu_kernel void @v_ctlz_zero_undef_i32_sel_eq_cmp_non0(ptr addrspace(1
; GFX9-GISEL-NEXT: global_load_dword v0, v0, s[2:3]
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX9-GISEL-NEXT: v_ffbh_u32_e32 v1, v0
-; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
+; GFX9-GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 1, v0
+; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX9-GISEL-NEXT: global_store_dword v1, v0, s[0:1]
; GFX9-GISEL-NEXT: s_endpgm
diff --git a/llvm/test/CodeGen/AMDGPU/cttz.ll b/llvm/test/CodeGen/AMDGPU/cttz.ll
index f0c278a67c8bc..1bd640135a642 100644
--- a/llvm/test/CodeGen/AMDGPU/cttz.ll
+++ b/llvm/test/CodeGen/AMDGPU/cttz.ll
@@ -951,9 +951,9 @@ define amdgpu_kernel void @v_cttz_i32_sel_eq_neg1(ptr addrspace(1) noalias %out,
; GFX10-GISEL-NEXT: global_load_dword v0, v0, s[2:3]
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX10-GISEL-NEXT: v_ffbl_b32_e32 v1, v0
-; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX10-GISEL-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
; GFX10-GISEL-NEXT: v_min_u32_e32 v1, 32, v1
-; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, -1, vcc_lo
+; GFX10-GISEL-NEXT: v_cndmask_b32_e32 v0, -1, v1, vcc_lo
; GFX10-GISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX10-GISEL-NEXT: global_store_dword v1, v0, s[0:1]
; GFX10-GISEL-NEXT: s_endpgm
@@ -1153,8 +1153,8 @@ define amdgpu_kernel void @v_cttz_i32_sel_eq_bitwidth(ptr addrspace(1) noalias %
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX10-GISEL-NEXT: v_ffbl_b32_e32 v0, v0
; GFX10-GISEL-NEXT: v_min_u32_e32 v0, 32, v0
-; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 32, v0
-; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, -1, vcc_lo
+; GFX10-GISEL-NEXT: v_cmp_ne_u32_e32 vcc_lo, 32, v0
+; GFX10-GISEL-NEXT: v_cndmask_b32_e32 v0, -1, v0, vcc_lo
; GFX10-GISEL-NEXT: global_store_dword v1, v0, s[0:1]
; GFX10-GISEL-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
@@ -1357,9 +1357,9 @@ define amdgpu_kernel void @v_cttz_i32_sel_ne_bitwidth(ptr addrspace(1) noalias %
; GFX10-GISEL-NEXT: global_load_ubyte v0, v[0:1], off
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX10-GISEL-NEXT: v_or_b32_e32 v1, 0x100, v0
-; GFX10-GISEL-NEXT: v_cmp_eq_u32_sdwa s2, v0, v2 src0_sel:BYTE_0 src1_sel:DWORD
+; GFX10-GISEL-NEXT: v_cmp_ne_u32_sdwa vcc_lo, v0, v2 src0_sel:BYTE_0 src1_sel:DWORD
; GFX10-GISEL-NEXT: v_ffbl_b32_e32 v1, v1
-; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0xffff, s2
+; GFX10-GISEL-NEXT: v_cndmask_b32_e32 v0, 0xffff, v1, vcc_lo
; GFX10-GISEL-NEXT: global_store_byte v2, v0, s[0:1]
; GFX10-GISEL-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
@@ -1463,10 +1463,10 @@ define amdgpu_kernel void @v_cttz_i32_sel_ne_bitwidth(ptr addrspace(1) noalias %
; GFX10-GISEL-NEXT: global_load_ushort v1, v0, s[2:3]
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX10-GISEL-NEXT: v_or_b32_e32 v2, 0x10000, v1
-; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX10-GISEL-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v1
; GFX10-GISEL-NEXT: v_ffbl_b32_e32 v2, v2
; GFX10-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
-; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v1, v2, 0xffff, vcc_lo
+; GFX10-GISEL-NEXT: v_cndmask_b32_e32 v1, 0xffff, v2, vcc_lo
; GFX10-GISEL-NEXT: global_store_short v0, v1, s[0:1]
; GFX10-GISEL-NEXT: s_endpgm
%val = load i16, ptr addrspace(1) %valptr
@@ -1567,8 +1567,8 @@ define amdgpu_kernel void @v_cttz_i7_sel_eq_neg1(ptr addrspace(1) noalias %out,
; GFX10-GISEL-NEXT: v_or_b32_e32 v1, 0x80, v0
; GFX10-GISEL-NEXT: v_and_b32_e32 v0, 0x7f, v0
; GFX10-GISEL-NEXT: v_ffbl_b32_e32 v1, v1
-; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0x7f, vcc_lo
+; GFX10-GISEL-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
+; GFX10-GISEL-NEXT: v_cndmask_b32_e32 v0, 0x7f, v1, vcc_lo
; GFX10-GISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX10-GISEL-NEXT: v_and_b32_e32 v0, 0x7f, v0
; GFX10-GISEL-NEXT: global_store_byte v1, v0, s[0:1]
diff --git a/llvm/test/CodeGen/AMDGPU/cttz_zero_undef.ll b/llvm/test/CodeGen/AMDGPU/cttz_zero_undef.ll
index 777f363fedf9a..06c61d599dea9 100644
--- a/llvm/test/CodeGen/AMDGPU/cttz_zero_undef.ll
+++ b/llvm/test/CodeGen/AMDGPU/cttz_zero_undef.ll
@@ -1170,8 +1170,8 @@ define amdgpu_kernel void @v_cttz_i32_sel_eq_neg1(ptr addrspace(1) noalias %out,
; GFX9-GISEL-NEXT: v_or3_b32 v1, v2, v3, v1
; GFX9-GISEL-NEXT: v_ffbl_b32_e32 v2, v1
; GFX9-GISEL-NEXT: v_min_u32_e32 v2, 32, v2
-; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
-; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v1, v2, -1, vcc
+; GFX9-GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v1
+; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v1, -1, v2, vcc
; GFX9-GISEL-NEXT: global_store_dword v0, v1, s[0:1]
; GFX9-GISEL-NEXT: s_endpgm
%val = load i32, ptr addrspace(1) %arrayidx, align 1
@@ -1510,8 +1510,8 @@ define amdgpu_kernel void @v_cttz_i32_sel_ne_bitwidth(ptr addrspace(1) noalias %
; GFX9-GISEL-NEXT: v_or_b32_e32 v3, 0x100, v1
; GFX9-GISEL-NEXT: v_ffbl_b32_e32 v3, v3
; GFX9-GISEL-NEXT: v_and_b32_e32 v3, 0xff, v3
-; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
-; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v1, v3, v2, vcc
+; GFX9-GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v1
+; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v1, v2, v3, vcc
; GFX9-GISEL-NEXT: global_store_byte v0, v1, s[0:1]
; GFX9-GISEL-NEXT: s_endpgm
%val = load i8, ptr addrspace(1) %arrayidx, align 1
@@ -1612,8 +1612,8 @@ define amdgpu_kernel void @v_cttz_i32_sel_ne_bitwidth(ptr addrspace(1) noalias %
; GFX9-GISEL-NEXT: v_or_b32_e32 v2, 0x10000, v1
; GFX9-GISEL-NEXT: v_ffbl_b32_e32 v2, v2
; GFX9-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
-; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
-; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v1, v2, v3, vcc
+; GFX9-GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v1
+; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v1, v3, v2, vcc
; GFX9-GISEL-NEXT: global_store_short v0, v1, s[0:1]
; GFX9-GISEL-NEXT: s_endpgm
%val = load i16, ptr addrspace(1) %arrayidx, align 1
diff --git a/llvm/test/CodeGen/AMDGPU/fneg-combines.new.ll b/llvm/test/CodeGen/AMDGPU/fneg-combines.new.ll
index 1b471166b5d29..7f51cbec5dc4e 100644
--- a/llvm/test/CodeGen/AMDGPU/fneg-combines.new.ll
+++ b/llvm/test/CodeGen/AMDGPU/fneg-combines.new.ll
@@ -3066,9 +3066,8 @@ define double @v_fneg_select_infloop_regression_f64(double %arg, i1 %arg1) {
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_and_b32_e32 v2, 1, v2
; GCN-NEXT: v_bfrev_b32_e32 v3, 1
-; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2
-; GCN-NEXT: v_cndmask_b32_e64 v1, -v1, v3, vcc
; GCN-NEXT: v_cmp_ne_u32_e32 vcc, 1, v2
+; GCN-NEXT: v_cndmask_b32_e64 v1, v3, -v1, vcc
; GCN-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc
; GCN-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
; GCN-NEXT: s_setpc_b64 s[30:31]
diff --git a/llvm/test/CodeGen/AMDGPU/insert_vector_dynelt.ll b/llvm/test/CodeGen/AMDGPU/insert_vector_dynelt.ll
index e649c3034f35b..46c868b4559b4 100644
--- a/llvm/test/CodeGen/AMDGPU/insert_vector_dynelt.ll
+++ b/llvm/test/CodeGen/AMDGPU/insert_vector_dynelt.ll
@@ -1920,10 +1920,9 @@ define <8 x double> @double8_inselt_vec(<8 x double> %vec, i32 %sel) {
; GCN-LABEL: double8_inselt_vec:
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 0, v16
-; GCN-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc
-; GCN-NEXT: v_mov_b32_e32 v17, 0x3ff00000
; GCN-NEXT: v_cmp_ne_u32_e32 vcc, 0, v16
+; GCN-NEXT: v_mov_b32_e32 v17, 0x3ff00000
+; GCN-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc
; GCN-NEXT: v_cndmask_b32_e32 v1, v17, v1, vcc
; GCN-NEXT: v_cmp_ne_u32_e32 vcc, 1, v16
; GCN-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
diff --git a/llvm/test/CodeGen/AMDGPU/private-memory-atomics.ll b/llvm/test/CodeGen/AMDGPU/private-memory-atomics.ll
index 24a4d8fbde200..b0b875c2b11a1 100644
--- a/llvm/test/CodeGen/AMDGPU/private-memory-atomics.ll
+++ b/llvm/test/CodeGen/AMDGPU/private-memory-atomics.ll
@@ -123,10 +123,10 @@ define i32 @cmpxchg_private_i32(ptr addrspace(5) %ptr) {
; GCN-NEXT: s_mov_b32 s7, 0xf000
; GCN-NEXT: s_mov_b32 s6, -1
; GCN-NEXT: s_waitcnt vmcnt(0)
-; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
-; GCN-NEXT: v_cndmask_b32_e64 v2, v1, 1, vcc
+; GCN-NEXT: v_cmp_ne_u32_e32 vcc, 0, v1
+; GCN-NEXT: v_cndmask_b32_e32 v2, 1, v1, vcc
; GCN-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen
-; GCN-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
+; GCN-NEXT: v_cndmask_b32_e64 v0, 1, 0, vcc
; GCN-NEXT: buffer_store_byte v0, off, s[4:7], 0
; GCN-NEXT: s_waitcnt expcnt(0)
; GCN-NEXT: v_mov_b32_e32 v0, v1
>From 3d1ae88e0555d0c1544f43904b85ca3aa4da4e89 Mon Sep 17 00:00:00 2001
From: Ana Mihajlovic <Ana.Mihajlovic at amd.com>
Date: Wed, 9 Apr 2025 17:48:46 +0200
Subject: [PATCH 4/4] Add f32 compare opcodes to getInverseCompareOpcode
---
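Notes for PATCH 4/4:

getInverseCompareOpcode now also handles the f32 compares, swapping each
opcode for its ordered complement (V_CMP_LT_F32 <-> V_CMP_GE_F32, and so
on; the two agree only when neither input is NaN). The cloned inverse
compare copies all explicit source operands rather than a fixed two (the
f32 e64 forms carry source-modifier operands), clears kill flags on the
copies (the clone is inserted before the original compare, which still
reads those registers), and propagates the instruction flags.

The point of inverting is to move an inline constant from src1 to src0
of the v_cndmask, so the select fits the VOP2 (e32) encoding and can
later pair into v_dual_cndmask_b32. A rough before/after sketch with
illustrative registers:

  ; before: the immediate in src1 forces the e64 form
  v_cmp_ne_u32_e64 s0, 0, s1
  v_cndmask_b32_e64 v1, v2, 0, s0

  ; after: compare inverted into vcc, cndmask sources swapped
  v_cmp_eq_u32_e64 vcc_lo, 0, s1
  v_cndmask_b32_e32 v1, 0, v2, vcc_lo
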
llvm/lib/Target/AMDGPU/SIFoldOperands.cpp | 42 +-
llvm/test/CodeGen/AMDGPU/GlobalISel/fpow.ll | 270 +-
.../CodeGen/AMDGPU/GlobalISel/llvm.powi.ll | 18 +-
.../AMDGPU/GlobalISel/select-to-fmin-fmax.ll | 28 +-
.../amdgpu-simplify-libcall-pow-codegen.ll | 3456 +++++++++++++----
.../AMDGPU/copysign-simplify-demanded-bits.ll | 6 +-
llvm/test/CodeGen/AMDGPU/fdiv_flags.f32.ll | 6 +-
llvm/test/CodeGen/AMDGPU/fsqrt.f32.ll | 44 +-
llvm/test/CodeGen/AMDGPU/llvm.exp.ll | 684 ++--
llvm/test/CodeGen/AMDGPU/llvm.exp10.ll | 684 ++--
10 files changed, 3595 insertions(+), 1643 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
index 1ec0ab9b5bd9c..02f5b88e2d7a0 100644
--- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -107,6 +107,7 @@ class SIFoldOperandsImpl {
unsigned getInverseCompareOpcode(MachineInstr &MI) const {
switch (MI.getOpcode()) {
+    // unsigned 32-bit compares
case AMDGPU::V_CMP_EQ_U32_e64:
return AMDGPU::V_CMP_NE_U32_e64;
case AMDGPU::V_CMP_NE_U32_e64:
@@ -119,19 +120,19 @@ class SIFoldOperandsImpl {
return AMDGPU::V_CMP_LE_U32_e64;
case AMDGPU::V_CMP_LT_U32_e64:
return AMDGPU::V_CMP_GE_U32_e64;
-
- // case AMDGPU::V_CMP_EQ_U32_e64:
- // return AMDGPU::V_CMP_NE_U32_e64;
- // case AMDGPU::V_CMP_NE_U32_e64:
- // return AMDGPU::V_CMP_EQ_U32_e64;
- // case AMDGPU::V_CMP_GE_U32_e64:
- // return AMDGPU::V_CMP_LT_U32_e64;
- // case AMDGPU::V_CMP_LE_U32_e64:
- // return AMDGPU::V_CMP_GT_U32_e64;
- // case AMDGPU::V_CMP_GT_U32_e64:
- // return AMDGPU::V_CMP_LE_U32_e64;
- // case AMDGPU::V_CMP_LT_U32_e64:
- // return AMDGPU::V_CMP_GE_U32_e64;
+  // float 32-bit compares
+ case AMDGPU::V_CMP_EQ_F32_e64:
+ return AMDGPU::V_CMP_NEQ_F32_e64;
+ case AMDGPU::V_CMP_NEQ_F32_e64:
+ return AMDGPU::V_CMP_EQ_F32_e64;
+ case AMDGPU::V_CMP_GE_F32_e64:
+ return AMDGPU::V_CMP_LT_F32_e64;
+ case AMDGPU::V_CMP_LE_F32_e64:
+ return AMDGPU::V_CMP_GT_F32_e64;
+ case AMDGPU::V_CMP_GT_F32_e64:
+ return AMDGPU::V_CMP_LE_F32_e64;
+ case AMDGPU::V_CMP_LT_F32_e64:
+ return AMDGPU::V_CMP_GE_F32_e64;
default:
return 0;
}
@@ -139,7 +140,6 @@ class SIFoldOperandsImpl {
bool foldCopyToVGPROfScalarAddOfFrameIndex(Register DstReg, Register SrcReg,
MachineInstr &MI) const;
-
bool updateOperand(FoldCandidate &Fold) const;
bool canUseImmWithOpSel(FoldCandidate &Fold) const;
@@ -1541,11 +1541,17 @@ bool SIFoldOperandsImpl::tryFoldCndMask(MachineInstr &MI, Register *RegVCC,
auto cmpDL = DefMI->getDebugLoc();
*NewVCC = MRI->createVirtualRegister(MRI->getRegClass(Reg));
*RegVCC = Reg;
- MachineInstrBuilder inverseCompare = BuildMI(
+ MachineInstrBuilder InverseCompare = BuildMI(
*DefMI->getParent(), DefMI, cmpDL, TII->get(Opcode), *NewVCC);
-
- inverseCompare.add(DefMI->getOperand(1));
- inverseCompare.add(DefMI->getOperand(2));
+ InverseCompare->setFlags(DefMI->getFlags());
+  // Copy source operands, dropping kill flags; DefMI still reads them.
+ unsigned OpNum = DefMI->getNumExplicitOperands();
+ for (unsigned i = 1; i < OpNum; i++) {
+ MachineOperand Op = DefMI->getOperand(i);
+ InverseCompare.add(Op);
+ if (Op.isReg() && Op.isKill())
+ InverseCompare->getOperand(i).setIsKill(false);
+ }
}
}
}
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/fpow.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/fpow.ll
index 99261cc269858..297541203a9de 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/fpow.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/fpow.ll
@@ -11,13 +11,13 @@ define float @v_pow_f32(float %x, float %y) {
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v2, 0x800000
-; GFX6-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
-; GFX6-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; GFX6-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2
+; GFX6-NEXT: v_cndmask_b32_e64 v2, 1, 0, vcc
; GFX6-NEXT: v_lshlrev_b32_e32 v2, 5, v2
; GFX6-NEXT: v_ldexp_f32_e32 v0, v0, v2
; GFX6-NEXT: v_log_f32_e32 v0, v0
; GFX6-NEXT: v_mov_b32_e32 v2, 0x42000000
-; GFX6-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
+; GFX6-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc
; GFX6-NEXT: v_sub_f32_e32 v0, v0, v2
; GFX6-NEXT: v_mul_legacy_f32_e32 v0, v0, v1
; GFX6-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
@@ -35,13 +35,13 @@ define float @v_pow_f32(float %x, float %y) {
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_mov_b32_e32 v2, 0x800000
-; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
-; GFX8-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; GFX8-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2
+; GFX8-NEXT: v_cndmask_b32_e64 v2, 1, 0, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v2, 5, v2
; GFX8-NEXT: v_ldexp_f32 v0, v0, v2
; GFX8-NEXT: v_log_f32_e32 v0, v0
; GFX8-NEXT: v_mov_b32_e32 v2, 0x42000000
-; GFX8-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
+; GFX8-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc
; GFX8-NEXT: v_sub_f32_e32 v0, v0, v2
; GFX8-NEXT: v_mul_legacy_f32_e32 v0, v0, v1
; GFX8-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
@@ -59,13 +59,13 @@ define float @v_pow_f32(float %x, float %y) {
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v2, 0x800000
-; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
-; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; GFX9-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2
+; GFX9-NEXT: v_cndmask_b32_e64 v2, 1, 0, vcc
; GFX9-NEXT: v_lshlrev_b32_e32 v2, 5, v2
; GFX9-NEXT: v_ldexp_f32 v0, v0, v2
; GFX9-NEXT: v_log_f32_e32 v0, v0
; GFX9-NEXT: v_mov_b32_e32 v2, 0x42000000
-; GFX9-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
+; GFX9-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc
; GFX9-NEXT: v_sub_f32_e32 v0, v0, v2
; GFX9-NEXT: v_mul_legacy_f32_e32 v0, v0, v1
; GFX9-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
@@ -82,11 +82,11 @@ define float @v_pow_f32(float %x, float %y) {
; GFX10-LABEL: v_pow_f32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
-; GFX10-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc_lo
+; GFX10-NEXT: v_cmp_le_f32_e32 vcc_lo, 0x800000, v0
+; GFX10-NEXT: v_cndmask_b32_e64 v2, 1, 0, vcc_lo
; GFX10-NEXT: v_lshlrev_b32_e32 v2, 5, v2
; GFX10-NEXT: v_ldexp_f32 v0, v0, v2
-; GFX10-NEXT: v_cndmask_b32_e64 v2, 0, 0x42000000, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e64 v2, 0x42000000, 0, vcc_lo
; GFX10-NEXT: v_log_f32_e32 v0, v0
; GFX10-NEXT: v_sub_f32_e32 v0, v0, v2
; GFX10-NEXT: v_mul_legacy_f32_e32 v0, v0, v1
@@ -101,12 +101,12 @@ define float @v_pow_f32(float %x, float %y) {
; GFX11-LABEL: v_pow_f32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
-; GFX11-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc_lo
+; GFX11-NEXT: v_cmp_le_f32_e32 vcc_lo, 0x800000, v0
+; GFX11-NEXT: v_cndmask_b32_e64 v2, 1, 0, vcc_lo
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_lshlrev_b32_e32 v2, 5, v2
; GFX11-NEXT: v_ldexp_f32 v0, v0, v2
-; GFX11-NEXT: v_cndmask_b32_e64 v2, 0, 0x42000000, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e64 v2, 0x42000000, 0, vcc_lo
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_log_f32_e32 v0, v0
; GFX11-NEXT: s_waitcnt_depctr 0xfff
@@ -1004,13 +1004,13 @@ define float @v_pow_f32_fabs_lhs(float %x, float %y) {
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v2, 0x800000
-; GFX6-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v2
-; GFX6-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; GFX6-NEXT: v_cmp_ge_f32_e64 s[4:5], |v0|, v2
+; GFX6-NEXT: v_cndmask_b32_e64 v2, 1, 0, s[4:5]
; GFX6-NEXT: v_lshlrev_b32_e32 v2, 5, v2
; GFX6-NEXT: v_ldexp_f32_e64 v0, |v0|, v2
; GFX6-NEXT: v_log_f32_e32 v0, v0
; GFX6-NEXT: v_mov_b32_e32 v2, 0x42000000
-; GFX6-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
+; GFX6-NEXT: v_cndmask_b32_e64 v2, v2, 0, s[4:5]
; GFX6-NEXT: v_sub_f32_e32 v0, v0, v2
; GFX6-NEXT: v_mul_legacy_f32_e32 v0, v0, v1
; GFX6-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
@@ -1028,13 +1028,13 @@ define float @v_pow_f32_fabs_lhs(float %x, float %y) {
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_mov_b32_e32 v2, 0x800000
-; GFX8-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v2
-; GFX8-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; GFX8-NEXT: v_cmp_ge_f32_e64 s[4:5], |v0|, v2
+; GFX8-NEXT: v_cndmask_b32_e64 v2, 1, 0, s[4:5]
; GFX8-NEXT: v_lshlrev_b32_e32 v2, 5, v2
; GFX8-NEXT: v_ldexp_f32 v0, |v0|, v2
; GFX8-NEXT: v_log_f32_e32 v0, v0
; GFX8-NEXT: v_mov_b32_e32 v2, 0x42000000
-; GFX8-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
+; GFX8-NEXT: v_cndmask_b32_e64 v2, v2, 0, s[4:5]
; GFX8-NEXT: v_sub_f32_e32 v0, v0, v2
; GFX8-NEXT: v_mul_legacy_f32_e32 v0, v0, v1
; GFX8-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
@@ -1052,13 +1052,13 @@ define float @v_pow_f32_fabs_lhs(float %x, float %y) {
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v2, 0x800000
-; GFX9-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v2
-; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; GFX9-NEXT: v_cmp_ge_f32_e64 s[4:5], |v0|, v2
+; GFX9-NEXT: v_cndmask_b32_e64 v2, 1, 0, s[4:5]
; GFX9-NEXT: v_lshlrev_b32_e32 v2, 5, v2
; GFX9-NEXT: v_ldexp_f32 v0, |v0|, v2
; GFX9-NEXT: v_log_f32_e32 v0, v0
; GFX9-NEXT: v_mov_b32_e32 v2, 0x42000000
-; GFX9-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
+; GFX9-NEXT: v_cndmask_b32_e64 v2, v2, 0, s[4:5]
; GFX9-NEXT: v_sub_f32_e32 v0, v0, v2
; GFX9-NEXT: v_mul_legacy_f32_e32 v0, v0, v1
; GFX9-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
@@ -1075,11 +1075,11 @@ define float @v_pow_f32_fabs_lhs(float %x, float %y) {
; GFX10-LABEL: v_pow_f32_fabs_lhs:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_cmp_gt_f32_e64 s4, 0x800000, |v0|
-; GFX10-NEXT: v_cndmask_b32_e64 v2, 0, 1, s4
+; GFX10-NEXT: v_cmp_le_f32_e64 s4, 0x800000, |v0|
+; GFX10-NEXT: v_cndmask_b32_e64 v2, 1, 0, s4
; GFX10-NEXT: v_lshlrev_b32_e32 v2, 5, v2
; GFX10-NEXT: v_ldexp_f32 v0, |v0|, v2
-; GFX10-NEXT: v_cndmask_b32_e64 v2, 0, 0x42000000, s4
+; GFX10-NEXT: v_cndmask_b32_e64 v2, 0x42000000, 0, s4
; GFX10-NEXT: v_log_f32_e32 v0, v0
; GFX10-NEXT: v_sub_f32_e32 v0, v0, v2
; GFX10-NEXT: v_mul_legacy_f32_e32 v0, v0, v1
@@ -1094,13 +1094,13 @@ define float @v_pow_f32_fabs_lhs(float %x, float %y) {
; GFX11-LABEL: v_pow_f32_fabs_lhs:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_cmp_gt_f32_e64 s0, 0x800000, |v0|
+; GFX11-NEXT: v_cmp_le_f32_e64 s0, 0x800000, |v0|
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT: v_cndmask_b32_e64 v2, 0, 1, s0
+; GFX11-NEXT: v_cndmask_b32_e64 v2, 1, 0, s0
; GFX11-NEXT: v_lshlrev_b32_e32 v2, 5, v2
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX11-NEXT: v_ldexp_f32 v0, |v0|, v2
-; GFX11-NEXT: v_cndmask_b32_e64 v2, 0, 0x42000000, s0
+; GFX11-NEXT: v_cndmask_b32_e64 v2, 0x42000000, 0, s0
; GFX11-NEXT: v_log_f32_e32 v0, v0
; GFX11-NEXT: s_waitcnt_depctr 0xfff
; GFX11-NEXT: v_sub_f32_e32 v0, v0, v2
@@ -1125,13 +1125,13 @@ define float @v_pow_f32_fabs_rhs(float %x, float %y) {
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v2, 0x800000
-; GFX6-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
-; GFX6-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; GFX6-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2
+; GFX6-NEXT: v_cndmask_b32_e64 v2, 1, 0, vcc
; GFX6-NEXT: v_lshlrev_b32_e32 v2, 5, v2
; GFX6-NEXT: v_ldexp_f32_e32 v0, v0, v2
; GFX6-NEXT: v_log_f32_e32 v0, v0
; GFX6-NEXT: v_mov_b32_e32 v2, 0x42000000
-; GFX6-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
+; GFX6-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc
; GFX6-NEXT: v_sub_f32_e32 v0, v0, v2
; GFX6-NEXT: v_mul_legacy_f32_e64 v0, v0, |v1|
; GFX6-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
@@ -1149,13 +1149,13 @@ define float @v_pow_f32_fabs_rhs(float %x, float %y) {
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_mov_b32_e32 v2, 0x800000
-; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
-; GFX8-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; GFX8-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2
+; GFX8-NEXT: v_cndmask_b32_e64 v2, 1, 0, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v2, 5, v2
; GFX8-NEXT: v_ldexp_f32 v0, v0, v2
; GFX8-NEXT: v_log_f32_e32 v0, v0
; GFX8-NEXT: v_mov_b32_e32 v2, 0x42000000
-; GFX8-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
+; GFX8-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc
; GFX8-NEXT: v_sub_f32_e32 v0, v0, v2
; GFX8-NEXT: v_mul_legacy_f32_e64 v0, v0, |v1|
; GFX8-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
@@ -1173,13 +1173,13 @@ define float @v_pow_f32_fabs_rhs(float %x, float %y) {
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v2, 0x800000
-; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
-; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; GFX9-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2
+; GFX9-NEXT: v_cndmask_b32_e64 v2, 1, 0, vcc
; GFX9-NEXT: v_lshlrev_b32_e32 v2, 5, v2
; GFX9-NEXT: v_ldexp_f32 v0, v0, v2
; GFX9-NEXT: v_log_f32_e32 v0, v0
; GFX9-NEXT: v_mov_b32_e32 v2, 0x42000000
-; GFX9-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
+; GFX9-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc
; GFX9-NEXT: v_sub_f32_e32 v0, v0, v2
; GFX9-NEXT: v_mul_legacy_f32_e64 v0, v0, |v1|
; GFX9-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
@@ -1196,11 +1196,11 @@ define float @v_pow_f32_fabs_rhs(float %x, float %y) {
; GFX10-LABEL: v_pow_f32_fabs_rhs:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
-; GFX10-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc_lo
+; GFX10-NEXT: v_cmp_le_f32_e32 vcc_lo, 0x800000, v0
+; GFX10-NEXT: v_cndmask_b32_e64 v2, 1, 0, vcc_lo
; GFX10-NEXT: v_lshlrev_b32_e32 v2, 5, v2
; GFX10-NEXT: v_ldexp_f32 v0, v0, v2
-; GFX10-NEXT: v_cndmask_b32_e64 v2, 0, 0x42000000, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e64 v2, 0x42000000, 0, vcc_lo
; GFX10-NEXT: v_log_f32_e32 v0, v0
; GFX10-NEXT: v_sub_f32_e32 v0, v0, v2
; GFX10-NEXT: v_mul_legacy_f32_e64 v0, v0, |v1|
@@ -1215,12 +1215,12 @@ define float @v_pow_f32_fabs_rhs(float %x, float %y) {
; GFX11-LABEL: v_pow_f32_fabs_rhs:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
-; GFX11-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc_lo
+; GFX11-NEXT: v_cmp_le_f32_e32 vcc_lo, 0x800000, v0
+; GFX11-NEXT: v_cndmask_b32_e64 v2, 1, 0, vcc_lo
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_lshlrev_b32_e32 v2, 5, v2
; GFX11-NEXT: v_ldexp_f32 v0, v0, v2
-; GFX11-NEXT: v_cndmask_b32_e64 v2, 0, 0x42000000, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e64 v2, 0x42000000, 0, vcc_lo
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_log_f32_e32 v0, v0
; GFX11-NEXT: s_waitcnt_depctr 0xfff
@@ -1246,13 +1246,13 @@ define float @v_pow_f32_fabs_lhs_rhs(float %x, float %y) {
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v2, 0x800000
-; GFX6-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v2
-; GFX6-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; GFX6-NEXT: v_cmp_ge_f32_e64 s[4:5], |v0|, v2
+; GFX6-NEXT: v_cndmask_b32_e64 v2, 1, 0, s[4:5]
; GFX6-NEXT: v_lshlrev_b32_e32 v2, 5, v2
; GFX6-NEXT: v_ldexp_f32_e64 v0, |v0|, v2
; GFX6-NEXT: v_log_f32_e32 v0, v0
; GFX6-NEXT: v_mov_b32_e32 v2, 0x42000000
-; GFX6-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
+; GFX6-NEXT: v_cndmask_b32_e64 v2, v2, 0, s[4:5]
; GFX6-NEXT: v_sub_f32_e32 v0, v0, v2
; GFX6-NEXT: v_mul_legacy_f32_e64 v0, v0, |v1|
; GFX6-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
@@ -1270,13 +1270,13 @@ define float @v_pow_f32_fabs_lhs_rhs(float %x, float %y) {
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_mov_b32_e32 v2, 0x800000
-; GFX8-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v2
-; GFX8-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; GFX8-NEXT: v_cmp_ge_f32_e64 s[4:5], |v0|, v2
+; GFX8-NEXT: v_cndmask_b32_e64 v2, 1, 0, s[4:5]
; GFX8-NEXT: v_lshlrev_b32_e32 v2, 5, v2
; GFX8-NEXT: v_ldexp_f32 v0, |v0|, v2
; GFX8-NEXT: v_log_f32_e32 v0, v0
; GFX8-NEXT: v_mov_b32_e32 v2, 0x42000000
-; GFX8-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
+; GFX8-NEXT: v_cndmask_b32_e64 v2, v2, 0, s[4:5]
; GFX8-NEXT: v_sub_f32_e32 v0, v0, v2
; GFX8-NEXT: v_mul_legacy_f32_e64 v0, v0, |v1|
; GFX8-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
@@ -1294,13 +1294,13 @@ define float @v_pow_f32_fabs_lhs_rhs(float %x, float %y) {
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v2, 0x800000
-; GFX9-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v2
-; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; GFX9-NEXT: v_cmp_ge_f32_e64 s[4:5], |v0|, v2
+; GFX9-NEXT: v_cndmask_b32_e64 v2, 1, 0, s[4:5]
; GFX9-NEXT: v_lshlrev_b32_e32 v2, 5, v2
; GFX9-NEXT: v_ldexp_f32 v0, |v0|, v2
; GFX9-NEXT: v_log_f32_e32 v0, v0
; GFX9-NEXT: v_mov_b32_e32 v2, 0x42000000
-; GFX9-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
+; GFX9-NEXT: v_cndmask_b32_e64 v2, v2, 0, s[4:5]
; GFX9-NEXT: v_sub_f32_e32 v0, v0, v2
; GFX9-NEXT: v_mul_legacy_f32_e64 v0, v0, |v1|
; GFX9-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
@@ -1317,11 +1317,11 @@ define float @v_pow_f32_fabs_lhs_rhs(float %x, float %y) {
; GFX10-LABEL: v_pow_f32_fabs_lhs_rhs:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_cmp_gt_f32_e64 s4, 0x800000, |v0|
-; GFX10-NEXT: v_cndmask_b32_e64 v2, 0, 1, s4
+; GFX10-NEXT: v_cmp_le_f32_e64 s4, 0x800000, |v0|
+; GFX10-NEXT: v_cndmask_b32_e64 v2, 1, 0, s4
; GFX10-NEXT: v_lshlrev_b32_e32 v2, 5, v2
; GFX10-NEXT: v_ldexp_f32 v0, |v0|, v2
-; GFX10-NEXT: v_cndmask_b32_e64 v2, 0, 0x42000000, s4
+; GFX10-NEXT: v_cndmask_b32_e64 v2, 0x42000000, 0, s4
; GFX10-NEXT: v_log_f32_e32 v0, v0
; GFX10-NEXT: v_sub_f32_e32 v0, v0, v2
; GFX10-NEXT: v_mul_legacy_f32_e64 v0, v0, |v1|
@@ -1336,13 +1336,13 @@ define float @v_pow_f32_fabs_lhs_rhs(float %x, float %y) {
; GFX11-LABEL: v_pow_f32_fabs_lhs_rhs:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_cmp_gt_f32_e64 s0, 0x800000, |v0|
+; GFX11-NEXT: v_cmp_le_f32_e64 s0, 0x800000, |v0|
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT: v_cndmask_b32_e64 v2, 0, 1, s0
+; GFX11-NEXT: v_cndmask_b32_e64 v2, 1, 0, s0
; GFX11-NEXT: v_lshlrev_b32_e32 v2, 5, v2
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX11-NEXT: v_ldexp_f32 v0, |v0|, v2
-; GFX11-NEXT: v_cndmask_b32_e64 v2, 0, 0x42000000, s0
+; GFX11-NEXT: v_cndmask_b32_e64 v2, 0x42000000, 0, s0
; GFX11-NEXT: v_log_f32_e32 v0, v0
; GFX11-NEXT: s_waitcnt_depctr 0xfff
; GFX11-NEXT: v_sub_f32_e32 v0, v0, v2
@@ -1367,13 +1367,13 @@ define amdgpu_ps float @v_pow_f32_sgpr_vgpr(float inreg %x, float %y) {
; GFX6-LABEL: v_pow_f32_sgpr_vgpr:
; GFX6: ; %bb.0:
; GFX6-NEXT: v_mov_b32_e32 v1, 0x800000
-; GFX6-NEXT: v_cmp_lt_f32_e32 vcc, s0, v1
-; GFX6-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; GFX6-NEXT: v_cmp_ge_f32_e32 vcc, s0, v1
+; GFX6-NEXT: v_cndmask_b32_e64 v1, 1, 0, vcc
; GFX6-NEXT: v_lshlrev_b32_e32 v1, 5, v1
; GFX6-NEXT: v_ldexp_f32_e32 v1, s0, v1
; GFX6-NEXT: v_log_f32_e32 v1, v1
; GFX6-NEXT: v_mov_b32_e32 v2, 0x42000000
-; GFX6-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
+; GFX6-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc
; GFX6-NEXT: v_sub_f32_e32 v1, v1, v2
; GFX6-NEXT: v_mul_legacy_f32_e32 v0, v1, v0
; GFX6-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
@@ -1390,13 +1390,13 @@ define amdgpu_ps float @v_pow_f32_sgpr_vgpr(float inreg %x, float %y) {
; GFX8-LABEL: v_pow_f32_sgpr_vgpr:
; GFX8: ; %bb.0:
; GFX8-NEXT: v_mov_b32_e32 v1, 0x800000
-; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, s0, v1
-; GFX8-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; GFX8-NEXT: v_cmp_ge_f32_e32 vcc, s0, v1
+; GFX8-NEXT: v_cndmask_b32_e64 v1, 1, 0, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v1, 5, v1
; GFX8-NEXT: v_ldexp_f32 v1, s0, v1
; GFX8-NEXT: v_log_f32_e32 v1, v1
; GFX8-NEXT: v_mov_b32_e32 v2, 0x42000000
-; GFX8-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
+; GFX8-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc
; GFX8-NEXT: v_sub_f32_e32 v1, v1, v2
; GFX8-NEXT: v_mul_legacy_f32_e32 v0, v1, v0
; GFX8-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
@@ -1413,13 +1413,13 @@ define amdgpu_ps float @v_pow_f32_sgpr_vgpr(float inreg %x, float %y) {
; GFX9-LABEL: v_pow_f32_sgpr_vgpr:
; GFX9: ; %bb.0:
; GFX9-NEXT: v_mov_b32_e32 v1, 0x800000
-; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, s0, v1
-; GFX9-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; GFX9-NEXT: v_cmp_ge_f32_e32 vcc, s0, v1
+; GFX9-NEXT: v_cndmask_b32_e64 v1, 1, 0, vcc
; GFX9-NEXT: v_lshlrev_b32_e32 v1, 5, v1
; GFX9-NEXT: v_ldexp_f32 v1, s0, v1
; GFX9-NEXT: v_log_f32_e32 v1, v1
; GFX9-NEXT: v_mov_b32_e32 v2, 0x42000000
-; GFX9-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
+; GFX9-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc
; GFX9-NEXT: v_sub_f32_e32 v1, v1, v2
; GFX9-NEXT: v_mul_legacy_f32_e32 v0, v1, v0
; GFX9-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
@@ -1435,9 +1435,9 @@ define amdgpu_ps float @v_pow_f32_sgpr_vgpr(float inreg %x, float %y) {
;
; GFX10-LABEL: v_pow_f32_sgpr_vgpr:
; GFX10: ; %bb.0:
-; GFX10-NEXT: v_cmp_gt_f32_e64 s1, 0x800000, s0
-; GFX10-NEXT: v_cndmask_b32_e64 v1, 0, 1, s1
-; GFX10-NEXT: v_cndmask_b32_e64 v2, 0, 0x42000000, s1
+; GFX10-NEXT: v_cmp_le_f32_e64 s1, 0x800000, s0
+; GFX10-NEXT: v_cndmask_b32_e64 v1, 1, 0, s1
+; GFX10-NEXT: v_cndmask_b32_e64 v2, 0x42000000, 0, s1
; GFX10-NEXT: v_lshlrev_b32_e32 v1, 5, v1
; GFX10-NEXT: v_ldexp_f32 v1, s0, v1
; GFX10-NEXT: v_log_f32_e32 v1, v1
@@ -1453,10 +1453,10 @@ define amdgpu_ps float @v_pow_f32_sgpr_vgpr(float inreg %x, float %y) {
;
; GFX11-LABEL: v_pow_f32_sgpr_vgpr:
; GFX11: ; %bb.0:
-; GFX11-NEXT: v_cmp_gt_f32_e64 s1, 0x800000, s0
+; GFX11-NEXT: v_cmp_le_f32_e64 s1, 0x800000, s0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, 1, s1
-; GFX11-NEXT: v_cndmask_b32_e64 v2, 0, 0x42000000, s1
+; GFX11-NEXT: v_cndmask_b32_e64 v1, 1, 0, s1
+; GFX11-NEXT: v_cndmask_b32_e64 v2, 0x42000000, 0, s1
; GFX11-NEXT: v_lshlrev_b32_e32 v1, 5, v1
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_ldexp_f32 v1, s0, v1
@@ -1482,13 +1482,13 @@ define amdgpu_ps float @v_pow_f32_vgpr_sgpr(float %x, float inreg %y) {
; GFX6-LABEL: v_pow_f32_vgpr_sgpr:
; GFX6: ; %bb.0:
; GFX6-NEXT: v_mov_b32_e32 v1, 0x800000
-; GFX6-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
-; GFX6-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; GFX6-NEXT: v_cmp_ge_f32_e32 vcc, v0, v1
+; GFX6-NEXT: v_cndmask_b32_e64 v1, 1, 0, vcc
; GFX6-NEXT: v_lshlrev_b32_e32 v1, 5, v1
; GFX6-NEXT: v_ldexp_f32_e32 v0, v0, v1
; GFX6-NEXT: v_log_f32_e32 v0, v0
; GFX6-NEXT: v_mov_b32_e32 v1, 0x42000000
-; GFX6-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX6-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
; GFX6-NEXT: v_mov_b32_e32 v2, 0x42800000
; GFX6-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX6-NEXT: v_mul_legacy_f32_e32 v0, s0, v0
@@ -1505,13 +1505,13 @@ define amdgpu_ps float @v_pow_f32_vgpr_sgpr(float %x, float inreg %y) {
; GFX8-LABEL: v_pow_f32_vgpr_sgpr:
; GFX8: ; %bb.0:
; GFX8-NEXT: v_mov_b32_e32 v1, 0x800000
-; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
-; GFX8-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; GFX8-NEXT: v_cmp_ge_f32_e32 vcc, v0, v1
+; GFX8-NEXT: v_cndmask_b32_e64 v1, 1, 0, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v1, 5, v1
; GFX8-NEXT: v_ldexp_f32 v0, v0, v1
; GFX8-NEXT: v_log_f32_e32 v0, v0
; GFX8-NEXT: v_mov_b32_e32 v1, 0x42000000
-; GFX8-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX8-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
; GFX8-NEXT: v_mov_b32_e32 v2, 0x42800000
; GFX8-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX8-NEXT: v_mul_legacy_f32_e32 v0, s0, v0
@@ -1528,13 +1528,13 @@ define amdgpu_ps float @v_pow_f32_vgpr_sgpr(float %x, float inreg %y) {
; GFX9-LABEL: v_pow_f32_vgpr_sgpr:
; GFX9: ; %bb.0:
; GFX9-NEXT: v_mov_b32_e32 v1, 0x800000
-; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
-; GFX9-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; GFX9-NEXT: v_cmp_ge_f32_e32 vcc, v0, v1
+; GFX9-NEXT: v_cndmask_b32_e64 v1, 1, 0, vcc
; GFX9-NEXT: v_lshlrev_b32_e32 v1, 5, v1
; GFX9-NEXT: v_ldexp_f32 v0, v0, v1
; GFX9-NEXT: v_log_f32_e32 v0, v0
; GFX9-NEXT: v_mov_b32_e32 v1, 0x42000000
-; GFX9-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX9-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
; GFX9-NEXT: v_mov_b32_e32 v2, 0x42800000
; GFX9-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX9-NEXT: v_mul_legacy_f32_e32 v0, s0, v0
@@ -1550,11 +1550,11 @@ define amdgpu_ps float @v_pow_f32_vgpr_sgpr(float %x, float inreg %y) {
;
; GFX10-LABEL: v_pow_f32_vgpr_sgpr:
; GFX10: ; %bb.0:
-; GFX10-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
-; GFX10-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo
+; GFX10-NEXT: v_cmp_le_f32_e32 vcc_lo, 0x800000, v0
+; GFX10-NEXT: v_cndmask_b32_e64 v1, 1, 0, vcc_lo
; GFX10-NEXT: v_lshlrev_b32_e32 v1, 5, v1
; GFX10-NEXT: v_ldexp_f32 v0, v0, v1
-; GFX10-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e64 v1, 0x42000000, 0, vcc_lo
; GFX10-NEXT: v_log_f32_e32 v0, v0
; GFX10-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX10-NEXT: v_mul_legacy_f32_e32 v0, s0, v0
@@ -1568,12 +1568,12 @@ define amdgpu_ps float @v_pow_f32_vgpr_sgpr(float %x, float inreg %y) {
;
; GFX11-LABEL: v_pow_f32_vgpr_sgpr:
; GFX11: ; %bb.0:
-; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
-; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo
+; GFX11-NEXT: v_cmp_le_f32_e32 vcc_lo, 0x800000, v0
+; GFX11-NEXT: v_cndmask_b32_e64 v1, 1, 0, vcc_lo
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_lshlrev_b32_e32 v1, 5, v1
; GFX11-NEXT: v_ldexp_f32 v0, v0, v1
-; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e64 v1, 0x42000000, 0, vcc_lo
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_log_f32_e32 v0, v0
; GFX11-NEXT: s_waitcnt_depctr 0xfff
@@ -1597,13 +1597,13 @@ define amdgpu_ps float @v_pow_f32_sgpr_sgpr(float inreg %x, float inreg %y) {
; GFX6-LABEL: v_pow_f32_sgpr_sgpr:
; GFX6: ; %bb.0:
; GFX6-NEXT: v_mov_b32_e32 v0, 0x800000
-; GFX6-NEXT: v_cmp_lt_f32_e32 vcc, s0, v0
-; GFX6-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
+; GFX6-NEXT: v_cmp_ge_f32_e32 vcc, s0, v0
+; GFX6-NEXT: v_cndmask_b32_e64 v0, 1, 0, vcc
; GFX6-NEXT: v_lshlrev_b32_e32 v0, 5, v0
; GFX6-NEXT: v_ldexp_f32_e32 v0, s0, v0
; GFX6-NEXT: v_log_f32_e32 v0, v0
; GFX6-NEXT: v_mov_b32_e32 v1, 0x42000000
-; GFX6-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX6-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
; GFX6-NEXT: v_mov_b32_e32 v2, 0x42800000
; GFX6-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX6-NEXT: v_mul_legacy_f32_e32 v0, s1, v0
@@ -1620,13 +1620,13 @@ define amdgpu_ps float @v_pow_f32_sgpr_sgpr(float inreg %x, float inreg %y) {
; GFX8-LABEL: v_pow_f32_sgpr_sgpr:
; GFX8: ; %bb.0:
; GFX8-NEXT: v_mov_b32_e32 v0, 0x800000
-; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, s0, v0
-; GFX8-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
+; GFX8-NEXT: v_cmp_ge_f32_e32 vcc, s0, v0
+; GFX8-NEXT: v_cndmask_b32_e64 v0, 1, 0, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v0, 5, v0
; GFX8-NEXT: v_ldexp_f32 v0, s0, v0
; GFX8-NEXT: v_log_f32_e32 v0, v0
; GFX8-NEXT: v_mov_b32_e32 v1, 0x42000000
-; GFX8-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX8-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
; GFX8-NEXT: v_mov_b32_e32 v2, 0x42800000
; GFX8-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX8-NEXT: v_mul_legacy_f32_e32 v0, s1, v0
@@ -1643,13 +1643,13 @@ define amdgpu_ps float @v_pow_f32_sgpr_sgpr(float inreg %x, float inreg %y) {
; GFX9-LABEL: v_pow_f32_sgpr_sgpr:
; GFX9: ; %bb.0:
; GFX9-NEXT: v_mov_b32_e32 v0, 0x800000
-; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, s0, v0
-; GFX9-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
+; GFX9-NEXT: v_cmp_ge_f32_e32 vcc, s0, v0
+; GFX9-NEXT: v_cndmask_b32_e64 v0, 1, 0, vcc
; GFX9-NEXT: v_lshlrev_b32_e32 v0, 5, v0
; GFX9-NEXT: v_ldexp_f32 v0, s0, v0
; GFX9-NEXT: v_log_f32_e32 v0, v0
; GFX9-NEXT: v_mov_b32_e32 v1, 0x42000000
-; GFX9-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX9-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
; GFX9-NEXT: v_mov_b32_e32 v2, 0x42800000
; GFX9-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX9-NEXT: v_mul_legacy_f32_e32 v0, s1, v0
@@ -1665,9 +1665,9 @@ define amdgpu_ps float @v_pow_f32_sgpr_sgpr(float inreg %x, float inreg %y) {
;
; GFX10-LABEL: v_pow_f32_sgpr_sgpr:
; GFX10: ; %bb.0:
-; GFX10-NEXT: v_cmp_gt_f32_e64 s2, 0x800000, s0
-; GFX10-NEXT: v_cndmask_b32_e64 v0, 0, 1, s2
-; GFX10-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, s2
+; GFX10-NEXT: v_cmp_le_f32_e64 s2, 0x800000, s0
+; GFX10-NEXT: v_cndmask_b32_e64 v0, 1, 0, s2
+; GFX10-NEXT: v_cndmask_b32_e64 v1, 0x42000000, 0, s2
; GFX10-NEXT: v_lshlrev_b32_e32 v0, 5, v0
; GFX10-NEXT: v_ldexp_f32 v0, s0, v0
; GFX10-NEXT: v_log_f32_e32 v0, v0
@@ -1683,10 +1683,10 @@ define amdgpu_ps float @v_pow_f32_sgpr_sgpr(float inreg %x, float inreg %y) {
;
; GFX11-LABEL: v_pow_f32_sgpr_sgpr:
; GFX11: ; %bb.0:
-; GFX11-NEXT: v_cmp_gt_f32_e64 s2, 0x800000, s0
+; GFX11-NEXT: v_cmp_le_f32_e64 s2, 0x800000, s0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-NEXT: v_cndmask_b32_e64 v0, 0, 1, s2
-; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, s2
+; GFX11-NEXT: v_cndmask_b32_e64 v0, 1, 0, s2
+; GFX11-NEXT: v_cndmask_b32_e64 v1, 0x42000000, 0, s2
; GFX11-NEXT: v_lshlrev_b32_e32 v0, 5, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_ldexp_f32 v0, s0, v0
@@ -1713,13 +1713,13 @@ define float @v_pow_f32_fneg_lhs(float %x, float %y) {
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v2, 0x800000
-; GFX6-NEXT: v_cmp_lt_f32_e64 vcc, -v0, v2
-; GFX6-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; GFX6-NEXT: v_cmp_ge_f32_e64 s[4:5], -v0, v2
+; GFX6-NEXT: v_cndmask_b32_e64 v2, 1, 0, s[4:5]
; GFX6-NEXT: v_lshlrev_b32_e32 v2, 5, v2
; GFX6-NEXT: v_ldexp_f32_e64 v0, -v0, v2
; GFX6-NEXT: v_log_f32_e32 v0, v0
; GFX6-NEXT: v_mov_b32_e32 v2, 0x42000000
-; GFX6-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
+; GFX6-NEXT: v_cndmask_b32_e64 v2, v2, 0, s[4:5]
; GFX6-NEXT: v_sub_f32_e32 v0, v0, v2
; GFX6-NEXT: v_mul_legacy_f32_e32 v0, v0, v1
; GFX6-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
@@ -1737,13 +1737,13 @@ define float @v_pow_f32_fneg_lhs(float %x, float %y) {
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_mov_b32_e32 v2, 0x800000
-; GFX8-NEXT: v_cmp_lt_f32_e64 vcc, -v0, v2
-; GFX8-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; GFX8-NEXT: v_cmp_ge_f32_e64 s[4:5], -v0, v2
+; GFX8-NEXT: v_cndmask_b32_e64 v2, 1, 0, s[4:5]
; GFX8-NEXT: v_lshlrev_b32_e32 v2, 5, v2
; GFX8-NEXT: v_ldexp_f32 v0, -v0, v2
; GFX8-NEXT: v_log_f32_e32 v0, v0
; GFX8-NEXT: v_mov_b32_e32 v2, 0x42000000
-; GFX8-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
+; GFX8-NEXT: v_cndmask_b32_e64 v2, v2, 0, s[4:5]
; GFX8-NEXT: v_sub_f32_e32 v0, v0, v2
; GFX8-NEXT: v_mul_legacy_f32_e32 v0, v0, v1
; GFX8-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
@@ -1761,13 +1761,13 @@ define float @v_pow_f32_fneg_lhs(float %x, float %y) {
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v2, 0x800000
-; GFX9-NEXT: v_cmp_lt_f32_e64 vcc, -v0, v2
-; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; GFX9-NEXT: v_cmp_ge_f32_e64 s[4:5], -v0, v2
+; GFX9-NEXT: v_cndmask_b32_e64 v2, 1, 0, s[4:5]
; GFX9-NEXT: v_lshlrev_b32_e32 v2, 5, v2
; GFX9-NEXT: v_ldexp_f32 v0, -v0, v2
; GFX9-NEXT: v_log_f32_e32 v0, v0
; GFX9-NEXT: v_mov_b32_e32 v2, 0x42000000
-; GFX9-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
+; GFX9-NEXT: v_cndmask_b32_e64 v2, v2, 0, s[4:5]
; GFX9-NEXT: v_sub_f32_e32 v0, v0, v2
; GFX9-NEXT: v_mul_legacy_f32_e32 v0, v0, v1
; GFX9-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
@@ -1784,11 +1784,11 @@ define float @v_pow_f32_fneg_lhs(float %x, float %y) {
; GFX10-LABEL: v_pow_f32_fneg_lhs:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_cmp_gt_f32_e64 s4, 0x800000, -v0
-; GFX10-NEXT: v_cndmask_b32_e64 v2, 0, 1, s4
+; GFX10-NEXT: v_cmp_le_f32_e64 s4, 0x800000, -v0
+; GFX10-NEXT: v_cndmask_b32_e64 v2, 1, 0, s4
; GFX10-NEXT: v_lshlrev_b32_e32 v2, 5, v2
; GFX10-NEXT: v_ldexp_f32 v0, -v0, v2
-; GFX10-NEXT: v_cndmask_b32_e64 v2, 0, 0x42000000, s4
+; GFX10-NEXT: v_cndmask_b32_e64 v2, 0x42000000, 0, s4
; GFX10-NEXT: v_log_f32_e32 v0, v0
; GFX10-NEXT: v_sub_f32_e32 v0, v0, v2
; GFX10-NEXT: v_mul_legacy_f32_e32 v0, v0, v1
@@ -1803,13 +1803,13 @@ define float @v_pow_f32_fneg_lhs(float %x, float %y) {
; GFX11-LABEL: v_pow_f32_fneg_lhs:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_cmp_gt_f32_e64 s0, 0x800000, -v0
+; GFX11-NEXT: v_cmp_le_f32_e64 s0, 0x800000, -v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT: v_cndmask_b32_e64 v2, 0, 1, s0
+; GFX11-NEXT: v_cndmask_b32_e64 v2, 1, 0, s0
; GFX11-NEXT: v_lshlrev_b32_e32 v2, 5, v2
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX11-NEXT: v_ldexp_f32 v0, -v0, v2
-; GFX11-NEXT: v_cndmask_b32_e64 v2, 0, 0x42000000, s0
+; GFX11-NEXT: v_cndmask_b32_e64 v2, 0x42000000, 0, s0
; GFX11-NEXT: v_log_f32_e32 v0, v0
; GFX11-NEXT: s_waitcnt_depctr 0xfff
; GFX11-NEXT: v_sub_f32_e32 v0, v0, v2
@@ -1834,13 +1834,13 @@ define float @v_pow_f32_fneg_rhs(float %x, float %y) {
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v2, 0x800000
-; GFX6-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
-; GFX6-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; GFX6-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2
+; GFX6-NEXT: v_cndmask_b32_e64 v2, 1, 0, vcc
; GFX6-NEXT: v_lshlrev_b32_e32 v2, 5, v2
; GFX6-NEXT: v_ldexp_f32_e32 v0, v0, v2
; GFX6-NEXT: v_log_f32_e32 v0, v0
; GFX6-NEXT: v_mov_b32_e32 v2, 0x42000000
-; GFX6-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
+; GFX6-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc
; GFX6-NEXT: v_sub_f32_e32 v0, v0, v2
; GFX6-NEXT: v_mul_legacy_f32_e64 v0, v0, -v1
; GFX6-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
@@ -1858,13 +1858,13 @@ define float @v_pow_f32_fneg_rhs(float %x, float %y) {
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_mov_b32_e32 v2, 0x800000
-; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
-; GFX8-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; GFX8-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2
+; GFX8-NEXT: v_cndmask_b32_e64 v2, 1, 0, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v2, 5, v2
; GFX8-NEXT: v_ldexp_f32 v0, v0, v2
; GFX8-NEXT: v_log_f32_e32 v0, v0
; GFX8-NEXT: v_mov_b32_e32 v2, 0x42000000
-; GFX8-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
+; GFX8-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc
; GFX8-NEXT: v_sub_f32_e32 v0, v0, v2
; GFX8-NEXT: v_mul_legacy_f32_e64 v0, v0, -v1
; GFX8-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
@@ -1882,13 +1882,13 @@ define float @v_pow_f32_fneg_rhs(float %x, float %y) {
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v2, 0x800000
-; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
-; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; GFX9-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2
+; GFX9-NEXT: v_cndmask_b32_e64 v2, 1, 0, vcc
; GFX9-NEXT: v_lshlrev_b32_e32 v2, 5, v2
; GFX9-NEXT: v_ldexp_f32 v0, v0, v2
; GFX9-NEXT: v_log_f32_e32 v0, v0
; GFX9-NEXT: v_mov_b32_e32 v2, 0x42000000
-; GFX9-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
+; GFX9-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc
; GFX9-NEXT: v_sub_f32_e32 v0, v0, v2
; GFX9-NEXT: v_mul_legacy_f32_e64 v0, v0, -v1
; GFX9-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
@@ -1905,11 +1905,11 @@ define float @v_pow_f32_fneg_rhs(float %x, float %y) {
; GFX10-LABEL: v_pow_f32_fneg_rhs:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
-; GFX10-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc_lo
+; GFX10-NEXT: v_cmp_le_f32_e32 vcc_lo, 0x800000, v0
+; GFX10-NEXT: v_cndmask_b32_e64 v2, 1, 0, vcc_lo
; GFX10-NEXT: v_lshlrev_b32_e32 v2, 5, v2
; GFX10-NEXT: v_ldexp_f32 v0, v0, v2
-; GFX10-NEXT: v_cndmask_b32_e64 v2, 0, 0x42000000, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e64 v2, 0x42000000, 0, vcc_lo
; GFX10-NEXT: v_log_f32_e32 v0, v0
; GFX10-NEXT: v_sub_f32_e32 v0, v0, v2
; GFX10-NEXT: v_mul_legacy_f32_e64 v0, v0, -v1
@@ -1924,12 +1924,12 @@ define float @v_pow_f32_fneg_rhs(float %x, float %y) {
; GFX11-LABEL: v_pow_f32_fneg_rhs:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
-; GFX11-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc_lo
+; GFX11-NEXT: v_cmp_le_f32_e32 vcc_lo, 0x800000, v0
+; GFX11-NEXT: v_cndmask_b32_e64 v2, 1, 0, vcc_lo
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_lshlrev_b32_e32 v2, 5, v2
; GFX11-NEXT: v_ldexp_f32 v0, v0, v2
-; GFX11-NEXT: v_cndmask_b32_e64 v2, 0, 0x42000000, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e64 v2, 0x42000000, 0, vcc_lo
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_log_f32_e32 v0, v0
; GFX11-NEXT: s_waitcnt_depctr 0xfff
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.powi.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.powi.ll
index fe002d69faf66..01461a09b5c2a 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.powi.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.powi.ll
@@ -79,14 +79,14 @@ define float @v_powi_f32(float %l, i32 %r) {
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_mov_b32_e32 v2, 0x800000
-; GFX7-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
-; GFX7-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; GFX7-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2
+; GFX7-NEXT: v_cndmask_b32_e64 v2, 1, 0, vcc
; GFX7-NEXT: v_lshlrev_b32_e32 v2, 5, v2
; GFX7-NEXT: v_ldexp_f32_e32 v0, v0, v2
; GFX7-NEXT: v_log_f32_e32 v0, v0
; GFX7-NEXT: v_cvt_f32_i32_e32 v1, v1
; GFX7-NEXT: v_mov_b32_e32 v2, 0x42000000
-; GFX7-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
+; GFX7-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc
; GFX7-NEXT: v_sub_f32_e32 v0, v0, v2
; GFX7-NEXT: v_mul_legacy_f32_e32 v0, v0, v1
; GFX7-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
@@ -104,14 +104,14 @@ define float @v_powi_f32(float %l, i32 %r) {
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_mov_b32_e32 v2, 0x800000
-; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
-; GFX8-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; GFX8-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2
+; GFX8-NEXT: v_cndmask_b32_e64 v2, 1, 0, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v2, 5, v2
; GFX8-NEXT: v_ldexp_f32 v0, v0, v2
; GFX8-NEXT: v_log_f32_e32 v0, v0
; GFX8-NEXT: v_cvt_f32_i32_e32 v1, v1
; GFX8-NEXT: v_mov_b32_e32 v2, 0x42000000
-; GFX8-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
+; GFX8-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc
; GFX8-NEXT: v_sub_f32_e32 v0, v0, v2
; GFX8-NEXT: v_mul_legacy_f32_e32 v0, v0, v1
; GFX8-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
@@ -128,13 +128,13 @@ define float @v_powi_f32(float %l, i32 %r) {
; GFX11-LABEL: v_powi_f32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
+; GFX11-NEXT: v_cmp_le_f32_e32 vcc_lo, 0x800000, v0
; GFX11-NEXT: v_cvt_f32_i32_e32 v1, v1
-; GFX11-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e64 v2, 1, 0, vcc_lo
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_lshlrev_b32_e32 v2, 5, v2
; GFX11-NEXT: v_ldexp_f32 v0, v0, v2
-; GFX11-NEXT: v_cndmask_b32_e64 v2, 0, 0x42000000, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e64 v2, 0x42000000, 0, vcc_lo
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_log_f32_e32 v0, v0
; GFX11-NEXT: s_waitcnt_depctr 0xfff
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/select-to-fmin-fmax.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/select-to-fmin-fmax.ll
index ee3bf96111994..1991afa83870a 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/select-to-fmin-fmax.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/select-to-fmin-fmax.ll
@@ -18,8 +18,8 @@ define float @test_s32(float %a) #0 {
; GCN-LABEL: test_s32:
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT: v_cmp_gt_f32_e32 vcc, 0, v0
-; GCN-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc
+; GCN-NEXT: v_cmp_le_f32_e32 vcc, 0, v0
+; GCN-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc
; GCN-NEXT: s_setpc_b64 s[30:31]
entry:
%fcmp = fcmp olt float %a, 0.0
@@ -111,10 +111,10 @@ define <2 x float> @test_v2s32(<2 x float> %a) #0 {
; GCN-LABEL: test_v2s32:
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT: v_cmp_gt_f32_e32 vcc, 0, v0
-; GCN-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc
-; GCN-NEXT: v_cmp_gt_f32_e32 vcc, 0, v1
-; GCN-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
+; GCN-NEXT: v_cmp_le_f32_e32 vcc, 0, v0
+; GCN-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc
+; GCN-NEXT: v_cmp_le_f32_e32 vcc, 0, v1
+; GCN-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
; GCN-NEXT: s_setpc_b64 s[30:31]
entry:
%fcmp = fcmp olt <2 x float> %a, zeroinitializer
@@ -126,14 +126,14 @@ define <4 x float> @test_v4s32(<4 x float> %a) #0 {
; GCN-LABEL: test_v4s32:
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT: v_cmp_gt_f32_e32 vcc, 0, v0
-; GCN-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc
-; GCN-NEXT: v_cmp_gt_f32_e32 vcc, 0, v1
-; GCN-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
-; GCN-NEXT: v_cmp_gt_f32_e32 vcc, 0, v2
-; GCN-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc
-; GCN-NEXT: v_cmp_gt_f32_e32 vcc, 0, v3
-; GCN-NEXT: v_cndmask_b32_e64 v3, v3, 0, vcc
+; GCN-NEXT: v_cmp_le_f32_e32 vcc, 0, v0
+; GCN-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc
+; GCN-NEXT: v_cmp_le_f32_e32 vcc, 0, v1
+; GCN-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GCN-NEXT: v_cmp_le_f32_e32 vcc, 0, v2
+; GCN-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
+; GCN-NEXT: v_cmp_le_f32_e32 vcc, 0, v3
+; GCN-NEXT: v_cndmask_b32_e32 v3, 0, v3, vcc
; GCN-NEXT: s_setpc_b64 s[30:31]
entry:
%fcmp = fcmp olt <4 x float> %a, zeroinitializer
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pow-codegen.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pow-codegen.ll
index 5bda853b76727..b494ff8ba1f5d 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pow-codegen.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pow-codegen.ll
@@ -1,773 +1,2719 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
-; RUN: opt -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -passes=amdgpu-simplifylib,instcombine -amdgpu-prelink < %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -amdgpu-prelink | FileCheck %s
-
-declare hidden float @_Z3powff(float, float)
-declare hidden double @_Z3powdd(double, double)
-declare hidden half @_Z3powDhDh(half, half)
-
-declare hidden float @_Z4powrff(float, float)
-declare hidden double @_Z4powrdd(double, double)
-declare hidden half @_Z4powrDhDh(half, half)
-
-declare hidden float @_Z4pownfi(float, i32)
-declare hidden double @_Z4powndi(double, i32)
-declare hidden half @_Z4pownDhi(half, i32)
-
-; --------------------------------------------------------------------
-; test pow
-; --------------------------------------------------------------------
-
-define half @test_pow_fast_f16(half %x, half %y) {
-; CHECK-LABEL: test_pow_fast_f16:
-; CHECK: ; %bb.0:
-; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CHECK-NEXT: s_getpc_b64 s[16:17]
-; CHECK-NEXT: s_add_u32 s16, s16, _Z3powDhDh@rel32@lo+4
-; CHECK-NEXT: s_addc_u32 s17, s17, _Z3powDhDh@rel32@hi+12
-; CHECK-NEXT: s_setpc_b64 s[16:17]
- %pow = tail call fast half @_Z3powDhDh(half %x, half %y)
- ret half %pow
-}
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-simplifylib,instcombine -amdgpu-prelink %s | FileCheck %s
+
+target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9"
+
+declare float @_Z3powff(float, float)
+declare <2 x float> @_Z3powDv2_fS_(<2 x float>, <2 x float>)
+declare <3 x float> @_Z3powDv3_fS_(<3 x float>, <3 x float>)
+declare <4 x float> @_Z3powDv4_fS_(<4 x float>, <4 x float>)
+declare <8 x float> @_Z3powDv8_fS_(<8 x float>, <8 x float>)
+declare <16 x float> @_Z3powDv16_fS_(<16 x float>, <16 x float>)
+declare double @_Z3powdd(double, double)
+declare <2 x double> @_Z3powDv2_dS_(<2 x double>, <2 x double>)
+declare <3 x double> @_Z3powDv3_dS_(<3 x double>, <3 x double>)
+declare <4 x double> @_Z3powDv4_dS_(<4 x double>, <4 x double>)
+declare <8 x double> @_Z3powDv8_dS_(<8 x double>, <8 x double>)
+declare <16 x double> @_Z3powDv16_dS_(<16 x double>, <16 x double>)
+declare half @_Z3powDhDh(half, half)
+declare <2 x half> @_Z3powDv2_DhS_(<2 x half>, <2 x half>)
+declare <3 x half> @_Z3powDv3_DhS_(<3 x half>, <3 x half>)
+declare <4 x half> @_Z3powDv4_DhS_(<4 x half>, <4 x half>)
+declare <8 x half> @_Z3powDv8_DhS_(<8 x half>, <8 x half>)
+declare <16 x half> @_Z3powDv16_DhS_(<16 x half>, <16 x half>)
+declare void @llvm.assume(i1 noundef)
+declare float @llvm.floor.f32(float)
+declare float @llvm.ceil.f32(float)
+declare float @llvm.trunc.f32(float)
+declare float @llvm.rint.f32(float)
+declare float @llvm.nearbyint.f32(float)
+declare float @llvm.round.f32(float)
+declare float @llvm.roundeven.f32(float)
define float @test_pow_fast_f32(float %x, float %y) {
-; CHECK-LABEL: test_pow_fast_f32:
-; CHECK: ; %bb.0:
-; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CHECK-NEXT: s_getpc_b64 s[16:17]
-; CHECK-NEXT: s_add_u32 s16, s16, _Z3powff@rel32@lo+4
-; CHECK-NEXT: s_addc_u32 s17, s17, _Z3powff@rel32@hi+12
-; CHECK-NEXT: s_setpc_b64 s[16:17]
+; CHECK-LABEL: define float @test_pow_fast_f32
+; CHECK-SAME: (float [[X:%.*]], float [[Y:%.*]]) {
+; CHECK-NEXT: [[POW:%.*]] = tail call fast float @_Z3powff(float [[X]], float [[Y]])
+; CHECK-NEXT: ret float [[POW]]
+;
%pow = tail call fast float @_Z3powff(float %x, float %y)
ret float %pow
}
-define double @test_pow_fast_f64(double %x, double %y) {
-; CHECK-LABEL: test_pow_fast_f64:
-; CHECK: ; %bb.0:
-; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CHECK-NEXT: s_getpc_b64 s[16:17]
-; CHECK-NEXT: s_add_u32 s16, s16, _Z3powdd@rel32@lo+4
-; CHECK-NEXT: s_addc_u32 s17, s17, _Z3powdd@rel32@hi+12
-; CHECK-NEXT: s_setpc_b64 s[16:17]
- %pow = tail call fast double @_Z3powdd(double %x, double %y)
+define <2 x float> @test_pow_fast_v2f32(<2 x float> %x, <2 x float> %y) {
+; CHECK-LABEL: define <2 x float> @test_pow_fast_v2f32
+; CHECK-SAME: (<2 x float> [[X:%.*]], <2 x float> [[Y:%.*]]) {
+; CHECK-NEXT: [[POW:%.*]] = tail call fast <2 x float> @_Z3powDv2_fS_(<2 x float> [[X]], <2 x float> [[Y]])
+; CHECK-NEXT: ret <2 x float> [[POW]]
+;
+ %pow = tail call fast <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> %y)
+ ret <2 x float> %pow
+}
+
+define float @test_pow_afn_f32_nnan(float %x, float %y) {
+; CHECK-LABEL: define float @test_pow_afn_f32_nnan
+; CHECK-SAME: (float [[X:%.*]], float [[Y:%.*]]) {
+; CHECK-NEXT: [[POW:%.*]] = tail call nnan afn float @_Z3powff(float [[X]], float [[Y]])
+; CHECK-NEXT: ret float [[POW]]
+;
+ %pow = tail call afn nnan float @_Z3powff(float %x, float %y)
+ ret float %pow
+}
+
+define float @test_pow_afn_f32_nnan_ninf(float %x, float %y) {
+; CHECK-LABEL: define float @test_pow_afn_f32_nnan_ninf
+; CHECK-SAME: (float [[X:%.*]], float [[Y:%.*]]) {
+; CHECK-NEXT: [[POW:%.*]] = tail call nnan ninf afn float @_Z3powff(float [[X]], float [[Y]])
+; CHECK-NEXT: ret float [[POW]]
+;
+ %pow = tail call afn nnan ninf float @_Z3powff(float %x, float %y)
+ ret float %pow
+}
+
+define <2 x float> @test_pow_afn_v2f32_nnan(<2 x float> %x, <2 x float> %y) {
+; CHECK-LABEL: define <2 x float> @test_pow_afn_v2f32_nnan
+; CHECK-SAME: (<2 x float> [[X:%.*]], <2 x float> [[Y:%.*]]) {
+; CHECK-NEXT: [[POW:%.*]] = tail call nnan afn <2 x float> @_Z3powDv2_fS_(<2 x float> [[X]], <2 x float> [[Y]])
+; CHECK-NEXT: ret <2 x float> [[POW]]
+;
+ %pow = tail call afn nnan <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> %y)
+ ret <2 x float> %pow
+}
+
+define <2 x float> @test_pow_afn_v2f32_nnan_ninf(<2 x float> %x, <2 x float> %y) {
+; CHECK-LABEL: define <2 x float> @test_pow_afn_v2f32_nnan_ninf
+; CHECK-SAME: (<2 x float> [[X:%.*]], <2 x float> [[Y:%.*]]) {
+; CHECK-NEXT: [[POW:%.*]] = tail call nnan ninf afn <2 x float> @_Z3powDv2_fS_(<2 x float> [[X]], <2 x float> [[Y]])
+; CHECK-NEXT: ret <2 x float> [[POW]]
+;
+ %pow = tail call afn nnan ninf <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> %y)
+ ret <2 x float> %pow
+}
+
+define float @test_pow_afn_f32(float %x, float %y) {
+; CHECK-LABEL: define float @test_pow_afn_f32
+; CHECK-SAME: (float [[X:%.*]], float [[Y:%.*]]) {
+; CHECK-NEXT: [[POW:%.*]] = tail call afn float @_Z3powff(float [[X]], float [[Y]])
+; CHECK-NEXT: ret float [[POW]]
+;
+ %pow = tail call afn float @_Z3powff(float %x, float %y)
+ ret float %pow
+}
+
+define <2 x float> @test_pow_afn_v2f32(<2 x float> %x, <2 x float> %y) {
+; CHECK-LABEL: define <2 x float> @test_pow_afn_v2f32
+; CHECK-SAME: (<2 x float> [[X:%.*]], <2 x float> [[Y:%.*]]) {
+; CHECK-NEXT: [[POW:%.*]] = tail call afn <2 x float> @_Z3powDv2_fS_(<2 x float> [[X]], <2 x float> [[Y]])
+; CHECK-NEXT: ret <2 x float> [[POW]]
+;
+ %pow = tail call afn <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> %y)
+ ret <2 x float> %pow
+}
+
+define <3 x float> @test_pow_afn_v3f32(<3 x float> %x, <3 x float> %y) {
+; CHECK-LABEL: define <3 x float> @test_pow_afn_v3f32
+; CHECK-SAME: (<3 x float> [[X:%.*]], <3 x float> [[Y:%.*]]) {
+; CHECK-NEXT: [[POW:%.*]] = tail call afn <3 x float> @_Z3powDv3_fS_(<3 x float> [[X]], <3 x float> [[Y]])
+; CHECK-NEXT: ret <3 x float> [[POW]]
+;
+ %pow = tail call afn <3 x float> @_Z3powDv3_fS_(<3 x float> %x, <3 x float> %y)
+ ret <3 x float> %pow
+}
+
+define <4 x float> @test_pow_afn_v4f32(<4 x float> %x, <4 x float> %y) {
+; CHECK-LABEL: define <4 x float> @test_pow_afn_v4f32
+; CHECK-SAME: (<4 x float> [[X:%.*]], <4 x float> [[Y:%.*]]) {
+; CHECK-NEXT: [[POW:%.*]] = tail call afn <4 x float> @_Z3powDv4_fS_(<4 x float> [[X]], <4 x float> [[Y]])
+; CHECK-NEXT: ret <4 x float> [[POW]]
+;
+ %pow = tail call afn <4 x float> @_Z3powDv4_fS_(<4 x float> %x, <4 x float> %y)
+ ret <4 x float> %pow
+}
+
+define <8 x float> @test_pow_afn_v8f32(<8 x float> %x, <8 x float> %y) {
+; CHECK-LABEL: define <8 x float> @test_pow_afn_v8f32
+; CHECK-SAME: (<8 x float> [[X:%.*]], <8 x float> [[Y:%.*]]) {
+; CHECK-NEXT: [[POW:%.*]] = tail call afn <8 x float> @_Z3powDv8_fS_(<8 x float> [[X]], <8 x float> [[Y]])
+; CHECK-NEXT: ret <8 x float> [[POW]]
+;
+ %pow = tail call afn <8 x float> @_Z3powDv8_fS_(<8 x float> %x, <8 x float> %y)
+ ret <8 x float> %pow
+}
+
+define <16 x float> @test_pow_afn_v16f32(<16 x float> %x, <16 x float> %y) {
+; CHECK-LABEL: define <16 x float> @test_pow_afn_v16f32
+; CHECK-SAME: (<16 x float> [[X:%.*]], <16 x float> [[Y:%.*]]) {
+; CHECK-NEXT: [[POW:%.*]] = tail call afn <16 x float> @_Z3powDv16_fS_(<16 x float> [[X]], <16 x float> [[Y]])
+; CHECK-NEXT: ret <16 x float> [[POW]]
+;
+ %pow = tail call afn <16 x float> @_Z3powDv16_fS_(<16 x float> %x, <16 x float> %y)
+ ret <16 x float> %pow
+}
+
+define double @test_pow_afn_f64(double %x, double %y) {
+; CHECK-LABEL: define double @test_pow_afn_f64
+; CHECK-SAME: (double [[X:%.*]], double [[Y:%.*]]) {
+; CHECK-NEXT: [[POW:%.*]] = tail call afn double @_Z3powdd(double [[X]], double [[Y]])
+; CHECK-NEXT: ret double [[POW]]
+;
+ %pow = tail call afn double @_Z3powdd(double %x, double %y)
ret double %pow
}
-define half @test_pow_fast_f16__integral_y(half %x, i32 %y.i) {
-; CHECK-LABEL: test_pow_fast_f16__integral_y:
-; CHECK: ; %bb.0:
-; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CHECK-NEXT: v_cvt_f32_i32_e32 v1, v1
-; CHECK-NEXT: v_log_f16_e64 v3, |v0|
-; CHECK-NEXT: v_cvt_f16_f32_e32 v1, v1
-; CHECK-NEXT: v_cvt_f32_f16_e32 v1, v1
-; CHECK-NEXT: v_cvt_i32_f32_e32 v1, v1
-; CHECK-NEXT: v_cvt_f32_i32_e32 v2, v1
-; CHECK-NEXT: v_lshlrev_b16_e32 v1, 15, v1
-; CHECK-NEXT: v_and_b32_e32 v0, v1, v0
-; CHECK-NEXT: v_cvt_f16_f32_e32 v2, v2
-; CHECK-NEXT: v_mul_f16_e32 v2, v3, v2
-; CHECK-NEXT: v_exp_f16_e32 v2, v2
-; CHECK-NEXT: v_or_b32_e32 v0, v0, v2
-; CHECK-NEXT: s_setpc_b64 s[30:31]
- %y = sitofp i32 %y.i to half
- %pow = tail call fast half @_Z3powDhDh(half %x, half %y)
+define <2 x double> @test_pow_afn_v2f64(<2 x double> %x, <2 x double> %y) {
+; CHECK-LABEL: define <2 x double> @test_pow_afn_v2f64
+; CHECK-SAME: (<2 x double> [[X:%.*]], <2 x double> [[Y:%.*]]) {
+; CHECK-NEXT: [[POW:%.*]] = tail call afn <2 x double> @_Z3powDv2_dS_(<2 x double> [[X]], <2 x double> [[Y]])
+; CHECK-NEXT: ret <2 x double> [[POW]]
+;
+ %pow = tail call afn <2 x double> @_Z3powDv2_dS_(<2 x double> %x, <2 x double> %y)
+ ret <2 x double> %pow
+}
+
+define <3 x double> @test_pow_afn_v3f64(<3 x double> %x, <3 x double> %y) {
+; CHECK-LABEL: define <3 x double> @test_pow_afn_v3f64
+; CHECK-SAME: (<3 x double> [[X:%.*]], <3 x double> [[Y:%.*]]) {
+; CHECK-NEXT: [[POW:%.*]] = tail call afn <3 x double> @_Z3powDv3_dS_(<3 x double> [[X]], <3 x double> [[Y]])
+; CHECK-NEXT: ret <3 x double> [[POW]]
+;
+ %pow = tail call afn <3 x double> @_Z3powDv3_dS_(<3 x double> %x, <3 x double> %y)
+ ret <3 x double> %pow
+}
+
+define <4 x double> @test_pow_afn_v4f64(<4 x double> %x, <4 x double> %y) {
+; CHECK-LABEL: define <4 x double> @test_pow_afn_v4f64
+; CHECK-SAME: (<4 x double> [[X:%.*]], <4 x double> [[Y:%.*]]) {
+; CHECK-NEXT: [[POW:%.*]] = tail call afn <4 x double> @_Z3powDv4_dS_(<4 x double> [[X]], <4 x double> [[Y]])
+; CHECK-NEXT: ret <4 x double> [[POW]]
+;
+ %pow = tail call afn <4 x double> @_Z3powDv4_dS_(<4 x double> %x, <4 x double> %y)
+ ret <4 x double> %pow
+}
+
+define <8 x double> @test_pow_afn_v8f64(<8 x double> %x, <8 x double> %y) {
+; CHECK-LABEL: define <8 x double> @test_pow_afn_v8f64
+; CHECK-SAME: (<8 x double> [[X:%.*]], <8 x double> [[Y:%.*]]) {
+; CHECK-NEXT: [[POW:%.*]] = tail call afn <8 x double> @_Z3powDv8_dS_(<8 x double> [[X]], <8 x double> [[Y]])
+; CHECK-NEXT: ret <8 x double> [[POW]]
+;
+ %pow = tail call afn <8 x double> @_Z3powDv8_dS_(<8 x double> %x, <8 x double> %y)
+ ret <8 x double> %pow
+}
+
+define <16 x double> @test_pow_afn_v16f64(<16 x double> %x, <16 x double> %y) {
+; CHECK-LABEL: define <16 x double> @test_pow_afn_v16f64
+; CHECK-SAME: (<16 x double> [[X:%.*]], <16 x double> [[Y:%.*]]) {
+; CHECK-NEXT: [[POW:%.*]] = tail call afn <16 x double> @_Z3powDv16_dS_(<16 x double> [[X]], <16 x double> [[Y]])
+; CHECK-NEXT: ret <16 x double> [[POW]]
+;
+ %pow = tail call afn <16 x double> @_Z3powDv16_dS_(<16 x double> %x, <16 x double> %y)
+ ret <16 x double> %pow
+}
+
+define half @test_pow_afn_f16(half %x, half %y) {
+; CHECK-LABEL: define half @test_pow_afn_f16
+; CHECK-SAME: (half [[X:%.*]], half [[Y:%.*]]) {
+; CHECK-NEXT: [[POW:%.*]] = tail call afn half @_Z3powDhDh(half [[X]], half [[Y]])
+; CHECK-NEXT: ret half [[POW]]
+;
+ %pow = tail call afn half @_Z3powDhDh(half %x, half %y)
ret half %pow
}
-define float @test_pow_fast_f32__integral_y(float %x, i32 %y.i) {
-; CHECK-LABEL: test_pow_fast_f32__integral_y:
-; CHECK: ; %bb.0:
-; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CHECK-NEXT: v_cvt_f32_i32_e32 v1, v1
-; CHECK-NEXT: s_mov_b32 s4, 0x800000
-; CHECK-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4
-; CHECK-NEXT: v_cndmask_b32_e64 v3, 0, 32, vcc
-; CHECK-NEXT: v_cvt_i32_f32_e32 v1, v1
-; CHECK-NEXT: v_ldexp_f32 v3, |v0|, v3
-; CHECK-NEXT: v_log_f32_e32 v3, v3
-; CHECK-NEXT: v_mov_b32_e32 v2, 0x42000000
-; CHECK-NEXT: v_cvt_f32_i32_e32 v4, v1
-; CHECK-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
-; CHECK-NEXT: v_sub_f32_e32 v2, v3, v2
-; CHECK-NEXT: s_mov_b32 s4, 0xc2fc0000
-; CHECK-NEXT: v_mul_f32_e32 v3, v2, v4
-; CHECK-NEXT: v_mov_b32_e32 v5, 0x42800000
-; CHECK-NEXT: v_cmp_gt_f32_e32 vcc, s4, v3
-; CHECK-NEXT: v_cndmask_b32_e32 v3, 0, v5, vcc
-; CHECK-NEXT: v_fma_f32 v2, v2, v4, v3
-; CHECK-NEXT: v_exp_f32_e32 v2, v2
-; CHECK-NEXT: v_not_b32_e32 v3, 63
-; CHECK-NEXT: v_cndmask_b32_e32 v3, 0, v3, vcc
-; CHECK-NEXT: v_lshlrev_b32_e32 v1, 31, v1
-; CHECK-NEXT: v_ldexp_f32 v2, v2, v3
-; CHECK-NEXT: v_and_or_b32 v0, v1, v0, v2
-; CHECK-NEXT: s_setpc_b64 s[30:31]
- %y = sitofp i32 %y.i to float
- %pow = tail call fast float @_Z3powff(float %x, float %y)
+define <2 x half> @test_pow_afn_v2f16(<2 x half> %x, <2 x half> %y) {
+; CHECK-LABEL: define <2 x half> @test_pow_afn_v2f16
+; CHECK-SAME: (<2 x half> [[X:%.*]], <2 x half> [[Y:%.*]]) {
+; CHECK-NEXT: [[POW:%.*]] = tail call afn <2 x half> @_Z3powDv2_DhS_(<2 x half> [[X]], <2 x half> [[Y]])
+; CHECK-NEXT: ret <2 x half> [[POW]]
+;
+ %pow = tail call afn <2 x half> @_Z3powDv2_DhS_(<2 x half> %x, <2 x half> %y)
+ ret <2 x half> %pow
+}
+
+define <3 x half> @test_pow_afn_v3f16(<3 x half> %x, <3 x half> %y) {
+; CHECK-LABEL: define <3 x half> @test_pow_afn_v3f16
+; CHECK-SAME: (<3 x half> [[X:%.*]], <3 x half> [[Y:%.*]]) {
+; CHECK-NEXT: [[POW:%.*]] = tail call afn <3 x half> @_Z3powDv3_DhS_(<3 x half> [[X]], <3 x half> [[Y]])
+; CHECK-NEXT: ret <3 x half> [[POW]]
+;
+ %pow = tail call afn <3 x half> @_Z3powDv3_DhS_(<3 x half> %x, <3 x half> %y)
+ ret <3 x half> %pow
+}
+
+define <4 x half> @test_pow_afn_v4f16(<4 x half> %x, <4 x half> %y) {
+; CHECK-LABEL: define <4 x half> @test_pow_afn_v4f16
+; CHECK-SAME: (<4 x half> [[X:%.*]], <4 x half> [[Y:%.*]]) {
+; CHECK-NEXT: [[POW:%.*]] = tail call afn <4 x half> @_Z3powDv4_DhS_(<4 x half> [[X]], <4 x half> [[Y]])
+; CHECK-NEXT: ret <4 x half> [[POW]]
+;
+ %pow = tail call afn <4 x half> @_Z3powDv4_DhS_(<4 x half> %x, <4 x half> %y)
+ ret <4 x half> %pow
+}
+
+define <8 x half> @test_pow_afn_v8f16(<8 x half> %x, <8 x half> %y) {
+; CHECK-LABEL: define <8 x half> @test_pow_afn_v8f16
+; CHECK-SAME: (<8 x half> [[X:%.*]], <8 x half> [[Y:%.*]]) {
+; CHECK-NEXT: [[POW:%.*]] = tail call afn <8 x half> @_Z3powDv8_DhS_(<8 x half> [[X]], <8 x half> [[Y]])
+; CHECK-NEXT: ret <8 x half> [[POW]]
+;
+ %pow = tail call afn <8 x half> @_Z3powDv8_DhS_(<8 x half> %x, <8 x half> %y)
+ ret <8 x half> %pow
+}
+
+define <16 x half> @test_pow_afn_v16f16(<16 x half> %x, <16 x half> %y) {
+; CHECK-LABEL: define <16 x half> @test_pow_afn_v16f16
+; CHECK-SAME: (<16 x half> [[X:%.*]], <16 x half> [[Y:%.*]]) {
+; CHECK-NEXT: [[POW:%.*]] = tail call afn <16 x half> @_Z3powDv16_DhS_(<16 x half> [[X]], <16 x half> [[Y]])
+; CHECK-NEXT: ret <16 x half> [[POW]]
+;
+ %pow = tail call afn <16 x half> @_Z3powDv16_DhS_(<16 x half> %x, <16 x half> %y)
+ ret <16 x half> %pow
+}
+
+define float @test_pow_f32(float %x, float %y) {
+; CHECK-LABEL: define float @test_pow_f32
+; CHECK-SAME: (float [[X:%.*]], float [[Y:%.*]]) {
+; CHECK-NEXT: [[POW:%.*]] = tail call float @_Z3powff(float [[X]], float [[Y]])
+; CHECK-NEXT: ret float [[POW]]
+;
+ %pow = tail call float @_Z3powff(float %x, float %y)
+ ret float %pow
+}
+
+define float @test_pow_f32_nnan(float %x, float %y) {
+; CHECK-LABEL: define float @test_pow_f32_nnan
+; CHECK-SAME: (float [[X:%.*]], float [[Y:%.*]]) {
+; CHECK-NEXT: [[POW:%.*]] = tail call nnan float @_Z3powff(float [[X]], float [[Y]])
+; CHECK-NEXT: ret float [[POW]]
+;
+ %pow = tail call nnan float @_Z3powff(float %x, float %y)
+ ret float %pow
+}
+
+define <2 x float> @test_pow_v2f32(<2 x float> %x, <2 x float> %y) {
+; CHECK-LABEL: define <2 x float> @test_pow_v2f32
+; CHECK-SAME: (<2 x float> [[X:%.*]], <2 x float> [[Y:%.*]]) {
+; CHECK-NEXT: [[POW:%.*]] = tail call <2 x float> @_Z3powDv2_fS_(<2 x float> [[X]], <2 x float> [[Y]])
+; CHECK-NEXT: ret <2 x float> [[POW]]
+;
+ %pow = tail call <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> %y)
+ ret <2 x float> %pow
+}
+
+define <3 x float> @test_pow_v3f32(<3 x float> %x, <3 x float> %y) {
+; CHECK-LABEL: define <3 x float> @test_pow_v3f32
+; CHECK-SAME: (<3 x float> [[X:%.*]], <3 x float> [[Y:%.*]]) {
+; CHECK-NEXT: [[POW:%.*]] = tail call <3 x float> @_Z3powDv3_fS_(<3 x float> [[X]], <3 x float> [[Y]])
+; CHECK-NEXT: ret <3 x float> [[POW]]
+;
+ %pow = tail call <3 x float> @_Z3powDv3_fS_(<3 x float> %x, <3 x float> %y)
+ ret <3 x float> %pow
+}
+
+define <4 x float> @test_pow_v4f32(<4 x float> %x, <4 x float> %y) {
+; CHECK-LABEL: define <4 x float> @test_pow_v4f32
+; CHECK-SAME: (<4 x float> [[X:%.*]], <4 x float> [[Y:%.*]]) {
+; CHECK-NEXT: [[POW:%.*]] = tail call <4 x float> @_Z3powDv4_fS_(<4 x float> [[X]], <4 x float> [[Y]])
+; CHECK-NEXT: ret <4 x float> [[POW]]
+;
+ %pow = tail call <4 x float> @_Z3powDv4_fS_(<4 x float> %x, <4 x float> %y)
+ ret <4 x float> %pow
+}
+
+define <8 x float> @test_pow_v8f32(<8 x float> %x, <8 x float> %y) {
+; CHECK-LABEL: define <8 x float> @test_pow_v8f32
+; CHECK-SAME: (<8 x float> [[X:%.*]], <8 x float> [[Y:%.*]]) {
+; CHECK-NEXT: [[POW:%.*]] = tail call <8 x float> @_Z3powDv8_fS_(<8 x float> [[X]], <8 x float> [[Y]])
+; CHECK-NEXT: ret <8 x float> [[POW]]
+;
+ %pow = tail call <8 x float> @_Z3powDv8_fS_(<8 x float> %x, <8 x float> %y)
+ ret <8 x float> %pow
+}
+
+define <16 x float> @test_pow_v16f32(<16 x float> %x, <16 x float> %y) {
+; CHECK-LABEL: define <16 x float> @test_pow_v16f32
+; CHECK-SAME: (<16 x float> [[X:%.*]], <16 x float> [[Y:%.*]]) {
+; CHECK-NEXT: [[POW:%.*]] = tail call <16 x float> @_Z3powDv16_fS_(<16 x float> [[X]], <16 x float> [[Y]])
+; CHECK-NEXT: ret <16 x float> [[POW]]
+;
+ %pow = tail call <16 x float> @_Z3powDv16_fS_(<16 x float> %x, <16 x float> %y)
+ ret <16 x float> %pow
+}
+
+define double @test_pow_f64(double %x, double %y) {
+; CHECK-LABEL: define double @test_pow_f64
+; CHECK-SAME: (double [[X:%.*]], double [[Y:%.*]]) {
+; CHECK-NEXT: [[POW:%.*]] = tail call double @_Z3powdd(double [[X]], double [[Y]])
+; CHECK-NEXT: ret double [[POW]]
+;
+ %pow = tail call double @_Z3powdd(double %x, double %y)
+ ret double %pow
+}
+
+define <2 x double> @test_pow_v2f64(<2 x double> %x, <2 x double> %y) {
+; CHECK-LABEL: define <2 x double> @test_pow_v2f64
+; CHECK-SAME: (<2 x double> [[X:%.*]], <2 x double> [[Y:%.*]]) {
+; CHECK-NEXT: [[POW:%.*]] = tail call <2 x double> @_Z3powDv2_dS_(<2 x double> [[X]], <2 x double> [[Y]])
+; CHECK-NEXT: ret <2 x double> [[POW]]
+;
+ %pow = tail call <2 x double> @_Z3powDv2_dS_(<2 x double> %x, <2 x double> %y)
+ ret <2 x double> %pow
+}
+
+define <3 x double> @test_pow_v3f64(<3 x double> %x, <3 x double> %y) {
+; CHECK-LABEL: define <3 x double> @test_pow_v3f64
+; CHECK-SAME: (<3 x double> [[X:%.*]], <3 x double> [[Y:%.*]]) {
+; CHECK-NEXT: [[POW:%.*]] = tail call <3 x double> @_Z3powDv3_dS_(<3 x double> [[X]], <3 x double> [[Y]])
+; CHECK-NEXT: ret <3 x double> [[POW]]
+;
+ %pow = tail call <3 x double> @_Z3powDv3_dS_(<3 x double> %x, <3 x double> %y)
+ ret <3 x double> %pow
+}
+
+define <4 x double> @test_pow_v4f64(<4 x double> %x, <4 x double> %y) {
+; CHECK-LABEL: define <4 x double> @test_pow_v4f64
+; CHECK-SAME: (<4 x double> [[X:%.*]], <4 x double> [[Y:%.*]]) {
+; CHECK-NEXT: [[POW:%.*]] = tail call <4 x double> @_Z3powDv4_dS_(<4 x double> [[X]], <4 x double> [[Y]])
+; CHECK-NEXT: ret <4 x double> [[POW]]
+;
+ %pow = tail call <4 x double> @_Z3powDv4_dS_(<4 x double> %x, <4 x double> %y)
+ ret <4 x double> %pow
+}
+
+define <8 x double> @test_pow_v8f64(<8 x double> %x, <8 x double> %y) {
+; CHECK-LABEL: define <8 x double> @test_pow_v8f64
+; CHECK-SAME: (<8 x double> [[X:%.*]], <8 x double> [[Y:%.*]]) {
+; CHECK-NEXT: [[POW:%.*]] = tail call <8 x double> @_Z3powDv8_dS_(<8 x double> [[X]], <8 x double> [[Y]])
+; CHECK-NEXT: ret <8 x double> [[POW]]
+;
+ %pow = tail call <8 x double> @_Z3powDv8_dS_(<8 x double> %x, <8 x double> %y)
+ ret <8 x double> %pow
+}
+
+define <16 x double> @test_pow_v16f64(<16 x double> %x, <16 x double> %y) {
+; CHECK-LABEL: define <16 x double> @test_pow_v16f64
+; CHECK-SAME: (<16 x double> [[X:%.*]], <16 x double> [[Y:%.*]]) {
+; CHECK-NEXT: [[POW:%.*]] = tail call <16 x double> @_Z3powDv16_dS_(<16 x double> [[X]], <16 x double> [[Y]])
+; CHECK-NEXT: ret <16 x double> [[POW]]
+;
+ %pow = tail call <16 x double> @_Z3powDv16_dS_(<16 x double> %x, <16 x double> %y)
+ ret <16 x double> %pow
+}
+
+define half @test_pow_f16(half %x, half %y) {
+; CHECK-LABEL: define half @test_pow_f16
+; CHECK-SAME: (half [[X:%.*]], half [[Y:%.*]]) {
+; CHECK-NEXT: [[POW:%.*]] = tail call half @_Z3powDhDh(half [[X]], half [[Y]])
+; CHECK-NEXT: ret half [[POW]]
+;
+ %pow = tail call half @_Z3powDhDh(half %x, half %y)
+ ret half %pow
+}
+
+define <2 x half> @test_pow_v2f16(<2 x half> %x, <2 x half> %y) {
+; CHECK-LABEL: define <2 x half> @test_pow_v2f16
+; CHECK-SAME: (<2 x half> [[X:%.*]], <2 x half> [[Y:%.*]]) {
+; CHECK-NEXT: [[POW:%.*]] = tail call <2 x half> @_Z3powDv2_DhS_(<2 x half> [[X]], <2 x half> [[Y]])
+; CHECK-NEXT: ret <2 x half> [[POW]]
+;
+ %pow = tail call <2 x half> @_Z3powDv2_DhS_(<2 x half> %x, <2 x half> %y)
+ ret <2 x half> %pow
+}
+
+define <3 x half> @test_pow_v3f16(<3 x half> %x, <3 x half> %y) {
+; CHECK-LABEL: define <3 x half> @test_pow_v3f16
+; CHECK-SAME: (<3 x half> [[X:%.*]], <3 x half> [[Y:%.*]]) {
+; CHECK-NEXT: [[POW:%.*]] = tail call <3 x half> @_Z3powDv3_DhS_(<3 x half> [[X]], <3 x half> [[Y]])
+; CHECK-NEXT: ret <3 x half> [[POW]]
+;
+ %pow = tail call <3 x half> @_Z3powDv3_DhS_(<3 x half> %x, <3 x half> %y)
+ ret <3 x half> %pow
+}
+
+define <4 x half> @test_pow_v4f16(<4 x half> %x, <4 x half> %y) {
+; CHECK-LABEL: define <4 x half> @test_pow_v4f16
+; CHECK-SAME: (<4 x half> [[X:%.*]], <4 x half> [[Y:%.*]]) {
+; CHECK-NEXT: [[POW:%.*]] = tail call <4 x half> @_Z3powDv4_DhS_(<4 x half> [[X]], <4 x half> [[Y]])
+; CHECK-NEXT: ret <4 x half> [[POW]]
+;
+ %pow = tail call <4 x half> @_Z3powDv4_DhS_(<4 x half> %x, <4 x half> %y)
+ ret <4 x half> %pow
+}
+
+define <8 x half> @test_pow_v8f16(<8 x half> %x, <8 x half> %y) {
+; CHECK-LABEL: define <8 x half> @test_pow_v8f16
+; CHECK-SAME: (<8 x half> [[X:%.*]], <8 x half> [[Y:%.*]]) {
+; CHECK-NEXT: [[POW:%.*]] = tail call <8 x half> @_Z3powDv8_DhS_(<8 x half> [[X]], <8 x half> [[Y]])
+; CHECK-NEXT: ret <8 x half> [[POW]]
+;
+ %pow = tail call <8 x half> @_Z3powDv8_DhS_(<8 x half> %x, <8 x half> %y)
+ ret <8 x half> %pow
+}
+
+define <16 x half> @test_pow_v16f16(<16 x half> %x, <16 x half> %y) {
+; CHECK-LABEL: define <16 x half> @test_pow_v16f16
+; CHECK-SAME: (<16 x half> [[X:%.*]], <16 x half> [[Y:%.*]]) {
+; CHECK-NEXT: [[POW:%.*]] = tail call <16 x half> @_Z3powDv16_DhS_(<16 x half> [[X]], <16 x half> [[Y]])
+; CHECK-NEXT: ret <16 x half> [[POW]]
+;
+ %pow = tail call <16 x half> @_Z3powDv16_DhS_(<16 x half> %x, <16 x half> %y)
+ ret <16 x half> %pow
+}
+
+define float @test_pow_afn_f32_minsize(float %x, float %y) #0 {
+; CHECK-LABEL: define float @test_pow_afn_f32_minsize
+; CHECK-SAME: (float [[X:%.*]], float [[Y:%.*]]) #[[ATTR2:[0-9]+]] {
+; CHECK-NEXT: [[POW:%.*]] = tail call afn float @_Z3powff(float [[X]], float [[Y]])
+; CHECK-NEXT: ret float [[POW]]
+;
+ %pow = tail call afn float @_Z3powff(float %x, float %y)
+ ret float %pow
+}
+
+define float @test_pow_afn_f32_nnan_minsize(float %x, float %y) #0 {
+; CHECK-LABEL: define float @test_pow_afn_f32_nnan_minsize
+; CHECK-SAME: (float [[X:%.*]], float [[Y:%.*]]) #[[ATTR2]] {
+; CHECK-NEXT: [[POW:%.*]] = tail call nnan afn float @_Z3powff(float [[X]], float [[Y]])
+; CHECK-NEXT: ret float [[POW]]
+;
+ %pow = tail call afn nnan float @_Z3powff(float %x, float %y)
+ ret float %pow
+}
+
+define float @test_pow_afn_f32_noinline(float %x, float %y) {
+; CHECK-LABEL: define float @test_pow_afn_f32_noinline
+; CHECK-SAME: (float [[X:%.*]], float [[Y:%.*]]) {
+; CHECK-NEXT: [[POW:%.*]] = tail call afn float @_Z3powff(float [[X]], float [[Y]]) #[[ATTR5:[0-9]+]]
+; CHECK-NEXT: ret float [[POW]]
+;
+ %pow = tail call afn float @_Z3powff(float %x, float %y) #1
+ ret float %pow
+}
+
+define float @test_pow_afn_f32_nnan_noinline(float %x, float %y) {
+; CHECK-LABEL: define float @test_pow_afn_f32_nnan_noinline
+; CHECK-SAME: (float [[X:%.*]], float [[Y:%.*]]) {
+; CHECK-NEXT: [[POW:%.*]] = tail call nnan afn float @_Z3powff(float [[X]], float [[Y]]) #[[ATTR5]]
+; CHECK-NEXT: ret float [[POW]]
+;
+ %pow = tail call afn nnan float @_Z3powff(float %x, float %y) #1
+ ret float %pow
+}
+
+define float @test_pow_afn_f32_strictfp(float %x, float %y) #2 {
+; CHECK-LABEL: define float @test_pow_afn_f32_strictfp
+; CHECK-SAME: (float [[X:%.*]], float [[Y:%.*]]) #[[ATTR3:[0-9]+]] {
+; CHECK-NEXT: [[POW:%.*]] = tail call nnan nsz afn float @_Z3powff(float [[X]], float [[Y]]) #[[ATTR3]]
+; CHECK-NEXT: ret float [[POW]]
+;
+ %pow = tail call afn nsz nnan float @_Z3powff(float %x, float %y) #2
+ ret float %pow
+}
+
+define float @test_pow_fast_f32_nobuiltin(float %x, float %y) {
+; CHECK-LABEL: define float @test_pow_fast_f32_nobuiltin
+; CHECK-SAME: (float [[X:%.*]], float [[Y:%.*]]) {
+; CHECK-NEXT: [[POW:%.*]] = tail call fast float @_Z3powff(float [[X]], float [[Y]]) #[[ATTR6:[0-9]+]]
+; CHECK-NEXT: ret float [[POW]]
+;
+ %pow = tail call fast float @_Z3powff(float %x, float %y) #3
+ ret float %pow
+}
+
+define float @test_pow_afn_f32_0.0(float %x) {
+; CHECK-LABEL: define float @test_pow_afn_f32_0.0
+; CHECK-SAME: (float [[X:%.*]]) {
+; CHECK-NEXT: ret float 1.000000e+00
+;
+ %pow = tail call afn float @_Z3powff(float %x, float 0.0)
+ ret float %pow
+}
+
+define float @test_pow_afn_f32_neg0.0(float %x) {
+; CHECK-LABEL: define float @test_pow_afn_f32_neg0.0
+; CHECK-SAME: (float [[X:%.*]]) {
+; CHECK-NEXT: ret float 1.000000e+00
+;
+ %pow = tail call afn float @_Z3powff(float %x, float -0.0)
+ ret float %pow
+}
+
+define <2 x float> @test_pow_afn_v2f32_0.0(<2 x float> %x) {
+; CHECK-LABEL: define <2 x float> @test_pow_afn_v2f32_0.0
+; CHECK-SAME: (<2 x float> [[X:%.*]]) {
+; CHECK-NEXT: ret <2 x float> splat (float 1.000000e+00)
+;
+ %pow = tail call afn <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> <float 0.0, float 0.0>)
+ ret <2 x float> %pow
+}
+
+define <2 x float> @test_pow_afn_v2f32_neg0.0(<2 x float> %x) {
+; CHECK-LABEL: define <2 x float> @test_pow_afn_v2f32_neg0.0
+; CHECK-SAME: (<2 x float> [[X:%.*]]) {
+; CHECK-NEXT: ret <2 x float> splat (float 1.000000e+00)
+;
+ %pow = tail call afn <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> <float -0.0, float -0.0>)
+ ret <2 x float> %pow
+}
+
+define <2 x float> @test_pow_afn_v2f32_plus_minus_0.0(<2 x float> %x) {
+; CHECK-LABEL: define <2 x float> @test_pow_afn_v2f32_plus_minus_0.0
+; CHECK-SAME: (<2 x float> [[X:%.*]]) {
+; CHECK-NEXT: ret <2 x float> splat (float 1.000000e+00)
+;
+ %pow = tail call afn <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> <float 0.0, float -0.0>)
+ ret <2 x float> %pow
+}
+
+define <3 x float> @test_pow_afn_v3f32_0.0_splat_undef(<3 x float> %x, <3 x float> %y) {
+; CHECK-LABEL: define <3 x float> @test_pow_afn_v3f32_0.0_splat_undef
+; CHECK-SAME: (<3 x float> [[X:%.*]], <3 x float> [[Y:%.*]]) {
+; CHECK-NEXT: ret <3 x float> splat (float 1.000000e+00)
+;
+ %pow = tail call afn <3 x float> @_Z3powDv3_fS_(<3 x float> %x, <3 x float> <float 0.0, float poison, float 0.0>)
+ ret <3 x float> %pow
+}
+
+define <3 x float> @test_pow_afn_v3f32_neg0.0_splat_undef(<3 x float> %x, <3 x float> %y) {
+; CHECK-LABEL: define <3 x float> @test_pow_afn_v3f32_neg0.0_splat_undef
+; CHECK-SAME: (<3 x float> [[X:%.*]], <3 x float> [[Y:%.*]]) {
+; CHECK-NEXT: ret <3 x float> splat (float 1.000000e+00)
+;
+ %pow = tail call afn <3 x float> @_Z3powDv3_fS_(<3 x float> %x, <3 x float> <float -0.0, float poison, float -0.0>)
+ ret <3 x float> %pow
+}
+
+define float @test_pow_afn_f32_0.5(float %x) {
+; CHECK-LABEL: define float @test_pow_afn_f32_0.5
+; CHECK-SAME: (float [[X:%.*]]) {
+; CHECK-NEXT: [[__POW2SQRT:%.*]] = call afn float @_Z4sqrtf(float [[X]])
+; CHECK-NEXT: ret float [[__POW2SQRT]]
+;
+ %pow = tail call afn float @_Z3powff(float %x, float 0.5)
+ ret float %pow
+}
+
+define float @test_pow_afn_f32_neg0.5(float %x) {
+; CHECK-LABEL: define float @test_pow_afn_f32_neg0.5
+; CHECK-SAME: (float [[X:%.*]]) {
+; CHECK-NEXT: [[__POW2RSQRT:%.*]] = call afn float @_Z5rsqrtf(float [[X]])
+; CHECK-NEXT: ret float [[__POW2RSQRT]]
+;
+ %pow = tail call afn float @_Z3powff(float %x, float -0.5)
+ ret float %pow
+}
+
+define <2 x float> @test_pow_afn_v2f32_0.5(<2 x float> %x) {
+; CHECK-LABEL: define <2 x float> @test_pow_afn_v2f32_0.5
+; CHECK-SAME: (<2 x float> [[X:%.*]]) {
+; CHECK-NEXT: [[__POW2SQRT:%.*]] = call afn <2 x float> @_Z4sqrtDv2_f(<2 x float> [[X]])
+; CHECK-NEXT: ret <2 x float> [[__POW2SQRT]]
+;
+ %pow = tail call afn <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> <float 0.5, float 0.5>)
+ ret <2 x float> %pow
+}
+
+define <2 x float> @test_pow_afn_v2f32_neg0.5(<2 x float> %x) {
+; CHECK-LABEL: define <2 x float> @test_pow_afn_v2f32_neg0.5
+; CHECK-SAME: (<2 x float> [[X:%.*]]) {
+; CHECK-NEXT: [[__POW2RSQRT:%.*]] = call afn <2 x float> @_Z5rsqrtDv2_f(<2 x float> [[X]])
+; CHECK-NEXT: ret <2 x float> [[__POW2RSQRT]]
+;
+ %pow = tail call afn <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> <float -0.5, float -0.5>)
+ ret <2 x float> %pow
+}
+
+define <2 x float> @test_pow_afn_v2f32_plus_minus_0.5(<2 x float> %x) {
+; CHECK-LABEL: define <2 x float> @test_pow_afn_v2f32_plus_minus_0.5
+; CHECK-SAME: (<2 x float> [[X:%.*]]) {
+; CHECK-NEXT: [[POW:%.*]] = tail call afn <2 x float> @_Z3powDv2_fS_(<2 x float> [[X]], <2 x float> <float 5.000000e-01, float -5.000000e-01>)
+; CHECK-NEXT: ret <2 x float> [[POW]]
+;
+ %pow = tail call afn <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> <float 0.5, float -0.5>)
+ ret <2 x float> %pow
+}
+
+define <3 x float> @test_pow_afn_v3f32_0.5_splat_undef(<3 x float> %x, <3 x float> %y) {
+; CHECK-LABEL: define <3 x float> @test_pow_afn_v3f32_0.5_splat_undef
+; CHECK-SAME: (<3 x float> [[X:%.*]], <3 x float> [[Y:%.*]]) {
+; CHECK-NEXT: [[__POW2SQRT:%.*]] = call afn <3 x float> @_Z4sqrtDv3_f(<3 x float> [[X]])
+; CHECK-NEXT: ret <3 x float> [[__POW2SQRT]]
+;
+ %pow = tail call afn <3 x float> @_Z3powDv3_fS_(<3 x float> %x, <3 x float> <float 0.5, float poison, float 0.5>)
+ ret <3 x float> %pow
+}
+
+define <3 x float> @test_pow_afn_v3f32_neg0.5_splat_undef(<3 x float> %x, <3 x float> %y) {
+; CHECK-LABEL: define <3 x float> @test_pow_afn_v3f32_neg0.5_splat_undef
+; CHECK-SAME: (<3 x float> [[X:%.*]], <3 x float> [[Y:%.*]]) {
+; CHECK-NEXT: [[__POW2RSQRT:%.*]] = call afn <3 x float> @_Z5rsqrtDv3_f(<3 x float> [[X]])
+; CHECK-NEXT: ret <3 x float> [[__POW2RSQRT]]
+;
+ %pow = tail call afn <3 x float> @_Z3powDv3_fS_(<3 x float> %x, <3 x float> <float -0.5, float poison, float -0.5>)
+ ret <3 x float> %pow
+}
+
+define float @test_pow_afn_f32_1.0(float %x) {
+; CHECK-LABEL: define float @test_pow_afn_f32_1.0
+; CHECK-SAME: (float [[X:%.*]]) {
+; CHECK-NEXT: ret float [[X]]
+;
+ %pow = tail call afn float @_Z3powff(float %x, float 1.0)
+ ret float %pow
+}
+
+define float @test_pow_afn_f32_neg1.0(float %x) {
+; CHECK-LABEL: define float @test_pow_afn_f32_neg1.0
+; CHECK-SAME: (float [[X:%.*]]) {
+; CHECK-NEXT: [[__POWRECIP:%.*]] = fdiv afn float 1.000000e+00, [[X]]
+; CHECK-NEXT: ret float [[__POWRECIP]]
+;
+ %pow = tail call afn float @_Z3powff(float %x, float -1.0)
+ ret float %pow
+}
+
+define <2 x float> @test_pow_afn_v2f32_1.0(<2 x float> %x) {
+; CHECK-LABEL: define <2 x float> @test_pow_afn_v2f32_1.0
+; CHECK-SAME: (<2 x float> [[X:%.*]]) {
+; CHECK-NEXT: ret <2 x float> [[X]]
+;
+ %pow = tail call afn <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> <float 1.0, float 1.0>)
+ ret <2 x float> %pow
+}
+
+define <2 x float> @test_pow_afn_v2f32_neg1.0(<2 x float> %x) {
+; CHECK-LABEL: define <2 x float> @test_pow_afn_v2f32_neg1.0
+; CHECK-SAME: (<2 x float> [[X:%.*]]) {
+; CHECK-NEXT: [[__POWRECIP:%.*]] = fdiv afn <2 x float> splat (float 1.000000e+00), [[X]]
+; CHECK-NEXT: ret <2 x float> [[__POWRECIP]]
+;
+ %pow = tail call afn <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> <float -1.0, float -1.0>)
+ ret <2 x float> %pow
+}
+
+define <2 x float> @test_pow_afn_v2f32_plus_minus_1.0(<2 x float> %x) {
+; CHECK-LABEL: define <2 x float> @test_pow_afn_v2f32_plus_minus_1.0
+; CHECK-SAME: (<2 x float> [[X:%.*]]) {
+; CHECK-NEXT: [[POW:%.*]] = tail call afn <2 x float> @_Z4pownDv2_fDv2_i(<2 x float> [[X]], <2 x i32> <i32 1, i32 -1>)
+; CHECK-NEXT: ret <2 x float> [[POW]]
+;
+ %pow = tail call afn <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> <float 1.0, float -1.0>)
+ ret <2 x float> %pow
+}
+
+define <3 x float> @test_pow_afn_v3f32_1.0_splat_undef(<3 x float> %x, <3 x float> %y) {
+; CHECK-LABEL: define <3 x float> @test_pow_afn_v3f32_1.0_splat_undef
+; CHECK-SAME: (<3 x float> [[X:%.*]], <3 x float> [[Y:%.*]]) {
+; CHECK-NEXT: ret <3 x float> [[X]]
+;
+ %pow = tail call afn <3 x float> @_Z3powDv3_fS_(<3 x float> %x, <3 x float> <float 1.0, float poison, float 1.0>)
+ ret <3 x float> %pow
+}
+
+define <3 x float> @test_pow_afn_v3f32_neg1.0_splat_undef(<3 x float> %x, <3 x float> %y) {
+; CHECK-LABEL: define <3 x float> @test_pow_afn_v3f32_neg1.0_splat_undef
+; CHECK-SAME: (<3 x float> [[X:%.*]], <3 x float> [[Y:%.*]]) {
+; CHECK-NEXT: [[__POWRECIP:%.*]] = fdiv afn <3 x float> splat (float 1.000000e+00), [[X]]
+; CHECK-NEXT: ret <3 x float> [[__POWRECIP]]
+;
+ %pow = tail call afn <3 x float> @_Z3powDv3_fS_(<3 x float> %x, <3 x float> <float -1.0, float poison, float -1.0>)
+ ret <3 x float> %pow
+}
+
+define float @test_pow_afn_f32_2.0(float %x) {
+; CHECK-LABEL: define float @test_pow_afn_f32_2.0
+; CHECK-SAME: (float [[X:%.*]]) {
+; CHECK-NEXT: [[__POW2:%.*]] = fmul afn float [[X]], [[X]]
+; CHECK-NEXT: ret float [[__POW2]]
+;
+ %pow = tail call afn float @_Z3powff(float %x, float 2.0)
+ ret float %pow
+}
+
+define float @test_pow_afn_f32_neg2.0(float %x) {
+; CHECK-LABEL: define float @test_pow_afn_f32_neg2.0
+; CHECK-SAME: (float [[X:%.*]]) {
+; CHECK-NEXT: [[POW:%.*]] = tail call afn float @_Z4pownfi(float [[X]], i32 -2)
+; CHECK-NEXT: ret float [[POW]]
+;
+ %pow = tail call afn float @_Z3powff(float %x, float -2.0)
+ ret float %pow
+}
+
+define <2 x float> @test_pow_afn_v2f32_2.0(<2 x float> %x) {
+; CHECK-LABEL: define <2 x float> @test_pow_afn_v2f32_2.0
+; CHECK-SAME: (<2 x float> [[X:%.*]]) {
+; CHECK-NEXT: [[__POW2:%.*]] = fmul afn <2 x float> [[X]], [[X]]
+; CHECK-NEXT: ret <2 x float> [[__POW2]]
+;
+ %pow = tail call afn <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> <float 2.0, float 2.0>)
+ ret <2 x float> %pow
+}
+
+define <2 x float> @test_pow_afn_v2f32_neg2.0(<2 x float> %x) {
+; CHECK-LABEL: define <2 x float> @test_pow_afn_v2f32_neg2.0
+; CHECK-SAME: (<2 x float> [[X:%.*]]) {
+; CHECK-NEXT: [[POW:%.*]] = tail call afn <2 x float> @_Z4pownDv2_fDv2_i(<2 x float> [[X]], <2 x i32> splat (i32 -2))
+; CHECK-NEXT: ret <2 x float> [[POW]]
+;
+ %pow = tail call afn <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> <float -2.0, float -2.0>)
+ ret <2 x float> %pow
+}
+
+define <2 x float> @test_pow_afn_v2f32_plus_minus_2.0(<2 x float> %x) {
+; CHECK-LABEL: define <2 x float> @test_pow_afn_v2f32_plus_minus_2.0
+; CHECK-SAME: (<2 x float> [[X:%.*]]) {
+; CHECK-NEXT: [[POW:%.*]] = tail call afn <2 x float> @_Z4pownDv2_fDv2_i(<2 x float> [[X]], <2 x i32> <i32 2, i32 -2>)
+; CHECK-NEXT: ret <2 x float> [[POW]]
+;
+ %pow = tail call afn <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> <float 2.0, float -2.0>)
+ ret <2 x float> %pow
+}
+
+define float @test_pow_afn_f32_3.0(float %x) {
+; CHECK-LABEL: define float @test_pow_afn_f32_3.0
+; CHECK-SAME: (float [[X:%.*]]) {
+; CHECK-NEXT: [[POW:%.*]] = tail call afn float @_Z4pownfi(float [[X]], i32 3)
+; CHECK-NEXT: ret float [[POW]]
+;
+ %pow = tail call afn float @_Z3powff(float %x, float 3.0)
+ ret float %pow
+}
+
+define float @test_pow_afn_f32_neg3.0(float %x) {
+; CHECK-LABEL: define float @test_pow_afn_f32_neg3.0
+; CHECK-SAME: (float [[X:%.*]]) {
+; CHECK-NEXT: [[POW:%.*]] = tail call afn float @_Z4pownfi(float [[X]], i32 -3)
+; CHECK-NEXT: ret float [[POW]]
+;
+ %pow = tail call afn float @_Z3powff(float %x, float -3.0)
+ ret float %pow
+}
+
+define <2 x float> @test_pow_afn_v2f32_3.0(<2 x float> %x) {
+; CHECK-LABEL: define <2 x float> @test_pow_afn_v2f32_3.0
+; CHECK-SAME: (<2 x float> [[X:%.*]]) {
+; CHECK-NEXT: [[POW:%.*]] = tail call afn <2 x float> @_Z4pownDv2_fDv2_i(<2 x float> [[X]], <2 x i32> splat (i32 3))
+; CHECK-NEXT: ret <2 x float> [[POW]]
+;
+ %pow = tail call afn <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> <float 3.0, float 3.0>)
+ ret <2 x float> %pow
+}
+
+define <2 x float> @test_pow_afn_v2f32_neg3.0(<2 x float> %x) {
+; CHECK-LABEL: define <2 x float> @test_pow_afn_v2f32_neg3.0
+; CHECK-SAME: (<2 x float> [[X:%.*]]) {
+; CHECK-NEXT: [[POW:%.*]] = tail call afn <2 x float> @_Z4pownDv2_fDv2_i(<2 x float> [[X]], <2 x i32> splat (i32 -3))
+; CHECK-NEXT: ret <2 x float> [[POW]]
+;
+ %pow = tail call afn <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> <float -3.0, float -3.0>)
+ ret <2 x float> %pow
+}
+
+define <2 x float> @test_pow_afn_v2f32_plus_minus_3.0(<2 x float> %x) {
+; CHECK-LABEL: define <2 x float> @test_pow_afn_v2f32_plus_minus_3.0
+; CHECK-SAME: (<2 x float> [[X:%.*]]) {
+; CHECK-NEXT: [[POW:%.*]] = tail call afn <2 x float> @_Z4pownDv2_fDv2_i(<2 x float> [[X]], <2 x i32> <i32 3, i32 -3>)
+; CHECK-NEXT: ret <2 x float> [[POW]]
+;
+ %pow = tail call afn <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> <float 3.0, float -3.0>)
+ ret <2 x float> %pow
+}
+
+define float @test_pow_afn_f32_3.99(float %x) {
+; CHECK-LABEL: define float @test_pow_afn_f32_3.99
+; CHECK-SAME: (float [[X:%.*]]) {
+; CHECK-NEXT: [[POW:%.*]] = tail call afn float @_Z3powff(float [[X]], float 0x400FEB8520000000)
+; CHECK-NEXT: ret float [[POW]]
+;
+ %pow = tail call afn float @_Z3powff(float %x, float 0x400FEB8520000000)
+ ret float %pow
+}
+
+define float @test_pow_afn_f32_neg3.99(float %x) {
+; CHECK-LABEL: define float @test_pow_afn_f32_neg3.99
+; CHECK-SAME: (float [[X:%.*]]) {
+; CHECK-NEXT: [[POW:%.*]] = tail call afn float @_Z3powff(float [[X]], float 0xC00FEB8520000000)
+; CHECK-NEXT: ret float [[POW]]
+;
+ %pow = tail call afn float @_Z3powff(float %x, float 0xC00FEB8520000000)
+ ret float %pow
+}
+
+define <2 x float> @test_pow_afn_v2f32_3.99(<2 x float> %x) {
+; CHECK-LABEL: define <2 x float> @test_pow_afn_v2f32_3.99
+; CHECK-SAME: (<2 x float> [[X:%.*]]) {
+; CHECK-NEXT: [[POW:%.*]] = tail call afn <2 x float> @_Z3powDv2_fS_(<2 x float> [[X]], <2 x float> splat (float 0x400FEB8520000000))
+; CHECK-NEXT: ret <2 x float> [[POW]]
+;
+ %pow = tail call afn <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> <float 0x400FEB8520000000, float 0x400FEB8520000000>)
+ ret <2 x float> %pow
+}
+
+define <2 x float> @test_pow_afn_v2f32_neg3.99(<2 x float> %x) {
+; CHECK-LABEL: define <2 x float> @test_pow_afn_v2f32_neg3.99
+; CHECK-SAME: (<2 x float> [[X:%.*]]) {
+; CHECK-NEXT: [[POW:%.*]] = tail call afn <2 x float> @_Z3powDv2_fS_(<2 x float> [[X]], <2 x float> splat (float 0xC00FEB8520000000))
+; CHECK-NEXT: ret <2 x float> [[POW]]
+;
+ %pow = tail call afn <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> <float 0xC00FEB8520000000, float 0xC00FEB8520000000>)
+ ret <2 x float> %pow
+}
+
+define <2 x float> @test_pow_afn_v2f32_plus_minus_3.99(<2 x float> %x) {
+; CHECK-LABEL: define <2 x float> @test_pow_afn_v2f32_plus_minus_3.99
+; CHECK-SAME: (<2 x float> [[X:%.*]]) {
+; CHECK-NEXT: [[POW:%.*]] = tail call afn <2 x float> @_Z3powDv2_fS_(<2 x float> [[X]], <2 x float> <float 0x400FEB8520000000, float 0xC00FEB8520000000>)
+; CHECK-NEXT: ret <2 x float> [[POW]]
+;
+ %pow = tail call afn <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> <float 0x400FEB8520000000, float 0xC00FEB8520000000>)
+ ret <2 x float> %pow
+}
+
+define float @test_pow_afn_f32_8.0(float %x) {
+; CHECK-LABEL: define float @test_pow_afn_f32_8.0
+; CHECK-SAME: (float [[X:%.*]]) {
+; CHECK-NEXT: [[POW:%.*]] = tail call afn float @_Z4pownfi(float [[X]], i32 8)
+; CHECK-NEXT: ret float [[POW]]
+;
+ %pow = tail call afn float @_Z3powff(float %x, float 8.0)
+ ret float %pow
+}
+
+define float @test_pow_afn_f32_neg8.0(float %x) {
+; CHECK-LABEL: define float @test_pow_afn_f32_neg8.0
+; CHECK-SAME: (float [[X:%.*]]) {
+; CHECK-NEXT: [[POW:%.*]] = tail call afn float @_Z4pownfi(float [[X]], i32 -8)
+; CHECK-NEXT: ret float [[POW]]
+;
+ %pow = tail call afn float @_Z3powff(float %x, float -8.0)
+ ret float %pow
+}
+
+define <2 x float> @test_pow_afn_v2f32_8.0(<2 x float> %x) {
+; CHECK-LABEL: define <2 x float> @test_pow_afn_v2f32_8.0
+; CHECK-SAME: (<2 x float> [[X:%.*]]) {
+; CHECK-NEXT: [[POW:%.*]] = tail call afn <2 x float> @_Z4pownDv2_fDv2_i(<2 x float> [[X]], <2 x i32> splat (i32 8))
+; CHECK-NEXT: ret <2 x float> [[POW]]
+;
+ %pow = tail call afn <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> <float 8.0, float 8.0>)
+ ret <2 x float> %pow
+}
+
+define <2 x float> @test_pow_afn_v2f32_neg8.0(<2 x float> %x) {
+; CHECK-LABEL: define <2 x float> @test_pow_afn_v2f32_neg8.0
+; CHECK-SAME: (<2 x float> [[X:%.*]]) {
+; CHECK-NEXT: [[POW:%.*]] = tail call afn <2 x float> @_Z4pownDv2_fDv2_i(<2 x float> [[X]], <2 x i32> splat (i32 -8))
+; CHECK-NEXT: ret <2 x float> [[POW]]
+;
+ %pow = tail call afn <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> <float -8.0, float -8.0>)
+ ret <2 x float> %pow
+}
+
+define <2 x float> @test_pow_afn_v2f32_plus_minus_8.0(<2 x float> %x) {
+; CHECK-LABEL: define <2 x float> @test_pow_afn_v2f32_plus_minus_8.0
+; CHECK-SAME: (<2 x float> [[X:%.*]]) {
+; CHECK-NEXT: [[POW:%.*]] = tail call afn <2 x float> @_Z4pownDv2_fDv2_i(<2 x float> [[X]], <2 x i32> <i32 8, i32 -8>)
+; CHECK-NEXT: ret <2 x float> [[POW]]
+;
+ %pow = tail call afn <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> <float 8.0, float -8.0>)
+ ret <2 x float> %pow
+}
+
+define float @test_pow_afn_f32_12.0(float %x) {
+; CHECK-LABEL: define float @test_pow_afn_f32_12.0
+; CHECK-SAME: (float [[X:%.*]]) {
+; CHECK-NEXT: [[POW:%.*]] = tail call afn float @_Z4pownfi(float [[X]], i32 12)
+; CHECK-NEXT: ret float [[POW]]
+;
+ %pow = tail call afn float @_Z3powff(float %x, float 12.0)
+ ret float %pow
+}
+
+define float @test_pow_afn_f32_neg12.0(float %x) {
+; CHECK-LABEL: define float @test_pow_afn_f32_neg12.0
+; CHECK-SAME: (float [[X:%.*]]) {
+; CHECK-NEXT: [[POW:%.*]] = tail call afn float @_Z4pownfi(float [[X]], i32 -12)
+; CHECK-NEXT: ret float [[POW]]
+;
+ %pow = tail call afn float @_Z3powff(float %x, float -12.0)
+ ret float %pow
+}
+
+define <2 x float> @test_pow_afn_v2f32_12.0(<2 x float> %x) {
+; CHECK-LABEL: define <2 x float> @test_pow_afn_v2f32_12.0
+; CHECK-SAME: (<2 x float> [[X:%.*]]) {
+; CHECK-NEXT: [[POW:%.*]] = tail call afn <2 x float> @_Z4pownDv2_fDv2_i(<2 x float> [[X]], <2 x i32> splat (i32 12))
+; CHECK-NEXT: ret <2 x float> [[POW]]
+;
+ %pow = tail call afn <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> <float 12.0, float 12.0>)
+ ret <2 x float> %pow
+}
+
+define <2 x float> @test_pow_afn_v2f32_neg12.0(<2 x float> %x) {
+; CHECK-LABEL: define <2 x float> @test_pow_afn_v2f32_neg12.0
+; CHECK-SAME: (<2 x float> [[X:%.*]]) {
+; CHECK-NEXT: [[POW:%.*]] = tail call afn <2 x float> @_Z4pownDv2_fDv2_i(<2 x float> [[X]], <2 x i32> splat (i32 -12))
+; CHECK-NEXT: ret <2 x float> [[POW]]
+;
+ %pow = tail call afn <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> <float -12.0, float -12.0>)
+ ret <2 x float> %pow
+}
+
+define <2 x float> @test_pow_afn_v2f32_plus_minus_12.0(<2 x float> %x) {
+; CHECK-LABEL: define <2 x float> @test_pow_afn_v2f32_plus_minus_12.0
+; CHECK-SAME: (<2 x float> [[X:%.*]]) {
+; CHECK-NEXT: [[POW:%.*]] = tail call afn <2 x float> @_Z4pownDv2_fDv2_i(<2 x float> [[X]], <2 x i32> <i32 12, i32 -12>)
+; CHECK-NEXT: ret <2 x float> [[POW]]
+;
+ %pow = tail call afn <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> <float 12.0, float -12.0>)
+ ret <2 x float> %pow
+}
+
+define float @test_pow_afn_f32_13.0(float %x) {
+; CHECK-LABEL: define float @test_pow_afn_f32_13.0
+; CHECK-SAME: (float [[X:%.*]]) {
+; CHECK-NEXT: [[POW:%.*]] = tail call afn float @_Z4pownfi(float [[X]], i32 13)
+; CHECK-NEXT: ret float [[POW]]
+;
+ %pow = tail call afn float @_Z3powff(float %x, float 13.0)
+ ret float %pow
+}
+
+define float @test_pow_afn_f32_neg13.0(float %x) {
+; CHECK-LABEL: define float @test_pow_afn_f32_neg13.0
+; CHECK-SAME: (float [[X:%.*]]) {
+; CHECK-NEXT: [[POW:%.*]] = tail call afn float @_Z4pownfi(float [[X]], i32 -13)
+; CHECK-NEXT: ret float [[POW]]
+;
+ %pow = tail call afn float @_Z3powff(float %x, float -13.0)
+ ret float %pow
+}
+
+define <2 x float> @test_pow_afn_v2f32_13.0(<2 x float> %x) {
+; CHECK-LABEL: define <2 x float> @test_pow_afn_v2f32_13.0
+; CHECK-SAME: (<2 x float> [[X:%.*]]) {
+; CHECK-NEXT: [[POW:%.*]] = tail call afn <2 x float> @_Z4pownDv2_fDv2_i(<2 x float> [[X]], <2 x i32> splat (i32 13))
+; CHECK-NEXT: ret <2 x float> [[POW]]
+;
+ %pow = tail call afn <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> <float 13.0, float 13.0>)
+ ret <2 x float> %pow
+}
+
+define <2 x float> @test_pow_afn_v2f32_neg13.0(<2 x float> %x) {
+; CHECK-LABEL: define <2 x float> @test_pow_afn_v2f32_neg13.0
+; CHECK-SAME: (<2 x float> [[X:%.*]]) {
+; CHECK-NEXT: [[POW:%.*]] = tail call afn <2 x float> @_Z4pownDv2_fDv2_i(<2 x float> [[X]], <2 x i32> splat (i32 -13))
+; CHECK-NEXT: ret <2 x float> [[POW]]
+;
+ %pow = tail call afn <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> <float -13.0, float -13.0>)
+ ret <2 x float> %pow
+}
+
+define <2 x float> @test_pow_afn_v2f32_13.0_15.0(<2 x float> %x) {
+; CHECK-LABEL: define <2 x float> @test_pow_afn_v2f32_13.0_15.0
+; CHECK-SAME: (<2 x float> [[X:%.*]]) {
+; CHECK-NEXT: [[POW:%.*]] = tail call afn <2 x float> @_Z4pownDv2_fDv2_i(<2 x float> [[X]], <2 x i32> <i32 13, i32 15>)
+; CHECK-NEXT: ret <2 x float> [[POW]]
+;
+ %pow = tail call afn <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> <float 13.0, float 15.0>)
+ ret <2 x float> %pow
+}
+
+define <2 x float> @test_pow_afn_v2f32_13.0_14.0(<2 x float> %x) {
+; CHECK-LABEL: define <2 x float> @test_pow_afn_v2f32_13.0_14.0
+; CHECK-SAME: (<2 x float> [[X:%.*]]) {
+; CHECK-NEXT: [[POW:%.*]] = tail call afn <2 x float> @_Z4pownDv2_fDv2_i(<2 x float> [[X]], <2 x i32> <i32 13, i32 14>)
+; CHECK-NEXT: ret <2 x float> [[POW]]
+;
+ %pow = tail call afn <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> <float 13.0, float 14.0>)
+ ret <2 x float> %pow
+}
+
+define <2 x float> @test_pow_afn_v2f32_14.0_16.0(<2 x float> %x) {
+; CHECK-LABEL: define <2 x float> @test_pow_afn_v2f32_14.0_16.0
+; CHECK-SAME: (<2 x float> [[X:%.*]]) {
+; CHECK-NEXT: [[POW:%.*]] = tail call afn <2 x float> @_Z4pownDv2_fDv2_i(<2 x float> [[X]], <2 x i32> <i32 14, i32 16>)
+; CHECK-NEXT: ret <2 x float> [[POW]]
+;
+ %pow = tail call afn <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> <float 14.0, float 16.0>)
+ ret <2 x float> %pow
+}
+
+define <2 x float> @test_pow_afn_v2f32_plus_minus_13.0(<2 x float> %x) {
+; CHECK-LABEL: define <2 x float> @test_pow_afn_v2f32_plus_minus_13.0
+; CHECK-SAME: (<2 x float> [[X:%.*]]) {
+; CHECK-NEXT: [[POW:%.*]] = tail call afn <2 x float> @_Z4pownDv2_fDv2_i(<2 x float> [[X]], <2 x i32> <i32 13, i32 -13>)
+; CHECK-NEXT: ret <2 x float> [[POW]]
+;
+ %pow = tail call afn <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> <float 13.0, float -13.0>)
+ ret <2 x float> %pow
+}
+
+define <2 x float> @test_pow_afn_v2f32_minus_13.0_minus_14.0(<2 x float> %x) {
+; CHECK-LABEL: define <2 x float> @test_pow_afn_v2f32_minus_13.0_minus_14.0
+; CHECK-SAME: (<2 x float> [[X:%.*]]) {
+; CHECK-NEXT: [[POW:%.*]] = tail call afn <2 x float> @_Z4pownDv2_fDv2_i(<2 x float> [[X]], <2 x i32> <i32 -13, i32 -14>)
+; CHECK-NEXT: ret <2 x float> [[POW]]
+;
+ %pow = tail call afn <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> <float -13.0, float -14.0>)
+ ret <2 x float> %pow
+}
+
+define float @test_pow_afn_f32_nnan_x_known_positive(float nofpclass(ninf nnorm nsub) %x, float %y) {
+; CHECK-LABEL: define float @test_pow_afn_f32_nnan_x_known_positive
+; CHECK-SAME: (float nofpclass(ninf nsub nnorm) [[X:%.*]], float [[Y:%.*]]) {
+; CHECK-NEXT: [[POW:%.*]] = tail call nnan afn float @_Z4powrff(float [[X]], float [[Y]])
+; CHECK-NEXT: ret float [[POW]]
+;
+ %pow = tail call afn nnan float @_Z3powff(float %x, float %y)
+ ret float %pow
+}
+
+define float @test_pow_afn_f32_nnan_ninf_x_known_positive(float nofpclass(ninf nnorm nsub) %x, float %y) {
+; CHECK-LABEL: define float @test_pow_afn_f32_nnan_ninf_x_known_positive
+; CHECK-SAME: (float nofpclass(ninf nsub nnorm) [[X:%.*]], float [[Y:%.*]]) {
+; CHECK-NEXT: [[__LOG2:%.*]] = call nnan ninf afn float @llvm.log2.f32(float [[X]])
+; CHECK-NEXT: [[__YLOGX:%.*]] = fmul nnan ninf afn float [[Y]], [[__LOG2]]
+; CHECK-NEXT: [[__EXP2:%.*]] = call nnan ninf afn float @llvm.exp2.f32(float [[__YLOGX]])
+; CHECK-NEXT: ret float [[__EXP2]]
+;
+ %pow = tail call afn nnan ninf float @_Z3powff(float %x, float %y)
+ ret float %pow
+}
+
+define <2 x float> @test_pow_afn_v2f32_nnan_x_known_positive(<2 x float> nofpclass(ninf nnorm nsub) %x, <2 x float> %y) {
+; CHECK-LABEL: define <2 x float> @test_pow_afn_v2f32_nnan_x_known_positive
+; CHECK-SAME: (<2 x float> nofpclass(ninf nsub nnorm) [[X:%.*]], <2 x float> [[Y:%.*]]) {
+; CHECK-NEXT: [[POW:%.*]] = tail call nnan afn <2 x float> @_Z4powrDv2_fS_(<2 x float> [[X]], <2 x float> [[Y]])
+; CHECK-NEXT: ret <2 x float> [[POW]]
+;
+ %pow = tail call afn nnan <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> %y)
+ ret <2 x float> %pow
+}
+
+define <2 x float> @test_pow_afn_v2f32_nnan_ninf_x_known_positive(<2 x float> nofpclass(ninf nnorm nsub) %x, <2 x float> %y) {
+; CHECK-LABEL: define <2 x float> @test_pow_afn_v2f32_nnan_ninf_x_known_positive
+; CHECK-SAME: (<2 x float> nofpclass(ninf nsub nnorm) [[X:%.*]], <2 x float> [[Y:%.*]]) {
+; CHECK-NEXT: [[__LOG2:%.*]] = call nnan ninf afn <2 x float> @llvm.log2.v2f32(<2 x float> [[X]])
+; CHECK-NEXT: [[__YLOGX:%.*]] = fmul nnan ninf afn <2 x float> [[Y]], [[__LOG2]]
+; CHECK-NEXT: [[__EXP2:%.*]] = call nnan ninf afn <2 x float> @llvm.exp2.v2f32(<2 x float> [[__YLOGX]])
+; CHECK-NEXT: ret <2 x float> [[__EXP2]]
+;
+ %pow = tail call afn nnan ninf <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> %y)
+ ret <2 x float> %pow
+}
+
+define float @test_pow_f32_x_known_positive(float nofpclass(ninf nnorm nsub) %x, float %y) {
+; CHECK-LABEL: define float @test_pow_f32_x_known_positive
+; CHECK-SAME: (float nofpclass(ninf nsub nnorm) [[X:%.*]], float [[Y:%.*]]) {
+; CHECK-NEXT: [[POW:%.*]] = tail call float @_Z4powrff(float [[X]], float [[Y]])
+; CHECK-NEXT: ret float [[POW]]
+;
+ %pow = tail call float @_Z3powff(float %x, float %y)
ret float %pow
}
-define double @test_pow_fast_f64__integral_y(double %x, i32 %y.i) {
-; CHECK-LABEL: test_pow_fast_f64__integral_y:
-; CHECK: ; %bb.0:
-; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CHECK-NEXT: s_mov_b32 s16, s33
-; CHECK-NEXT: s_mov_b32 s33, s32
-; CHECK-NEXT: s_or_saveexec_b64 s[18:19], -1
-; CHECK-NEXT: buffer_store_dword v43, off, s[0:3], s33 offset:12 ; 4-byte Folded Spill
-; CHECK-NEXT: s_mov_b64 exec, s[18:19]
-; CHECK-NEXT: v_writelane_b32 v43, s16, 14
-; CHECK-NEXT: v_writelane_b32 v43, s30, 0
-; CHECK-NEXT: v_writelane_b32 v43, s31, 1
-; CHECK-NEXT: v_writelane_b32 v43, s34, 2
-; CHECK-NEXT: v_writelane_b32 v43, s35, 3
-; CHECK-NEXT: v_writelane_b32 v43, s36, 4
-; CHECK-NEXT: v_writelane_b32 v43, s37, 5
-; CHECK-NEXT: v_writelane_b32 v43, s38, 6
-; CHECK-NEXT: v_writelane_b32 v43, s39, 7
-; CHECK-NEXT: s_addk_i32 s32, 0x800
-; CHECK-NEXT: v_writelane_b32 v43, s48, 8
-; CHECK-NEXT: v_writelane_b32 v43, s49, 9
-; CHECK-NEXT: s_mov_b64 s[48:49], s[4:5]
-; CHECK-NEXT: s_getpc_b64 s[4:5]
-; CHECK-NEXT: s_add_u32 s4, s4, _Z4log2d@gotpcrel32@lo+4
-; CHECK-NEXT: s_addc_u32 s5, s5, _Z4log2d@gotpcrel32@hi+12
-; CHECK-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x0
-; CHECK-NEXT: v_writelane_b32 v43, s50, 10
-; CHECK-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill
-; CHECK-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
-; CHECK-NEXT: buffer_store_dword v42, off, s[0:3], s33 ; 4-byte Folded Spill
-; CHECK-NEXT: v_writelane_b32 v43, s51, 11
-; CHECK-NEXT: v_mov_b32_e32 v42, v1
-; CHECK-NEXT: v_writelane_b32 v43, s52, 12
-; CHECK-NEXT: v_and_b32_e32 v1, 0x7fffffff, v42
-; CHECK-NEXT: s_mov_b64 s[4:5], s[48:49]
-; CHECK-NEXT: v_writelane_b32 v43, s53, 13
-; CHECK-NEXT: v_mov_b32_e32 v40, v31
-; CHECK-NEXT: v_mov_b32_e32 v41, v2
-; CHECK-NEXT: s_mov_b32 s50, s15
-; CHECK-NEXT: s_mov_b32 s51, s14
-; CHECK-NEXT: s_mov_b32 s52, s13
-; CHECK-NEXT: s_mov_b32 s53, s12
-; CHECK-NEXT: s_mov_b64 s[34:35], s[10:11]
-; CHECK-NEXT: s_mov_b64 s[36:37], s[8:9]
-; CHECK-NEXT: s_mov_b64 s[38:39], s[6:7]
-; CHECK-NEXT: s_waitcnt lgkmcnt(0)
-; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17]
-; CHECK-NEXT: v_cvt_f64_i32_e32 v[2:3], v41
-; CHECK-NEXT: s_getpc_b64 s[4:5]
-; CHECK-NEXT: s_add_u32 s4, s4, _Z4exp2d@gotpcrel32@lo+4
-; CHECK-NEXT: s_addc_u32 s5, s5, _Z4exp2d@gotpcrel32@hi+12
-; CHECK-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x0
-; CHECK-NEXT: s_mov_b64 s[4:5], s[48:49]
-; CHECK-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3]
-; CHECK-NEXT: s_mov_b64 s[6:7], s[38:39]
-; CHECK-NEXT: s_mov_b64 s[8:9], s[36:37]
-; CHECK-NEXT: s_mov_b64 s[10:11], s[34:35]
-; CHECK-NEXT: s_mov_b32 s12, s53
-; CHECK-NEXT: s_mov_b32 s13, s52
-; CHECK-NEXT: s_mov_b32 s14, s51
-; CHECK-NEXT: s_mov_b32 s15, s50
-; CHECK-NEXT: v_mov_b32_e32 v31, v40
-; CHECK-NEXT: s_waitcnt lgkmcnt(0)
-; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17]
-; CHECK-NEXT: v_lshlrev_b32_e32 v2, 31, v41
-; CHECK-NEXT: v_and_b32_e32 v2, v2, v42
-; CHECK-NEXT: buffer_load_dword v42, off, s[0:3], s33 ; 4-byte Folded Reload
-; CHECK-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
-; CHECK-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload
-; CHECK-NEXT: v_or_b32_e32 v1, v2, v1
-; CHECK-NEXT: v_readlane_b32 s53, v43, 13
-; CHECK-NEXT: v_readlane_b32 s52, v43, 12
-; CHECK-NEXT: v_readlane_b32 s51, v43, 11
-; CHECK-NEXT: v_readlane_b32 s50, v43, 10
-; CHECK-NEXT: v_readlane_b32 s49, v43, 9
-; CHECK-NEXT: v_readlane_b32 s48, v43, 8
-; CHECK-NEXT: v_readlane_b32 s39, v43, 7
-; CHECK-NEXT: v_readlane_b32 s38, v43, 6
-; CHECK-NEXT: v_readlane_b32 s37, v43, 5
-; CHECK-NEXT: v_readlane_b32 s36, v43, 4
-; CHECK-NEXT: v_readlane_b32 s35, v43, 3
-; CHECK-NEXT: v_readlane_b32 s34, v43, 2
-; CHECK-NEXT: v_readlane_b32 s31, v43, 1
-; CHECK-NEXT: v_readlane_b32 s30, v43, 0
-; CHECK-NEXT: s_mov_b32 s32, s33
-; CHECK-NEXT: v_readlane_b32 s4, v43, 14
-; CHECK-NEXT: s_or_saveexec_b64 s[6:7], -1
-; CHECK-NEXT: buffer_load_dword v43, off, s[0:3], s33 offset:12 ; 4-byte Folded Reload
-; CHECK-NEXT: s_mov_b64 exec, s[6:7]
-; CHECK-NEXT: s_mov_b32 s33, s4
-; CHECK-NEXT: s_waitcnt vmcnt(0)
-; CHECK-NEXT: s_setpc_b64 s[30:31]
- %y = sitofp i32 %y.i to double
- %pow = tail call fast double @_Z3powdd(double %x, double %y)
+define float @test_pow_afn_f32_x_known_positive(float nofpclass(ninf nnorm nsub) %x, float %y) {
+; CHECK-LABEL: define float @test_pow_afn_f32_x_known_positive
+; CHECK-SAME: (float nofpclass(ninf nsub nnorm) [[X:%.*]], float [[Y:%.*]]) {
+; CHECK-NEXT: [[POW:%.*]] = tail call afn float @_Z4powrff(float [[X]], float [[Y]])
+; CHECK-NEXT: ret float [[POW]]
+;
+ %pow = tail call afn float @_Z3powff(float %x, float %y)
+ ret float %pow
+}
+
+define <2 x float> @test_pow_v2f32_x_known_positive(<2 x float> nofpclass(ninf nnorm nsub) %x, <2 x float> %y) {
+; CHECK-LABEL: define <2 x float> @test_pow_v2f32_x_known_positive
+; CHECK-SAME: (<2 x float> nofpclass(ninf nsub nnorm) [[X:%.*]], <2 x float> [[Y:%.*]]) {
+; CHECK-NEXT: [[POW:%.*]] = tail call <2 x float> @_Z4powrDv2_fS_(<2 x float> [[X]], <2 x float> [[Y]])
+; CHECK-NEXT: ret <2 x float> [[POW]]
+;
+ %pow = tail call <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> %y)
+ ret <2 x float> %pow
+}
+
+define <2 x float> @test_pow_afn_v2f32_x_known_positive(<2 x float> nofpclass(ninf nnorm nsub) %x, <2 x float> %y) {
+; CHECK-LABEL: define <2 x float> @test_pow_afn_v2f32_x_known_positive
+; CHECK-SAME: (<2 x float> nofpclass(ninf nsub nnorm) [[X:%.*]], <2 x float> [[Y:%.*]]) {
+; CHECK-NEXT: [[POW:%.*]] = tail call afn <2 x float> @_Z4powrDv2_fS_(<2 x float> [[X]], <2 x float> [[Y]])
+; CHECK-NEXT: ret <2 x float> [[POW]]
+;
+ %pow = tail call afn <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> %y)
+ ret <2 x float> %pow
+}
+
+define double @test_pow_afn_f64_nnan_x_known_positive(double nofpclass(ninf nnorm nsub) %x, double %y) {
+; CHECK-LABEL: define double @test_pow_afn_f64_nnan_x_known_positive
+; CHECK-SAME: (double nofpclass(ninf nsub nnorm) [[X:%.*]], double [[Y:%.*]]) {
+; CHECK-NEXT: [[POW:%.*]] = tail call nnan afn double @_Z4powrdd(double [[X]], double [[Y]])
+; CHECK-NEXT: ret double [[POW]]
+;
+ %pow = tail call afn nnan double @_Z3powdd(double %x, double %y)
ret double %pow
}
-; --------------------------------------------------------------------
-; test powr
-; --------------------------------------------------------------------
-
-define half @test_powr_fast_f16(half %x, half %y) {
-; CHECK-LABEL: test_powr_fast_f16:
-; CHECK: ; %bb.0:
-; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CHECK-NEXT: v_log_f16_e32 v0, v0
-; CHECK-NEXT: v_mul_f16_e32 v0, v1, v0
-; CHECK-NEXT: v_exp_f16_e32 v0, v0
-; CHECK-NEXT: s_setpc_b64 s[30:31]
- %powr = tail call fast half @_Z4powrDhDh(half %x, half %y)
- ret half %powr
-}
-
-define float @test_powr_fast_f32(float %x, float %y) {
-; CHECK-LABEL: test_powr_fast_f32:
-; CHECK: ; %bb.0:
-; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CHECK-NEXT: s_mov_b32 s4, 0x800000
-; CHECK-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
-; CHECK-NEXT: v_cndmask_b32_e64 v3, 0, 32, vcc
-; CHECK-NEXT: v_ldexp_f32 v0, v0, v3
-; CHECK-NEXT: v_log_f32_e32 v0, v0
-; CHECK-NEXT: v_mov_b32_e32 v2, 0x42000000
-; CHECK-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
-; CHECK-NEXT: s_mov_b32 s4, 0xc2fc0000
-; CHECK-NEXT: v_sub_f32_e32 v0, v0, v2
-; CHECK-NEXT: v_mul_f32_e32 v2, v1, v0
-; CHECK-NEXT: v_mov_b32_e32 v3, 0x42800000
-; CHECK-NEXT: v_cmp_gt_f32_e32 vcc, s4, v2
-; CHECK-NEXT: v_cndmask_b32_e32 v2, 0, v3, vcc
-; CHECK-NEXT: v_fma_f32 v0, v1, v0, v2
-; CHECK-NEXT: v_exp_f32_e32 v0, v0
-; CHECK-NEXT: v_not_b32_e32 v1, 63
-; CHECK-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
-; CHECK-NEXT: v_ldexp_f32 v0, v0, v1
-; CHECK-NEXT: s_setpc_b64 s[30:31]
- %powr = tail call fast float @_Z4powrff(float %x, float %y)
- ret float %powr
-}
-
-define double @test_powr_fast_f64(double %x, double %y) {
-; CHECK-LABEL: test_powr_fast_f64:
-; CHECK: ; %bb.0:
-; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CHECK-NEXT: s_mov_b32 s16, s33
-; CHECK-NEXT: s_mov_b32 s33, s32
-; CHECK-NEXT: s_or_saveexec_b64 s[18:19], -1
-; CHECK-NEXT: buffer_store_dword v43, off, s[0:3], s33 offset:12 ; 4-byte Folded Spill
-; CHECK-NEXT: s_mov_b64 exec, s[18:19]
-; CHECK-NEXT: v_writelane_b32 v43, s16, 14
-; CHECK-NEXT: v_writelane_b32 v43, s30, 0
-; CHECK-NEXT: v_writelane_b32 v43, s31, 1
-; CHECK-NEXT: v_writelane_b32 v43, s34, 2
-; CHECK-NEXT: v_writelane_b32 v43, s35, 3
-; CHECK-NEXT: v_writelane_b32 v43, s36, 4
-; CHECK-NEXT: v_writelane_b32 v43, s37, 5
-; CHECK-NEXT: v_writelane_b32 v43, s38, 6
-; CHECK-NEXT: v_writelane_b32 v43, s39, 7
-; CHECK-NEXT: s_addk_i32 s32, 0x800
-; CHECK-NEXT: v_writelane_b32 v43, s48, 8
-; CHECK-NEXT: v_writelane_b32 v43, s49, 9
-; CHECK-NEXT: s_mov_b64 s[48:49], s[4:5]
-; CHECK-NEXT: s_getpc_b64 s[4:5]
-; CHECK-NEXT: s_add_u32 s4, s4, _Z4log2d@gotpcrel32@lo+4
-; CHECK-NEXT: s_addc_u32 s5, s5, _Z4log2d@gotpcrel32@hi+12
-; CHECK-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x0
-; CHECK-NEXT: v_writelane_b32 v43, s50, 10
-; CHECK-NEXT: v_writelane_b32 v43, s51, 11
-; CHECK-NEXT: v_writelane_b32 v43, s52, 12
-; CHECK-NEXT: s_mov_b64 s[4:5], s[48:49]
-; CHECK-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill
-; CHECK-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
-; CHECK-NEXT: buffer_store_dword v42, off, s[0:3], s33 ; 4-byte Folded Spill
-; CHECK-NEXT: v_writelane_b32 v43, s53, 13
-; CHECK-NEXT: v_mov_b32_e32 v42, v31
-; CHECK-NEXT: v_mov_b32_e32 v41, v3
-; CHECK-NEXT: v_mov_b32_e32 v40, v2
-; CHECK-NEXT: s_mov_b32 s50, s15
-; CHECK-NEXT: s_mov_b32 s51, s14
-; CHECK-NEXT: s_mov_b32 s52, s13
-; CHECK-NEXT: s_mov_b32 s53, s12
-; CHECK-NEXT: s_mov_b64 s[34:35], s[10:11]
-; CHECK-NEXT: s_mov_b64 s[36:37], s[8:9]
-; CHECK-NEXT: s_mov_b64 s[38:39], s[6:7]
-; CHECK-NEXT: s_waitcnt lgkmcnt(0)
-; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17]
-; CHECK-NEXT: v_mul_f64 v[0:1], v[40:41], v[0:1]
-; CHECK-NEXT: s_getpc_b64 s[4:5]
-; CHECK-NEXT: s_add_u32 s4, s4, _Z4exp2d@gotpcrel32@lo+4
-; CHECK-NEXT: s_addc_u32 s5, s5, _Z4exp2d@gotpcrel32@hi+12
-; CHECK-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x0
-; CHECK-NEXT: s_mov_b64 s[4:5], s[48:49]
-; CHECK-NEXT: s_mov_b64 s[6:7], s[38:39]
-; CHECK-NEXT: s_mov_b64 s[8:9], s[36:37]
-; CHECK-NEXT: s_mov_b64 s[10:11], s[34:35]
-; CHECK-NEXT: s_mov_b32 s12, s53
-; CHECK-NEXT: s_mov_b32 s13, s52
-; CHECK-NEXT: s_mov_b32 s14, s51
-; CHECK-NEXT: s_mov_b32 s15, s50
-; CHECK-NEXT: v_mov_b32_e32 v31, v42
-; CHECK-NEXT: s_waitcnt lgkmcnt(0)
-; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17]
-; CHECK-NEXT: buffer_load_dword v42, off, s[0:3], s33 ; 4-byte Folded Reload
-; CHECK-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
-; CHECK-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload
-; CHECK-NEXT: v_readlane_b32 s53, v43, 13
-; CHECK-NEXT: v_readlane_b32 s52, v43, 12
-; CHECK-NEXT: v_readlane_b32 s51, v43, 11
-; CHECK-NEXT: v_readlane_b32 s50, v43, 10
-; CHECK-NEXT: v_readlane_b32 s49, v43, 9
-; CHECK-NEXT: v_readlane_b32 s48, v43, 8
-; CHECK-NEXT: v_readlane_b32 s39, v43, 7
-; CHECK-NEXT: v_readlane_b32 s38, v43, 6
-; CHECK-NEXT: v_readlane_b32 s37, v43, 5
-; CHECK-NEXT: v_readlane_b32 s36, v43, 4
-; CHECK-NEXT: v_readlane_b32 s35, v43, 3
-; CHECK-NEXT: v_readlane_b32 s34, v43, 2
-; CHECK-NEXT: v_readlane_b32 s31, v43, 1
-; CHECK-NEXT: v_readlane_b32 s30, v43, 0
-; CHECK-NEXT: s_mov_b32 s32, s33
-; CHECK-NEXT: v_readlane_b32 s4, v43, 14
-; CHECK-NEXT: s_or_saveexec_b64 s[6:7], -1
-; CHECK-NEXT: buffer_load_dword v43, off, s[0:3], s33 offset:12 ; 4-byte Folded Reload
-; CHECK-NEXT: s_mov_b64 exec, s[6:7]
-; CHECK-NEXT: s_mov_b32 s33, s4
-; CHECK-NEXT: s_waitcnt vmcnt(0)
-; CHECK-NEXT: s_setpc_b64 s[30:31]
- %powr = tail call fast double @_Z4powrdd(double %x, double %y)
- ret double %powr
-}
-
-; --------------------------------------------------------------------
-; test pown
-; --------------------------------------------------------------------
-
-define half @test_pown_fast_f16(half %x, i32 %y) {
-; CHECK-LABEL: test_pown_fast_f16:
-; CHECK: ; %bb.0:
-; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CHECK-NEXT: v_cvt_f32_i32_e32 v2, v1
-; CHECK-NEXT: v_log_f16_e64 v3, |v0|
-; CHECK-NEXT: v_lshlrev_b16_e32 v1, 15, v1
-; CHECK-NEXT: v_and_b32_e32 v0, v1, v0
-; CHECK-NEXT: v_cvt_f16_f32_e32 v2, v2
-; CHECK-NEXT: v_mul_f16_e32 v2, v3, v2
-; CHECK-NEXT: v_exp_f16_e32 v2, v2
-; CHECK-NEXT: v_or_b32_e32 v0, v0, v2
-; CHECK-NEXT: s_setpc_b64 s[30:31]
- %call = tail call fast half @_Z4pownDhi(half %x, i32 %y)
- ret half %call
-}
-
-define float @test_pown_fast_f32(float %x, i32 %y) {
-; CHECK-LABEL: test_pown_fast_f32:
-; CHECK: ; %bb.0:
-; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CHECK-NEXT: s_mov_b32 s4, 0x800000
-; CHECK-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4
-; CHECK-NEXT: v_cndmask_b32_e64 v3, 0, 32, vcc
-; CHECK-NEXT: v_ldexp_f32 v3, |v0|, v3
-; CHECK-NEXT: v_log_f32_e32 v3, v3
-; CHECK-NEXT: v_cvt_f32_i32_e32 v4, v1
-; CHECK-NEXT: v_mov_b32_e32 v2, 0x42000000
-; CHECK-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
-; CHECK-NEXT: v_sub_f32_e32 v2, v3, v2
-; CHECK-NEXT: v_mul_f32_e32 v3, v2, v4
-; CHECK-NEXT: s_mov_b32 s4, 0xc2fc0000
-; CHECK-NEXT: v_mov_b32_e32 v5, 0x42800000
-; CHECK-NEXT: v_cmp_gt_f32_e32 vcc, s4, v3
-; CHECK-NEXT: v_cndmask_b32_e32 v3, 0, v5, vcc
-; CHECK-NEXT: v_fma_f32 v2, v2, v4, v3
-; CHECK-NEXT: v_exp_f32_e32 v2, v2
-; CHECK-NEXT: v_not_b32_e32 v3, 63
-; CHECK-NEXT: v_cndmask_b32_e32 v3, 0, v3, vcc
-; CHECK-NEXT: v_lshlrev_b32_e32 v1, 31, v1
-; CHECK-NEXT: v_ldexp_f32 v2, v2, v3
-; CHECK-NEXT: v_and_or_b32 v0, v1, v0, v2
-; CHECK-NEXT: s_setpc_b64 s[30:31]
- %call = tail call fast float @_Z4pownfi(float %x, i32 %y)
- ret float %call
-}
-
-define double @test_pown_fast_f64(double %x, i32 %y) {
-; CHECK-LABEL: test_pown_fast_f64:
-; CHECK: ; %bb.0:
-; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CHECK-NEXT: s_mov_b32 s16, s33
-; CHECK-NEXT: s_mov_b32 s33, s32
-; CHECK-NEXT: s_or_saveexec_b64 s[18:19], -1
-; CHECK-NEXT: buffer_store_dword v43, off, s[0:3], s33 offset:12 ; 4-byte Folded Spill
-; CHECK-NEXT: s_mov_b64 exec, s[18:19]
-; CHECK-NEXT: v_writelane_b32 v43, s16, 14
-; CHECK-NEXT: v_writelane_b32 v43, s30, 0
-; CHECK-NEXT: v_writelane_b32 v43, s31, 1
-; CHECK-NEXT: v_writelane_b32 v43, s34, 2
-; CHECK-NEXT: v_writelane_b32 v43, s35, 3
-; CHECK-NEXT: v_writelane_b32 v43, s36, 4
-; CHECK-NEXT: v_writelane_b32 v43, s37, 5
-; CHECK-NEXT: v_writelane_b32 v43, s38, 6
-; CHECK-NEXT: v_writelane_b32 v43, s39, 7
-; CHECK-NEXT: s_addk_i32 s32, 0x800
-; CHECK-NEXT: v_writelane_b32 v43, s48, 8
-; CHECK-NEXT: v_writelane_b32 v43, s49, 9
-; CHECK-NEXT: s_mov_b64 s[48:49], s[4:5]
-; CHECK-NEXT: s_getpc_b64 s[4:5]
-; CHECK-NEXT: s_add_u32 s4, s4, _Z4log2d@gotpcrel32@lo+4
-; CHECK-NEXT: s_addc_u32 s5, s5, _Z4log2d@gotpcrel32@hi+12
-; CHECK-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x0
-; CHECK-NEXT: v_writelane_b32 v43, s50, 10
-; CHECK-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill
-; CHECK-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
-; CHECK-NEXT: buffer_store_dword v42, off, s[0:3], s33 ; 4-byte Folded Spill
-; CHECK-NEXT: v_writelane_b32 v43, s51, 11
-; CHECK-NEXT: v_mov_b32_e32 v42, v1
-; CHECK-NEXT: v_writelane_b32 v43, s52, 12
-; CHECK-NEXT: v_and_b32_e32 v1, 0x7fffffff, v42
-; CHECK-NEXT: s_mov_b64 s[4:5], s[48:49]
-; CHECK-NEXT: v_writelane_b32 v43, s53, 13
-; CHECK-NEXT: v_mov_b32_e32 v40, v31
-; CHECK-NEXT: v_mov_b32_e32 v41, v2
-; CHECK-NEXT: s_mov_b32 s50, s15
-; CHECK-NEXT: s_mov_b32 s51, s14
-; CHECK-NEXT: s_mov_b32 s52, s13
-; CHECK-NEXT: s_mov_b32 s53, s12
-; CHECK-NEXT: s_mov_b64 s[34:35], s[10:11]
-; CHECK-NEXT: s_mov_b64 s[36:37], s[8:9]
-; CHECK-NEXT: s_mov_b64 s[38:39], s[6:7]
-; CHECK-NEXT: s_waitcnt lgkmcnt(0)
-; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17]
-; CHECK-NEXT: v_cvt_f64_i32_e32 v[2:3], v41
-; CHECK-NEXT: s_getpc_b64 s[4:5]
-; CHECK-NEXT: s_add_u32 s4, s4, _Z4exp2d@gotpcrel32@lo+4
-; CHECK-NEXT: s_addc_u32 s5, s5, _Z4exp2d@gotpcrel32@hi+12
-; CHECK-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x0
-; CHECK-NEXT: s_mov_b64 s[4:5], s[48:49]
-; CHECK-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3]
-; CHECK-NEXT: s_mov_b64 s[6:7], s[38:39]
-; CHECK-NEXT: s_mov_b64 s[8:9], s[36:37]
-; CHECK-NEXT: s_mov_b64 s[10:11], s[34:35]
-; CHECK-NEXT: s_mov_b32 s12, s53
-; CHECK-NEXT: s_mov_b32 s13, s52
-; CHECK-NEXT: s_mov_b32 s14, s51
-; CHECK-NEXT: s_mov_b32 s15, s50
-; CHECK-NEXT: v_mov_b32_e32 v31, v40
-; CHECK-NEXT: s_waitcnt lgkmcnt(0)
-; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17]
-; CHECK-NEXT: v_lshlrev_b32_e32 v2, 31, v41
-; CHECK-NEXT: v_and_b32_e32 v2, v2, v42
-; CHECK-NEXT: buffer_load_dword v42, off, s[0:3], s33 ; 4-byte Folded Reload
-; CHECK-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
-; CHECK-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload
-; CHECK-NEXT: v_or_b32_e32 v1, v2, v1
-; CHECK-NEXT: v_readlane_b32 s53, v43, 13
-; CHECK-NEXT: v_readlane_b32 s52, v43, 12
-; CHECK-NEXT: v_readlane_b32 s51, v43, 11
-; CHECK-NEXT: v_readlane_b32 s50, v43, 10
-; CHECK-NEXT: v_readlane_b32 s49, v43, 9
-; CHECK-NEXT: v_readlane_b32 s48, v43, 8
-; CHECK-NEXT: v_readlane_b32 s39, v43, 7
-; CHECK-NEXT: v_readlane_b32 s38, v43, 6
-; CHECK-NEXT: v_readlane_b32 s37, v43, 5
-; CHECK-NEXT: v_readlane_b32 s36, v43, 4
-; CHECK-NEXT: v_readlane_b32 s35, v43, 3
-; CHECK-NEXT: v_readlane_b32 s34, v43, 2
-; CHECK-NEXT: v_readlane_b32 s31, v43, 1
-; CHECK-NEXT: v_readlane_b32 s30, v43, 0
-; CHECK-NEXT: s_mov_b32 s32, s33
-; CHECK-NEXT: v_readlane_b32 s4, v43, 14
-; CHECK-NEXT: s_or_saveexec_b64 s[6:7], -1
-; CHECK-NEXT: buffer_load_dword v43, off, s[0:3], s33 offset:12 ; 4-byte Folded Reload
-; CHECK-NEXT: s_mov_b64 exec, s[6:7]
-; CHECK-NEXT: s_mov_b32 s33, s4
-; CHECK-NEXT: s_waitcnt vmcnt(0)
-; CHECK-NEXT: s_setpc_b64 s[30:31]
- %call = tail call fast double @_Z4powndi(double %x, i32 %y)
- ret double %call
-}
-
-define half @test_pown_fast_f16_known_even(half %x, i32 %y.arg) {
-; CHECK-LABEL: test_pown_fast_f16_known_even:
-; CHECK: ; %bb.0:
-; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CHECK-NEXT: v_lshlrev_b32_e32 v1, 1, v1
-; CHECK-NEXT: v_cvt_f32_i32_e32 v1, v1
-; CHECK-NEXT: v_log_f16_e64 v0, |v0|
-; CHECK-NEXT: v_cvt_f16_f32_e32 v1, v1
-; CHECK-NEXT: v_mul_f16_e32 v0, v0, v1
-; CHECK-NEXT: v_exp_f16_e32 v0, v0
-; CHECK-NEXT: s_setpc_b64 s[30:31]
- %y = shl i32 %y.arg, 1
- %call = tail call fast half @_Z4pownDhi(half %x, i32 %y)
- ret half %call
-}
-
-define float @test_pown_fast_f32_known_even(float %x, i32 %y.arg) {
-; CHECK-LABEL: test_pown_fast_f32_known_even:
-; CHECK: ; %bb.0:
-; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CHECK-NEXT: s_mov_b32 s4, 0x800000
-; CHECK-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4
-; CHECK-NEXT: v_cndmask_b32_e64 v3, 0, 32, vcc
-; CHECK-NEXT: v_ldexp_f32 v0, |v0|, v3
-; CHECK-NEXT: v_lshlrev_b32_e32 v1, 1, v1
-; CHECK-NEXT: v_log_f32_e32 v0, v0
-; CHECK-NEXT: v_cvt_f32_i32_e32 v1, v1
-; CHECK-NEXT: v_mov_b32_e32 v2, 0x42000000
-; CHECK-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
-; CHECK-NEXT: v_sub_f32_e32 v0, v0, v2
-; CHECK-NEXT: v_mul_f32_e32 v2, v0, v1
-; CHECK-NEXT: s_mov_b32 s4, 0xc2fc0000
-; CHECK-NEXT: v_mov_b32_e32 v3, 0x42800000
-; CHECK-NEXT: v_cmp_gt_f32_e32 vcc, s4, v2
-; CHECK-NEXT: v_cndmask_b32_e32 v2, 0, v3, vcc
-; CHECK-NEXT: v_fma_f32 v0, v0, v1, v2
-; CHECK-NEXT: v_exp_f32_e32 v0, v0
-; CHECK-NEXT: v_not_b32_e32 v1, 63
-; CHECK-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
-; CHECK-NEXT: v_ldexp_f32 v0, v0, v1
-; CHECK-NEXT: s_setpc_b64 s[30:31]
- %y = shl i32 %y.arg, 1
- %call = tail call fast float @_Z4pownfi(float %x, i32 %y)
- ret float %call
-}
-
-define double @test_pown_fast_f64_known_even(double %x, i32 %y.arg) {
-; CHECK-LABEL: test_pown_fast_f64_known_even:
-; CHECK: ; %bb.0:
-; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CHECK-NEXT: s_mov_b32 s16, s33
-; CHECK-NEXT: s_mov_b32 s33, s32
-; CHECK-NEXT: s_or_saveexec_b64 s[18:19], -1
-; CHECK-NEXT: buffer_store_dword v42, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill
-; CHECK-NEXT: s_mov_b64 exec, s[18:19]
-; CHECK-NEXT: v_writelane_b32 v42, s16, 14
-; CHECK-NEXT: v_writelane_b32 v42, s30, 0
-; CHECK-NEXT: v_writelane_b32 v42, s31, 1
-; CHECK-NEXT: v_writelane_b32 v42, s34, 2
-; CHECK-NEXT: v_writelane_b32 v42, s35, 3
-; CHECK-NEXT: v_writelane_b32 v42, s36, 4
-; CHECK-NEXT: v_writelane_b32 v42, s37, 5
-; CHECK-NEXT: v_writelane_b32 v42, s38, 6
-; CHECK-NEXT: v_writelane_b32 v42, s39, 7
-; CHECK-NEXT: s_addk_i32 s32, 0x400
-; CHECK-NEXT: v_writelane_b32 v42, s48, 8
-; CHECK-NEXT: v_writelane_b32 v42, s49, 9
-; CHECK-NEXT: s_mov_b64 s[48:49], s[4:5]
-; CHECK-NEXT: s_getpc_b64 s[4:5]
-; CHECK-NEXT: s_add_u32 s4, s4, _Z4log2d@gotpcrel32@lo+4
-; CHECK-NEXT: s_addc_u32 s5, s5, _Z4log2d@gotpcrel32@hi+12
-; CHECK-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x0
-; CHECK-NEXT: v_writelane_b32 v42, s50, 10
-; CHECK-NEXT: v_writelane_b32 v42, s51, 11
-; CHECK-NEXT: v_writelane_b32 v42, s52, 12
-; CHECK-NEXT: v_and_b32_e32 v1, 0x7fffffff, v1
-; CHECK-NEXT: s_mov_b64 s[4:5], s[48:49]
-; CHECK-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
-; CHECK-NEXT: buffer_store_dword v41, off, s[0:3], s33 ; 4-byte Folded Spill
-; CHECK-NEXT: v_writelane_b32 v42, s53, 13
-; CHECK-NEXT: v_mov_b32_e32 v40, v31
-; CHECK-NEXT: s_mov_b32 s50, s15
-; CHECK-NEXT: s_mov_b32 s51, s14
-; CHECK-NEXT: s_mov_b32 s52, s13
-; CHECK-NEXT: s_mov_b32 s53, s12
-; CHECK-NEXT: s_mov_b64 s[34:35], s[10:11]
-; CHECK-NEXT: s_mov_b64 s[36:37], s[8:9]
-; CHECK-NEXT: s_mov_b64 s[38:39], s[6:7]
-; CHECK-NEXT: v_lshlrev_b32_e32 v41, 1, v2
-; CHECK-NEXT: s_waitcnt lgkmcnt(0)
-; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17]
-; CHECK-NEXT: v_cvt_f64_i32_e32 v[2:3], v41
-; CHECK-NEXT: s_getpc_b64 s[4:5]
-; CHECK-NEXT: s_add_u32 s4, s4, _Z4exp2d@gotpcrel32@lo+4
-; CHECK-NEXT: s_addc_u32 s5, s5, _Z4exp2d@gotpcrel32@hi+12
-; CHECK-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x0
-; CHECK-NEXT: s_mov_b64 s[4:5], s[48:49]
-; CHECK-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3]
-; CHECK-NEXT: s_mov_b64 s[6:7], s[38:39]
-; CHECK-NEXT: s_mov_b64 s[8:9], s[36:37]
-; CHECK-NEXT: s_mov_b64 s[10:11], s[34:35]
-; CHECK-NEXT: s_mov_b32 s12, s53
-; CHECK-NEXT: s_mov_b32 s13, s52
-; CHECK-NEXT: s_mov_b32 s14, s51
-; CHECK-NEXT: s_mov_b32 s15, s50
-; CHECK-NEXT: v_mov_b32_e32 v31, v40
-; CHECK-NEXT: s_waitcnt lgkmcnt(0)
-; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17]
-; CHECK-NEXT: buffer_load_dword v41, off, s[0:3], s33 ; 4-byte Folded Reload
-; CHECK-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
-; CHECK-NEXT: v_readlane_b32 s53, v42, 13
-; CHECK-NEXT: v_readlane_b32 s52, v42, 12
-; CHECK-NEXT: v_readlane_b32 s51, v42, 11
-; CHECK-NEXT: v_readlane_b32 s50, v42, 10
-; CHECK-NEXT: v_readlane_b32 s49, v42, 9
-; CHECK-NEXT: v_readlane_b32 s48, v42, 8
-; CHECK-NEXT: v_readlane_b32 s39, v42, 7
-; CHECK-NEXT: v_readlane_b32 s38, v42, 6
-; CHECK-NEXT: v_readlane_b32 s37, v42, 5
-; CHECK-NEXT: v_readlane_b32 s36, v42, 4
-; CHECK-NEXT: v_readlane_b32 s35, v42, 3
-; CHECK-NEXT: v_readlane_b32 s34, v42, 2
-; CHECK-NEXT: v_readlane_b32 s31, v42, 1
-; CHECK-NEXT: v_readlane_b32 s30, v42, 0
-; CHECK-NEXT: s_mov_b32 s32, s33
-; CHECK-NEXT: v_readlane_b32 s4, v42, 14
-; CHECK-NEXT: s_or_saveexec_b64 s[6:7], -1
-; CHECK-NEXT: buffer_load_dword v42, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload
-; CHECK-NEXT: s_mov_b64 exec, s[6:7]
-; CHECK-NEXT: s_mov_b32 s33, s4
-; CHECK-NEXT: s_waitcnt vmcnt(0)
-; CHECK-NEXT: s_setpc_b64 s[30:31]
- %y = shl i32 %y.arg, 1
- %call = tail call fast double @_Z4powndi(double %x, i32 %y)
- ret double %call
-}
-
-define half @test_pown_fast_f16_known_odd(half %x, i32 %y.arg) {
-; CHECK-LABEL: test_pown_fast_f16_known_odd:
-; CHECK: ; %bb.0:
-; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CHECK-NEXT: v_or_b32_e32 v1, 1, v1
-; CHECK-NEXT: v_cvt_f32_i32_e32 v1, v1
-; CHECK-NEXT: v_log_f16_e64 v2, |v0|
-; CHECK-NEXT: s_movk_i32 s4, 0x7fff
-; CHECK-NEXT: v_cvt_f16_f32_e32 v1, v1
-; CHECK-NEXT: v_mul_f16_e32 v1, v2, v1
-; CHECK-NEXT: v_exp_f16_e32 v1, v1
-; CHECK-NEXT: v_bfi_b32 v0, s4, v1, v0
-; CHECK-NEXT: s_setpc_b64 s[30:31]
- %y = or i32 %y.arg, 1
- %call = tail call fast half @_Z4pownDhi(half %x, i32 %y)
- ret half %call
-}
-
-define float @test_pown_fast_f32_known_odd(float %x, i32 %y.arg) {
-; CHECK-LABEL: test_pown_fast_f32_known_odd:
-; CHECK: ; %bb.0:
-; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CHECK-NEXT: s_mov_b32 s4, 0x800000
-; CHECK-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4
-; CHECK-NEXT: v_cndmask_b32_e64 v3, 0, 32, vcc
-; CHECK-NEXT: v_ldexp_f32 v3, |v0|, v3
-; CHECK-NEXT: v_or_b32_e32 v1, 1, v1
-; CHECK-NEXT: v_log_f32_e32 v3, v3
-; CHECK-NEXT: v_cvt_f32_i32_e32 v1, v1
-; CHECK-NEXT: v_mov_b32_e32 v2, 0x42000000
-; CHECK-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
-; CHECK-NEXT: v_sub_f32_e32 v2, v3, v2
-; CHECK-NEXT: v_mul_f32_e32 v3, v2, v1
-; CHECK-NEXT: s_mov_b32 s4, 0xc2fc0000
-; CHECK-NEXT: v_mov_b32_e32 v4, 0x42800000
-; CHECK-NEXT: v_cmp_gt_f32_e32 vcc, s4, v3
-; CHECK-NEXT: v_cndmask_b32_e32 v3, 0, v4, vcc
-; CHECK-NEXT: v_fma_f32 v1, v2, v1, v3
-; CHECK-NEXT: v_exp_f32_e32 v1, v1
-; CHECK-NEXT: v_not_b32_e32 v2, 63
-; CHECK-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
-; CHECK-NEXT: s_brev_b32 s4, -2
-; CHECK-NEXT: v_ldexp_f32 v1, v1, v2
-; CHECK-NEXT: v_bfi_b32 v0, s4, v1, v0
-; CHECK-NEXT: s_setpc_b64 s[30:31]
- %y = or i32 %y.arg, 1
- %call = tail call fast float @_Z4pownfi(float %x, i32 %y)
- ret float %call
-}
-
-define double @test_pown_fast_f64_known_odd(double %x, i32 %y.arg) {
-; CHECK-LABEL: test_pown_fast_f64_known_odd:
-; CHECK: ; %bb.0:
-; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CHECK-NEXT: s_mov_b32 s16, s33
-; CHECK-NEXT: s_mov_b32 s33, s32
-; CHECK-NEXT: s_or_saveexec_b64 s[18:19], -1
-; CHECK-NEXT: buffer_store_dword v43, off, s[0:3], s33 offset:12 ; 4-byte Folded Spill
-; CHECK-NEXT: s_mov_b64 exec, s[18:19]
-; CHECK-NEXT: v_writelane_b32 v43, s16, 14
-; CHECK-NEXT: v_writelane_b32 v43, s30, 0
-; CHECK-NEXT: v_writelane_b32 v43, s31, 1
-; CHECK-NEXT: v_writelane_b32 v43, s34, 2
-; CHECK-NEXT: v_writelane_b32 v43, s35, 3
-; CHECK-NEXT: v_writelane_b32 v43, s36, 4
-; CHECK-NEXT: v_writelane_b32 v43, s37, 5
-; CHECK-NEXT: v_writelane_b32 v43, s38, 6
-; CHECK-NEXT: v_writelane_b32 v43, s39, 7
-; CHECK-NEXT: s_addk_i32 s32, 0x800
-; CHECK-NEXT: v_writelane_b32 v43, s48, 8
-; CHECK-NEXT: v_writelane_b32 v43, s49, 9
-; CHECK-NEXT: s_mov_b64 s[48:49], s[4:5]
-; CHECK-NEXT: s_getpc_b64 s[4:5]
-; CHECK-NEXT: s_add_u32 s4, s4, _Z4log2d@gotpcrel32@lo+4
-; CHECK-NEXT: s_addc_u32 s5, s5, _Z4log2d@gotpcrel32@hi+12
-; CHECK-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x0
-; CHECK-NEXT: v_writelane_b32 v43, s50, 10
-; CHECK-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill
-; CHECK-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
-; CHECK-NEXT: buffer_store_dword v42, off, s[0:3], s33 ; 4-byte Folded Spill
-; CHECK-NEXT: v_writelane_b32 v43, s51, 11
-; CHECK-NEXT: v_mov_b32_e32 v41, v1
-; CHECK-NEXT: v_writelane_b32 v43, s52, 12
-; CHECK-NEXT: v_and_b32_e32 v1, 0x7fffffff, v41
-; CHECK-NEXT: s_mov_b64 s[4:5], s[48:49]
-; CHECK-NEXT: v_writelane_b32 v43, s53, 13
-; CHECK-NEXT: v_mov_b32_e32 v40, v31
-; CHECK-NEXT: s_mov_b32 s50, s15
-; CHECK-NEXT: s_mov_b32 s51, s14
-; CHECK-NEXT: s_mov_b32 s52, s13
-; CHECK-NEXT: s_mov_b32 s53, s12
-; CHECK-NEXT: s_mov_b64 s[34:35], s[10:11]
-; CHECK-NEXT: s_mov_b64 s[36:37], s[8:9]
-; CHECK-NEXT: s_mov_b64 s[38:39], s[6:7]
-; CHECK-NEXT: v_or_b32_e32 v42, 1, v2
-; CHECK-NEXT: s_waitcnt lgkmcnt(0)
-; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17]
-; CHECK-NEXT: v_cvt_f64_i32_e32 v[2:3], v42
-; CHECK-NEXT: s_getpc_b64 s[4:5]
-; CHECK-NEXT: s_add_u32 s4, s4, _Z4exp2d@gotpcrel32@lo+4
-; CHECK-NEXT: s_addc_u32 s5, s5, _Z4exp2d@gotpcrel32@hi+12
-; CHECK-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x0
-; CHECK-NEXT: s_mov_b64 s[4:5], s[48:49]
-; CHECK-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3]
-; CHECK-NEXT: s_mov_b64 s[6:7], s[38:39]
-; CHECK-NEXT: s_mov_b64 s[8:9], s[36:37]
-; CHECK-NEXT: s_mov_b64 s[10:11], s[34:35]
-; CHECK-NEXT: s_mov_b32 s12, s53
-; CHECK-NEXT: s_mov_b32 s13, s52
-; CHECK-NEXT: s_mov_b32 s14, s51
-; CHECK-NEXT: s_mov_b32 s15, s50
-; CHECK-NEXT: v_mov_b32_e32 v31, v40
-; CHECK-NEXT: s_waitcnt lgkmcnt(0)
-; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17]
-; CHECK-NEXT: v_and_b32_e32 v2, 0x80000000, v41
-; CHECK-NEXT: buffer_load_dword v42, off, s[0:3], s33 ; 4-byte Folded Reload
-; CHECK-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
-; CHECK-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload
-; CHECK-NEXT: v_or_b32_e32 v1, v2, v1
-; CHECK-NEXT: v_readlane_b32 s53, v43, 13
-; CHECK-NEXT: v_readlane_b32 s52, v43, 12
-; CHECK-NEXT: v_readlane_b32 s51, v43, 11
-; CHECK-NEXT: v_readlane_b32 s50, v43, 10
-; CHECK-NEXT: v_readlane_b32 s49, v43, 9
-; CHECK-NEXT: v_readlane_b32 s48, v43, 8
-; CHECK-NEXT: v_readlane_b32 s39, v43, 7
-; CHECK-NEXT: v_readlane_b32 s38, v43, 6
-; CHECK-NEXT: v_readlane_b32 s37, v43, 5
-; CHECK-NEXT: v_readlane_b32 s36, v43, 4
-; CHECK-NEXT: v_readlane_b32 s35, v43, 3
-; CHECK-NEXT: v_readlane_b32 s34, v43, 2
-; CHECK-NEXT: v_readlane_b32 s31, v43, 1
-; CHECK-NEXT: v_readlane_b32 s30, v43, 0
-; CHECK-NEXT: s_mov_b32 s32, s33
-; CHECK-NEXT: v_readlane_b32 s4, v43, 14
-; CHECK-NEXT: s_or_saveexec_b64 s[6:7], -1
-; CHECK-NEXT: buffer_load_dword v43, off, s[0:3], s33 offset:12 ; 4-byte Folded Reload
-; CHECK-NEXT: s_mov_b64 exec, s[6:7]
-; CHECK-NEXT: s_mov_b32 s33, s4
-; CHECK-NEXT: s_waitcnt vmcnt(0)
-; CHECK-NEXT: s_setpc_b64 s[30:31]
- %y = or i32 %y.arg, 1
- %call = tail call fast double @_Z4powndi(double %x, i32 %y)
- ret double %call
-}
-
-!llvm.module.flags = !{!0}
-!0 = !{i32 1, !"amdhsa_code_object_version", i32 500}
+define double @test_pow_afn_f64_nnan_ninf_x_known_positive(double nofpclass(ninf nnorm nsub) %x, double %y) {
+; CHECK-LABEL: define double @test_pow_afn_f64_nnan_ninf_x_known_positive
+; CHECK-SAME: (double nofpclass(ninf nsub nnorm) [[X:%.*]], double [[Y:%.*]]) {
+; CHECK-NEXT: [[__LOG2:%.*]] = call nnan ninf afn double @_Z4log2d(double [[X]])
+; CHECK-NEXT: [[__YLOGX:%.*]] = fmul nnan ninf afn double [[Y]], [[__LOG2]]
+; CHECK-NEXT: [[__EXP2:%.*]] = call nnan ninf afn double @_Z4exp2d(double [[__YLOGX]])
+; CHECK-NEXT: ret double [[__EXP2]]
+;
+ %pow = tail call afn nnan ninf double @_Z3powdd(double %x, double %y)
+ ret double %pow
+}
+
+define <2 x double> @test_pow_afn_v2f64_nnan_x_known_positive(<2 x double> nofpclass(ninf nnorm nsub) %x, <2 x double> %y) {
+; CHECK-LABEL: define <2 x double> @test_pow_afn_v2f64_nnan_x_known_positive
+; CHECK-SAME: (<2 x double> nofpclass(ninf nsub nnorm) [[X:%.*]], <2 x double> [[Y:%.*]]) {
+; CHECK-NEXT: [[POW:%.*]] = tail call nnan afn <2 x double> @_Z4powrDv2_dS_(<2 x double> [[X]], <2 x double> [[Y]])
+; CHECK-NEXT: ret <2 x double> [[POW]]
+;
+ %pow = tail call afn nnan <2 x double> @_Z3powDv2_dS_(<2 x double> %x, <2 x double> %y)
+ ret <2 x double> %pow
+}
+
+define <2 x double> @test_pow_afn_v2f64_nnan_ninf_x_known_positive(<2 x double> nofpclass(ninf nnorm nsub) %x, <2 x double> %y) {
+; CHECK-LABEL: define <2 x double> @test_pow_afn_v2f64_nnan_ninf_x_known_positive
+; CHECK-SAME: (<2 x double> nofpclass(ninf nsub nnorm) [[X:%.*]], <2 x double> [[Y:%.*]]) {
+; CHECK-NEXT: [[__LOG2:%.*]] = call nnan ninf afn <2 x double> @_Z4log2Dv2_d(<2 x double> [[X]])
+; CHECK-NEXT: [[__YLOGX:%.*]] = fmul nnan ninf afn <2 x double> [[Y]], [[__LOG2]]
+; CHECK-NEXT: [[__EXP2:%.*]] = call nnan ninf afn <2 x double> @_Z4exp2Dv2_d(<2 x double> [[__YLOGX]])
+; CHECK-NEXT: ret <2 x double> [[__EXP2]]
+;
+ %pow = tail call afn nnan ninf <2 x double> @_Z3powDv2_dS_(<2 x double> %x, <2 x double> %y)
+ ret <2 x double> %pow
+}
+
+define double @test_pow_f64_x_known_positive(double nofpclass(ninf nnorm nsub) %x, double %y) {
+; CHECK-LABEL: define double @test_pow_f64_x_known_positive
+; CHECK-SAME: (double nofpclass(ninf nsub nnorm) [[X:%.*]], double [[Y:%.*]]) {
+; CHECK-NEXT: [[POW:%.*]] = tail call double @_Z4powrdd(double [[X]], double [[Y]])
+; CHECK-NEXT: ret double [[POW]]
+;
+ %pow = tail call double @_Z3powdd(double %x, double %y)
+ ret double %pow
+}
+
+define double @test_pow_afn_f64_x_known_positive(double nofpclass(ninf nnorm nsub) %x, double %y) {
+; CHECK-LABEL: define double @test_pow_afn_f64_x_known_positive
+; CHECK-SAME: (double nofpclass(ninf nsub nnorm) [[X:%.*]], double [[Y:%.*]]) {
+; CHECK-NEXT: [[POW:%.*]] = tail call afn double @_Z4powrdd(double [[X]], double [[Y]])
+; CHECK-NEXT: ret double [[POW]]
+;
+ %pow = tail call afn double @_Z3powdd(double %x, double %y)
+ ret double %pow
+}
+
+define <2 x double> @test_pow_v2f64_x_known_positive(<2 x double> nofpclass(ninf nnorm nsub) %x, <2 x double> %y) {
+; CHECK-LABEL: define <2 x double> @test_pow_v2f64_x_known_positive
+; CHECK-SAME: (<2 x double> nofpclass(ninf nsub nnorm) [[X:%.*]], <2 x double> [[Y:%.*]]) {
+; CHECK-NEXT: [[POW:%.*]] = tail call <2 x double> @_Z4powrDv2_dS_(<2 x double> [[X]], <2 x double> [[Y]])
+; CHECK-NEXT: ret <2 x double> [[POW]]
+;
+ %pow = tail call <2 x double> @_Z3powDv2_dS_(<2 x double> %x, <2 x double> %y)
+ ret <2 x double> %pow
+}
+
+define <2 x double> @test_pow_afn_v2f64_x_known_positive(<2 x double> nofpclass(ninf nnorm nsub) %x, <2 x double> %y) {
+; CHECK-LABEL: define <2 x double> @test_pow_afn_v2f64_x_known_positive
+; CHECK-SAME: (<2 x double> nofpclass(ninf nsub nnorm) [[X:%.*]], <2 x double> [[Y:%.*]]) {
+; CHECK-NEXT: [[POW:%.*]] = tail call afn <2 x double> @_Z4powrDv2_dS_(<2 x double> [[X]], <2 x double> [[Y]])
+; CHECK-NEXT: ret <2 x double> [[POW]]
+;
+ %pow = tail call afn <2 x double> @_Z3powDv2_dS_(<2 x double> %x, <2 x double> %y)
+ ret <2 x double> %pow
+}
+
+define half @test_pow_afn_f16_nnan_x_known_positive(half nofpclass(ninf nnorm nsub) %x, half %y) {
+; CHECK-LABEL: define half @test_pow_afn_f16_nnan_x_known_positive
+; CHECK-SAME: (half nofpclass(ninf nsub nnorm) [[X:%.*]], half [[Y:%.*]]) {
+; CHECK-NEXT: [[POW:%.*]] = tail call nnan afn half @_Z4powrDhDh(half [[X]], half [[Y]])
+; CHECK-NEXT: ret half [[POW]]
+;
+ %pow = tail call afn nnan half @_Z3powDhDh(half %x, half %y)
+ ret half %pow
+}
+
+define half @test_pow_afn_f16_nnan_ninf_x_known_positive(half nofpclass(ninf nnorm nsub) %x, half %y) {
+; CHECK-LABEL: define half @test_pow_afn_f16_nnan_ninf_x_known_positive
+; CHECK-SAME: (half nofpclass(ninf nsub nnorm) [[X:%.*]], half [[Y:%.*]]) {
+; CHECK-NEXT: [[__LOG2:%.*]] = call nnan ninf afn half @llvm.log2.f16(half [[X]])
+; CHECK-NEXT: [[__YLOGX:%.*]] = fmul nnan ninf afn half [[Y]], [[__LOG2]]
+; CHECK-NEXT: [[__EXP2:%.*]] = call nnan ninf afn half @llvm.exp2.f16(half [[__YLOGX]])
+; CHECK-NEXT: ret half [[__EXP2]]
+;
+ %pow = tail call afn nnan ninf half @_Z3powDhDh(half %x, half %y)
+ ret half %pow
+}
+
+define <2 x half> @test_pow_afn_v2f16_nnan_x_known_positive(<2 x half> nofpclass(ninf nnorm nsub) %x, <2 x half> %y) {
+; CHECK-LABEL: define <2 x half> @test_pow_afn_v2f16_nnan_x_known_positive
+; CHECK-SAME: (<2 x half> nofpclass(ninf nsub nnorm) [[X:%.*]], <2 x half> [[Y:%.*]]) {
+; CHECK-NEXT: [[POW:%.*]] = tail call nnan afn <2 x half> @_Z4powrDv2_DhS_(<2 x half> [[X]], <2 x half> [[Y]])
+; CHECK-NEXT: ret <2 x half> [[POW]]
+;
+ %pow = tail call afn nnan <2 x half> @_Z3powDv2_DhS_(<2 x half> %x, <2 x half> %y)
+ ret <2 x half> %pow
+}
+
+define <2 x half> @test_pow_afn_v2f16_nnan_ninf_x_known_positive(<2 x half> nofpclass(ninf nnorm nsub) %x, <2 x half> %y) {
+; CHECK-LABEL: define <2 x half> @test_pow_afn_v2f16_nnan_ninf_x_known_positive
+; CHECK-SAME: (<2 x half> nofpclass(ninf nsub nnorm) [[X:%.*]], <2 x half> [[Y:%.*]]) {
+; CHECK-NEXT: [[__LOG2:%.*]] = call nnan ninf afn <2 x half> @llvm.log2.v2f16(<2 x half> [[X]])
+; CHECK-NEXT: [[__YLOGX:%.*]] = fmul nnan ninf afn <2 x half> [[Y]], [[__LOG2]]
+; CHECK-NEXT: [[__EXP2:%.*]] = call nnan ninf afn <2 x half> @llvm.exp2.v2f16(<2 x half> [[__YLOGX]])
+; CHECK-NEXT: ret <2 x half> [[__EXP2]]
+;
+ %pow = tail call afn nnan ninf <2 x half> @_Z3powDv2_DhS_(<2 x half> %x, <2 x half> %y)
+ ret <2 x half> %pow
+}
+
+define half @test_pow_f16_x_known_positive(half nofpclass(ninf nnorm nsub) %x, half %y) {
+; CHECK-LABEL: define half @test_pow_f16_x_known_positive
+; CHECK-SAME: (half nofpclass(ninf nsub nnorm) [[X:%.*]], half [[Y:%.*]]) {
+; CHECK-NEXT: [[POW:%.*]] = tail call half @_Z4powrDhDh(half [[X]], half [[Y]])
+; CHECK-NEXT: ret half [[POW]]
+;
+ %pow = tail call half @_Z3powDhDh(half %x, half %y)
+ ret half %pow
+}
+
+define half @test_pow_afn_f16_x_known_positive(half nofpclass(ninf nnorm nsub) %x, half %y) {
+; CHECK-LABEL: define half @test_pow_afn_f16_x_known_positive
+; CHECK-SAME: (half nofpclass(ninf nsub nnorm) [[X:%.*]], half [[Y:%.*]]) {
+; CHECK-NEXT: [[POW:%.*]] = tail call afn half @_Z4powrDhDh(half [[X]], half [[Y]])
+; CHECK-NEXT: ret half [[POW]]
+;
+ %pow = tail call afn half @_Z3powDhDh(half %x, half %y)
+ ret half %pow
+}
+
+define <2 x half> @test_pow_v2f16_x_known_positive(<2 x half> nofpclass(ninf nnorm nsub) %x, <2 x half> %y) {
+; CHECK-LABEL: define <2 x half> @test_pow_v2f16_x_known_positive
+; CHECK-SAME: (<2 x half> nofpclass(ninf nsub nnorm) [[X:%.*]], <2 x half> [[Y:%.*]]) {
+; CHECK-NEXT: [[POW:%.*]] = tail call <2 x half> @_Z4powrDv2_DhS_(<2 x half> [[X]], <2 x half> [[Y]])
+; CHECK-NEXT: ret <2 x half> [[POW]]
+;
+ %pow = tail call <2 x half> @_Z3powDv2_DhS_(<2 x half> %x, <2 x half> %y)
+ ret <2 x half> %pow
+}
+
+define <2 x half> @test_pow_afn_v2f16_x_known_positive(<2 x half> nofpclass(ninf nnorm nsub) %x, <2 x half> %y) {
+; CHECK-LABEL: define <2 x half> @test_pow_afn_v2f16_x_known_positive
+; CHECK-SAME: (<2 x half> nofpclass(ninf nsub nnorm) [[X:%.*]], <2 x half> [[Y:%.*]]) {
+; CHECK-NEXT: [[POW:%.*]] = tail call afn <2 x half> @_Z4powrDv2_DhS_(<2 x half> [[X]], <2 x half> [[Y]])
+; CHECK-NEXT: ret <2 x half> [[POW]]
+;
+ %pow = tail call afn <2 x half> @_Z3powDv2_DhS_(<2 x half> %x, <2 x half> %y)
+ ret <2 x half> %pow
+}
+
+define float @test_pow_f32__y_0(float %x) {
+; CHECK-LABEL: define float @test_pow_f32__y_0
+; CHECK-SAME: (float [[X:%.*]]) {
+; CHECK-NEXT: ret float 1.000000e+00
+;
+ %pow = tail call float @_Z3powff(float %x, float 0.0)
+ ret float %pow
+}
+
+define float @test_pow_f32__y_n0(float %x) {
+; CHECK-LABEL: define float @test_pow_f32__y_n0
+; CHECK-SAME: (float [[X:%.*]]) {
+; CHECK-NEXT: ret float 1.000000e+00
+;
+ %pow = tail call float @_Z3powff(float %x, float -0.0)
+ ret float %pow
+}
+
+define float @test_pow_f32__y_1(float %x) {
+; CHECK-LABEL: define float @test_pow_f32__y_1
+; CHECK-SAME: (float [[X:%.*]]) {
+; CHECK-NEXT: ret float [[X]]
+;
+ %pow = tail call float @_Z3powff(float %x, float 1.0)
+ ret float %pow
+}
+
+define float @test_pow_f32__y_n1(float %x) {
+; CHECK-LABEL: define float @test_pow_f32__y_n1
+; CHECK-SAME: (float [[X:%.*]]) {
+; CHECK-NEXT: [[__POWRECIP:%.*]] = fdiv float 1.000000e+00, [[X]]
+; CHECK-NEXT: ret float [[__POWRECIP]]
+;
+ %pow = tail call float @_Z3powff(float %x, float -1.0)
+ ret float %pow
+}
+
+define float @test_pow_f32__y_2(float %x) {
+; CHECK-LABEL: define float @test_pow_f32__y_2
+; CHECK-SAME: (float [[X:%.*]]) {
+; CHECK-NEXT: [[__POW2:%.*]] = fmul float [[X]], [[X]]
+; CHECK-NEXT: ret float [[__POW2]]
+;
+ %pow = tail call float @_Z3powff(float %x, float 2.0)
+ ret float %pow
+}
+
+define float @test_pow_f32__y_n2(float %x) {
+; CHECK-LABEL: define float @test_pow_f32__y_n2
+; CHECK-SAME: (float [[X:%.*]]) {
+; CHECK-NEXT: [[POW:%.*]] = tail call float @_Z4pownfi(float [[X]], i32 -2)
+; CHECK-NEXT: ret float [[POW]]
+;
+ %pow = tail call float @_Z3powff(float %x, float -2.0)
+ ret float %pow
+}
+
+define float @test_pow_f32__y_half(float %x) {
+; CHECK-LABEL: define float @test_pow_f32__y_half
+; CHECK-SAME: (float [[X:%.*]]) {
+; CHECK-NEXT: [[__POW2SQRT:%.*]] = call float @_Z4sqrtf(float [[X]])
+; CHECK-NEXT: ret float [[__POW2SQRT]]
+;
+ %pow = tail call float @_Z3powff(float %x, float 0.5)
+ ret float %pow
+}
+
+define float @test_pow_f32__y_neg_half(float %x) {
+; CHECK-LABEL: define float @test_pow_f32__y_neg_half
+; CHECK-SAME: (float [[X:%.*]]) {
+; CHECK-NEXT: [[__POW2RSQRT:%.*]] = call float @_Z5rsqrtf(float [[X]])
+; CHECK-NEXT: ret float [[__POW2RSQRT]]
+;
+ %pow = tail call float @_Z3powff(float %x, float -0.5)
+ ret float %pow
+}
+
+define float @test_pow_f32__y_3(float %x) {
+; CHECK-LABEL: define float @test_pow_f32__y_3
+; CHECK-SAME: (float [[X:%.*]]) {
+; CHECK-NEXT: [[POW:%.*]] = tail call float @_Z4pownfi(float [[X]], i32 3)
+; CHECK-NEXT: ret float [[POW]]
+;
+ %pow = tail call float @_Z3powff(float %x, float 3.0)
+ ret float %pow
+}
+
+define float @test_pow_f32__y_n3(float %x) {
+; CHECK-LABEL: define float @test_pow_f32__y_n3
+; CHECK-SAME: (float [[X:%.*]]) {
+; CHECK-NEXT: [[POW:%.*]] = tail call float @_Z4pownfi(float [[X]], i32 -3)
+; CHECK-NEXT: ret float [[POW]]
+;
+ %pow = tail call float @_Z3powff(float %x, float -3.0)
+ ret float %pow
+}
+
+define float @test_pow_f32__y_2_5(float %x) {
+; CHECK-LABEL: define float @test_pow_f32__y_2_5
+; CHECK-SAME: (float [[X:%.*]]) {
+; CHECK-NEXT: [[POW:%.*]] = tail call float @_Z3powff(float [[X]], float 2.500000e+00)
+; CHECK-NEXT: ret float [[POW]]
+;
+ %pow = tail call float @_Z3powff(float %x, float 2.5)
+ ret float %pow
+}
+
+define float @test_pow_f32__y_n_2_5(float %x) {
+; CHECK-LABEL: define float @test_pow_f32__y_n_2_5
+; CHECK-SAME: (float [[X:%.*]]) {
+; CHECK-NEXT: [[POW:%.*]] = tail call float @_Z3powff(float [[X]], float -2.500000e+00)
+; CHECK-NEXT: ret float [[POW]]
+;
+ %pow = tail call float @_Z3powff(float %x, float -2.5)
+ ret float %pow
+}
+
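+; Summary of the scalar constant-exponent folds checked above (a restatement
+; of the CHECK lines, not a normative description of the pass):
+;   pow(x,  0.0) -> 1.0          pow(x, -0.0) -> 1.0
+;   pow(x,  1.0) -> x            pow(x, -1.0) -> 1.0 / x
+;   pow(x,  2.0) -> x * x        pow(x, -2.0) -> pown(x, -2)
+;   pow(x,  0.5) -> sqrt(x)      pow(x, -0.5) -> rsqrt(x)
+;   pow(x, +/-3.0) -> pown(x, +/-3); non-integral y (+/-2.5) is left alone.
+; The vector tests below check the same folds elementwise.
+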
+define <2 x float> @test_pow_v2f32__y_0(<2 x float> %x) {
+; CHECK-LABEL: define <2 x float> @test_pow_v2f32__y_0
+; CHECK-SAME: (<2 x float> [[X:%.*]]) {
+; CHECK-NEXT: ret <2 x float> splat (float 1.000000e+00)
+;
+ %pow = tail call <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> zeroinitializer)
+ ret <2 x float> %pow
+}
+
+define <2 x float> @test_pow_v2f32__y_n0(<2 x float> %x) {
+; CHECK-LABEL: define <2 x float> @test_pow_v2f32__y_n0
+; CHECK-SAME: (<2 x float> [[X:%.*]]) {
+; CHECK-NEXT: ret <2 x float> splat (float 1.000000e+00)
+;
+ %pow = tail call <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> <float -0.0, float -0.0>)
+ ret <2 x float> %pow
+}
+
+define <2 x float> @test_pow_v2f32__y_1(<2 x float> %x) {
+; CHECK-LABEL: define <2 x float> @test_pow_v2f32__y_1
+; CHECK-SAME: (<2 x float> [[X:%.*]]) {
+; CHECK-NEXT: ret <2 x float> [[X]]
+;
+ %pow = tail call <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> <float 1.0, float 1.0>)
+ ret <2 x float> %pow
+}
+
+define <2 x float> @test_pow_v2f32__y_n1(<2 x float> %x) {
+; CHECK-LABEL: define <2 x float> @test_pow_v2f32__y_n1
+; CHECK-SAME: (<2 x float> [[X:%.*]]) {
+; CHECK-NEXT: [[__POWRECIP:%.*]] = fdiv <2 x float> splat (float 1.000000e+00), [[X]]
+; CHECK-NEXT: ret <2 x float> [[__POWRECIP]]
+;
+ %pow = tail call <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> <float -1.0, float -1.0>)
+ ret <2 x float> %pow
+}
+
+define <2 x float> @test_pow_v2f32__y_2(<2 x float> %x) {
+; CHECK-LABEL: define <2 x float> @test_pow_v2f32__y_2
+; CHECK-SAME: (<2 x float> [[X:%.*]]) {
+; CHECK-NEXT: [[__POW2:%.*]] = fmul <2 x float> [[X]], [[X]]
+; CHECK-NEXT: ret <2 x float> [[__POW2]]
+;
+ %pow = tail call <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> <float 2.0, float 2.0>)
+ ret <2 x float> %pow
+}
+
+define <2 x float> @test_pow_v2f32__y_n2(<2 x float> %x) {
+; CHECK-LABEL: define <2 x float> @test_pow_v2f32__y_n2
+; CHECK-SAME: (<2 x float> [[X:%.*]]) {
+; CHECK-NEXT: [[POW:%.*]] = tail call <2 x float> @_Z4pownDv2_fDv2_i(<2 x float> [[X]], <2 x i32> splat (i32 -2))
+; CHECK-NEXT: ret <2 x float> [[POW]]
+;
+ %pow = tail call <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> <float -2.0, float -2.0>)
+ ret <2 x float> %pow
+}
+
+define <2 x float> @test_pow_v2f32__y_half(<2 x float> %x) {
+; CHECK-LABEL: define <2 x float> @test_pow_v2f32__y_half
+; CHECK-SAME: (<2 x float> [[X:%.*]]) {
+; CHECK-NEXT: [[__POW2SQRT:%.*]] = call <2 x float> @_Z4sqrtDv2_f(<2 x float> [[X]])
+; CHECK-NEXT: ret <2 x float> [[__POW2SQRT]]
+;
+ %pow = tail call <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> <float 0.5, float 0.5>)
+ ret <2 x float> %pow
+}
+
+define <2 x float> @test_pow_v2f32__y_neg_half(<2 x float> %x) {
+; CHECK-LABEL: define <2 x float> @test_pow_v2f32__y_neg_half
+; CHECK-SAME: (<2 x float> [[X:%.*]]) {
+; CHECK-NEXT: [[__POW2RSQRT:%.*]] = call <2 x float> @_Z5rsqrtDv2_f(<2 x float> [[X]])
+; CHECK-NEXT: ret <2 x float> [[__POW2RSQRT]]
+;
+ %pow = tail call <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> <float -0.5, float -0.5>)
+ ret <2 x float> %pow
+}
+
+define <2 x float> @test_pow_v2f32__y_3(<2 x float> %x) {
+; CHECK-LABEL: define <2 x float> @test_pow_v2f32__y_3
+; CHECK-SAME: (<2 x float> [[X:%.*]]) {
+; CHECK-NEXT: [[POW:%.*]] = tail call <2 x float> @_Z4pownDv2_fDv2_i(<2 x float> [[X]], <2 x i32> splat (i32 3))
+; CHECK-NEXT: ret <2 x float> [[POW]]
+;
+ %pow = tail call <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> <float 3.0, float 3.0>)
+ ret <2 x float> %pow
+}
+
+define <2 x float> @test_pow_v2f32__y_n3(<2 x float> %x) {
+; CHECK-LABEL: define <2 x float> @test_pow_v2f32__y_n3
+; CHECK-SAME: (<2 x float> [[X:%.*]]) {
+; CHECK-NEXT: [[POW:%.*]] = tail call <2 x float> @_Z4pownDv2_fDv2_i(<2 x float> [[X]], <2 x i32> splat (i32 -3))
+; CHECK-NEXT: ret <2 x float> [[POW]]
+;
+ %pow = tail call <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> <float -3.0, float -3.0>)
+ ret <2 x float> %pow
+}
+
+define <2 x float> @test_pow_v2f32__y_2_5(<2 x float> %x) {
+; CHECK-LABEL: define <2 x float> @test_pow_v2f32__y_2_5
+; CHECK-SAME: (<2 x float> [[X:%.*]]) {
+; CHECK-NEXT: [[POW:%.*]] = tail call <2 x float> @_Z3powDv2_fS_(<2 x float> [[X]], <2 x float> splat (float 2.500000e+00))
+; CHECK-NEXT: ret <2 x float> [[POW]]
+;
+ %pow = tail call <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> <float 2.5, float 2.5>)
+ ret <2 x float> %pow
+}
+
+define <2 x float> @test_pow_v2f32__y_n_2_5(<2 x float> %x) {
+; CHECK-LABEL: define <2 x float> @test_pow_v2f32__y_n_2_5
+; CHECK-SAME: (<2 x float> [[X:%.*]]) {
+; CHECK-NEXT: [[POW:%.*]] = tail call <2 x float> @_Z3powDv2_fS_(<2 x float> [[X]], <2 x float> splat (float -2.500000e+00))
+; CHECK-NEXT: ret <2 x float> [[POW]]
+;
+ %pow = tail call <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> <float -2.5, float -2.5>)
+ ret <2 x float> %pow
+}
+
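+; In the tests below, nofpclass(ninf nsub nnorm) excludes every negative
+; value of x except -0.0, so x is known non-negative (nan is still possible).
+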
+define float @test_pow_f32__known_positive__y_0(float nofpclass(ninf nnorm nsub) %x) {
+; CHECK-LABEL: define float @test_pow_f32__known_positive__y_0
+; CHECK-SAME: (float nofpclass(ninf nsub nnorm) [[X:%.*]]) {
+; CHECK-NEXT: ret float 1.000000e+00
+;
+ %pow = tail call float @_Z3powff(float %x, float 0.0)
+ ret float %pow
+}
+
+define float @test_pow_f32__known_positive__y_1(float nofpclass(ninf nnorm nsub) %x) {
+; CHECK-LABEL: define float @test_pow_f32__known_positive__y_1
+; CHECK-SAME: (float nofpclass(ninf nsub nnorm) [[X:%.*]]) {
+; CHECK-NEXT: ret float [[X]]
+;
+ %pow = tail call float @_Z3powff(float %x, float 1.0)
+ ret float %pow
+}
+
+define float @test_pow_f32__known_positive__y_neg1(float nofpclass(ninf nnorm nsub) %x) {
+; CHECK-LABEL: define float @test_pow_f32__known_positive__y_neg1
+; CHECK-SAME: (float nofpclass(ninf nsub nnorm) [[X:%.*]]) {
+; CHECK-NEXT: [[__POWRECIP:%.*]] = fdiv float 1.000000e+00, [[X]]
+; CHECK-NEXT: ret float [[__POWRECIP]]
+;
+ %pow = tail call float @_Z3powff(float %x, float -1.0)
+ ret float %pow
+}
+
+define float @test_pow_f32__known_positive__y_2(float nofpclass(ninf nnorm nsub) %x) {
+; CHECK-LABEL: define float @test_pow_f32__known_positive__y_2
+; CHECK-SAME: (float nofpclass(ninf nsub nnorm) [[X:%.*]]) {
+; CHECK-NEXT: [[__POW2:%.*]] = fmul float [[X]], [[X]]
+; CHECK-NEXT: ret float [[__POW2]]
+;
+ %pow = tail call float @_Z3powff(float %x, float 2.0)
+ ret float %pow
+}
+
+define float @test_pow_f32__known_positive__y_half(float nofpclass(ninf nnorm nsub) %x) {
+; CHECK-LABEL: define float @test_pow_f32__known_positive__y_half
+; CHECK-SAME: (float nofpclass(ninf nsub nnorm) [[X:%.*]]) {
+; CHECK-NEXT: [[__POW2SQRT:%.*]] = call float @_Z4sqrtf(float [[X]])
+; CHECK-NEXT: ret float [[__POW2SQRT]]
+;
+ %pow = tail call float @_Z3powff(float %x, float 0.5)
+ ret float %pow
+}
+
+define float @test_pow_f32__known_positive__y_neghalf(float nofpclass(ninf nnorm nsub) %x) {
+; CHECK-LABEL: define float @test_pow_f32__known_positive__y_neghalf
+; CHECK-SAME: (float nofpclass(ninf nsub nnorm) [[X:%.*]]) {
+; CHECK-NEXT: [[__POW2RSQRT:%.*]] = call float @_Z5rsqrtf(float [[X]])
+; CHECK-NEXT: ret float [[__POW2RSQRT]]
+;
+ %pow = tail call float @_Z3powff(float %x, float -0.5)
+ ret float %pow
+}
+
+define float @test_pow_f32_x_assumed_oge_0(float %x, float %y) {
+; CHECK-LABEL: define float @test_pow_f32_x_assumed_oge_0
+; CHECK-SAME: (float [[X:%.*]], float [[Y:%.*]]) {
+; CHECK-NEXT: [[X_OGE_ZERO:%.*]] = fcmp oge float [[X]], 0.000000e+00
+; CHECK-NEXT: call void @llvm.assume(i1 [[X_OGE_ZERO]])
+; CHECK-NEXT: [[POW:%.*]] = tail call float @_Z4powrff(float [[X]], float [[Y]])
+; CHECK-NEXT: ret float [[POW]]
+;
+ %x.oge.zero = fcmp oge float %x, 0.0
+ call void @llvm.assume(i1 %x.oge.zero)
+ %pow = tail call float @_Z3powff(float %x, float %y)
+ ret float %pow
+}
+
+define float @test_pow_f32_x_assumed_ogt_0(float %x, float %y) {
+; CHECK-LABEL: define float @test_pow_f32_x_assumed_ogt_0
+; CHECK-SAME: (float [[X:%.*]], float [[Y:%.*]]) {
+; CHECK-NEXT: [[X_OGT_ZERO:%.*]] = fcmp ogt float [[X]], 0.000000e+00
+; CHECK-NEXT: call void @llvm.assume(i1 [[X_OGT_ZERO]])
+; CHECK-NEXT: [[POW:%.*]] = tail call float @_Z4powrff(float [[X]], float [[Y]])
+; CHECK-NEXT: ret float [[POW]]
+;
+ %x.ogt.zero = fcmp ogt float %x, 0.0
+ call void @llvm.assume(i1 %x.ogt.zero)
+ %pow = tail call float @_Z3powff(float %x, float %y)
+ ret float %pow
+}
+
+define float @test_pow_f32_x_assumed_uge_0(float %x, float %y) {
+; CHECK-LABEL: define float @test_pow_f32_x_assumed_uge_0
+; CHECK-SAME: (float [[X:%.*]], float [[Y:%.*]]) {
+; CHECK-NEXT: [[X_UGE_ZERO:%.*]] = fcmp uge float [[X]], 0.000000e+00
+; CHECK-NEXT: call void @llvm.assume(i1 [[X_UGE_ZERO]])
+; CHECK-NEXT: [[POW:%.*]] = tail call float @_Z4powrff(float [[X]], float [[Y]])
+; CHECK-NEXT: ret float [[POW]]
+;
+ %x.uge.zero = fcmp uge float %x, 0.0
+ call void @llvm.assume(i1 %x.uge.zero)
+ %pow = tail call float @_Z3powff(float %x, float %y)
+ ret float %pow
+}
+
+define float @test_pow_f32_x_assumed_ugt_0(float %x, float %y) {
+; CHECK-LABEL: define float @test_pow_f32_x_assumed_ugt_0
+; CHECK-SAME: (float [[X:%.*]], float [[Y:%.*]]) {
+; CHECK-NEXT: [[X_UGT_ZERO:%.*]] = fcmp ugt float [[X]], 0.000000e+00
+; CHECK-NEXT: call void @llvm.assume(i1 [[X_UGT_ZERO]])
+; CHECK-NEXT: [[POW:%.*]] = tail call float @_Z4powrff(float [[X]], float [[Y]])
+; CHECK-NEXT: ret float [[POW]]
+;
+ %x.ugt.zero = fcmp ugt float %x, 0.0
+ call void @llvm.assume(i1 %x.ugt.zero)
+ %pow = tail call float @_Z3powff(float %x, float %y)
+ ret float %pow
+}
+
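+; In all four assume variants above, establishing that x is never a negative
+; number is enough to rewrite pow into powr, whose domain is x >= 0.
+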
+define float @test_pow_afn_f32__y_poison(float %x) {
+; CHECK-LABEL: define float @test_pow_afn_f32__y_poison
+; CHECK-SAME: (float [[X:%.*]]) {
+; CHECK-NEXT: [[POW:%.*]] = tail call afn float @_Z4pownfi(float [[X]], i32 poison)
+; CHECK-NEXT: ret float [[POW]]
+;
+ %pow = tail call afn float @_Z3powff(float %x, float poison)
+ ret float %pow
+}
+
+define float @test_pow_afn_f32__y_3(float %x) {
+; CHECK-LABEL: define float @test_pow_afn_f32__y_3
+; CHECK-SAME: (float [[X:%.*]]) {
+; CHECK-NEXT: [[POW:%.*]] = tail call afn float @_Z4pownfi(float [[X]], i32 3)
+; CHECK-NEXT: ret float [[POW]]
+;
+ %pow = tail call afn float @_Z3powff(float %x, float 3.0)
+ ret float %pow
+}
+
+define float @test_pow_afn_f32_nnan_ninf__y_3(float %x) {
+; CHECK-LABEL: define float @test_pow_afn_f32_nnan_ninf__y_3
+; CHECK-SAME: (float [[X:%.*]]) {
+; CHECK-NEXT: [[__POWX2:%.*]] = fmul nnan ninf afn float [[X]], [[X]]
+; CHECK-NEXT: [[__POWPROD:%.*]] = fmul nnan ninf afn float [[X]], [[__POWX2]]
+; CHECK-NEXT: ret float [[__POWPROD]]
+;
+ %pow = tail call afn nnan ninf float @_Z3powff(float %x, float 3.0)
+ ret float %pow
+}
+
+define float @test_pow_afn_f32__y_4(float %x) {
+; CHECK-LABEL: define float @test_pow_afn_f32__y_4
+; CHECK-SAME: (float [[X:%.*]]) {
+; CHECK-NEXT: [[POW:%.*]] = tail call afn float @_Z4pownfi(float [[X]], i32 4)
+; CHECK-NEXT: ret float [[POW]]
+;
+ %pow = tail call afn float @_Z3powff(float %x, float 4.0)
+ ret float %pow
+}
+
+define float @test_pow_afn_f32_nnan_ninf__y_4(float %x) {
+; CHECK-LABEL: define float @test_pow_afn_f32_nnan_ninf__y_4
+; CHECK-SAME: (float [[X:%.*]]) {
+; CHECK-NEXT: [[__POWX2:%.*]] = fmul nnan ninf afn float [[X]], [[X]]
+; CHECK-NEXT: [[__POWX21:%.*]] = fmul nnan ninf afn float [[__POWX2]], [[__POWX2]]
+; CHECK-NEXT: ret float [[__POWX21]]
+;
+ %pow = tail call afn nnan ninf float @_Z3powff(float %x, float 4.0)
+ ret float %pow
+}
+
+define float @test_pow_afn_f32_nnan_ninf__y_4_5(float %x) {
+; CHECK-LABEL: define float @test_pow_afn_f32_nnan_ninf__y_4_5
+; CHECK-SAME: (float [[X:%.*]]) {
+; CHECK-NEXT: [[POW:%.*]] = tail call nnan ninf afn float @_Z3powff(float [[X]], float 4.500000e+00)
+; CHECK-NEXT: ret float [[POW]]
+;
+ %pow = tail call afn nnan ninf float @_Z3powff(float %x, float 4.5)
+ ret float %pow
+}
+
+define float @test_pow_afn_f32__y_5(float %x) {
+; CHECK-LABEL: define float @test_pow_afn_f32__y_5
+; CHECK-SAME: (float [[X:%.*]]) {
+; CHECK-NEXT: [[POW:%.*]] = tail call afn float @_Z4pownfi(float [[X]], i32 5)
+; CHECK-NEXT: ret float [[POW]]
+;
+ %pow = tail call afn float @_Z3powff(float %x, float 5.0)
+ ret float %pow
+}
+
+define float @test_pow_afn_f32_nnan_ninf__y_5(float %x) {
+; CHECK-LABEL: define float @test_pow_afn_f32_nnan_ninf__y_5
+; CHECK-SAME: (float [[X:%.*]]) {
+; CHECK-NEXT: [[__POWX2:%.*]] = fmul nnan ninf afn float [[X]], [[X]]
+; CHECK-NEXT: [[__POWX21:%.*]] = fmul nnan ninf afn float [[__POWX2]], [[__POWX2]]
+; CHECK-NEXT: [[__POWPROD:%.*]] = fmul nnan ninf afn float [[X]], [[__POWX21]]
+; CHECK-NEXT: ret float [[__POWPROD]]
+;
+ %pow = tail call afn nnan ninf float @_Z3powff(float %x, float 5.0)
+ ret float %pow
+}
+
+define float @test_pow_afn_f32__y_neg5(float %x) {
+; CHECK-LABEL: define float @test_pow_afn_f32__y_neg5
+; CHECK-SAME: (float [[X:%.*]]) {
+; CHECK-NEXT: [[POW:%.*]] = tail call afn float @_Z4pownfi(float [[X]], i32 -5)
+; CHECK-NEXT: ret float [[POW]]
+;
+ %pow = tail call afn float @_Z3powff(float %x, float -5.0)
+ ret float %pow
+}
+
+define float @test_pow_afn_f32_nnan_ninf__y_neg5(float %x) {
+; CHECK-LABEL: define float @test_pow_afn_f32_nnan_ninf__y_neg5
+; CHECK-SAME: (float [[X:%.*]]) {
+; CHECK-NEXT: [[__POWX2:%.*]] = fmul nnan ninf afn float [[X]], [[X]]
+; CHECK-NEXT: [[__POWX21:%.*]] = fmul nnan ninf afn float [[__POWX2]], [[__POWX2]]
+; CHECK-NEXT: [[__POWPROD:%.*]] = fmul nnan ninf afn float [[X]], [[__POWX21]]
+; CHECK-NEXT: [[__1POWPROD:%.*]] = fdiv nnan ninf afn float 1.000000e+00, [[__POWPROD]]
+; CHECK-NEXT: ret float [[__1POWPROD]]
+;
+ %pow = tail call afn nnan ninf float @_Z3powff(float %x, float -5.0)
+ ret float %pow
+}
+
+define float @test_pow_afn_f32_nnan_ninf__y_10(float %x) {
+; CHECK-LABEL: define float @test_pow_afn_f32_nnan_ninf__y_10
+; CHECK-SAME: (float [[X:%.*]]) {
+; CHECK-NEXT: [[__POWX2:%.*]] = fmul nnan ninf afn float [[X]], [[X]]
+; CHECK-NEXT: [[__POWX21:%.*]] = fmul nnan ninf afn float [[__POWX2]], [[__POWX2]]
+; CHECK-NEXT: [[__POWX22:%.*]] = fmul nnan ninf afn float [[__POWX21]], [[__POWX21]]
+; CHECK-NEXT: [[__POWPROD:%.*]] = fmul nnan ninf afn float [[__POWX2]], [[__POWX22]]
+; CHECK-NEXT: ret float [[__POWPROD]]
+;
+ %pow = tail call afn nnan ninf float @_Z3powff(float %x, float 10.0)
+ ret float %pow
+}
+
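+; For reference, a hand-written sketch of the repeated-squaring chain the
+; checks above expect for x^10 (function and value names here are
+; illustrative only, not autogenerated):
+;
+; define float @sketch_pow_x10(float %x) {
+;   %x2  = fmul nnan ninf afn float %x, %x    ; x^2
+;   %x4  = fmul nnan ninf afn float %x2, %x2  ; x^4
+;   %x8  = fmul nnan ninf afn float %x4, %x4  ; x^8
+;   %x10 = fmul nnan ninf afn float %x2, %x8  ; x^10 = x^2 * x^8
+;   ret float %x10
+; }
+;
+; A negative integral y adds one final fdiv 1.0, %prod, as in the y_neg5 test.
+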
+define <2 x float> @test_pow_afn_v2f32_nnan_ninf__y_poison(<2 x float> %x) {
+; CHECK-LABEL: define <2 x float> @test_pow_afn_v2f32_nnan_ninf__y_poison
+; CHECK-SAME: (<2 x float> [[X:%.*]]) {
+; CHECK-NEXT: ret <2 x float> poison
+;
+ %pow = tail call afn nnan ninf <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> poison)
+ ret <2 x float> %pow
+}
+
+define <2 x float> @test_pow_afn_v2f32_nnan_ninf__y_3(<2 x float> %x) {
+; CHECK-LABEL: define <2 x float> @test_pow_afn_v2f32_nnan_ninf__y_3
+; CHECK-SAME: (<2 x float> [[X:%.*]]) {
+; CHECK-NEXT: [[__POWX2:%.*]] = fmul nnan ninf afn <2 x float> [[X]], [[X]]
+; CHECK-NEXT: [[__POWPROD:%.*]] = fmul nnan ninf afn <2 x float> [[X]], [[__POWX2]]
+; CHECK-NEXT: ret <2 x float> [[__POWPROD]]
+;
+ %pow = tail call afn nnan ninf <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> <float 3.0, float 3.0>)
+ ret <2 x float> %pow
+}
+
+define <2 x float> @test_pow_afn_v2f32_nnan_ninf__y_4(<2 x float> %x) {
+; CHECK-LABEL: define <2 x float> @test_pow_afn_v2f32_nnan_ninf__y_4
+; CHECK-SAME: (<2 x float> [[X:%.*]]) {
+; CHECK-NEXT: [[__POWX2:%.*]] = fmul nnan ninf afn <2 x float> [[X]], [[X]]
+; CHECK-NEXT: [[__POWX21:%.*]] = fmul nnan ninf afn <2 x float> [[__POWX2]], [[__POWX2]]
+; CHECK-NEXT: ret <2 x float> [[__POWX21]]
+;
+ %pow = tail call afn nnan ninf <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> <float 4.0, float 4.0>)
+ ret <2 x float> %pow
+}
+
+define <2 x float> @test_pow_afn_v2f32_nnan_ninf__y_4_5(<2 x float> %x) {
+; CHECK-LABEL: define <2 x float> @test_pow_afn_v2f32_nnan_ninf__y_4_5
+; CHECK-SAME: (<2 x float> [[X:%.*]]) {
+; CHECK-NEXT: [[POW:%.*]] = tail call nnan ninf afn <2 x float> @_Z3powDv2_fS_(<2 x float> [[X]], <2 x float> splat (float 4.500000e+00))
+; CHECK-NEXT: ret <2 x float> [[POW]]
+;
+ %pow = tail call afn nnan ninf <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> <float 4.5, float 4.5>)
+ ret <2 x float> %pow
+}
+
+define <2 x float> @test_pow_afn_v2f32_nnan_ninf__y_4_5_undef(<2 x float> %x) {
+; CHECK-LABEL: define <2 x float> @test_pow_afn_v2f32_nnan_ninf__y_4_5_undef
+; CHECK-SAME: (<2 x float> [[X:%.*]]) {
+; CHECK-NEXT: [[POW:%.*]] = tail call nnan ninf afn <2 x float> @_Z3powDv2_fS_(<2 x float> [[X]], <2 x float> <float 4.500000e+00, float poison>)
+; CHECK-NEXT: ret <2 x float> [[POW]]
+;
+ %pow = tail call afn nnan ninf <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> <float 4.5, float poison>)
+ ret <2 x float> %pow
+}
+
+define <2 x float> @test_pow_afn_v2f32_nnan_ninf__y_5(<2 x float> %x) {
+; CHECK-LABEL: define <2 x float> @test_pow_afn_v2f32_nnan_ninf__y_5
+; CHECK-SAME: (<2 x float> [[X:%.*]]) {
+; CHECK-NEXT: [[__POWX2:%.*]] = fmul nnan ninf afn <2 x float> [[X]], [[X]]
+; CHECK-NEXT: [[__POWX21:%.*]] = fmul nnan ninf afn <2 x float> [[__POWX2]], [[__POWX2]]
+; CHECK-NEXT: [[__POWPROD:%.*]] = fmul nnan ninf afn <2 x float> [[X]], [[__POWX21]]
+; CHECK-NEXT: ret <2 x float> [[__POWPROD]]
+;
+ %pow = tail call afn nnan ninf <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> <float 5.0, float 5.0>)
+ ret <2 x float> %pow
+}
+
+define float @test_pow_afn_f32_nnan_ninf__y_5_known_positive(float nofpclass(ninf nsub nnorm) %x) {
+; CHECK-LABEL: define float @test_pow_afn_f32_nnan_ninf__y_5_known_positive
+; CHECK-SAME: (float nofpclass(ninf nsub nnorm) [[X:%.*]]) {
+; CHECK-NEXT: [[__POWX2:%.*]] = fmul nnan ninf afn float [[X]], [[X]]
+; CHECK-NEXT: [[__POWX21:%.*]] = fmul nnan ninf afn float [[__POWX2]], [[__POWX2]]
+; CHECK-NEXT: [[__POWPROD:%.*]] = fmul nnan ninf afn float [[X]], [[__POWX21]]
+; CHECK-NEXT: ret float [[__POWPROD]]
+;
+ %pow = tail call afn nnan ninf float @_Z3powff(float %x, float 5.0)
+ ret float %pow
+}
+
+; We know we can ignore the missing ninf on the input because of the ninf flag on the call.
+define float @test_pow_afn_f32_nnan_ninf__y_5_known_positive_with_ninf_flag(float nofpclass(nsub nnorm) %x) {
+; CHECK-LABEL: define float @test_pow_afn_f32_nnan_ninf__y_5_known_positive_with_ninf_flag
+; CHECK-SAME: (float nofpclass(nsub nnorm) [[X:%.*]]) {
+; CHECK-NEXT: [[__POWX2:%.*]] = fmul nnan ninf afn float [[X]], [[X]]
+; CHECK-NEXT: [[__POWX21:%.*]] = fmul nnan ninf afn float [[__POWX2]], [[__POWX2]]
+; CHECK-NEXT: [[__POWPROD:%.*]] = fmul nnan ninf afn float [[X]], [[__POWX21]]
+; CHECK-NEXT: ret float [[__POWPROD]]
+;
+ %pow = tail call afn nnan ninf float @_Z3powff(float %x, float 5.0)
+ ret float %pow
+}
+
+define double @test_pow_afn_f64__y_3(double %x) {
+; CHECK-LABEL: define double @test_pow_afn_f64__y_3
+; CHECK-SAME: (double [[X:%.*]]) {
+; CHECK-NEXT: [[POW:%.*]] = tail call afn double @_Z4powndi(double [[X]], i32 3)
+; CHECK-NEXT: ret double [[POW]]
+;
+ %pow = tail call afn double @_Z3powdd(double %x, double 3.0)
+ ret double %pow
+}
+
+define double @test_pow_afn_f64_nnan_ninf__y_3(double %x) {
+; CHECK-LABEL: define double @test_pow_afn_f64_nnan_ninf__y_3
+; CHECK-SAME: (double [[X:%.*]]) {
+; CHECK-NEXT: [[__POWX2:%.*]] = fmul nnan ninf afn double [[X]], [[X]]
+; CHECK-NEXT: [[__POWPROD:%.*]] = fmul nnan ninf afn double [[X]], [[__POWX2]]
+; CHECK-NEXT: ret double [[__POWPROD]]
+;
+ %pow = tail call afn nnan ninf double @_Z3powdd(double %x, double 3.0)
+ ret double %pow
+}
+
+define double @test_pow_afn_f64__y_4(double %x) {
+; CHECK-LABEL: define double @test_pow_afn_f64__y_4
+; CHECK-SAME: (double [[X:%.*]]) {
+; CHECK-NEXT: [[POW:%.*]] = tail call afn double @_Z4powndi(double [[X]], i32 4)
+; CHECK-NEXT: ret double [[POW]]
+;
+ %pow = tail call afn double @_Z3powdd(double %x, double 4.0)
+ ret double %pow
+}
+
+define double @test_pow_afn_f64_nnan_ninf__y_4(double %x) {
+; CHECK-LABEL: define double @test_pow_afn_f64_nnan_ninf__y_4
+; CHECK-SAME: (double [[X:%.*]]) {
+; CHECK-NEXT: [[__POWX2:%.*]] = fmul nnan ninf afn double [[X]], [[X]]
+; CHECK-NEXT: [[__POWX21:%.*]] = fmul nnan ninf afn double [[__POWX2]], [[__POWX2]]
+; CHECK-NEXT: ret double [[__POWX21]]
+;
+ %pow = tail call afn nnan ninf double @_Z3powdd(double %x, double 4.0)
+ ret double %pow
+}
+
+define double @test_pow_afn_f64_nnan_ninf__y_4_5(double %x) {
+; CHECK-LABEL: define double @test_pow_afn_f64_nnan_ninf__y_4_5
+; CHECK-SAME: (double [[X:%.*]]) {
+; CHECK-NEXT: [[POW:%.*]] = tail call nnan ninf afn double @_Z3powdd(double [[X]], double 4.500000e+00)
+; CHECK-NEXT: ret double [[POW]]
+;
+ %pow = tail call afn nnan ninf double @_Z3powdd(double %x, double 4.5)
+ ret double %pow
+}
+
+define double @test_pow_afn_f64__y_5(double %x) {
+; CHECK-LABEL: define double @test_pow_afn_f64__y_5
+; CHECK-SAME: (double [[X:%.*]]) {
+; CHECK-NEXT: [[POW:%.*]] = tail call afn double @_Z4powndi(double [[X]], i32 5)
+; CHECK-NEXT: ret double [[POW]]
+;
+ %pow = tail call afn double @_Z3powdd(double %x, double 5.0)
+ ret double %pow
+}
+
+define double @test_pow_afn_f64_nnan_ninf__y_5(double %x) {
+; CHECK-LABEL: define double @test_pow_afn_f64_nnan_ninf__y_5
+; CHECK-SAME: (double [[X:%.*]]) {
+; CHECK-NEXT: [[__POWX2:%.*]] = fmul nnan ninf afn double [[X]], [[X]]
+; CHECK-NEXT: [[__POWX21:%.*]] = fmul nnan ninf afn double [[__POWX2]], [[__POWX2]]
+; CHECK-NEXT: [[__POWPROD:%.*]] = fmul nnan ninf afn double [[X]], [[__POWX21]]
+; CHECK-NEXT: ret double [[__POWPROD]]
+;
+ %pow = tail call afn nnan ninf double @_Z3powdd(double %x, double 5.0)
+ ret double %pow
+}
+
+define double @test_pow_afn_f64__y_neg5(double %x) {
+; CHECK-LABEL: define double @test_pow_afn_f64__y_neg5
+; CHECK-SAME: (double [[X:%.*]]) {
+; CHECK-NEXT: [[POW:%.*]] = tail call afn double @_Z4powndi(double [[X]], i32 -5)
+; CHECK-NEXT: ret double [[POW]]
+;
+ %pow = tail call afn double @_Z3powdd(double %x, double -5.0)
+ ret double %pow
+}
+
+define double @test_pow_afn_f64_nnan_ninf__y_neg5(double %x) {
+; CHECK-LABEL: define double @test_pow_afn_f64_nnan_ninf__y_neg5
+; CHECK-SAME: (double [[X:%.*]]) {
+; CHECK-NEXT: [[__POWX2:%.*]] = fmul nnan ninf afn double [[X]], [[X]]
+; CHECK-NEXT: [[__POWX21:%.*]] = fmul nnan ninf afn double [[__POWX2]], [[__POWX2]]
+; CHECK-NEXT: [[__POWPROD:%.*]] = fmul nnan ninf afn double [[X]], [[__POWX21]]
+; CHECK-NEXT: [[__1POWPROD:%.*]] = fdiv nnan ninf afn double 1.000000e+00, [[__POWPROD]]
+; CHECK-NEXT: ret double [[__1POWPROD]]
+;
+ %pow = tail call afn nnan ninf double @_Z3powdd(double %x, double -5.0)
+ ret double %pow
+}
+
+define double @test_pow_afn_f64_nnan_ninf__y_10(double %x) {
+; CHECK-LABEL: define double @test_pow_afn_f64_nnan_ninf__y_10
+; CHECK-SAME: (double [[X:%.*]]) {
+; CHECK-NEXT: [[__POWX2:%.*]] = fmul nnan ninf afn double [[X]], [[X]]
+; CHECK-NEXT: [[__POWX21:%.*]] = fmul nnan ninf afn double [[__POWX2]], [[__POWX2]]
+; CHECK-NEXT: [[__POWX22:%.*]] = fmul nnan ninf afn double [[__POWX21]], [[__POWX21]]
+; CHECK-NEXT: [[__POWPROD:%.*]] = fmul nnan ninf afn double [[__POWX2]], [[__POWX22]]
+; CHECK-NEXT: ret double [[__POWPROD]]
+;
+ %pow = tail call afn nnan ninf double @_Z3powdd(double %x, double 10.0)
+ ret double %pow
+}
+
+define <2 x double> @test_pow_afn_v2f64_nnan_ninf__y_3(<2 x double> %x) {
+; CHECK-LABEL: define <2 x double> @test_pow_afn_v2f64_nnan_ninf__y_3
+; CHECK-SAME: (<2 x double> [[X:%.*]]) {
+; CHECK-NEXT: [[__POWX2:%.*]] = fmul nnan ninf afn <2 x double> [[X]], [[X]]
+; CHECK-NEXT: [[__POWPROD:%.*]] = fmul nnan ninf afn <2 x double> [[X]], [[__POWX2]]
+; CHECK-NEXT: ret <2 x double> [[__POWPROD]]
+;
+ %pow = tail call afn nnan ninf <2 x double> @_Z3powDv2_dS_(<2 x double> %x, <2 x double> <double 3.0, double 3.0>)
+ ret <2 x double> %pow
+}
+
+define <2 x double> @test_pow_afn_v2f64_nnan_ninf__y_4(<2 x double> %x) {
+; CHECK-LABEL: define <2 x double> @test_pow_afn_v2f64_nnan_ninf__y_4
+; CHECK-SAME: (<2 x double> [[X:%.*]]) {
+; CHECK-NEXT: [[__POWX2:%.*]] = fmul nnan ninf afn <2 x double> [[X]], [[X]]
+; CHECK-NEXT: [[__POWX21:%.*]] = fmul nnan ninf afn <2 x double> [[__POWX2]], [[__POWX2]]
+; CHECK-NEXT: ret <2 x double> [[__POWX21]]
+;
+ %pow = tail call afn nnan ninf <2 x double> @_Z3powDv2_dS_(<2 x double> %x, <2 x double> <double 4.0, double 4.0>)
+ ret <2 x double> %pow
+}
+
+define <2 x double> @test_pow_afn_v2f64_nnan_ninf__y_4_5(<2 x double> %x) {
+; CHECK-LABEL: define <2 x double> @test_pow_afn_v2f64_nnan_ninf__y_4_5
+; CHECK-SAME: (<2 x double> [[X:%.*]]) {
+; CHECK-NEXT: [[POW:%.*]] = tail call nnan ninf afn <2 x double> @_Z3powDv2_dS_(<2 x double> [[X]], <2 x double> splat (double 4.500000e+00))
+; CHECK-NEXT: ret <2 x double> [[POW]]
+;
+ %pow = tail call afn nnan ninf <2 x double> @_Z3powDv2_dS_(<2 x double> %x, <2 x double> <double 4.5, double 4.5>)
+ ret <2 x double> %pow
+}
+
+define <2 x double> @test_pow_afn_v2f64_nnan_ninf__y_5(<2 x double> %x) {
+; CHECK-LABEL: define <2 x double> @test_pow_afn_v2f64_nnan_ninf__y_5
+; CHECK-SAME: (<2 x double> [[X:%.*]]) {
+; CHECK-NEXT: [[__POWX2:%.*]] = fmul nnan ninf afn <2 x double> [[X]], [[X]]
+; CHECK-NEXT: [[__POWX21:%.*]] = fmul nnan ninf afn <2 x double> [[__POWX2]], [[__POWX2]]
+; CHECK-NEXT: [[__POWPROD:%.*]] = fmul nnan ninf afn <2 x double> [[X]], [[__POWX21]]
+; CHECK-NEXT: ret <2 x double> [[__POWPROD]]
+;
+ %pow = tail call afn nnan ninf <2 x double> @_Z3powDv2_dS_(<2 x double> %x, <2 x double> <double 5.0, double 5.0>)
+ ret <2 x double> %pow
+}
+
+define half @test_pow_afn_f16__y_3(half %x) {
+; CHECK-LABEL: define half @test_pow_afn_f16__y_3
+; CHECK-SAME: (half [[X:%.*]]) {
+; CHECK-NEXT: [[POW:%.*]] = tail call afn half @_Z4pownDhi(half [[X]], i32 3)
+; CHECK-NEXT: ret half [[POW]]
+;
+ %pow = tail call afn half @_Z3powDhDh(half %x, half 3.0)
+ ret half %pow
+}
+
+define half @test_pow_afn_f16_nnan_ninf__y_3(half %x) {
+; CHECK-LABEL: define half @test_pow_afn_f16_nnan_ninf__y_3
+; CHECK-SAME: (half [[X:%.*]]) {
+; CHECK-NEXT: [[__POWX2:%.*]] = fmul nnan ninf afn half [[X]], [[X]]
+; CHECK-NEXT: [[__POWPROD:%.*]] = fmul nnan ninf afn half [[X]], [[__POWX2]]
+; CHECK-NEXT: ret half [[__POWPROD]]
+;
+ %pow = tail call afn nnan ninf half @_Z3powDhDh(half %x, half 3.0)
+ ret half %pow
+}
+
+define half @test_pow_afn_f16__y_4(half %x) {
+; CHECK-LABEL: define half @test_pow_afn_f16__y_4
+; CHECK-SAME: (half [[X:%.*]]) {
+; CHECK-NEXT: [[POW:%.*]] = tail call afn half @_Z4pownDhi(half [[X]], i32 4)
+; CHECK-NEXT: ret half [[POW]]
+;
+ %pow = tail call afn half @_Z3powDhDh(half %x, half 4.0)
+ ret half %pow
+}
+
+define half @test_pow_afn_f16_nnan_ninf__y_4(half %x) {
+; CHECK-LABEL: define half @test_pow_afn_f16_nnan_ninf__y_4
+; CHECK-SAME: (half [[X:%.*]]) {
+; CHECK-NEXT: [[__POWX2:%.*]] = fmul nnan ninf afn half [[X]], [[X]]
+; CHECK-NEXT: [[__POWX21:%.*]] = fmul nnan ninf afn half [[__POWX2]], [[__POWX2]]
+; CHECK-NEXT: ret half [[__POWX21]]
+;
+ %pow = tail call afn nnan ninf half @_Z3powDhDh(half %x, half 4.0)
+ ret half %pow
+}
+
+define half @test_pow_afn_f16_nnan_ninf__y_4_5(half %x) {
+; CHECK-LABEL: define half @test_pow_afn_f16_nnan_ninf__y_4_5
+; CHECK-SAME: (half [[X:%.*]]) {
+; CHECK-NEXT: [[POW:%.*]] = tail call nnan ninf afn half @_Z3powDhDh(half [[X]], half 0xH4480)
+; CHECK-NEXT: ret half [[POW]]
+;
+ %pow = tail call afn nnan ninf half @_Z3powDhDh(half %x, half 4.5)
+ ret half %pow
+}
+
+define half @test_pow_afn_f16__y_5(half %x) {
+; CHECK-LABEL: define half @test_pow_afn_f16__y_5
+; CHECK-SAME: (half [[X:%.*]]) {
+; CHECK-NEXT: [[POW:%.*]] = tail call afn half @_Z4pownDhi(half [[X]], i32 5)
+; CHECK-NEXT: ret half [[POW]]
+;
+ %pow = tail call afn half @_Z3powDhDh(half %x, half 5.0)
+ ret half %pow
+}
+
+define half @test_pow_afn_f16_nnan_ninf__y_5(half %x) {
+; CHECK-LABEL: define half @test_pow_afn_f16_nnan_ninf__y_5
+; CHECK-SAME: (half [[X:%.*]]) {
+; CHECK-NEXT: [[__POWX2:%.*]] = fmul nnan ninf afn half [[X]], [[X]]
+; CHECK-NEXT: [[__POWX21:%.*]] = fmul nnan ninf afn half [[__POWX2]], [[__POWX2]]
+; CHECK-NEXT: [[__POWPROD:%.*]] = fmul nnan ninf afn half [[X]], [[__POWX21]]
+; CHECK-NEXT: ret half [[__POWPROD]]
+;
+ %pow = tail call afn nnan ninf half @_Z3powDhDh(half %x, half 5.0)
+ ret half %pow
+}
+
+define half @test_pow_afn_f16__y_neg5(half %x) {
+; CHECK-LABEL: define half @test_pow_afn_f16__y_neg5
+; CHECK-SAME: (half [[X:%.*]]) {
+; CHECK-NEXT: [[POW:%.*]] = tail call afn half @_Z4pownDhi(half [[X]], i32 -5)
+; CHECK-NEXT: ret half [[POW]]
+;
+ %pow = tail call afn half @_Z3powDhDh(half %x, half -5.0)
+ ret half %pow
+}
+
+define half @test_pow_afn_f16_nnan_ninf__y_neg5(half %x) {
+; CHECK-LABEL: define half @test_pow_afn_f16_nnan_ninf__y_neg5
+; CHECK-SAME: (half [[X:%.*]]) {
+; CHECK-NEXT: [[__POWX2:%.*]] = fmul nnan ninf afn half [[X]], [[X]]
+; CHECK-NEXT: [[__POWX21:%.*]] = fmul nnan ninf afn half [[__POWX2]], [[__POWX2]]
+; CHECK-NEXT: [[__POWPROD:%.*]] = fmul nnan ninf afn half [[X]], [[__POWX21]]
+; CHECK-NEXT: [[__1POWPROD:%.*]] = fdiv nnan ninf afn half 0xH3C00, [[__POWPROD]]
+; CHECK-NEXT: ret half [[__1POWPROD]]
+;
+ %pow = tail call afn nnan ninf half @_Z3powDhDh(half %x, half -5.0)
+ ret half %pow
+}
+
+define half @test_pow_afn_f16_nnan_ninf__y_10(half %x) {
+; CHECK-LABEL: define half @test_pow_afn_f16_nnan_ninf__y_10
+; CHECK-SAME: (half [[X:%.*]]) {
+; CHECK-NEXT: [[__POWX2:%.*]] = fmul nnan ninf afn half [[X]], [[X]]
+; CHECK-NEXT: [[__POWX21:%.*]] = fmul nnan ninf afn half [[__POWX2]], [[__POWX2]]
+; CHECK-NEXT: [[__POWX22:%.*]] = fmul nnan ninf afn half [[__POWX21]], [[__POWX21]]
+; CHECK-NEXT: [[__POWPROD:%.*]] = fmul nnan ninf afn half [[__POWX2]], [[__POWX22]]
+; CHECK-NEXT: ret half [[__POWPROD]]
+;
+ %pow = tail call afn nnan ninf half @_Z3powDhDh(half %x, half 10.0)
+ ret half %pow
+}
+
+define <2 x half> @test_pow_afn_v2f16_nnan_ninf__y_3(<2 x half> %x) {
+; CHECK-LABEL: define <2 x half> @test_pow_afn_v2f16_nnan_ninf__y_3
+; CHECK-SAME: (<2 x half> [[X:%.*]]) {
+; CHECK-NEXT: [[__POWX2:%.*]] = fmul nnan ninf afn <2 x half> [[X]], [[X]]
+; CHECK-NEXT: [[__POWPROD:%.*]] = fmul nnan ninf afn <2 x half> [[X]], [[__POWX2]]
+; CHECK-NEXT: ret <2 x half> [[__POWPROD]]
+;
+ %pow = tail call afn nnan ninf <2 x half> @_Z3powDv2_DhS_(<2 x half> %x, <2 x half> <half 3.0, half 3.0>)
+ ret <2 x half> %pow
+}
+
+define <2 x half> @test_pow_afn_v2f16_nnan_ninf__y_4(<2 x half> %x) {
+; CHECK-LABEL: define <2 x half> @test_pow_afn_v2f16_nnan_ninf__y_4
+; CHECK-SAME: (<2 x half> [[X:%.*]]) {
+; CHECK-NEXT: [[__POWX2:%.*]] = fmul nnan ninf afn <2 x half> [[X]], [[X]]
+; CHECK-NEXT: [[__POWX21:%.*]] = fmul nnan ninf afn <2 x half> [[__POWX2]], [[__POWX2]]
+; CHECK-NEXT: ret <2 x half> [[__POWX21]]
+;
+ %pow = tail call afn nnan ninf <2 x half> @_Z3powDv2_DhS_(<2 x half> %x, <2 x half> <half 4.0, half 4.0>)
+ ret <2 x half> %pow
+}
+
+define <2 x half> @test_pow_afn_v2f16_nnan_ninf__y_4_5(<2 x half> %x) {
+; CHECK-LABEL: define <2 x half> @test_pow_afn_v2f16_nnan_ninf__y_4_5
+; CHECK-SAME: (<2 x half> [[X:%.*]]) {
+; CHECK-NEXT: [[POW:%.*]] = tail call nnan ninf afn <2 x half> @_Z3powDv2_DhS_(<2 x half> [[X]], <2 x half> splat (half 0xH4480))
+; CHECK-NEXT: ret <2 x half> [[POW]]
+;
+ %pow = tail call afn nnan ninf <2 x half> @_Z3powDv2_DhS_(<2 x half> %x, <2 x half> <half 4.5, half 4.5>)
+ ret <2 x half> %pow
+}
+
+define <2 x half> @test_pow_afn_v2f16_nnan_ninf__y_5(<2 x half> %x) {
+; CHECK-LABEL: define <2 x half> @test_pow_afn_v2f16_nnan_ninf__y_5
+; CHECK-SAME: (<2 x half> [[X:%.*]]) {
+; CHECK-NEXT: [[__POWX2:%.*]] = fmul nnan ninf afn <2 x half> [[X]], [[X]]
+; CHECK-NEXT: [[__POWX21:%.*]] = fmul nnan ninf afn <2 x half> [[__POWX2]], [[__POWX2]]
+; CHECK-NEXT: [[__POWPROD:%.*]] = fmul nnan ninf afn <2 x half> [[X]], [[__POWX21]]
+; CHECK-NEXT: ret <2 x half> [[__POWPROD]]
+;
+ %pow = tail call afn nnan ninf <2 x half> @_Z3powDv2_DhS_(<2 x half> %x, <2 x half> <half 5.0, half 5.0>)
+ ret <2 x half> %pow
+}
+
+define float @test_pow_f32_known_integral_sitofp(float %x, i32 %y) {
+; CHECK-LABEL: define float @test_pow_f32_known_integral_sitofp
+; CHECK-SAME: (float [[X:%.*]], i32 [[Y:%.*]]) {
+; CHECK-NEXT: [[Y_CAST:%.*]] = sitofp i32 [[Y]] to float
+; CHECK-NEXT: [[TMP1:%.*]] = fptosi float [[Y_CAST]] to i32
+; CHECK-NEXT: [[POW:%.*]] = tail call float @_Z4pownfi(float [[X]], i32 [[TMP1]])
+; CHECK-NEXT: ret float [[POW]]
+;
+ %y.cast = sitofp i32 %y to float
+ %pow = tail call float @_Z3powff(float %x, float %y.cast)
+ ret float %pow
+}
+
+define float @test_pow_afn_f32_known_integral_sitofp(float %x, i32 %y) {
+; CHECK-LABEL: define float @test_pow_afn_f32_known_integral_sitofp
+; CHECK-SAME: (float [[X:%.*]], i32 [[Y:%.*]]) {
+; CHECK-NEXT: [[Y_CAST:%.*]] = sitofp i32 [[Y]] to float
+; CHECK-NEXT: [[TMP1:%.*]] = fptosi float [[Y_CAST]] to i32
+; CHECK-NEXT: [[POW:%.*]] = tail call afn float @_Z4pownfi(float [[X]], i32 [[TMP1]])
+; CHECK-NEXT: ret float [[POW]]
+;
+ %y.cast = sitofp i32 %y to float
+ %pow = tail call afn float @_Z3powff(float %x, float %y.cast)
+ ret float %pow
+}
+
+define float @test_pow_afn_nnan_ninf_f32_known_integral_sitofp(float %x, i32 %y) {
+; CHECK-LABEL: define float @test_pow_afn_nnan_ninf_f32_known_integral_sitofp
+; CHECK-SAME: (float [[X:%.*]], i32 [[Y:%.*]]) {
+; CHECK-NEXT: [[Y_CAST:%.*]] = sitofp i32 [[Y]] to float
+; CHECK-NEXT: [[TMP1:%.*]] = fptosi float [[Y_CAST]] to i32
+; CHECK-NEXT: [[__FABS:%.*]] = call nnan ninf afn float @llvm.fabs.f32(float [[X]])
+; CHECK-NEXT: [[__LOG2:%.*]] = call nnan ninf afn float @llvm.log2.f32(float [[__FABS]])
+; CHECK-NEXT: [[POWNI2F:%.*]] = sitofp i32 [[TMP1]] to float
+; CHECK-NEXT: [[__YLOGX:%.*]] = fmul nnan ninf afn float [[__LOG2]], [[POWNI2F]]
+; CHECK-NEXT: [[__EXP2:%.*]] = call nnan ninf afn float @llvm.exp2.f32(float [[__YLOGX]])
+; CHECK-NEXT: [[__YEVEN:%.*]] = shl i32 [[TMP1]], 31
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast float [[X]] to i32
+; CHECK-NEXT: [[__POW_SIGN:%.*]] = and i32 [[__YEVEN]], [[TMP2]]
+; CHECK-NEXT: [[TMP3:%.*]] = bitcast float [[__EXP2]] to i32
+; CHECK-NEXT: [[TMP4:%.*]] = or disjoint i32 [[__POW_SIGN]], [[TMP3]]
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast i32 [[TMP4]] to float
+; CHECK-NEXT: ret float [[TMP5]]
+;
+ %y.cast = sitofp i32 %y to float
+ %pow = tail call afn nnan ninf float @_Z3powff(float %x, float %y.cast)
+ ret float %pow
+}
+
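+; Restating the expansion above: with nnan ninf afn and an integral y = n,
+;   pow(x, n) = exp2(n * log2(|x|)) with the sign bit ORed back in, where
+;   sign = (n << 31) & bitcast(x), i.e. the result is negative exactly when
+;   x is negative and n is odd.
+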
+define float @test_pow_afn_nnan_f32_known_integral_sitofp(float %x, i32 %y) {
+; CHECK-LABEL: define float @test_pow_afn_nnan_f32_known_integral_sitofp
+; CHECK-SAME: (float [[X:%.*]], i32 [[Y:%.*]]) {
+; CHECK-NEXT: [[Y_CAST:%.*]] = sitofp i32 [[Y]] to float
+; CHECK-NEXT: [[TMP1:%.*]] = fptosi float [[Y_CAST]] to i32
+; CHECK-NEXT: [[POW:%.*]] = tail call nnan afn float @_Z4pownfi(float [[X]], i32 [[TMP1]])
+; CHECK-NEXT: ret float [[POW]]
+;
+ %y.cast = sitofp i32 %y to float
+ %pow = tail call afn nnan float @_Z3powff(float %x, float %y.cast)
+ ret float %pow
+}
+
+define float @test_pow_afn_ninf_f32_known_integral_sitofp(float %x, i32 %y) {
+; CHECK-LABEL: define float @test_pow_afn_ninf_f32_known_integral_sitofp
+; CHECK-SAME: (float [[X:%.*]], i32 [[Y:%.*]]) {
+; CHECK-NEXT: [[Y_CAST:%.*]] = sitofp i32 [[Y]] to float
+; CHECK-NEXT: [[TMP1:%.*]] = fptosi float [[Y_CAST]] to i32
+; CHECK-NEXT: [[POW:%.*]] = tail call ninf afn float @_Z4pownfi(float [[X]], i32 [[TMP1]])
+; CHECK-NEXT: ret float [[POW]]
+;
+ %y.cast = sitofp i32 %y to float
+ %pow = tail call afn ninf float @_Z3powff(float %x, float %y.cast)
+ ret float %pow
+}
+
+define float @test_pow_afn_f32_known_integral_sitofp_finite_argument(float %x, i32 %y) {
+; CHECK-LABEL: define float @test_pow_afn_f32_known_integral_sitofp_finite_argument
+; CHECK-SAME: (float [[X:%.*]], i32 [[Y:%.*]]) {
+; CHECK-NEXT: [[Y_CAST:%.*]] = sitofp i32 [[Y]] to float
+; CHECK-NEXT: [[TMP1:%.*]] = fptosi float [[Y_CAST]] to i32
+; CHECK-NEXT: [[POW:%.*]] = tail call float @_Z4pownfi(float [[X]], i32 [[TMP1]])
+; CHECK-NEXT: ret float [[POW]]
+;
+ %y.cast = sitofp i32 %y to float
+ %pow = tail call float @_Z3powff(float %x, float nofpclass(inf nan) %y.cast)
+ ret float %pow
+}
+
+define float @test_pow_f32_known_integral_uitofp(float %x, i32 %y) {
+; CHECK-LABEL: define float @test_pow_f32_known_integral_uitofp
+; CHECK-SAME: (float [[X:%.*]], i32 [[Y:%.*]]) {
+; CHECK-NEXT: [[Y_CAST:%.*]] = uitofp i32 [[Y]] to float
+; CHECK-NEXT: [[TMP1:%.*]] = fptosi float [[Y_CAST]] to i32
+; CHECK-NEXT: [[POW:%.*]] = tail call float @_Z4pownfi(float [[X]], i32 [[TMP1]])
+; CHECK-NEXT: ret float [[POW]]
+;
+ %y.cast = uitofp i32 %y to float
+ %pow = tail call float @_Z3powff(float %x, float %y.cast)
+ ret float %pow
+}
+
+define float @test_pow_afn_f32_known_integral_uitofp(float %x, i32 %y) {
+; CHECK-LABEL: define float @test_pow_afn_f32_known_integral_uitofp
+; CHECK-SAME: (float [[X:%.*]], i32 [[Y:%.*]]) {
+; CHECK-NEXT: [[Y_CAST:%.*]] = uitofp i32 [[Y]] to float
+; CHECK-NEXT: [[TMP1:%.*]] = fptosi float [[Y_CAST]] to i32
+; CHECK-NEXT: [[POW:%.*]] = tail call afn float @_Z4pownfi(float [[X]], i32 [[TMP1]])
+; CHECK-NEXT: ret float [[POW]]
+;
+ %y.cast = uitofp i32 %y to float
+ %pow = tail call afn float @_Z3powff(float %x, float %y.cast)
+ ret float %pow
+}
+
+define float @test_pow_afn_nnan_ninf_f32_known_integral_uitofp(float %x, i32 %y) {
+; CHECK-LABEL: define float @test_pow_afn_nnan_ninf_f32_known_integral_uitofp
+; CHECK-SAME: (float [[X:%.*]], i32 [[Y:%.*]]) {
+; CHECK-NEXT: [[Y_CAST:%.*]] = uitofp i32 [[Y]] to float
+; CHECK-NEXT: [[TMP1:%.*]] = fptosi float [[Y_CAST]] to i32
+; CHECK-NEXT: [[__FABS:%.*]] = call nnan ninf afn float @llvm.fabs.f32(float [[X]])
+; CHECK-NEXT: [[__LOG2:%.*]] = call nnan ninf afn float @llvm.log2.f32(float [[__FABS]])
+; CHECK-NEXT: [[POWNI2F:%.*]] = sitofp i32 [[TMP1]] to float
+; CHECK-NEXT: [[__YLOGX:%.*]] = fmul nnan ninf afn float [[__LOG2]], [[POWNI2F]]
+; CHECK-NEXT: [[__EXP2:%.*]] = call nnan ninf afn float @llvm.exp2.f32(float [[__YLOGX]])
+; CHECK-NEXT: [[__YEVEN:%.*]] = shl i32 [[TMP1]], 31
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast float [[X]] to i32
+; CHECK-NEXT: [[__POW_SIGN:%.*]] = and i32 [[__YEVEN]], [[TMP2]]
+; CHECK-NEXT: [[TMP3:%.*]] = bitcast float [[__EXP2]] to i32
+; CHECK-NEXT: [[TMP4:%.*]] = or disjoint i32 [[__POW_SIGN]], [[TMP3]]
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast i32 [[TMP4]] to float
+; CHECK-NEXT: ret float [[TMP5]]
+;
+ %y.cast = uitofp i32 %y to float
+ %pow = tail call afn nnan ninf float @_Z3powff(float %x, float %y.cast)
+ ret float %pow
+}
+
+; A cast from i256 may produce infinity, so we cannot assume an integral value without ninf.
+define float @test_pow_afn_nnan_f32_known_integral_uitofp_i256(float %x, i256 %y) {
+; CHECK-LABEL: define float @test_pow_afn_nnan_f32_known_integral_uitofp_i256
+; CHECK-SAME: (float [[X:%.*]], i256 [[Y:%.*]]) {
+; CHECK-NEXT: [[Y_CAST:%.*]] = uitofp i256 [[Y]] to float
+; CHECK-NEXT: [[POW:%.*]] = tail call nnan afn float @_Z3powff(float [[X]], float [[Y_CAST]])
+; CHECK-NEXT: ret float [[POW]]
+;
+ %y.cast = uitofp i256 %y to float
+ %pow = tail call afn nnan float @_Z3powff(float %x, float %y.cast)
+ ret float %pow
+}
+
+; A cast from i256 may produce infinity, so we cannot assume an integral value without ninf.
+define float @test_pow_afn_nnan_f32_known_integral_sitofp_i256(float %x, i256 %y) {
+; CHECK-LABEL: define float @test_pow_afn_nnan_f32_known_integral_sitofp_i256
+; CHECK-SAME: (float [[X:%.*]], i256 [[Y:%.*]]) {
+; CHECK-NEXT: [[Y_CAST:%.*]] = sitofp i256 [[Y]] to float
+; CHECK-NEXT: [[POW:%.*]] = tail call nnan afn float @_Z3powff(float [[X]], float [[Y_CAST]])
+; CHECK-NEXT: ret float [[POW]]
+;
+ %y.cast = sitofp i256 %y to float
+ %pow = tail call afn nnan float @_Z3powff(float %x, float %y.cast)
+ ret float %pow
+}
+
+define float @test_pow_afn_nnan_ninf_f32_known_integral_uitofp_i256(float %x, i256 %y) {
+; CHECK-LABEL: define float @test_pow_afn_nnan_ninf_f32_known_integral_uitofp_i256
+; CHECK-SAME: (float [[X:%.*]], i256 [[Y:%.*]]) {
+; CHECK-NEXT: [[Y_CAST:%.*]] = uitofp i256 [[Y]] to float
+; CHECK-NEXT: [[TMP1:%.*]] = fptosi float [[Y_CAST]] to i32
+; CHECK-NEXT: [[__FABS:%.*]] = call nnan ninf afn float @llvm.fabs.f32(float [[X]])
+; CHECK-NEXT: [[__LOG2:%.*]] = call nnan ninf afn float @llvm.log2.f32(float [[__FABS]])
+; CHECK-NEXT: [[POWNI2F:%.*]] = sitofp i32 [[TMP1]] to float
+; CHECK-NEXT: [[__YLOGX:%.*]] = fmul nnan ninf afn float [[__LOG2]], [[POWNI2F]]
+; CHECK-NEXT: [[__EXP2:%.*]] = call nnan ninf afn float @llvm.exp2.f32(float [[__YLOGX]])
+; CHECK-NEXT: [[__YEVEN:%.*]] = shl i32 [[TMP1]], 31
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast float [[X]] to i32
+; CHECK-NEXT: [[__POW_SIGN:%.*]] = and i32 [[__YEVEN]], [[TMP2]]
+; CHECK-NEXT: [[TMP3:%.*]] = bitcast float [[__EXP2]] to i32
+; CHECK-NEXT: [[TMP4:%.*]] = or disjoint i32 [[__POW_SIGN]], [[TMP3]]
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast i32 [[TMP4]] to float
+; CHECK-NEXT: ret float [[TMP5]]
+;
+ %y.cast = uitofp i256 %y to float
+ %pow = tail call afn nnan ninf float @_Z3powff(float %x, float %y.cast)
+ ret float %pow
+}
+
+define float @test_pow_afn_nnan_ninf_f32_known_integral_sitofp_i256(float %x, i256 %y) {
+; CHECK-LABEL: define float @test_pow_afn_nnan_ninf_f32_known_integral_sitofp_i256
+; CHECK-SAME: (float [[X:%.*]], i256 [[Y:%.*]]) {
+; CHECK-NEXT: [[Y_CAST:%.*]] = sitofp i256 [[Y]] to float
+; CHECK-NEXT: [[TMP1:%.*]] = fptosi float [[Y_CAST]] to i32
+; CHECK-NEXT: [[__FABS:%.*]] = call nnan ninf afn float @llvm.fabs.f32(float [[X]])
+; CHECK-NEXT: [[__LOG2:%.*]] = call nnan ninf afn float @llvm.log2.f32(float [[__FABS]])
+; CHECK-NEXT: [[POWNI2F:%.*]] = sitofp i32 [[TMP1]] to float
+; CHECK-NEXT: [[__YLOGX:%.*]] = fmul nnan ninf afn float [[__LOG2]], [[POWNI2F]]
+; CHECK-NEXT: [[__EXP2:%.*]] = call nnan ninf afn float @llvm.exp2.f32(float [[__YLOGX]])
+; CHECK-NEXT: [[__YEVEN:%.*]] = shl i32 [[TMP1]], 31
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast float [[X]] to i32
+; CHECK-NEXT: [[__POW_SIGN:%.*]] = and i32 [[__YEVEN]], [[TMP2]]
+; CHECK-NEXT: [[TMP3:%.*]] = bitcast float [[__EXP2]] to i32
+; CHECK-NEXT: [[TMP4:%.*]] = or disjoint i32 [[__POW_SIGN]], [[TMP3]]
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast i32 [[TMP4]] to float
+; CHECK-NEXT: ret float [[TMP5]]
+;
+ %y.cast = sitofp i256 %y to float
+ %pow = tail call afn nnan ninf float @_Z3powff(float %x, float %y.cast)
+ ret float %pow
+}
+
+define <2 x float> @test_pow_afn_nnan_ninf_v2f32_known_integral_sitofp(<2 x float> %x, <2 x i32> %y) {
+; CHECK-LABEL: define <2 x float> @test_pow_afn_nnan_ninf_v2f32_known_integral_sitofp
+; CHECK-SAME: (<2 x float> [[X:%.*]], <2 x i32> [[Y:%.*]]) {
+; CHECK-NEXT: [[Y_CAST:%.*]] = sitofp <2 x i32> [[Y]] to <2 x float>
+; CHECK-NEXT: [[TMP1:%.*]] = fptosi <2 x float> [[Y_CAST]] to <2 x i32>
+; CHECK-NEXT: [[__FABS:%.*]] = call nnan ninf afn <2 x float> @llvm.fabs.v2f32(<2 x float> [[X]])
+; CHECK-NEXT: [[__LOG2:%.*]] = call nnan ninf afn <2 x float> @llvm.log2.v2f32(<2 x float> [[__FABS]])
+; CHECK-NEXT: [[POWNI2F:%.*]] = sitofp <2 x i32> [[TMP1]] to <2 x float>
+; CHECK-NEXT: [[__YLOGX:%.*]] = fmul nnan ninf afn <2 x float> [[__LOG2]], [[POWNI2F]]
+; CHECK-NEXT: [[__EXP2:%.*]] = call nnan ninf afn <2 x float> @llvm.exp2.v2f32(<2 x float> [[__YLOGX]])
+; CHECK-NEXT: [[__YEVEN:%.*]] = shl <2 x i32> [[TMP1]], splat (i32 31)
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x float> [[X]] to <2 x i32>
+; CHECK-NEXT: [[__POW_SIGN:%.*]] = and <2 x i32> [[__YEVEN]], [[TMP2]]
+; CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x float> [[__EXP2]] to <2 x i32>
+; CHECK-NEXT: [[TMP4:%.*]] = or disjoint <2 x i32> [[__POW_SIGN]], [[TMP3]]
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP4]] to <2 x float>
+; CHECK-NEXT: ret <2 x float> [[TMP5]]
+;
+ %y.cast = sitofp <2 x i32> %y to <2 x float>
+ %pow = tail call afn nnan ninf <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> %y.cast)
+ ret <2 x float> %pow
+}
+
+define <2 x float> @test_pow_v2f32_known_integral_uitofp(<2 x float> %x, <2 x i32> %y) {
+; CHECK-LABEL: define <2 x float> @test_pow_v2f32_known_integral_uitofp
+; CHECK-SAME: (<2 x float> [[X:%.*]], <2 x i32> [[Y:%.*]]) {
+; CHECK-NEXT: [[Y_CAST:%.*]] = uitofp <2 x i32> [[Y]] to <2 x float>
+; CHECK-NEXT: [[TMP1:%.*]] = fptosi <2 x float> [[Y_CAST]] to <2 x i32>
+; CHECK-NEXT: [[POW:%.*]] = tail call <2 x float> @_Z4pownDv2_fDv2_i(<2 x float> [[X]], <2 x i32> [[TMP1]])
+; CHECK-NEXT: ret <2 x float> [[POW]]
+;
+ %y.cast = uitofp <2 x i32> %y to <2 x float>
+ %pow = tail call <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> %y.cast)
+ ret <2 x float> %pow
+}
+
+define <2 x float> @test_pow_afn_v2f32_known_integral_uitofp(<2 x float> %x, <2 x i32> %y) {
+; CHECK-LABEL: define <2 x float> @test_pow_afn_v2f32_known_integral_uitofp
+; CHECK-SAME: (<2 x float> [[X:%.*]], <2 x i32> [[Y:%.*]]) {
+; CHECK-NEXT: [[Y_CAST:%.*]] = uitofp <2 x i32> [[Y]] to <2 x float>
+; CHECK-NEXT: [[TMP1:%.*]] = fptosi <2 x float> [[Y_CAST]] to <2 x i32>
+; CHECK-NEXT: [[POW:%.*]] = tail call afn <2 x float> @_Z4pownDv2_fDv2_i(<2 x float> [[X]], <2 x i32> [[TMP1]])
+; CHECK-NEXT: ret <2 x float> [[POW]]
+;
+ %y.cast = uitofp <2 x i32> %y to <2 x float>
+ %pow = tail call afn <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> %y.cast)
+ ret <2 x float> %pow
+}
+
+define <2 x float> @test_pow_afn_nnan_ninf_v2f32_known_integral_uitofp(<2 x float> %x, <2 x i32> %y) {
+; CHECK-LABEL: define <2 x float> @test_pow_afn_nnan_ninf_v2f32_known_integral_uitofp
+; CHECK-SAME: (<2 x float> [[X:%.*]], <2 x i32> [[Y:%.*]]) {
+; CHECK-NEXT: [[Y_CAST:%.*]] = uitofp <2 x i32> [[Y]] to <2 x float>
+; CHECK-NEXT: [[TMP1:%.*]] = fptosi <2 x float> [[Y_CAST]] to <2 x i32>
+; CHECK-NEXT: [[__FABS:%.*]] = call nnan ninf afn <2 x float> @llvm.fabs.v2f32(<2 x float> [[X]])
+; CHECK-NEXT: [[__LOG2:%.*]] = call nnan ninf afn <2 x float> @llvm.log2.v2f32(<2 x float> [[__FABS]])
+; CHECK-NEXT: [[POWNI2F:%.*]] = sitofp <2 x i32> [[TMP1]] to <2 x float>
+; CHECK-NEXT: [[__YLOGX:%.*]] = fmul nnan ninf afn <2 x float> [[__LOG2]], [[POWNI2F]]
+; CHECK-NEXT: [[__EXP2:%.*]] = call nnan ninf afn <2 x float> @llvm.exp2.v2f32(<2 x float> [[__YLOGX]])
+; CHECK-NEXT: [[__YEVEN:%.*]] = shl <2 x i32> [[TMP1]], splat (i32 31)
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x float> [[X]] to <2 x i32>
+; CHECK-NEXT: [[__POW_SIGN:%.*]] = and <2 x i32> [[__YEVEN]], [[TMP2]]
+; CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x float> [[__EXP2]] to <2 x i32>
+; CHECK-NEXT: [[TMP4:%.*]] = or disjoint <2 x i32> [[__POW_SIGN]], [[TMP3]]
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP4]] to <2 x float>
+; CHECK-NEXT: ret <2 x float> [[TMP5]]
+;
+ %y.cast = uitofp <2 x i32> %y to <2 x float>
+ %pow = tail call afn nnan ninf <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> %y.cast)
+ ret <2 x float> %pow
+}
+
+; Could fold to powr or pown
+define float @test_pow_f32_known_positive_x__known_integral_sitofp(float nofpclass(ninf nsub nnorm) %x, i32 %y) {
+; CHECK-LABEL: define float @test_pow_f32_known_positive_x__known_integral_sitofp
+; CHECK-SAME: (float nofpclass(ninf nsub nnorm) [[X:%.*]], i32 [[Y:%.*]]) {
+; CHECK-NEXT: [[Y_CAST:%.*]] = sitofp i32 [[Y]] to float
+; CHECK-NEXT: [[POW:%.*]] = tail call float @_Z4powrff(float [[X]], float [[Y_CAST]])
+; CHECK-NEXT: ret float [[POW]]
+;
+ %y.cast = sitofp i32 %y to float
+ %pow = tail call float @_Z3powff(float %x, float %y.cast)
+ ret float %pow
+}
+
+define float @test_pow_afn_f32_known_positive_x__known_integral_sitofp(float nofpclass(ninf nsub nnorm) %x, i32 %y) {
+; CHECK-LABEL: define float @test_pow_afn_f32_known_positive_x__known_integral_sitofp
+; CHECK-SAME: (float nofpclass(ninf nsub nnorm) [[X:%.*]], i32 [[Y:%.*]]) {
+; CHECK-NEXT: [[Y_CAST:%.*]] = sitofp i32 [[Y]] to float
+; CHECK-NEXT: [[POW:%.*]] = tail call afn float @_Z4powrff(float [[X]], float [[Y_CAST]])
+; CHECK-NEXT: ret float [[POW]]
+;
+ %y.cast = sitofp i32 %y to float
+ %pow = tail call afn float @_Z3powff(float %x, float %y.cast)
+ ret float %pow
+}
+
+define float @test_pow_afn_nnan_ninf_f32__known_positive_x__known_integral_sitofp(float nofpclass(ninf nsub nnorm) %x, i32 %y) {
+; CHECK-LABEL: define float @test_pow_afn_nnan_ninf_f32__known_positive_x__known_integral_sitofp
+; CHECK-SAME: (float nofpclass(ninf nsub nnorm) [[X:%.*]], i32 [[Y:%.*]]) {
+; CHECK-NEXT: [[Y_CAST:%.*]] = sitofp i32 [[Y]] to float
+; CHECK-NEXT: [[__LOG2:%.*]] = call nnan ninf afn float @llvm.log2.f32(float [[X]])
+; CHECK-NEXT: [[__YLOGX:%.*]] = fmul nnan ninf afn float [[__LOG2]], [[Y_CAST]]
+; CHECK-NEXT: [[__EXP2:%.*]] = call nnan ninf afn float @llvm.exp2.f32(float [[__YLOGX]])
+; CHECK-NEXT: ret float [[__EXP2]]
+;
+ %y.cast = sitofp i32 %y to float
+ %pow = tail call afn nnan ninf float @_Z3powff(float %x, float %y.cast)
+ ret float %pow
+}
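+
+; With x known non-negative, the fabs and sign-reconstruction steps seen in
+; the earlier expansions are unnecessary: pow(x, y) lowers directly to
+; exp2(y * log2(x)).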
+
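+; Cannot fold to pown: y is not nan, but trunc of an infinite y is still infinite.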
+define float @test_pow_f32__y_known_integral_trunc_maybe_inf(float %x, float nofpclass(nan) %y.arg) {
+; CHECK-LABEL: define float @test_pow_f32__y_known_integral_trunc_maybe_inf
+; CHECK-SAME: (float [[X:%.*]], float nofpclass(nan) [[Y_ARG:%.*]]) {
+; CHECK-NEXT: [[Y:%.*]] = call float @llvm.trunc.f32(float [[Y_ARG]])
+; CHECK-NEXT: [[POW:%.*]] = tail call float @_Z3powff(float [[X]], float [[Y]])
+; CHECK-NEXT: ret float [[POW]]
+;
+ %y = call float @llvm.trunc.f32(float %y.arg)
+ %pow = tail call float @_Z3powff(float %x, float %y)
+ ret float %pow
+}
+
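+; Cannot fold to pown: y is not inf, but it may still be nan.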
+define float @test_pow_f32__y_known_integral_trunc_maybe_nan(float %x, float nofpclass(inf) %y.arg) {
+; CHECK-LABEL: define float @test_pow_f32__y_known_integral_trunc_maybe_nan
+; CHECK-SAME: (float [[X:%.*]], float nofpclass(inf) [[Y_ARG:%.*]]) {
+; CHECK-NEXT: [[Y:%.*]] = call float @llvm.trunc.f32(float [[Y_ARG]])
+; CHECK-NEXT: [[POW:%.*]] = tail call float @_Z3powff(float [[X]], float [[Y]])
+; CHECK-NEXT: ret float [[POW]]
+;
+ %y = call float @llvm.trunc.f32(float %y.arg)
+ %pow = tail call float @_Z3powff(float %x, float %y)
+ ret float %pow
+}
+
+; Cannot fold to pown, may still be inf
+define float @test_pow_f32__y_known_integral_trunc_nnan_use(float %x, float %y.arg) {
+; CHECK-LABEL: define float @test_pow_f32__y_known_integral_trunc_nnan_use
+; CHECK-SAME: (float [[X:%.*]], float [[Y_ARG:%.*]]) {
+; CHECK-NEXT: [[Y:%.*]] = call float @llvm.trunc.f32(float [[Y_ARG]])
+; CHECK-NEXT: [[POW:%.*]] = tail call nnan float @_Z3powff(float [[X]], float [[Y]])
+; CHECK-NEXT: ret float [[POW]]
+;
+ %y = call float @llvm.trunc.f32(float %y.arg)
+ %pow = tail call nnan float @_Z3powff(float %x, float %y)
+ ret float %pow
+}
+
+; Cannot fold to pown, may still be nan
+define float @test_pow_f32__y_known_integral_trunc_ninf_use(float %x, float %y.arg) {
+; CHECK-LABEL: define float @test_pow_f32__y_known_integral_trunc_ninf_use
+; CHECK-SAME: (float [[X:%.*]], float [[Y_ARG:%.*]]) {
+; CHECK-NEXT: [[Y:%.*]] = call float @llvm.trunc.f32(float [[Y_ARG]])
+; CHECK-NEXT: [[POW:%.*]] = tail call ninf float @_Z3powff(float [[X]], float [[Y]])
+; CHECK-NEXT: ret float [[POW]]
+;
+ %y = call float @llvm.trunc.f32(float %y.arg)
+ %pow = tail call ninf float @_Z3powff(float %x, float %y)
+ ret float %pow
+}
+
+define float @test_pow_afn_f32_nnan_ninf__y_known_integral_trunc(float %x, float %y.arg) {
+; CHECK-LABEL: define float @test_pow_afn_f32_nnan_ninf__y_known_integral_trunc
+; CHECK-SAME: (float [[X:%.*]], float [[Y_ARG:%.*]]) {
+; CHECK-NEXT: [[Y:%.*]] = call float @llvm.trunc.f32(float [[Y_ARG]])
+; CHECK-NEXT: [[TMP1:%.*]] = fptosi float [[Y]] to i32
+; CHECK-NEXT: [[__FABS:%.*]] = call nnan ninf afn float @llvm.fabs.f32(float [[X]])
+; CHECK-NEXT: [[__LOG2:%.*]] = call nnan ninf afn float @llvm.log2.f32(float [[__FABS]])
+; CHECK-NEXT: [[POWNI2F:%.*]] = sitofp i32 [[TMP1]] to float
+; CHECK-NEXT: [[__YLOGX:%.*]] = fmul nnan ninf afn float [[__LOG2]], [[POWNI2F]]
+; CHECK-NEXT: [[__EXP2:%.*]] = call nnan ninf afn float @llvm.exp2.f32(float [[__YLOGX]])
+; CHECK-NEXT: [[__YEVEN:%.*]] = shl i32 [[TMP1]], 31
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast float [[X]] to i32
+; CHECK-NEXT: [[__POW_SIGN:%.*]] = and i32 [[__YEVEN]], [[TMP2]]
+; CHECK-NEXT: [[TMP3:%.*]] = bitcast float [[__EXP2]] to i32
+; CHECK-NEXT: [[TMP4:%.*]] = or disjoint i32 [[__POW_SIGN]], [[TMP3]]
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast i32 [[TMP4]] to float
+; CHECK-NEXT: ret float [[TMP5]]
+;
+ %y = call float @llvm.trunc.f32(float %y.arg)
+ %pow = tail call afn nnan ninf float @_Z3powff(float %x, float %y)
+ ret float %pow
+}
+
+define float @test_pow_afn_f32__y_known_integral_trunc(float %x, float nofpclass(inf nan) %y.arg) {
+; CHECK-LABEL: define float @test_pow_afn_f32__y_known_integral_trunc
+; CHECK-SAME: (float [[X:%.*]], float nofpclass(nan inf) [[Y_ARG:%.*]]) {
+; CHECK-NEXT: [[Y:%.*]] = call float @llvm.trunc.f32(float [[Y_ARG]])
+; CHECK-NEXT: [[TMP1:%.*]] = fptosi float [[Y]] to i32
+; CHECK-NEXT: [[POW:%.*]] = tail call afn float @_Z4pownfi(float [[X]], i32 [[TMP1]])
+; CHECK-NEXT: ret float [[POW]]
+;
+ %y = call float @llvm.trunc.f32(float %y.arg)
+ %pow = tail call afn float @_Z3powff(float %x, float %y)
+ ret float %pow
+}
+
+define float @test_pow_f32__y_known_integral_floor(float %x, float nofpclass(inf nan) %y.arg) {
+; CHECK-LABEL: define float @test_pow_f32__y_known_integral_floor
+; CHECK-SAME: (float [[X:%.*]], float nofpclass(nan inf) [[Y_ARG:%.*]]) {
+; CHECK-NEXT: [[Y:%.*]] = call float @llvm.floor.f32(float [[Y_ARG]])
+; CHECK-NEXT: [[TMP1:%.*]] = fptosi float [[Y]] to i32
+; CHECK-NEXT: [[POW:%.*]] = tail call float @_Z4pownfi(float [[X]], i32 [[TMP1]])
+; CHECK-NEXT: ret float [[POW]]
+;
+ %y = call float @llvm.floor.f32(float %y.arg)
+ %pow = tail call float @_Z3powff(float %x, float %y)
+ ret float %pow
+}
+
+define float @test_pow_f32__y_known_integral_ceil(float %x, float nofpclass(inf nan) %y.arg) {
+; CHECK-LABEL: define float @test_pow_f32__y_known_integral_ceil
+; CHECK-SAME: (float [[X:%.*]], float nofpclass(nan inf) [[Y_ARG:%.*]]) {
+; CHECK-NEXT: [[Y:%.*]] = call float @llvm.ceil.f32(float [[Y_ARG]])
+; CHECK-NEXT: [[TMP1:%.*]] = fptosi float [[Y]] to i32
+; CHECK-NEXT: [[POW:%.*]] = tail call float @_Z4pownfi(float [[X]], i32 [[TMP1]])
+; CHECK-NEXT: ret float [[POW]]
+;
+ %y = call float @llvm.ceil.f32(float %y.arg)
+ %pow = tail call float @_Z3powff(float %x, float %y)
+ ret float %pow
+}
+
+define float @test_pow_f32__y_known_integral_trunc(float %x, float nofpclass(inf nan) %y.arg) {
+; CHECK-LABEL: define float @test_pow_f32__y_known_integral_trunc
+; CHECK-SAME: (float [[X:%.*]], float nofpclass(nan inf) [[Y_ARG:%.*]]) {
+; CHECK-NEXT: [[Y:%.*]] = call float @llvm.trunc.f32(float [[Y_ARG]])
+; CHECK-NEXT: [[TMP1:%.*]] = fptosi float [[Y]] to i32
+; CHECK-NEXT: [[POW:%.*]] = tail call float @_Z4pownfi(float [[X]], i32 [[TMP1]])
+; CHECK-NEXT: ret float [[POW]]
+;
+ %y = call float @llvm.trunc.f32(float %y.arg)
+ %pow = tail call float @_Z3powff(float %x, float %y)
+ ret float %pow
+}
+
+define float @test_pow_f32__y_known_integral_rint(float %x, float nofpclass(inf nan) %y.arg) {
+; CHECK-LABEL: define float @test_pow_f32__y_known_integral_rint
+; CHECK-SAME: (float [[X:%.*]], float nofpclass(nan inf) [[Y_ARG:%.*]]) {
+; CHECK-NEXT: [[Y:%.*]] = call float @llvm.rint.f32(float [[Y_ARG]])
+; CHECK-NEXT: [[TMP1:%.*]] = fptosi float [[Y]] to i32
+; CHECK-NEXT: [[POW:%.*]] = tail call float @_Z4pownfi(float [[X]], i32 [[TMP1]])
+; CHECK-NEXT: ret float [[POW]]
+;
+ %y = call float @llvm.rint.f32(float %y.arg)
+ %pow = tail call float @_Z3powff(float %x, float %y)
+ ret float %pow
+}
+
+define float @test_pow_f32__y_known_integral_nearbyint(float %x, float nofpclass(inf nan) %y.arg) {
+; CHECK-LABEL: define float @test_pow_f32__y_known_integral_nearbyint
+; CHECK-SAME: (float [[X:%.*]], float nofpclass(nan inf) [[Y_ARG:%.*]]) {
+; CHECK-NEXT: [[Y:%.*]] = call float @llvm.nearbyint.f32(float [[Y_ARG]])
+; CHECK-NEXT: [[TMP1:%.*]] = fptosi float [[Y]] to i32
+; CHECK-NEXT: [[POW:%.*]] = tail call float @_Z4pownfi(float [[X]], i32 [[TMP1]])
+; CHECK-NEXT: ret float [[POW]]
+;
+ %y = call float @llvm.nearbyint.f32(float %y.arg)
+ %pow = tail call float @_Z3powff(float %x, float %y)
+ ret float %pow
+}
+
+define float @test_pow_f32__y_known_integral_round(float %x, float nofpclass(inf nan) %y.arg) {
+; CHECK-LABEL: define float @test_pow_f32__y_known_integral_round
+; CHECK-SAME: (float [[X:%.*]], float nofpclass(nan inf) [[Y_ARG:%.*]]) {
+; CHECK-NEXT: [[Y:%.*]] = call float @llvm.round.f32(float [[Y_ARG]])
+; CHECK-NEXT: [[TMP1:%.*]] = fptosi float [[Y]] to i32
+; CHECK-NEXT: [[POW:%.*]] = tail call float @_Z4pownfi(float [[X]], i32 [[TMP1]])
+; CHECK-NEXT: ret float [[POW]]
+;
+ %y = call float @llvm.round.f32(float %y.arg)
+ %pow = tail call float @_Z3powff(float %x, float %y)
+ ret float %pow
+}
+
+define float @test_pow_f32__y_known_integral_roundeven(float %x, float nofpclass(inf nan) %y.arg) {
+; CHECK-LABEL: define float @test_pow_f32__y_known_integral_roundeven
+; CHECK-SAME: (float [[X:%.*]], float nofpclass(nan inf) [[Y_ARG:%.*]]) {
+; CHECK-NEXT: [[Y:%.*]] = call float @llvm.roundeven.f32(float [[Y_ARG]])
+; CHECK-NEXT: [[TMP1:%.*]] = fptosi float [[Y]] to i32
+; CHECK-NEXT: [[POW:%.*]] = tail call float @_Z4pownfi(float [[X]], i32 [[TMP1]])
+; CHECK-NEXT: ret float [[POW]]
+;
+ %y = call float @llvm.roundeven.f32(float %y.arg)
+ %pow = tail call float @_Z3powff(float %x, float %y)
+ ret float %pow
+}
+
+define float @test_pow_f32_known_integral_undef(float %x) {
+; CHECK-LABEL: define float @test_pow_f32_known_integral_undef
+; CHECK-SAME: (float [[X:%.*]]) {
+; CHECK-NEXT: [[POW:%.*]] = tail call float @_Z3powff(float [[X]], float undef)
+; CHECK-NEXT: ret float [[POW]]
+;
+ %pow = tail call float @_Z3powff(float %x, float undef)
+ ret float %pow
+}
+
+define float @test_pow_f32_known_integral_poison(float %x) {
+; CHECK-LABEL: define float @test_pow_f32_known_integral_poison
+; CHECK-SAME: (float [[X:%.*]]) {
+; CHECK-NEXT: [[POW:%.*]] = tail call float @_Z4pownfi(float [[X]], i32 poison)
+; CHECK-NEXT: ret float [[POW]]
+;
+ %pow = tail call float @_Z3powff(float %x, float poison)
+ ret float %pow
+}
+
+define <2 x float> @test_pow_v2f32_known_integral_constant_vector_undef_elt(<2 x float> %x) {
+; CHECK-LABEL: define <2 x float> @test_pow_v2f32_known_integral_constant_vector_undef_elt
+; CHECK-SAME: (<2 x float> [[X:%.*]]) {
+; CHECK-NEXT: [[POW:%.*]] = tail call <2 x float> @_Z3powDv2_fS_(<2 x float> [[X]], <2 x float> <float 4.000000e+00, float undef>)
+; CHECK-NEXT: ret <2 x float> [[POW]]
+;
+ %pow = tail call <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> <float 4.0, float undef>)
+ ret <2 x float> %pow
+}
+
+define <2 x float> @test_pow_v2f32_known_integral_constant_vector_poison_elt(<2 x float> %x) {
+; CHECK-LABEL: define <2 x float> @test_pow_v2f32_known_integral_constant_vector_poison_elt
+; CHECK-SAME: (<2 x float> [[X:%.*]]) {
+; CHECK-NEXT: [[POW:%.*]] = tail call <2 x float> @_Z4pownDv2_fDv2_i(<2 x float> [[X]], <2 x i32> <i32 4, i32 poison>)
+; CHECK-NEXT: ret <2 x float> [[POW]]
+;
+ %pow = tail call <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> <float 4.0, float poison>)
+ ret <2 x float> %pow
+}
+
+attributes #0 = { minsize }
+attributes #1 = { noinline }
+attributes #2 = { strictfp }
+attributes #3 = { nobuiltin }
diff --git a/llvm/test/CodeGen/AMDGPU/copysign-simplify-demanded-bits.ll b/llvm/test/CodeGen/AMDGPU/copysign-simplify-demanded-bits.ll
index a01c2fa152ab3..32f6b11e41d8c 100644
--- a/llvm/test/CodeGen/AMDGPU/copysign-simplify-demanded-bits.ll
+++ b/llvm/test/CodeGen/AMDGPU/copysign-simplify-demanded-bits.ll
@@ -336,13 +336,13 @@ define float @test_copysign_pow_fast_f32__integral_y(float %x, i32 %y.i) {
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_mov_b32 s4, 0x800000
-; GFX9-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4
-; GFX9-NEXT: v_cndmask_b32_e64 v3, 0, 32, vcc
+; GFX9-NEXT: v_cmp_ge_f32_e64 s[4:5], |v0|, s4
+; GFX9-NEXT: v_cndmask_b32_e64 v3, 32, 0, s[4:5]
; GFX9-NEXT: v_ldexp_f32 v3, |v0|, v3
; GFX9-NEXT: v_log_f32_e32 v3, v3
; GFX9-NEXT: v_cvt_f32_i32_e32 v1, v1
; GFX9-NEXT: v_mov_b32_e32 v2, 0x42000000
-; GFX9-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
+; GFX9-NEXT: v_cndmask_b32_e64 v2, v2, 0, s[4:5]
; GFX9-NEXT: v_sub_f32_e32 v2, v3, v2
; GFX9-NEXT: v_mul_f32_e32 v3, v2, v1
; GFX9-NEXT: s_mov_b32 s4, 0xc2fc0000
diff --git a/llvm/test/CodeGen/AMDGPU/fdiv_flags.f32.ll b/llvm/test/CodeGen/AMDGPU/fdiv_flags.f32.ll
index 3983655285e57..b20a45237eac5 100644
--- a/llvm/test/CodeGen/AMDGPU/fdiv_flags.f32.ll
+++ b/llvm/test/CodeGen/AMDGPU/fdiv_flags.f32.ll
@@ -1515,12 +1515,12 @@ define float @v_recip_sqrt_f32_ulp25(float %x) {
; CODEGEN-IEEE-GISEL: ; %bb.0:
; CODEGEN-IEEE-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CODEGEN-IEEE-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
-; CODEGEN-IEEE-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
-; CODEGEN-IEEE-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; CODEGEN-IEEE-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, v0, v1
+; CODEGEN-IEEE-GISEL-NEXT: v_cndmask_b32_e64 v1, 1, 0, vcc
; CODEGEN-IEEE-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1
; CODEGEN-IEEE-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v1
; CODEGEN-IEEE-GISEL-NEXT: v_sqrt_f32_e32 v0, v0
-; CODEGEN-IEEE-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, -16, vcc
+; CODEGEN-IEEE-GISEL-NEXT: v_cndmask_b32_e64 v1, -16, 0, vcc
; CODEGEN-IEEE-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v1
; CODEGEN-IEEE-GISEL-NEXT: v_frexp_mant_f32_e32 v1, v0
; CODEGEN-IEEE-GISEL-NEXT: v_rcp_f32_e32 v1, v1
diff --git a/llvm/test/CodeGen/AMDGPU/fsqrt.f32.ll b/llvm/test/CodeGen/AMDGPU/fsqrt.f32.ll
index 87c7cce854b11..c5d5b7c92d259 100644
--- a/llvm/test/CodeGen/AMDGPU/fsqrt.f32.ll
+++ b/llvm/test/CodeGen/AMDGPU/fsqrt.f32.ll
@@ -2326,12 +2326,12 @@ define float @v_sqrt_f32_ulp2_noncontractable_rcp(float %x) {
; GISEL-IEEE: ; %bb.0:
; GISEL-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-IEEE-NEXT: v_mov_b32_e32 v1, 0x800000
-; GISEL-IEEE-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
-; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; GISEL-IEEE-NEXT: v_cmp_ge_f32_e32 vcc, v0, v1
+; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, 1, 0, vcc
; GISEL-IEEE-NEXT: v_lshlrev_b32_e32 v1, 5, v1
; GISEL-IEEE-NEXT: v_ldexp_f32_e32 v0, v0, v1
; GISEL-IEEE-NEXT: v_sqrt_f32_e32 v0, v0
-; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, 0, -16, vcc
+; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, -16, 0, vcc
; GISEL-IEEE-NEXT: v_mov_b32_e32 v2, 0x7f800000
; GISEL-IEEE-NEXT: v_ldexp_f32_e32 v0, v0, v1
; GISEL-IEEE-NEXT: v_frexp_mant_f32_e32 v1, v0
@@ -2583,12 +2583,12 @@ define float @v_sqrt_f32_ulp2_contractable_fdiv_arcp(float %x, float %y) {
; GISEL-IEEE: ; %bb.0:
; GISEL-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-IEEE-NEXT: v_mov_b32_e32 v2, 0x800000
-; GISEL-IEEE-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
-; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; GISEL-IEEE-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2
+; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v2, 1, 0, vcc
; GISEL-IEEE-NEXT: v_lshlrev_b32_e32 v2, 5, v2
; GISEL-IEEE-NEXT: v_ldexp_f32_e32 v0, v0, v2
; GISEL-IEEE-NEXT: v_sqrt_f32_e32 v0, v0
-; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v2, 0, -16, vcc
+; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v2, -16, 0, vcc
; GISEL-IEEE-NEXT: v_mov_b32_e32 v3, 0x7f800000
; GISEL-IEEE-NEXT: v_ldexp_f32_e32 v0, v0, v2
; GISEL-IEEE-NEXT: v_frexp_mant_f32_e32 v2, v0
@@ -3999,8 +3999,8 @@ define amdgpu_kernel void @elim_redun_check_neg0(ptr addrspace(1) %out, float %i
; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
; GISEL-IEEE-NEXT: v_bfrev_b32_e32 v1, 1
; GISEL-IEEE-NEXT: v_mov_b32_e32 v2, 0x7fc00000
-; GISEL-IEEE-NEXT: v_cmp_lt_f32_e32 vcc, s2, v1
-; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
+; GISEL-IEEE-NEXT: v_cmp_ge_f32_e32 vcc, s2, v1
+; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; GISEL-IEEE-NEXT: buffer_store_dword v0, off, s[4:7], 0
; GISEL-IEEE-NEXT: s_endpgm
;
@@ -4061,8 +4061,8 @@ define amdgpu_kernel void @elim_redun_check_neg0(ptr addrspace(1) %out, float %i
; GISEL-DAZ-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
; GISEL-DAZ-NEXT: v_bfrev_b32_e32 v1, 1
; GISEL-DAZ-NEXT: v_mov_b32_e32 v2, 0x7fc00000
-; GISEL-DAZ-NEXT: v_cmp_lt_f32_e32 vcc, s2, v1
-; GISEL-DAZ-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
+; GISEL-DAZ-NEXT: v_cmp_ge_f32_e32 vcc, s2, v1
+; GISEL-DAZ-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; GISEL-DAZ-NEXT: s_mov_b32 s2, -1
; GISEL-DAZ-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GISEL-DAZ-NEXT: s_endpgm
@@ -4133,8 +4133,8 @@ define amdgpu_kernel void @elim_redun_check_pos0(ptr addrspace(1) %out, float %i
; GISEL-IEEE-NEXT: v_cmp_class_f32_e32 vcc, v0, v2
; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
; GISEL-IEEE-NEXT: v_mov_b32_e32 v1, 0x7fc00000
-; GISEL-IEEE-NEXT: v_cmp_lt_f32_e64 vcc, s2, 0
-; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
+; GISEL-IEEE-NEXT: v_cmp_ge_f32_e64 vcc, s2, 0
+; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
; GISEL-IEEE-NEXT: buffer_store_dword v0, off, s[4:7], 0
; GISEL-IEEE-NEXT: s_endpgm
;
@@ -4194,8 +4194,8 @@ define amdgpu_kernel void @elim_redun_check_pos0(ptr addrspace(1) %out, float %i
; GISEL-DAZ-NEXT: v_cmp_class_f32_e32 vcc, v0, v2
; GISEL-DAZ-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
; GISEL-DAZ-NEXT: v_mov_b32_e32 v1, 0x7fc00000
-; GISEL-DAZ-NEXT: v_cmp_lt_f32_e64 vcc, s2, 0
-; GISEL-DAZ-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
+; GISEL-DAZ-NEXT: v_cmp_ge_f32_e64 vcc, s2, 0
+; GISEL-DAZ-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
; GISEL-DAZ-NEXT: s_mov_b32 s2, -1
; GISEL-DAZ-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GISEL-DAZ-NEXT: s_endpgm
@@ -4434,10 +4434,10 @@ define amdgpu_kernel void @elim_redun_check_v2(ptr addrspace(1) %out, <2 x float
; GISEL-IEEE-NEXT: v_bfrev_b32_e32 v3, 1
; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v1, v1, v0, vcc
; GISEL-IEEE-NEXT: v_mov_b32_e32 v4, 0x7fc00000
-; GISEL-IEEE-NEXT: v_cmp_lt_f32_e32 vcc, s6, v3
-; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v0, v2, v4, vcc
-; GISEL-IEEE-NEXT: v_cmp_lt_f32_e32 vcc, s7, v3
-; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc
+; GISEL-IEEE-NEXT: v_cmp_ge_f32_e32 vcc, s6, v3
+; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v0, v4, v2, vcc
+; GISEL-IEEE-NEXT: v_cmp_ge_f32_e32 vcc, s7, v3
+; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc
; GISEL-IEEE-NEXT: s_mov_b32 s6, -1
; GISEL-IEEE-NEXT: s_mov_b32 s7, 0xf000
; GISEL-IEEE-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
@@ -4530,10 +4530,10 @@ define amdgpu_kernel void @elim_redun_check_v2(ptr addrspace(1) %out, <2 x float
; GISEL-DAZ-NEXT: v_bfrev_b32_e32 v3, 1
; GISEL-DAZ-NEXT: v_cndmask_b32_e32 v1, v1, v0, vcc
; GISEL-DAZ-NEXT: v_mov_b32_e32 v4, 0x7fc00000
-; GISEL-DAZ-NEXT: v_cmp_lt_f32_e32 vcc, s6, v3
-; GISEL-DAZ-NEXT: v_cndmask_b32_e32 v0, v2, v4, vcc
-; GISEL-DAZ-NEXT: v_cmp_lt_f32_e32 vcc, s7, v3
-; GISEL-DAZ-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc
+; GISEL-DAZ-NEXT: v_cmp_ge_f32_e32 vcc, s6, v3
+; GISEL-DAZ-NEXT: v_cndmask_b32_e32 v0, v4, v2, vcc
+; GISEL-DAZ-NEXT: v_cmp_ge_f32_e32 vcc, s7, v3
+; GISEL-DAZ-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc
; GISEL-DAZ-NEXT: s_mov_b32 s6, -1
; GISEL-DAZ-NEXT: s_mov_b32 s7, 0xf000
; GISEL-DAZ-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.exp.ll b/llvm/test/CodeGen/AMDGPU/llvm.exp.ll
index fdccacf372dfa..1b98e0b6674f4 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.exp.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.exp.ll
@@ -69,11 +69,11 @@ define amdgpu_kernel void @s_exp_f32(ptr addrspace(1) %out, float %in) {
; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
; VI-GISEL-NEXT: v_ldexp_f32 v0, v0, v1
; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2ce8ed0
-; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s2, v1
+; VI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, s2, v1
; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x42b17218
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc
-; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s2, v1
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v2, v0, v2, vcc
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc
+; VI-GISEL-NEXT: v_cmp_le_f32_e32 vcc, s2, v1
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v2, v2, v0, vcc
; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; VI-GISEL-NEXT: v_mov_b32_e32 v0, s0
; VI-GISEL-NEXT: v_mov_b32_e32 v1, s1
@@ -123,13 +123,13 @@ define amdgpu_kernel void @s_exp_f32(ptr addrspace(1) %out, float %in) {
; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v1, v3
; GFX900-GISEL-NEXT: v_exp_f32_e32 v0, v0
; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
-; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s2, v2
+; GFX900-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, s2, v2
; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
; GFX900-GISEL-NEXT: v_ldexp_f32 v0, v0, v1
; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x42b17218
-; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc
-; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s2, v1
-; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc
+; GFX900-GISEL-NEXT: v_cmp_le_f32_e32 vcc, s2, v1
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX900-GISEL-NEXT: global_store_dword v1, v0, s[0:1]
; GFX900-GISEL-NEXT: s_endpgm
@@ -179,13 +179,13 @@ define amdgpu_kernel void @s_exp_f32(ptr addrspace(1) %out, float %in) {
; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v1, v3
; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0
; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
-; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s2, v2
+; SI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, s2, v2
; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
; SI-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v1
; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x42b17218
-; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc
-; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s2, v1
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc
+; SI-GISEL-NEXT: v_cmp_le_f32_e32 vcc, s2, v1
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; SI-GISEL-NEXT: s_mov_b32 s2, -1
; SI-GISEL-NEXT: buffer_store_dword v0, off, s[0:3], 0
; SI-GISEL-NEXT: s_endpgm
@@ -424,18 +424,18 @@ define amdgpu_kernel void @s_exp_v2f32(ptr addrspace(1) %out, <2 x float> %in) {
; VI-GISEL-NEXT: v_exp_f32_e32 v5, v0
; VI-GISEL-NEXT: v_ldexp_f32 v2, v2, v3
; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0xc2ce8ed0
-; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s2, v3
+; VI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, s2, v3
; VI-GISEL-NEXT: v_mov_b32_e32 v4, 0x42b17218
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
; VI-GISEL-NEXT: v_mov_b32_e32 v6, 0x7f800000
-; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s2, v4
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v6, vcc
+; VI-GISEL-NEXT: v_cmp_le_f32_e32 vcc, s2, v4
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v6, v2, vcc
; VI-GISEL-NEXT: v_ldexp_f32 v1, v5, v1
-; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s3, v3
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
-; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s3, v4
+; VI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, s3, v3
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; VI-GISEL-NEXT: v_cmp_le_f32_e32 vcc, s3, v4
; VI-GISEL-NEXT: v_mov_b32_e32 v3, s1
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v6, vcc
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, v6, v1, vcc
; VI-GISEL-NEXT: v_mov_b32_e32 v2, s0
; VI-GISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
; VI-GISEL-NEXT: s_endpgm
@@ -504,16 +504,16 @@ define amdgpu_kernel void @s_exp_v2f32(ptr addrspace(1) %out, <2 x float> %in) {
; GFX900-GISEL-NEXT: v_exp_f32_e32 v5, v0
; GFX900-GISEL-NEXT: v_mov_b32_e32 v4, 0xc2ce8ed0
; GFX900-GISEL-NEXT: v_ldexp_f32 v2, v2, v3
-; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s2, v4
+; GFX900-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, s2, v4
; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x42b17218
-; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc
-; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s2, v3
-; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v6, vcc
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
+; GFX900-GISEL-NEXT: v_cmp_le_f32_e32 vcc, s2, v3
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v6, v2, vcc
; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v5, v1
-; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s3, v4
-; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
-; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s3, v3
-; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v6, vcc
+; GFX900-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, s3, v4
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX900-GISEL-NEXT: v_cmp_le_f32_e32 vcc, s3, v3
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, v6, v1, vcc
; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0
; GFX900-GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
; GFX900-GISEL-NEXT: s_endpgm
@@ -585,16 +585,16 @@ define amdgpu_kernel void @s_exp_v2f32(ptr addrspace(1) %out, <2 x float> %in) {
; SI-GISEL-NEXT: v_exp_f32_e32 v5, v0
; SI-GISEL-NEXT: v_mov_b32_e32 v4, 0xc2ce8ed0
; SI-GISEL-NEXT: v_ldexp_f32_e32 v2, v2, v3
-; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s2, v4
+; SI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, s2, v4
; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x42b17218
-; SI-GISEL-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc
-; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s2, v3
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v6, vcc
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
+; SI-GISEL-NEXT: v_cmp_le_f32_e32 vcc, s2, v3
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v6, v2, vcc
; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v5, v1
-; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s3, v4
-; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
-; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s3, v3
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v6, vcc
+; SI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, s3, v4
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; SI-GISEL-NEXT: v_cmp_le_f32_e32 vcc, s3, v3
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, v6, v1, vcc
; SI-GISEL-NEXT: s_mov_b32 s2, -1
; SI-GISEL-NEXT: s_mov_b32 s3, 0xf000
; SI-GISEL-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
@@ -960,10 +960,10 @@ define amdgpu_kernel void @s_exp_v3f32(ptr addrspace(1) %out, <3 x float> %in) {
; VI-GISEL-NEXT: v_exp_f32_e32 v5, v5
; VI-GISEL-NEXT: v_ldexp_f32 v0, v0, v3
; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0xc2ce8ed0
-; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s0, v3
+; VI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, s0, v3
; VI-GISEL-NEXT: v_mov_b32_e32 v4, 0x42b17218
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc
-; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s0, v4
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc
+; VI-GISEL-NEXT: v_cmp_le_f32_e32 vcc, s0, v4
; VI-GISEL-NEXT: s_and_b32 s0, s2, 0xfffff000
; VI-GISEL-NEXT: v_ldexp_f32 v5, v5, v6
; VI-GISEL-NEXT: v_mov_b32_e32 v6, s0
@@ -980,17 +980,17 @@ define amdgpu_kernel void @s_exp_v3f32(ptr addrspace(1) %out, <3 x float> %in) {
; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v6
; VI-GISEL-NEXT: v_exp_f32_e32 v6, v1
; VI-GISEL-NEXT: v_mov_b32_e32 v7, 0x7f800000
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v7, vcc
-; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s1, v3
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v5, 0, vcc
-; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s1, v4
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v7, v0, vcc
+; VI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, s1, v3
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v5, vcc
+; VI-GISEL-NEXT: v_cmp_le_f32_e32 vcc, s1, v4
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, v7, v1, vcc
; VI-GISEL-NEXT: v_ldexp_f32 v2, v6, v2
-; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s2, v3
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc
-; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s2, v4
+; VI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, s2, v3
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
+; VI-GISEL-NEXT: v_cmp_le_f32_e32 vcc, s2, v4
; VI-GISEL-NEXT: v_mov_b32_e32 v3, s4
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v2, v2, v7, vcc
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v2, v7, v2, vcc
; VI-GISEL-NEXT: v_mov_b32_e32 v4, s5
; VI-GISEL-NEXT: flat_store_dwordx3 v[3:4], v[0:2]
; VI-GISEL-NEXT: s_endpgm
@@ -1080,23 +1080,23 @@ define amdgpu_kernel void @s_exp_v3f32(ptr addrspace(1) %out, <3 x float> %in) {
; GFX900-GISEL-NEXT: v_mov_b32_e32 v4, 0xc2ce8ed0
; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v6, v1
; GFX900-GISEL-NEXT: v_ldexp_f32 v0, v0, v3
-; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s0, v4
+; GFX900-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, s0, v4
; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x42b17218
; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v2
; GFX900-GISEL-NEXT: v_exp_f32_e32 v6, v1
-; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc
; GFX900-GISEL-NEXT: v_mov_b32_e32 v7, 0x7f800000
-; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s0, v3
-; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v7, vcc
-; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s1, v4
-; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v5, 0, vcc
-; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s1, v3
-; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc
+; GFX900-GISEL-NEXT: v_cmp_le_f32_e32 vcc, s0, v3
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v7, v0, vcc
+; GFX900-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, s1, v4
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v5, vcc
+; GFX900-GISEL-NEXT: v_cmp_le_f32_e32 vcc, s1, v3
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, v7, v1, vcc
; GFX900-GISEL-NEXT: v_ldexp_f32 v2, v6, v2
-; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s2, v4
-; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc
-; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s2, v3
-; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v2, v2, v7, vcc
+; GFX900-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, s2, v4
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
+; GFX900-GISEL-NEXT: v_cmp_le_f32_e32 vcc, s2, v3
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v2, v7, v2, vcc
; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0
; GFX900-GISEL-NEXT: global_store_dwordx3 v3, v[0:2], s[6:7]
; GFX900-GISEL-NEXT: s_endpgm
@@ -1189,24 +1189,24 @@ define amdgpu_kernel void @s_exp_v3f32(ptr addrspace(1) %out, <3 x float> %in) {
; SI-GISEL-NEXT: v_mov_b32_e32 v4, 0xc2ce8ed0
; SI-GISEL-NEXT: v_add_f32_e32 v1, v6, v1
; SI-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v3
-; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s0, v4
+; SI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, s0, v4
; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x42b17218
; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v2
; SI-GISEL-NEXT: v_exp_f32_e32 v6, v1
-; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc
; SI-GISEL-NEXT: v_mov_b32_e32 v7, 0x7f800000
-; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s0, v3
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v7, vcc
-; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s1, v4
-; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v5, 0, vcc
-; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s1, v3
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc
+; SI-GISEL-NEXT: v_cmp_le_f32_e32 vcc, s0, v3
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v7, v0, vcc
+; SI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, s1, v4
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v5, vcc
+; SI-GISEL-NEXT: v_cmp_le_f32_e32 vcc, s1, v3
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, v7, v1, vcc
; SI-GISEL-NEXT: v_ldexp_f32_e32 v2, v6, v2
-; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s2, v4
-; SI-GISEL-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc
-; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s2, v3
+; SI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, s2, v4
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
+; SI-GISEL-NEXT: v_cmp_le_f32_e32 vcc, s2, v3
; SI-GISEL-NEXT: s_mov_b32 s7, 0xf000
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v2, v2, v7, vcc
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v2, v7, v2, vcc
; SI-GISEL-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
; SI-GISEL-NEXT: buffer_store_dword v2, off, s[4:7], 0 offset:8
; SI-GISEL-NEXT: s_endpgm
@@ -1719,9 +1719,9 @@ define amdgpu_kernel void @s_exp_v4f32(ptr addrspace(1) %out, <4 x float> %in) {
; VI-GISEL-NEXT: v_add_f32_e32 v1, v6, v1
; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v6, v7
; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1
-; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s0, v4
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc
-; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s0, v5
+; VI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, s0, v4
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc
+; VI-GISEL-NEXT: v_cmp_le_f32_e32 vcc, s0, v5
; VI-GISEL-NEXT: s_and_b32 s0, s2, 0xfffff000
; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v6
; VI-GISEL-NEXT: v_mov_b32_e32 v6, s0
@@ -1751,23 +1751,23 @@ define amdgpu_kernel void @s_exp_v4f32(ptr addrspace(1) %out, <4 x float> %in) {
; VI-GISEL-NEXT: v_sub_f32_e32 v2, v2, v8
; VI-GISEL-NEXT: v_mov_b32_e32 v7, 0x7f800000
; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v3
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v7, vcc
-; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s1, v4
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v7, v0, vcc
+; VI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, s1, v4
; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v3, v8
; VI-GISEL-NEXT: v_exp_f32_e32 v8, v2
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
-; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s1, v5
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc
-; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s2, v4
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v2, v6, 0, vcc
-; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s2, v5
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v2, v2, v7, vcc
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; VI-GISEL-NEXT: v_cmp_le_f32_e32 vcc, s1, v5
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, v7, v1, vcc
+; VI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, s2, v4
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v6, vcc
+; VI-GISEL-NEXT: v_cmp_le_f32_e32 vcc, s2, v5
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v2, v7, v2, vcc
; VI-GISEL-NEXT: v_ldexp_f32 v3, v8, v3
-; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s3, v4
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v3, v3, 0, vcc
-; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s3, v5
+; VI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, s3, v4
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v3, 0, v3, vcc
+; VI-GISEL-NEXT: v_cmp_le_f32_e32 vcc, s3, v5
; VI-GISEL-NEXT: v_mov_b32_e32 v4, s4
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v3, v3, v7, vcc
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v3, v7, v3, vcc
; VI-GISEL-NEXT: v_mov_b32_e32 v5, s5
; VI-GISEL-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; VI-GISEL-NEXT: s_endpgm
@@ -1855,7 +1855,7 @@ define amdgpu_kernel void @s_exp_v4f32(ptr addrspace(1) %out, <4 x float> %in) {
; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v1, v4
; GFX900-GISEL-NEXT: v_exp_f32_e32 v0, v0
; GFX900-GISEL-NEXT: v_mov_b32_e32 v4, 0xc2ce8ed0
-; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s0, v4
+; GFX900-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, s0, v4
; GFX900-GISEL-NEXT: v_ldexp_f32 v0, v0, v1
; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, s1, v2
; GFX900-GISEL-NEXT: v_fma_f32 v6, s1, v2, -v1
@@ -1865,9 +1865,9 @@ define amdgpu_kernel void @s_exp_v4f32(ptr addrspace(1) %out, <4 x float> %in) {
; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v1, v6
; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v6, v7
; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1
-; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc
; GFX900-GISEL-NEXT: v_mov_b32_e32 v7, 0x7f800000
-; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s0, v5
+; GFX900-GISEL-NEXT: v_cmp_le_f32_e32 vcc, s0, v5
; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v6
; GFX900-GISEL-NEXT: v_mul_f32_e32 v6, s2, v2
; GFX900-GISEL-NEXT: v_fma_f32 v8, s2, v2, -v6
@@ -1877,9 +1877,9 @@ define amdgpu_kernel void @s_exp_v4f32(ptr addrspace(1) %out, <4 x float> %in) {
; GFX900-GISEL-NEXT: v_add_f32_e32 v6, v6, v8
; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v8, v9
; GFX900-GISEL-NEXT: v_exp_f32_e32 v6, v6
-; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v7, vcc
-; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s1, v4
-; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v7, v0, vcc
+; GFX900-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, s1, v4
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
; GFX900-GISEL-NEXT: v_ldexp_f32 v6, v6, v8
; GFX900-GISEL-NEXT: v_mul_f32_e32 v8, s3, v2
; GFX900-GISEL-NEXT: v_fma_f32 v2, s3, v2, -v8
@@ -1889,17 +1889,17 @@ define amdgpu_kernel void @s_exp_v4f32(ptr addrspace(1) %out, <4 x float> %in) {
; GFX900-GISEL-NEXT: v_add_f32_e32 v2, v8, v2
; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v3, v3
; GFX900-GISEL-NEXT: v_exp_f32_e32 v8, v2
-; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s1, v5
-; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc
-; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s2, v4
-; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v2, v6, 0, vcc
-; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s2, v5
-; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v2, v2, v7, vcc
+; GFX900-GISEL-NEXT: v_cmp_le_f32_e32 vcc, s1, v5
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, v7, v1, vcc
+; GFX900-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, s2, v4
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v6, vcc
+; GFX900-GISEL-NEXT: v_cmp_le_f32_e32 vcc, s2, v5
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v2, v7, v2, vcc
; GFX900-GISEL-NEXT: v_ldexp_f32 v3, v8, v3
-; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s3, v4
-; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v3, v3, 0, vcc
-; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s3, v5
-; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v3, v3, v7, vcc
+; GFX900-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, s3, v4
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v3, 0, v3, vcc
+; GFX900-GISEL-NEXT: v_cmp_le_f32_e32 vcc, s3, v5
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v3, v7, v3, vcc
; GFX900-GISEL-NEXT: v_mov_b32_e32 v4, 0
; GFX900-GISEL-NEXT: global_store_dwordx4 v4, v[0:3], s[6:7]
; GFX900-GISEL-NEXT: s_endpgm
@@ -1989,7 +1989,7 @@ define amdgpu_kernel void @s_exp_v4f32(ptr addrspace(1) %out, <4 x float> %in) {
; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v1, v4
; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0
; SI-GISEL-NEXT: v_mov_b32_e32 v4, 0xc2ce8ed0
-; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s0, v4
+; SI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, s0, v4
; SI-GISEL-NEXT: s_mov_b32 s6, -1
; SI-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v1
; SI-GISEL-NEXT: v_mul_f32_e32 v1, s1, v2
@@ -2000,9 +2000,9 @@ define amdgpu_kernel void @s_exp_v4f32(ptr addrspace(1) %out, <4 x float> %in) {
; SI-GISEL-NEXT: v_add_f32_e32 v1, v1, v6
; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v6, v7
; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1
-; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc
; SI-GISEL-NEXT: v_mov_b32_e32 v7, 0x7f800000
-; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s0, v5
+; SI-GISEL-NEXT: v_cmp_le_f32_e32 vcc, s0, v5
; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v6
; SI-GISEL-NEXT: v_mul_f32_e32 v6, s2, v2
; SI-GISEL-NEXT: v_fma_f32 v8, s2, v2, -v6
@@ -2012,9 +2012,9 @@ define amdgpu_kernel void @s_exp_v4f32(ptr addrspace(1) %out, <4 x float> %in) {
; SI-GISEL-NEXT: v_add_f32_e32 v6, v6, v8
; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v8, v9
; SI-GISEL-NEXT: v_exp_f32_e32 v6, v6
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v7, vcc
-; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s1, v4
-; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v7, v0, vcc
+; SI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, s1, v4
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
; SI-GISEL-NEXT: v_ldexp_f32_e32 v6, v6, v8
; SI-GISEL-NEXT: v_mul_f32_e32 v8, s3, v2
; SI-GISEL-NEXT: v_fma_f32 v2, s3, v2, -v8
@@ -2024,17 +2024,17 @@ define amdgpu_kernel void @s_exp_v4f32(ptr addrspace(1) %out, <4 x float> %in) {
; SI-GISEL-NEXT: v_add_f32_e32 v2, v8, v2
; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v3, v3
; SI-GISEL-NEXT: v_exp_f32_e32 v8, v2
-; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s1, v5
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc
-; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s2, v4
-; SI-GISEL-NEXT: v_cndmask_b32_e64 v2, v6, 0, vcc
-; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s2, v5
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v2, v2, v7, vcc
+; SI-GISEL-NEXT: v_cmp_le_f32_e32 vcc, s1, v5
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, v7, v1, vcc
+; SI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, s2, v4
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v6, vcc
+; SI-GISEL-NEXT: v_cmp_le_f32_e32 vcc, s2, v5
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v2, v7, v2, vcc
; SI-GISEL-NEXT: v_ldexp_f32_e32 v3, v8, v3
-; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s3, v4
-; SI-GISEL-NEXT: v_cndmask_b32_e64 v3, v3, 0, vcc
-; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s3, v5
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v3, v3, v7, vcc
+; SI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, s3, v4
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v3, 0, v3, vcc
+; SI-GISEL-NEXT: v_cmp_le_f32_e32 vcc, s3, v5
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v3, v7, v3, vcc
; SI-GISEL-NEXT: s_mov_b32 s7, 0xf000
; SI-GISEL-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
; SI-GISEL-NEXT: s_endpgm
@@ -2565,11 +2565,11 @@ define float @v_exp_f32(float %in) {
; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
-; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
+; VI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2
; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
-; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; VI-GISEL-NEXT: v_cmp_le_f32_e32 vcc, v0, v2
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc
; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX900-SDAG-LABEL: v_exp_f32:
@@ -2611,11 +2611,11 @@ define float @v_exp_f32(float %in) {
; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
-; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
+; GFX900-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2
; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
-; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
-; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
-; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX900-GISEL-NEXT: v_cmp_le_f32_e32 vcc, v0, v2
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc
; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; SI-SDAG-LABEL: v_exp_f32:
@@ -2657,11 +2657,11 @@ define float @v_exp_f32(float %in) {
; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2
; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
-; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
+; SI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2
; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
-; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
-; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; SI-GISEL-NEXT: v_cmp_le_f32_e32 vcc, v0, v2
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc
; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; R600-LABEL: v_exp_f32:
@@ -2725,11 +2725,11 @@ define float @v_exp_fabs_f32(float %in) {
; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
-; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2
+; VI-GISEL-NEXT: v_cmp_ge_f32_e64 vcc, |v0|, v2
; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, s[4:5]
-; VI-GISEL-NEXT: v_cmp_gt_f32_e64 vcc, |v0|, v2
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; VI-GISEL-NEXT: v_cmp_le_f32_e64 vcc, |v0|, v2
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc
; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX900-SDAG-LABEL: v_exp_fabs_f32:
@@ -2771,11 +2771,11 @@ define float @v_exp_fabs_f32(float %in) {
; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
-; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2
+; GFX900-GISEL-NEXT: v_cmp_ge_f32_e64 vcc, |v0|, v2
; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
-; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, s[4:5]
-; GFX900-GISEL-NEXT: v_cmp_gt_f32_e64 vcc, |v0|, v2
-; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX900-GISEL-NEXT: v_cmp_le_f32_e64 vcc, |v0|, v2
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc
; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; SI-SDAG-LABEL: v_exp_fabs_f32:
@@ -2817,11 +2817,11 @@ define float @v_exp_fabs_f32(float %in) {
; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2
; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
-; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2
+; SI-GISEL-NEXT: v_cmp_ge_f32_e64 vcc, |v0|, v2
; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
-; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, s[4:5]
-; SI-GISEL-NEXT: v_cmp_gt_f32_e64 vcc, |v0|, v2
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; SI-GISEL-NEXT: v_cmp_le_f32_e64 vcc, |v0|, v2
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc
; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; R600-LABEL: v_exp_fabs_f32:
@@ -2886,11 +2886,11 @@ define float @v_exp_fneg_fabs_f32(float %in) {
; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
-; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], -|v0|, v2
+; VI-GISEL-NEXT: v_cmp_ge_f32_e64 vcc, -|v0|, v2
; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, s[4:5]
-; VI-GISEL-NEXT: v_cmp_gt_f32_e64 vcc, -|v0|, v2
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; VI-GISEL-NEXT: v_cmp_le_f32_e64 vcc, -|v0|, v2
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc
; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX900-SDAG-LABEL: v_exp_fneg_fabs_f32:
@@ -2932,11 +2932,11 @@ define float @v_exp_fneg_fabs_f32(float %in) {
; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
-; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], -|v0|, v2
+; GFX900-GISEL-NEXT: v_cmp_ge_f32_e64 vcc, -|v0|, v2
; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
-; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, s[4:5]
-; GFX900-GISEL-NEXT: v_cmp_gt_f32_e64 vcc, -|v0|, v2
-; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX900-GISEL-NEXT: v_cmp_le_f32_e64 vcc, -|v0|, v2
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc
; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; SI-SDAG-LABEL: v_exp_fneg_fabs_f32:
@@ -2978,11 +2978,11 @@ define float @v_exp_fneg_fabs_f32(float %in) {
; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2
; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
-; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], -|v0|, v2
+; SI-GISEL-NEXT: v_cmp_ge_f32_e64 vcc, -|v0|, v2
; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
-; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, s[4:5]
-; SI-GISEL-NEXT: v_cmp_gt_f32_e64 vcc, -|v0|, v2
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; SI-GISEL-NEXT: v_cmp_le_f32_e64 vcc, -|v0|, v2
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc
; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; R600-LABEL: v_exp_fneg_fabs_f32:
@@ -3048,11 +3048,11 @@ define float @v_exp_fneg_f32(float %in) {
; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
-; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], -v0, v2
+; VI-GISEL-NEXT: v_cmp_ge_f32_e64 vcc, -v0, v2
; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, s[4:5]
-; VI-GISEL-NEXT: v_cmp_gt_f32_e64 vcc, -v0, v2
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; VI-GISEL-NEXT: v_cmp_le_f32_e64 vcc, -v0, v2
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc
; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX900-SDAG-LABEL: v_exp_fneg_f32:
@@ -3094,11 +3094,11 @@ define float @v_exp_fneg_f32(float %in) {
; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
-; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], -v0, v2
+; GFX900-GISEL-NEXT: v_cmp_ge_f32_e64 vcc, -v0, v2
; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
-; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, s[4:5]
-; GFX900-GISEL-NEXT: v_cmp_gt_f32_e64 vcc, -v0, v2
-; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX900-GISEL-NEXT: v_cmp_le_f32_e64 vcc, -v0, v2
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc
; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; SI-SDAG-LABEL: v_exp_fneg_f32:
@@ -3140,11 +3140,11 @@ define float @v_exp_fneg_f32(float %in) {
; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2
; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
-; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], -v0, v2
+; SI-GISEL-NEXT: v_cmp_ge_f32_e64 vcc, -v0, v2
; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
-; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, s[4:5]
-; SI-GISEL-NEXT: v_cmp_gt_f32_e64 vcc, -v0, v2
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; SI-GISEL-NEXT: v_cmp_le_f32_e64 vcc, -v0, v2
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc
; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; R600-LABEL: v_exp_fneg_f32:
@@ -3400,8 +3400,8 @@ define float @v_exp_f32_ninf(float %in) {
; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1
; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
-; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
+; VI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX900-SDAG-LABEL: v_exp_f32_ninf:
@@ -3438,8 +3438,8 @@ define float @v_exp_f32_ninf(float %in) {
; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1
; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
-; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
-; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
+; GFX900-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; SI-SDAG-LABEL: v_exp_f32_ninf:
@@ -3476,8 +3476,8 @@ define float @v_exp_f32_ninf(float %in) {
; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1
; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2
; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
-; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
-; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
+; SI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; R600-LABEL: v_exp_f32_ninf:
@@ -3770,11 +3770,11 @@ define float @v_exp_f32_daz(float %in) #0 {
; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
-; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
+; VI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2
; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
-; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; VI-GISEL-NEXT: v_cmp_le_f32_e32 vcc, v0, v2
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc
; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX900-SDAG-LABEL: v_exp_f32_daz:
@@ -3816,11 +3816,11 @@ define float @v_exp_f32_daz(float %in) #0 {
; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
-; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
+; GFX900-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2
; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
-; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
-; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
-; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX900-GISEL-NEXT: v_cmp_le_f32_e32 vcc, v0, v2
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc
; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; SI-SDAG-LABEL: v_exp_f32_daz:
@@ -3862,11 +3862,11 @@ define float @v_exp_f32_daz(float %in) #0 {
; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2
; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
-; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
+; SI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2
; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
-; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
-; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; SI-GISEL-NEXT: v_cmp_le_f32_e32 vcc, v0, v2
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc
; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; R600-LABEL: v_exp_f32_daz:
@@ -3928,11 +3928,11 @@ define float @v_exp_f32_nnan(float %in) {
; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
-; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
+; VI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2
; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
-; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; VI-GISEL-NEXT: v_cmp_le_f32_e32 vcc, v0, v2
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc
; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX900-SDAG-LABEL: v_exp_f32_nnan:
@@ -3974,11 +3974,11 @@ define float @v_exp_f32_nnan(float %in) {
; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
-; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
+; GFX900-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2
; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
-; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
-; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
-; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX900-GISEL-NEXT: v_cmp_le_f32_e32 vcc, v0, v2
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc
; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; SI-SDAG-LABEL: v_exp_f32_nnan:
@@ -4020,11 +4020,11 @@ define float @v_exp_f32_nnan(float %in) {
; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2
; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
-; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
+; SI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2
; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
-; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
-; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; SI-GISEL-NEXT: v_cmp_le_f32_e32 vcc, v0, v2
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc
; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; R600-LABEL: v_exp_f32_nnan:
@@ -4086,11 +4086,11 @@ define float @v_exp_f32_nnan_daz(float %in) #0 {
; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
-; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
+; VI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2
; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
-; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; VI-GISEL-NEXT: v_cmp_le_f32_e32 vcc, v0, v2
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc
; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX900-SDAG-LABEL: v_exp_f32_nnan_daz:
@@ -4132,11 +4132,11 @@ define float @v_exp_f32_nnan_daz(float %in) #0 {
; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
-; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
+; GFX900-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2
; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
-; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
-; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
-; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX900-GISEL-NEXT: v_cmp_le_f32_e32 vcc, v0, v2
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc
; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; SI-SDAG-LABEL: v_exp_f32_nnan_daz:
@@ -4178,11 +4178,11 @@ define float @v_exp_f32_nnan_daz(float %in) #0 {
; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2
; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
-; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
+; SI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2
; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
-; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
-; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; SI-GISEL-NEXT: v_cmp_le_f32_e32 vcc, v0, v2
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc
; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; R600-LABEL: v_exp_f32_nnan_daz:
@@ -4244,11 +4244,11 @@ define float @v_exp_f32_nnan_dynamic(float %in) #1 {
; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
-; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
+; VI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2
; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
-; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; VI-GISEL-NEXT: v_cmp_le_f32_e32 vcc, v0, v2
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc
; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX900-SDAG-LABEL: v_exp_f32_nnan_dynamic:
@@ -4290,11 +4290,11 @@ define float @v_exp_f32_nnan_dynamic(float %in) #1 {
; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
-; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
+; GFX900-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2
; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
-; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
-; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
-; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX900-GISEL-NEXT: v_cmp_le_f32_e32 vcc, v0, v2
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc
; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; SI-SDAG-LABEL: v_exp_f32_nnan_dynamic:
@@ -4336,11 +4336,11 @@ define float @v_exp_f32_nnan_dynamic(float %in) #1 {
; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2
; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
-; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
+; SI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2
; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
-; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
-; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; SI-GISEL-NEXT: v_cmp_le_f32_e32 vcc, v0, v2
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc
; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; R600-LABEL: v_exp_f32_nnan_dynamic:
@@ -4397,8 +4397,8 @@ define float @v_exp_f32_ninf_daz(float %in) #0 {
; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1
; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
-; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
+; VI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX900-SDAG-LABEL: v_exp_f32_ninf_daz:
@@ -4435,8 +4435,8 @@ define float @v_exp_f32_ninf_daz(float %in) #0 {
; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1
; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
-; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
-; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
+; GFX900-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; SI-SDAG-LABEL: v_exp_f32_ninf_daz:
@@ -4473,8 +4473,8 @@ define float @v_exp_f32_ninf_daz(float %in) #0 {
; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1
; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2
; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
-; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
-; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
+; SI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; R600-LABEL: v_exp_f32_ninf_daz:
@@ -4531,8 +4531,8 @@ define float @v_exp_f32_ninf_dynamic(float %in) #1 {
; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1
; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
-; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
+; VI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX900-SDAG-LABEL: v_exp_f32_ninf_dynamic:
@@ -4569,8 +4569,8 @@ define float @v_exp_f32_ninf_dynamic(float %in) #1 {
; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1
; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
-; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
-; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
+; GFX900-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; SI-SDAG-LABEL: v_exp_f32_ninf_dynamic:
@@ -4607,8 +4607,8 @@ define float @v_exp_f32_ninf_dynamic(float %in) #1 {
; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1
; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2
; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
-; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
-; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
+; SI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; R600-LABEL: v_exp_f32_ninf_dynamic:
@@ -4665,8 +4665,8 @@ define float @v_exp_f32_nnan_ninf(float %in) {
; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1
; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
-; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
+; VI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX900-SDAG-LABEL: v_exp_f32_nnan_ninf:
@@ -4703,8 +4703,8 @@ define float @v_exp_f32_nnan_ninf(float %in) {
; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1
; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
-; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
-; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
+; GFX900-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; SI-SDAG-LABEL: v_exp_f32_nnan_ninf:
@@ -4741,8 +4741,8 @@ define float @v_exp_f32_nnan_ninf(float %in) {
; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1
; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2
; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
-; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
-; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
+; SI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; R600-LABEL: v_exp_f32_nnan_ninf:
@@ -4799,8 +4799,8 @@ define float @v_exp_f32_nnan_ninf_daz(float %in) #0 {
; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1
; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
-; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
+; VI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX900-SDAG-LABEL: v_exp_f32_nnan_ninf_daz:
@@ -4837,8 +4837,8 @@ define float @v_exp_f32_nnan_ninf_daz(float %in) #0 {
; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1
; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
-; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
-; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
+; GFX900-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; SI-SDAG-LABEL: v_exp_f32_nnan_ninf_daz:
@@ -4875,8 +4875,8 @@ define float @v_exp_f32_nnan_ninf_daz(float %in) #0 {
; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1
; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2
; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
-; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
-; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
+; SI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; R600-LABEL: v_exp_f32_nnan_ninf_daz:
@@ -4933,8 +4933,8 @@ define float @v_exp_f32_nnan_ninf_dynamic(float %in) #1 {
; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1
; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
-; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
+; VI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX900-SDAG-LABEL: v_exp_f32_nnan_ninf_dynamic:
@@ -4971,8 +4971,8 @@ define float @v_exp_f32_nnan_ninf_dynamic(float %in) #1 {
; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1
; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
-; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
-; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
+; GFX900-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; SI-SDAG-LABEL: v_exp_f32_nnan_ninf_dynamic:
@@ -5009,8 +5009,8 @@ define float @v_exp_f32_nnan_ninf_dynamic(float %in) #1 {
; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1
; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2
; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
-; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
-; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
+; SI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; R600-LABEL: v_exp_f32_nnan_ninf_dynamic:
@@ -5100,11 +5100,11 @@ define float @v_exp_f32_dynamic_mode(float %in) #1 {
; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
-; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
+; VI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2
; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
-; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; VI-GISEL-NEXT: v_cmp_le_f32_e32 vcc, v0, v2
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc
; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX900-SDAG-LABEL: v_exp_f32_dynamic_mode:
@@ -5146,11 +5146,11 @@ define float @v_exp_f32_dynamic_mode(float %in) #1 {
; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
-; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
+; GFX900-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2
; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
-; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
-; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
-; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX900-GISEL-NEXT: v_cmp_le_f32_e32 vcc, v0, v2
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc
; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; SI-SDAG-LABEL: v_exp_f32_dynamic_mode:
@@ -5192,11 +5192,11 @@ define float @v_exp_f32_dynamic_mode(float %in) #1 {
; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2
; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
-; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
+; SI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2
; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
-; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
-; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; SI-GISEL-NEXT: v_cmp_le_f32_e32 vcc, v0, v2
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc
; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; R600-LABEL: v_exp_f32_dynamic_mode:
@@ -5244,11 +5244,11 @@ define float @v_exp_f32_undef() {
; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
; VI-GISEL-NEXT: v_ldexp_f32 v0, v0, v1
; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2ce8ed0
-; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s4, v1
+; VI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, s4, v1
; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x42b17218
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc
-; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s4, v1
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc
+; VI-GISEL-NEXT: v_cmp_le_f32_e32 vcc, s4, v1
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX900-SDAG-LABEL: v_exp_f32_undef:
@@ -5283,11 +5283,11 @@ define float @v_exp_f32_undef() {
; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
; GFX900-GISEL-NEXT: v_ldexp_f32 v0, v0, v1
; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2ce8ed0
-; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s4, v1
+; GFX900-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, s4, v1
; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x42b17218
-; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc
-; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s4, v1
-; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc
+; GFX900-GISEL-NEXT: v_cmp_le_f32_e32 vcc, s4, v1
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; SI-SDAG-LABEL: v_exp_f32_undef:
@@ -5322,11 +5322,11 @@ define float @v_exp_f32_undef() {
; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
; SI-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v1
; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2ce8ed0
-; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s4, v1
+; SI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, s4, v1
; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x42b17218
-; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc
-; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s4, v1
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc
+; SI-GISEL-NEXT: v_cmp_le_f32_e32 vcc, s4, v1
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; R600-LABEL: v_exp_f32_undef:
@@ -5416,11 +5416,11 @@ define float @v_exp_f32_from_fpext_f16(i16 %src.i) {
; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
-; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
+; VI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2
; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
-; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; VI-GISEL-NEXT: v_cmp_le_f32_e32 vcc, v0, v2
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc
; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX900-SDAG-LABEL: v_exp_f32_from_fpext_f16:
@@ -5462,13 +5462,13 @@ define float @v_exp_f32_from_fpext_f16(i16 %src.i) {
; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v4
; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1
; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0xc2ce8ed0
-; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v3
+; GFX900-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, v0, v3
; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
-; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
-; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
-; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX900-GISEL-NEXT: v_cmp_le_f32_e32 vcc, v0, v2
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc
; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; SI-SDAG-LABEL: v_exp_f32_from_fpext_f16:
@@ -5510,13 +5510,13 @@ define float @v_exp_f32_from_fpext_f16(i16 %src.i) {
; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v4
; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1
; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0xc2ce8ed0
-; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v3
+; SI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, v0, v3
; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2
; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
-; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
-; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; SI-GISEL-NEXT: v_cmp_le_f32_e32 vcc, v0, v2
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc
; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; R600-LABEL: v_exp_f32_from_fpext_f16:
@@ -5584,11 +5584,11 @@ define float @v_exp_f32_from_fpext_math_f16(i16 %src0.i, i16 %src1.i) {
; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
-; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
+; VI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2
; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
-; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; VI-GISEL-NEXT: v_cmp_le_f32_e32 vcc, v0, v2
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc
; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX900-SDAG-LABEL: v_exp_f32_from_fpext_math_f16:
@@ -5634,11 +5634,11 @@ define float @v_exp_f32_from_fpext_math_f16(i16 %src0.i, i16 %src1.i) {
; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
-; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
+; GFX900-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2
; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
-; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
-; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
-; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX900-GISEL-NEXT: v_cmp_le_f32_e32 vcc, v0, v2
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc
; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; SI-SDAG-LABEL: v_exp_f32_from_fpext_math_f16:
@@ -5688,11 +5688,11 @@ define float @v_exp_f32_from_fpext_math_f16(i16 %src0.i, i16 %src1.i) {
; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1
; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
; SI-GISEL-NEXT: v_mov_b32_e32 v4, 0x42b17218
-; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
+; SI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2
; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v3
-; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
-; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v4
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v5, vcc
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; SI-GISEL-NEXT: v_cmp_le_f32_e32 vcc, v0, v4
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v5, v1, vcc
; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; R600-LABEL: v_exp_f32_from_fpext_math_f16:
@@ -5905,11 +5905,11 @@ define float @v_exp_f32_from_fpext_math_f16_daz(i16 %src0.i, i16 %src1.i) #0 {
; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
-; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
+; VI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2
; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
-; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; VI-GISEL-NEXT: v_cmp_le_f32_e32 vcc, v0, v2
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc
; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX900-SDAG-LABEL: v_exp_f32_from_fpext_math_f16_daz:
@@ -5955,11 +5955,11 @@ define float @v_exp_f32_from_fpext_math_f16_daz(i16 %src0.i, i16 %src1.i) #0 {
; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
-; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
+; GFX900-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2
; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
-; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
-; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
-; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX900-GISEL-NEXT: v_cmp_le_f32_e32 vcc, v0, v2
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc
; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; SI-SDAG-LABEL: v_exp_f32_from_fpext_math_f16_daz:
@@ -6009,11 +6009,11 @@ define float @v_exp_f32_from_fpext_math_f16_daz(i16 %src0.i, i16 %src1.i) #0 {
; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1
; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
; SI-GISEL-NEXT: v_mov_b32_e32 v4, 0x42b17218
-; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
+; SI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2
; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v3
-; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
-; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v4
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v5, vcc
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; SI-GISEL-NEXT: v_cmp_le_f32_e32 vcc, v0, v4
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v5, v1, vcc
; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; R600-LABEL: v_exp_f32_from_fpext_math_f16_daz:
@@ -7032,11 +7032,11 @@ define float @v_exp_f32_contract(float %in) {
; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
-; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
+; VI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2
; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
-; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; VI-GISEL-NEXT: v_cmp_le_f32_e32 vcc, v0, v2
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc
; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX900-SDAG-LABEL: v_exp_f32_contract:
@@ -7078,11 +7078,11 @@ define float @v_exp_f32_contract(float %in) {
; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
-; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
+; GFX900-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2
; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
-; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
-; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
-; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX900-GISEL-NEXT: v_cmp_le_f32_e32 vcc, v0, v2
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc
; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; SI-SDAG-LABEL: v_exp_f32_contract:
@@ -7124,11 +7124,11 @@ define float @v_exp_f32_contract(float %in) {
; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2
; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
-; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
+; SI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2
; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
-; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
-; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; SI-GISEL-NEXT: v_cmp_le_f32_e32 vcc, v0, v2
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc
; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; R600-LABEL: v_exp_f32_contract:
@@ -7190,11 +7190,11 @@ define float @v_exp_f32_contract_daz(float %in) #0 {
; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
-; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
+; VI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2
; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
-; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; VI-GISEL-NEXT: v_cmp_le_f32_e32 vcc, v0, v2
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc
; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX900-SDAG-LABEL: v_exp_f32_contract_daz:
@@ -7236,11 +7236,11 @@ define float @v_exp_f32_contract_daz(float %in) #0 {
; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
-; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
+; GFX900-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2
; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
-; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
-; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
-; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX900-GISEL-NEXT: v_cmp_le_f32_e32 vcc, v0, v2
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc
; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; SI-SDAG-LABEL: v_exp_f32_contract_daz:
@@ -7282,11 +7282,11 @@ define float @v_exp_f32_contract_daz(float %in) #0 {
; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2
; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
-; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
+; SI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2
; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
-; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
-; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; SI-GISEL-NEXT: v_cmp_le_f32_e32 vcc, v0, v2
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc
; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; R600-LABEL: v_exp_f32_contract_daz:
@@ -7343,8 +7343,8 @@ define float @v_exp_f32_contract_nnan_ninf(float %in) {
; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1
; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
-; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
+; VI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX900-SDAG-LABEL: v_exp_f32_contract_nnan_ninf:
@@ -7381,8 +7381,8 @@ define float @v_exp_f32_contract_nnan_ninf(float %in) {
; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1
; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
-; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
-; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
+; GFX900-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; SI-SDAG-LABEL: v_exp_f32_contract_nnan_ninf:
@@ -7419,8 +7419,8 @@ define float @v_exp_f32_contract_nnan_ninf(float %in) {
; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1
; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2
; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
-; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
-; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
+; SI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; R600-LABEL: v_exp_f32_contract_nnan_ninf:
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.exp10.ll b/llvm/test/CodeGen/AMDGPU/llvm.exp10.ll
index 0c2e6f82c9115..35040d479f71d 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.exp10.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.exp10.ll
@@ -71,11 +71,11 @@ define amdgpu_kernel void @s_exp10_f32(ptr addrspace(1) %out, float %in) {
; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
; VI-GISEL-NEXT: v_ldexp_f32 v0, v0, v1
; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0xc23369f4
-; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s2, v1
+; VI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, s2, v1
; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x421a209b
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc
-; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s2, v1
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v2, v0, v2, vcc
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc
+; VI-GISEL-NEXT: v_cmp_le_f32_e32 vcc, s2, v1
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v2, v2, v0, vcc
; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; VI-GISEL-NEXT: v_mov_b32_e32 v0, s0
; VI-GISEL-NEXT: v_mov_b32_e32 v1, s1
@@ -125,13 +125,13 @@ define amdgpu_kernel void @s_exp10_f32(ptr addrspace(1) %out, float %in) {
; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v1, v3
; GFX900-GISEL-NEXT: v_exp_f32_e32 v0, v0
; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4
-; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s2, v2
+; GFX900-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, s2, v2
; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
; GFX900-GISEL-NEXT: v_ldexp_f32 v0, v0, v1
; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x421a209b
-; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc
-; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s2, v1
-; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc
+; GFX900-GISEL-NEXT: v_cmp_le_f32_e32 vcc, s2, v1
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX900-GISEL-NEXT: global_store_dword v1, v0, s[0:1]
; GFX900-GISEL-NEXT: s_endpgm
@@ -181,13 +181,13 @@ define amdgpu_kernel void @s_exp10_f32(ptr addrspace(1) %out, float %in) {
; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v1, v3
; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0
; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4
-; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s2, v2
+; SI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, s2, v2
; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
; SI-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v1
; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x421a209b
-; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc
-; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s2, v1
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc
+; SI-GISEL-NEXT: v_cmp_le_f32_e32 vcc, s2, v1
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; SI-GISEL-NEXT: s_mov_b32 s2, -1
; SI-GISEL-NEXT: buffer_store_dword v0, off, s[0:3], 0
; SI-GISEL-NEXT: s_endpgm
@@ -426,18 +426,18 @@ define amdgpu_kernel void @s_exp10_v2f32(ptr addrspace(1) %out, <2 x float> %in)
; VI-GISEL-NEXT: v_exp_f32_e32 v5, v0
; VI-GISEL-NEXT: v_ldexp_f32 v2, v2, v3
; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0xc23369f4
-; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s2, v3
+; VI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, s2, v3
; VI-GISEL-NEXT: v_mov_b32_e32 v4, 0x421a209b
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
; VI-GISEL-NEXT: v_mov_b32_e32 v6, 0x7f800000
-; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s2, v4
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v6, vcc
+; VI-GISEL-NEXT: v_cmp_le_f32_e32 vcc, s2, v4
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v6, v2, vcc
; VI-GISEL-NEXT: v_ldexp_f32 v1, v5, v1
-; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s3, v3
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
-; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s3, v4
+; VI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, s3, v3
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; VI-GISEL-NEXT: v_cmp_le_f32_e32 vcc, s3, v4
; VI-GISEL-NEXT: v_mov_b32_e32 v3, s1
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v6, vcc
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, v6, v1, vcc
; VI-GISEL-NEXT: v_mov_b32_e32 v2, s0
; VI-GISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
; VI-GISEL-NEXT: s_endpgm
@@ -506,16 +506,16 @@ define amdgpu_kernel void @s_exp10_v2f32(ptr addrspace(1) %out, <2 x float> %in)
; GFX900-GISEL-NEXT: v_exp_f32_e32 v5, v0
; GFX900-GISEL-NEXT: v_mov_b32_e32 v4, 0xc23369f4
; GFX900-GISEL-NEXT: v_ldexp_f32 v2, v2, v3
-; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s2, v4
+; GFX900-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, s2, v4
; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x421a209b
-; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc
-; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s2, v3
-; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v6, vcc
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
+; GFX900-GISEL-NEXT: v_cmp_le_f32_e32 vcc, s2, v3
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v6, v2, vcc
; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v5, v1
-; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s3, v4
-; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
-; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s3, v3
-; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v6, vcc
+; GFX900-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, s3, v4
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX900-GISEL-NEXT: v_cmp_le_f32_e32 vcc, s3, v3
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, v6, v1, vcc
; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0
; GFX900-GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
; GFX900-GISEL-NEXT: s_endpgm
@@ -587,16 +587,16 @@ define amdgpu_kernel void @s_exp10_v2f32(ptr addrspace(1) %out, <2 x float> %in)
; SI-GISEL-NEXT: v_exp_f32_e32 v5, v0
; SI-GISEL-NEXT: v_mov_b32_e32 v4, 0xc23369f4
; SI-GISEL-NEXT: v_ldexp_f32_e32 v2, v2, v3
-; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s2, v4
+; SI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, s2, v4
; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x421a209b
-; SI-GISEL-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc
-; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s2, v3
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v6, vcc
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
+; SI-GISEL-NEXT: v_cmp_le_f32_e32 vcc, s2, v3
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v6, v2, vcc
; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v5, v1
-; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s3, v4
-; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
-; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s3, v3
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v6, vcc
+; SI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, s3, v4
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; SI-GISEL-NEXT: v_cmp_le_f32_e32 vcc, s3, v3
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, v6, v1, vcc
; SI-GISEL-NEXT: s_mov_b32 s2, -1
; SI-GISEL-NEXT: s_mov_b32 s3, 0xf000
; SI-GISEL-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
@@ -962,10 +962,10 @@ define amdgpu_kernel void @s_exp10_v3f32(ptr addrspace(1) %out, <3 x float> %in)
; VI-GISEL-NEXT: v_exp_f32_e32 v5, v5
; VI-GISEL-NEXT: v_ldexp_f32 v0, v0, v3
; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0xc23369f4
-; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s0, v3
+; VI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, s0, v3
; VI-GISEL-NEXT: v_mov_b32_e32 v4, 0x421a209b
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc
-; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s0, v4
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc
+; VI-GISEL-NEXT: v_cmp_le_f32_e32 vcc, s0, v4
; VI-GISEL-NEXT: s_and_b32 s0, s2, 0xfffff000
; VI-GISEL-NEXT: v_ldexp_f32 v5, v5, v6
; VI-GISEL-NEXT: v_mov_b32_e32 v6, s0
@@ -982,17 +982,17 @@ define amdgpu_kernel void @s_exp10_v3f32(ptr addrspace(1) %out, <3 x float> %in)
; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v6
; VI-GISEL-NEXT: v_exp_f32_e32 v6, v1
; VI-GISEL-NEXT: v_mov_b32_e32 v7, 0x7f800000
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v7, vcc
-; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s1, v3
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v5, 0, vcc
-; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s1, v4
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v7, v0, vcc
+; VI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, s1, v3
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v5, vcc
+; VI-GISEL-NEXT: v_cmp_le_f32_e32 vcc, s1, v4
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, v7, v1, vcc
; VI-GISEL-NEXT: v_ldexp_f32 v2, v6, v2
-; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s2, v3
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc
-; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s2, v4
+; VI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, s2, v3
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
+; VI-GISEL-NEXT: v_cmp_le_f32_e32 vcc, s2, v4
; VI-GISEL-NEXT: v_mov_b32_e32 v3, s4
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v2, v2, v7, vcc
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v2, v7, v2, vcc
; VI-GISEL-NEXT: v_mov_b32_e32 v4, s5
; VI-GISEL-NEXT: flat_store_dwordx3 v[3:4], v[0:2]
; VI-GISEL-NEXT: s_endpgm
@@ -1082,23 +1082,23 @@ define amdgpu_kernel void @s_exp10_v3f32(ptr addrspace(1) %out, <3 x float> %in)
; GFX900-GISEL-NEXT: v_mov_b32_e32 v4, 0xc23369f4
; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v6, v1
; GFX900-GISEL-NEXT: v_ldexp_f32 v0, v0, v3
-; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s0, v4
+; GFX900-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, s0, v4
; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x421a209b
; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v2
; GFX900-GISEL-NEXT: v_exp_f32_e32 v6, v1
-; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc
; GFX900-GISEL-NEXT: v_mov_b32_e32 v7, 0x7f800000
-; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s0, v3
-; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v7, vcc
-; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s1, v4
-; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v5, 0, vcc
-; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s1, v3
-; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc
+; GFX900-GISEL-NEXT: v_cmp_le_f32_e32 vcc, s0, v3
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v7, v0, vcc
+; GFX900-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, s1, v4
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v5, vcc
+; GFX900-GISEL-NEXT: v_cmp_le_f32_e32 vcc, s1, v3
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, v7, v1, vcc
; GFX900-GISEL-NEXT: v_ldexp_f32 v2, v6, v2
-; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s2, v4
-; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc
-; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s2, v3
-; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v2, v2, v7, vcc
+; GFX900-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, s2, v4
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
+; GFX900-GISEL-NEXT: v_cmp_le_f32_e32 vcc, s2, v3
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v2, v7, v2, vcc
; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0
; GFX900-GISEL-NEXT: global_store_dwordx3 v3, v[0:2], s[6:7]
; GFX900-GISEL-NEXT: s_endpgm
@@ -1191,24 +1191,24 @@ define amdgpu_kernel void @s_exp10_v3f32(ptr addrspace(1) %out, <3 x float> %in)
; SI-GISEL-NEXT: v_mov_b32_e32 v4, 0xc23369f4
; SI-GISEL-NEXT: v_add_f32_e32 v1, v6, v1
; SI-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v3
-; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s0, v4
+; SI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, s0, v4
; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x421a209b
; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v2
; SI-GISEL-NEXT: v_exp_f32_e32 v6, v1
-; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc
; SI-GISEL-NEXT: v_mov_b32_e32 v7, 0x7f800000
-; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s0, v3
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v7, vcc
-; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s1, v4
-; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v5, 0, vcc
-; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s1, v3
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc
+; SI-GISEL-NEXT: v_cmp_le_f32_e32 vcc, s0, v3
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v7, v0, vcc
+; SI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, s1, v4
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v5, vcc
+; SI-GISEL-NEXT: v_cmp_le_f32_e32 vcc, s1, v3
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, v7, v1, vcc
; SI-GISEL-NEXT: v_ldexp_f32_e32 v2, v6, v2
-; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s2, v4
-; SI-GISEL-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc
-; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s2, v3
+; SI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, s2, v4
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
+; SI-GISEL-NEXT: v_cmp_le_f32_e32 vcc, s2, v3
; SI-GISEL-NEXT: s_mov_b32 s7, 0xf000
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v2, v2, v7, vcc
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v2, v7, v2, vcc
; SI-GISEL-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
; SI-GISEL-NEXT: buffer_store_dword v2, off, s[4:7], 0 offset:8
; SI-GISEL-NEXT: s_endpgm
@@ -1721,9 +1721,9 @@ define amdgpu_kernel void @s_exp10_v4f32(ptr addrspace(1) %out, <4 x float> %in)
; VI-GISEL-NEXT: v_add_f32_e32 v1, v6, v1
; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v6, v7
; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1
-; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s0, v4
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc
-; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s0, v5
+; VI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, s0, v4
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc
+; VI-GISEL-NEXT: v_cmp_le_f32_e32 vcc, s0, v5
; VI-GISEL-NEXT: s_and_b32 s0, s2, 0xfffff000
; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v6
; VI-GISEL-NEXT: v_mov_b32_e32 v6, s0
@@ -1753,23 +1753,23 @@ define amdgpu_kernel void @s_exp10_v4f32(ptr addrspace(1) %out, <4 x float> %in)
; VI-GISEL-NEXT: v_sub_f32_e32 v2, v2, v8
; VI-GISEL-NEXT: v_mov_b32_e32 v7, 0x7f800000
; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v3
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v7, vcc
-; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s1, v4
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v7, v0, vcc
+; VI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, s1, v4
; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v3, v8
; VI-GISEL-NEXT: v_exp_f32_e32 v8, v2
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
-; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s1, v5
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc
-; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s2, v4
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v2, v6, 0, vcc
-; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s2, v5
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v2, v2, v7, vcc
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; VI-GISEL-NEXT: v_cmp_le_f32_e32 vcc, s1, v5
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, v7, v1, vcc
+; VI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, s2, v4
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v6, vcc
+; VI-GISEL-NEXT: v_cmp_le_f32_e32 vcc, s2, v5
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v2, v7, v2, vcc
; VI-GISEL-NEXT: v_ldexp_f32 v3, v8, v3
-; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s3, v4
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v3, v3, 0, vcc
-; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s3, v5
+; VI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, s3, v4
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v3, 0, v3, vcc
+; VI-GISEL-NEXT: v_cmp_le_f32_e32 vcc, s3, v5
; VI-GISEL-NEXT: v_mov_b32_e32 v4, s4
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v3, v3, v7, vcc
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v3, v7, v3, vcc
; VI-GISEL-NEXT: v_mov_b32_e32 v5, s5
; VI-GISEL-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; VI-GISEL-NEXT: s_endpgm
@@ -1857,7 +1857,7 @@ define amdgpu_kernel void @s_exp10_v4f32(ptr addrspace(1) %out, <4 x float> %in)
; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v1, v4
; GFX900-GISEL-NEXT: v_exp_f32_e32 v0, v0
; GFX900-GISEL-NEXT: v_mov_b32_e32 v4, 0xc23369f4
-; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s0, v4
+; GFX900-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, s0, v4
; GFX900-GISEL-NEXT: v_ldexp_f32 v0, v0, v1
; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, s1, v2
; GFX900-GISEL-NEXT: v_fma_f32 v6, s1, v2, -v1
@@ -1867,9 +1867,9 @@ define amdgpu_kernel void @s_exp10_v4f32(ptr addrspace(1) %out, <4 x float> %in)
; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v1, v6
; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v6, v7
; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1
-; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc
; GFX900-GISEL-NEXT: v_mov_b32_e32 v7, 0x7f800000
-; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s0, v5
+; GFX900-GISEL-NEXT: v_cmp_le_f32_e32 vcc, s0, v5
; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v6
; GFX900-GISEL-NEXT: v_mul_f32_e32 v6, s2, v2
; GFX900-GISEL-NEXT: v_fma_f32 v8, s2, v2, -v6
@@ -1879,9 +1879,9 @@ define amdgpu_kernel void @s_exp10_v4f32(ptr addrspace(1) %out, <4 x float> %in)
; GFX900-GISEL-NEXT: v_add_f32_e32 v6, v6, v8
; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v8, v9
; GFX900-GISEL-NEXT: v_exp_f32_e32 v6, v6
-; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v7, vcc
-; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s1, v4
-; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v7, v0, vcc
+; GFX900-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, s1, v4
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
; GFX900-GISEL-NEXT: v_ldexp_f32 v6, v6, v8
; GFX900-GISEL-NEXT: v_mul_f32_e32 v8, s3, v2
; GFX900-GISEL-NEXT: v_fma_f32 v2, s3, v2, -v8
@@ -1891,17 +1891,17 @@ define amdgpu_kernel void @s_exp10_v4f32(ptr addrspace(1) %out, <4 x float> %in)
; GFX900-GISEL-NEXT: v_add_f32_e32 v2, v8, v2
; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v3, v3
; GFX900-GISEL-NEXT: v_exp_f32_e32 v8, v2
-; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s1, v5
-; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc
-; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s2, v4
-; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v2, v6, 0, vcc
-; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s2, v5
-; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v2, v2, v7, vcc
+; GFX900-GISEL-NEXT: v_cmp_le_f32_e32 vcc, s1, v5
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, v7, v1, vcc
+; GFX900-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, s2, v4
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v6, vcc
+; GFX900-GISEL-NEXT: v_cmp_le_f32_e32 vcc, s2, v5
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v2, v7, v2, vcc
; GFX900-GISEL-NEXT: v_ldexp_f32 v3, v8, v3
-; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s3, v4
-; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v3, v3, 0, vcc
-; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s3, v5
-; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v3, v3, v7, vcc
+; GFX900-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, s3, v4
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v3, 0, v3, vcc
+; GFX900-GISEL-NEXT: v_cmp_le_f32_e32 vcc, s3, v5
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v3, v7, v3, vcc
; GFX900-GISEL-NEXT: v_mov_b32_e32 v4, 0
; GFX900-GISEL-NEXT: global_store_dwordx4 v4, v[0:3], s[6:7]
; GFX900-GISEL-NEXT: s_endpgm
@@ -1991,7 +1991,7 @@ define amdgpu_kernel void @s_exp10_v4f32(ptr addrspace(1) %out, <4 x float> %in)
; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v1, v4
; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0
; SI-GISEL-NEXT: v_mov_b32_e32 v4, 0xc23369f4
-; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s0, v4
+; SI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, s0, v4
; SI-GISEL-NEXT: s_mov_b32 s6, -1
; SI-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v1
; SI-GISEL-NEXT: v_mul_f32_e32 v1, s1, v2
@@ -2002,9 +2002,9 @@ define amdgpu_kernel void @s_exp10_v4f32(ptr addrspace(1) %out, <4 x float> %in)
; SI-GISEL-NEXT: v_add_f32_e32 v1, v1, v6
; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v6, v7
; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1
-; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc
; SI-GISEL-NEXT: v_mov_b32_e32 v7, 0x7f800000
-; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s0, v5
+; SI-GISEL-NEXT: v_cmp_le_f32_e32 vcc, s0, v5
; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v6
; SI-GISEL-NEXT: v_mul_f32_e32 v6, s2, v2
; SI-GISEL-NEXT: v_fma_f32 v8, s2, v2, -v6
@@ -2014,9 +2014,9 @@ define amdgpu_kernel void @s_exp10_v4f32(ptr addrspace(1) %out, <4 x float> %in)
; SI-GISEL-NEXT: v_add_f32_e32 v6, v6, v8
; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v8, v9
; SI-GISEL-NEXT: v_exp_f32_e32 v6, v6
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v7, vcc
-; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s1, v4
-; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v7, v0, vcc
+; SI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, s1, v4
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
; SI-GISEL-NEXT: v_ldexp_f32_e32 v6, v6, v8
; SI-GISEL-NEXT: v_mul_f32_e32 v8, s3, v2
; SI-GISEL-NEXT: v_fma_f32 v2, s3, v2, -v8
@@ -2026,17 +2026,17 @@ define amdgpu_kernel void @s_exp10_v4f32(ptr addrspace(1) %out, <4 x float> %in)
; SI-GISEL-NEXT: v_add_f32_e32 v2, v8, v2
; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v3, v3
; SI-GISEL-NEXT: v_exp_f32_e32 v8, v2
-; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s1, v5
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc
-; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s2, v4
-; SI-GISEL-NEXT: v_cndmask_b32_e64 v2, v6, 0, vcc
-; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s2, v5
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v2, v2, v7, vcc
+; SI-GISEL-NEXT: v_cmp_le_f32_e32 vcc, s1, v5
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, v7, v1, vcc
+; SI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, s2, v4
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v6, vcc
+; SI-GISEL-NEXT: v_cmp_le_f32_e32 vcc, s2, v5
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v2, v7, v2, vcc
; SI-GISEL-NEXT: v_ldexp_f32_e32 v3, v8, v3
-; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s3, v4
-; SI-GISEL-NEXT: v_cndmask_b32_e64 v3, v3, 0, vcc
-; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s3, v5
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v3, v3, v7, vcc
+; SI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, s3, v4
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v3, 0, v3, vcc
+; SI-GISEL-NEXT: v_cmp_le_f32_e32 vcc, s3, v5
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v3, v7, v3, vcc
; SI-GISEL-NEXT: s_mov_b32 s7, 0xf000
; SI-GISEL-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
; SI-GISEL-NEXT: s_endpgm
@@ -2567,11 +2567,11 @@ define float @v_exp10_f32(float %in) {
; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4
-; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
+; VI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2
; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x421a209b
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
-; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; VI-GISEL-NEXT: v_cmp_le_f32_e32 vcc, v0, v2
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc
; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX900-SDAG-LABEL: v_exp10_f32:
@@ -2613,11 +2613,11 @@ define float @v_exp10_f32(float %in) {
; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4
-; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
+; GFX900-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2
; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x421a209b
-; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
-; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
-; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX900-GISEL-NEXT: v_cmp_le_f32_e32 vcc, v0, v2
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc
; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; SI-SDAG-LABEL: v_exp10_f32:
@@ -2659,11 +2659,11 @@ define float @v_exp10_f32(float %in) {
; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2
; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4
-; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
+; SI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2
; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x421a209b
-; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
-; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; SI-GISEL-NEXT: v_cmp_le_f32_e32 vcc, v0, v2
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc
; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; R600-LABEL: v_exp10_f32:
@@ -2727,11 +2727,11 @@ define float @v_exp10_fabs_f32(float %in) {
; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4
-; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2
+; VI-GISEL-NEXT: v_cmp_ge_f32_e64 vcc, |v0|, v2
; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x421a209b
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, s[4:5]
-; VI-GISEL-NEXT: v_cmp_gt_f32_e64 vcc, |v0|, v2
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; VI-GISEL-NEXT: v_cmp_le_f32_e64 vcc, |v0|, v2
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc
; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX900-SDAG-LABEL: v_exp10_fabs_f32:
@@ -2773,11 +2773,11 @@ define float @v_exp10_fabs_f32(float %in) {
; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4
-; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2
+; GFX900-GISEL-NEXT: v_cmp_ge_f32_e64 vcc, |v0|, v2
; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x421a209b
-; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, s[4:5]
-; GFX900-GISEL-NEXT: v_cmp_gt_f32_e64 vcc, |v0|, v2
-; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX900-GISEL-NEXT: v_cmp_le_f32_e64 vcc, |v0|, v2
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc
; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; SI-SDAG-LABEL: v_exp10_fabs_f32:
@@ -2819,11 +2819,11 @@ define float @v_exp10_fabs_f32(float %in) {
; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2
; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4
-; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2
+; SI-GISEL-NEXT: v_cmp_ge_f32_e64 vcc, |v0|, v2
; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x421a209b
-; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, s[4:5]
-; SI-GISEL-NEXT: v_cmp_gt_f32_e64 vcc, |v0|, v2
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; SI-GISEL-NEXT: v_cmp_le_f32_e64 vcc, |v0|, v2
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc
; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; R600-LABEL: v_exp10_fabs_f32:
@@ -2888,11 +2888,11 @@ define float @v_exp10_fneg_fabs_f32(float %in) {
; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4
-; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], -|v0|, v2
+; VI-GISEL-NEXT: v_cmp_ge_f32_e64 vcc, -|v0|, v2
; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x421a209b
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, s[4:5]
-; VI-GISEL-NEXT: v_cmp_gt_f32_e64 vcc, -|v0|, v2
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; VI-GISEL-NEXT: v_cmp_le_f32_e64 vcc, -|v0|, v2
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc
; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX900-SDAG-LABEL: v_exp10_fneg_fabs_f32:
@@ -2934,11 +2934,11 @@ define float @v_exp10_fneg_fabs_f32(float %in) {
; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4
-; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], -|v0|, v2
+; GFX900-GISEL-NEXT: v_cmp_ge_f32_e64 vcc, -|v0|, v2
; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x421a209b
-; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, s[4:5]
-; GFX900-GISEL-NEXT: v_cmp_gt_f32_e64 vcc, -|v0|, v2
-; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX900-GISEL-NEXT: v_cmp_le_f32_e64 vcc, -|v0|, v2
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc
; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; SI-SDAG-LABEL: v_exp10_fneg_fabs_f32:
@@ -2980,11 +2980,11 @@ define float @v_exp10_fneg_fabs_f32(float %in) {
; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2
; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4
-; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], -|v0|, v2
+; SI-GISEL-NEXT: v_cmp_ge_f32_e64 vcc, -|v0|, v2
; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x421a209b
-; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, s[4:5]
-; SI-GISEL-NEXT: v_cmp_gt_f32_e64 vcc, -|v0|, v2
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; SI-GISEL-NEXT: v_cmp_le_f32_e64 vcc, -|v0|, v2
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc
; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; R600-LABEL: v_exp10_fneg_fabs_f32:
@@ -3050,11 +3050,11 @@ define float @v_exp10_fneg_f32(float %in) {
; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4
-; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], -v0, v2
+; VI-GISEL-NEXT: v_cmp_ge_f32_e64 vcc, -v0, v2
; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x421a209b
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, s[4:5]
-; VI-GISEL-NEXT: v_cmp_gt_f32_e64 vcc, -v0, v2
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; VI-GISEL-NEXT: v_cmp_le_f32_e64 vcc, -v0, v2
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc
; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX900-SDAG-LABEL: v_exp10_fneg_f32:
@@ -3096,11 +3096,11 @@ define float @v_exp10_fneg_f32(float %in) {
; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4
-; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], -v0, v2
+; GFX900-GISEL-NEXT: v_cmp_ge_f32_e64 vcc, -v0, v2
; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x421a209b
-; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, s[4:5]
-; GFX900-GISEL-NEXT: v_cmp_gt_f32_e64 vcc, -v0, v2
-; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX900-GISEL-NEXT: v_cmp_le_f32_e64 vcc, -v0, v2
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc
; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; SI-SDAG-LABEL: v_exp10_fneg_f32:
@@ -3142,11 +3142,11 @@ define float @v_exp10_fneg_f32(float %in) {
; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2
; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4
-; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], -v0, v2
+; SI-GISEL-NEXT: v_cmp_ge_f32_e64 vcc, -v0, v2
; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x421a209b
-; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, s[4:5]
-; SI-GISEL-NEXT: v_cmp_gt_f32_e64 vcc, -v0, v2
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; SI-GISEL-NEXT: v_cmp_le_f32_e64 vcc, -v0, v2
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc
; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; R600-LABEL: v_exp10_fneg_f32:
@@ -3420,8 +3420,8 @@ define float @v_exp10_f32_ninf(float %in) {
; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1
; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4
-; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
+; VI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX900-SDAG-LABEL: v_exp10_f32_ninf:
@@ -3458,8 +3458,8 @@ define float @v_exp10_f32_ninf(float %in) {
; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1
; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4
-; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
-; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
+; GFX900-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; SI-SDAG-LABEL: v_exp10_f32_ninf:
@@ -3496,8 +3496,8 @@ define float @v_exp10_f32_ninf(float %in) {
; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1
; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2
; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4
-; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
-; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
+; SI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; R600-LABEL: v_exp10_f32_ninf:
@@ -3828,11 +3828,11 @@ define float @v_exp10_f32_daz(float %in) #0 {
; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4
-; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
+; VI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2
; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x421a209b
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
-; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; VI-GISEL-NEXT: v_cmp_le_f32_e32 vcc, v0, v2
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc
; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX900-SDAG-LABEL: v_exp10_f32_daz:
@@ -3874,11 +3874,11 @@ define float @v_exp10_f32_daz(float %in) #0 {
; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4
-; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
+; GFX900-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2
; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x421a209b
-; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
-; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
-; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX900-GISEL-NEXT: v_cmp_le_f32_e32 vcc, v0, v2
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc
; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; SI-SDAG-LABEL: v_exp10_f32_daz:
@@ -3920,11 +3920,11 @@ define float @v_exp10_f32_daz(float %in) #0 {
; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2
; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4
-; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
+; SI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2
; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x421a209b
-; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
-; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; SI-GISEL-NEXT: v_cmp_le_f32_e32 vcc, v0, v2
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc
; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; R600-LABEL: v_exp10_f32_daz:
@@ -3986,11 +3986,11 @@ define float @v_exp10_f32_nnan(float %in) {
; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4
-; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
+; VI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2
; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x421a209b
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
-; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; VI-GISEL-NEXT: v_cmp_le_f32_e32 vcc, v0, v2
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc
; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX900-SDAG-LABEL: v_exp10_f32_nnan:
@@ -4032,11 +4032,11 @@ define float @v_exp10_f32_nnan(float %in) {
; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4
-; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
+; GFX900-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2
; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x421a209b
-; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
-; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
-; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX900-GISEL-NEXT: v_cmp_le_f32_e32 vcc, v0, v2
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc
; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; SI-SDAG-LABEL: v_exp10_f32_nnan:
@@ -4078,11 +4078,11 @@ define float @v_exp10_f32_nnan(float %in) {
; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2
; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4
-; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
+; SI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2
; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x421a209b
-; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
-; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; SI-GISEL-NEXT: v_cmp_le_f32_e32 vcc, v0, v2
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc
; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; R600-LABEL: v_exp10_f32_nnan:
@@ -4144,11 +4144,11 @@ define float @v_exp10_f32_nnan_daz(float %in) #0 {
; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4
-; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
+; VI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2
; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x421a209b
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
-; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; VI-GISEL-NEXT: v_cmp_le_f32_e32 vcc, v0, v2
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc
; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX900-SDAG-LABEL: v_exp10_f32_nnan_daz:
@@ -4190,11 +4190,11 @@ define float @v_exp10_f32_nnan_daz(float %in) #0 {
; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4
-; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
+; GFX900-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2
; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x421a209b
-; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
-; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
-; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX900-GISEL-NEXT: v_cmp_le_f32_e32 vcc, v0, v2
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc
; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; SI-SDAG-LABEL: v_exp10_f32_nnan_daz:
@@ -4236,11 +4236,11 @@ define float @v_exp10_f32_nnan_daz(float %in) #0 {
; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2
; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4
-; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
+; SI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2
; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x421a209b
-; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
-; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; SI-GISEL-NEXT: v_cmp_le_f32_e32 vcc, v0, v2
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc
; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; R600-LABEL: v_exp10_f32_nnan_daz:
@@ -4302,11 +4302,11 @@ define float @v_exp10_f32_nnan_dynamic(float %in) #1 {
; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4
-; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
+; VI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2
; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x421a209b
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
-; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; VI-GISEL-NEXT: v_cmp_le_f32_e32 vcc, v0, v2
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc
; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX900-SDAG-LABEL: v_exp10_f32_nnan_dynamic:
@@ -4348,11 +4348,11 @@ define float @v_exp10_f32_nnan_dynamic(float %in) #1 {
; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4
-; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
+; GFX900-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2
; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x421a209b
-; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
-; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
-; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX900-GISEL-NEXT: v_cmp_le_f32_e32 vcc, v0, v2
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc
; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; SI-SDAG-LABEL: v_exp10_f32_nnan_dynamic:
@@ -4394,11 +4394,11 @@ define float @v_exp10_f32_nnan_dynamic(float %in) #1 {
; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2
; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4
-; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
+; SI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2
; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x421a209b
-; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
-; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; SI-GISEL-NEXT: v_cmp_le_f32_e32 vcc, v0, v2
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc
; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; R600-LABEL: v_exp10_f32_nnan_dynamic:
@@ -4455,8 +4455,8 @@ define float @v_exp10_f32_ninf_daz(float %in) #0 {
; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1
; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4
-; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
+; VI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX900-SDAG-LABEL: v_exp10_f32_ninf_daz:
@@ -4493,8 +4493,8 @@ define float @v_exp10_f32_ninf_daz(float %in) #0 {
; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1
; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4
-; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
-; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
+; GFX900-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; SI-SDAG-LABEL: v_exp10_f32_ninf_daz:
@@ -4531,8 +4531,8 @@ define float @v_exp10_f32_ninf_daz(float %in) #0 {
; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1
; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2
; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4
-; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
-; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
+; SI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; R600-LABEL: v_exp10_f32_ninf_daz:
@@ -4589,8 +4589,8 @@ define float @v_exp10_f32_ninf_dynamic(float %in) #1 {
; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1
; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4
-; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
+; VI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX900-SDAG-LABEL: v_exp10_f32_ninf_dynamic:
@@ -4627,8 +4627,8 @@ define float @v_exp10_f32_ninf_dynamic(float %in) #1 {
; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1
; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4
-; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
-; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
+; GFX900-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; SI-SDAG-LABEL: v_exp10_f32_ninf_dynamic:
@@ -4665,8 +4665,8 @@ define float @v_exp10_f32_ninf_dynamic(float %in) #1 {
; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1
; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2
; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4
-; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
-; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
+; SI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; R600-LABEL: v_exp10_f32_ninf_dynamic:
@@ -4723,8 +4723,8 @@ define float @v_exp10_f32_nnan_ninf(float %in) {
; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1
; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4
-; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
+; VI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX900-SDAG-LABEL: v_exp10_f32_nnan_ninf:
@@ -4761,8 +4761,8 @@ define float @v_exp10_f32_nnan_ninf(float %in) {
; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1
; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4
-; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
-; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
+; GFX900-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; SI-SDAG-LABEL: v_exp10_f32_nnan_ninf:
@@ -4799,8 +4799,8 @@ define float @v_exp10_f32_nnan_ninf(float %in) {
; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1
; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2
; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4
-; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
-; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
+; SI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; R600-LABEL: v_exp10_f32_nnan_ninf:
@@ -4857,8 +4857,8 @@ define float @v_exp10_f32_nnan_ninf_daz(float %in) #0 {
; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1
; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4
-; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
+; VI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX900-SDAG-LABEL: v_exp10_f32_nnan_ninf_daz:
@@ -4895,8 +4895,8 @@ define float @v_exp10_f32_nnan_ninf_daz(float %in) #0 {
; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1
; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4
-; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
-; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
+; GFX900-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; SI-SDAG-LABEL: v_exp10_f32_nnan_ninf_daz:
@@ -4933,8 +4933,8 @@ define float @v_exp10_f32_nnan_ninf_daz(float %in) #0 {
; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1
; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2
; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4
-; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
-; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
+; SI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; R600-LABEL: v_exp10_f32_nnan_ninf_daz:
@@ -4991,8 +4991,8 @@ define float @v_exp10_f32_nnan_ninf_dynamic(float %in) #1 {
; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1
; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4
-; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
+; VI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX900-SDAG-LABEL: v_exp10_f32_nnan_ninf_dynamic:
@@ -5029,8 +5029,8 @@ define float @v_exp10_f32_nnan_ninf_dynamic(float %in) #1 {
; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1
; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4
-; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
-; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
+; GFX900-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; SI-SDAG-LABEL: v_exp10_f32_nnan_ninf_dynamic:
@@ -5067,8 +5067,8 @@ define float @v_exp10_f32_nnan_ninf_dynamic(float %in) #1 {
; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1
; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2
; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4
-; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
-; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
+; SI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; R600-LABEL: v_exp10_f32_nnan_ninf_dynamic:
@@ -5178,11 +5178,11 @@ define float @v_exp10_f32_dynamic_mode(float %in) #1 {
; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4
-; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
+; VI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2
; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x421a209b
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
-; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; VI-GISEL-NEXT: v_cmp_le_f32_e32 vcc, v0, v2
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc
; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX900-SDAG-LABEL: v_exp10_f32_dynamic_mode:
@@ -5224,11 +5224,11 @@ define float @v_exp10_f32_dynamic_mode(float %in) #1 {
; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4
-; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
+; GFX900-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2
; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x421a209b
-; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
-; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
-; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX900-GISEL-NEXT: v_cmp_le_f32_e32 vcc, v0, v2
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc
; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; SI-SDAG-LABEL: v_exp10_f32_dynamic_mode:
@@ -5270,11 +5270,11 @@ define float @v_exp10_f32_dynamic_mode(float %in) #1 {
; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2
; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4
-; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
+; SI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2
; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x421a209b
-; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
-; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; SI-GISEL-NEXT: v_cmp_le_f32_e32 vcc, v0, v2
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc
; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; R600-LABEL: v_exp10_f32_dynamic_mode:
@@ -5322,11 +5322,11 @@ define float @v_exp10_f32_undef() {
; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
; VI-GISEL-NEXT: v_ldexp_f32 v0, v0, v1
; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0xc23369f4
-; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s4, v1
+; VI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, s4, v1
; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x421a209b
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc
-; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s4, v1
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc
+; VI-GISEL-NEXT: v_cmp_le_f32_e32 vcc, s4, v1
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX900-SDAG-LABEL: v_exp10_f32_undef:
@@ -5361,11 +5361,11 @@ define float @v_exp10_f32_undef() {
; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
; GFX900-GISEL-NEXT: v_ldexp_f32 v0, v0, v1
; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0xc23369f4
-; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s4, v1
+; GFX900-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, s4, v1
; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x421a209b
-; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc
-; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s4, v1
-; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc
+; GFX900-GISEL-NEXT: v_cmp_le_f32_e32 vcc, s4, v1
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; SI-SDAG-LABEL: v_exp10_f32_undef:
@@ -5400,11 +5400,11 @@ define float @v_exp10_f32_undef() {
; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
; SI-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v1
; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0xc23369f4
-; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s4, v1
+; SI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, s4, v1
; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x421a209b
-; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc
-; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s4, v1
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc
+; SI-GISEL-NEXT: v_cmp_le_f32_e32 vcc, s4, v1
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; R600-LABEL: v_exp10_f32_undef:
@@ -5494,11 +5494,11 @@ define float @v_exp10_f32_from_fpext_f16(i16 %src.i) {
; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4
-; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
+; VI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2
; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x421a209b
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
-; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; VI-GISEL-NEXT: v_cmp_le_f32_e32 vcc, v0, v2
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc
; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX900-SDAG-LABEL: v_exp10_f32_from_fpext_f16:
@@ -5540,13 +5540,13 @@ define float @v_exp10_f32_from_fpext_f16(i16 %src.i) {
; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v4
; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1
; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0xc23369f4
-; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v3
+; GFX900-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, v0, v3
; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x421a209b
-; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
-; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
-; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX900-GISEL-NEXT: v_cmp_le_f32_e32 vcc, v0, v2
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc
; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; SI-SDAG-LABEL: v_exp10_f32_from_fpext_f16:
@@ -5588,13 +5588,13 @@ define float @v_exp10_f32_from_fpext_f16(i16 %src.i) {
; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v4
; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1
; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0xc23369f4
-; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v3
+; SI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, v0, v3
; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2
; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x421a209b
-; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
-; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; SI-GISEL-NEXT: v_cmp_le_f32_e32 vcc, v0, v2
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc
; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; R600-LABEL: v_exp10_f32_from_fpext_f16:
@@ -5662,11 +5662,11 @@ define float @v_exp10_f32_from_fpext_math_f16(i16 %src0.i, i16 %src1.i) {
; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4
-; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
+; VI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2
; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x421a209b
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
-; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; VI-GISEL-NEXT: v_cmp_le_f32_e32 vcc, v0, v2
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc
; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX900-SDAG-LABEL: v_exp10_f32_from_fpext_math_f16:
@@ -5712,11 +5712,11 @@ define float @v_exp10_f32_from_fpext_math_f16(i16 %src0.i, i16 %src1.i) {
; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4
-; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
+; GFX900-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2
; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x421a209b
-; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
-; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
-; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX900-GISEL-NEXT: v_cmp_le_f32_e32 vcc, v0, v2
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc
; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; SI-SDAG-LABEL: v_exp10_f32_from_fpext_math_f16:
@@ -5766,11 +5766,11 @@ define float @v_exp10_f32_from_fpext_math_f16(i16 %src0.i, i16 %src1.i) {
; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1
; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4
; SI-GISEL-NEXT: v_mov_b32_e32 v4, 0x421a209b
-; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
+; SI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2
; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v3
-; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
-; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v4
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v5, vcc
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; SI-GISEL-NEXT: v_cmp_le_f32_e32 vcc, v0, v4
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v5, v1, vcc
; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; R600-LABEL: v_exp10_f32_from_fpext_math_f16:
@@ -5998,11 +5998,11 @@ define float @v_exp10_f32_from_fpext_math_f16_daz(i16 %src0.i, i16 %src1.i) #0 {
; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4
-; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
+; VI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2
; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x421a209b
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
-; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; VI-GISEL-NEXT: v_cmp_le_f32_e32 vcc, v0, v2
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc
; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX900-SDAG-LABEL: v_exp10_f32_from_fpext_math_f16_daz:
@@ -6048,11 +6048,11 @@ define float @v_exp10_f32_from_fpext_math_f16_daz(i16 %src0.i, i16 %src1.i) #0 {
; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4
-; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
+; GFX900-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2
; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x421a209b
-; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
-; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
-; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX900-GISEL-NEXT: v_cmp_le_f32_e32 vcc, v0, v2
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc
; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; SI-SDAG-LABEL: v_exp10_f32_from_fpext_math_f16_daz:
@@ -6102,11 +6102,11 @@ define float @v_exp10_f32_from_fpext_math_f16_daz(i16 %src0.i, i16 %src1.i) #0 {
; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1
; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4
; SI-GISEL-NEXT: v_mov_b32_e32 v4, 0x421a209b
-; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
+; SI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2
; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v3
-; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
-; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v4
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v5, vcc
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; SI-GISEL-NEXT: v_cmp_le_f32_e32 vcc, v0, v4
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v5, v1, vcc
; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; R600-LABEL: v_exp10_f32_from_fpext_math_f16_daz:
@@ -7126,11 +7126,11 @@ define float @v_exp10_f32_contract(float %in) {
; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4
-; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
+; VI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2
; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x421a209b
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
-; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; VI-GISEL-NEXT: v_cmp_le_f32_e32 vcc, v0, v2
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc
; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX900-SDAG-LABEL: v_exp10_f32_contract:
@@ -7172,11 +7172,11 @@ define float @v_exp10_f32_contract(float %in) {
; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4
-; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
+; GFX900-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2
; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x421a209b
-; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
-; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
-; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX900-GISEL-NEXT: v_cmp_le_f32_e32 vcc, v0, v2
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc
; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; SI-SDAG-LABEL: v_exp10_f32_contract:
@@ -7218,11 +7218,11 @@ define float @v_exp10_f32_contract(float %in) {
; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2
; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4
-; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
+; SI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2
; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x421a209b
-; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
-; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; SI-GISEL-NEXT: v_cmp_le_f32_e32 vcc, v0, v2
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc
; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; R600-LABEL: v_exp10_f32_contract:
@@ -7284,11 +7284,11 @@ define float @v_exp10_f32_contract_daz(float %in) #0 {
; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4
-; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
+; VI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2
; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x421a209b
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
-; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; VI-GISEL-NEXT: v_cmp_le_f32_e32 vcc, v0, v2
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc
; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX900-SDAG-LABEL: v_exp10_f32_contract_daz:
@@ -7330,11 +7330,11 @@ define float @v_exp10_f32_contract_daz(float %in) #0 {
; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4
-; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
+; GFX900-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2
; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x421a209b
-; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
-; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
-; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX900-GISEL-NEXT: v_cmp_le_f32_e32 vcc, v0, v2
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc
; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; SI-SDAG-LABEL: v_exp10_f32_contract_daz:
@@ -7376,11 +7376,11 @@ define float @v_exp10_f32_contract_daz(float %in) #0 {
; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2
; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4
-; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
+; SI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2
; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x421a209b
-; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
-; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; SI-GISEL-NEXT: v_cmp_le_f32_e32 vcc, v0, v2
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc
; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; R600-LABEL: v_exp10_f32_contract_daz:
@@ -7437,8 +7437,8 @@ define float @v_exp10_f32_contract_nnan_ninf(float %in) {
; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1
; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4
-; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
+; VI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX900-SDAG-LABEL: v_exp10_f32_contract_nnan_ninf:
@@ -7475,8 +7475,8 @@ define float @v_exp10_f32_contract_nnan_ninf(float %in) {
; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1
; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4
-; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
-; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
+; GFX900-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; SI-SDAG-LABEL: v_exp10_f32_contract_nnan_ninf:
@@ -7513,8 +7513,8 @@ define float @v_exp10_f32_contract_nnan_ninf(float %in) {
; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1
; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2
; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4
-; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
-; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
+; SI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; R600-LABEL: v_exp10_f32_contract_nnan_ninf: