[llvm] def2285 - AMDGPU: Use pown instead of pow if known integral

Matt Arsenault via llvm-commits llvm-commits at lists.llvm.org
Fri Sep 1 05:22:33 PDT 2023


Author: Matt Arsenault
Date: 2023-09-01T08:22:16-04:00
New Revision: def228553cfd20155c3d5601ea3549b943612ed9

URL: https://github.com/llvm/llvm-project/commit/def228553cfd20155c3d5601ea3549b943612ed9
DIFF: https://github.com/llvm/llvm-project/commit/def228553cfd20155c3d5601ea3549b943612ed9.diff

LOG: AMDGPU: Use pown instead of pow if known integral

https://reviews.llvm.org/D158998
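
In short: when the exponent operand of a pow library call is known to be integral (a whole-number constant, or a value produced by sitofp/uitofp), the call is rewritten to pown with the exponent converted to i32 via fptosi. Since pown takes an integer exponent, the odd/even sign handling of the result is exact by construction. The snippet below is a plain C++ reference sketch of that equivalence, illustrative only: it uses the standard <cmath> functions rather than the OCML/OpenCL builtins, and shows how an integer-exponent pow is computed the same way the expanded code in the tests below does (exp2 of n*log2(|x|) plus a sign fix-up for odd exponents).

#include <cmath>
#include <cstdio>

// pown-style reference: exponentiation with a known-integer exponent, written
// the way the expanded test output computes it: exp2(n * log2(|x|)) plus a
// sign fix-up when the exponent is odd and x is negative.
static double pown_ref(double x, int n) {
  double mag = std::exp2((double)n * std::log2(std::fabs(x)));
  bool odd = n & 1;                 // an odd exponent preserves the sign of x
  return (odd && x < 0.0) ? -mag : mag;
}

int main() {
  // For integral exponents the two forms agree, including for negative bases.
  const int exps[] = {-3, -2, 3, 8};
  for (int n : exps)
    std::printf("n=%3d  pow=%.6f  pown_ref=%.6f\n", n,
                std::pow(-1.5, (double)n), pown_ref(-1.5, n));
  return 0;
}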

Added: 
    

Modified: 
    llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp
    llvm/lib/Target/AMDGPU/AMDGPULibFunc.cpp
    llvm/lib/Target/AMDGPU/AMDGPULibFunc.h
    llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pow-codegen.ll
    llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pow.ll

Removed: 
    


################################################################################
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp b/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp
index 285768ff28a1df..5899b3a95ad819 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp
@@ -17,6 +17,7 @@
 #include "llvm/Analysis/AssumptionCache.h"
 #include "llvm/Analysis/TargetLibraryInfo.h"
 #include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/AttributeMask.h"
 #include "llvm/IR/Dominators.h"
 #include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/IntrinsicInst.h"
@@ -168,6 +169,15 @@ static CallInst *CreateCallEx2(IRB &B, FunctionCallee Callee, Value *Arg1,
   return R;
 }
 
+static FunctionType *getPownType(FunctionType *FT) {
+  Type *PowNExpTy = Type::getInt32Ty(FT->getContext());
+  if (VectorType *VecTy = dyn_cast<VectorType>(FT->getReturnType()))
+    PowNExpTy = VectorType::get(PowNExpTy, VecTy->getElementCount());
+
+  return FunctionType::get(FT->getReturnType(),
+                           {FT->getParamType(0), PowNExpTy}, false);
+}
+
 //  Data structures for table-driven optimizations.
 //  FuncTbl works for both f32 and f64 functions with 1 input argument
 
@@ -738,6 +748,27 @@ bool AMDGPULibCalls::fold(CallInst *CI) {
         return fold_pow(FPOp, B, PowrInfo) || true;
       }
 
+      // pow(x, y) -> pown(x, y) for known integral y
+      if (isKnownIntegral(FPOp->getOperand(1), M->getDataLayout(),
+                          FPOp->getFastMathFlags())) {
+        FunctionType *PownType = getPownType(CI->getFunctionType());
+        AMDGPULibFunc PownInfo(AMDGPULibFunc::EI_POWN, PownType, true);
+        FunctionCallee PownFunc = getFunction(M, PownInfo);
+        if (PownFunc) {
+          // TODO: If the incoming integral value is an sitofp/uitofp, it won't
+          // fold out without a known range. We can probably take the source
+          // value directly.
+          Value *CastedArg =
+              B.CreateFPToSI(FPOp->getOperand(1), PownType->getParamType(1));
+          // Have to drop any nofpclass attributes on the original call site.
+          Call->removeParamAttrs(
+              1, AttributeFuncs::typeIncompatible(CastedArg->getType()));
+          Call->setCalledFunction(PownFunc);
+          Call->setArgOperand(1, CastedArg);
+          return fold_pow(FPOp, B, PownInfo) || true;
+        }
+      }
+
       return fold_pow(FPOp, B, FInfo);
     }
     case AMDGPULibFunc::EI_POWR:
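
Two details of the hunk above worth noting: getPownType() derives pown's signature from pow's (the second floating-point parameter becomes i32, widened to a matching vector of i32 for the vector overloads), and removeParamAttrs() drops parameter attributes such as nofpclass(nan inf) that were valid on the original float operand but are not valid on the new i32 operand. Below is a self-contained sketch of the type mapping; it inlines the same logic as the static helper purely so it compiles on its own (assumes an LLVM development tree to build against).

#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

int main() {
  LLVMContext Ctx;
  Type *F32 = Type::getFloatTy(Ctx);
  Type *V2F32 = FixedVectorType::get(F32, 2);

  // Inlined copy of the getPownType() logic from the patch, for illustration.
  auto GetPownTy = [](FunctionType *FT) -> FunctionType * {
    Type *ExpTy = Type::getInt32Ty(FT->getContext());
    if (auto *VecTy = dyn_cast<VectorType>(FT->getReturnType()))
      ExpTy = VectorType::get(ExpTy, VecTy->getElementCount());
    return FunctionType::get(FT->getReturnType(),
                             {FT->getParamType(0), ExpTy}, /*isVarArg=*/false);
  };

  // float (float, float)  ->  float (float, i32)
  GetPownTy(FunctionType::get(F32, {F32, F32}, false))->print(outs());
  outs() << '\n';
  // <2 x float> (<2 x float>, <2 x float>)  ->  <2 x float> (<2 x float>, <2 x i32>)
  GetPownTy(FunctionType::get(V2F32, {V2F32, V2F32}, false))->print(outs());
  outs() << '\n';
  return 0;
}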

diff --git a/llvm/lib/Target/AMDGPU/AMDGPULibFunc.cpp b/llvm/lib/Target/AMDGPU/AMDGPULibFunc.cpp
index de321963dbe7a1..c798ab5f03b3cc 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULibFunc.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULibFunc.cpp
@@ -527,6 +527,16 @@ AMDGPUMangledLibFunc::AMDGPUMangledLibFunc(
   Leads[1] = copyFrom.Leads[1];
 }
 
+AMDGPUMangledLibFunc::AMDGPUMangledLibFunc(EFuncId id, FunctionType *FT,
+                                           bool SignedInts) {
+  FuncId = id;
+  unsigned NumArgs = FT->getNumParams();
+  if (NumArgs >= 1)
+    Leads[0] = Param::getFromTy(FT->getParamType(0), SignedInts);
+  if (NumArgs >= 2)
+    Leads[1] = Param::getFromTy(FT->getParamType(1), SignedInts);
+}
+
 ///////////////////////////////////////////////////////////////////////////////
 // Demangling
 
@@ -875,6 +885,50 @@ std::string AMDGPUMangledLibFunc::mangleNameItanium() const {
 ///////////////////////////////////////////////////////////////////////////////
 // Misc
 
+AMDGPULibFuncBase::Param AMDGPULibFuncBase::Param::getFromTy(Type *Ty,
+                                                             bool Signed) {
+  Param P;
+  if (FixedVectorType *VT = dyn_cast<FixedVectorType>(Ty)) {
+    P.VectorSize = VT->getNumElements();
+    Ty = VT->getElementType();
+  }
+
+  switch (Ty->getTypeID()) {
+  case Type::FloatTyID:
+    P.ArgType = AMDGPULibFunc::F32;
+    break;
+  case Type::DoubleTyID:
+    P.ArgType = AMDGPULibFunc::F64;
+    break;
+  case Type::HalfTyID:
+    P.ArgType = AMDGPULibFunc::F16;
+    break;
+  case Type::IntegerTyID:
+    switch (cast<IntegerType>(Ty)->getBitWidth()) {
+    case 8:
+      P.ArgType = Signed ? AMDGPULibFunc::I8 : AMDGPULibFunc::U8;
+      break;
+    case 16:
+      P.ArgType = Signed ? AMDGPULibFunc::I16 : AMDGPULibFunc::U16;
+      break;
+    case 32:
+      P.ArgType = Signed ? AMDGPULibFunc::I32 : AMDGPULibFunc::U32;
+      break;
+    case 64:
+      P.ArgType = Signed ? AMDGPULibFunc::I64 : AMDGPULibFunc::U64;
+      break;
+    default:
+      llvm_unreachable("unhandled libcall argument type");
+    }
+
+    break;
+  default:
+    llvm_unreachable("unhandled libcall argument type");
+  }
+
+  return P;
+}
+
 static Type* getIntrinsicParamType(
   LLVMContext& C,
   const AMDGPULibFunc::Param& P,
@@ -1051,6 +1105,10 @@ AMDGPULibFunc::AMDGPULibFunc(EFuncId Id, const AMDGPULibFunc &CopyFrom) {
       Id, *cast<AMDGPUMangledLibFunc>(CopyFrom.Impl.get())));
 }
 
+AMDGPULibFunc::AMDGPULibFunc(EFuncId Id, FunctionType *FT, bool SignedInts) {
+  Impl.reset(new AMDGPUMangledLibFunc(Id, FT, SignedInts));
+}
+
 AMDGPULibFunc::AMDGPULibFunc(StringRef Name, FunctionType *FT) {
   Impl.reset(new AMDGPUUnmangledLibFunc(Name, FT));
 }

diff --git a/llvm/lib/Target/AMDGPU/AMDGPULibFunc.h b/llvm/lib/Target/AMDGPU/AMDGPULibFunc.h
index e88963746649ba..10551bee3fa8d4 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULibFunc.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPULibFunc.h
@@ -18,6 +18,7 @@ class FunctionCallee;
 class FunctionType;
 class Function;
 class Module;
+class Type;
 
 class AMDGPULibFuncBase {
 public:
@@ -290,18 +291,23 @@ class AMDGPULibFuncBase {
   };
 
   struct Param {
-    unsigned char ArgType;
-    unsigned char VectorSize;
-    unsigned char PtrKind;
+    unsigned char ArgType = 0;
+    unsigned char VectorSize = 1;
+    unsigned char PtrKind = 0;
 
-    unsigned char Reserved;
+    unsigned char Reserved = 0;
 
     void reset() {
       ArgType = 0;
       VectorSize = 1;
       PtrKind = 0;
     }
-    Param() { reset(); }
+
+    static Param getIntN(unsigned char NumElts) {
+      return Param{I32, NumElts, 0, 0};
+    }
+
+    static Param getFromTy(Type *Ty, bool Signed);
 
     template <typename Stream>
     void mangleItanium(Stream& os);
@@ -351,7 +357,7 @@ class AMDGPULibFuncImpl : public AMDGPULibFuncBase {
 protected:
   EFuncId FuncId;
   std::string Name;
-  ENamePrefix FKind;
+  ENamePrefix FKind = NOPFX;
 };
 
 /// Wrapper class for AMDGPULIbFuncImpl
@@ -362,6 +368,8 @@ class AMDGPULibFunc : public AMDGPULibFuncBase {
   /// Clone a mangled library func with the Id \p Id and argument info from \p
   /// CopyFrom.
   explicit AMDGPULibFunc(EFuncId Id, const AMDGPULibFunc &CopyFrom);
+  explicit AMDGPULibFunc(EFuncId Id, FunctionType *FT, bool SignedInts);
+
   /// Construct an unmangled library function on the fly.
   explicit AMDGPULibFunc(StringRef FName, FunctionType *FT);
 
@@ -415,6 +423,8 @@ class AMDGPUMangledLibFunc : public AMDGPULibFuncImpl {
   explicit AMDGPUMangledLibFunc();
   explicit AMDGPUMangledLibFunc(EFuncId id,
                                 const AMDGPUMangledLibFunc &copyFrom);
+  explicit AMDGPUMangledLibFunc(EFuncId id, FunctionType *FT,
+                                bool SignedInts = true);
 
   std::string getName() const override;
   unsigned getNumArgs() const override;
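
For reference when reading the test updates below, the Itanium-mangled names involved correspond to: _Z3powff = pow(float, float), _Z4pownfi = pown(float, int), _Z4powndi = pown(double, int), _Z4pownDhi = pown(half, int), _Z3powDv2_fS_ = pow(float2, float2), and _Z4pownDv2_fDv2_i = pown(float2, int2). A quick way to sanity-check such names is LLVM's own demangler; a small sketch follows (the exact output text depends on the demangler version).

#include "llvm/Demangle/Demangle.h"
#include <cstdio>

int main() {
  // Mangled library-call names appearing in the updated tests.
  const char *Names[] = {
      "_Z3powff",          // pow(float, float)
      "_Z4pownfi",         // pown(float, int)
      "_Z4powndi",         // pown(double, int)
      "_Z4pownDhi",        // pown(half, int)
      "_Z3powDv2_fS_",     // pow(float2, float2)
      "_Z4pownDv2_fDv2_i", // pown(float2, int2)
  };
  for (const char *N : Names)
    std::printf("%-20s -> %s\n", N, llvm::demangle(N).c_str());
  return 0;
}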

diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pow-codegen.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pow-codegen.ll
index bd3fc6f0589b6d..8c64ab5952def7 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pow-codegen.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pow-codegen.ll
@@ -58,13 +58,16 @@ define half @test_pow_fast_f16__integral_y(half %x, i32 %y.i) {
 ; CHECK:       ; %bb.0:
 ; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; CHECK-NEXT:    v_cvt_f32_i32_e32 v1, v1
-; CHECK-NEXT:    v_log_f16_e64 v2, |v0|
+; CHECK-NEXT:    v_log_f16_e64 v3, |v0|
 ; CHECK-NEXT:    v_cvt_f16_f32_e32 v1, v1
-; CHECK-NEXT:    v_mul_f16_e32 v2, v2, v1
-; CHECK-NEXT:    v_exp_f16_e32 v2, v2
-; CHECK-NEXT:    v_cvt_i16_f16_e32 v1, v1
+; CHECK-NEXT:    v_cvt_f32_f16_e32 v1, v1
+; CHECK-NEXT:    v_cvt_i32_f32_e32 v1, v1
+; CHECK-NEXT:    v_cvt_f32_i32_e32 v2, v1
 ; CHECK-NEXT:    v_lshlrev_b16_e32 v1, 15, v1
 ; CHECK-NEXT:    v_and_b32_e32 v0, v1, v0
+; CHECK-NEXT:    v_cvt_f16_f32_e32 v2, v2
+; CHECK-NEXT:    v_mul_f16_e32 v2, v3, v2
+; CHECK-NEXT:    v_exp_f16_e32 v2, v2
 ; CHECK-NEXT:    v_or_b32_e32 v0, v0, v2
 ; CHECK-NEXT:    s_setpc_b64 s[30:31]
   %y = sitofp i32 %y.i to half
@@ -76,28 +79,29 @@ define float @test_pow_fast_f32__integral_y(float %x, i32 %y.i) {
 ; CHECK-LABEL: test_pow_fast_f32__integral_y:
 ; CHECK:       ; %bb.0:
 ; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT:    v_cvt_f32_i32_e32 v1, v1
 ; CHECK-NEXT:    s_mov_b32 s4, 0x800000
 ; CHECK-NEXT:    v_cmp_lt_f32_e64 vcc, |v0|, s4
 ; CHECK-NEXT:    v_mov_b32_e32 v3, 0x4f800000
+; CHECK-NEXT:    v_cvt_i32_f32_e32 v1, v1
 ; CHECK-NEXT:    v_cndmask_b32_e32 v3, 1.0, v3, vcc
 ; CHECK-NEXT:    v_mul_f32_e64 v3, |v0|, v3
 ; CHECK-NEXT:    v_log_f32_e32 v3, v3
-; CHECK-NEXT:    v_cvt_f32_i32_e32 v1, v1
+; CHECK-NEXT:    v_cvt_f32_i32_e32 v4, v1
 ; CHECK-NEXT:    v_mov_b32_e32 v2, 0x42000000
 ; CHECK-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc
 ; CHECK-NEXT:    v_sub_f32_e32 v2, v3, v2
-; CHECK-NEXT:    v_mul_f32_e32 v3, v2, v1
+; CHECK-NEXT:    v_mul_f32_e32 v3, v2, v4
 ; CHECK-NEXT:    s_mov_b32 s4, 0xc2fc0000
-; CHECK-NEXT:    v_mov_b32_e32 v4, 0x42800000
+; CHECK-NEXT:    v_mov_b32_e32 v5, 0x42800000
 ; CHECK-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v3
-; CHECK-NEXT:    v_cndmask_b32_e32 v3, 0, v4, vcc
-; CHECK-NEXT:    v_fma_f32 v2, v2, v1, v3
+; CHECK-NEXT:    v_cndmask_b32_e32 v3, 0, v5, vcc
+; CHECK-NEXT:    v_fma_f32 v2, v2, v4, v3
 ; CHECK-NEXT:    v_exp_f32_e32 v2, v2
-; CHECK-NEXT:    v_cvt_i32_f32_e32 v1, v1
 ; CHECK-NEXT:    v_mov_b32_e32 v3, 0x1f800000
 ; CHECK-NEXT:    v_cndmask_b32_e32 v3, 1.0, v3, vcc
-; CHECK-NEXT:    v_mul_f32_e32 v2, v2, v3
 ; CHECK-NEXT:    v_lshlrev_b32_e32 v1, 31, v1
+; CHECK-NEXT:    v_mul_f32_e32 v2, v2, v3
 ; CHECK-NEXT:    v_and_or_b32 v0, v1, v0, v2
 ; CHECK-NEXT:    s_setpc_b64 s[30:31]
   %y = sitofp i32 %y.i to float
@@ -112,7 +116,7 @@ define double @test_pow_fast_f64__integral_y(double %x, i32 %y.i) {
 ; CHECK-NEXT:    s_mov_b32 s16, s33
 ; CHECK-NEXT:    s_mov_b32 s33, s32
 ; CHECK-NEXT:    s_or_saveexec_b64 s[18:19], -1
-; CHECK-NEXT:    buffer_store_dword v40, off, s[0:3], s33 offset:20 ; 4-byte Folded Spill
+; CHECK-NEXT:    buffer_store_dword v40, off, s[0:3], s33 offset:12 ; 4-byte Folded Spill
 ; CHECK-NEXT:    s_mov_b64 exec, s[18:19]
 ; CHECK-NEXT:    v_writelane_b32 v40, s16, 14
 ; CHECK-NEXT:    v_writelane_b32 v40, s30, 0
@@ -132,15 +136,12 @@ define double @test_pow_fast_f64__integral_y(double %x, i32 %y.i) {
 ; CHECK-NEXT:    s_addc_u32 s5, s5, _Z4log2d@gotpcrel32@hi+12
 ; CHECK-NEXT:    s_load_dwordx2 s[16:17], s[4:5], 0x0
 ; CHECK-NEXT:    v_writelane_b32 v40, s42, 10
-; CHECK-NEXT:    buffer_store_dword v41, off, s[0:3], s33 offset:16 ; 4-byte Folded Spill
-; CHECK-NEXT:    buffer_store_dword v42, off, s[0:3], s33 offset:12 ; 4-byte Folded Spill
-; CHECK-NEXT:    buffer_store_dword v43, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill
-; CHECK-NEXT:    buffer_store_dword v44, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
-; CHECK-NEXT:    buffer_store_dword v45, off, s[0:3], s33 ; 4-byte Folded Spill
+; CHECK-NEXT:    buffer_store_dword v41, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill
+; CHECK-NEXT:    buffer_store_dword v42, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
+; CHECK-NEXT:    buffer_store_dword v43, off, s[0:3], s33 ; 4-byte Folded Spill
 ; CHECK-NEXT:    v_writelane_b32 v40, s43, 11
 ; CHECK-NEXT:    v_mov_b32_e32 v43, v1
 ; CHECK-NEXT:    v_writelane_b32 v40, s44, 12
-; CHECK-NEXT:    v_mov_b32_e32 v42, v2
 ; CHECK-NEXT:    v_and_b32_e32 v1, 0x7fffffff, v43
 ; CHECK-NEXT:    s_mov_b64 s[4:5], s[40:41]
 ; CHECK-NEXT:    v_writelane_b32 v40, s45, 13
@@ -152,15 +153,16 @@ define double @test_pow_fast_f64__integral_y(double %x, i32 %y.i) {
 ; CHECK-NEXT:    s_mov_b64 s[34:35], s[10:11]
 ; CHECK-NEXT:    s_mov_b64 s[36:37], s[8:9]
 ; CHECK-NEXT:    s_mov_b64 s[38:39], s[6:7]
-; CHECK-NEXT:    v_cvt_f64_i32_e32 v[44:45], v42
+; CHECK-NEXT:    v_mov_b32_e32 v42, v2
 ; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
 ; CHECK-NEXT:    s_swappc_b64 s[30:31], s[16:17]
-; CHECK-NEXT:    v_mul_f64 v[0:1], v[0:1], v[44:45]
+; CHECK-NEXT:    v_cvt_f64_i32_e32 v[2:3], v42
 ; CHECK-NEXT:    s_getpc_b64 s[4:5]
 ; CHECK-NEXT:    s_add_u32 s4, s4, _Z4exp2d@gotpcrel32@lo+4
 ; CHECK-NEXT:    s_addc_u32 s5, s5, _Z4exp2d@gotpcrel32@hi+12
 ; CHECK-NEXT:    s_load_dwordx2 s[16:17], s[4:5], 0x0
 ; CHECK-NEXT:    s_mov_b64 s[4:5], s[40:41]
+; CHECK-NEXT:    v_mul_f64 v[0:1], v[0:1], v[2:3]
 ; CHECK-NEXT:    s_mov_b64 s[6:7], s[38:39]
 ; CHECK-NEXT:    s_mov_b64 s[8:9], s[36:37]
 ; CHECK-NEXT:    s_mov_b64 s[10:11], s[34:35]
@@ -173,11 +175,9 @@ define double @test_pow_fast_f64__integral_y(double %x, i32 %y.i) {
 ; CHECK-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; CHECK-NEXT:    v_lshlrev_b32_e32 v2, 31, v42
 ; CHECK-NEXT:    v_and_b32_e32 v2, v2, v43
-; CHECK-NEXT:    buffer_load_dword v45, off, s[0:3], s33 ; 4-byte Folded Reload
-; CHECK-NEXT:    buffer_load_dword v44, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
-; CHECK-NEXT:    buffer_load_dword v43, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload
-; CHECK-NEXT:    buffer_load_dword v42, off, s[0:3], s33 offset:12 ; 4-byte Folded Reload
-; CHECK-NEXT:    buffer_load_dword v41, off, s[0:3], s33 offset:16 ; 4-byte Folded Reload
+; CHECK-NEXT:    buffer_load_dword v43, off, s[0:3], s33 ; 4-byte Folded Reload
+; CHECK-NEXT:    buffer_load_dword v42, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
+; CHECK-NEXT:    buffer_load_dword v41, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload
 ; CHECK-NEXT:    v_or_b32_e32 v1, v2, v1
 ; CHECK-NEXT:    v_readlane_b32 s45, v40, 13
 ; CHECK-NEXT:    v_readlane_b32 s44, v40, 12
@@ -195,7 +195,7 @@ define double @test_pow_fast_f64__integral_y(double %x, i32 %y.i) {
 ; CHECK-NEXT:    v_readlane_b32 s30, v40, 0
 ; CHECK-NEXT:    v_readlane_b32 s4, v40, 14
 ; CHECK-NEXT:    s_or_saveexec_b64 s[6:7], -1
-; CHECK-NEXT:    buffer_load_dword v40, off, s[0:3], s33 offset:20 ; 4-byte Folded Reload
+; CHECK-NEXT:    buffer_load_dword v40, off, s[0:3], s33 offset:12 ; 4-byte Folded Reload
 ; CHECK-NEXT:    s_mov_b64 exec, s[6:7]
 ; CHECK-NEXT:    s_addk_i32 s32, 0xf800
 ; CHECK-NEXT:    s_mov_b32 s33, s4

diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pow.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pow.ll
index 2ebe2562ddb469..6e1ce0cbf2e1ed 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pow.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pow.ll
@@ -559,8 +559,7 @@ define <2 x float> @test_pow_afn_v2f32_neg0.0(<2 x float> %x) {
 define <2 x float> @test_pow_afn_v2f32_plus_minus_0.0(<2 x float> %x) {
 ; CHECK-LABEL: define <2 x float> @test_pow_afn_v2f32_plus_minus_0.0
 ; CHECK-SAME: (<2 x float> [[X:%.*]]) {
-; CHECK-NEXT:    [[POW:%.*]] = tail call afn <2 x float> @_Z3powDv2_fS_(<2 x float> [[X]], <2 x float> <float 0.000000e+00, float -0.000000e+00>)
-; CHECK-NEXT:    ret <2 x float> [[POW]]
+; CHECK-NEXT:    ret <2 x float> <float 1.000000e+00, float 1.000000e+00>
 ;
   %pow = tail call afn <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> <float 0.0, float -0.0>)
   ret <2 x float> %pow
@@ -695,7 +694,7 @@ define <2 x float> @test_pow_afn_v2f32_neg1.0(<2 x float> %x) {
 define <2 x float> @test_pow_afn_v2f32_plus_minus_1.0(<2 x float> %x) {
 ; CHECK-LABEL: define <2 x float> @test_pow_afn_v2f32_plus_minus_1.0
 ; CHECK-SAME: (<2 x float> [[X:%.*]]) {
-; CHECK-NEXT:    [[POW:%.*]] = tail call afn <2 x float> @_Z3powDv2_fS_(<2 x float> [[X]], <2 x float> <float 1.000000e+00, float -1.000000e+00>)
+; CHECK-NEXT:    [[POW:%.*]] = tail call afn <2 x float> @_Z4pownDv2_fDv2_i(<2 x float> [[X]], <2 x i32> <i32 1, i32 -1>)
 ; CHECK-NEXT:    ret <2 x float> [[POW]]
 ;
   %pow = tail call afn <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> <float 1.0, float -1.0>)
@@ -734,7 +733,7 @@ define float @test_pow_afn_f32_2.0(float %x) {
 define float @test_pow_afn_f32_neg2.0(float %x) {
 ; CHECK-LABEL: define float @test_pow_afn_f32_neg2.0
 ; CHECK-SAME: (float [[X:%.*]]) {
-; CHECK-NEXT:    [[POW:%.*]] = tail call afn float @_Z3powff(float [[X]], float -2.000000e+00)
+; CHECK-NEXT:    [[POW:%.*]] = tail call afn float @_Z4pownfi(float [[X]], i32 -2)
 ; CHECK-NEXT:    ret float [[POW]]
 ;
   %pow = tail call afn float @_Z3powff(float %x, float -2.0)
@@ -754,7 +753,7 @@ define <2 x float> @test_pow_afn_v2f32_2.0(<2 x float> %x) {
 define <2 x float> @test_pow_afn_v2f32_neg2.0(<2 x float> %x) {
 ; CHECK-LABEL: define <2 x float> @test_pow_afn_v2f32_neg2.0
 ; CHECK-SAME: (<2 x float> [[X:%.*]]) {
-; CHECK-NEXT:    [[POW:%.*]] = tail call afn <2 x float> @_Z3powDv2_fS_(<2 x float> [[X]], <2 x float> <float -2.000000e+00, float -2.000000e+00>)
+; CHECK-NEXT:    [[POW:%.*]] = tail call afn <2 x float> @_Z4pownDv2_fDv2_i(<2 x float> [[X]], <2 x i32> <i32 -2, i32 -2>)
 ; CHECK-NEXT:    ret <2 x float> [[POW]]
 ;
   %pow = tail call afn <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> <float -2.0, float -2.0>)
@@ -764,7 +763,7 @@ define <2 x float> @test_pow_afn_v2f32_neg2.0(<2 x float> %x) {
 define <2 x float> @test_pow_afn_v2f32_plus_minus_2.0(<2 x float> %x) {
 ; CHECK-LABEL: define <2 x float> @test_pow_afn_v2f32_plus_minus_2.0
 ; CHECK-SAME: (<2 x float> [[X:%.*]]) {
-; CHECK-NEXT:    [[POW:%.*]] = tail call afn <2 x float> @_Z3powDv2_fS_(<2 x float> [[X]], <2 x float> <float 2.000000e+00, float -2.000000e+00>)
+; CHECK-NEXT:    [[POW:%.*]] = tail call afn <2 x float> @_Z4pownDv2_fDv2_i(<2 x float> [[X]], <2 x i32> <i32 2, i32 -2>)
 ; CHECK-NEXT:    ret <2 x float> [[POW]]
 ;
   %pow = tail call afn <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> <float 2.0, float -2.0>)
@@ -774,7 +773,7 @@ define <2 x float> @test_pow_afn_v2f32_plus_minus_2.0(<2 x float> %x) {
 define float @test_pow_afn_f32_3.0(float %x) {
 ; CHECK-LABEL: define float @test_pow_afn_f32_3.0
 ; CHECK-SAME: (float [[X:%.*]]) {
-; CHECK-NEXT:    [[POW:%.*]] = tail call afn float @_Z3powff(float [[X]], float 3.000000e+00)
+; CHECK-NEXT:    [[POW:%.*]] = tail call afn float @_Z4pownfi(float [[X]], i32 3)
 ; CHECK-NEXT:    ret float [[POW]]
 ;
   %pow = tail call afn float @_Z3powff(float %x, float 3.0)
@@ -784,7 +783,7 @@ define float @test_pow_afn_f32_3.0(float %x) {
 define float @test_pow_afn_f32_neg3.0(float %x) {
 ; CHECK-LABEL: define float @test_pow_afn_f32_neg3.0
 ; CHECK-SAME: (float [[X:%.*]]) {
-; CHECK-NEXT:    [[POW:%.*]] = tail call afn float @_Z3powff(float [[X]], float -3.000000e+00)
+; CHECK-NEXT:    [[POW:%.*]] = tail call afn float @_Z4pownfi(float [[X]], i32 -3)
 ; CHECK-NEXT:    ret float [[POW]]
 ;
   %pow = tail call afn float @_Z3powff(float %x, float -3.0)
@@ -794,7 +793,7 @@ define float @test_pow_afn_f32_neg3.0(float %x) {
 define <2 x float> @test_pow_afn_v2f32_3.0(<2 x float> %x) {
 ; CHECK-LABEL: define <2 x float> @test_pow_afn_v2f32_3.0
 ; CHECK-SAME: (<2 x float> [[X:%.*]]) {
-; CHECK-NEXT:    [[POW:%.*]] = tail call afn <2 x float> @_Z3powDv2_fS_(<2 x float> [[X]], <2 x float> <float 3.000000e+00, float 3.000000e+00>)
+; CHECK-NEXT:    [[POW:%.*]] = tail call afn <2 x float> @_Z4pownDv2_fDv2_i(<2 x float> [[X]], <2 x i32> <i32 3, i32 3>)
 ; CHECK-NEXT:    ret <2 x float> [[POW]]
 ;
   %pow = tail call afn <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> <float 3.0, float 3.0>)
@@ -804,7 +803,7 @@ define <2 x float> @test_pow_afn_v2f32_3.0(<2 x float> %x) {
 define <2 x float> @test_pow_afn_v2f32_neg3.0(<2 x float> %x) {
 ; CHECK-LABEL: define <2 x float> @test_pow_afn_v2f32_neg3.0
 ; CHECK-SAME: (<2 x float> [[X:%.*]]) {
-; CHECK-NEXT:    [[POW:%.*]] = tail call afn <2 x float> @_Z3powDv2_fS_(<2 x float> [[X]], <2 x float> <float -3.000000e+00, float -3.000000e+00>)
+; CHECK-NEXT:    [[POW:%.*]] = tail call afn <2 x float> @_Z4pownDv2_fDv2_i(<2 x float> [[X]], <2 x i32> <i32 -3, i32 -3>)
 ; CHECK-NEXT:    ret <2 x float> [[POW]]
 ;
   %pow = tail call afn <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> <float -3.0, float -3.0>)
@@ -814,7 +813,7 @@ define <2 x float> @test_pow_afn_v2f32_neg3.0(<2 x float> %x) {
 define <2 x float> @test_pow_afn_v2f32_plus_minus_3.0(<2 x float> %x) {
 ; CHECK-LABEL: define <2 x float> @test_pow_afn_v2f32_plus_minus_3.0
 ; CHECK-SAME: (<2 x float> [[X:%.*]]) {
-; CHECK-NEXT:    [[POW:%.*]] = tail call afn <2 x float> @_Z3powDv2_fS_(<2 x float> [[X]], <2 x float> <float 3.000000e+00, float -3.000000e+00>)
+; CHECK-NEXT:    [[POW:%.*]] = tail call afn <2 x float> @_Z4pownDv2_fDv2_i(<2 x float> [[X]], <2 x i32> <i32 3, i32 -3>)
 ; CHECK-NEXT:    ret <2 x float> [[POW]]
 ;
   %pow = tail call afn <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> <float 3.0, float -3.0>)
@@ -874,7 +873,7 @@ define <2 x float> @test_pow_afn_v2f32_plus_minus_3.99(<2 x float> %x) {
 define float @test_pow_afn_f32_8.0(float %x) {
 ; CHECK-LABEL: define float @test_pow_afn_f32_8.0
 ; CHECK-SAME: (float [[X:%.*]]) {
-; CHECK-NEXT:    [[POW:%.*]] = tail call afn float @_Z3powff(float [[X]], float 8.000000e+00)
+; CHECK-NEXT:    [[POW:%.*]] = tail call afn float @_Z4pownfi(float [[X]], i32 8)
 ; CHECK-NEXT:    ret float [[POW]]
 ;
   %pow = tail call afn float @_Z3powff(float %x, float 8.0)
@@ -884,7 +883,7 @@ define float @test_pow_afn_f32_8.0(float %x) {
 define float @test_pow_afn_f32_neg8.0(float %x) {
 ; CHECK-LABEL: define float @test_pow_afn_f32_neg8.0
 ; CHECK-SAME: (float [[X:%.*]]) {
-; CHECK-NEXT:    [[POW:%.*]] = tail call afn float @_Z3powff(float [[X]], float -8.000000e+00)
+; CHECK-NEXT:    [[POW:%.*]] = tail call afn float @_Z4pownfi(float [[X]], i32 -8)
 ; CHECK-NEXT:    ret float [[POW]]
 ;
   %pow = tail call afn float @_Z3powff(float %x, float -8.0)
@@ -894,7 +893,7 @@ define float @test_pow_afn_f32_neg8.0(float %x) {
 define <2 x float> @test_pow_afn_v2f32_8.0(<2 x float> %x) {
 ; CHECK-LABEL: define <2 x float> @test_pow_afn_v2f32_8.0
 ; CHECK-SAME: (<2 x float> [[X:%.*]]) {
-; CHECK-NEXT:    [[POW:%.*]] = tail call afn <2 x float> @_Z3powDv2_fS_(<2 x float> [[X]], <2 x float> <float 8.000000e+00, float 8.000000e+00>)
+; CHECK-NEXT:    [[POW:%.*]] = tail call afn <2 x float> @_Z4pownDv2_fDv2_i(<2 x float> [[X]], <2 x i32> <i32 8, i32 8>)
 ; CHECK-NEXT:    ret <2 x float> [[POW]]
 ;
   %pow = tail call afn <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> <float 8.0, float 8.0>)
@@ -904,7 +903,7 @@ define <2 x float> @test_pow_afn_v2f32_8.0(<2 x float> %x) {
 define <2 x float> @test_pow_afn_v2f32_neg8.0(<2 x float> %x) {
 ; CHECK-LABEL: define <2 x float> @test_pow_afn_v2f32_neg8.0
 ; CHECK-SAME: (<2 x float> [[X:%.*]]) {
-; CHECK-NEXT:    [[POW:%.*]] = tail call afn <2 x float> @_Z3powDv2_fS_(<2 x float> [[X]], <2 x float> <float -8.000000e+00, float -8.000000e+00>)
+; CHECK-NEXT:    [[POW:%.*]] = tail call afn <2 x float> @_Z4pownDv2_fDv2_i(<2 x float> [[X]], <2 x i32> <i32 -8, i32 -8>)
 ; CHECK-NEXT:    ret <2 x float> [[POW]]
 ;
   %pow = tail call afn <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> <float -8.0, float -8.0>)
@@ -914,7 +913,7 @@ define <2 x float> @test_pow_afn_v2f32_neg8.0(<2 x float> %x) {
 define <2 x float> @test_pow_afn_v2f32_plus_minus_8.0(<2 x float> %x) {
 ; CHECK-LABEL: define <2 x float> @test_pow_afn_v2f32_plus_minus_8.0
 ; CHECK-SAME: (<2 x float> [[X:%.*]]) {
-; CHECK-NEXT:    [[POW:%.*]] = tail call afn <2 x float> @_Z3powDv2_fS_(<2 x float> [[X]], <2 x float> <float 8.000000e+00, float -8.000000e+00>)
+; CHECK-NEXT:    [[POW:%.*]] = tail call afn <2 x float> @_Z4pownDv2_fDv2_i(<2 x float> [[X]], <2 x i32> <i32 8, i32 -8>)
 ; CHECK-NEXT:    ret <2 x float> [[POW]]
 ;
   %pow = tail call afn <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> <float 8.0, float -8.0>)
@@ -924,7 +923,7 @@ define <2 x float> @test_pow_afn_v2f32_plus_minus_8.0(<2 x float> %x) {
 define float @test_pow_afn_f32_12.0(float %x) {
 ; CHECK-LABEL: define float @test_pow_afn_f32_12.0
 ; CHECK-SAME: (float [[X:%.*]]) {
-; CHECK-NEXT:    [[POW:%.*]] = tail call afn float @_Z3powff(float [[X]], float 1.200000e+01)
+; CHECK-NEXT:    [[POW:%.*]] = tail call afn float @_Z4pownfi(float [[X]], i32 12)
 ; CHECK-NEXT:    ret float [[POW]]
 ;
   %pow = tail call afn float @_Z3powff(float %x, float 12.0)
@@ -934,7 +933,7 @@ define float @test_pow_afn_f32_12.0(float %x) {
 define float @test_pow_afn_f32_neg12.0(float %x) {
 ; CHECK-LABEL: define float @test_pow_afn_f32_neg12.0
 ; CHECK-SAME: (float [[X:%.*]]) {
-; CHECK-NEXT:    [[POW:%.*]] = tail call afn float @_Z3powff(float [[X]], float -1.200000e+01)
+; CHECK-NEXT:    [[POW:%.*]] = tail call afn float @_Z4pownfi(float [[X]], i32 -12)
 ; CHECK-NEXT:    ret float [[POW]]
 ;
   %pow = tail call afn float @_Z3powff(float %x, float -12.0)
@@ -944,7 +943,7 @@ define float @test_pow_afn_f32_neg12.0(float %x) {
 define <2 x float> @test_pow_afn_v2f32_12.0(<2 x float> %x) {
 ; CHECK-LABEL: define <2 x float> @test_pow_afn_v2f32_12.0
 ; CHECK-SAME: (<2 x float> [[X:%.*]]) {
-; CHECK-NEXT:    [[POW:%.*]] = tail call afn <2 x float> @_Z3powDv2_fS_(<2 x float> [[X]], <2 x float> <float 1.200000e+01, float 1.200000e+01>)
+; CHECK-NEXT:    [[POW:%.*]] = tail call afn <2 x float> @_Z4pownDv2_fDv2_i(<2 x float> [[X]], <2 x i32> <i32 12, i32 12>)
 ; CHECK-NEXT:    ret <2 x float> [[POW]]
 ;
   %pow = tail call afn <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> <float 12.0, float 12.0>)
@@ -954,7 +953,7 @@ define <2 x float> @test_pow_afn_v2f32_12.0(<2 x float> %x) {
 define <2 x float> @test_pow_afn_v2f32_neg12.0(<2 x float> %x) {
 ; CHECK-LABEL: define <2 x float> @test_pow_afn_v2f32_neg12.0
 ; CHECK-SAME: (<2 x float> [[X:%.*]]) {
-; CHECK-NEXT:    [[POW:%.*]] = tail call afn <2 x float> @_Z3powDv2_fS_(<2 x float> [[X]], <2 x float> <float -1.200000e+01, float -1.200000e+01>)
+; CHECK-NEXT:    [[POW:%.*]] = tail call afn <2 x float> @_Z4pownDv2_fDv2_i(<2 x float> [[X]], <2 x i32> <i32 -12, i32 -12>)
 ; CHECK-NEXT:    ret <2 x float> [[POW]]
 ;
   %pow = tail call afn <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> <float -12.0, float -12.0>)
@@ -964,7 +963,7 @@ define <2 x float> @test_pow_afn_v2f32_neg12.0(<2 x float> %x) {
 define <2 x float> @test_pow_afn_v2f32_plus_minus_12.0(<2 x float> %x) {
 ; CHECK-LABEL: define <2 x float> @test_pow_afn_v2f32_plus_minus_12.0
 ; CHECK-SAME: (<2 x float> [[X:%.*]]) {
-; CHECK-NEXT:    [[POW:%.*]] = tail call afn <2 x float> @_Z3powDv2_fS_(<2 x float> [[X]], <2 x float> <float 1.200000e+01, float -1.200000e+01>)
+; CHECK-NEXT:    [[POW:%.*]] = tail call afn <2 x float> @_Z4pownDv2_fDv2_i(<2 x float> [[X]], <2 x i32> <i32 12, i32 -12>)
 ; CHECK-NEXT:    ret <2 x float> [[POW]]
 ;
   %pow = tail call afn <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> <float 12.0, float -12.0>)
@@ -974,7 +973,7 @@ define <2 x float> @test_pow_afn_v2f32_plus_minus_12.0(<2 x float> %x) {
 define float @test_pow_afn_f32_13.0(float %x) {
 ; CHECK-LABEL: define float @test_pow_afn_f32_13.0
 ; CHECK-SAME: (float [[X:%.*]]) {
-; CHECK-NEXT:    [[POW:%.*]] = tail call afn float @_Z3powff(float [[X]], float 1.300000e+01)
+; CHECK-NEXT:    [[POW:%.*]] = tail call afn float @_Z4pownfi(float [[X]], i32 13)
 ; CHECK-NEXT:    ret float [[POW]]
 ;
   %pow = tail call afn float @_Z3powff(float %x, float 13.0)
@@ -984,7 +983,7 @@ define float @test_pow_afn_f32_13.0(float %x) {
 define float @test_pow_afn_f32_neg13.0(float %x) {
 ; CHECK-LABEL: define float @test_pow_afn_f32_neg13.0
 ; CHECK-SAME: (float [[X:%.*]]) {
-; CHECK-NEXT:    [[POW:%.*]] = tail call afn float @_Z3powff(float [[X]], float -1.300000e+01)
+; CHECK-NEXT:    [[POW:%.*]] = tail call afn float @_Z4pownfi(float [[X]], i32 -13)
 ; CHECK-NEXT:    ret float [[POW]]
 ;
   %pow = tail call afn float @_Z3powff(float %x, float -13.0)
@@ -994,7 +993,7 @@ define float @test_pow_afn_f32_neg13.0(float %x) {
 define <2 x float> @test_pow_afn_v2f32_13.0(<2 x float> %x) {
 ; CHECK-LABEL: define <2 x float> @test_pow_afn_v2f32_13.0
 ; CHECK-SAME: (<2 x float> [[X:%.*]]) {
-; CHECK-NEXT:    [[POW:%.*]] = tail call afn <2 x float> @_Z3powDv2_fS_(<2 x float> [[X]], <2 x float> <float 1.300000e+01, float 1.300000e+01>)
+; CHECK-NEXT:    [[POW:%.*]] = tail call afn <2 x float> @_Z4pownDv2_fDv2_i(<2 x float> [[X]], <2 x i32> <i32 13, i32 13>)
 ; CHECK-NEXT:    ret <2 x float> [[POW]]
 ;
   %pow = tail call afn <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> <float 13.0, float 13.0>)
@@ -1004,7 +1003,7 @@ define <2 x float> @test_pow_afn_v2f32_13.0(<2 x float> %x) {
 define <2 x float> @test_pow_afn_v2f32_neg13.0(<2 x float> %x) {
 ; CHECK-LABEL: define <2 x float> @test_pow_afn_v2f32_neg13.0
 ; CHECK-SAME: (<2 x float> [[X:%.*]]) {
-; CHECK-NEXT:    [[POW:%.*]] = tail call afn <2 x float> @_Z3powDv2_fS_(<2 x float> [[X]], <2 x float> <float -1.300000e+01, float -1.300000e+01>)
+; CHECK-NEXT:    [[POW:%.*]] = tail call afn <2 x float> @_Z4pownDv2_fDv2_i(<2 x float> [[X]], <2 x i32> <i32 -13, i32 -13>)
 ; CHECK-NEXT:    ret <2 x float> [[POW]]
 ;
   %pow = tail call afn <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> <float -13.0, float -13.0>)
@@ -1014,7 +1013,7 @@ define <2 x float> @test_pow_afn_v2f32_neg13.0(<2 x float> %x) {
 define <2 x float> @test_pow_afn_v2f32_13.0_15.0(<2 x float> %x) {
 ; CHECK-LABEL: define <2 x float> @test_pow_afn_v2f32_13.0_15.0
 ; CHECK-SAME: (<2 x float> [[X:%.*]]) {
-; CHECK-NEXT:    [[POW:%.*]] = tail call afn <2 x float> @_Z3powDv2_fS_(<2 x float> [[X]], <2 x float> <float 1.300000e+01, float 1.500000e+01>)
+; CHECK-NEXT:    [[POW:%.*]] = tail call afn <2 x float> @_Z4pownDv2_fDv2_i(<2 x float> [[X]], <2 x i32> <i32 13, i32 15>)
 ; CHECK-NEXT:    ret <2 x float> [[POW]]
 ;
   %pow = tail call afn <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> <float 13.0, float 15.0>)
@@ -1024,7 +1023,7 @@ define <2 x float> @test_pow_afn_v2f32_13.0_15.0(<2 x float> %x) {
 define <2 x float> @test_pow_afn_v2f32_13.0_14.0(<2 x float> %x) {
 ; CHECK-LABEL: define <2 x float> @test_pow_afn_v2f32_13.0_14.0
 ; CHECK-SAME: (<2 x float> [[X:%.*]]) {
-; CHECK-NEXT:    [[POW:%.*]] = tail call afn <2 x float> @_Z3powDv2_fS_(<2 x float> [[X]], <2 x float> <float 1.300000e+01, float 1.400000e+01>)
+; CHECK-NEXT:    [[POW:%.*]] = tail call afn <2 x float> @_Z4pownDv2_fDv2_i(<2 x float> [[X]], <2 x i32> <i32 13, i32 14>)
 ; CHECK-NEXT:    ret <2 x float> [[POW]]
 ;
   %pow = tail call afn <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> <float 13.0, float 14.0>)
@@ -1034,7 +1033,7 @@ define <2 x float> @test_pow_afn_v2f32_13.0_14.0(<2 x float> %x) {
 define <2 x float> @test_pow_afn_v2f32_14.0_16.0(<2 x float> %x) {
 ; CHECK-LABEL: define <2 x float> @test_pow_afn_v2f32_14.0_16.0
 ; CHECK-SAME: (<2 x float> [[X:%.*]]) {
-; CHECK-NEXT:    [[POW:%.*]] = tail call afn <2 x float> @_Z3powDv2_fS_(<2 x float> [[X]], <2 x float> <float 1.400000e+01, float 1.600000e+01>)
+; CHECK-NEXT:    [[POW:%.*]] = tail call afn <2 x float> @_Z4pownDv2_fDv2_i(<2 x float> [[X]], <2 x i32> <i32 14, i32 16>)
 ; CHECK-NEXT:    ret <2 x float> [[POW]]
 ;
   %pow = tail call afn <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> <float 14.0, float 16.0>)
@@ -1044,7 +1043,7 @@ define <2 x float> @test_pow_afn_v2f32_14.0_16.0(<2 x float> %x) {
 define <2 x float> @test_pow_afn_v2f32_plus_minus_13.0(<2 x float> %x) {
 ; CHECK-LABEL: define <2 x float> @test_pow_afn_v2f32_plus_minus_13.0
 ; CHECK-SAME: (<2 x float> [[X:%.*]]) {
-; CHECK-NEXT:    [[POW:%.*]] = tail call afn <2 x float> @_Z3powDv2_fS_(<2 x float> [[X]], <2 x float> <float 1.300000e+01, float -1.300000e+01>)
+; CHECK-NEXT:    [[POW:%.*]] = tail call afn <2 x float> @_Z4pownDv2_fDv2_i(<2 x float> [[X]], <2 x i32> <i32 13, i32 -13>)
 ; CHECK-NEXT:    ret <2 x float> [[POW]]
 ;
   %pow = tail call afn <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> <float 13.0, float -13.0>)
@@ -1054,7 +1053,7 @@ define <2 x float> @test_pow_afn_v2f32_plus_minus_13.0(<2 x float> %x) {
 define <2 x float> @test_pow_afn_v2f32_plus_minus_13.0_minus_14.0(<2 x float> %x) {
 ; CHECK-LABEL: define <2 x float> @test_pow_afn_v2f32_plus_minus_13.0_minus_14.0
 ; CHECK-SAME: (<2 x float> [[X:%.*]]) {
-; CHECK-NEXT:    [[POW:%.*]] = tail call afn <2 x float> @_Z3powDv2_fS_(<2 x float> [[X]], <2 x float> <float -1.300000e+01, float -1.400000e+01>)
+; CHECK-NEXT:    [[POW:%.*]] = tail call afn <2 x float> @_Z4pownDv2_fDv2_i(<2 x float> [[X]], <2 x i32> <i32 -13, i32 -14>)
 ; CHECK-NEXT:    ret <2 x float> [[POW]]
 ;
   %pow = tail call afn <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> <float -13.0, float -14.0>)
@@ -1363,7 +1362,7 @@ define float @test_pow_f32__y_2(float %x) {
 define float @test_pow_f32__y_n2(float %x) {
 ; CHECK-LABEL: define float @test_pow_f32__y_n2
 ; CHECK-SAME: (float [[X:%.*]]) {
-; CHECK-NEXT:    [[POW:%.*]] = tail call float @_Z3powff(float [[X]], float -2.000000e+00)
+; CHECK-NEXT:    [[POW:%.*]] = tail call float @_Z4pownfi(float [[X]], i32 -2)
 ; CHECK-NEXT:    ret float [[POW]]
 ;
   %pow = tail call float @_Z3powff(float %x, float -2.0)
@@ -1393,7 +1392,7 @@ define float @test_pow_f32__y_neg_half(float %x) {
 define float @test_pow_f32__y_3(float %x) {
 ; CHECK-LABEL: define float @test_pow_f32__y_3
 ; CHECK-SAME: (float [[X:%.*]]) {
-; CHECK-NEXT:    [[POW:%.*]] = tail call float @_Z3powff(float [[X]], float 3.000000e+00)
+; CHECK-NEXT:    [[POW:%.*]] = tail call float @_Z4pownfi(float [[X]], i32 3)
 ; CHECK-NEXT:    ret float [[POW]]
 ;
   %pow = tail call float @_Z3powff(float %x, float 3.0)
@@ -1403,7 +1402,7 @@ define float @test_pow_f32__y_3(float %x) {
 define float @test_pow_f32__y_n3(float %x) {
 ; CHECK-LABEL: define float @test_pow_f32__y_n3
 ; CHECK-SAME: (float [[X:%.*]]) {
-; CHECK-NEXT:    [[POW:%.*]] = tail call float @_Z3powff(float [[X]], float -3.000000e+00)
+; CHECK-NEXT:    [[POW:%.*]] = tail call float @_Z4pownfi(float [[X]], i32 -3)
 ; CHECK-NEXT:    ret float [[POW]]
 ;
   %pow = tail call float @_Z3powff(float %x, float -3.0)
@@ -1480,7 +1479,7 @@ define <2 x float> @test_pow_v2f32__y_2(<2 x float> %x) {
 define <2 x float> @test_pow_v2f32__y_n2(<2 x float> %x) {
 ; CHECK-LABEL: define <2 x float> @test_pow_v2f32__y_n2
 ; CHECK-SAME: (<2 x float> [[X:%.*]]) {
-; CHECK-NEXT:    [[POW:%.*]] = tail call <2 x float> @_Z3powDv2_fS_(<2 x float> [[X]], <2 x float> <float -2.000000e+00, float -2.000000e+00>)
+; CHECK-NEXT:    [[POW:%.*]] = tail call <2 x float> @_Z4pownDv2_fDv2_i(<2 x float> [[X]], <2 x i32> <i32 -2, i32 -2>)
 ; CHECK-NEXT:    ret <2 x float> [[POW]]
 ;
   %pow = tail call <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> <float -2.0, float -2.0>)
@@ -1510,7 +1509,7 @@ define <2 x float> @test_pow_v2f32__y_neg_half(<2 x float> %x) {
 define <2 x float> @test_pow_v2f32__y_3(<2 x float> %x) {
 ; CHECK-LABEL: define <2 x float> @test_pow_v2f32__y_3
 ; CHECK-SAME: (<2 x float> [[X:%.*]]) {
-; CHECK-NEXT:    [[POW:%.*]] = tail call <2 x float> @_Z3powDv2_fS_(<2 x float> [[X]], <2 x float> <float 3.000000e+00, float 3.000000e+00>)
+; CHECK-NEXT:    [[POW:%.*]] = tail call <2 x float> @_Z4pownDv2_fDv2_i(<2 x float> [[X]], <2 x i32> <i32 3, i32 3>)
 ; CHECK-NEXT:    ret <2 x float> [[POW]]
 ;
   %pow = tail call <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> <float 3.0, float 3.0>)
@@ -1520,7 +1519,7 @@ define <2 x float> @test_pow_v2f32__y_3(<2 x float> %x) {
 define <2 x float> @test_pow_v2f32__y_n3(<2 x float> %x) {
 ; CHECK-LABEL: define <2 x float> @test_pow_v2f32__y_n3
 ; CHECK-SAME: (<2 x float> [[X:%.*]]) {
-; CHECK-NEXT:    [[POW:%.*]] = tail call <2 x float> @_Z3powDv2_fS_(<2 x float> [[X]], <2 x float> <float -3.000000e+00, float -3.000000e+00>)
+; CHECK-NEXT:    [[POW:%.*]] = tail call <2 x float> @_Z4pownDv2_fDv2_i(<2 x float> [[X]], <2 x i32> <i32 -3, i32 -3>)
 ; CHECK-NEXT:    ret <2 x float> [[POW]]
 ;
   %pow = tail call <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> <float -3.0,float -3.0>)
@@ -1664,7 +1663,7 @@ define float @test_pow_f32_x_assumed_ugt_0(float %x, float %y) {
 define float @test_pow_afn_f32__y_poison(float %x) {
 ; CHECK-LABEL: define float @test_pow_afn_f32__y_poison
 ; CHECK-SAME: (float [[X:%.*]]) {
-; CHECK-NEXT:    [[POW:%.*]] = tail call afn float @_Z3powff(float [[X]], float poison)
+; CHECK-NEXT:    [[POW:%.*]] = tail call afn float @_Z4pownfi(float [[X]], i32 poison)
 ; CHECK-NEXT:    ret float [[POW]]
 ;
   %pow = tail call afn float @_Z3powff(float %x, float poison)
@@ -1674,7 +1673,7 @@ define float @test_pow_afn_f32__y_poison(float %x) {
 define float @test_pow_afn_f32__y_3(float %x) {
 ; CHECK-LABEL: define float @test_pow_afn_f32__y_3
 ; CHECK-SAME: (float [[X:%.*]]) {
-; CHECK-NEXT:    [[POW:%.*]] = tail call afn float @_Z3powff(float [[X]], float 3.000000e+00)
+; CHECK-NEXT:    [[POW:%.*]] = tail call afn float @_Z4pownfi(float [[X]], i32 3)
 ; CHECK-NEXT:    ret float [[POW]]
 ;
   %pow = tail call afn float @_Z3powff(float %x, float 3.0)
@@ -1695,7 +1694,7 @@ define float @test_pow_afn_f32_nnan_ninf__y_3(float %x) {
 define float @test_pow_afn_f32__y_4(float %x) {
 ; CHECK-LABEL: define float @test_pow_afn_f32__y_4
 ; CHECK-SAME: (float [[X:%.*]]) {
-; CHECK-NEXT:    [[POW:%.*]] = tail call afn float @_Z3powff(float [[X]], float 4.000000e+00)
+; CHECK-NEXT:    [[POW:%.*]] = tail call afn float @_Z4pownfi(float [[X]], i32 4)
 ; CHECK-NEXT:    ret float [[POW]]
 ;
   %pow = tail call afn float @_Z3powff(float %x, float 4.0)
@@ -1726,7 +1725,7 @@ define float @test_pow_afn_f32_nnan_ninf__y_4_5(float %x) {
 define float @test_pow_afn_f32__y_5(float %x) {
 ; CHECK-LABEL: define float @test_pow_afn_f32__y_5
 ; CHECK-SAME: (float [[X:%.*]]) {
-; CHECK-NEXT:    [[POW:%.*]] = tail call afn float @_Z3powff(float [[X]], float 5.000000e+00)
+; CHECK-NEXT:    [[POW:%.*]] = tail call afn float @_Z4pownfi(float [[X]], i32 5)
 ; CHECK-NEXT:    ret float [[POW]]
 ;
   %pow = tail call afn float @_Z3powff(float %x, float 5.0)
@@ -1748,7 +1747,7 @@ define float @test_pow_afn_f32_nnan_ninf__y_5(float %x) {
 define float @test_pow_afn_f32__y_neg5(float %x) {
 ; CHECK-LABEL: define float @test_pow_afn_f32__y_neg5
 ; CHECK-SAME: (float [[X:%.*]]) {
-; CHECK-NEXT:    [[POW:%.*]] = tail call afn float @_Z3powff(float [[X]], float -5.000000e+00)
+; CHECK-NEXT:    [[POW:%.*]] = tail call afn float @_Z4pownfi(float [[X]], i32 -5)
 ; CHECK-NEXT:    ret float [[POW]]
 ;
   %pow = tail call afn float @_Z3powff(float %x, float -5.0)
@@ -1872,7 +1871,7 @@ define float @test_pow_afn_f32_nnan_ninf__y_5_known_positive_with_ninf_flag(floa
 define double @test_pow_afn_f64__y_3(double %x) {
 ; CHECK-LABEL: define double @test_pow_afn_f64__y_3
 ; CHECK-SAME: (double [[X:%.*]]) {
-; CHECK-NEXT:    [[POW:%.*]] = tail call afn double @_Z3powdd(double [[X]], double 3.000000e+00)
+; CHECK-NEXT:    [[POW:%.*]] = tail call afn double @_Z4powndi(double [[X]], i32 3)
 ; CHECK-NEXT:    ret double [[POW]]
 ;
   %pow = tail call afn double @_Z3powdd(double %x, double 3.0)
@@ -1893,7 +1892,7 @@ define double @test_pow_afn_f64_nnan_ninf__y_3(double %x) {
 define double @test_pow_afn_f64__y_4(double %x) {
 ; CHECK-LABEL: define double @test_pow_afn_f64__y_4
 ; CHECK-SAME: (double [[X:%.*]]) {
-; CHECK-NEXT:    [[POW:%.*]] = tail call afn double @_Z3powdd(double [[X]], double 4.000000e+00)
+; CHECK-NEXT:    [[POW:%.*]] = tail call afn double @_Z4powndi(double [[X]], i32 4)
 ; CHECK-NEXT:    ret double [[POW]]
 ;
   %pow = tail call afn double @_Z3powdd(double %x, double 4.0)
@@ -1924,7 +1923,7 @@ define double @test_pow_afn_f64_nnan_ninf__y_4_5(double %x) {
 define double @test_pow_afn_f64__y_5(double %x) {
 ; CHECK-LABEL: define double @test_pow_afn_f64__y_5
 ; CHECK-SAME: (double [[X:%.*]]) {
-; CHECK-NEXT:    [[POW:%.*]] = tail call afn double @_Z3powdd(double [[X]], double 5.000000e+00)
+; CHECK-NEXT:    [[POW:%.*]] = tail call afn double @_Z4powndi(double [[X]], i32 5)
 ; CHECK-NEXT:    ret double [[POW]]
 ;
   %pow = tail call afn double @_Z3powdd(double %x, double 5.0)
@@ -1946,7 +1945,7 @@ define double @test_pow_afn_f64_nnan_ninf__y_5(double %x) {
 define double @test_pow_afn_f64__y_neg5(double %x) {
 ; CHECK-LABEL: define double @test_pow_afn_f64__y_neg5
 ; CHECK-SAME: (double [[X:%.*]]) {
-; CHECK-NEXT:    [[POW:%.*]] = tail call afn double @_Z3powdd(double [[X]], double -5.000000e+00)
+; CHECK-NEXT:    [[POW:%.*]] = tail call afn double @_Z4powndi(double [[X]], i32 -5)
 ; CHECK-NEXT:    ret double [[POW]]
 ;
   %pow = tail call afn double @_Z3powdd(double %x, double -5.0)
@@ -2026,7 +2025,7 @@ define <2 x double> @test_pow_afn_v2f64_nnan_ninf__y_5(<2 x double> %x) {
 define half @test_pow_afn_f16__y_3(half %x) {
 ; CHECK-LABEL: define half @test_pow_afn_f16__y_3
 ; CHECK-SAME: (half [[X:%.*]]) {
-; CHECK-NEXT:    [[POW:%.*]] = tail call afn half @_Z3powDhDh(half [[X]], half 0xH4200)
+; CHECK-NEXT:    [[POW:%.*]] = tail call afn half @_Z4pownDhi(half [[X]], i32 3)
 ; CHECK-NEXT:    ret half [[POW]]
 ;
   %pow = tail call afn half @_Z3powDhDh(half %x, half 3.0)
@@ -2047,7 +2046,7 @@ define half @test_pow_afn_f16_nnan_ninf__y_3(half %x) {
 define half @test_pow_afn_f16__y_4(half %x) {
 ; CHECK-LABEL: define half @test_pow_afn_f16__y_4
 ; CHECK-SAME: (half [[X:%.*]]) {
-; CHECK-NEXT:    [[POW:%.*]] = tail call afn half @_Z3powDhDh(half [[X]], half 0xH4400)
+; CHECK-NEXT:    [[POW:%.*]] = tail call afn half @_Z4pownDhi(half [[X]], i32 4)
 ; CHECK-NEXT:    ret half [[POW]]
 ;
   %pow = tail call afn half @_Z3powDhDh(half %x, half 4.0)
@@ -2078,7 +2077,7 @@ define half @test_pow_afn_f16_nnan_ninf__y_4_5(half %x) {
 define half @test_pow_afn_f16__y_5(half %x) {
 ; CHECK-LABEL: define half @test_pow_afn_f16__y_5
 ; CHECK-SAME: (half [[X:%.*]]) {
-; CHECK-NEXT:    [[POW:%.*]] = tail call afn half @_Z3powDhDh(half [[X]], half 0xH4500)
+; CHECK-NEXT:    [[POW:%.*]] = tail call afn half @_Z4pownDhi(half [[X]], i32 5)
 ; CHECK-NEXT:    ret half [[POW]]
 ;
   %pow = tail call afn half @_Z3powDhDh(half %x, half 5.0)
@@ -2100,7 +2099,7 @@ define half @test_pow_afn_f16_nnan_ninf__y_5(half %x) {
 define half @test_pow_afn_f16__y_neg5(half %x) {
 ; CHECK-LABEL: define half @test_pow_afn_f16__y_neg5
 ; CHECK-SAME: (half [[X:%.*]]) {
-; CHECK-NEXT:    [[POW:%.*]] = tail call afn half @_Z3powDhDh(half [[X]], half 0xHC500)
+; CHECK-NEXT:    [[POW:%.*]] = tail call afn half @_Z4pownDhi(half [[X]], i32 -5)
 ; CHECK-NEXT:    ret half [[POW]]
 ;
   %pow = tail call afn half @_Z3powDhDh(half %x, half -5.0)
@@ -2181,7 +2180,8 @@ define float @test_pow_f32_known_integral_sitofp(float %x, i32 %y) {
 ; CHECK-LABEL: define float @test_pow_f32_known_integral_sitofp
 ; CHECK-SAME: (float [[X:%.*]], i32 [[Y:%.*]]) {
 ; CHECK-NEXT:    [[Y_CAST:%.*]] = sitofp i32 [[Y]] to float
-; CHECK-NEXT:    [[POW:%.*]] = tail call float @_Z3powff(float [[X]], float [[Y_CAST]])
+; CHECK-NEXT:    [[TMP1:%.*]] = fptosi float [[Y_CAST]] to i32
+; CHECK-NEXT:    [[POW:%.*]] = tail call float @_Z4pownfi(float [[X]], i32 [[TMP1]])
 ; CHECK-NEXT:    ret float [[POW]]
 ;
   %y.cast = sitofp i32 %y to float
@@ -2193,7 +2193,8 @@ define float @test_pow_afn_f32_known_integral_sitofp(float %x, i32 %y) {
 ; CHECK-LABEL: define float @test_pow_afn_f32_known_integral_sitofp
 ; CHECK-SAME: (float [[X:%.*]], i32 [[Y:%.*]]) {
 ; CHECK-NEXT:    [[Y_CAST:%.*]] = sitofp i32 [[Y]] to float
-; CHECK-NEXT:    [[POW:%.*]] = tail call afn float @_Z3powff(float [[X]], float [[Y_CAST]])
+; CHECK-NEXT:    [[TMP1:%.*]] = fptosi float [[Y_CAST]] to i32
+; CHECK-NEXT:    [[POW:%.*]] = tail call afn float @_Z4pownfi(float [[X]], i32 [[TMP1]])
 ; CHECK-NEXT:    ret float [[POW]]
 ;
   %y.cast = sitofp i32 %y to float
@@ -2205,18 +2206,19 @@ define float @test_pow_afn_nnan_ninf_f32_known_integral_sitofp(float %x, i32 %y)
 ; CHECK-LABEL: define float @test_pow_afn_nnan_ninf_f32_known_integral_sitofp
 ; CHECK-SAME: (float [[X:%.*]], i32 [[Y:%.*]]) {
 ; CHECK-NEXT:    [[Y_CAST:%.*]] = sitofp i32 [[Y]] to float
+; CHECK-NEXT:    [[TMP1:%.*]] = fptosi float [[Y_CAST]] to i32
 ; CHECK-NEXT:    [[__FABS:%.*]] = call nnan ninf afn float @llvm.fabs.f32(float [[X]])
 ; CHECK-NEXT:    [[__LOG2:%.*]] = call nnan ninf afn float @llvm.log2.f32(float [[__FABS]])
-; CHECK-NEXT:    [[__YLOGX:%.*]] = fmul nnan ninf afn float [[__LOG2]], [[Y_CAST]]
+; CHECK-NEXT:    [[POWNI2F:%.*]] = sitofp i32 [[TMP1]] to float
+; CHECK-NEXT:    [[__YLOGX:%.*]] = fmul nnan ninf afn float [[__LOG2]], [[POWNI2F]]
 ; CHECK-NEXT:    [[__EXP2:%.*]] = call nnan ninf afn float @llvm.exp2.f32(float [[__YLOGX]])
-; CHECK-NEXT:    [[__YTOU:%.*]] = fptosi float [[Y_CAST]] to i32
-; CHECK-NEXT:    [[__YEVEN:%.*]] = shl i32 [[__YTOU]], 31
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast float [[X]] to i32
-; CHECK-NEXT:    [[__POW_SIGN:%.*]] = and i32 [[__YEVEN]], [[TMP1]]
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast float [[__EXP2]] to i32
-; CHECK-NEXT:    [[TMP3:%.*]] = or i32 [[__POW_SIGN]], [[TMP2]]
-; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i32 [[TMP3]] to float
-; CHECK-NEXT:    ret float [[TMP4]]
+; CHECK-NEXT:    [[__YEVEN:%.*]] = shl i32 [[TMP1]], 31
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast float [[X]] to i32
+; CHECK-NEXT:    [[__POW_SIGN:%.*]] = and i32 [[__YEVEN]], [[TMP2]]
+; CHECK-NEXT:    [[TMP3:%.*]] = bitcast float [[__EXP2]] to i32
+; CHECK-NEXT:    [[TMP4:%.*]] = or i32 [[__POW_SIGN]], [[TMP3]]
+; CHECK-NEXT:    [[TMP5:%.*]] = bitcast i32 [[TMP4]] to float
+; CHECK-NEXT:    ret float [[TMP5]]
 ;
   %y.cast = sitofp i32 %y to float
   %pow = tail call afn nnan ninf float @_Z3powff(float %x, float %y.cast)
@@ -2227,7 +2229,8 @@ define float @test_pow_afn_nnan_f32_known_integral_sitofp(float %x, i32 %y) {
 ; CHECK-LABEL: define float @test_pow_afn_nnan_f32_known_integral_sitofp
 ; CHECK-SAME: (float [[X:%.*]], i32 [[Y:%.*]]) {
 ; CHECK-NEXT:    [[Y_CAST:%.*]] = sitofp i32 [[Y]] to float
-; CHECK-NEXT:    [[POW:%.*]] = tail call nnan afn float @_Z3powff(float [[X]], float [[Y_CAST]])
+; CHECK-NEXT:    [[TMP1:%.*]] = fptosi float [[Y_CAST]] to i32
+; CHECK-NEXT:    [[POW:%.*]] = tail call nnan afn float @_Z4pownfi(float [[X]], i32 [[TMP1]])
 ; CHECK-NEXT:    ret float [[POW]]
 ;
   %y.cast = sitofp i32 %y to float
@@ -2239,7 +2242,8 @@ define float @test_pow_afn_ninf_f32_known_integral_sitofp(float %x, i32 %y) {
 ; CHECK-LABEL: define float @test_pow_afn_ninf_f32_known_integral_sitofp
 ; CHECK-SAME: (float [[X:%.*]], i32 [[Y:%.*]]) {
 ; CHECK-NEXT:    [[Y_CAST:%.*]] = sitofp i32 [[Y]] to float
-; CHECK-NEXT:    [[POW:%.*]] = tail call ninf afn float @_Z3powff(float [[X]], float [[Y_CAST]])
+; CHECK-NEXT:    [[TMP1:%.*]] = fptosi float [[Y_CAST]] to i32
+; CHECK-NEXT:    [[POW:%.*]] = tail call ninf afn float @_Z4pownfi(float [[X]], i32 [[TMP1]])
 ; CHECK-NEXT:    ret float [[POW]]
 ;
   %y.cast = sitofp i32 %y to float
@@ -2251,7 +2255,8 @@ define float @test_pow_afn_f32_known_integral_sitofp_finite_argument(float %x, i
 ; CHECK-LABEL: define float @test_pow_afn_f32_known_integral_sitofp_finite_argument
 ; CHECK-SAME: (float [[X:%.*]], i32 [[Y:%.*]]) {
 ; CHECK-NEXT:    [[Y_CAST:%.*]] = sitofp i32 [[Y]] to float
-; CHECK-NEXT:    [[POW:%.*]] = tail call float @_Z3powff(float [[X]], float nofpclass(nan inf) [[Y_CAST]])
+; CHECK-NEXT:    [[TMP1:%.*]] = fptosi float [[Y_CAST]] to i32
+; CHECK-NEXT:    [[POW:%.*]] = tail call float @_Z4pownfi(float [[X]], i32 [[TMP1]])
 ; CHECK-NEXT:    ret float [[POW]]
 ;
   %y.cast = sitofp i32 %y to float
@@ -2263,7 +2268,8 @@ define float @test_pow_f32_known_integral_uitofp(float %x, i32 %y) {
 ; CHECK-LABEL: define float @test_pow_f32_known_integral_uitofp
 ; CHECK-SAME: (float [[X:%.*]], i32 [[Y:%.*]]) {
 ; CHECK-NEXT:    [[Y_CAST:%.*]] = uitofp i32 [[Y]] to float
-; CHECK-NEXT:    [[POW:%.*]] = tail call float @_Z3powff(float [[X]], float [[Y_CAST]])
+; CHECK-NEXT:    [[TMP1:%.*]] = fptosi float [[Y_CAST]] to i32
+; CHECK-NEXT:    [[POW:%.*]] = tail call float @_Z4pownfi(float [[X]], i32 [[TMP1]])
 ; CHECK-NEXT:    ret float [[POW]]
 ;
   %y.cast = uitofp i32 %y to float
@@ -2275,7 +2281,8 @@ define float @test_pow_afn_f32_known_integral_uitofp(float %x, i32 %y) {
 ; CHECK-LABEL: define float @test_pow_afn_f32_known_integral_uitofp
 ; CHECK-SAME: (float [[X:%.*]], i32 [[Y:%.*]]) {
 ; CHECK-NEXT:    [[Y_CAST:%.*]] = uitofp i32 [[Y]] to float
-; CHECK-NEXT:    [[POW:%.*]] = tail call afn float @_Z3powff(float [[X]], float [[Y_CAST]])
+; CHECK-NEXT:    [[TMP1:%.*]] = fptosi float [[Y_CAST]] to i32
+; CHECK-NEXT:    [[POW:%.*]] = tail call afn float @_Z4pownfi(float [[X]], i32 [[TMP1]])
 ; CHECK-NEXT:    ret float [[POW]]
 ;
   %y.cast = uitofp i32 %y to float
@@ -2287,18 +2294,19 @@ define float @test_pow_afn_nnan_ninf_f32_known_integral_uitofp(float %x, i32 %y)
 ; CHECK-LABEL: define float @test_pow_afn_nnan_ninf_f32_known_integral_uitofp
 ; CHECK-SAME: (float [[X:%.*]], i32 [[Y:%.*]]) {
 ; CHECK-NEXT:    [[Y_CAST:%.*]] = uitofp i32 [[Y]] to float
+; CHECK-NEXT:    [[TMP1:%.*]] = fptosi float [[Y_CAST]] to i32
 ; CHECK-NEXT:    [[__FABS:%.*]] = call nnan ninf afn float @llvm.fabs.f32(float [[X]])
 ; CHECK-NEXT:    [[__LOG2:%.*]] = call nnan ninf afn float @llvm.log2.f32(float [[__FABS]])
-; CHECK-NEXT:    [[__YLOGX:%.*]] = fmul nnan ninf afn float [[__LOG2]], [[Y_CAST]]
+; CHECK-NEXT:    [[POWNI2F:%.*]] = sitofp i32 [[TMP1]] to float
+; CHECK-NEXT:    [[__YLOGX:%.*]] = fmul nnan ninf afn float [[__LOG2]], [[POWNI2F]]
 ; CHECK-NEXT:    [[__EXP2:%.*]] = call nnan ninf afn float @llvm.exp2.f32(float [[__YLOGX]])
-; CHECK-NEXT:    [[__YTOU:%.*]] = fptosi float [[Y_CAST]] to i32
-; CHECK-NEXT:    [[__YEVEN:%.*]] = shl i32 [[__YTOU]], 31
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast float [[X]] to i32
-; CHECK-NEXT:    [[__POW_SIGN:%.*]] = and i32 [[__YEVEN]], [[TMP1]]
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast float [[__EXP2]] to i32
-; CHECK-NEXT:    [[TMP3:%.*]] = or i32 [[__POW_SIGN]], [[TMP2]]
-; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i32 [[TMP3]] to float
-; CHECK-NEXT:    ret float [[TMP4]]
+; CHECK-NEXT:    [[__YEVEN:%.*]] = shl i32 [[TMP1]], 31
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast float [[X]] to i32
+; CHECK-NEXT:    [[__POW_SIGN:%.*]] = and i32 [[__YEVEN]], [[TMP2]]
+; CHECK-NEXT:    [[TMP3:%.*]] = bitcast float [[__EXP2]] to i32
+; CHECK-NEXT:    [[TMP4:%.*]] = or i32 [[__POW_SIGN]], [[TMP3]]
+; CHECK-NEXT:    [[TMP5:%.*]] = bitcast i32 [[TMP4]] to float
+; CHECK-NEXT:    ret float [[TMP5]]
 ;
   %y.cast = uitofp i32 %y to float
   %pow = tail call afn nnan ninf float @_Z3powff(float %x, float %y.cast)
@@ -2335,18 +2343,19 @@ define float @test_pow_afn_nnan_ninf_f32_known_integral_uitofp_i256(float %x, i2
 ; CHECK-LABEL: define float @test_pow_afn_nnan_ninf_f32_known_integral_uitofp_i256
 ; CHECK-SAME: (float [[X:%.*]], i256 [[Y:%.*]]) {
 ; CHECK-NEXT:    [[Y_CAST:%.*]] = uitofp i256 [[Y]] to float
+; CHECK-NEXT:    [[TMP1:%.*]] = fptosi float [[Y_CAST]] to i32
 ; CHECK-NEXT:    [[__FABS:%.*]] = call nnan ninf afn float @llvm.fabs.f32(float [[X]])
 ; CHECK-NEXT:    [[__LOG2:%.*]] = call nnan ninf afn float @llvm.log2.f32(float [[__FABS]])
-; CHECK-NEXT:    [[__YLOGX:%.*]] = fmul nnan ninf afn float [[__LOG2]], [[Y_CAST]]
+; CHECK-NEXT:    [[POWNI2F:%.*]] = sitofp i32 [[TMP1]] to float
+; CHECK-NEXT:    [[__YLOGX:%.*]] = fmul nnan ninf afn float [[__LOG2]], [[POWNI2F]]
 ; CHECK-NEXT:    [[__EXP2:%.*]] = call nnan ninf afn float @llvm.exp2.f32(float [[__YLOGX]])
-; CHECK-NEXT:    [[__YTOU:%.*]] = fptosi float [[Y_CAST]] to i32
-; CHECK-NEXT:    [[__YEVEN:%.*]] = shl i32 [[__YTOU]], 31
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast float [[X]] to i32
-; CHECK-NEXT:    [[__POW_SIGN:%.*]] = and i32 [[__YEVEN]], [[TMP1]]
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast float [[__EXP2]] to i32
-; CHECK-NEXT:    [[TMP3:%.*]] = or i32 [[__POW_SIGN]], [[TMP2]]
-; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i32 [[TMP3]] to float
-; CHECK-NEXT:    ret float [[TMP4]]
+; CHECK-NEXT:    [[__YEVEN:%.*]] = shl i32 [[TMP1]], 31
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast float [[X]] to i32
+; CHECK-NEXT:    [[__POW_SIGN:%.*]] = and i32 [[__YEVEN]], [[TMP2]]
+; CHECK-NEXT:    [[TMP3:%.*]] = bitcast float [[__EXP2]] to i32
+; CHECK-NEXT:    [[TMP4:%.*]] = or i32 [[__POW_SIGN]], [[TMP3]]
+; CHECK-NEXT:    [[TMP5:%.*]] = bitcast i32 [[TMP4]] to float
+; CHECK-NEXT:    ret float [[TMP5]]
 ;
   %y.cast = uitofp i256 %y to float
   %pow = tail call afn nnan ninf float @_Z3powff(float %x, float %y.cast)
@@ -2357,18 +2366,19 @@ define float @test_pow_afn_nnan_ninf_f32_known_integral_sitofp_i256(float %x, i2
 ; CHECK-LABEL: define float @test_pow_afn_nnan_ninf_f32_known_integral_sitofp_i256
 ; CHECK-SAME: (float [[X:%.*]], i256 [[Y:%.*]]) {
 ; CHECK-NEXT:    [[Y_CAST:%.*]] = sitofp i256 [[Y]] to float
+; CHECK-NEXT:    [[TMP1:%.*]] = fptosi float [[Y_CAST]] to i32
 ; CHECK-NEXT:    [[__FABS:%.*]] = call nnan ninf afn float @llvm.fabs.f32(float [[X]])
 ; CHECK-NEXT:    [[__LOG2:%.*]] = call nnan ninf afn float @llvm.log2.f32(float [[__FABS]])
-; CHECK-NEXT:    [[__YLOGX:%.*]] = fmul nnan ninf afn float [[__LOG2]], [[Y_CAST]]
+; CHECK-NEXT:    [[POWNI2F:%.*]] = sitofp i32 [[TMP1]] to float
+; CHECK-NEXT:    [[__YLOGX:%.*]] = fmul nnan ninf afn float [[__LOG2]], [[POWNI2F]]
 ; CHECK-NEXT:    [[__EXP2:%.*]] = call nnan ninf afn float @llvm.exp2.f32(float [[__YLOGX]])
-; CHECK-NEXT:    [[__YTOU:%.*]] = fptosi float [[Y_CAST]] to i32
-; CHECK-NEXT:    [[__YEVEN:%.*]] = shl i32 [[__YTOU]], 31
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast float [[X]] to i32
-; CHECK-NEXT:    [[__POW_SIGN:%.*]] = and i32 [[__YEVEN]], [[TMP1]]
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast float [[__EXP2]] to i32
-; CHECK-NEXT:    [[TMP3:%.*]] = or i32 [[__POW_SIGN]], [[TMP2]]
-; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i32 [[TMP3]] to float
-; CHECK-NEXT:    ret float [[TMP4]]
+; CHECK-NEXT:    [[__YEVEN:%.*]] = shl i32 [[TMP1]], 31
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast float [[X]] to i32
+; CHECK-NEXT:    [[__POW_SIGN:%.*]] = and i32 [[__YEVEN]], [[TMP2]]
+; CHECK-NEXT:    [[TMP3:%.*]] = bitcast float [[__EXP2]] to i32
+; CHECK-NEXT:    [[TMP4:%.*]] = or i32 [[__POW_SIGN]], [[TMP3]]
+; CHECK-NEXT:    [[TMP5:%.*]] = bitcast i32 [[TMP4]] to float
+; CHECK-NEXT:    ret float [[TMP5]]
 ;
   %y.cast = sitofp i256 %y to float
   %pow = tail call afn nnan ninf float @_Z3powff(float %x, float %y.cast)
@@ -2379,18 +2389,19 @@ define <2 x float> @test_pow_afn_nnan_ninf_v2f32_known_integral_sitofp(<2 x floa
 ; CHECK-LABEL: define <2 x float> @test_pow_afn_nnan_ninf_v2f32_known_integral_sitofp
 ; CHECK-SAME: (<2 x float> [[X:%.*]], <2 x i32> [[Y:%.*]]) {
 ; CHECK-NEXT:    [[Y_CAST:%.*]] = sitofp <2 x i32> [[Y]] to <2 x float>
+; CHECK-NEXT:    [[TMP1:%.*]] = fptosi <2 x float> [[Y_CAST]] to <2 x i32>
 ; CHECK-NEXT:    [[__FABS:%.*]] = call nnan ninf afn <2 x float> @llvm.fabs.v2f32(<2 x float> [[X]])
 ; CHECK-NEXT:    [[__LOG2:%.*]] = call nnan ninf afn <2 x float> @llvm.log2.v2f32(<2 x float> [[__FABS]])
-; CHECK-NEXT:    [[__YLOGX:%.*]] = fmul nnan ninf afn <2 x float> [[__LOG2]], [[Y_CAST]]
+; CHECK-NEXT:    [[POWNI2F:%.*]] = sitofp <2 x i32> [[TMP1]] to <2 x float>
+; CHECK-NEXT:    [[__YLOGX:%.*]] = fmul nnan ninf afn <2 x float> [[__LOG2]], [[POWNI2F]]
 ; CHECK-NEXT:    [[__EXP2:%.*]] = call nnan ninf afn <2 x float> @llvm.exp2.v2f32(<2 x float> [[__YLOGX]])
-; CHECK-NEXT:    [[__YTOU:%.*]] = fptosi <2 x float> [[Y_CAST]] to <2 x i32>
-; CHECK-NEXT:    [[__YEVEN:%.*]] = shl <2 x i32> [[__YTOU]], <i32 31, i32 31>
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x float> [[X]] to <2 x i32>
-; CHECK-NEXT:    [[__POW_SIGN:%.*]] = and <2 x i32> [[__YEVEN]], [[TMP1]]
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x float> [[__EXP2]] to <2 x i32>
-; CHECK-NEXT:    [[TMP3:%.*]] = or <2 x i32> [[__POW_SIGN]], [[TMP2]]
-; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to <2 x float>
-; CHECK-NEXT:    ret <2 x float> [[TMP4]]
+; CHECK-NEXT:    [[__YEVEN:%.*]] = shl <2 x i32> [[TMP1]], <i32 31, i32 31>
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x float> [[X]] to <2 x i32>
+; CHECK-NEXT:    [[__POW_SIGN:%.*]] = and <2 x i32> [[__YEVEN]], [[TMP2]]
+; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x float> [[__EXP2]] to <2 x i32>
+; CHECK-NEXT:    [[TMP4:%.*]] = or <2 x i32> [[__POW_SIGN]], [[TMP3]]
+; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <2 x i32> [[TMP4]] to <2 x float>
+; CHECK-NEXT:    ret <2 x float> [[TMP5]]
 ;
   %y.cast = sitofp <2 x i32> %y to <2 x float>
   %pow = tail call afn nnan ninf <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> %y.cast)
@@ -2401,7 +2412,8 @@ define <2 x float> @test_pow_v2f32_known_integral_uitofp(<2 x float> %x, <2 x i3
 ; CHECK-LABEL: define <2 x float> @test_pow_v2f32_known_integral_uitofp
 ; CHECK-SAME: (<2 x float> [[X:%.*]], <2 x i32> [[Y:%.*]]) {
 ; CHECK-NEXT:    [[Y_CAST:%.*]] = uitofp <2 x i32> [[Y]] to <2 x float>
-; CHECK-NEXT:    [[POW:%.*]] = tail call <2 x float> @_Z3powDv2_fS_(<2 x float> [[X]], <2 x float> [[Y_CAST]])
+; CHECK-NEXT:    [[TMP1:%.*]] = fptosi <2 x float> [[Y_CAST]] to <2 x i32>
+; CHECK-NEXT:    [[POW:%.*]] = tail call <2 x float> @_Z4pownDv2_fDv2_i(<2 x float> [[X]], <2 x i32> [[TMP1]])
 ; CHECK-NEXT:    ret <2 x float> [[POW]]
 ;
   %y.cast = uitofp <2 x i32> %y to <2 x float>
@@ -2413,7 +2425,8 @@ define <2 x float> @test_pow_afn_v2f32_known_integral_uitofp(<2 x float> %x, <2
 ; CHECK-LABEL: define <2 x float> @test_pow_afn_v2f32_known_integral_uitofp
 ; CHECK-SAME: (<2 x float> [[X:%.*]], <2 x i32> [[Y:%.*]]) {
 ; CHECK-NEXT:    [[Y_CAST:%.*]] = uitofp <2 x i32> [[Y]] to <2 x float>
-; CHECK-NEXT:    [[POW:%.*]] = tail call afn <2 x float> @_Z3powDv2_fS_(<2 x float> [[X]], <2 x float> [[Y_CAST]])
+; CHECK-NEXT:    [[TMP1:%.*]] = fptosi <2 x float> [[Y_CAST]] to <2 x i32>
+; CHECK-NEXT:    [[POW:%.*]] = tail call afn <2 x float> @_Z4pownDv2_fDv2_i(<2 x float> [[X]], <2 x i32> [[TMP1]])
 ; CHECK-NEXT:    ret <2 x float> [[POW]]
 ;
   %y.cast = uitofp <2 x i32> %y to <2 x float>
@@ -2425,18 +2438,19 @@ define <2 x float> @test_pow_afn_nnan_ninf_v2f32_known_integral_uitofp(<2 x floa
 ; CHECK-LABEL: define <2 x float> @test_pow_afn_nnan_ninf_v2f32_known_integral_uitofp
 ; CHECK-SAME: (<2 x float> [[X:%.*]], <2 x i32> [[Y:%.*]]) {
 ; CHECK-NEXT:    [[Y_CAST:%.*]] = uitofp <2 x i32> [[Y]] to <2 x float>
+; CHECK-NEXT:    [[TMP1:%.*]] = fptosi <2 x float> [[Y_CAST]] to <2 x i32>
 ; CHECK-NEXT:    [[__FABS:%.*]] = call nnan ninf afn <2 x float> @llvm.fabs.v2f32(<2 x float> [[X]])
 ; CHECK-NEXT:    [[__LOG2:%.*]] = call nnan ninf afn <2 x float> @llvm.log2.v2f32(<2 x float> [[__FABS]])
-; CHECK-NEXT:    [[__YLOGX:%.*]] = fmul nnan ninf afn <2 x float> [[__LOG2]], [[Y_CAST]]
+; CHECK-NEXT:    [[POWNI2F:%.*]] = sitofp <2 x i32> [[TMP1]] to <2 x float>
+; CHECK-NEXT:    [[__YLOGX:%.*]] = fmul nnan ninf afn <2 x float> [[__LOG2]], [[POWNI2F]]
 ; CHECK-NEXT:    [[__EXP2:%.*]] = call nnan ninf afn <2 x float> @llvm.exp2.v2f32(<2 x float> [[__YLOGX]])
-; CHECK-NEXT:    [[__YTOU:%.*]] = fptosi <2 x float> [[Y_CAST]] to <2 x i32>
-; CHECK-NEXT:    [[__YEVEN:%.*]] = shl <2 x i32> [[__YTOU]], <i32 31, i32 31>
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x float> [[X]] to <2 x i32>
-; CHECK-NEXT:    [[__POW_SIGN:%.*]] = and <2 x i32> [[__YEVEN]], [[TMP1]]
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x float> [[__EXP2]] to <2 x i32>
-; CHECK-NEXT:    [[TMP3:%.*]] = or <2 x i32> [[__POW_SIGN]], [[TMP2]]
-; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to <2 x float>
-; CHECK-NEXT:    ret <2 x float> [[TMP4]]
+; CHECK-NEXT:    [[__YEVEN:%.*]] = shl <2 x i32> [[TMP1]], <i32 31, i32 31>
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x float> [[X]] to <2 x i32>
+; CHECK-NEXT:    [[__POW_SIGN:%.*]] = and <2 x i32> [[__YEVEN]], [[TMP2]]
+; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x float> [[__EXP2]] to <2 x i32>
+; CHECK-NEXT:    [[TMP4:%.*]] = or <2 x i32> [[__POW_SIGN]], [[TMP3]]
+; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <2 x i32> [[TMP4]] to <2 x float>
+; CHECK-NEXT:    ret <2 x float> [[TMP5]]
 ;
   %y.cast = uitofp <2 x i32> %y to <2 x float>
   %pow = tail call afn nnan ninf <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> %y.cast)


        

