[clang] [llvm] [IR] Allow fast math flags on fptosi, fptoui and sitofp (PR #160475)

via cfe-commits cfe-commits at lists.llvm.org
Wed Sep 24 07:17:16 PDT 2025


https://github.com/paperchalice updated https://github.com/llvm/llvm-project/pull/160475

From 8bf6ae5d44b8adb37886b076f84a9ecd571ab3f4 Mon Sep 17 00:00:00 2001
From: PaperChalice <liujunchang97 at outlook.com>
Date: Wed, 24 Sep 2025 17:26:32 +0800
Subject: [PATCH] [IR] Allow fast math flags on fptosi, fptoui and sitofp

---
 clang/test/CodeGenCUDA/amdgpu-atomic-ops.cu   |   2 +-
 clang/test/CodeGenCUDA/managed-var.cu         |   2 +-
 .../BasicFeatures/AggregateSplatCast.hlsl     |   6 +-
 .../BasicFeatures/ArrayElementwiseCast.hlsl   |   4 +-
 .../CodeGenHLSL/BasicFeatures/InitLists.hlsl  |  16 +-
 .../BasicFeatures/OutputArguments.hlsl        |  14 +-
 .../BasicFeatures/StructElementwiseCast.hlsl  |   8 +-
 .../BasicFeatures/VectorElementwiseCast.hlsl  |   2 +-
 .../standard_conversion_sequences.hlsl        |   6 +-
 clang/test/CodeGenHLSL/builtins/dot2add.hlsl  |  12 +-
 .../CodeGenHLSL/builtins/lerp-overloads.hlsl  |  48 ++--
 .../CodeGenHLSL/builtins/pow-overloads.hlsl   |  32 +--
 clang/test/Headers/__clang_hip_math.hip       |  80 +++---
 .../test/Headers/openmp_device_math_isnan.cpp |   4 +-
 .../SemaHLSL/VectorOverloadResolution.hlsl    |   6 +-
 llvm/docs/LangRef.rst                         |   9 +-
 llvm/include/llvm/IR/Operator.h               |   3 +
 llvm/lib/AsmParser/LLParser.cpp               |   6 +-
 llvm/test/Assembler/fast-math-flags.ll        |  66 +++++
 ...amdgpu-codegenprepare-fold-binop-select.ll |   4 +-
 .../AMDGPU/amdgpu-codegenprepare-idiv.ll      | 264 +++++++++---------
 .../AMDGPU/amdgpu-simplify-libcall-pow.ll     |  40 +--
 .../AMDGPU/amdgpu-simplify-libcall-pown.ll    |  18 +-
 llvm/test/CodeGen/AMDGPU/divrem24-assume.ll   |   4 +-
 llvm/test/CodeGen/AMDGPU/simplify-libcalls.ll |   4 +-
 llvm/test/Transforms/InstCombine/log-pow.ll   |   6 +-
 .../PowerPC/vplan-scalarivsext-crash.ll       |   4 +-
 .../LoopVectorize/X86/float-induction-x86.ll  |  76 ++---
 .../LoopVectorize/float-induction.ll          |  22 +-
 .../LoopVectorize/iv_outside_user.ll          |  18 +-
 .../single_early_exit_live_outs.ll            |   2 +-
 31 files changed, 432 insertions(+), 356 deletions(-)

diff --git a/clang/test/CodeGenCUDA/amdgpu-atomic-ops.cu b/clang/test/CodeGenCUDA/amdgpu-atomic-ops.cu
index 22c40e6d38ea2..b8819b8e550ea 100644
--- a/clang/test/CodeGenCUDA/amdgpu-atomic-ops.cu
+++ b/clang/test/CodeGenCUDA/amdgpu-atomic-ops.cu
@@ -164,7 +164,7 @@ __device__ double ffp4(double *p, float f) {
 
 __device__ double ffp5(double *p, int i) {
   // FUN-LABEL: @_Z4ffp5Pdi
-  // CHECK: sitofp i32 {{.*}} to double
+  // CHECK: sitofp contract i32 {{.*}} to double
   // SAFEIR: atomicrmw fsub ptr {{.*}} monotonic, align 8, [[DEFMD]]
   // UNSAFEIR: atomicrmw fsub ptr {{.*}} monotonic, align 8, [[DEFMD]]
   __atomic_fetch_sub(p, i, memory_order_relaxed);
diff --git a/clang/test/CodeGenCUDA/managed-var.cu b/clang/test/CodeGenCUDA/managed-var.cu
index 0e7a7be85ac8e..368adece297b7 100644
--- a/clang/test/CodeGenCUDA/managed-var.cu
+++ b/clang/test/CodeGenCUDA/managed-var.cu
@@ -145,7 +145,7 @@ float load3() {
 // HOST:  %4 = ptrtoint ptr %3 to i64
 // HOST:  %5 = sub i64 %4, %1
 // HOST:  %sub.ptr.div = sdiv exact i64 %5, 4
-// HOST:  %conv = sitofp i64 %sub.ptr.div to float
+// HOST:  %conv = sitofp contract i64 %sub.ptr.div to float
 // HOST:  ret float %conv
 float addr_taken2() {
   return (float)reinterpret_cast<long>(&(v2[1].y)-&(v[1].x));
diff --git a/clang/test/CodeGenHLSL/BasicFeatures/AggregateSplatCast.hlsl b/clang/test/CodeGenHLSL/BasicFeatures/AggregateSplatCast.hlsl
index 512fcd435191a..64fda1301d8cb 100644
--- a/clang/test/CodeGenHLSL/BasicFeatures/AggregateSplatCast.hlsl
+++ b/clang/test/CodeGenHLSL/BasicFeatures/AggregateSplatCast.hlsl
@@ -38,7 +38,7 @@ export void call8() {
 // CHECK-NEXT: store <1 x float> splat (float 1.000000e+00), ptr [[B]], align 4
 // CHECK-NEXT: [[L:%.*]] = load <1 x float>, ptr [[B]], align 4
 // CHECK-NEXT: [[VL:%.*]] = extractelement <1 x float> [[L]], i32 0
-// CHECK-NEXT: [[C:%.*]] = fptosi float [[VL]] to i32
+// CHECK-NEXT: [[C:%.*]] = fptosi reassoc nnan ninf nsz arcp afn float [[VL]] to i32
 // CHECK-NEXT: [[SI:%.*]] = insertelement <4 x i32> poison, i32 [[C]], i64 0
 // CHECK-NEXT: [[S:%.*]] = shufflevector <4 x i32> [[SI]], <4 x i32> poison, <4 x i32> zeroinitializer
 // CHECK-NEXT: store <4 x i32> [[S]], ptr [[A]], align 16
@@ -62,7 +62,7 @@ struct S {
 // CHECK-NEXT: [[G1:%.*]] = getelementptr inbounds %struct.S, ptr [[s]], i32 0, i32 0
 // CHECK-NEXT: [[G2:%.*]] = getelementptr inbounds %struct.S, ptr [[s]], i32 0, i32 1
 // CHECK-NEXT: store i32 [[VL]], ptr [[G1]], align 4
-// CHECK-NEXT: [[C:%.*]] = sitofp i32 [[VL]] to float
+// CHECK-NEXT: [[C:%.*]] = sitofp reassoc nnan ninf nsz arcp afn i32 [[VL]] to float
 // CHECK-NEXT: store float [[C]], ptr [[G2]], align 4
 export void call3() {
   int1 A = {1};
@@ -79,7 +79,7 @@ export void call3() {
 // CHECK-NEXT: [[G1:%.*]] = getelementptr inbounds %struct.S, ptr [[s]], i32 0, i32 0
 // CHECK-NEXT: [[G2:%.*]] = getelementptr inbounds %struct.S, ptr [[s]], i32 0, i32 1
 // CHECK-NEXT: store i32 [[VL]], ptr [[G1]], align 4
-// CHECK-NEXT: [[C:%.*]] = sitofp i32 [[VL]] to float
+// CHECK-NEXT: [[C:%.*]] = sitofp reassoc nnan ninf nsz arcp afn i32 [[VL]] to float
 // CHECK-NEXT: store float [[C]], ptr [[G2]], align 4
 export void call5() {
   int1 A = {1};
diff --git a/clang/test/CodeGenHLSL/BasicFeatures/ArrayElementwiseCast.hlsl b/clang/test/CodeGenHLSL/BasicFeatures/ArrayElementwiseCast.hlsl
index ac02ddf5765ed..e743a978e7122 100644
--- a/clang/test/CodeGenHLSL/BasicFeatures/ArrayElementwiseCast.hlsl
+++ b/clang/test/CodeGenHLSL/BasicFeatures/ArrayElementwiseCast.hlsl
@@ -63,7 +63,7 @@ export void call2() {
 // CHECK-NEXT: [[C:%.*]] = load <1 x float>, ptr [[A]], align 4
 // CHECK-NEXT: [[G1:%.*]] = getelementptr inbounds [1 x i32], ptr [[B]], i32 0, i32 0
 // CHECK-NEXT: [[V:%.*]] = extractelement <1 x float> [[C]], i64 0
-// CHECK-NEXT: [[C:%.*]] = fptosi float [[V]] to i32
+// CHECK-NEXT: [[C:%.*]] = fptosi reassoc nnan ninf nsz arcp afn float [[V]] to i32
 // CHECK-NEXT: store i32 [[C]], ptr [[G1]], align 4
 export void call3() {
   float1 A = {1.2};
@@ -84,7 +84,7 @@ export void call3() {
 // CHECK-NEXT: [[VG:%.*]] = getelementptr inbounds [1 x <2 x float>], ptr [[Tmp]], i32 0, i32 0
 // CHECK-NEXT: [[L:%.*]] = load <2 x float>, ptr [[VG]], align 8
 // CHECK-NEXT: [[VL:%.*]] = extractelement <2 x float> [[L]], i32 0
-// CHECK-NEXT: [[C:%.*]] = fptosi float [[VL]] to i32
+// CHECK-NEXT: [[C:%.*]] = fptosi reassoc nnan ninf nsz arcp afn float [[VL]] to i32
 // CHECK-NEXT: store i32 [[C]], ptr [[G1]], align 4
 // CHECK-NEXT: [[L4:%.*]] = load <2 x float>, ptr [[VG]], align 8
 // CHECK-NEXT: [[VL5:%.*]] = extractelement <2 x float> [[L4]], i32 1
diff --git a/clang/test/CodeGenHLSL/BasicFeatures/InitLists.hlsl b/clang/test/CodeGenHLSL/BasicFeatures/InitLists.hlsl
index c30c640519cda..8376131b62149 100644
--- a/clang/test/CodeGenHLSL/BasicFeatures/InitLists.hlsl
+++ b/clang/test/CodeGenHLSL/BasicFeatures/InitLists.hlsl
@@ -98,12 +98,12 @@ TwoFloats case3(int Val) {
 // CHECK-NEXT:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_TWOFLOATS]], ptr [[AGG_RESULT]], i32 0, i32 0
 // CHECK-NEXT:    [[TMP0:%.*]] = load <2 x i32>, ptr [[TWOVALS_ADDR]], align 8
 // CHECK-NEXT:    [[VECEXT:%.*]] = extractelement <2 x i32> [[TMP0]], i64 0
-// CHECK-NEXT:    [[CONV:%.*]] = sitofp i32 [[VECEXT]] to float
+// CHECK-NEXT:    [[CONV:%.*]] = sitofp reassoc nnan ninf nsz arcp afn i32 [[VECEXT]] to float
 // CHECK-NEXT:    store float [[CONV]], ptr [[X]], align 1
 // CHECK-NEXT:    [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_TWOFLOATS]], ptr [[AGG_RESULT]], i32 0, i32 1
 // CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i32>, ptr [[TWOVALS_ADDR]], align 8
 // CHECK-NEXT:    [[VECEXT1:%.*]] = extractelement <2 x i32> [[TMP1]], i64 1
-// CHECK-NEXT:    [[CONV2:%.*]] = sitofp i32 [[VECEXT1]] to float
+// CHECK-NEXT:    [[CONV2:%.*]] = sitofp reassoc nnan ninf nsz arcp afn i32 [[VECEXT1]] to float
 // CHECK-NEXT:    store float [[CONV2]], ptr [[Y]], align 1
 // CHECK-NEXT:    ret void
 //
@@ -181,7 +181,7 @@ TwoInts case6(TwoFloats TF4) {
 // CHECK-NEXT:    store i32 [[TMP4]], ptr [[TAILSTATE]], align 1
 // CHECK-NEXT:    [[HAIRCOUNT:%.*]] = getelementptr inbounds nuw [[STRUCT_DOGGO]], ptr [[AGG_RESULT]], i32 0, i32 2
 // CHECK-NEXT:    [[TMP5:%.*]] = load i32, ptr [[VAL_ADDR]], align 4
-// CHECK-NEXT:    [[CONV:%.*]] = sitofp i32 [[TMP5]] to float
+// CHECK-NEXT:    [[CONV:%.*]] = sitofp reassoc nnan ninf nsz arcp afn i32 [[TMP5]] to float
 // CHECK-NEXT:    store float [[CONV]], ptr [[HAIRCOUNT]], align 1
 // CHECK-NEXT:    [[EARDIRECTION:%.*]] = getelementptr inbounds nuw [[STRUCT_DOGGO]], ptr [[AGG_RESULT]], i32 0, i32 3
 // CHECK-NEXT:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_TWOFLOATS]], ptr [[TF1]], i32 0, i32 0
@@ -407,7 +407,7 @@ AnimalBits case8(Doggo D1) {
 // CHECK-NEXT:    [[HAIRCOUNT58:%.*]] = getelementptr inbounds nuw [[STRUCT_DOGGO]], ptr [[ARRAYINIT_ELEMENT44]], i32 0, i32 2
 // CHECK-NEXT:    [[COUNTER:%.*]] = getelementptr inbounds nuw [[STRUCT_ANIMALBITS]], ptr [[A1]], i32 0, i32 2
 // CHECK-NEXT:    [[TMP19:%.*]] = load i64, ptr [[COUNTER]], align 1
-// CHECK-NEXT:    [[CONV:%.*]] = sitofp i64 [[TMP19]] to float
+// CHECK-NEXT:    [[CONV:%.*]] = sitofp reassoc nnan ninf nsz arcp afn i64 [[TMP19]] to float
 // CHECK-NEXT:    store float [[CONV]], ptr [[HAIRCOUNT58]], align 1
 // CHECK-NEXT:    [[EARDIRECTION59:%.*]] = getelementptr inbounds nuw [[STRUCT_DOGGO]], ptr [[ARRAYINIT_ELEMENT44]], i32 0, i32 3
 // CHECK-NEXT:    [[LEFTDIR:%.*]] = getelementptr inbounds nuw [[STRUCT_ANIMALBITS]], ptr [[A1]], i32 0, i32 3
@@ -542,7 +542,7 @@ AnimalBits case8(Doggo D1) {
 // CHECK-NEXT:    [[HAIRCOUNT149:%.*]] = getelementptr inbounds nuw [[STRUCT_KITTEH]], ptr [[ARRAYINIT_ELEMENT133]], i32 0, i32 2
 // CHECK-NEXT:    [[COUNTER150:%.*]] = getelementptr inbounds nuw [[STRUCT_ANIMALBITS]], ptr [[A1]], i32 0, i32 2
 // CHECK-NEXT:    [[TMP47:%.*]] = load i64, ptr [[COUNTER150]], align 1
-// CHECK-NEXT:    [[CONV151:%.*]] = sitofp i64 [[TMP47]] to float
+// CHECK-NEXT:    [[CONV151:%.*]] = sitofp reassoc nnan ninf nsz arcp afn i64 [[TMP47]] to float
 // CHECK-NEXT:    store float [[CONV151]], ptr [[HAIRCOUNT149]], align 1
 // CHECK-NEXT:    [[CLAWS152:%.*]] = getelementptr inbounds nuw [[STRUCT_KITTEH]], ptr [[ARRAYINIT_ELEMENT133]], i32 0, i32 3
 // CHECK-NEXT:    [[LEFTDIR153:%.*]] = getelementptr inbounds nuw [[STRUCT_ANIMALBITS]], ptr [[A1]], i32 0, i32 3
@@ -677,7 +677,7 @@ AnimalBits case8(Doggo D1) {
 // CHECK-NEXT:    [[HAIRCOUNT246:%.*]] = getelementptr inbounds nuw [[STRUCT_KITTEH]], ptr [[ARRAYINIT_ELEMENT230]], i32 0, i32 2
 // CHECK-NEXT:    [[COUNTER247:%.*]] = getelementptr inbounds nuw [[STRUCT_ANIMALBITS]], ptr [[A1]], i32 0, i32 2
 // CHECK-NEXT:    [[TMP75:%.*]] = load i64, ptr [[COUNTER247]], align 1
-// CHECK-NEXT:    [[CONV248:%.*]] = sitofp i64 [[TMP75]] to float
+// CHECK-NEXT:    [[CONV248:%.*]] = sitofp reassoc nnan ninf nsz arcp afn i64 [[TMP75]] to float
 // CHECK-NEXT:    store float [[CONV248]], ptr [[HAIRCOUNT246]], align 1
 // CHECK-NEXT:    [[CLAWS249:%.*]] = getelementptr inbounds nuw [[STRUCT_KITTEH]], ptr [[ARRAYINIT_ELEMENT230]], i32 0, i32 3
 // CHECK-NEXT:    [[LEFTDIR250:%.*]] = getelementptr inbounds nuw [[STRUCT_ANIMALBITS]], ptr [[A1]], i32 0, i32 3
@@ -867,13 +867,13 @@ TwoInts case14(SlicyBits SB) {
 // CHECK-NEXT:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_TWOFLOATS]], ptr [[AGG_RESULT]], i32 0, i32 0
 // CHECK-NEXT:    [[BF_LOAD:%.*]] = load i8, ptr [[SB]], align 1
 // CHECK-NEXT:    [[BF_CAST:%.*]] = sext i8 [[BF_LOAD]] to i32
-// CHECK-NEXT:    [[CONV:%.*]] = sitofp i32 [[BF_CAST]] to float
+// CHECK-NEXT:    [[CONV:%.*]] = sitofp reassoc nnan ninf nsz arcp afn i32 [[BF_CAST]] to float
 // CHECK-NEXT:    store float [[CONV]], ptr [[X]], align 1
 // CHECK-NEXT:    [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_TWOFLOATS]], ptr [[AGG_RESULT]], i32 0, i32 1
 // CHECK-NEXT:    [[W:%.*]] = getelementptr inbounds nuw [[STRUCT_SLICYBITS]], ptr [[SB]], i32 0, i32 1
 // CHECK-NEXT:    [[BF_LOAD1:%.*]] = load i8, ptr [[W]], align 1
 // CHECK-NEXT:    [[BF_CAST2:%.*]] = sext i8 [[BF_LOAD1]] to i32
-// CHECK-NEXT:    [[CONV3:%.*]] = sitofp i32 [[BF_CAST2]] to float
+// CHECK-NEXT:    [[CONV3:%.*]] = sitofp reassoc nnan ninf nsz arcp afn i32 [[BF_CAST2]] to float
 // CHECK-NEXT:    store float [[CONV3]], ptr [[Y]], align 1
 // CHECK-NEXT:    ret void
 //
diff --git a/clang/test/CodeGenHLSL/BasicFeatures/OutputArguments.hlsl b/clang/test/CodeGenHLSL/BasicFeatures/OutputArguments.hlsl
index d0ba8f447b732..cf73f86474bd5 100644
--- a/clang/test/CodeGenHLSL/BasicFeatures/OutputArguments.hlsl
+++ b/clang/test/CodeGenHLSL/BasicFeatures/OutputArguments.hlsl
@@ -13,14 +13,14 @@ void trunc_Param(inout int X) {}
 // CHECK: [[F:%.*]] = alloca float
 // CHECK: [[ArgTmp:%.*]] = alloca i32
 // CHECK: [[FVal:%.*]] = load float, ptr {{.*}}
-// CHECK: [[IVal:%.*]] = fptosi float [[FVal]] to i32
+// CHECK: [[IVal:%.*]] = fptosi reassoc nnan ninf nsz arcp afn float [[FVal]] to i32
 // CHECK: store i32 [[IVal]], ptr [[ArgTmp]]
 // CHECK: call void {{.*}}trunc_Param{{.*}}(ptr noalias noundef nonnull align 4 dereferenceable(4) [[ArgTmp]])
 // CHECK: [[IRet:%.*]] = load i32, ptr [[ArgTmp]]
-// CHECK: [[FRet:%.*]] = sitofp i32 [[IRet]] to float
+// CHECK: [[FRet:%.*]] = sitofp reassoc nnan ninf nsz arcp afn i32 [[IRet]] to float
 // CHECK: store float [[FRet]], ptr [[F]]
-// OPT: [[IVal:%.*]] = fptosi float {{.*}} to i32
-// OPT: [[FVal:%.*]] = sitofp i32 [[IVal]] to float
+// OPT: [[IVal:%.*]] = fptosi reassoc nnan ninf nsz arcp afn float {{.*}} to i32
+// OPT: [[FVal:%.*]] = sitofp reassoc nnan ninf nsz arcp afn i32 [[IVal]] to float
 // OPT: ret float [[FVal]]
 export float case1(float F) {
   trunc_Param(F);
@@ -202,14 +202,14 @@ void trunc_vec(inout int3 V) {}
 // CHECK: [[V:%.*]] = alloca <3 x float>
 // CHECK: [[Tmp:%.*]] = alloca <3 x i32>
 // CHECK: [[FVal:%.*]] = load <3 x float>, ptr [[V]]
-// CHECK: [[IVal:%.*]] = fptosi <3 x float> [[FVal]] to <3 x i32>
+// CHECK: [[IVal:%.*]] = fptosi reassoc nnan ninf nsz arcp afn <3 x float> [[FVal]] to <3 x i32>
 // CHECK: store <3 x i32> [[IVal]], ptr [[Tmp]]
 // CHECK: call void {{.*}}trunc_vec{{.*}}(ptr noalias noundef nonnull align 16 dereferenceable(16) [[Tmp]])
 // CHECK: [[IRet:%.*]] = load <3 x i32>, ptr [[Tmp]]
-// CHECK: [[FRet:%.*]] = sitofp <3 x i32> [[IRet]] to <3 x float>
+// CHECK: [[FRet:%.*]] = sitofp reassoc nnan ninf nsz arcp afn <3 x i32> [[IRet]] to <3 x float>
 // CHECK: store <3 x float> [[FRet]], ptr [[V]]
 
-// OPT: [[IVal:%.*]] = fptosi <3 x float> {{.*}} to <3 x i32>
+// OPT: [[IVal:%.*]] = fptosi reassoc nnan ninf nsz arcp afn <3 x float> {{.*}} to <3 x i32>
 // OPT: [[FVal:%.*]] = sitofp <3 x i32> [[IVal]] to <3 x float>
 // OPT: ret <3 x float> [[FVal]]
 
diff --git a/clang/test/CodeGenHLSL/BasicFeatures/StructElementwiseCast.hlsl b/clang/test/CodeGenHLSL/BasicFeatures/StructElementwiseCast.hlsl
index 81b9f5b28cc7e..e4229951e58b5 100644
--- a/clang/test/CodeGenHLSL/BasicFeatures/StructElementwiseCast.hlsl
+++ b/clang/test/CodeGenHLSL/BasicFeatures/StructElementwiseCast.hlsl
@@ -32,7 +32,7 @@ export void call0() {
 // CHECK-NEXT: [[VL:%.*]] = extractelement <2 x i32> [[L]], i64 0
 // CHECK-NEXT: store i32 [[VL]], ptr [[G1]], align 4
 // CHECK-NEXT: [[VL2:%.*]] = extractelement <2 x i32> [[L]], i64 1
-// CHECK-NEXT: [[C:%.*]] = sitofp i32 [[VL2]] to float
+// CHECK-NEXT: [[C:%.*]] = sitofp reassoc nnan ninf nsz arcp afn i32 [[VL2]] to float
 // CHECK-NEXT: store float [[C]], ptr [[G2]], align 4
 export void call1() {
   int2 A = {1,2};
@@ -54,7 +54,7 @@ export void call1() {
 // CHECK-NEXT: [[L:%.*]] = load i32, ptr [[G3]], align 4
 // CHECK-NEXT: store i32 [[L]], ptr [[G1]], align 4
 // CHECK-NEXT: [[L4:%.*]] = load i32, ptr [[G4]], align 4
-// CHECK-NEXT: [[C:%.*]] = sitofp i32 [[L4]] to float
+// CHECK-NEXT: [[C:%.*]] = sitofp reassoc nnan ninf nsz arcp afn i32 [[L4]] to float
 // CHECK-NEXT: store float [[C]], ptr [[G2]], align 4
 export void call2() {
   int A[2] = {1,2};
@@ -104,7 +104,7 @@ export void call6() {
 // CHECK-NEXT: [[L:%.*]] = load i32, ptr [[G3]], align 4
 // CHECK-NEXT: store i32 [[L]], ptr [[G1]], align 4
 // CHECK-NEXT: [[L4:%.*]] = load i32, ptr [[G4]], align 4
-// CHECK-NEXT: [[C:%.*]] = sitofp i32 [[L4]] to float
+// CHECK-NEXT: [[C:%.*]] = sitofp reassoc nnan ninf nsz arcp afn i32 [[L4]] to float
 // CHECK-NEXT: store float [[C]], ptr [[G2]], align 4
 export void call7() {
   int A[2] = {1,2};
@@ -132,7 +132,7 @@ struct T {
 // CHECK-NEXT: %load = load i32, ptr %gep2, align 4
 // CHECK-NEXT: store i32 %load, ptr %gep, align 4
 // CHECK-NEXT: %load5 = load i32, ptr %gep3, align 4
-// CHECK-NEXT: %conv = sitofp i32 %load5 to float
+// CHECK-NEXT: %conv = sitofp reassoc nnan ninf nsz arcp afn i32 %load5 to float
 // CHECK-NEXT: store float %conv, ptr %gep1, align 4
 export void call8() {
   T t = {1,2,3};
diff --git a/clang/test/CodeGenHLSL/BasicFeatures/VectorElementwiseCast.hlsl b/clang/test/CodeGenHLSL/BasicFeatures/VectorElementwiseCast.hlsl
index 253b38a7ca072..a5843b0a95642 100644
--- a/clang/test/CodeGenHLSL/BasicFeatures/VectorElementwiseCast.hlsl
+++ b/clang/test/CodeGenHLSL/BasicFeatures/VectorElementwiseCast.hlsl
@@ -40,7 +40,7 @@ struct S {
 // CHECK-NEXT: [[L:%.*]] = load i32, ptr [[G1]], align 4
 // CHECK-NEXT: [[C:%.*]] = insertelement <2 x i32> [[B]], i32 [[L]], i64 0
 // CHECK-NEXT: [[L2:%.*]] = load float, ptr [[G2]], align 4
-// CHECK-NEXT: [[D:%.*]] = fptosi float [[L2]] to i32
+// CHECK-NEXT: [[D:%.*]] = fptosi reassoc nnan ninf nsz arcp afn float [[L2]] to i32
 // CHECK-NEXT: [[E:%.*]] = insertelement <2 x i32> [[C]], i32 [[D]], i64 1
 // CHECK-NEXT: store <2 x i32> [[E]], ptr [[A]], align 8
 export void call3() {
diff --git a/clang/test/CodeGenHLSL/BasicFeatures/standard_conversion_sequences.hlsl b/clang/test/CodeGenHLSL/BasicFeatures/standard_conversion_sequences.hlsl
index 6770efefe94fe..3e8c06ed074af 100644
--- a/clang/test/CodeGenHLSL/BasicFeatures/standard_conversion_sequences.hlsl
+++ b/clang/test/CodeGenHLSL/BasicFeatures/standard_conversion_sequences.hlsl
@@ -43,7 +43,7 @@ void d4_to_f2() {
 // CHECK: [[i2:%.*]] = alloca <2 x i32>
 // CHECK: store <2 x float> splat (float 4.000000e+00), ptr [[f2]]
 // CHECK: [[vecf2:%.*]] = load <2 x float>, ptr [[f2]]
-// CHECK: [[veci2:%.*]] = fptosi <2 x float> [[vecf2]] to <2 x i32>
+// CHECK: [[veci2:%.*]] = fptosi reassoc nnan ninf nsz arcp afn <2 x float> [[vecf2]] to <2 x i32>
 // CHECK: store <2 x i32> [[veci2]], ptr [[i2]]
 void f2_to_i2() {
   vector<float,2> f2 = 4.0;
@@ -55,7 +55,7 @@ void f2_to_i2() {
 // CHECK: [[i2:%.*]] = alloca <2 x i32>
 // CHECK: store <4 x double> splat (double 5.000000e+00), ptr [[d4]]
 // CHECK: [[vecd4:%.*]] = load <4 x double>, ptr [[d4]]
-// CHECK: [[veci4:%.*]] = fptosi <4 x double> [[vecd4]] to <4 x i32>
+// CHECK: [[veci4:%.*]] = fptosi reassoc nnan ninf nsz arcp afn <4 x double> [[vecd4]] to <4 x i32>
 // CHECK: [[veci2:%.*]] = shufflevector <4 x i32> [[veci4]], <4 x i32> poison, <2 x i32> <i32 0, i32 1>
 // CHECK: store <2 x i32> [[veci2]], ptr [[i2]]
 void d4_to_i2() {
@@ -68,7 +68,7 @@ void d4_to_i2() {
 // CHECK: [[l4:%.*]] = alloca <4 x i64>
 // CHECK: store <4 x double> splat (double 6.000000e+00), ptr [[d4]]
 // CHECK: [[vecd4:%.*]] = load <4 x double>, ptr [[d4]]
-// CHECK: [[vecl4:%.*]] = fptosi <4 x double> [[vecd4]] to <4 x i64>
+// CHECK: [[vecl4:%.*]] = fptosi reassoc nnan ninf nsz arcp afn <4 x double> [[vecd4]] to <4 x i64>
 // CHECK: store <4 x i64> [[vecl4]], ptr [[l4]]
 void d4_to_l4() {
   vector<double,4> d4 = 6.0;
diff --git a/clang/test/CodeGenHLSL/builtins/dot2add.hlsl b/clang/test/CodeGenHLSL/builtins/dot2add.hlsl
index e80ffba2bcfdb..29031c2bddec3 100644
--- a/clang/test/CodeGenHLSL/builtins/dot2add.hlsl
+++ b/clang/test/CodeGenHLSL/builtins/dot2add.hlsl
@@ -106,8 +106,8 @@ float test_double_arg1_arg2_type(double2 p1, double2 p2, float p3) {
 
 // CHECK-LABEL: define {{.*}}test_int16_arg1_arg2_type
 float test_int16_arg1_arg2_type(int16_t2 p1, int16_t2 p2, float p3) {
-  // CHECK:  %conv = sitofp <2 x i16> %{{.*}} to <2 x half>
-  // CHECK:  %conv1 = sitofp <2 x i16> %{{.*}} to <2 x half>
+  // CHECK:  %conv = sitofp reassoc nnan ninf nsz arcp afn <2 x i16> %{{.*}} to <2 x half>
+  // CHECK:  %conv1 = sitofp reassoc nnan ninf nsz arcp afn <2 x i16> %{{.*}} to <2 x half>
   // CHECK-SPIRV:  %[[MUL:.*]] = call reassoc nnan ninf nsz arcp afn half @llvm.spv.fdot.v2f16(<2 x half> %{{.*}}, <2 x half> %{{.*}})
   // CHECK-SPIRV:  %[[CONV:.*]] = fpext reassoc nnan ninf nsz arcp afn half %[[MUL]] to float
   // CHECK-SPIRV:  %[[C:.*]] = load float, ptr %c.addr.i, align 4
@@ -123,8 +123,8 @@ float test_int16_arg1_arg2_type(int16_t2 p1, int16_t2 p2, float p3) {
 
 // CHECK-LABEL: define {{.*}}test_int32_arg1_arg2_type
 float test_int32_arg1_arg2_type(int32_t2 p1, int32_t2 p2, float p3) {
-  // CHECK:  %conv = sitofp <2 x i32> %{{.*}} to <2 x half>
-  // CHECK:  %conv1 = sitofp <2 x i32> %{{.*}} to <2 x half>
+  // CHECK:  %conv = sitofp reassoc nnan ninf nsz arcp afn <2 x i32> %{{.*}} to <2 x half>
+  // CHECK:  %conv1 = sitofp reassoc nnan ninf nsz arcp afn <2 x i32> %{{.*}} to <2 x half>
   // CHECK-SPIRV:  %[[MUL:.*]] = call reassoc nnan ninf nsz arcp afn half @llvm.spv.fdot.v2f16(<2 x half> %{{.*}}, <2 x half> %{{.*}})
   // CHECK-SPIRV:  %[[CONV:.*]] = fpext reassoc nnan ninf nsz arcp afn half %[[MUL]] to float
   // CHECK-SPIRV:  %[[C:.*]] = load float, ptr %c.addr.i, align 4
@@ -140,8 +140,8 @@ float test_int32_arg1_arg2_type(int32_t2 p1, int32_t2 p2, float p3) {
 
 // CHECK-LABEL: define {{.*}}test_int64_arg1_arg2_type
 float test_int64_arg1_arg2_type(int64_t2 p1, int64_t2 p2, float p3) {
-  // CHECK:  %conv = sitofp <2 x i64> %{{.*}} to <2 x half>
-  // CHECK:  %conv1 = sitofp <2 x i64> %{{.*}} to <2 x half>
+  // CHECK:  %conv = sitofp reassoc nnan ninf nsz arcp afn <2 x i64> %{{.*}} to <2 x half>
+  // CHECK:  %conv1 = sitofp reassoc nnan ninf nsz arcp afn <2 x i64> %{{.*}} to <2 x half>
   // CHECK-SPIRV:  %[[MUL:.*]] = call reassoc nnan ninf nsz arcp afn half @llvm.spv.fdot.v2f16(<2 x half> %{{.*}}, <2 x half> %{{.*}})
   // CHECK-SPIRV:  %[[CONV:.*]] = fpext reassoc nnan ninf nsz arcp afn half %[[MUL]] to float
   // CHECK-SPIRV:  %[[C:.*]] = load float, ptr %c.addr.i, align 4
diff --git a/clang/test/CodeGenHLSL/builtins/lerp-overloads.hlsl b/clang/test/CodeGenHLSL/builtins/lerp-overloads.hlsl
index 3b13e43873c77..589f18e67deb8 100644
--- a/clang/test/CodeGenHLSL/builtins/lerp-overloads.hlsl
+++ b/clang/test/CodeGenHLSL/builtins/lerp-overloads.hlsl
@@ -36,33 +36,33 @@ float3 test_lerp_double3(double3 p0) { return lerp(p0, p0, p0); }
 float4 test_lerp_double4(double4 p0) { return lerp(p0, p0, p0); }
 
 // CHECK: define [[FNATTRS]] float @_Z13test_lerp_inti(
-// CHECK:    [[CONV0:%.*]] = sitofp i32 %{{.*}} to float
-// CHECK:    [[CONV1:%.*]] = sitofp i32 %{{.*}} to float
-// CHECK:    [[CONV2:%.*]] = sitofp i32 %{{.*}} to float
+// CHECK:    [[CONV0:%.*]] = sitofp reassoc nnan ninf nsz arcp afn i32 %{{.*}} to float
+// CHECK:    [[CONV1:%.*]] = sitofp reassoc nnan ninf nsz arcp afn i32 %{{.*}} to float
+// CHECK:    [[CONV2:%.*]] = sitofp reassoc nnan ninf nsz arcp afn i32 %{{.*}} to float
 // CHECK:    [[LERP:%.*]] = call {{.*}} float @llvm.[[TARGET]].lerp.f32(float [[CONV0]], float [[CONV1]], float [[CONV2]])
 // CHECK:    ret float [[LERP]]
 float test_lerp_int(int p0) { return lerp(p0, p0, p0); }
 
 // CHECK: define [[FNATTRS]] <2 x float> @_Z14test_lerp_int2Dv2_i(
-// CHECK:    [[CONV0:%.*]] = sitofp <2 x i32> %{{.*}} to <2 x float>
-// CHECK:    [[CONV1:%.*]] = sitofp <2 x i32> %{{.*}} to <2 x float>
-// CHECK:    [[CONV2:%.*]] = sitofp <2 x i32> %{{.*}} to <2 x float>
+// CHECK:    [[CONV0:%.*]] = sitofp reassoc nnan ninf nsz arcp afn <2 x i32> %{{.*}} to <2 x float>
+// CHECK:    [[CONV1:%.*]] = sitofp reassoc nnan ninf nsz arcp afn <2 x i32> %{{.*}} to <2 x float>
+// CHECK:    [[CONV2:%.*]] = sitofp reassoc nnan ninf nsz arcp afn <2 x i32> %{{.*}} to <2 x float>
 // CHECK:    [[LERP:%.*]] = call {{.*}} <2 x float> @llvm.[[TARGET]].lerp.v2f32(<2 x float> [[CONV0]], <2 x float> [[CONV1]], <2 x float> [[CONV2]])
 // CHECK:    ret <2 x float> [[LERP]]
 float2 test_lerp_int2(int2 p0) { return lerp(p0, p0, p0); }
 
 // CHECK: define [[FNATTRS]] <3 x float> @_Z14test_lerp_int3Dv3_i(
-// CHECK:    [[CONV0:%.*]] = sitofp <3 x i32> %{{.*}} to <3 x float>
-// CHECK:    [[CONV1:%.*]] = sitofp <3 x i32> %{{.*}} to <3 x float>
-// CHECK:    [[CONV2:%.*]] = sitofp <3 x i32> %{{.*}} to <3 x float>
+// CHECK:    [[CONV0:%.*]] = sitofp reassoc nnan ninf nsz arcp afn <3 x i32> %{{.*}} to <3 x float>
+// CHECK:    [[CONV1:%.*]] = sitofp reassoc nnan ninf nsz arcp afn <3 x i32> %{{.*}} to <3 x float>
+// CHECK:    [[CONV2:%.*]] = sitofp reassoc nnan ninf nsz arcp afn <3 x i32> %{{.*}} to <3 x float>
 // CHECK:    [[LERP:%.*]] = call {{.*}} <3 x float> @llvm.[[TARGET]].lerp.v3f32(<3 x float> [[CONV0]], <3 x float> [[CONV1]], <3 x float> [[CONV2]])
 // CHECK:    ret <3 x float> [[LERP]]
 float3 test_lerp_int3(int3 p0) { return lerp(p0, p0, p0); }
 
 // CHECK: define [[FNATTRS]] <4 x float> @_Z14test_lerp_int4Dv4_i(
-// CHECK:    [[CONV0:%.*]] = sitofp <4 x i32> %{{.*}} to <4 x float>
-// CHECK:    [[CONV1:%.*]] = sitofp <4 x i32> %{{.*}} to <4 x float>
-// CHECK:    [[CONV2:%.*]] = sitofp <4 x i32> %{{.*}} to <4 x float>
+// CHECK:    [[CONV0:%.*]] = sitofp reassoc nnan ninf nsz arcp afn <4 x i32> %{{.*}} to <4 x float>
+// CHECK:    [[CONV1:%.*]] = sitofp reassoc nnan ninf nsz arcp afn <4 x i32> %{{.*}} to <4 x float>
+// CHECK:    [[CONV2:%.*]] = sitofp reassoc nnan ninf nsz arcp afn <4 x i32> %{{.*}} to <4 x float>
 // CHECK:    [[LERP:%.*]] = call {{.*}} <4 x float> @llvm.[[TARGET]].lerp.v4f32(<4 x float> [[CONV0]], <4 x float> [[CONV1]], <4 x float> [[CONV2]])
 // CHECK:    ret <4 x float> [[LERP]]
 float4 test_lerp_int4(int4 p0) { return lerp(p0, p0, p0); }
@@ -100,33 +100,33 @@ float3 test_lerp_uint3(uint3 p0) { return lerp(p0, p0, p0); }
 float4 test_lerp_uint4(uint4 p0) { return lerp(p0, p0, p0); }
 
 // CHECK: define [[FNATTRS]] float @_Z17test_lerp_int64_tl(
-// CHECK:    [[CONV0:%.*]] = sitofp i64 %{{.*}} to float
-// CHECK:    [[CONV1:%.*]] = sitofp i64 %{{.*}} to float
-// CHECK:    [[CONV2:%.*]] = sitofp i64 %{{.*}} to float
+// CHECK:    [[CONV0:%.*]] = sitofp reassoc nnan ninf nsz arcp afn i64 %{{.*}} to float
+// CHECK:    [[CONV1:%.*]] = sitofp reassoc nnan ninf nsz arcp afn i64 %{{.*}} to float
+// CHECK:    [[CONV2:%.*]] = sitofp reassoc nnan ninf nsz arcp afn i64 %{{.*}} to float
 // CHECK:    [[LERP:%.*]] = call {{.*}} float @llvm.[[TARGET]].lerp.f32(float [[CONV0]], float [[CONV1]], float [[CONV2]])
 // CHECK:    ret float [[LERP]]
 float test_lerp_int64_t(int64_t p0) { return lerp(p0, p0, p0); }
 
 // CHECK: define [[FNATTRS]] <2 x float> @_Z18test_lerp_int64_t2Dv2_l(
-// CHECK:    [[CONV0:%.*]] = sitofp <2 x i64> %{{.*}} to <2 x float>
-// CHECK:    [[CONV1:%.*]] = sitofp <2 x i64> %{{.*}} to <2 x float>
-// CHECK:    [[CONV2:%.*]] = sitofp <2 x i64> %{{.*}} to <2 x float>
+// CHECK:    [[CONV0:%.*]] = sitofp reassoc nnan ninf nsz arcp afn <2 x i64> %{{.*}} to <2 x float>
+// CHECK:    [[CONV1:%.*]] = sitofp reassoc nnan ninf nsz arcp afn <2 x i64> %{{.*}} to <2 x float>
+// CHECK:    [[CONV2:%.*]] = sitofp reassoc nnan ninf nsz arcp afn <2 x i64> %{{.*}} to <2 x float>
 // CHECK:    [[LERP:%.*]] = call {{.*}} <2 x float> @llvm.[[TARGET]].lerp.v2f32(<2 x float> [[CONV0]], <2 x float> [[CONV1]], <2 x float> [[CONV2]])
 // CHECK:    ret <2 x float> [[LERP]]
 float2 test_lerp_int64_t2(int64_t2 p0) { return lerp(p0, p0, p0); }
 
 // CHECK: define [[FNATTRS]] <3 x float> @_Z18test_lerp_int64_t3Dv3_l(
-// CHECK:    [[CONV0:%.*]] = sitofp <3 x i64> %{{.*}} to <3 x float>
-// CHECK:    [[CONV1:%.*]] = sitofp <3 x i64> %{{.*}} to <3 x float>
-// CHECK:    [[CONV2:%.*]] = sitofp <3 x i64> %{{.*}} to <3 x float>
+// CHECK:    [[CONV0:%.*]] = sitofp reassoc nnan ninf nsz arcp afn <3 x i64> %{{.*}} to <3 x float>
+// CHECK:    [[CONV1:%.*]] = sitofp reassoc nnan ninf nsz arcp afn <3 x i64> %{{.*}} to <3 x float>
+// CHECK:    [[CONV2:%.*]] = sitofp reassoc nnan ninf nsz arcp afn <3 x i64> %{{.*}} to <3 x float>
 // CHECK:    [[LERP:%.*]] = call {{.*}} <3 x float> @llvm.[[TARGET]].lerp.v3f32(<3 x float> [[CONV0]], <3 x float> [[CONV1]], <3 x float> [[CONV2]])
 // CHECK:    ret <3 x float> [[LERP]]
 float3 test_lerp_int64_t3(int64_t3 p0) { return lerp(p0, p0, p0); }
 
 // CHECK: define [[FNATTRS]] <4 x float> @_Z18test_lerp_int64_t4Dv4_l(
-// CHECK:    [[CONV0:%.*]] = sitofp <4 x i64> %{{.*}} to <4 x float>
-// CHECK:    [[CONV1:%.*]] = sitofp <4 x i64> %{{.*}} to <4 x float>
-// CHECK:    [[CONV2:%.*]] = sitofp <4 x i64> %{{.*}} to <4 x float>
+// CHECK:    [[CONV0:%.*]] = sitofp reassoc nnan ninf nsz arcp afn <4 x i64> %{{.*}} to <4 x float>
+// CHECK:    [[CONV1:%.*]] = sitofp reassoc nnan ninf nsz arcp afn <4 x i64> %{{.*}} to <4 x float>
+// CHECK:    [[CONV2:%.*]] = sitofp reassoc nnan ninf nsz arcp afn <4 x i64> %{{.*}} to <4 x float>
 // CHECK:    [[LERP:%.*]] = call {{.*}} <4 x float> @llvm.[[TARGET]].lerp.v4f32(<4 x float> [[CONV0]], <4 x float> [[CONV1]], <4 x float> [[CONV2]])
 // CHECK:    ret <4 x float> [[LERP]]
 float4 test_lerp_int64_t4(int64_t4 p0) { return lerp(p0, p0, p0); }
diff --git a/clang/test/CodeGenHLSL/builtins/pow-overloads.hlsl b/clang/test/CodeGenHLSL/builtins/pow-overloads.hlsl
index 0d1f3d3546a33..bcf8997196d2e 100644
--- a/clang/test/CodeGenHLSL/builtins/pow-overloads.hlsl
+++ b/clang/test/CodeGenHLSL/builtins/pow-overloads.hlsl
@@ -28,26 +28,26 @@ float3 test_pow_double3(double3 p0, double3 p1) { return pow(p0, p1); }
 float4 test_pow_double4(double4 p0, double4 p1) { return pow(p0, p1); }
 
 // CHECK-LABEL: define hidden noundef nofpclass(nan inf) float {{.*}}test_pow_int
-// CHECK: [[CONV0:%.*]] = sitofp i32 %{{.*}} to float
-// CHECK: [[CONV1:%.*]] = sitofp i32 %{{.*}} to float
+// CHECK: [[CONV0:%.*]] = sitofp reassoc nnan ninf nsz arcp afn i32 %{{.*}} to float
+// CHECK: [[CONV1:%.*]] = sitofp reassoc nnan ninf nsz arcp afn i32 %{{.*}} to float
 // CHECK: [[POW:%.*]] = call [[FLOATATTRS]] noundef float @llvm.pow.f32(float [[CONV0]], float [[CONV1]])
 // CHECK: ret float [[POW]]
 float test_pow_int(int p0, int p1) { return pow(p0, p1); }
 // CHECK-LABEL: define hidden noundef nofpclass(nan inf) <2 x float> {{.*}}test_pow_int2
-// CHECK: [[CONV0:%.*]] = sitofp <2 x i32> %{{.*}} to <2 x float>
-// CHECK: [[CONV1:%.*]] = sitofp <2 x i32> %{{.*}} to <2 x float>
+// CHECK: [[CONV0:%.*]] = sitofp reassoc nnan ninf nsz arcp afn <2 x i32> %{{.*}} to <2 x float>
+// CHECK: [[CONV1:%.*]] = sitofp reassoc nnan ninf nsz arcp afn <2 x i32> %{{.*}} to <2 x float>
 // CHECK: [[POW:%.*]] = call [[FLOATATTRS]] noundef <2 x float> @llvm.pow.v2f32(<2 x float> [[CONV0]], <2 x float> [[CONV1]])
 // CHECK: ret <2 x float> [[POW]]
 float2 test_pow_int2(int2 p0, int2 p1) { return pow(p0, p1); }
 // CHECK-LABEL: define hidden noundef nofpclass(nan inf) <3 x float> {{.*}}test_pow_int3
-// CHECK: [[CONV0:%.*]] = sitofp <3 x i32> %{{.*}} to <3 x float>
-// CHECK: [[CONV1:%.*]] = sitofp <3 x i32> %{{.*}} to <3 x float>
+// CHECK: [[CONV0:%.*]] = sitofp reassoc nnan ninf nsz arcp afn <3 x i32> %{{.*}} to <3 x float>
+// CHECK: [[CONV1:%.*]] = sitofp reassoc nnan ninf nsz arcp afn <3 x i32> %{{.*}} to <3 x float>
 // CHECK: [[POW:%.*]] = call [[FLOATATTRS]] noundef <3 x float> @llvm.pow.v3f32(<3 x float> [[CONV0]], <3 x float> [[CONV1]])
 // CHECK: ret <3 x float> [[POW]]
 float3 test_pow_int3(int3 p0, int3 p1) { return pow(p0, p1); }
 // CHECK-LABEL: define hidden noundef nofpclass(nan inf) <4 x float> {{.*}}test_pow_int4
-// CHECK: [[CONV0:%.*]] = sitofp <4 x i32> %{{.*}} to <4 x float>
-// CHECK: [[CONV1:%.*]] = sitofp <4 x i32> %{{.*}} to <4 x float>
+// CHECK: [[CONV0:%.*]] = sitofp reassoc nnan ninf nsz arcp afn <4 x i32> %{{.*}} to <4 x float>
+// CHECK: [[CONV1:%.*]] = sitofp reassoc nnan ninf nsz arcp afn <4 x i32> %{{.*}} to <4 x float>
 // CHECK: [[POW:%.*]] = call [[FLOATATTRS]] noundef <4 x float> @llvm.pow.v4f32(<4 x float> [[CONV0]], <4 x float> [[CONV1]])
 // CHECK: ret <4 x float> [[POW]]
 float4 test_pow_int4(int4 p0, int4 p1) { return pow(p0, p1); }
@@ -78,26 +78,26 @@ float3 test_pow_uint3(uint3 p0, uint3 p1) { return pow(p0, p1); }
 float4 test_pow_uint4(uint4 p0, uint4 p1) { return pow(p0, p1); }
 
 // CHECK-LABEL: define hidden noundef nofpclass(nan inf) float {{.*}}test_pow_int64_t
-// CHECK: [[CONV0:%.*]] = sitofp i64 %{{.*}} to float
-// CHECK: [[CONV1:%.*]] = sitofp i64 %{{.*}} to float
+// CHECK: [[CONV0:%.*]] = sitofp reassoc nnan ninf nsz arcp afn i64 %{{.*}} to float
+// CHECK: [[CONV1:%.*]] = sitofp reassoc nnan ninf nsz arcp afn i64 %{{.*}} to float
 // CHECK: [[POW:%.*]] = call [[FLOATATTRS]] noundef float @llvm.pow.f32(float [[CONV0]], float [[CONV1]])
 // CHECK: ret float [[POW]]
 float test_pow_int64_t(int64_t p0, int64_t p1) { return pow(p0, p1); }
 // CHECK-LABEL: define hidden noundef nofpclass(nan inf) <2 x float> {{.*}}test_pow_int64_t2
-// CHECK: [[CONV0:%.*]] = sitofp <2 x i64> %{{.*}} to <2 x float>
-// CHECK: [[CONV1:%.*]] = sitofp <2 x i64> %{{.*}} to <2 x float>
+// CHECK: [[CONV0:%.*]] = sitofp reassoc nnan ninf nsz arcp afn <2 x i64> %{{.*}} to <2 x float>
+// CHECK: [[CONV1:%.*]] = sitofp reassoc nnan ninf nsz arcp afn <2 x i64> %{{.*}} to <2 x float>
 // CHECK: [[POW:%.*]] = call [[FLOATATTRS]] noundef <2 x float> @llvm.pow.v2f32(<2 x float> [[CONV0]], <2 x float> [[CONV1]])
 // CHECK: ret <2 x float> [[POW]]
 float2 test_pow_int64_t2(int64_t2 p0, int64_t2 p1) { return pow(p0, p1); }
 // CHECK-LABEL: define hidden noundef nofpclass(nan inf) <3 x float> {{.*}}test_pow_int64_t3
-// CHECK: [[CONV0:%.*]] = sitofp <3 x i64> %{{.*}} to <3 x float>
-// CHECK: [[CONV1:%.*]] = sitofp <3 x i64> %{{.*}} to <3 x float>
+// CHECK: [[CONV0:%.*]] = sitofp reassoc nnan ninf nsz arcp afn <3 x i64> %{{.*}} to <3 x float>
+// CHECK: [[CONV1:%.*]] = sitofp reassoc nnan ninf nsz arcp afn <3 x i64> %{{.*}} to <3 x float>
 // CHECK: [[POW:%.*]] = call [[FLOATATTRS]] noundef <3 x float> @llvm.pow.v3f32(<3 x float> [[CONV0]], <3 x float> [[CONV1]])
 // CHECK: ret <3 x float> [[POW]]
 float3 test_pow_int64_t3(int64_t3 p0, int64_t3 p1) { return pow(p0, p1); }
 // CHECK-LABEL: define hidden noundef nofpclass(nan inf) <4 x float> {{.*}}test_pow_int64_t4
-// CHECK: [[CONV0:%.*]] = sitofp <4 x i64> %{{.*}} to <4 x float>
-// CHECK: [[CONV1:%.*]] = sitofp <4 x i64> %{{.*}} to <4 x float>
+// CHECK: [[CONV0:%.*]] = sitofp reassoc nnan ninf nsz arcp afn <4 x i64> %{{.*}} to <4 x float>
+// CHECK: [[CONV1:%.*]] = sitofp reassoc nnan ninf nsz arcp afn <4 x i64> %{{.*}} to <4 x float>
 // CHECK: [[POW:%.*]] = call [[FLOATATTRS]] noundef <4 x float> @llvm.pow.v4f32(<4 x float> [[CONV0]], <4 x float> [[CONV1]])
 // CHECK: ret <4 x float> [[POW]]
 float4 test_pow_int64_t4(int64_t4 p0, int64_t4 p1) { return pow(p0, p1); }
diff --git a/clang/test/Headers/__clang_hip_math.hip b/clang/test/Headers/__clang_hip_math.hip
index b88aa3cc18207..87967557dd671 100644
--- a/clang/test/Headers/__clang_hip_math.hip
+++ b/clang/test/Headers/__clang_hip_math.hip
@@ -3518,35 +3518,35 @@ extern "C" __device__ double test_lgamma(double x) {
 // DEFAULT-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
 // DEFAULT-NEXT:  [[ENTRY:.*:]]
 // DEFAULT-NEXT:    [[TMP0:%.*]] = tail call contract float @llvm.rint.f32(float [[X]])
-// DEFAULT-NEXT:    [[CONV_I:%.*]] = fptosi float [[TMP0]] to i64
+// DEFAULT-NEXT:    [[CONV_I:%.*]] = fptosi contract float [[TMP0]] to i64
 // DEFAULT-NEXT:    ret i64 [[CONV_I]]
 //
 // FINITEONLY-LABEL: define dso_local i64 @test_llrintf(
 // FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
 // FINITEONLY-NEXT:  [[ENTRY:.*:]]
 // FINITEONLY-NEXT:    [[TMP0:%.*]] = tail call nnan ninf contract float @llvm.rint.f32(float nofpclass(nan inf) [[X]])
-// FINITEONLY-NEXT:    [[CONV_I:%.*]] = fptosi float [[TMP0]] to i64
+// FINITEONLY-NEXT:    [[CONV_I:%.*]] = fptosi contract float [[TMP0]] to i64
 // FINITEONLY-NEXT:    ret i64 [[CONV_I]]
 //
 // APPROX-LABEL: define dso_local i64 @test_llrintf(
 // APPROX-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
 // APPROX-NEXT:  [[ENTRY:.*:]]
 // APPROX-NEXT:    [[TMP0:%.*]] = tail call contract float @llvm.rint.f32(float [[X]])
-// APPROX-NEXT:    [[CONV_I:%.*]] = fptosi float [[TMP0]] to i64
+// APPROX-NEXT:    [[CONV_I:%.*]] = fptosi contract float [[TMP0]] to i64
 // APPROX-NEXT:    ret i64 [[CONV_I]]
 //
 // NCRDIV-LABEL: define dso_local i64 @test_llrintf(
 // NCRDIV-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
 // NCRDIV-NEXT:  [[ENTRY:.*:]]
 // NCRDIV-NEXT:    [[TMP0:%.*]] = tail call contract float @llvm.rint.f32(float [[X]])
-// NCRDIV-NEXT:    [[CONV_I:%.*]] = fptosi float [[TMP0]] to i64
+// NCRDIV-NEXT:    [[CONV_I:%.*]] = fptosi contract float [[TMP0]] to i64
 // NCRDIV-NEXT:    ret i64 [[CONV_I]]
 //
 // AMDGCNSPIRV-LABEL: define spir_func i64 @test_llrintf(
 // AMDGCNSPIRV-SAME: float noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR3]] {
 // AMDGCNSPIRV-NEXT:  [[ENTRY:.*:]]
 // AMDGCNSPIRV-NEXT:    [[TMP0:%.*]] = tail call contract addrspace(4) float @llvm.rint.f32(float [[X]])
-// AMDGCNSPIRV-NEXT:    [[CONV_I:%.*]] = fptosi float [[TMP0]] to i64
+// AMDGCNSPIRV-NEXT:    [[CONV_I:%.*]] = fptosi contract float [[TMP0]] to i64
 // AMDGCNSPIRV-NEXT:    ret i64 [[CONV_I]]
 //
 extern "C" __device__ long long int test_llrintf(float x) {
@@ -3557,35 +3557,35 @@ extern "C" __device__ long long int test_llrintf(float x) {
 // DEFAULT-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
 // DEFAULT-NEXT:  [[ENTRY:.*:]]
 // DEFAULT-NEXT:    [[TMP0:%.*]] = tail call contract double @llvm.rint.f64(double [[X]])
-// DEFAULT-NEXT:    [[CONV_I:%.*]] = fptosi double [[TMP0]] to i64
+// DEFAULT-NEXT:    [[CONV_I:%.*]] = fptosi contract double [[TMP0]] to i64
 // DEFAULT-NEXT:    ret i64 [[CONV_I]]
 //
 // FINITEONLY-LABEL: define dso_local i64 @test_llrint(
 // FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
 // FINITEONLY-NEXT:  [[ENTRY:.*:]]
 // FINITEONLY-NEXT:    [[TMP0:%.*]] = tail call nnan ninf contract double @llvm.rint.f64(double nofpclass(nan inf) [[X]])
-// FINITEONLY-NEXT:    [[CONV_I:%.*]] = fptosi double [[TMP0]] to i64
+// FINITEONLY-NEXT:    [[CONV_I:%.*]] = fptosi contract double [[TMP0]] to i64
 // FINITEONLY-NEXT:    ret i64 [[CONV_I]]
 //
 // APPROX-LABEL: define dso_local i64 @test_llrint(
 // APPROX-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
 // APPROX-NEXT:  [[ENTRY:.*:]]
 // APPROX-NEXT:    [[TMP0:%.*]] = tail call contract double @llvm.rint.f64(double [[X]])
-// APPROX-NEXT:    [[CONV_I:%.*]] = fptosi double [[TMP0]] to i64
+// APPROX-NEXT:    [[CONV_I:%.*]] = fptosi contract double [[TMP0]] to i64
 // APPROX-NEXT:    ret i64 [[CONV_I]]
 //
 // NCRDIV-LABEL: define dso_local i64 @test_llrint(
 // NCRDIV-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
 // NCRDIV-NEXT:  [[ENTRY:.*:]]
 // NCRDIV-NEXT:    [[TMP0:%.*]] = tail call contract double @llvm.rint.f64(double [[X]])
-// NCRDIV-NEXT:    [[CONV_I:%.*]] = fptosi double [[TMP0]] to i64
+// NCRDIV-NEXT:    [[CONV_I:%.*]] = fptosi contract double [[TMP0]] to i64
 // NCRDIV-NEXT:    ret i64 [[CONV_I]]
 //
 // AMDGCNSPIRV-LABEL: define spir_func i64 @test_llrint(
 // AMDGCNSPIRV-SAME: double noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR3]] {
 // AMDGCNSPIRV-NEXT:  [[ENTRY:.*:]]
 // AMDGCNSPIRV-NEXT:    [[TMP0:%.*]] = tail call contract addrspace(4) double @llvm.rint.f64(double [[X]])
-// AMDGCNSPIRV-NEXT:    [[CONV_I:%.*]] = fptosi double [[TMP0]] to i64
+// AMDGCNSPIRV-NEXT:    [[CONV_I:%.*]] = fptosi contract double [[TMP0]] to i64
 // AMDGCNSPIRV-NEXT:    ret i64 [[CONV_I]]
 //
 extern "C" __device__ long long int test_llrint(double x) {
@@ -3596,35 +3596,35 @@ extern "C" __device__ long long int test_llrint(double x) {
 // DEFAULT-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
 // DEFAULT-NEXT:  [[ENTRY:.*:]]
 // DEFAULT-NEXT:    [[TMP0:%.*]] = tail call contract float @llvm.round.f32(float [[X]])
-// DEFAULT-NEXT:    [[CONV_I:%.*]] = fptosi float [[TMP0]] to i64
+// DEFAULT-NEXT:    [[CONV_I:%.*]] = fptosi contract float [[TMP0]] to i64
 // DEFAULT-NEXT:    ret i64 [[CONV_I]]
 //
 // FINITEONLY-LABEL: define dso_local i64 @test_llroundf(
 // FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
 // FINITEONLY-NEXT:  [[ENTRY:.*:]]
 // FINITEONLY-NEXT:    [[TMP0:%.*]] = tail call nnan ninf contract float @llvm.round.f32(float nofpclass(nan inf) [[X]])
-// FINITEONLY-NEXT:    [[CONV_I:%.*]] = fptosi float [[TMP0]] to i64
+// FINITEONLY-NEXT:    [[CONV_I:%.*]] = fptosi contract float [[TMP0]] to i64
 // FINITEONLY-NEXT:    ret i64 [[CONV_I]]
 //
 // APPROX-LABEL: define dso_local i64 @test_llroundf(
 // APPROX-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
 // APPROX-NEXT:  [[ENTRY:.*:]]
 // APPROX-NEXT:    [[TMP0:%.*]] = tail call contract float @llvm.round.f32(float [[X]])
-// APPROX-NEXT:    [[CONV_I:%.*]] = fptosi float [[TMP0]] to i64
+// APPROX-NEXT:    [[CONV_I:%.*]] = fptosi contract float [[TMP0]] to i64
 // APPROX-NEXT:    ret i64 [[CONV_I]]
 //
 // NCRDIV-LABEL: define dso_local i64 @test_llroundf(
 // NCRDIV-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
 // NCRDIV-NEXT:  [[ENTRY:.*:]]
 // NCRDIV-NEXT:    [[TMP0:%.*]] = tail call contract float @llvm.round.f32(float [[X]])
-// NCRDIV-NEXT:    [[CONV_I:%.*]] = fptosi float [[TMP0]] to i64
+// NCRDIV-NEXT:    [[CONV_I:%.*]] = fptosi contract float [[TMP0]] to i64
 // NCRDIV-NEXT:    ret i64 [[CONV_I]]
 //
 // AMDGCNSPIRV-LABEL: define spir_func i64 @test_llroundf(
 // AMDGCNSPIRV-SAME: float noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR3]] {
 // AMDGCNSPIRV-NEXT:  [[ENTRY:.*:]]
 // AMDGCNSPIRV-NEXT:    [[TMP0:%.*]] = tail call contract addrspace(4) float @llvm.round.f32(float [[X]])
-// AMDGCNSPIRV-NEXT:    [[CONV_I:%.*]] = fptosi float [[TMP0]] to i64
+// AMDGCNSPIRV-NEXT:    [[CONV_I:%.*]] = fptosi contract float [[TMP0]] to i64
 // AMDGCNSPIRV-NEXT:    ret i64 [[CONV_I]]
 //
 extern "C" __device__ long long int test_llroundf(float x) {
@@ -3635,35 +3635,35 @@ extern "C" __device__ long long int test_llroundf(float x) {
 // DEFAULT-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
 // DEFAULT-NEXT:  [[ENTRY:.*:]]
 // DEFAULT-NEXT:    [[TMP0:%.*]] = tail call contract double @llvm.round.f64(double [[X]])
-// DEFAULT-NEXT:    [[CONV_I:%.*]] = fptosi double [[TMP0]] to i64
+// DEFAULT-NEXT:    [[CONV_I:%.*]] = fptosi contract double [[TMP0]] to i64
 // DEFAULT-NEXT:    ret i64 [[CONV_I]]
 //
 // FINITEONLY-LABEL: define dso_local i64 @test_llround(
 // FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
 // FINITEONLY-NEXT:  [[ENTRY:.*:]]
 // FINITEONLY-NEXT:    [[TMP0:%.*]] = tail call nnan ninf contract double @llvm.round.f64(double nofpclass(nan inf) [[X]])
-// FINITEONLY-NEXT:    [[CONV_I:%.*]] = fptosi double [[TMP0]] to i64
+// FINITEONLY-NEXT:    [[CONV_I:%.*]] = fptosi contract double [[TMP0]] to i64
 // FINITEONLY-NEXT:    ret i64 [[CONV_I]]
 //
 // APPROX-LABEL: define dso_local i64 @test_llround(
 // APPROX-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
 // APPROX-NEXT:  [[ENTRY:.*:]]
 // APPROX-NEXT:    [[TMP0:%.*]] = tail call contract double @llvm.round.f64(double [[X]])
-// APPROX-NEXT:    [[CONV_I:%.*]] = fptosi double [[TMP0]] to i64
+// APPROX-NEXT:    [[CONV_I:%.*]] = fptosi contract double [[TMP0]] to i64
 // APPROX-NEXT:    ret i64 [[CONV_I]]
 //
 // NCRDIV-LABEL: define dso_local i64 @test_llround(
 // NCRDIV-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
 // NCRDIV-NEXT:  [[ENTRY:.*:]]
 // NCRDIV-NEXT:    [[TMP0:%.*]] = tail call contract double @llvm.round.f64(double [[X]])
-// NCRDIV-NEXT:    [[CONV_I:%.*]] = fptosi double [[TMP0]] to i64
+// NCRDIV-NEXT:    [[CONV_I:%.*]] = fptosi contract double [[TMP0]] to i64
 // NCRDIV-NEXT:    ret i64 [[CONV_I]]
 //
 // AMDGCNSPIRV-LABEL: define spir_func i64 @test_llround(
 // AMDGCNSPIRV-SAME: double noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR3]] {
 // AMDGCNSPIRV-NEXT:  [[ENTRY:.*:]]
 // AMDGCNSPIRV-NEXT:    [[TMP0:%.*]] = tail call contract addrspace(4) double @llvm.round.f64(double [[X]])
-// AMDGCNSPIRV-NEXT:    [[CONV_I:%.*]] = fptosi double [[TMP0]] to i64
+// AMDGCNSPIRV-NEXT:    [[CONV_I:%.*]] = fptosi contract double [[TMP0]] to i64
 // AMDGCNSPIRV-NEXT:    ret i64 [[CONV_I]]
 //
 extern "C" __device__ long long int test_llround(double x) {
@@ -3980,35 +3980,35 @@ extern "C" __device__ float test_logf(float x) {
 // DEFAULT-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
 // DEFAULT-NEXT:  [[ENTRY:.*:]]
 // DEFAULT-NEXT:    [[TMP0:%.*]] = tail call contract float @llvm.rint.f32(float [[X]])
-// DEFAULT-NEXT:    [[CONV_I:%.*]] = fptosi float [[TMP0]] to i64
+// DEFAULT-NEXT:    [[CONV_I:%.*]] = fptosi contract float [[TMP0]] to i64
 // DEFAULT-NEXT:    ret i64 [[CONV_I]]
 //
 // FINITEONLY-LABEL: define dso_local i64 @test_lrintf(
 // FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
 // FINITEONLY-NEXT:  [[ENTRY:.*:]]
 // FINITEONLY-NEXT:    [[TMP0:%.*]] = tail call nnan ninf contract float @llvm.rint.f32(float nofpclass(nan inf) [[X]])
-// FINITEONLY-NEXT:    [[CONV_I:%.*]] = fptosi float [[TMP0]] to i64
+// FINITEONLY-NEXT:    [[CONV_I:%.*]] = fptosi contract float [[TMP0]] to i64
 // FINITEONLY-NEXT:    ret i64 [[CONV_I]]
 //
 // APPROX-LABEL: define dso_local i64 @test_lrintf(
 // APPROX-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
 // APPROX-NEXT:  [[ENTRY:.*:]]
 // APPROX-NEXT:    [[TMP0:%.*]] = tail call contract float @llvm.rint.f32(float [[X]])
-// APPROX-NEXT:    [[CONV_I:%.*]] = fptosi float [[TMP0]] to i64
+// APPROX-NEXT:    [[CONV_I:%.*]] = fptosi contract float [[TMP0]] to i64
 // APPROX-NEXT:    ret i64 [[CONV_I]]
 //
 // NCRDIV-LABEL: define dso_local i64 @test_lrintf(
 // NCRDIV-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
 // NCRDIV-NEXT:  [[ENTRY:.*:]]
 // NCRDIV-NEXT:    [[TMP0:%.*]] = tail call contract float @llvm.rint.f32(float [[X]])
-// NCRDIV-NEXT:    [[CONV_I:%.*]] = fptosi float [[TMP0]] to i64
+// NCRDIV-NEXT:    [[CONV_I:%.*]] = fptosi contract float [[TMP0]] to i64
 // NCRDIV-NEXT:    ret i64 [[CONV_I]]
 //
 // AMDGCNSPIRV-LABEL: define spir_func i64 @test_lrintf(
 // AMDGCNSPIRV-SAME: float noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR3]] {
 // AMDGCNSPIRV-NEXT:  [[ENTRY:.*:]]
 // AMDGCNSPIRV-NEXT:    [[TMP0:%.*]] = tail call contract addrspace(4) float @llvm.rint.f32(float [[X]])
-// AMDGCNSPIRV-NEXT:    [[CONV_I:%.*]] = fptosi float [[TMP0]] to i64
+// AMDGCNSPIRV-NEXT:    [[CONV_I:%.*]] = fptosi contract float [[TMP0]] to i64
 // AMDGCNSPIRV-NEXT:    ret i64 [[CONV_I]]
 //
 extern "C" __device__ long int test_lrintf(float x) {
@@ -4019,35 +4019,35 @@ extern "C" __device__ long int test_lrintf(float x) {
 // DEFAULT-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
 // DEFAULT-NEXT:  [[ENTRY:.*:]]
 // DEFAULT-NEXT:    [[TMP0:%.*]] = tail call contract double @llvm.rint.f64(double [[X]])
-// DEFAULT-NEXT:    [[CONV_I:%.*]] = fptosi double [[TMP0]] to i64
+// DEFAULT-NEXT:    [[CONV_I:%.*]] = fptosi contract double [[TMP0]] to i64
 // DEFAULT-NEXT:    ret i64 [[CONV_I]]
 //
 // FINITEONLY-LABEL: define dso_local i64 @test_lrint(
 // FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
 // FINITEONLY-NEXT:  [[ENTRY:.*:]]
 // FINITEONLY-NEXT:    [[TMP0:%.*]] = tail call nnan ninf contract double @llvm.rint.f64(double nofpclass(nan inf) [[X]])
-// FINITEONLY-NEXT:    [[CONV_I:%.*]] = fptosi double [[TMP0]] to i64
+// FINITEONLY-NEXT:    [[CONV_I:%.*]] = fptosi contract double [[TMP0]] to i64
 // FINITEONLY-NEXT:    ret i64 [[CONV_I]]
 //
 // APPROX-LABEL: define dso_local i64 @test_lrint(
 // APPROX-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
 // APPROX-NEXT:  [[ENTRY:.*:]]
 // APPROX-NEXT:    [[TMP0:%.*]] = tail call contract double @llvm.rint.f64(double [[X]])
-// APPROX-NEXT:    [[CONV_I:%.*]] = fptosi double [[TMP0]] to i64
+// APPROX-NEXT:    [[CONV_I:%.*]] = fptosi contract double [[TMP0]] to i64
 // APPROX-NEXT:    ret i64 [[CONV_I]]
 //
 // NCRDIV-LABEL: define dso_local i64 @test_lrint(
 // NCRDIV-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
 // NCRDIV-NEXT:  [[ENTRY:.*:]]
 // NCRDIV-NEXT:    [[TMP0:%.*]] = tail call contract double @llvm.rint.f64(double [[X]])
-// NCRDIV-NEXT:    [[CONV_I:%.*]] = fptosi double [[TMP0]] to i64
+// NCRDIV-NEXT:    [[CONV_I:%.*]] = fptosi contract double [[TMP0]] to i64
 // NCRDIV-NEXT:    ret i64 [[CONV_I]]
 //
 // AMDGCNSPIRV-LABEL: define spir_func i64 @test_lrint(
 // AMDGCNSPIRV-SAME: double noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR3]] {
 // AMDGCNSPIRV-NEXT:  [[ENTRY:.*:]]
 // AMDGCNSPIRV-NEXT:    [[TMP0:%.*]] = tail call contract addrspace(4) double @llvm.rint.f64(double [[X]])
-// AMDGCNSPIRV-NEXT:    [[CONV_I:%.*]] = fptosi double [[TMP0]] to i64
+// AMDGCNSPIRV-NEXT:    [[CONV_I:%.*]] = fptosi contract double [[TMP0]] to i64
 // AMDGCNSPIRV-NEXT:    ret i64 [[CONV_I]]
 //
 extern "C" __device__ long int test_lrint(double x) {
@@ -4058,35 +4058,35 @@ extern "C" __device__ long int test_lrint(double x) {
 // DEFAULT-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
 // DEFAULT-NEXT:  [[ENTRY:.*:]]
 // DEFAULT-NEXT:    [[TMP0:%.*]] = tail call contract float @llvm.round.f32(float [[X]])
-// DEFAULT-NEXT:    [[CONV_I:%.*]] = fptosi float [[TMP0]] to i64
+// DEFAULT-NEXT:    [[CONV_I:%.*]] = fptosi contract float [[TMP0]] to i64
 // DEFAULT-NEXT:    ret i64 [[CONV_I]]
 //
 // FINITEONLY-LABEL: define dso_local i64 @test_lroundf(
 // FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
 // FINITEONLY-NEXT:  [[ENTRY:.*:]]
 // FINITEONLY-NEXT:    [[TMP0:%.*]] = tail call nnan ninf contract float @llvm.round.f32(float nofpclass(nan inf) [[X]])
-// FINITEONLY-NEXT:    [[CONV_I:%.*]] = fptosi float [[TMP0]] to i64
+// FINITEONLY-NEXT:    [[CONV_I:%.*]] = fptosi contract float [[TMP0]] to i64
 // FINITEONLY-NEXT:    ret i64 [[CONV_I]]
 //
 // APPROX-LABEL: define dso_local i64 @test_lroundf(
 // APPROX-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
 // APPROX-NEXT:  [[ENTRY:.*:]]
 // APPROX-NEXT:    [[TMP0:%.*]] = tail call contract float @llvm.round.f32(float [[X]])
-// APPROX-NEXT:    [[CONV_I:%.*]] = fptosi float [[TMP0]] to i64
+// APPROX-NEXT:    [[CONV_I:%.*]] = fptosi contract float [[TMP0]] to i64
 // APPROX-NEXT:    ret i64 [[CONV_I]]
 //
 // NCRDIV-LABEL: define dso_local i64 @test_lroundf(
 // NCRDIV-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
 // NCRDIV-NEXT:  [[ENTRY:.*:]]
 // NCRDIV-NEXT:    [[TMP0:%.*]] = tail call contract float @llvm.round.f32(float [[X]])
-// NCRDIV-NEXT:    [[CONV_I:%.*]] = fptosi float [[TMP0]] to i64
+// NCRDIV-NEXT:    [[CONV_I:%.*]] = fptosi contract float [[TMP0]] to i64
 // NCRDIV-NEXT:    ret i64 [[CONV_I]]
 //
 // AMDGCNSPIRV-LABEL: define spir_func i64 @test_lroundf(
 // AMDGCNSPIRV-SAME: float noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR3]] {
 // AMDGCNSPIRV-NEXT:  [[ENTRY:.*:]]
 // AMDGCNSPIRV-NEXT:    [[TMP0:%.*]] = tail call contract addrspace(4) float @llvm.round.f32(float [[X]])
-// AMDGCNSPIRV-NEXT:    [[CONV_I:%.*]] = fptosi float [[TMP0]] to i64
+// AMDGCNSPIRV-NEXT:    [[CONV_I:%.*]] = fptosi contract float [[TMP0]] to i64
 // AMDGCNSPIRV-NEXT:    ret i64 [[CONV_I]]
 //
 extern "C" __device__ long int test_lroundf(float x) {
@@ -4097,35 +4097,35 @@ extern "C" __device__ long int test_lroundf(float x) {
 // DEFAULT-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
 // DEFAULT-NEXT:  [[ENTRY:.*:]]
 // DEFAULT-NEXT:    [[TMP0:%.*]] = tail call contract double @llvm.round.f64(double [[X]])
-// DEFAULT-NEXT:    [[CONV_I:%.*]] = fptosi double [[TMP0]] to i64
+// DEFAULT-NEXT:    [[CONV_I:%.*]] = fptosi contract double [[TMP0]] to i64
 // DEFAULT-NEXT:    ret i64 [[CONV_I]]
 //
 // FINITEONLY-LABEL: define dso_local i64 @test_lround(
 // FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
 // FINITEONLY-NEXT:  [[ENTRY:.*:]]
 // FINITEONLY-NEXT:    [[TMP0:%.*]] = tail call nnan ninf contract double @llvm.round.f64(double nofpclass(nan inf) [[X]])
-// FINITEONLY-NEXT:    [[CONV_I:%.*]] = fptosi double [[TMP0]] to i64
+// FINITEONLY-NEXT:    [[CONV_I:%.*]] = fptosi contract double [[TMP0]] to i64
 // FINITEONLY-NEXT:    ret i64 [[CONV_I]]
 //
 // APPROX-LABEL: define dso_local i64 @test_lround(
 // APPROX-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
 // APPROX-NEXT:  [[ENTRY:.*:]]
 // APPROX-NEXT:    [[TMP0:%.*]] = tail call contract double @llvm.round.f64(double [[X]])
-// APPROX-NEXT:    [[CONV_I:%.*]] = fptosi double [[TMP0]] to i64
+// APPROX-NEXT:    [[CONV_I:%.*]] = fptosi contract double [[TMP0]] to i64
 // APPROX-NEXT:    ret i64 [[CONV_I]]
 //
 // NCRDIV-LABEL: define dso_local i64 @test_lround(
 // NCRDIV-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
 // NCRDIV-NEXT:  [[ENTRY:.*:]]
 // NCRDIV-NEXT:    [[TMP0:%.*]] = tail call contract double @llvm.round.f64(double [[X]])
-// NCRDIV-NEXT:    [[CONV_I:%.*]] = fptosi double [[TMP0]] to i64
+// NCRDIV-NEXT:    [[CONV_I:%.*]] = fptosi contract double [[TMP0]] to i64
 // NCRDIV-NEXT:    ret i64 [[CONV_I]]
 //
 // AMDGCNSPIRV-LABEL: define spir_func i64 @test_lround(
 // AMDGCNSPIRV-SAME: double noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR3]] {
 // AMDGCNSPIRV-NEXT:  [[ENTRY:.*:]]
 // AMDGCNSPIRV-NEXT:    [[TMP0:%.*]] = tail call contract addrspace(4) double @llvm.round.f64(double [[X]])
-// AMDGCNSPIRV-NEXT:    [[CONV_I:%.*]] = fptosi double [[TMP0]] to i64
+// AMDGCNSPIRV-NEXT:    [[CONV_I:%.*]] = fptosi contract double [[TMP0]] to i64
 // AMDGCNSPIRV-NEXT:    ret i64 [[CONV_I]]
 //
 extern "C" __device__ long int test_lround(double x) {
diff --git a/clang/test/Headers/openmp_device_math_isnan.cpp b/clang/test/Headers/openmp_device_math_isnan.cpp
index 3fd98813f2480..ccc4717af881e 100644
--- a/clang/test/Headers/openmp_device_math_isnan.cpp
+++ b/clang/test/Headers/openmp_device_math_isnan.cpp
@@ -29,7 +29,7 @@ double math(float f, double d) {
   double r = 0;
   // INT_RETURN: call noundef i32 @__nv_isnanf(float
   // AMD_INT_RETURN_SAFE: call i1 @llvm.is.fpclass.f32(float{{.*}}, i32 3)
-  // AMD_INT_RETURN_FAST: sitofp i32 {{.*}} to double
+  // AMD_INT_RETURN_FAST: sitofp contract i32 {{.*}} to double
   // SPIRV_INT_RETURN: call spir_func noundef i32 @_Z5isnanf(float
   // BOOL_RETURN: call noundef i32 @__nv_isnanf(float
   // SPIRV_BOOL_RETURN: call spir_func noundef zeroext i1 @_Z5isnanf(float 
@@ -39,7 +39,7 @@ double math(float f, double d) {
   // INT_RETURN: call noundef i32 @__nv_isnand(double
   // SPIRV_INT_RETURN: call spir_func noundef i32 @_Z5isnand(double
   // AMD_INT_RETURN_SAFE: call i1 @llvm.is.fpclass.f64(double{{.*}}, i32 3)
-  // AMD_INT_RETURN_FAST: sitofp i32 {{.*}} to double
+  // AMD_INT_RETURN_FAST: sitofp contract i32 {{.*}} to double
   // BOOL_RETURN: call noundef i32 @__nv_isnand(double
   // SPIRV_BOOL_RETURN: call spir_func noundef zeroext i1 @_Z5isnand(double
   // AMD_BOOL_RETURN_SAFE: call i1 @llvm.is.fpclass.f64(double{{.*}}, i32 3)
diff --git a/clang/test/SemaHLSL/VectorOverloadResolution.hlsl b/clang/test/SemaHLSL/VectorOverloadResolution.hlsl
index b320abdd81182..0201e6807038d 100644
--- a/clang/test/SemaHLSL/VectorOverloadResolution.hlsl
+++ b/clang/test/SemaHLSL/VectorOverloadResolution.hlsl
@@ -40,7 +40,7 @@ void Fn3( int64_t2 p0);
 // CHECK-NEXT: ImplicitCastExpr {{.*}} 'half2':'vector<half, 2>' <LValueToRValue>
 // CHECK-NEXT: DeclRefExpr {{.*}} 'half2':'vector<half, 2>' lvalue ParmVar {{.*}} 'p0' 'half2':'vector<half, 2>'
 // CHECKIR-LABEL: Call3
-// CHECKIR: {{.*}} = fptosi <2 x half> {{.*}} to <2 x i64>
+// CHECKIR: {{.*}} = fptosi reassoc nnan ninf nsz arcp afn <2 x half> {{.*}} to <2 x i64>
 void Call3(half2 p0) {
   Fn3(p0);
 }
@@ -53,7 +53,7 @@ void Call3(half2 p0) {
 // CHECK-NEXT: ImplicitCastExpr {{.*}} 'float2':'vector<float, 2>' <LValueToRValue>
 // CHECK-NEXT: DeclRefExpr {{.*}} 'float2':'vector<float, 2>' lvalue ParmVar {{.*}} 'p0' 'float2':'vector<float, 2>'
 // CHECKIR-LABEL: Call4
-// CHECKIR: {{.*}} = fptosi <2 x float> {{.*}} to <2 x i64>
+// CHECKIR: {{.*}} = fptosi reassoc nnan ninf nsz arcp afn <2 x float> {{.*}} to <2 x i64>
 void Call4(float2 p0) {
   Fn3(p0);
 }
@@ -68,7 +68,7 @@ void Fn4( float2 p0);
 // CHECK-NEXT: ImplicitCastExpr {{.*}} 'int64_t2':'vector<int64_t, 2>' <LValueToRValue>
 // CHECK-NEXT: DeclRefExpr {{.*}} 'int64_t2':'vector<int64_t, 2>' lvalue ParmVar {{.*}} 'p0' 'int64_t2':'vector<int64_t, 2>'
 // CHECKIR-LABEL: Call5
-// CHECKIR: {{.*}} = sitofp <2 x i64> {{.*}} to <2 x float>
+// CHECKIR: {{.*}} = sitofp reassoc nnan ninf nsz arcp afn <2 x i64> {{.*}} to <2 x float>
 void Call5(int64_t2 p0) {
   Fn4(p0);
 }
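
HLSL builds these conversions under fast-math defaults, so the casts carry the full value-rewrite flag set rather than the lone `contract` seen in the CUDA/HIP tests above; `contract` itself is absent from the printed list (were it set too, the printer would collapse everything to `fast`). A sketch of what the updated CHECKIR lines match (function name assumed):

  define <2 x i64> @call3_sketch(<2 x half> %p0) {
    ; all value-rewrite flags except contract, as the CHECKIR lines expect
    %conv = fptosi reassoc nnan ninf nsz arcp afn <2 x half> %p0 to <2 x i64>
    ret <2 x i64> %conv
  }
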
diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst
index e6713c827d6ab..b6822af87735c 100644
--- a/llvm/docs/LangRef.rst
+++ b/llvm/docs/LangRef.rst
@@ -3960,7 +3960,8 @@ Fast-Math Flags
 LLVM IR floating-point operations (:ref:`fneg <i_fneg>`, :ref:`fadd <i_fadd>`,
 :ref:`fsub <i_fsub>`, :ref:`fmul <i_fmul>`, :ref:`fdiv <i_fdiv>`,
 :ref:`frem <i_frem>`, :ref:`fcmp <i_fcmp>`, :ref:`fptrunc <i_fptrunc>`,
-:ref:`fpext <i_fpext>`), and :ref:`phi <i_phi>`, :ref:`select <i_select>`, or
+:ref:`fpext <i_fpext>`, :ref:`fptoui <i_fptoui>`, :ref:`fptosi <i_fptosi>`,
+:ref:`sitofp <i_sitofp>`), and :ref:`phi <i_phi>`, :ref:`select <i_select>`, or
 :ref:`call <i_call>` instructions that return floating-point types may use the
 following flags to enable otherwise unsafe floating-point transformations.
 
@@ -12271,6 +12272,8 @@ Example:
       %X = fpext float 3.125 to double         ; yields double:3.125000e+00
       %Y = fpext double %X to fp128            ; yields fp128:0xL00000000000000004000900000000000
 
+.. _i_fptoui:
+
 '``fptoui .. to``' Instruction
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
@@ -12313,6 +12316,8 @@ Example:
       %Y = fptoui float 1.0E+300 to i1     ; yields undefined:1
       %Z = fptoui float 1.04E+17 to i8     ; yields undefined:1
 
+.. _i_fptosi:
+
 '``fptosi .. to``' Instruction
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
@@ -12407,6 +12412,8 @@ Example:
       %a = uitofp nneg i32 256 to i32      ; yields float:256.0
       %b = uitofp nneg i32 -256 to i32     ; yields i32 poison
 
+.. _i_sitofp:
+
 '``sitofp .. to``' Instruction
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
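
The LangRef hunks do two things: add the three conversions to the list of instructions that may carry fast-math flags, and introduce `_i_fptoui`, `_i_fptosi`, and `_i_sitofp` anchors so the new list entries have link targets. Concretely, IR like the following becomes legal (a sketch; any subset of the flags is accepted):

  define float @fmf_on_conversions(float %x, i32 %n) {
    %u = fptoui nnan float %x to i32          ; single flag
    %s = fptosi fast float %x to i32          ; all flags at once
    %f = sitofp reassoc nsz i32 %n to float   ; any subset
    ret float %f
  }
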
diff --git a/llvm/include/llvm/IR/Operator.h b/llvm/include/llvm/IR/Operator.h
index 10816c0e62c29..b725e01462e70 100644
--- a/llvm/include/llvm/IR/Operator.h
+++ b/llvm/include/llvm/IR/Operator.h
@@ -362,6 +362,9 @@ class FPMathOperator : public Operator {
     case Instruction::FRem:
     case Instruction::FPTrunc:
     case Instruction::FPExt:
+    case Instruction::FPToUI:
+    case Instruction::FPToSI:
+    case Instruction::SIToFP:
     // FIXME: To clean up and correct the semantics of fast-math-flags, FCmp
     //        should not be treated as a math op, but the other opcodes should.
     //        This would make things consistent with Select/PHI (FP value type
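
Extending this opcode switch is what makes the new flags reachable through FPMathOperator, so existing FMF queries work on the conversions with no new API. For instance, given the IR below, isa<FPMathOperator> holds for the cast and getFastMathFlags() reports nnan and ninf (sketch):

  define i32 @queryable(float %x) {
    ; with FPToSI in the opcode list, passes can read these flags off
    ; the cast through the usual FPMathOperator interface
    %r = fptosi nnan ninf float %x to i32
    ret i32 %r
  }
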
diff --git a/llvm/lib/AsmParser/LLParser.cpp b/llvm/lib/AsmParser/LLParser.cpp
index 897e679095906..c2f856aa65bf6 100644
--- a/llvm/lib/AsmParser/LLParser.cpp
+++ b/llvm/lib/AsmParser/LLParser.cpp
@@ -7319,13 +7319,13 @@ int LLParser::parseInstruction(Instruction *&Inst, BasicBlock *BB,
   case lltok::kw_sext:
   case lltok::kw_bitcast:
   case lltok::kw_addrspacecast:
-  case lltok::kw_sitofp:
-  case lltok::kw_fptoui:
-  case lltok::kw_fptosi:
   case lltok::kw_inttoptr:
   case lltok::kw_ptrtoaddr:
   case lltok::kw_ptrtoint:
     return parseCast(Inst, PFS, KeywordVal);
+  case lltok::kw_sitofp:
+  case lltok::kw_fptoui:
+  case lltok::kw_fptosi:
   case lltok::kw_fptrunc:
   case lltok::kw_fpext: {
     FastMathFlags FMF = EatFastMathFlagsIfPresent();
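
The parser change moves the three casts out of the plain parseCast path and into the branch that already calls EatFastMathFlagsIfPresent for fptrunc and fpext, so a flag keyword on these casts is consumed rather than rejected. Post-patch, textual IR such as this round-trips (sketch):

  define i32 @parses_now(float %x) {
    ; pre-patch, the flag keyword here was not consumed on these casts
    ; and parsing failed; now it takes the same path as fptrunc/fpext
    %r = fptoui fast float %x to i32
    ret i32 %r
  }
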
diff --git a/llvm/test/Assembler/fast-math-flags.ll b/llvm/test/Assembler/fast-math-flags.ll
index 9c08e9da1d19e..2a725cd445182 100644
--- a/llvm/test/Assembler/fast-math-flags.ll
+++ b/llvm/test/Assembler/fast-math-flags.ll
@@ -56,6 +56,24 @@ entry:
   %h_vec = fptrunc <3 x float> %vec to <3 x half>
 ; CHECK: %h_scalable = fptrunc <vscale x 3 x float> %scalable to <vscale x 3 x half>
   %h_scalable = fptrunc <vscale x 3 x float> %scalable to <vscale x 3 x half>
+; CHECK: %i = fptoui float %x to i32
+  %i = fptoui float %x to i32
+; CHECK: %i_vec = fptoui <3 x float> %vec to <3 x i32>
+  %i_vec = fptoui <3 x float> %vec to <3 x i32>
+; CHECK: %i_scalable = fptoui <vscale x 3 x float> %scalable to <vscale x 3 x i32>
+  %i_scalable = fptoui <vscale x 3 x float> %scalable to <vscale x 3 x i32>
+; CHECK: %j = fptosi float %x to i32
+  %j = fptosi float %x to i32
+; CHECK: %j_vec = fptosi <3 x float> %vec to <3 x i32>
+  %j_vec = fptosi <3 x float> %vec to <3 x i32>
+; CHECK: %j_scalable = fptosi <vscale x 3 x float> %scalable to <vscale x 3 x i32>
+  %j_scalable = fptosi <vscale x 3 x float> %scalable to <vscale x 3 x i32>
+; CHECK: %k = sitofp i32 %j to float
+  %k = sitofp i32 %j to float
+; CHECK: %k_vec = sitofp <3 x i32> %j_vec to <3 x float>
+  %k_vec = sitofp <3 x i32> %j_vec to <3 x float>
+; CHECK: %k_scalable = sitofp <vscale x 3 x i32> %j_scalable to <vscale x 3 x float>
+  %k_scalable = sitofp <vscale x 3 x i32> %j_scalable to <vscale x 3 x float>
 ; CHECK:  ret float %f
   ret  float %f
 }
@@ -108,6 +126,24 @@ entry:
   %h_vec = fptrunc nnan <3 x float> %vec to <3 x half>
 ; CHECK: %h_scalable = fptrunc nnan <vscale x 3 x float> %scalable to <vscale x 3 x half>
   %h_scalable = fptrunc nnan <vscale x 3 x float> %scalable to <vscale x 3 x half>
+; CHECK: %i = fptoui nnan float %x to i32
+  %i = fptoui nnan float %x to i32
+; CHECK: %i_vec = fptoui nnan <3 x float> %vec to <3 x i32>
+  %i_vec = fptoui nnan <3 x float> %vec to <3 x i32>
+; CHECK: %i_scalable = fptoui nnan <vscale x 3 x float> %scalable to <vscale x 3 x i32>
+  %i_scalable = fptoui nnan <vscale x 3 x float> %scalable to <vscale x 3 x i32>
+; CHECK: %j = fptosi nnan float %x to i32
+  %j = fptosi nnan float %x to i32
+; CHECK: %j_vec = fptosi nnan <3 x float> %vec to <3 x i32>
+  %j_vec = fptosi nnan <3 x float> %vec to <3 x i32>
+; CHECK: %j_scalable = fptosi nnan <vscale x 3 x float> %scalable to <vscale x 3 x i32>
+  %j_scalable = fptosi nnan <vscale x 3 x float> %scalable to <vscale x 3 x i32>
+; CHECK: %k = sitofp nnan i32 %j to float
+  %k = sitofp nnan i32 %j to float
+; CHECK: %k_vec = sitofp nnan <3 x i32> %j_vec to <3 x float>
+  %k_vec = sitofp nnan <3 x i32> %j_vec to <3 x float>
+; CHECK: %k_scalable = sitofp nnan <vscale x 3 x i32> %j_scalable to <vscale x 3 x float>
+  %k_scalable = sitofp nnan <vscale x 3 x i32> %j_scalable to <vscale x 3 x float>
 ; CHECK:  ret float %f
   ret float %f
 }
@@ -125,6 +161,12 @@ entry:
   %d = fpext contract float %x to double
 ; CHECK: %e = fptrunc contract float %x to half
   %e = fptrunc contract float %x to half
+; CHECK: %f = fptoui contract float %x to i32
+  %f = fptoui contract float %x to i32
+; CHECK: %g = fptosi contract float %x to i32
+  %g = fptosi contract float %x to i32
+; CHECK: %h = sitofp contract i32 %g to float
+  %h = sitofp contract i32 %g to float
   ret float %c
 }
 
@@ -140,6 +182,12 @@ define float @reassoc(float %x, float %y) {
   %d = fpext reassoc float %x to double
 ; CHECK: %e = fptrunc reassoc float %x to half
   %e = fptrunc reassoc float %x to half
+; CHECK: %f = fptoui reassoc float %x to i32
+  %f = fptoui reassoc float %x to i32
+; CHECK: %g = fptosi reassoc float %x to i32
+  %g = fptosi reassoc float %x to i32
+; CHECK: %h = sitofp reassoc i32 %g to float
+  %h = sitofp reassoc i32 %g to float
   ret float %c
 }
 
@@ -198,6 +246,24 @@ entry:
   %g_vec = fptrunc ninf nnan <3 x float> %vec to <3 x half>
 ; CHECK: %g_scalable = fptrunc nnan ninf <vscale x 3 x float> %scalable to <vscale x 3 x half>
   %g_scalable = fptrunc ninf nnan <vscale x 3 x float> %scalable to <vscale x 3 x half>
+; CHECK: %i = fptoui nnan ninf float %x to i32
+  %i = fptoui ninf nnan float %x to i32
+; CHECK: %i_vec = fptoui nnan ninf <3 x float> %vec to <3 x i32>
+  %i_vec = fptoui ninf nnan <3 x float> %vec to <3 x i32>
+; CHECK: %i_scalable = fptoui nnan ninf <vscale x 3 x float> %scalable to <vscale x 3 x i32>
+  %i_scalable = fptoui ninf nnan <vscale x 3 x float> %scalable to <vscale x 3 x i32>
+; CHECK: %j = fptosi nnan ninf float %x to i32
+  %j = fptosi ninf nnan float %x to i32
+; CHECK: %j_vec = fptosi nnan ninf <3 x float> %vec to <3 x i32>
+  %j_vec = fptosi ninf nnan <3 x float> %vec to <3 x i32>
+; CHECK: %j_scalable = fptosi nnan ninf <vscale x 3 x float> %scalable to <vscale x 3 x i32>
+  %j_scalable = fptosi ninf nnan <vscale x 3 x float> %scalable to <vscale x 3 x i32>
+; CHECK: %k = sitofp nnan ninf i32 %j to float
+  %k = sitofp ninf nnan i32 %j to float
+; CHECK: %k_vec = sitofp nnan ninf <3 x i32> %j_vec to <3 x float>
+  %k_vec = sitofp ninf nnan <3 x i32> %j_vec to <3 x float>
+; CHECK: %k_scalable = sitofp nnan ninf <vscale x 3 x i32> %j_scalable to <vscale x 3 x float>
+  %k_scalable = sitofp ninf nnan <vscale x 3 x i32> %j_scalable to <vscale x 3 x float>
 ; CHECK:  ret float %e
   ret float %e
 }
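
One detail worth noting in the mixed-flags test above: the input is written `ninf nnan`, but the CHECK lines expect `nnan ninf`, because the assembly writer prints flags in a fixed order regardless of source order (from the checks here and in the HLSL tests, apparently reassoc, nnan, ninf, nsz, arcp, contract, afn). A tiny example:

  define i32 @flag_order(float %x) {
    ; written as "ninf nnan", printed back as "nnan ninf"
    %r = fptoui ninf nnan float %x to i32
    ret i32 %r
  }
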
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-fold-binop-select.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-fold-binop-select.ll
index e71bf15384727..152b5b37c8308 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-fold-binop-select.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-fold-binop-select.ll
@@ -94,7 +94,7 @@ define i32 @select_sdiv_lhs_opaque_const0_i32(i1 %cond) {
 ; IR-NEXT:    [[TMP5:%.*]] = uitofp i32 [[TMP4]] to float
 ; IR-NEXT:    [[TMP6:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP5]])
 ; IR-NEXT:    [[TMP7:%.*]] = fmul fast float [[TMP6]], 0x41EFFFFFC0000000
-; IR-NEXT:    [[TMP8:%.*]] = fptoui float [[TMP7]] to i32
+; IR-NEXT:    [[TMP8:%.*]] = fptoui fast float [[TMP7]] to i32
 ; IR-NEXT:    [[TMP9:%.*]] = sub i32 0, [[TMP4]]
 ; IR-NEXT:    [[TMP10:%.*]] = mul i32 [[TMP9]], [[TMP8]]
 ; IR-NEXT:    [[TMP11:%.*]] = zext i32 [[TMP8]] to i64
@@ -176,7 +176,7 @@ define i32 @select_sdiv_lhs_opaque_const1_i32(i1 %cond) {
 ; IR-NEXT:    [[TMP5:%.*]] = uitofp i32 [[TMP4]] to float
 ; IR-NEXT:    [[TMP6:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP5]])
 ; IR-NEXT:    [[TMP7:%.*]] = fmul fast float [[TMP6]], 0x41EFFFFFC0000000
-; IR-NEXT:    [[TMP8:%.*]] = fptoui float [[TMP7]] to i32
+; IR-NEXT:    [[TMP8:%.*]] = fptoui fast float [[TMP7]] to i32
 ; IR-NEXT:    [[TMP9:%.*]] = sub i32 0, [[TMP4]]
 ; IR-NEXT:    [[TMP10:%.*]] = mul i32 [[TMP9]], [[TMP8]]
 ; IR-NEXT:    [[TMP11:%.*]] = zext i32 [[TMP8]] to i64
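
These AMDGPU updates all follow the same pattern: the codegen-prepare integer-division expansion builds its approximate reciprocal out of `fast` float operations, and the float-to-int (and, in the signed cases, int-to-float) conversions inside that sequence now carry `fast` as well. Note that `uitofp` is not covered by this patch and stays unflagged in the checks. A sketch of the first steps of the 32-bit udiv expansion (names assumed):

  define i32 @udiv_expand_head(i32 %y) {
    ; approximate 2^32 / y: reciprocal, scale by ~2^32 (0xFFFFFF00 as
    ; float), then truncate; the truncating conversion now carries the
    ; same fast flag as the float math around it
    %yf  = uitofp i32 %y to float
    %rcp = call fast float @llvm.amdgcn.rcp.f32(float %yf)
    %scl = fmul fast float %rcp, 0x41EFFFFFC0000000
    %q0  = fptoui fast float %scl to i32
    ret i32 %q0
  }

  declare float @llvm.amdgcn.rcp.f32(float)
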
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-idiv.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-idiv.ll
index b2dcd77274989..0b447c59eb858 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-idiv.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-idiv.ll
@@ -8,7 +8,7 @@ define amdgpu_kernel void @udiv_i32(ptr addrspace(1) %out, i32 %x, i32 %y) {
 ; CHECK-NEXT:    [[TMP1:%.*]] = uitofp i32 [[Y:%.*]] to float
 ; CHECK-NEXT:    [[TMP2:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP1]])
 ; CHECK-NEXT:    [[TMP3:%.*]] = fmul fast float [[TMP2]], 0x41EFFFFFC0000000
-; CHECK-NEXT:    [[TMP4:%.*]] = fptoui float [[TMP3]] to i32
+; CHECK-NEXT:    [[TMP4:%.*]] = fptoui fast float [[TMP3]] to i32
 ; CHECK-NEXT:    [[TMP5:%.*]] = sub i32 0, [[Y]]
 ; CHECK-NEXT:    [[TMP6:%.*]] = mul i32 [[TMP5]], [[TMP4]]
 ; CHECK-NEXT:    [[TMP7:%.*]] = zext i32 [[TMP4]] to i64
@@ -108,7 +108,7 @@ define amdgpu_kernel void @urem_i32(ptr addrspace(1) %out, i32 %x, i32 %y) {
 ; CHECK-NEXT:    [[TMP1:%.*]] = uitofp i32 [[Y:%.*]] to float
 ; CHECK-NEXT:    [[TMP2:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP1]])
 ; CHECK-NEXT:    [[TMP3:%.*]] = fmul fast float [[TMP2]], 0x41EFFFFFC0000000
-; CHECK-NEXT:    [[TMP4:%.*]] = fptoui float [[TMP3]] to i32
+; CHECK-NEXT:    [[TMP4:%.*]] = fptoui fast float [[TMP3]] to i32
 ; CHECK-NEXT:    [[TMP5:%.*]] = sub i32 0, [[Y]]
 ; CHECK-NEXT:    [[TMP6:%.*]] = mul i32 [[TMP5]], [[TMP4]]
 ; CHECK-NEXT:    [[TMP7:%.*]] = zext i32 [[TMP4]] to i64
@@ -208,7 +208,7 @@ define amdgpu_kernel void @sdiv_i32(ptr addrspace(1) %out, i32 %x, i32 %y) {
 ; CHECK-NEXT:    [[TMP8:%.*]] = uitofp i32 [[TMP7]] to float
 ; CHECK-NEXT:    [[TMP9:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP8]])
 ; CHECK-NEXT:    [[TMP10:%.*]] = fmul fast float [[TMP9]], 0x41EFFFFFC0000000
-; CHECK-NEXT:    [[TMP11:%.*]] = fptoui float [[TMP10]] to i32
+; CHECK-NEXT:    [[TMP11:%.*]] = fptoui fast float [[TMP10]] to i32
 ; CHECK-NEXT:    [[TMP12:%.*]] = sub i32 0, [[TMP7]]
 ; CHECK-NEXT:    [[TMP13:%.*]] = mul i32 [[TMP12]], [[TMP11]]
 ; CHECK-NEXT:    [[TMP14:%.*]] = zext i32 [[TMP11]] to i64
@@ -328,7 +328,7 @@ define amdgpu_kernel void @srem_i32(ptr addrspace(1) %out, i32 %x, i32 %y) {
 ; CHECK-NEXT:    [[TMP7:%.*]] = uitofp i32 [[TMP6]] to float
 ; CHECK-NEXT:    [[TMP8:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP7]])
 ; CHECK-NEXT:    [[TMP9:%.*]] = fmul fast float [[TMP8]], 0x41EFFFFFC0000000
-; CHECK-NEXT:    [[TMP10:%.*]] = fptoui float [[TMP9]] to i32
+; CHECK-NEXT:    [[TMP10:%.*]] = fptoui fast float [[TMP9]] to i32
 ; CHECK-NEXT:    [[TMP11:%.*]] = sub i32 0, [[TMP6]]
 ; CHECK-NEXT:    [[TMP12:%.*]] = mul i32 [[TMP11]], [[TMP10]]
 ; CHECK-NEXT:    [[TMP13:%.*]] = zext i32 [[TMP10]] to i64
@@ -439,7 +439,7 @@ define amdgpu_kernel void @udiv_i16(ptr addrspace(1) %out, i16 %x, i16 %y) {
 ; CHECK-NEXT:    [[TMP7:%.*]] = call fast float @llvm.trunc.f32(float [[TMP6]])
 ; CHECK-NEXT:    [[TMP8:%.*]] = fneg fast float [[TMP7]]
 ; CHECK-NEXT:    [[TMP9:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP8]], float [[TMP4]], float [[TMP3]])
-; CHECK-NEXT:    [[TMP10:%.*]] = fptoui float [[TMP7]] to i32
+; CHECK-NEXT:    [[TMP10:%.*]] = fptoui fast float [[TMP7]] to i32
 ; CHECK-NEXT:    [[TMP11:%.*]] = call fast float @llvm.fabs.f32(float [[TMP9]])
 ; CHECK-NEXT:    [[TMP12:%.*]] = call fast float @llvm.fabs.f32(float [[TMP4]])
 ; CHECK-NEXT:    [[TMP13:%.*]] = fcmp fast oge float [[TMP11]], [[TMP12]]
@@ -508,7 +508,7 @@ define amdgpu_kernel void @urem_i16(ptr addrspace(1) %out, i16 %x, i16 %y) {
 ; CHECK-NEXT:    [[TMP7:%.*]] = call fast float @llvm.trunc.f32(float [[TMP6]])
 ; CHECK-NEXT:    [[TMP8:%.*]] = fneg fast float [[TMP7]]
 ; CHECK-NEXT:    [[TMP9:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP8]], float [[TMP4]], float [[TMP3]])
-; CHECK-NEXT:    [[TMP10:%.*]] = fptoui float [[TMP7]] to i32
+; CHECK-NEXT:    [[TMP10:%.*]] = fptoui fast float [[TMP7]] to i32
 ; CHECK-NEXT:    [[TMP11:%.*]] = call fast float @llvm.fabs.f32(float [[TMP9]])
 ; CHECK-NEXT:    [[TMP12:%.*]] = call fast float @llvm.fabs.f32(float [[TMP4]])
 ; CHECK-NEXT:    [[TMP13:%.*]] = fcmp fast oge float [[TMP11]], [[TMP12]]
@@ -579,14 +579,14 @@ define amdgpu_kernel void @sdiv_i16(ptr addrspace(1) %out, i16 %x, i16 %y) {
 ; CHECK-NEXT:    [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]]
 ; CHECK-NEXT:    [[TMP4:%.*]] = ashr i32 [[TMP3]], 30
 ; CHECK-NEXT:    [[TMP5:%.*]] = or i32 [[TMP4]], 1
-; CHECK-NEXT:    [[TMP6:%.*]] = sitofp i32 [[TMP1]] to float
-; CHECK-NEXT:    [[TMP7:%.*]] = sitofp i32 [[TMP2]] to float
+; CHECK-NEXT:    [[TMP6:%.*]] = sitofp fast i32 [[TMP1]] to float
+; CHECK-NEXT:    [[TMP7:%.*]] = sitofp fast i32 [[TMP2]] to float
 ; CHECK-NEXT:    [[TMP8:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP7]])
 ; CHECK-NEXT:    [[TMP9:%.*]] = fmul fast float [[TMP6]], [[TMP8]]
 ; CHECK-NEXT:    [[TMP10:%.*]] = call fast float @llvm.trunc.f32(float [[TMP9]])
 ; CHECK-NEXT:    [[TMP11:%.*]] = fneg fast float [[TMP10]]
 ; CHECK-NEXT:    [[TMP12:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP11]], float [[TMP7]], float [[TMP6]])
-; CHECK-NEXT:    [[TMP13:%.*]] = fptosi float [[TMP10]] to i32
+; CHECK-NEXT:    [[TMP13:%.*]] = fptosi fast float [[TMP10]] to i32
 ; CHECK-NEXT:    [[TMP14:%.*]] = call fast float @llvm.fabs.f32(float [[TMP12]])
 ; CHECK-NEXT:    [[TMP15:%.*]] = call fast float @llvm.fabs.f32(float [[TMP7]])
 ; CHECK-NEXT:    [[TMP16:%.*]] = fcmp fast oge float [[TMP14]], [[TMP15]]
@@ -660,14 +660,14 @@ define amdgpu_kernel void @srem_i16(ptr addrspace(1) %out, i16 %x, i16 %y) {
 ; CHECK-NEXT:    [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]]
 ; CHECK-NEXT:    [[TMP4:%.*]] = ashr i32 [[TMP3]], 30
 ; CHECK-NEXT:    [[TMP5:%.*]] = or i32 [[TMP4]], 1
-; CHECK-NEXT:    [[TMP6:%.*]] = sitofp i32 [[TMP1]] to float
-; CHECK-NEXT:    [[TMP7:%.*]] = sitofp i32 [[TMP2]] to float
+; CHECK-NEXT:    [[TMP6:%.*]] = sitofp fast i32 [[TMP1]] to float
+; CHECK-NEXT:    [[TMP7:%.*]] = sitofp fast i32 [[TMP2]] to float
 ; CHECK-NEXT:    [[TMP8:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP7]])
 ; CHECK-NEXT:    [[TMP9:%.*]] = fmul fast float [[TMP6]], [[TMP8]]
 ; CHECK-NEXT:    [[TMP10:%.*]] = call fast float @llvm.trunc.f32(float [[TMP9]])
 ; CHECK-NEXT:    [[TMP11:%.*]] = fneg fast float [[TMP10]]
 ; CHECK-NEXT:    [[TMP12:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP11]], float [[TMP7]], float [[TMP6]])
-; CHECK-NEXT:    [[TMP13:%.*]] = fptosi float [[TMP10]] to i32
+; CHECK-NEXT:    [[TMP13:%.*]] = fptosi fast float [[TMP10]] to i32
 ; CHECK-NEXT:    [[TMP14:%.*]] = call fast float @llvm.fabs.f32(float [[TMP12]])
 ; CHECK-NEXT:    [[TMP15:%.*]] = call fast float @llvm.fabs.f32(float [[TMP7]])
 ; CHECK-NEXT:    [[TMP16:%.*]] = fcmp fast oge float [[TMP14]], [[TMP15]]
@@ -751,7 +751,7 @@ define amdgpu_kernel void @udiv_i8(ptr addrspace(1) %out, i8 %x, i8 %y) {
 ; CHECK-NEXT:    [[TMP7:%.*]] = call fast float @llvm.trunc.f32(float [[TMP6]])
 ; CHECK-NEXT:    [[TMP8:%.*]] = fneg fast float [[TMP7]]
 ; CHECK-NEXT:    [[TMP9:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP8]], float [[TMP4]], float [[TMP3]])
-; CHECK-NEXT:    [[TMP10:%.*]] = fptoui float [[TMP7]] to i32
+; CHECK-NEXT:    [[TMP10:%.*]] = fptoui fast float [[TMP7]] to i32
 ; CHECK-NEXT:    [[TMP11:%.*]] = call fast float @llvm.fabs.f32(float [[TMP9]])
 ; CHECK-NEXT:    [[TMP12:%.*]] = call fast float @llvm.fabs.f32(float [[TMP4]])
 ; CHECK-NEXT:    [[TMP13:%.*]] = fcmp fast oge float [[TMP11]], [[TMP12]]
@@ -814,7 +814,7 @@ define amdgpu_kernel void @urem_i8(ptr addrspace(1) %out, i8 %x, i8 %y) {
 ; CHECK-NEXT:    [[TMP7:%.*]] = call fast float @llvm.trunc.f32(float [[TMP6]])
 ; CHECK-NEXT:    [[TMP8:%.*]] = fneg fast float [[TMP7]]
 ; CHECK-NEXT:    [[TMP9:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP8]], float [[TMP4]], float [[TMP3]])
-; CHECK-NEXT:    [[TMP10:%.*]] = fptoui float [[TMP7]] to i32
+; CHECK-NEXT:    [[TMP10:%.*]] = fptoui fast float [[TMP7]] to i32
 ; CHECK-NEXT:    [[TMP11:%.*]] = call fast float @llvm.fabs.f32(float [[TMP9]])
 ; CHECK-NEXT:    [[TMP12:%.*]] = call fast float @llvm.fabs.f32(float [[TMP4]])
 ; CHECK-NEXT:    [[TMP13:%.*]] = fcmp fast oge float [[TMP11]], [[TMP12]]
@@ -881,14 +881,14 @@ define amdgpu_kernel void @sdiv_i8(ptr addrspace(1) %out, i8 %x, i8 %y) {
 ; CHECK-NEXT:    [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]]
 ; CHECK-NEXT:    [[TMP4:%.*]] = ashr i32 [[TMP3]], 30
 ; CHECK-NEXT:    [[TMP5:%.*]] = or i32 [[TMP4]], 1
-; CHECK-NEXT:    [[TMP6:%.*]] = sitofp i32 [[TMP1]] to float
-; CHECK-NEXT:    [[TMP7:%.*]] = sitofp i32 [[TMP2]] to float
+; CHECK-NEXT:    [[TMP6:%.*]] = sitofp fast i32 [[TMP1]] to float
+; CHECK-NEXT:    [[TMP7:%.*]] = sitofp fast i32 [[TMP2]] to float
 ; CHECK-NEXT:    [[TMP8:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP7]])
 ; CHECK-NEXT:    [[TMP9:%.*]] = fmul fast float [[TMP6]], [[TMP8]]
 ; CHECK-NEXT:    [[TMP10:%.*]] = call fast float @llvm.trunc.f32(float [[TMP9]])
 ; CHECK-NEXT:    [[TMP11:%.*]] = fneg fast float [[TMP10]]
 ; CHECK-NEXT:    [[TMP12:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP11]], float [[TMP7]], float [[TMP6]])
-; CHECK-NEXT:    [[TMP13:%.*]] = fptosi float [[TMP10]] to i32
+; CHECK-NEXT:    [[TMP13:%.*]] = fptosi fast float [[TMP10]] to i32
 ; CHECK-NEXT:    [[TMP14:%.*]] = call fast float @llvm.fabs.f32(float [[TMP12]])
 ; CHECK-NEXT:    [[TMP15:%.*]] = call fast float @llvm.fabs.f32(float [[TMP7]])
 ; CHECK-NEXT:    [[TMP16:%.*]] = fcmp fast oge float [[TMP14]], [[TMP15]]
@@ -962,14 +962,14 @@ define amdgpu_kernel void @srem_i8(ptr addrspace(1) %out, i8 %x, i8 %y) {
 ; CHECK-NEXT:    [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]]
 ; CHECK-NEXT:    [[TMP4:%.*]] = ashr i32 [[TMP3]], 30
 ; CHECK-NEXT:    [[TMP5:%.*]] = or i32 [[TMP4]], 1
-; CHECK-NEXT:    [[TMP6:%.*]] = sitofp i32 [[TMP1]] to float
-; CHECK-NEXT:    [[TMP7:%.*]] = sitofp i32 [[TMP2]] to float
+; CHECK-NEXT:    [[TMP6:%.*]] = sitofp fast i32 [[TMP1]] to float
+; CHECK-NEXT:    [[TMP7:%.*]] = sitofp fast i32 [[TMP2]] to float
 ; CHECK-NEXT:    [[TMP8:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP7]])
 ; CHECK-NEXT:    [[TMP9:%.*]] = fmul fast float [[TMP6]], [[TMP8]]
 ; CHECK-NEXT:    [[TMP10:%.*]] = call fast float @llvm.trunc.f32(float [[TMP9]])
 ; CHECK-NEXT:    [[TMP11:%.*]] = fneg fast float [[TMP10]]
 ; CHECK-NEXT:    [[TMP12:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP11]], float [[TMP7]], float [[TMP6]])
-; CHECK-NEXT:    [[TMP13:%.*]] = fptosi float [[TMP10]] to i32
+; CHECK-NEXT:    [[TMP13:%.*]] = fptosi fast float [[TMP10]] to i32
 ; CHECK-NEXT:    [[TMP14:%.*]] = call fast float @llvm.fabs.f32(float [[TMP12]])
 ; CHECK-NEXT:    [[TMP15:%.*]] = call fast float @llvm.fabs.f32(float [[TMP7]])
 ; CHECK-NEXT:    [[TMP16:%.*]] = fcmp fast oge float [[TMP14]], [[TMP15]]
@@ -1051,7 +1051,7 @@ define amdgpu_kernel void @udiv_v4i32(ptr addrspace(1) %out, <4 x i32> %x, <4 x
 ; CHECK-NEXT:    [[TMP3:%.*]] = uitofp i32 [[TMP2]] to float
 ; CHECK-NEXT:    [[TMP4:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP3]])
 ; CHECK-NEXT:    [[TMP5:%.*]] = fmul fast float [[TMP4]], 0x41EFFFFFC0000000
-; CHECK-NEXT:    [[TMP6:%.*]] = fptoui float [[TMP5]] to i32
+; CHECK-NEXT:    [[TMP6:%.*]] = fptoui fast float [[TMP5]] to i32
 ; CHECK-NEXT:    [[TMP7:%.*]] = sub i32 0, [[TMP2]]
 ; CHECK-NEXT:    [[TMP8:%.*]] = mul i32 [[TMP7]], [[TMP6]]
 ; CHECK-NEXT:    [[TMP9:%.*]] = zext i32 [[TMP6]] to i64
@@ -1083,7 +1083,7 @@ define amdgpu_kernel void @udiv_v4i32(ptr addrspace(1) %out, <4 x i32> %x, <4 x
 ; CHECK-NEXT:    [[TMP35:%.*]] = uitofp i32 [[TMP34]] to float
 ; CHECK-NEXT:    [[TMP36:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP35]])
 ; CHECK-NEXT:    [[TMP37:%.*]] = fmul fast float [[TMP36]], 0x41EFFFFFC0000000
-; CHECK-NEXT:    [[TMP38:%.*]] = fptoui float [[TMP37]] to i32
+; CHECK-NEXT:    [[TMP38:%.*]] = fptoui fast float [[TMP37]] to i32
 ; CHECK-NEXT:    [[TMP39:%.*]] = sub i32 0, [[TMP34]]
 ; CHECK-NEXT:    [[TMP40:%.*]] = mul i32 [[TMP39]], [[TMP38]]
 ; CHECK-NEXT:    [[TMP41:%.*]] = zext i32 [[TMP38]] to i64
@@ -1115,7 +1115,7 @@ define amdgpu_kernel void @udiv_v4i32(ptr addrspace(1) %out, <4 x i32> %x, <4 x
 ; CHECK-NEXT:    [[TMP67:%.*]] = uitofp i32 [[TMP66]] to float
 ; CHECK-NEXT:    [[TMP68:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP67]])
 ; CHECK-NEXT:    [[TMP69:%.*]] = fmul fast float [[TMP68]], 0x41EFFFFFC0000000
-; CHECK-NEXT:    [[TMP70:%.*]] = fptoui float [[TMP69]] to i32
+; CHECK-NEXT:    [[TMP70:%.*]] = fptoui fast float [[TMP69]] to i32
 ; CHECK-NEXT:    [[TMP71:%.*]] = sub i32 0, [[TMP66]]
 ; CHECK-NEXT:    [[TMP72:%.*]] = mul i32 [[TMP71]], [[TMP70]]
 ; CHECK-NEXT:    [[TMP73:%.*]] = zext i32 [[TMP70]] to i64
@@ -1147,7 +1147,7 @@ define amdgpu_kernel void @udiv_v4i32(ptr addrspace(1) %out, <4 x i32> %x, <4 x
 ; CHECK-NEXT:    [[TMP99:%.*]] = uitofp i32 [[TMP98]] to float
 ; CHECK-NEXT:    [[TMP100:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP99]])
 ; CHECK-NEXT:    [[TMP101:%.*]] = fmul fast float [[TMP100]], 0x41EFFFFFC0000000
-; CHECK-NEXT:    [[TMP102:%.*]] = fptoui float [[TMP101]] to i32
+; CHECK-NEXT:    [[TMP102:%.*]] = fptoui fast float [[TMP101]] to i32
 ; CHECK-NEXT:    [[TMP103:%.*]] = sub i32 0, [[TMP98]]
 ; CHECK-NEXT:    [[TMP104:%.*]] = mul i32 [[TMP103]], [[TMP102]]
 ; CHECK-NEXT:    [[TMP105:%.*]] = zext i32 [[TMP102]] to i64
@@ -1379,7 +1379,7 @@ define amdgpu_kernel void @urem_v4i32(ptr addrspace(1) %out, <4 x i32> %x, <4 x
 ; CHECK-NEXT:    [[TMP3:%.*]] = uitofp i32 [[TMP2]] to float
 ; CHECK-NEXT:    [[TMP4:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP3]])
 ; CHECK-NEXT:    [[TMP5:%.*]] = fmul fast float [[TMP4]], 0x41EFFFFFC0000000
-; CHECK-NEXT:    [[TMP6:%.*]] = fptoui float [[TMP5]] to i32
+; CHECK-NEXT:    [[TMP6:%.*]] = fptoui fast float [[TMP5]] to i32
 ; CHECK-NEXT:    [[TMP7:%.*]] = sub i32 0, [[TMP2]]
 ; CHECK-NEXT:    [[TMP8:%.*]] = mul i32 [[TMP7]], [[TMP6]]
 ; CHECK-NEXT:    [[TMP9:%.*]] = zext i32 [[TMP6]] to i64
@@ -1409,7 +1409,7 @@ define amdgpu_kernel void @urem_v4i32(ptr addrspace(1) %out, <4 x i32> %x, <4 x
 ; CHECK-NEXT:    [[TMP33:%.*]] = uitofp i32 [[TMP32]] to float
 ; CHECK-NEXT:    [[TMP34:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP33]])
 ; CHECK-NEXT:    [[TMP35:%.*]] = fmul fast float [[TMP34]], 0x41EFFFFFC0000000
-; CHECK-NEXT:    [[TMP36:%.*]] = fptoui float [[TMP35]] to i32
+; CHECK-NEXT:    [[TMP36:%.*]] = fptoui fast float [[TMP35]] to i32
 ; CHECK-NEXT:    [[TMP37:%.*]] = sub i32 0, [[TMP32]]
 ; CHECK-NEXT:    [[TMP38:%.*]] = mul i32 [[TMP37]], [[TMP36]]
 ; CHECK-NEXT:    [[TMP39:%.*]] = zext i32 [[TMP36]] to i64
@@ -1439,7 +1439,7 @@ define amdgpu_kernel void @urem_v4i32(ptr addrspace(1) %out, <4 x i32> %x, <4 x
 ; CHECK-NEXT:    [[TMP63:%.*]] = uitofp i32 [[TMP62]] to float
 ; CHECK-NEXT:    [[TMP64:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP63]])
 ; CHECK-NEXT:    [[TMP65:%.*]] = fmul fast float [[TMP64]], 0x41EFFFFFC0000000
-; CHECK-NEXT:    [[TMP66:%.*]] = fptoui float [[TMP65]] to i32
+; CHECK-NEXT:    [[TMP66:%.*]] = fptoui fast float [[TMP65]] to i32
 ; CHECK-NEXT:    [[TMP67:%.*]] = sub i32 0, [[TMP62]]
 ; CHECK-NEXT:    [[TMP68:%.*]] = mul i32 [[TMP67]], [[TMP66]]
 ; CHECK-NEXT:    [[TMP69:%.*]] = zext i32 [[TMP66]] to i64
@@ -1469,7 +1469,7 @@ define amdgpu_kernel void @urem_v4i32(ptr addrspace(1) %out, <4 x i32> %x, <4 x
 ; CHECK-NEXT:    [[TMP93:%.*]] = uitofp i32 [[TMP92]] to float
 ; CHECK-NEXT:    [[TMP94:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP93]])
 ; CHECK-NEXT:    [[TMP95:%.*]] = fmul fast float [[TMP94]], 0x41EFFFFFC0000000
-; CHECK-NEXT:    [[TMP96:%.*]] = fptoui float [[TMP95]] to i32
+; CHECK-NEXT:    [[TMP96:%.*]] = fptoui fast float [[TMP95]] to i32
 ; CHECK-NEXT:    [[TMP97:%.*]] = sub i32 0, [[TMP92]]
 ; CHECK-NEXT:    [[TMP98:%.*]] = mul i32 [[TMP97]], [[TMP96]]
 ; CHECK-NEXT:    [[TMP99:%.*]] = zext i32 [[TMP96]] to i64
@@ -1687,7 +1687,7 @@ define amdgpu_kernel void @sdiv_v4i32(ptr addrspace(1) %out, <4 x i32> %x, <4 x
 ; CHECK-NEXT:    [[TMP10:%.*]] = uitofp i32 [[TMP9]] to float
 ; CHECK-NEXT:    [[TMP11:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP10]])
 ; CHECK-NEXT:    [[TMP12:%.*]] = fmul fast float [[TMP11]], 0x41EFFFFFC0000000
-; CHECK-NEXT:    [[TMP13:%.*]] = fptoui float [[TMP12]] to i32
+; CHECK-NEXT:    [[TMP13:%.*]] = fptoui fast float [[TMP12]] to i32
 ; CHECK-NEXT:    [[TMP14:%.*]] = sub i32 0, [[TMP9]]
 ; CHECK-NEXT:    [[TMP15:%.*]] = mul i32 [[TMP14]], [[TMP13]]
 ; CHECK-NEXT:    [[TMP16:%.*]] = zext i32 [[TMP13]] to i64
@@ -1728,7 +1728,7 @@ define amdgpu_kernel void @sdiv_v4i32(ptr addrspace(1) %out, <4 x i32> %x, <4 x
 ; CHECK-NEXT:    [[TMP51:%.*]] = uitofp i32 [[TMP50]] to float
 ; CHECK-NEXT:    [[TMP52:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP51]])
 ; CHECK-NEXT:    [[TMP53:%.*]] = fmul fast float [[TMP52]], 0x41EFFFFFC0000000
-; CHECK-NEXT:    [[TMP54:%.*]] = fptoui float [[TMP53]] to i32
+; CHECK-NEXT:    [[TMP54:%.*]] = fptoui fast float [[TMP53]] to i32
 ; CHECK-NEXT:    [[TMP55:%.*]] = sub i32 0, [[TMP50]]
 ; CHECK-NEXT:    [[TMP56:%.*]] = mul i32 [[TMP55]], [[TMP54]]
 ; CHECK-NEXT:    [[TMP57:%.*]] = zext i32 [[TMP54]] to i64
@@ -1769,7 +1769,7 @@ define amdgpu_kernel void @sdiv_v4i32(ptr addrspace(1) %out, <4 x i32> %x, <4 x
 ; CHECK-NEXT:    [[TMP92:%.*]] = uitofp i32 [[TMP91]] to float
 ; CHECK-NEXT:    [[TMP93:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP92]])
 ; CHECK-NEXT:    [[TMP94:%.*]] = fmul fast float [[TMP93]], 0x41EFFFFFC0000000
-; CHECK-NEXT:    [[TMP95:%.*]] = fptoui float [[TMP94]] to i32
+; CHECK-NEXT:    [[TMP95:%.*]] = fptoui fast float [[TMP94]] to i32
 ; CHECK-NEXT:    [[TMP96:%.*]] = sub i32 0, [[TMP91]]
 ; CHECK-NEXT:    [[TMP97:%.*]] = mul i32 [[TMP96]], [[TMP95]]
 ; CHECK-NEXT:    [[TMP98:%.*]] = zext i32 [[TMP95]] to i64
@@ -1810,7 +1810,7 @@ define amdgpu_kernel void @sdiv_v4i32(ptr addrspace(1) %out, <4 x i32> %x, <4 x
 ; CHECK-NEXT:    [[TMP133:%.*]] = uitofp i32 [[TMP132]] to float
 ; CHECK-NEXT:    [[TMP134:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP133]])
 ; CHECK-NEXT:    [[TMP135:%.*]] = fmul fast float [[TMP134]], 0x41EFFFFFC0000000
-; CHECK-NEXT:    [[TMP136:%.*]] = fptoui float [[TMP135]] to i32
+; CHECK-NEXT:    [[TMP136:%.*]] = fptoui fast float [[TMP135]] to i32
 ; CHECK-NEXT:    [[TMP137:%.*]] = sub i32 0, [[TMP132]]
 ; CHECK-NEXT:    [[TMP138:%.*]] = mul i32 [[TMP137]], [[TMP136]]
 ; CHECK-NEXT:    [[TMP139:%.*]] = zext i32 [[TMP136]] to i64
@@ -2099,7 +2099,7 @@ define amdgpu_kernel void @srem_v4i32(ptr addrspace(1) %out, <4 x i32> %x, <4 x
 ; CHECK-NEXT:    [[TMP9:%.*]] = uitofp i32 [[TMP8]] to float
 ; CHECK-NEXT:    [[TMP10:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP9]])
 ; CHECK-NEXT:    [[TMP11:%.*]] = fmul fast float [[TMP10]], 0x41EFFFFFC0000000
-; CHECK-NEXT:    [[TMP12:%.*]] = fptoui float [[TMP11]] to i32
+; CHECK-NEXT:    [[TMP12:%.*]] = fptoui fast float [[TMP11]] to i32
 ; CHECK-NEXT:    [[TMP13:%.*]] = sub i32 0, [[TMP8]]
 ; CHECK-NEXT:    [[TMP14:%.*]] = mul i32 [[TMP13]], [[TMP12]]
 ; CHECK-NEXT:    [[TMP15:%.*]] = zext i32 [[TMP12]] to i64
@@ -2137,7 +2137,7 @@ define amdgpu_kernel void @srem_v4i32(ptr addrspace(1) %out, <4 x i32> %x, <4 x
 ; CHECK-NEXT:    [[TMP47:%.*]] = uitofp i32 [[TMP46]] to float
 ; CHECK-NEXT:    [[TMP48:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP47]])
 ; CHECK-NEXT:    [[TMP49:%.*]] = fmul fast float [[TMP48]], 0x41EFFFFFC0000000
-; CHECK-NEXT:    [[TMP50:%.*]] = fptoui float [[TMP49]] to i32
+; CHECK-NEXT:    [[TMP50:%.*]] = fptoui fast float [[TMP49]] to i32
 ; CHECK-NEXT:    [[TMP51:%.*]] = sub i32 0, [[TMP46]]
 ; CHECK-NEXT:    [[TMP52:%.*]] = mul i32 [[TMP51]], [[TMP50]]
 ; CHECK-NEXT:    [[TMP53:%.*]] = zext i32 [[TMP50]] to i64
@@ -2175,7 +2175,7 @@ define amdgpu_kernel void @srem_v4i32(ptr addrspace(1) %out, <4 x i32> %x, <4 x
 ; CHECK-NEXT:    [[TMP85:%.*]] = uitofp i32 [[TMP84]] to float
 ; CHECK-NEXT:    [[TMP86:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP85]])
 ; CHECK-NEXT:    [[TMP87:%.*]] = fmul fast float [[TMP86]], 0x41EFFFFFC0000000
-; CHECK-NEXT:    [[TMP88:%.*]] = fptoui float [[TMP87]] to i32
+; CHECK-NEXT:    [[TMP88:%.*]] = fptoui fast float [[TMP87]] to i32
 ; CHECK-NEXT:    [[TMP89:%.*]] = sub i32 0, [[TMP84]]
 ; CHECK-NEXT:    [[TMP90:%.*]] = mul i32 [[TMP89]], [[TMP88]]
 ; CHECK-NEXT:    [[TMP91:%.*]] = zext i32 [[TMP88]] to i64
@@ -2213,7 +2213,7 @@ define amdgpu_kernel void @srem_v4i32(ptr addrspace(1) %out, <4 x i32> %x, <4 x
 ; CHECK-NEXT:    [[TMP123:%.*]] = uitofp i32 [[TMP122]] to float
 ; CHECK-NEXT:    [[TMP124:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP123]])
 ; CHECK-NEXT:    [[TMP125:%.*]] = fmul fast float [[TMP124]], 0x41EFFFFFC0000000
-; CHECK-NEXT:    [[TMP126:%.*]] = fptoui float [[TMP125]] to i32
+; CHECK-NEXT:    [[TMP126:%.*]] = fptoui fast float [[TMP125]] to i32
 ; CHECK-NEXT:    [[TMP127:%.*]] = sub i32 0, [[TMP122]]
 ; CHECK-NEXT:    [[TMP128:%.*]] = mul i32 [[TMP127]], [[TMP126]]
 ; CHECK-NEXT:    [[TMP129:%.*]] = zext i32 [[TMP126]] to i64
@@ -2473,7 +2473,7 @@ define amdgpu_kernel void @udiv_v4i16(ptr addrspace(1) %out, <4 x i16> %x, <4 x
 ; CHECK-NEXT:    [[TMP9:%.*]] = call fast float @llvm.trunc.f32(float [[TMP8]])
 ; CHECK-NEXT:    [[TMP10:%.*]] = fneg fast float [[TMP9]]
 ; CHECK-NEXT:    [[TMP11:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP10]], float [[TMP6]], float [[TMP5]])
-; CHECK-NEXT:    [[TMP12:%.*]] = fptoui float [[TMP9]] to i32
+; CHECK-NEXT:    [[TMP12:%.*]] = fptoui fast float [[TMP9]] to i32
 ; CHECK-NEXT:    [[TMP13:%.*]] = call fast float @llvm.fabs.f32(float [[TMP11]])
 ; CHECK-NEXT:    [[TMP14:%.*]] = call fast float @llvm.fabs.f32(float [[TMP6]])
 ; CHECK-NEXT:    [[TMP15:%.*]] = fcmp fast oge float [[TMP13]], [[TMP14]]
@@ -2493,7 +2493,7 @@ define amdgpu_kernel void @udiv_v4i16(ptr addrspace(1) %out, <4 x i16> %x, <4 x
 ; CHECK-NEXT:    [[TMP29:%.*]] = call fast float @llvm.trunc.f32(float [[TMP28]])
 ; CHECK-NEXT:    [[TMP30:%.*]] = fneg fast float [[TMP29]]
 ; CHECK-NEXT:    [[TMP31:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP30]], float [[TMP26]], float [[TMP25]])
-; CHECK-NEXT:    [[TMP32:%.*]] = fptoui float [[TMP29]] to i32
+; CHECK-NEXT:    [[TMP32:%.*]] = fptoui fast float [[TMP29]] to i32
 ; CHECK-NEXT:    [[TMP33:%.*]] = call fast float @llvm.fabs.f32(float [[TMP31]])
 ; CHECK-NEXT:    [[TMP34:%.*]] = call fast float @llvm.fabs.f32(float [[TMP26]])
 ; CHECK-NEXT:    [[TMP35:%.*]] = fcmp fast oge float [[TMP33]], [[TMP34]]
@@ -2513,7 +2513,7 @@ define amdgpu_kernel void @udiv_v4i16(ptr addrspace(1) %out, <4 x i16> %x, <4 x
 ; CHECK-NEXT:    [[TMP49:%.*]] = call fast float @llvm.trunc.f32(float [[TMP48]])
 ; CHECK-NEXT:    [[TMP50:%.*]] = fneg fast float [[TMP49]]
 ; CHECK-NEXT:    [[TMP51:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP50]], float [[TMP46]], float [[TMP45]])
-; CHECK-NEXT:    [[TMP52:%.*]] = fptoui float [[TMP49]] to i32
+; CHECK-NEXT:    [[TMP52:%.*]] = fptoui fast float [[TMP49]] to i32
 ; CHECK-NEXT:    [[TMP53:%.*]] = call fast float @llvm.fabs.f32(float [[TMP51]])
 ; CHECK-NEXT:    [[TMP54:%.*]] = call fast float @llvm.fabs.f32(float [[TMP46]])
 ; CHECK-NEXT:    [[TMP55:%.*]] = fcmp fast oge float [[TMP53]], [[TMP54]]
@@ -2533,7 +2533,7 @@ define amdgpu_kernel void @udiv_v4i16(ptr addrspace(1) %out, <4 x i16> %x, <4 x
 ; CHECK-NEXT:    [[TMP69:%.*]] = call fast float @llvm.trunc.f32(float [[TMP68]])
 ; CHECK-NEXT:    [[TMP70:%.*]] = fneg fast float [[TMP69]]
 ; CHECK-NEXT:    [[TMP71:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP70]], float [[TMP66]], float [[TMP65]])
-; CHECK-NEXT:    [[TMP72:%.*]] = fptoui float [[TMP69]] to i32
+; CHECK-NEXT:    [[TMP72:%.*]] = fptoui fast float [[TMP69]] to i32
 ; CHECK-NEXT:    [[TMP73:%.*]] = call fast float @llvm.fabs.f32(float [[TMP71]])
 ; CHECK-NEXT:    [[TMP74:%.*]] = call fast float @llvm.fabs.f32(float [[TMP66]])
 ; CHECK-NEXT:    [[TMP75:%.*]] = fcmp fast oge float [[TMP73]], [[TMP74]]
@@ -2680,7 +2680,7 @@ define amdgpu_kernel void @urem_v4i16(ptr addrspace(1) %out, <4 x i16> %x, <4 x
 ; CHECK-NEXT:    [[TMP9:%.*]] = call fast float @llvm.trunc.f32(float [[TMP8]])
 ; CHECK-NEXT:    [[TMP10:%.*]] = fneg fast float [[TMP9]]
 ; CHECK-NEXT:    [[TMP11:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP10]], float [[TMP6]], float [[TMP5]])
-; CHECK-NEXT:    [[TMP12:%.*]] = fptoui float [[TMP9]] to i32
+; CHECK-NEXT:    [[TMP12:%.*]] = fptoui fast float [[TMP9]] to i32
 ; CHECK-NEXT:    [[TMP13:%.*]] = call fast float @llvm.fabs.f32(float [[TMP11]])
 ; CHECK-NEXT:    [[TMP14:%.*]] = call fast float @llvm.fabs.f32(float [[TMP6]])
 ; CHECK-NEXT:    [[TMP15:%.*]] = fcmp fast oge float [[TMP13]], [[TMP14]]
@@ -2702,7 +2702,7 @@ define amdgpu_kernel void @urem_v4i16(ptr addrspace(1) %out, <4 x i16> %x, <4 x
 ; CHECK-NEXT:    [[TMP31:%.*]] = call fast float @llvm.trunc.f32(float [[TMP30]])
 ; CHECK-NEXT:    [[TMP32:%.*]] = fneg fast float [[TMP31]]
 ; CHECK-NEXT:    [[TMP33:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP32]], float [[TMP28]], float [[TMP27]])
-; CHECK-NEXT:    [[TMP34:%.*]] = fptoui float [[TMP31]] to i32
+; CHECK-NEXT:    [[TMP34:%.*]] = fptoui fast float [[TMP31]] to i32
 ; CHECK-NEXT:    [[TMP35:%.*]] = call fast float @llvm.fabs.f32(float [[TMP33]])
 ; CHECK-NEXT:    [[TMP36:%.*]] = call fast float @llvm.fabs.f32(float [[TMP28]])
 ; CHECK-NEXT:    [[TMP37:%.*]] = fcmp fast oge float [[TMP35]], [[TMP36]]
@@ -2724,7 +2724,7 @@ define amdgpu_kernel void @urem_v4i16(ptr addrspace(1) %out, <4 x i16> %x, <4 x
 ; CHECK-NEXT:    [[TMP53:%.*]] = call fast float @llvm.trunc.f32(float [[TMP52]])
 ; CHECK-NEXT:    [[TMP54:%.*]] = fneg fast float [[TMP53]]
 ; CHECK-NEXT:    [[TMP55:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP54]], float [[TMP50]], float [[TMP49]])
-; CHECK-NEXT:    [[TMP56:%.*]] = fptoui float [[TMP53]] to i32
+; CHECK-NEXT:    [[TMP56:%.*]] = fptoui fast float [[TMP53]] to i32
 ; CHECK-NEXT:    [[TMP57:%.*]] = call fast float @llvm.fabs.f32(float [[TMP55]])
 ; CHECK-NEXT:    [[TMP58:%.*]] = call fast float @llvm.fabs.f32(float [[TMP50]])
 ; CHECK-NEXT:    [[TMP59:%.*]] = fcmp fast oge float [[TMP57]], [[TMP58]]
@@ -2746,7 +2746,7 @@ define amdgpu_kernel void @urem_v4i16(ptr addrspace(1) %out, <4 x i16> %x, <4 x
 ; CHECK-NEXT:    [[TMP75:%.*]] = call fast float @llvm.trunc.f32(float [[TMP74]])
 ; CHECK-NEXT:    [[TMP76:%.*]] = fneg fast float [[TMP75]]
 ; CHECK-NEXT:    [[TMP77:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP76]], float [[TMP72]], float [[TMP71]])
-; CHECK-NEXT:    [[TMP78:%.*]] = fptoui float [[TMP75]] to i32
+; CHECK-NEXT:    [[TMP78:%.*]] = fptoui fast float [[TMP75]] to i32
 ; CHECK-NEXT:    [[TMP79:%.*]] = call fast float @llvm.fabs.f32(float [[TMP77]])
 ; CHECK-NEXT:    [[TMP80:%.*]] = call fast float @llvm.fabs.f32(float [[TMP72]])
 ; CHECK-NEXT:    [[TMP81:%.*]] = fcmp fast oge float [[TMP79]], [[TMP80]]
@@ -2906,14 +2906,14 @@ define amdgpu_kernel void @sdiv_v4i16(ptr addrspace(1) %out, <4 x i16> %x, <4 x
 ; CHECK-NEXT:    [[TMP5:%.*]] = xor i32 [[TMP3]], [[TMP4]]
 ; CHECK-NEXT:    [[TMP6:%.*]] = ashr i32 [[TMP5]], 30
 ; CHECK-NEXT:    [[TMP7:%.*]] = or i32 [[TMP6]], 1
-; CHECK-NEXT:    [[TMP8:%.*]] = sitofp i32 [[TMP3]] to float
-; CHECK-NEXT:    [[TMP9:%.*]] = sitofp i32 [[TMP4]] to float
+; CHECK-NEXT:    [[TMP8:%.*]] = sitofp fast i32 [[TMP3]] to float
+; CHECK-NEXT:    [[TMP9:%.*]] = sitofp fast i32 [[TMP4]] to float
 ; CHECK-NEXT:    [[TMP10:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP9]])
 ; CHECK-NEXT:    [[TMP11:%.*]] = fmul fast float [[TMP8]], [[TMP10]]
 ; CHECK-NEXT:    [[TMP12:%.*]] = call fast float @llvm.trunc.f32(float [[TMP11]])
 ; CHECK-NEXT:    [[TMP13:%.*]] = fneg fast float [[TMP12]]
 ; CHECK-NEXT:    [[TMP14:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP13]], float [[TMP9]], float [[TMP8]])
-; CHECK-NEXT:    [[TMP15:%.*]] = fptosi float [[TMP12]] to i32
+; CHECK-NEXT:    [[TMP15:%.*]] = fptosi fast float [[TMP12]] to i32
 ; CHECK-NEXT:    [[TMP16:%.*]] = call fast float @llvm.fabs.f32(float [[TMP14]])
 ; CHECK-NEXT:    [[TMP17:%.*]] = call fast float @llvm.fabs.f32(float [[TMP9]])
 ; CHECK-NEXT:    [[TMP18:%.*]] = fcmp fast oge float [[TMP16]], [[TMP17]]
@@ -2930,14 +2930,14 @@ define amdgpu_kernel void @sdiv_v4i16(ptr addrspace(1) %out, <4 x i16> %x, <4 x
 ; CHECK-NEXT:    [[TMP29:%.*]] = xor i32 [[TMP27]], [[TMP28]]
 ; CHECK-NEXT:    [[TMP30:%.*]] = ashr i32 [[TMP29]], 30
 ; CHECK-NEXT:    [[TMP31:%.*]] = or i32 [[TMP30]], 1
-; CHECK-NEXT:    [[TMP32:%.*]] = sitofp i32 [[TMP27]] to float
-; CHECK-NEXT:    [[TMP33:%.*]] = sitofp i32 [[TMP28]] to float
+; CHECK-NEXT:    [[TMP32:%.*]] = sitofp fast i32 [[TMP27]] to float
+; CHECK-NEXT:    [[TMP33:%.*]] = sitofp fast i32 [[TMP28]] to float
 ; CHECK-NEXT:    [[TMP34:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP33]])
 ; CHECK-NEXT:    [[TMP35:%.*]] = fmul fast float [[TMP32]], [[TMP34]]
 ; CHECK-NEXT:    [[TMP36:%.*]] = call fast float @llvm.trunc.f32(float [[TMP35]])
 ; CHECK-NEXT:    [[TMP37:%.*]] = fneg fast float [[TMP36]]
 ; CHECK-NEXT:    [[TMP38:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP37]], float [[TMP33]], float [[TMP32]])
-; CHECK-NEXT:    [[TMP39:%.*]] = fptosi float [[TMP36]] to i32
+; CHECK-NEXT:    [[TMP39:%.*]] = fptosi fast float [[TMP36]] to i32
 ; CHECK-NEXT:    [[TMP40:%.*]] = call fast float @llvm.fabs.f32(float [[TMP38]])
 ; CHECK-NEXT:    [[TMP41:%.*]] = call fast float @llvm.fabs.f32(float [[TMP33]])
 ; CHECK-NEXT:    [[TMP42:%.*]] = fcmp fast oge float [[TMP40]], [[TMP41]]
@@ -2954,14 +2954,14 @@ define amdgpu_kernel void @sdiv_v4i16(ptr addrspace(1) %out, <4 x i16> %x, <4 x
 ; CHECK-NEXT:    [[TMP53:%.*]] = xor i32 [[TMP51]], [[TMP52]]
 ; CHECK-NEXT:    [[TMP54:%.*]] = ashr i32 [[TMP53]], 30
 ; CHECK-NEXT:    [[TMP55:%.*]] = or i32 [[TMP54]], 1
-; CHECK-NEXT:    [[TMP56:%.*]] = sitofp i32 [[TMP51]] to float
-; CHECK-NEXT:    [[TMP57:%.*]] = sitofp i32 [[TMP52]] to float
+; CHECK-NEXT:    [[TMP56:%.*]] = sitofp fast i32 [[TMP51]] to float
+; CHECK-NEXT:    [[TMP57:%.*]] = sitofp fast i32 [[TMP52]] to float
 ; CHECK-NEXT:    [[TMP58:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP57]])
 ; CHECK-NEXT:    [[TMP59:%.*]] = fmul fast float [[TMP56]], [[TMP58]]
 ; CHECK-NEXT:    [[TMP60:%.*]] = call fast float @llvm.trunc.f32(float [[TMP59]])
 ; CHECK-NEXT:    [[TMP61:%.*]] = fneg fast float [[TMP60]]
 ; CHECK-NEXT:    [[TMP62:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP61]], float [[TMP57]], float [[TMP56]])
-; CHECK-NEXT:    [[TMP63:%.*]] = fptosi float [[TMP60]] to i32
+; CHECK-NEXT:    [[TMP63:%.*]] = fptosi fast float [[TMP60]] to i32
 ; CHECK-NEXT:    [[TMP64:%.*]] = call fast float @llvm.fabs.f32(float [[TMP62]])
 ; CHECK-NEXT:    [[TMP65:%.*]] = call fast float @llvm.fabs.f32(float [[TMP57]])
 ; CHECK-NEXT:    [[TMP66:%.*]] = fcmp fast oge float [[TMP64]], [[TMP65]]
@@ -2978,14 +2978,14 @@ define amdgpu_kernel void @sdiv_v4i16(ptr addrspace(1) %out, <4 x i16> %x, <4 x
 ; CHECK-NEXT:    [[TMP77:%.*]] = xor i32 [[TMP75]], [[TMP76]]
 ; CHECK-NEXT:    [[TMP78:%.*]] = ashr i32 [[TMP77]], 30
 ; CHECK-NEXT:    [[TMP79:%.*]] = or i32 [[TMP78]], 1
-; CHECK-NEXT:    [[TMP80:%.*]] = sitofp i32 [[TMP75]] to float
-; CHECK-NEXT:    [[TMP81:%.*]] = sitofp i32 [[TMP76]] to float
+; CHECK-NEXT:    [[TMP80:%.*]] = sitofp fast i32 [[TMP75]] to float
+; CHECK-NEXT:    [[TMP81:%.*]] = sitofp fast i32 [[TMP76]] to float
 ; CHECK-NEXT:    [[TMP82:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP81]])
 ; CHECK-NEXT:    [[TMP83:%.*]] = fmul fast float [[TMP80]], [[TMP82]]
 ; CHECK-NEXT:    [[TMP84:%.*]] = call fast float @llvm.trunc.f32(float [[TMP83]])
 ; CHECK-NEXT:    [[TMP85:%.*]] = fneg fast float [[TMP84]]
 ; CHECK-NEXT:    [[TMP86:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP85]], float [[TMP81]], float [[TMP80]])
-; CHECK-NEXT:    [[TMP87:%.*]] = fptosi float [[TMP84]] to i32
+; CHECK-NEXT:    [[TMP87:%.*]] = fptosi fast float [[TMP84]] to i32
 ; CHECK-NEXT:    [[TMP88:%.*]] = call fast float @llvm.fabs.f32(float [[TMP86]])
 ; CHECK-NEXT:    [[TMP89:%.*]] = call fast float @llvm.fabs.f32(float [[TMP81]])
 ; CHECK-NEXT:    [[TMP90:%.*]] = fcmp fast oge float [[TMP88]], [[TMP89]]
@@ -3168,14 +3168,14 @@ define amdgpu_kernel void @srem_v4i16(ptr addrspace(1) %out, <4 x i16> %x, <4 x
 ; CHECK-NEXT:    [[TMP5:%.*]] = xor i32 [[TMP3]], [[TMP4]]
 ; CHECK-NEXT:    [[TMP6:%.*]] = ashr i32 [[TMP5]], 30
 ; CHECK-NEXT:    [[TMP7:%.*]] = or i32 [[TMP6]], 1
-; CHECK-NEXT:    [[TMP8:%.*]] = sitofp i32 [[TMP3]] to float
-; CHECK-NEXT:    [[TMP9:%.*]] = sitofp i32 [[TMP4]] to float
+; CHECK-NEXT:    [[TMP8:%.*]] = sitofp fast i32 [[TMP3]] to float
+; CHECK-NEXT:    [[TMP9:%.*]] = sitofp fast i32 [[TMP4]] to float
 ; CHECK-NEXT:    [[TMP10:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP9]])
 ; CHECK-NEXT:    [[TMP11:%.*]] = fmul fast float [[TMP8]], [[TMP10]]
 ; CHECK-NEXT:    [[TMP12:%.*]] = call fast float @llvm.trunc.f32(float [[TMP11]])
 ; CHECK-NEXT:    [[TMP13:%.*]] = fneg fast float [[TMP12]]
 ; CHECK-NEXT:    [[TMP14:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP13]], float [[TMP9]], float [[TMP8]])
-; CHECK-NEXT:    [[TMP15:%.*]] = fptosi float [[TMP12]] to i32
+; CHECK-NEXT:    [[TMP15:%.*]] = fptosi fast float [[TMP12]] to i32
 ; CHECK-NEXT:    [[TMP16:%.*]] = call fast float @llvm.fabs.f32(float [[TMP14]])
 ; CHECK-NEXT:    [[TMP17:%.*]] = call fast float @llvm.fabs.f32(float [[TMP9]])
 ; CHECK-NEXT:    [[TMP18:%.*]] = fcmp fast oge float [[TMP16]], [[TMP17]]
@@ -3194,14 +3194,14 @@ define amdgpu_kernel void @srem_v4i16(ptr addrspace(1) %out, <4 x i16> %x, <4 x
 ; CHECK-NEXT:    [[TMP31:%.*]] = xor i32 [[TMP29]], [[TMP30]]
 ; CHECK-NEXT:    [[TMP32:%.*]] = ashr i32 [[TMP31]], 30
 ; CHECK-NEXT:    [[TMP33:%.*]] = or i32 [[TMP32]], 1
-; CHECK-NEXT:    [[TMP34:%.*]] = sitofp i32 [[TMP29]] to float
-; CHECK-NEXT:    [[TMP35:%.*]] = sitofp i32 [[TMP30]] to float
+; CHECK-NEXT:    [[TMP34:%.*]] = sitofp fast i32 [[TMP29]] to float
+; CHECK-NEXT:    [[TMP35:%.*]] = sitofp fast i32 [[TMP30]] to float
 ; CHECK-NEXT:    [[TMP36:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP35]])
 ; CHECK-NEXT:    [[TMP37:%.*]] = fmul fast float [[TMP34]], [[TMP36]]
 ; CHECK-NEXT:    [[TMP38:%.*]] = call fast float @llvm.trunc.f32(float [[TMP37]])
 ; CHECK-NEXT:    [[TMP39:%.*]] = fneg fast float [[TMP38]]
 ; CHECK-NEXT:    [[TMP40:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP39]], float [[TMP35]], float [[TMP34]])
-; CHECK-NEXT:    [[TMP41:%.*]] = fptosi float [[TMP38]] to i32
+; CHECK-NEXT:    [[TMP41:%.*]] = fptosi fast float [[TMP38]] to i32
 ; CHECK-NEXT:    [[TMP42:%.*]] = call fast float @llvm.fabs.f32(float [[TMP40]])
 ; CHECK-NEXT:    [[TMP43:%.*]] = call fast float @llvm.fabs.f32(float [[TMP35]])
 ; CHECK-NEXT:    [[TMP44:%.*]] = fcmp fast oge float [[TMP42]], [[TMP43]]
@@ -3220,14 +3220,14 @@ define amdgpu_kernel void @srem_v4i16(ptr addrspace(1) %out, <4 x i16> %x, <4 x
 ; CHECK-NEXT:    [[TMP57:%.*]] = xor i32 [[TMP55]], [[TMP56]]
 ; CHECK-NEXT:    [[TMP58:%.*]] = ashr i32 [[TMP57]], 30
 ; CHECK-NEXT:    [[TMP59:%.*]] = or i32 [[TMP58]], 1
-; CHECK-NEXT:    [[TMP60:%.*]] = sitofp i32 [[TMP55]] to float
-; CHECK-NEXT:    [[TMP61:%.*]] = sitofp i32 [[TMP56]] to float
+; CHECK-NEXT:    [[TMP60:%.*]] = sitofp fast i32 [[TMP55]] to float
+; CHECK-NEXT:    [[TMP61:%.*]] = sitofp fast i32 [[TMP56]] to float
 ; CHECK-NEXT:    [[TMP62:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP61]])
 ; CHECK-NEXT:    [[TMP63:%.*]] = fmul fast float [[TMP60]], [[TMP62]]
 ; CHECK-NEXT:    [[TMP64:%.*]] = call fast float @llvm.trunc.f32(float [[TMP63]])
 ; CHECK-NEXT:    [[TMP65:%.*]] = fneg fast float [[TMP64]]
 ; CHECK-NEXT:    [[TMP66:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP65]], float [[TMP61]], float [[TMP60]])
-; CHECK-NEXT:    [[TMP67:%.*]] = fptosi float [[TMP64]] to i32
+; CHECK-NEXT:    [[TMP67:%.*]] = fptosi fast float [[TMP64]] to i32
 ; CHECK-NEXT:    [[TMP68:%.*]] = call fast float @llvm.fabs.f32(float [[TMP66]])
 ; CHECK-NEXT:    [[TMP69:%.*]] = call fast float @llvm.fabs.f32(float [[TMP61]])
 ; CHECK-NEXT:    [[TMP70:%.*]] = fcmp fast oge float [[TMP68]], [[TMP69]]
@@ -3246,14 +3246,14 @@ define amdgpu_kernel void @srem_v4i16(ptr addrspace(1) %out, <4 x i16> %x, <4 x
 ; CHECK-NEXT:    [[TMP83:%.*]] = xor i32 [[TMP81]], [[TMP82]]
 ; CHECK-NEXT:    [[TMP84:%.*]] = ashr i32 [[TMP83]], 30
 ; CHECK-NEXT:    [[TMP85:%.*]] = or i32 [[TMP84]], 1
-; CHECK-NEXT:    [[TMP86:%.*]] = sitofp i32 [[TMP81]] to float
-; CHECK-NEXT:    [[TMP87:%.*]] = sitofp i32 [[TMP82]] to float
+; CHECK-NEXT:    [[TMP86:%.*]] = sitofp fast i32 [[TMP81]] to float
+; CHECK-NEXT:    [[TMP87:%.*]] = sitofp fast i32 [[TMP82]] to float
 ; CHECK-NEXT:    [[TMP88:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP87]])
 ; CHECK-NEXT:    [[TMP89:%.*]] = fmul fast float [[TMP86]], [[TMP88]]
 ; CHECK-NEXT:    [[TMP90:%.*]] = call fast float @llvm.trunc.f32(float [[TMP89]])
 ; CHECK-NEXT:    [[TMP91:%.*]] = fneg fast float [[TMP90]]
 ; CHECK-NEXT:    [[TMP92:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP91]], float [[TMP87]], float [[TMP86]])
-; CHECK-NEXT:    [[TMP93:%.*]] = fptosi float [[TMP90]] to i32
+; CHECK-NEXT:    [[TMP93:%.*]] = fptosi fast float [[TMP90]] to i32
 ; CHECK-NEXT:    [[TMP94:%.*]] = call fast float @llvm.fabs.f32(float [[TMP92]])
 ; CHECK-NEXT:    [[TMP95:%.*]] = call fast float @llvm.fabs.f32(float [[TMP87]])
 ; CHECK-NEXT:    [[TMP96:%.*]] = fcmp fast oge float [[TMP94]], [[TMP95]]
@@ -3460,7 +3460,7 @@ define amdgpu_kernel void @udiv_i3(ptr addrspace(1) %out, i3 %x, i3 %y) {
 ; CHECK-NEXT:    [[TMP7:%.*]] = call fast float @llvm.trunc.f32(float [[TMP6]])
 ; CHECK-NEXT:    [[TMP8:%.*]] = fneg fast float [[TMP7]]
 ; CHECK-NEXT:    [[TMP9:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP8]], float [[TMP4]], float [[TMP3]])
-; CHECK-NEXT:    [[TMP10:%.*]] = fptoui float [[TMP7]] to i32
+; CHECK-NEXT:    [[TMP10:%.*]] = fptoui fast float [[TMP7]] to i32
 ; CHECK-NEXT:    [[TMP11:%.*]] = call fast float @llvm.fabs.f32(float [[TMP9]])
 ; CHECK-NEXT:    [[TMP12:%.*]] = call fast float @llvm.fabs.f32(float [[TMP4]])
 ; CHECK-NEXT:    [[TMP13:%.*]] = fcmp fast oge float [[TMP11]], [[TMP12]]
@@ -3529,7 +3529,7 @@ define amdgpu_kernel void @urem_i3(ptr addrspace(1) %out, i3 %x, i3 %y) {
 ; CHECK-NEXT:    [[TMP7:%.*]] = call fast float @llvm.trunc.f32(float [[TMP6]])
 ; CHECK-NEXT:    [[TMP8:%.*]] = fneg fast float [[TMP7]]
 ; CHECK-NEXT:    [[TMP9:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP8]], float [[TMP4]], float [[TMP3]])
-; CHECK-NEXT:    [[TMP10:%.*]] = fptoui float [[TMP7]] to i32
+; CHECK-NEXT:    [[TMP10:%.*]] = fptoui fast float [[TMP7]] to i32
 ; CHECK-NEXT:    [[TMP11:%.*]] = call fast float @llvm.fabs.f32(float [[TMP9]])
 ; CHECK-NEXT:    [[TMP12:%.*]] = call fast float @llvm.fabs.f32(float [[TMP4]])
 ; CHECK-NEXT:    [[TMP13:%.*]] = fcmp fast oge float [[TMP11]], [[TMP12]]
@@ -3603,14 +3603,14 @@ define amdgpu_kernel void @sdiv_i3(ptr addrspace(1) %out, i3 %x, i3 %y) {
 ; CHECK-NEXT:    [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]]
 ; CHECK-NEXT:    [[TMP4:%.*]] = ashr i32 [[TMP3]], 30
 ; CHECK-NEXT:    [[TMP5:%.*]] = or i32 [[TMP4]], 1
-; CHECK-NEXT:    [[TMP6:%.*]] = sitofp i32 [[TMP1]] to float
-; CHECK-NEXT:    [[TMP7:%.*]] = sitofp i32 [[TMP2]] to float
+; CHECK-NEXT:    [[TMP6:%.*]] = sitofp fast i32 [[TMP1]] to float
+; CHECK-NEXT:    [[TMP7:%.*]] = sitofp fast i32 [[TMP2]] to float
 ; CHECK-NEXT:    [[TMP8:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP7]])
 ; CHECK-NEXT:    [[TMP9:%.*]] = fmul fast float [[TMP6]], [[TMP8]]
 ; CHECK-NEXT:    [[TMP10:%.*]] = call fast float @llvm.trunc.f32(float [[TMP9]])
 ; CHECK-NEXT:    [[TMP11:%.*]] = fneg fast float [[TMP10]]
 ; CHECK-NEXT:    [[TMP12:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP11]], float [[TMP7]], float [[TMP6]])
-; CHECK-NEXT:    [[TMP13:%.*]] = fptosi float [[TMP10]] to i32
+; CHECK-NEXT:    [[TMP13:%.*]] = fptosi fast float [[TMP10]] to i32
 ; CHECK-NEXT:    [[TMP14:%.*]] = call fast float @llvm.fabs.f32(float [[TMP12]])
 ; CHECK-NEXT:    [[TMP15:%.*]] = call fast float @llvm.fabs.f32(float [[TMP7]])
 ; CHECK-NEXT:    [[TMP16:%.*]] = fcmp fast oge float [[TMP14]], [[TMP15]]
@@ -3686,14 +3686,14 @@ define amdgpu_kernel void @srem_i3(ptr addrspace(1) %out, i3 %x, i3 %y) {
 ; CHECK-NEXT:    [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]]
 ; CHECK-NEXT:    [[TMP4:%.*]] = ashr i32 [[TMP3]], 30
 ; CHECK-NEXT:    [[TMP5:%.*]] = or i32 [[TMP4]], 1
-; CHECK-NEXT:    [[TMP6:%.*]] = sitofp i32 [[TMP1]] to float
-; CHECK-NEXT:    [[TMP7:%.*]] = sitofp i32 [[TMP2]] to float
+; CHECK-NEXT:    [[TMP6:%.*]] = sitofp fast i32 [[TMP1]] to float
+; CHECK-NEXT:    [[TMP7:%.*]] = sitofp fast i32 [[TMP2]] to float
 ; CHECK-NEXT:    [[TMP8:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP7]])
 ; CHECK-NEXT:    [[TMP9:%.*]] = fmul fast float [[TMP6]], [[TMP8]]
 ; CHECK-NEXT:    [[TMP10:%.*]] = call fast float @llvm.trunc.f32(float [[TMP9]])
 ; CHECK-NEXT:    [[TMP11:%.*]] = fneg fast float [[TMP10]]
 ; CHECK-NEXT:    [[TMP12:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP11]], float [[TMP7]], float [[TMP6]])
-; CHECK-NEXT:    [[TMP13:%.*]] = fptosi float [[TMP10]] to i32
+; CHECK-NEXT:    [[TMP13:%.*]] = fptosi fast float [[TMP10]] to i32
 ; CHECK-NEXT:    [[TMP14:%.*]] = call fast float @llvm.fabs.f32(float [[TMP12]])
 ; CHECK-NEXT:    [[TMP15:%.*]] = call fast float @llvm.fabs.f32(float [[TMP7]])
 ; CHECK-NEXT:    [[TMP16:%.*]] = fcmp fast oge float [[TMP14]], [[TMP15]]
@@ -3784,7 +3784,7 @@ define amdgpu_kernel void @udiv_v3i16(ptr addrspace(1) %out, <3 x i16> %x, <3 x
 ; CHECK-NEXT:    [[TMP9:%.*]] = call fast float @llvm.trunc.f32(float [[TMP8]])
 ; CHECK-NEXT:    [[TMP10:%.*]] = fneg fast float [[TMP9]]
 ; CHECK-NEXT:    [[TMP11:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP10]], float [[TMP6]], float [[TMP5]])
-; CHECK-NEXT:    [[TMP12:%.*]] = fptoui float [[TMP9]] to i32
+; CHECK-NEXT:    [[TMP12:%.*]] = fptoui fast float [[TMP9]] to i32
 ; CHECK-NEXT:    [[TMP13:%.*]] = call fast float @llvm.fabs.f32(float [[TMP11]])
 ; CHECK-NEXT:    [[TMP14:%.*]] = call fast float @llvm.fabs.f32(float [[TMP6]])
 ; CHECK-NEXT:    [[TMP15:%.*]] = fcmp fast oge float [[TMP13]], [[TMP14]]
@@ -3804,7 +3804,7 @@ define amdgpu_kernel void @udiv_v3i16(ptr addrspace(1) %out, <3 x i16> %x, <3 x
 ; CHECK-NEXT:    [[TMP29:%.*]] = call fast float @llvm.trunc.f32(float [[TMP28]])
 ; CHECK-NEXT:    [[TMP30:%.*]] = fneg fast float [[TMP29]]
 ; CHECK-NEXT:    [[TMP31:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP30]], float [[TMP26]], float [[TMP25]])
-; CHECK-NEXT:    [[TMP32:%.*]] = fptoui float [[TMP29]] to i32
+; CHECK-NEXT:    [[TMP32:%.*]] = fptoui fast float [[TMP29]] to i32
 ; CHECK-NEXT:    [[TMP33:%.*]] = call fast float @llvm.fabs.f32(float [[TMP31]])
 ; CHECK-NEXT:    [[TMP34:%.*]] = call fast float @llvm.fabs.f32(float [[TMP26]])
 ; CHECK-NEXT:    [[TMP35:%.*]] = fcmp fast oge float [[TMP33]], [[TMP34]]
@@ -3824,7 +3824,7 @@ define amdgpu_kernel void @udiv_v3i16(ptr addrspace(1) %out, <3 x i16> %x, <3 x
 ; CHECK-NEXT:    [[TMP49:%.*]] = call fast float @llvm.trunc.f32(float [[TMP48]])
 ; CHECK-NEXT:    [[TMP50:%.*]] = fneg fast float [[TMP49]]
 ; CHECK-NEXT:    [[TMP51:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP50]], float [[TMP46]], float [[TMP45]])
-; CHECK-NEXT:    [[TMP52:%.*]] = fptoui float [[TMP49]] to i32
+; CHECK-NEXT:    [[TMP52:%.*]] = fptoui fast float [[TMP49]] to i32
 ; CHECK-NEXT:    [[TMP53:%.*]] = call fast float @llvm.fabs.f32(float [[TMP51]])
 ; CHECK-NEXT:    [[TMP54:%.*]] = call fast float @llvm.fabs.f32(float [[TMP46]])
 ; CHECK-NEXT:    [[TMP55:%.*]] = fcmp fast oge float [[TMP53]], [[TMP54]]
@@ -3946,7 +3946,7 @@ define amdgpu_kernel void @urem_v3i16(ptr addrspace(1) %out, <3 x i16> %x, <3 x
 ; CHECK-NEXT:    [[TMP9:%.*]] = call fast float @llvm.trunc.f32(float [[TMP8]])
 ; CHECK-NEXT:    [[TMP10:%.*]] = fneg fast float [[TMP9]]
 ; CHECK-NEXT:    [[TMP11:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP10]], float [[TMP6]], float [[TMP5]])
-; CHECK-NEXT:    [[TMP12:%.*]] = fptoui float [[TMP9]] to i32
+; CHECK-NEXT:    [[TMP12:%.*]] = fptoui fast float [[TMP9]] to i32
 ; CHECK-NEXT:    [[TMP13:%.*]] = call fast float @llvm.fabs.f32(float [[TMP11]])
 ; CHECK-NEXT:    [[TMP14:%.*]] = call fast float @llvm.fabs.f32(float [[TMP6]])
 ; CHECK-NEXT:    [[TMP15:%.*]] = fcmp fast oge float [[TMP13]], [[TMP14]]
@@ -3968,7 +3968,7 @@ define amdgpu_kernel void @urem_v3i16(ptr addrspace(1) %out, <3 x i16> %x, <3 x
 ; CHECK-NEXT:    [[TMP31:%.*]] = call fast float @llvm.trunc.f32(float [[TMP30]])
 ; CHECK-NEXT:    [[TMP32:%.*]] = fneg fast float [[TMP31]]
 ; CHECK-NEXT:    [[TMP33:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP32]], float [[TMP28]], float [[TMP27]])
-; CHECK-NEXT:    [[TMP34:%.*]] = fptoui float [[TMP31]] to i32
+; CHECK-NEXT:    [[TMP34:%.*]] = fptoui fast float [[TMP31]] to i32
 ; CHECK-NEXT:    [[TMP35:%.*]] = call fast float @llvm.fabs.f32(float [[TMP33]])
 ; CHECK-NEXT:    [[TMP36:%.*]] = call fast float @llvm.fabs.f32(float [[TMP28]])
 ; CHECK-NEXT:    [[TMP37:%.*]] = fcmp fast oge float [[TMP35]], [[TMP36]]
@@ -3990,7 +3990,7 @@ define amdgpu_kernel void @urem_v3i16(ptr addrspace(1) %out, <3 x i16> %x, <3 x
 ; CHECK-NEXT:    [[TMP53:%.*]] = call fast float @llvm.trunc.f32(float [[TMP52]])
 ; CHECK-NEXT:    [[TMP54:%.*]] = fneg fast float [[TMP53]]
 ; CHECK-NEXT:    [[TMP55:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP54]], float [[TMP50]], float [[TMP49]])
-; CHECK-NEXT:    [[TMP56:%.*]] = fptoui float [[TMP53]] to i32
+; CHECK-NEXT:    [[TMP56:%.*]] = fptoui fast float [[TMP53]] to i32
 ; CHECK-NEXT:    [[TMP57:%.*]] = call fast float @llvm.fabs.f32(float [[TMP55]])
 ; CHECK-NEXT:    [[TMP58:%.*]] = call fast float @llvm.fabs.f32(float [[TMP50]])
 ; CHECK-NEXT:    [[TMP59:%.*]] = fcmp fast oge float [[TMP57]], [[TMP58]]
@@ -4121,14 +4121,14 @@ define amdgpu_kernel void @sdiv_v3i16(ptr addrspace(1) %out, <3 x i16> %x, <3 x
 ; CHECK-NEXT:    [[TMP5:%.*]] = xor i32 [[TMP3]], [[TMP4]]
 ; CHECK-NEXT:    [[TMP6:%.*]] = ashr i32 [[TMP5]], 30
 ; CHECK-NEXT:    [[TMP7:%.*]] = or i32 [[TMP6]], 1
-; CHECK-NEXT:    [[TMP8:%.*]] = sitofp i32 [[TMP3]] to float
-; CHECK-NEXT:    [[TMP9:%.*]] = sitofp i32 [[TMP4]] to float
+; CHECK-NEXT:    [[TMP8:%.*]] = sitofp fast i32 [[TMP3]] to float
+; CHECK-NEXT:    [[TMP9:%.*]] = sitofp fast i32 [[TMP4]] to float
 ; CHECK-NEXT:    [[TMP10:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP9]])
 ; CHECK-NEXT:    [[TMP11:%.*]] = fmul fast float [[TMP8]], [[TMP10]]
 ; CHECK-NEXT:    [[TMP12:%.*]] = call fast float @llvm.trunc.f32(float [[TMP11]])
 ; CHECK-NEXT:    [[TMP13:%.*]] = fneg fast float [[TMP12]]
 ; CHECK-NEXT:    [[TMP14:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP13]], float [[TMP9]], float [[TMP8]])
-; CHECK-NEXT:    [[TMP15:%.*]] = fptosi float [[TMP12]] to i32
+; CHECK-NEXT:    [[TMP15:%.*]] = fptosi fast float [[TMP12]] to i32
 ; CHECK-NEXT:    [[TMP16:%.*]] = call fast float @llvm.fabs.f32(float [[TMP14]])
 ; CHECK-NEXT:    [[TMP17:%.*]] = call fast float @llvm.fabs.f32(float [[TMP9]])
 ; CHECK-NEXT:    [[TMP18:%.*]] = fcmp fast oge float [[TMP16]], [[TMP17]]
@@ -4145,14 +4145,14 @@ define amdgpu_kernel void @sdiv_v3i16(ptr addrspace(1) %out, <3 x i16> %x, <3 x
 ; CHECK-NEXT:    [[TMP29:%.*]] = xor i32 [[TMP27]], [[TMP28]]
 ; CHECK-NEXT:    [[TMP30:%.*]] = ashr i32 [[TMP29]], 30
 ; CHECK-NEXT:    [[TMP31:%.*]] = or i32 [[TMP30]], 1
-; CHECK-NEXT:    [[TMP32:%.*]] = sitofp i32 [[TMP27]] to float
-; CHECK-NEXT:    [[TMP33:%.*]] = sitofp i32 [[TMP28]] to float
+; CHECK-NEXT:    [[TMP32:%.*]] = sitofp fast i32 [[TMP27]] to float
+; CHECK-NEXT:    [[TMP33:%.*]] = sitofp fast i32 [[TMP28]] to float
 ; CHECK-NEXT:    [[TMP34:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP33]])
 ; CHECK-NEXT:    [[TMP35:%.*]] = fmul fast float [[TMP32]], [[TMP34]]
 ; CHECK-NEXT:    [[TMP36:%.*]] = call fast float @llvm.trunc.f32(float [[TMP35]])
 ; CHECK-NEXT:    [[TMP37:%.*]] = fneg fast float [[TMP36]]
 ; CHECK-NEXT:    [[TMP38:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP37]], float [[TMP33]], float [[TMP32]])
-; CHECK-NEXT:    [[TMP39:%.*]] = fptosi float [[TMP36]] to i32
+; CHECK-NEXT:    [[TMP39:%.*]] = fptosi fast float [[TMP36]] to i32
 ; CHECK-NEXT:    [[TMP40:%.*]] = call fast float @llvm.fabs.f32(float [[TMP38]])
 ; CHECK-NEXT:    [[TMP41:%.*]] = call fast float @llvm.fabs.f32(float [[TMP33]])
 ; CHECK-NEXT:    [[TMP42:%.*]] = fcmp fast oge float [[TMP40]], [[TMP41]]
@@ -4169,14 +4169,14 @@ define amdgpu_kernel void @sdiv_v3i16(ptr addrspace(1) %out, <3 x i16> %x, <3 x
 ; CHECK-NEXT:    [[TMP53:%.*]] = xor i32 [[TMP51]], [[TMP52]]
 ; CHECK-NEXT:    [[TMP54:%.*]] = ashr i32 [[TMP53]], 30
 ; CHECK-NEXT:    [[TMP55:%.*]] = or i32 [[TMP54]], 1
-; CHECK-NEXT:    [[TMP56:%.*]] = sitofp i32 [[TMP51]] to float
-; CHECK-NEXT:    [[TMP57:%.*]] = sitofp i32 [[TMP52]] to float
+; CHECK-NEXT:    [[TMP56:%.*]] = sitofp fast i32 [[TMP51]] to float
+; CHECK-NEXT:    [[TMP57:%.*]] = sitofp fast i32 [[TMP52]] to float
 ; CHECK-NEXT:    [[TMP58:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP57]])
 ; CHECK-NEXT:    [[TMP59:%.*]] = fmul fast float [[TMP56]], [[TMP58]]
 ; CHECK-NEXT:    [[TMP60:%.*]] = call fast float @llvm.trunc.f32(float [[TMP59]])
 ; CHECK-NEXT:    [[TMP61:%.*]] = fneg fast float [[TMP60]]
 ; CHECK-NEXT:    [[TMP62:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP61]], float [[TMP57]], float [[TMP56]])
-; CHECK-NEXT:    [[TMP63:%.*]] = fptosi float [[TMP60]] to i32
+; CHECK-NEXT:    [[TMP63:%.*]] = fptosi fast float [[TMP60]] to i32
 ; CHECK-NEXT:    [[TMP64:%.*]] = call fast float @llvm.fabs.f32(float [[TMP62]])
 ; CHECK-NEXT:    [[TMP65:%.*]] = call fast float @llvm.fabs.f32(float [[TMP57]])
 ; CHECK-NEXT:    [[TMP66:%.*]] = fcmp fast oge float [[TMP64]], [[TMP65]]
@@ -4324,14 +4324,14 @@ define amdgpu_kernel void @srem_v3i16(ptr addrspace(1) %out, <3 x i16> %x, <3 x
 ; CHECK-NEXT:    [[TMP5:%.*]] = xor i32 [[TMP3]], [[TMP4]]
 ; CHECK-NEXT:    [[TMP6:%.*]] = ashr i32 [[TMP5]], 30
 ; CHECK-NEXT:    [[TMP7:%.*]] = or i32 [[TMP6]], 1
-; CHECK-NEXT:    [[TMP8:%.*]] = sitofp i32 [[TMP3]] to float
-; CHECK-NEXT:    [[TMP9:%.*]] = sitofp i32 [[TMP4]] to float
+; CHECK-NEXT:    [[TMP8:%.*]] = sitofp fast i32 [[TMP3]] to float
+; CHECK-NEXT:    [[TMP9:%.*]] = sitofp fast i32 [[TMP4]] to float
 ; CHECK-NEXT:    [[TMP10:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP9]])
 ; CHECK-NEXT:    [[TMP11:%.*]] = fmul fast float [[TMP8]], [[TMP10]]
 ; CHECK-NEXT:    [[TMP12:%.*]] = call fast float @llvm.trunc.f32(float [[TMP11]])
 ; CHECK-NEXT:    [[TMP13:%.*]] = fneg fast float [[TMP12]]
 ; CHECK-NEXT:    [[TMP14:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP13]], float [[TMP9]], float [[TMP8]])
-; CHECK-NEXT:    [[TMP15:%.*]] = fptosi float [[TMP12]] to i32
+; CHECK-NEXT:    [[TMP15:%.*]] = fptosi fast float [[TMP12]] to i32
 ; CHECK-NEXT:    [[TMP16:%.*]] = call fast float @llvm.fabs.f32(float [[TMP14]])
 ; CHECK-NEXT:    [[TMP17:%.*]] = call fast float @llvm.fabs.f32(float [[TMP9]])
 ; CHECK-NEXT:    [[TMP18:%.*]] = fcmp fast oge float [[TMP16]], [[TMP17]]
@@ -4350,14 +4350,14 @@ define amdgpu_kernel void @srem_v3i16(ptr addrspace(1) %out, <3 x i16> %x, <3 x
 ; CHECK-NEXT:    [[TMP31:%.*]] = xor i32 [[TMP29]], [[TMP30]]
 ; CHECK-NEXT:    [[TMP32:%.*]] = ashr i32 [[TMP31]], 30
 ; CHECK-NEXT:    [[TMP33:%.*]] = or i32 [[TMP32]], 1
-; CHECK-NEXT:    [[TMP34:%.*]] = sitofp i32 [[TMP29]] to float
-; CHECK-NEXT:    [[TMP35:%.*]] = sitofp i32 [[TMP30]] to float
+; CHECK-NEXT:    [[TMP34:%.*]] = sitofp fast i32 [[TMP29]] to float
+; CHECK-NEXT:    [[TMP35:%.*]] = sitofp fast i32 [[TMP30]] to float
 ; CHECK-NEXT:    [[TMP36:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP35]])
 ; CHECK-NEXT:    [[TMP37:%.*]] = fmul fast float [[TMP34]], [[TMP36]]
 ; CHECK-NEXT:    [[TMP38:%.*]] = call fast float @llvm.trunc.f32(float [[TMP37]])
 ; CHECK-NEXT:    [[TMP39:%.*]] = fneg fast float [[TMP38]]
 ; CHECK-NEXT:    [[TMP40:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP39]], float [[TMP35]], float [[TMP34]])
-; CHECK-NEXT:    [[TMP41:%.*]] = fptosi float [[TMP38]] to i32
+; CHECK-NEXT:    [[TMP41:%.*]] = fptosi fast float [[TMP38]] to i32
 ; CHECK-NEXT:    [[TMP42:%.*]] = call fast float @llvm.fabs.f32(float [[TMP40]])
 ; CHECK-NEXT:    [[TMP43:%.*]] = call fast float @llvm.fabs.f32(float [[TMP35]])
 ; CHECK-NEXT:    [[TMP44:%.*]] = fcmp fast oge float [[TMP42]], [[TMP43]]
@@ -4376,14 +4376,14 @@ define amdgpu_kernel void @srem_v3i16(ptr addrspace(1) %out, <3 x i16> %x, <3 x
 ; CHECK-NEXT:    [[TMP57:%.*]] = xor i32 [[TMP55]], [[TMP56]]
 ; CHECK-NEXT:    [[TMP58:%.*]] = ashr i32 [[TMP57]], 30
 ; CHECK-NEXT:    [[TMP59:%.*]] = or i32 [[TMP58]], 1
-; CHECK-NEXT:    [[TMP60:%.*]] = sitofp i32 [[TMP55]] to float
-; CHECK-NEXT:    [[TMP61:%.*]] = sitofp i32 [[TMP56]] to float
+; CHECK-NEXT:    [[TMP60:%.*]] = sitofp fast i32 [[TMP55]] to float
+; CHECK-NEXT:    [[TMP61:%.*]] = sitofp fast i32 [[TMP56]] to float
 ; CHECK-NEXT:    [[TMP62:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP61]])
 ; CHECK-NEXT:    [[TMP63:%.*]] = fmul fast float [[TMP60]], [[TMP62]]
 ; CHECK-NEXT:    [[TMP64:%.*]] = call fast float @llvm.trunc.f32(float [[TMP63]])
 ; CHECK-NEXT:    [[TMP65:%.*]] = fneg fast float [[TMP64]]
 ; CHECK-NEXT:    [[TMP66:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP65]], float [[TMP61]], float [[TMP60]])
-; CHECK-NEXT:    [[TMP67:%.*]] = fptosi float [[TMP64]] to i32
+; CHECK-NEXT:    [[TMP67:%.*]] = fptosi fast float [[TMP64]] to i32
 ; CHECK-NEXT:    [[TMP68:%.*]] = call fast float @llvm.fabs.f32(float [[TMP66]])
 ; CHECK-NEXT:    [[TMP69:%.*]] = call fast float @llvm.fabs.f32(float [[TMP61]])
 ; CHECK-NEXT:    [[TMP70:%.*]] = fcmp fast oge float [[TMP68]], [[TMP69]]
@@ -4551,7 +4551,7 @@ define amdgpu_kernel void @udiv_v3i15(ptr addrspace(1) %out, <3 x i15> %x, <3 x
 ; CHECK-NEXT:    [[TMP9:%.*]] = call fast float @llvm.trunc.f32(float [[TMP8]])
 ; CHECK-NEXT:    [[TMP10:%.*]] = fneg fast float [[TMP9]]
 ; CHECK-NEXT:    [[TMP11:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP10]], float [[TMP6]], float [[TMP5]])
-; CHECK-NEXT:    [[TMP12:%.*]] = fptoui float [[TMP9]] to i32
+; CHECK-NEXT:    [[TMP12:%.*]] = fptoui fast float [[TMP9]] to i32
 ; CHECK-NEXT:    [[TMP13:%.*]] = call fast float @llvm.fabs.f32(float [[TMP11]])
 ; CHECK-NEXT:    [[TMP14:%.*]] = call fast float @llvm.fabs.f32(float [[TMP6]])
 ; CHECK-NEXT:    [[TMP15:%.*]] = fcmp fast oge float [[TMP13]], [[TMP14]]
@@ -4571,7 +4571,7 @@ define amdgpu_kernel void @udiv_v3i15(ptr addrspace(1) %out, <3 x i15> %x, <3 x
 ; CHECK-NEXT:    [[TMP29:%.*]] = call fast float @llvm.trunc.f32(float [[TMP28]])
 ; CHECK-NEXT:    [[TMP30:%.*]] = fneg fast float [[TMP29]]
 ; CHECK-NEXT:    [[TMP31:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP30]], float [[TMP26]], float [[TMP25]])
-; CHECK-NEXT:    [[TMP32:%.*]] = fptoui float [[TMP29]] to i32
+; CHECK-NEXT:    [[TMP32:%.*]] = fptoui fast float [[TMP29]] to i32
 ; CHECK-NEXT:    [[TMP33:%.*]] = call fast float @llvm.fabs.f32(float [[TMP31]])
 ; CHECK-NEXT:    [[TMP34:%.*]] = call fast float @llvm.fabs.f32(float [[TMP26]])
 ; CHECK-NEXT:    [[TMP35:%.*]] = fcmp fast oge float [[TMP33]], [[TMP34]]
@@ -4591,7 +4591,7 @@ define amdgpu_kernel void @udiv_v3i15(ptr addrspace(1) %out, <3 x i15> %x, <3 x
 ; CHECK-NEXT:    [[TMP49:%.*]] = call fast float @llvm.trunc.f32(float [[TMP48]])
 ; CHECK-NEXT:    [[TMP50:%.*]] = fneg fast float [[TMP49]]
 ; CHECK-NEXT:    [[TMP51:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP50]], float [[TMP46]], float [[TMP45]])
-; CHECK-NEXT:    [[TMP52:%.*]] = fptoui float [[TMP49]] to i32
+; CHECK-NEXT:    [[TMP52:%.*]] = fptoui fast float [[TMP49]] to i32
 ; CHECK-NEXT:    [[TMP53:%.*]] = call fast float @llvm.fabs.f32(float [[TMP51]])
 ; CHECK-NEXT:    [[TMP54:%.*]] = call fast float @llvm.fabs.f32(float [[TMP46]])
 ; CHECK-NEXT:    [[TMP55:%.*]] = fcmp fast oge float [[TMP53]], [[TMP54]]
@@ -4732,7 +4732,7 @@ define amdgpu_kernel void @urem_v3i15(ptr addrspace(1) %out, <3 x i15> %x, <3 x
 ; CHECK-NEXT:    [[TMP9:%.*]] = call fast float @llvm.trunc.f32(float [[TMP8]])
 ; CHECK-NEXT:    [[TMP10:%.*]] = fneg fast float [[TMP9]]
 ; CHECK-NEXT:    [[TMP11:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP10]], float [[TMP6]], float [[TMP5]])
-; CHECK-NEXT:    [[TMP12:%.*]] = fptoui float [[TMP9]] to i32
+; CHECK-NEXT:    [[TMP12:%.*]] = fptoui fast float [[TMP9]] to i32
 ; CHECK-NEXT:    [[TMP13:%.*]] = call fast float @llvm.fabs.f32(float [[TMP11]])
 ; CHECK-NEXT:    [[TMP14:%.*]] = call fast float @llvm.fabs.f32(float [[TMP6]])
 ; CHECK-NEXT:    [[TMP15:%.*]] = fcmp fast oge float [[TMP13]], [[TMP14]]
@@ -4754,7 +4754,7 @@ define amdgpu_kernel void @urem_v3i15(ptr addrspace(1) %out, <3 x i15> %x, <3 x
 ; CHECK-NEXT:    [[TMP31:%.*]] = call fast float @llvm.trunc.f32(float [[TMP30]])
 ; CHECK-NEXT:    [[TMP32:%.*]] = fneg fast float [[TMP31]]
 ; CHECK-NEXT:    [[TMP33:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP32]], float [[TMP28]], float [[TMP27]])
-; CHECK-NEXT:    [[TMP34:%.*]] = fptoui float [[TMP31]] to i32
+; CHECK-NEXT:    [[TMP34:%.*]] = fptoui fast float [[TMP31]] to i32
 ; CHECK-NEXT:    [[TMP35:%.*]] = call fast float @llvm.fabs.f32(float [[TMP33]])
 ; CHECK-NEXT:    [[TMP36:%.*]] = call fast float @llvm.fabs.f32(float [[TMP28]])
 ; CHECK-NEXT:    [[TMP37:%.*]] = fcmp fast oge float [[TMP35]], [[TMP36]]
@@ -4776,7 +4776,7 @@ define amdgpu_kernel void @urem_v3i15(ptr addrspace(1) %out, <3 x i15> %x, <3 x
 ; CHECK-NEXT:    [[TMP53:%.*]] = call fast float @llvm.trunc.f32(float [[TMP52]])
 ; CHECK-NEXT:    [[TMP54:%.*]] = fneg fast float [[TMP53]]
 ; CHECK-NEXT:    [[TMP55:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP54]], float [[TMP50]], float [[TMP49]])
-; CHECK-NEXT:    [[TMP56:%.*]] = fptoui float [[TMP53]] to i32
+; CHECK-NEXT:    [[TMP56:%.*]] = fptoui fast float [[TMP53]] to i32
 ; CHECK-NEXT:    [[TMP57:%.*]] = call fast float @llvm.fabs.f32(float [[TMP55]])
 ; CHECK-NEXT:    [[TMP58:%.*]] = call fast float @llvm.fabs.f32(float [[TMP50]])
 ; CHECK-NEXT:    [[TMP59:%.*]] = fcmp fast oge float [[TMP57]], [[TMP58]]
@@ -4931,14 +4931,14 @@ define amdgpu_kernel void @sdiv_v3i15(ptr addrspace(1) %out, <3 x i15> %x, <3 x
 ; CHECK-NEXT:    [[TMP5:%.*]] = xor i32 [[TMP3]], [[TMP4]]
 ; CHECK-NEXT:    [[TMP6:%.*]] = ashr i32 [[TMP5]], 30
 ; CHECK-NEXT:    [[TMP7:%.*]] = or i32 [[TMP6]], 1
-; CHECK-NEXT:    [[TMP8:%.*]] = sitofp i32 [[TMP3]] to float
-; CHECK-NEXT:    [[TMP9:%.*]] = sitofp i32 [[TMP4]] to float
+; CHECK-NEXT:    [[TMP8:%.*]] = sitofp fast i32 [[TMP3]] to float
+; CHECK-NEXT:    [[TMP9:%.*]] = sitofp fast i32 [[TMP4]] to float
 ; CHECK-NEXT:    [[TMP10:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP9]])
 ; CHECK-NEXT:    [[TMP11:%.*]] = fmul fast float [[TMP8]], [[TMP10]]
 ; CHECK-NEXT:    [[TMP12:%.*]] = call fast float @llvm.trunc.f32(float [[TMP11]])
 ; CHECK-NEXT:    [[TMP13:%.*]] = fneg fast float [[TMP12]]
 ; CHECK-NEXT:    [[TMP14:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP13]], float [[TMP9]], float [[TMP8]])
-; CHECK-NEXT:    [[TMP15:%.*]] = fptosi float [[TMP12]] to i32
+; CHECK-NEXT:    [[TMP15:%.*]] = fptosi fast float [[TMP12]] to i32
 ; CHECK-NEXT:    [[TMP16:%.*]] = call fast float @llvm.fabs.f32(float [[TMP14]])
 ; CHECK-NEXT:    [[TMP17:%.*]] = call fast float @llvm.fabs.f32(float [[TMP9]])
 ; CHECK-NEXT:    [[TMP18:%.*]] = fcmp fast oge float [[TMP16]], [[TMP17]]
@@ -4955,14 +4955,14 @@ define amdgpu_kernel void @sdiv_v3i15(ptr addrspace(1) %out, <3 x i15> %x, <3 x
 ; CHECK-NEXT:    [[TMP29:%.*]] = xor i32 [[TMP27]], [[TMP28]]
 ; CHECK-NEXT:    [[TMP30:%.*]] = ashr i32 [[TMP29]], 30
 ; CHECK-NEXT:    [[TMP31:%.*]] = or i32 [[TMP30]], 1
-; CHECK-NEXT:    [[TMP32:%.*]] = sitofp i32 [[TMP27]] to float
-; CHECK-NEXT:    [[TMP33:%.*]] = sitofp i32 [[TMP28]] to float
+; CHECK-NEXT:    [[TMP32:%.*]] = sitofp fast i32 [[TMP27]] to float
+; CHECK-NEXT:    [[TMP33:%.*]] = sitofp fast i32 [[TMP28]] to float
 ; CHECK-NEXT:    [[TMP34:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP33]])
 ; CHECK-NEXT:    [[TMP35:%.*]] = fmul fast float [[TMP32]], [[TMP34]]
 ; CHECK-NEXT:    [[TMP36:%.*]] = call fast float @llvm.trunc.f32(float [[TMP35]])
 ; CHECK-NEXT:    [[TMP37:%.*]] = fneg fast float [[TMP36]]
 ; CHECK-NEXT:    [[TMP38:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP37]], float [[TMP33]], float [[TMP32]])
-; CHECK-NEXT:    [[TMP39:%.*]] = fptosi float [[TMP36]] to i32
+; CHECK-NEXT:    [[TMP39:%.*]] = fptosi fast float [[TMP36]] to i32
 ; CHECK-NEXT:    [[TMP40:%.*]] = call fast float @llvm.fabs.f32(float [[TMP38]])
 ; CHECK-NEXT:    [[TMP41:%.*]] = call fast float @llvm.fabs.f32(float [[TMP33]])
 ; CHECK-NEXT:    [[TMP42:%.*]] = fcmp fast oge float [[TMP40]], [[TMP41]]
@@ -4979,14 +4979,14 @@ define amdgpu_kernel void @sdiv_v3i15(ptr addrspace(1) %out, <3 x i15> %x, <3 x
 ; CHECK-NEXT:    [[TMP53:%.*]] = xor i32 [[TMP51]], [[TMP52]]
 ; CHECK-NEXT:    [[TMP54:%.*]] = ashr i32 [[TMP53]], 30
 ; CHECK-NEXT:    [[TMP55:%.*]] = or i32 [[TMP54]], 1
-; CHECK-NEXT:    [[TMP56:%.*]] = sitofp i32 [[TMP51]] to float
-; CHECK-NEXT:    [[TMP57:%.*]] = sitofp i32 [[TMP52]] to float
+; CHECK-NEXT:    [[TMP56:%.*]] = sitofp fast i32 [[TMP51]] to float
+; CHECK-NEXT:    [[TMP57:%.*]] = sitofp fast i32 [[TMP52]] to float
 ; CHECK-NEXT:    [[TMP58:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP57]])
 ; CHECK-NEXT:    [[TMP59:%.*]] = fmul fast float [[TMP56]], [[TMP58]]
 ; CHECK-NEXT:    [[TMP60:%.*]] = call fast float @llvm.trunc.f32(float [[TMP59]])
 ; CHECK-NEXT:    [[TMP61:%.*]] = fneg fast float [[TMP60]]
 ; CHECK-NEXT:    [[TMP62:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP61]], float [[TMP57]], float [[TMP56]])
-; CHECK-NEXT:    [[TMP63:%.*]] = fptosi float [[TMP60]] to i32
+; CHECK-NEXT:    [[TMP63:%.*]] = fptosi fast float [[TMP60]] to i32
 ; CHECK-NEXT:    [[TMP64:%.*]] = call fast float @llvm.fabs.f32(float [[TMP62]])
 ; CHECK-NEXT:    [[TMP65:%.*]] = call fast float @llvm.fabs.f32(float [[TMP57]])
 ; CHECK-NEXT:    [[TMP66:%.*]] = fcmp fast oge float [[TMP64]], [[TMP65]]
@@ -5152,14 +5152,14 @@ define amdgpu_kernel void @srem_v3i15(ptr addrspace(1) %out, <3 x i15> %x, <3 x
 ; CHECK-NEXT:    [[TMP5:%.*]] = xor i32 [[TMP3]], [[TMP4]]
 ; CHECK-NEXT:    [[TMP6:%.*]] = ashr i32 [[TMP5]], 30
 ; CHECK-NEXT:    [[TMP7:%.*]] = or i32 [[TMP6]], 1
-; CHECK-NEXT:    [[TMP8:%.*]] = sitofp i32 [[TMP3]] to float
-; CHECK-NEXT:    [[TMP9:%.*]] = sitofp i32 [[TMP4]] to float
+; CHECK-NEXT:    [[TMP8:%.*]] = sitofp fast i32 [[TMP3]] to float
+; CHECK-NEXT:    [[TMP9:%.*]] = sitofp fast i32 [[TMP4]] to float
 ; CHECK-NEXT:    [[TMP10:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP9]])
 ; CHECK-NEXT:    [[TMP11:%.*]] = fmul fast float [[TMP8]], [[TMP10]]
 ; CHECK-NEXT:    [[TMP12:%.*]] = call fast float @llvm.trunc.f32(float [[TMP11]])
 ; CHECK-NEXT:    [[TMP13:%.*]] = fneg fast float [[TMP12]]
 ; CHECK-NEXT:    [[TMP14:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP13]], float [[TMP9]], float [[TMP8]])
-; CHECK-NEXT:    [[TMP15:%.*]] = fptosi float [[TMP12]] to i32
+; CHECK-NEXT:    [[TMP15:%.*]] = fptosi fast float [[TMP12]] to i32
 ; CHECK-NEXT:    [[TMP16:%.*]] = call fast float @llvm.fabs.f32(float [[TMP14]])
 ; CHECK-NEXT:    [[TMP17:%.*]] = call fast float @llvm.fabs.f32(float [[TMP9]])
 ; CHECK-NEXT:    [[TMP18:%.*]] = fcmp fast oge float [[TMP16]], [[TMP17]]
@@ -5178,14 +5178,14 @@ define amdgpu_kernel void @srem_v3i15(ptr addrspace(1) %out, <3 x i15> %x, <3 x
 ; CHECK-NEXT:    [[TMP31:%.*]] = xor i32 [[TMP29]], [[TMP30]]
 ; CHECK-NEXT:    [[TMP32:%.*]] = ashr i32 [[TMP31]], 30
 ; CHECK-NEXT:    [[TMP33:%.*]] = or i32 [[TMP32]], 1
-; CHECK-NEXT:    [[TMP34:%.*]] = sitofp i32 [[TMP29]] to float
-; CHECK-NEXT:    [[TMP35:%.*]] = sitofp i32 [[TMP30]] to float
+; CHECK-NEXT:    [[TMP34:%.*]] = sitofp fast i32 [[TMP29]] to float
+; CHECK-NEXT:    [[TMP35:%.*]] = sitofp fast i32 [[TMP30]] to float
 ; CHECK-NEXT:    [[TMP36:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP35]])
 ; CHECK-NEXT:    [[TMP37:%.*]] = fmul fast float [[TMP34]], [[TMP36]]
 ; CHECK-NEXT:    [[TMP38:%.*]] = call fast float @llvm.trunc.f32(float [[TMP37]])
 ; CHECK-NEXT:    [[TMP39:%.*]] = fneg fast float [[TMP38]]
 ; CHECK-NEXT:    [[TMP40:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP39]], float [[TMP35]], float [[TMP34]])
-; CHECK-NEXT:    [[TMP41:%.*]] = fptosi float [[TMP38]] to i32
+; CHECK-NEXT:    [[TMP41:%.*]] = fptosi fast float [[TMP38]] to i32
 ; CHECK-NEXT:    [[TMP42:%.*]] = call fast float @llvm.fabs.f32(float [[TMP40]])
 ; CHECK-NEXT:    [[TMP43:%.*]] = call fast float @llvm.fabs.f32(float [[TMP35]])
 ; CHECK-NEXT:    [[TMP44:%.*]] = fcmp fast oge float [[TMP42]], [[TMP43]]
@@ -5204,14 +5204,14 @@ define amdgpu_kernel void @srem_v3i15(ptr addrspace(1) %out, <3 x i15> %x, <3 x
 ; CHECK-NEXT:    [[TMP57:%.*]] = xor i32 [[TMP55]], [[TMP56]]
 ; CHECK-NEXT:    [[TMP58:%.*]] = ashr i32 [[TMP57]], 30
 ; CHECK-NEXT:    [[TMP59:%.*]] = or i32 [[TMP58]], 1
-; CHECK-NEXT:    [[TMP60:%.*]] = sitofp i32 [[TMP55]] to float
-; CHECK-NEXT:    [[TMP61:%.*]] = sitofp i32 [[TMP56]] to float
+; CHECK-NEXT:    [[TMP60:%.*]] = sitofp fast i32 [[TMP55]] to float
+; CHECK-NEXT:    [[TMP61:%.*]] = sitofp fast i32 [[TMP56]] to float
 ; CHECK-NEXT:    [[TMP62:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP61]])
 ; CHECK-NEXT:    [[TMP63:%.*]] = fmul fast float [[TMP60]], [[TMP62]]
 ; CHECK-NEXT:    [[TMP64:%.*]] = call fast float @llvm.trunc.f32(float [[TMP63]])
 ; CHECK-NEXT:    [[TMP65:%.*]] = fneg fast float [[TMP64]]
 ; CHECK-NEXT:    [[TMP66:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP65]], float [[TMP61]], float [[TMP60]])
-; CHECK-NEXT:    [[TMP67:%.*]] = fptosi float [[TMP64]] to i32
+; CHECK-NEXT:    [[TMP67:%.*]] = fptosi fast float [[TMP64]] to i32
 ; CHECK-NEXT:    [[TMP68:%.*]] = call fast float @llvm.fabs.f32(float [[TMP66]])
 ; CHECK-NEXT:    [[TMP69:%.*]] = call fast float @llvm.fabs.f32(float [[TMP61]])
 ; CHECK-NEXT:    [[TMP70:%.*]] = fcmp fast oge float [[TMP68]], [[TMP69]]
@@ -5601,7 +5601,7 @@ define amdgpu_kernel void @udiv_v2i32_pow2_shl_denom(ptr addrspace(1) %out, <2 x
 ; CHECK-NEXT:    [[TMP3:%.*]] = uitofp i32 [[TMP2]] to float
 ; CHECK-NEXT:    [[TMP4:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP3]])
 ; CHECK-NEXT:    [[TMP5:%.*]] = fmul fast float [[TMP4]], 0x41EFFFFFC0000000
-; CHECK-NEXT:    [[TMP6:%.*]] = fptoui float [[TMP5]] to i32
+; CHECK-NEXT:    [[TMP6:%.*]] = fptoui fast float [[TMP5]] to i32
 ; CHECK-NEXT:    [[TMP7:%.*]] = sub i32 0, [[TMP2]]
 ; CHECK-NEXT:    [[TMP8:%.*]] = mul i32 [[TMP7]], [[TMP6]]
 ; CHECK-NEXT:    [[TMP9:%.*]] = zext i32 [[TMP6]] to i64
@@ -5633,7 +5633,7 @@ define amdgpu_kernel void @udiv_v2i32_pow2_shl_denom(ptr addrspace(1) %out, <2 x
 ; CHECK-NEXT:    [[TMP35:%.*]] = uitofp i32 [[TMP34]] to float
 ; CHECK-NEXT:    [[TMP36:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP35]])
 ; CHECK-NEXT:    [[TMP37:%.*]] = fmul fast float [[TMP36]], 0x41EFFFFFC0000000
-; CHECK-NEXT:    [[TMP38:%.*]] = fptoui float [[TMP37]] to i32
+; CHECK-NEXT:    [[TMP38:%.*]] = fptoui fast float [[TMP37]] to i32
 ; CHECK-NEXT:    [[TMP39:%.*]] = sub i32 0, [[TMP34]]
 ; CHECK-NEXT:    [[TMP40:%.*]] = mul i32 [[TMP39]], [[TMP38]]
 ; CHECK-NEXT:    [[TMP41:%.*]] = zext i32 [[TMP38]] to i64
@@ -5946,7 +5946,7 @@ define amdgpu_kernel void @urem_v2i32_pow2_shl_denom(ptr addrspace(1) %out, <2 x
 ; CHECK-NEXT:    [[TMP3:%.*]] = uitofp i32 [[TMP2]] to float
 ; CHECK-NEXT:    [[TMP4:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP3]])
 ; CHECK-NEXT:    [[TMP5:%.*]] = fmul fast float [[TMP4]], 0x41EFFFFFC0000000
-; CHECK-NEXT:    [[TMP6:%.*]] = fptoui float [[TMP5]] to i32
+; CHECK-NEXT:    [[TMP6:%.*]] = fptoui fast float [[TMP5]] to i32
 ; CHECK-NEXT:    [[TMP7:%.*]] = sub i32 0, [[TMP2]]
 ; CHECK-NEXT:    [[TMP8:%.*]] = mul i32 [[TMP7]], [[TMP6]]
 ; CHECK-NEXT:    [[TMP9:%.*]] = zext i32 [[TMP6]] to i64
@@ -5976,7 +5976,7 @@ define amdgpu_kernel void @urem_v2i32_pow2_shl_denom(ptr addrspace(1) %out, <2 x
 ; CHECK-NEXT:    [[TMP33:%.*]] = uitofp i32 [[TMP32]] to float
 ; CHECK-NEXT:    [[TMP34:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP33]])
 ; CHECK-NEXT:    [[TMP35:%.*]] = fmul fast float [[TMP34]], 0x41EFFFFFC0000000
-; CHECK-NEXT:    [[TMP36:%.*]] = fptoui float [[TMP35]] to i32
+; CHECK-NEXT:    [[TMP36:%.*]] = fptoui fast float [[TMP35]] to i32
 ; CHECK-NEXT:    [[TMP37:%.*]] = sub i32 0, [[TMP32]]
 ; CHECK-NEXT:    [[TMP38:%.*]] = mul i32 [[TMP37]], [[TMP36]]
 ; CHECK-NEXT:    [[TMP39:%.*]] = zext i32 [[TMP36]] to i64
@@ -6408,7 +6408,7 @@ define amdgpu_kernel void @sdiv_v2i32_pow2_shl_denom(ptr addrspace(1) %out, <2 x
 ; CHECK-NEXT:    [[TMP10:%.*]] = uitofp i32 [[TMP9]] to float
 ; CHECK-NEXT:    [[TMP11:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP10]])
 ; CHECK-NEXT:    [[TMP12:%.*]] = fmul fast float [[TMP11]], 0x41EFFFFFC0000000
-; CHECK-NEXT:    [[TMP13:%.*]] = fptoui float [[TMP12]] to i32
+; CHECK-NEXT:    [[TMP13:%.*]] = fptoui fast float [[TMP12]] to i32
 ; CHECK-NEXT:    [[TMP14:%.*]] = sub i32 0, [[TMP9]]
 ; CHECK-NEXT:    [[TMP15:%.*]] = mul i32 [[TMP14]], [[TMP13]]
 ; CHECK-NEXT:    [[TMP16:%.*]] = zext i32 [[TMP13]] to i64
@@ -6449,7 +6449,7 @@ define amdgpu_kernel void @sdiv_v2i32_pow2_shl_denom(ptr addrspace(1) %out, <2 x
 ; CHECK-NEXT:    [[TMP51:%.*]] = uitofp i32 [[TMP50]] to float
 ; CHECK-NEXT:    [[TMP52:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP51]])
 ; CHECK-NEXT:    [[TMP53:%.*]] = fmul fast float [[TMP52]], 0x41EFFFFFC0000000
-; CHECK-NEXT:    [[TMP54:%.*]] = fptoui float [[TMP53]] to i32
+; CHECK-NEXT:    [[TMP54:%.*]] = fptoui fast float [[TMP53]] to i32
 ; CHECK-NEXT:    [[TMP55:%.*]] = sub i32 0, [[TMP50]]
 ; CHECK-NEXT:    [[TMP56:%.*]] = mul i32 [[TMP55]], [[TMP54]]
 ; CHECK-NEXT:    [[TMP57:%.*]] = zext i32 [[TMP54]] to i64
@@ -6865,7 +6865,7 @@ define amdgpu_kernel void @srem_v2i32_pow2_shl_denom(ptr addrspace(1) %out, <2 x
 ; CHECK-NEXT:    [[TMP9:%.*]] = uitofp i32 [[TMP8]] to float
 ; CHECK-NEXT:    [[TMP10:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP9]])
 ; CHECK-NEXT:    [[TMP11:%.*]] = fmul fast float [[TMP10]], 0x41EFFFFFC0000000
-; CHECK-NEXT:    [[TMP12:%.*]] = fptoui float [[TMP11]] to i32
+; CHECK-NEXT:    [[TMP12:%.*]] = fptoui fast float [[TMP11]] to i32
 ; CHECK-NEXT:    [[TMP13:%.*]] = sub i32 0, [[TMP8]]
 ; CHECK-NEXT:    [[TMP14:%.*]] = mul i32 [[TMP13]], [[TMP12]]
 ; CHECK-NEXT:    [[TMP15:%.*]] = zext i32 [[TMP12]] to i64
@@ -6903,7 +6903,7 @@ define amdgpu_kernel void @srem_v2i32_pow2_shl_denom(ptr addrspace(1) %out, <2 x
 ; CHECK-NEXT:    [[TMP47:%.*]] = uitofp i32 [[TMP46]] to float
 ; CHECK-NEXT:    [[TMP48:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP47]])
 ; CHECK-NEXT:    [[TMP49:%.*]] = fmul fast float [[TMP48]], 0x41EFFFFFC0000000
-; CHECK-NEXT:    [[TMP50:%.*]] = fptoui float [[TMP49]] to i32
+; CHECK-NEXT:    [[TMP50:%.*]] = fptoui fast float [[TMP49]] to i32
 ; CHECK-NEXT:    [[TMP51:%.*]] = sub i32 0, [[TMP46]]
 ; CHECK-NEXT:    [[TMP52:%.*]] = mul i32 [[TMP51]], [[TMP50]]
 ; CHECK-NEXT:    [[TMP53:%.*]] = zext i32 [[TMP50]] to i64
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pow.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pow.ll
index 091e5a67799a9..84977cede80bd 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pow.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pow.ll
@@ -2191,7 +2191,7 @@ define float @test_pow_afn_f32_known_integral_sitofp(float %x, i32 %y) {
 ; CHECK-LABEL: define float @test_pow_afn_f32_known_integral_sitofp
 ; CHECK-SAME: (float [[X:%.*]], i32 [[Y:%.*]]) {
 ; CHECK-NEXT:    [[Y_CAST:%.*]] = sitofp i32 [[Y]] to float
-; CHECK-NEXT:    [[TMP1:%.*]] = fptosi float [[Y_CAST]] to i32
+; CHECK-NEXT:    [[TMP1:%.*]] = fptosi afn float [[Y_CAST]] to i32
 ; CHECK-NEXT:    [[POW:%.*]] = tail call afn float @_Z4pownfi(float [[X]], i32 [[TMP1]])
 ; CHECK-NEXT:    ret float [[POW]]
 ;
@@ -2204,10 +2204,10 @@ define float @test_pow_afn_nnan_ninf_f32_known_integral_sitofp(float %x, i32 %y)
 ; CHECK-LABEL: define float @test_pow_afn_nnan_ninf_f32_known_integral_sitofp
 ; CHECK-SAME: (float [[X:%.*]], i32 [[Y:%.*]]) {
 ; CHECK-NEXT:    [[Y_CAST:%.*]] = sitofp i32 [[Y]] to float
-; CHECK-NEXT:    [[TMP1:%.*]] = fptosi float [[Y_CAST]] to i32
+; CHECK-NEXT:    [[TMP1:%.*]] = fptosi nnan ninf afn float [[Y_CAST]] to i32
 ; CHECK-NEXT:    [[__FABS:%.*]] = call nnan ninf afn float @llvm.fabs.f32(float [[X]])
 ; CHECK-NEXT:    [[__LOG2:%.*]] = call nnan ninf afn float @llvm.log2.f32(float [[__FABS]])
-; CHECK-NEXT:    [[POWNI2F:%.*]] = sitofp i32 [[TMP1]] to float
+; CHECK-NEXT:    [[POWNI2F:%.*]] = sitofp nnan ninf afn i32 [[TMP1]] to float
 ; CHECK-NEXT:    [[__YLOGX:%.*]] = fmul nnan ninf afn float [[__LOG2]], [[POWNI2F]]
 ; CHECK-NEXT:    [[__EXP2:%.*]] = call nnan ninf afn float @llvm.exp2.f32(float [[__YLOGX]])
 ; CHECK-NEXT:    [[__YEVEN:%.*]] = shl i32 [[TMP1]], 31
@@ -2227,7 +2227,7 @@ define float @test_pow_afn_nnan_f32_known_integral_sitofp(float %x, i32 %y) {
 ; CHECK-LABEL: define float @test_pow_afn_nnan_f32_known_integral_sitofp
 ; CHECK-SAME: (float [[X:%.*]], i32 [[Y:%.*]]) {
 ; CHECK-NEXT:    [[Y_CAST:%.*]] = sitofp i32 [[Y]] to float
-; CHECK-NEXT:    [[TMP1:%.*]] = fptosi float [[Y_CAST]] to i32
+; CHECK-NEXT:    [[TMP1:%.*]] = fptosi nnan afn float [[Y_CAST]] to i32
 ; CHECK-NEXT:    [[POW:%.*]] = tail call nnan afn float @_Z4pownfi(float [[X]], i32 [[TMP1]])
 ; CHECK-NEXT:    ret float [[POW]]
 ;
@@ -2240,7 +2240,7 @@ define float @test_pow_afn_ninf_f32_known_integral_sitofp(float %x, i32 %y) {
 ; CHECK-LABEL: define float @test_pow_afn_ninf_f32_known_integral_sitofp
 ; CHECK-SAME: (float [[X:%.*]], i32 [[Y:%.*]]) {
 ; CHECK-NEXT:    [[Y_CAST:%.*]] = sitofp i32 [[Y]] to float
-; CHECK-NEXT:    [[TMP1:%.*]] = fptosi float [[Y_CAST]] to i32
+; CHECK-NEXT:    [[TMP1:%.*]] = fptosi ninf afn float [[Y_CAST]] to i32
 ; CHECK-NEXT:    [[POW:%.*]] = tail call ninf afn float @_Z4pownfi(float [[X]], i32 [[TMP1]])
 ; CHECK-NEXT:    ret float [[POW]]
 ;
@@ -2279,7 +2279,7 @@ define float @test_pow_afn_f32_known_integral_uitofp(float %x, i32 %y) {
 ; CHECK-LABEL: define float @test_pow_afn_f32_known_integral_uitofp
 ; CHECK-SAME: (float [[X:%.*]], i32 [[Y:%.*]]) {
 ; CHECK-NEXT:    [[Y_CAST:%.*]] = uitofp i32 [[Y]] to float
-; CHECK-NEXT:    [[TMP1:%.*]] = fptosi float [[Y_CAST]] to i32
+; CHECK-NEXT:    [[TMP1:%.*]] = fptosi afn float [[Y_CAST]] to i32
 ; CHECK-NEXT:    [[POW:%.*]] = tail call afn float @_Z4pownfi(float [[X]], i32 [[TMP1]])
 ; CHECK-NEXT:    ret float [[POW]]
 ;
@@ -2292,10 +2292,10 @@ define float @test_pow_afn_nnan_ninf_f32_known_integral_uitofp(float %x, i32 %y)
 ; CHECK-LABEL: define float @test_pow_afn_nnan_ninf_f32_known_integral_uitofp
 ; CHECK-SAME: (float [[X:%.*]], i32 [[Y:%.*]]) {
 ; CHECK-NEXT:    [[Y_CAST:%.*]] = uitofp i32 [[Y]] to float
-; CHECK-NEXT:    [[TMP1:%.*]] = fptosi float [[Y_CAST]] to i32
+; CHECK-NEXT:    [[TMP1:%.*]] = fptosi nnan ninf afn float [[Y_CAST]] to i32
 ; CHECK-NEXT:    [[__FABS:%.*]] = call nnan ninf afn float @llvm.fabs.f32(float [[X]])
 ; CHECK-NEXT:    [[__LOG2:%.*]] = call nnan ninf afn float @llvm.log2.f32(float [[__FABS]])
-; CHECK-NEXT:    [[POWNI2F:%.*]] = sitofp i32 [[TMP1]] to float
+; CHECK-NEXT:    [[POWNI2F:%.*]] = sitofp nnan ninf afn i32 [[TMP1]] to float
 ; CHECK-NEXT:    [[__YLOGX:%.*]] = fmul nnan ninf afn float [[__LOG2]], [[POWNI2F]]
 ; CHECK-NEXT:    [[__EXP2:%.*]] = call nnan ninf afn float @llvm.exp2.f32(float [[__YLOGX]])
 ; CHECK-NEXT:    [[__YEVEN:%.*]] = shl i32 [[TMP1]], 31
@@ -2341,10 +2341,10 @@ define float @test_pow_afn_nnan_ninf_f32_known_integral_uitofp_i256(float %x, i2
 ; CHECK-LABEL: define float @test_pow_afn_nnan_ninf_f32_known_integral_uitofp_i256
 ; CHECK-SAME: (float [[X:%.*]], i256 [[Y:%.*]]) {
 ; CHECK-NEXT:    [[Y_CAST:%.*]] = uitofp i256 [[Y]] to float
-; CHECK-NEXT:    [[TMP1:%.*]] = fptosi float [[Y_CAST]] to i32
+; CHECK-NEXT:    [[TMP1:%.*]] = fptosi nnan ninf afn float [[Y_CAST]] to i32
 ; CHECK-NEXT:    [[__FABS:%.*]] = call nnan ninf afn float @llvm.fabs.f32(float [[X]])
 ; CHECK-NEXT:    [[__LOG2:%.*]] = call nnan ninf afn float @llvm.log2.f32(float [[__FABS]])
-; CHECK-NEXT:    [[POWNI2F:%.*]] = sitofp i32 [[TMP1]] to float
+; CHECK-NEXT:    [[POWNI2F:%.*]] = sitofp nnan ninf afn i32 [[TMP1]] to float
 ; CHECK-NEXT:    [[__YLOGX:%.*]] = fmul nnan ninf afn float [[__LOG2]], [[POWNI2F]]
 ; CHECK-NEXT:    [[__EXP2:%.*]] = call nnan ninf afn float @llvm.exp2.f32(float [[__YLOGX]])
 ; CHECK-NEXT:    [[__YEVEN:%.*]] = shl i32 [[TMP1]], 31
@@ -2364,10 +2364,10 @@ define float @test_pow_afn_nnan_ninf_f32_known_integral_sitofp_i256(float %x, i2
 ; CHECK-LABEL: define float @test_pow_afn_nnan_ninf_f32_known_integral_sitofp_i256
 ; CHECK-SAME: (float [[X:%.*]], i256 [[Y:%.*]]) {
 ; CHECK-NEXT:    [[Y_CAST:%.*]] = sitofp i256 [[Y]] to float
-; CHECK-NEXT:    [[TMP1:%.*]] = fptosi float [[Y_CAST]] to i32
+; CHECK-NEXT:    [[TMP1:%.*]] = fptosi nnan ninf afn float [[Y_CAST]] to i32
 ; CHECK-NEXT:    [[__FABS:%.*]] = call nnan ninf afn float @llvm.fabs.f32(float [[X]])
 ; CHECK-NEXT:    [[__LOG2:%.*]] = call nnan ninf afn float @llvm.log2.f32(float [[__FABS]])
-; CHECK-NEXT:    [[POWNI2F:%.*]] = sitofp i32 [[TMP1]] to float
+; CHECK-NEXT:    [[POWNI2F:%.*]] = sitofp nnan ninf afn i32 [[TMP1]] to float
 ; CHECK-NEXT:    [[__YLOGX:%.*]] = fmul nnan ninf afn float [[__LOG2]], [[POWNI2F]]
 ; CHECK-NEXT:    [[__EXP2:%.*]] = call nnan ninf afn float @llvm.exp2.f32(float [[__YLOGX]])
 ; CHECK-NEXT:    [[__YEVEN:%.*]] = shl i32 [[TMP1]], 31
@@ -2387,10 +2387,10 @@ define <2 x float> @test_pow_afn_nnan_ninf_v2f32_known_integral_sitofp(<2 x floa
 ; CHECK-LABEL: define <2 x float> @test_pow_afn_nnan_ninf_v2f32_known_integral_sitofp
 ; CHECK-SAME: (<2 x float> [[X:%.*]], <2 x i32> [[Y:%.*]]) {
 ; CHECK-NEXT:    [[Y_CAST:%.*]] = sitofp <2 x i32> [[Y]] to <2 x float>
-; CHECK-NEXT:    [[TMP1:%.*]] = fptosi <2 x float> [[Y_CAST]] to <2 x i32>
+; CHECK-NEXT:    [[TMP1:%.*]] = fptosi nnan ninf afn <2 x float> [[Y_CAST]] to <2 x i32>
 ; CHECK-NEXT:    [[__FABS:%.*]] = call nnan ninf afn <2 x float> @llvm.fabs.v2f32(<2 x float> [[X]])
 ; CHECK-NEXT:    [[__LOG2:%.*]] = call nnan ninf afn <2 x float> @llvm.log2.v2f32(<2 x float> [[__FABS]])
-; CHECK-NEXT:    [[POWNI2F:%.*]] = sitofp <2 x i32> [[TMP1]] to <2 x float>
+; CHECK-NEXT:    [[POWNI2F:%.*]] = sitofp nnan ninf afn <2 x i32> [[TMP1]] to <2 x float>
 ; CHECK-NEXT:    [[__YLOGX:%.*]] = fmul nnan ninf afn <2 x float> [[__LOG2]], [[POWNI2F]]
 ; CHECK-NEXT:    [[__EXP2:%.*]] = call nnan ninf afn <2 x float> @llvm.exp2.v2f32(<2 x float> [[__YLOGX]])
 ; CHECK-NEXT:    [[__YEVEN:%.*]] = shl <2 x i32> [[TMP1]], splat (i32 31)
@@ -2423,7 +2423,7 @@ define <2 x float> @test_pow_afn_v2f32_known_integral_uitofp(<2 x float> %x, <2
 ; CHECK-LABEL: define <2 x float> @test_pow_afn_v2f32_known_integral_uitofp
 ; CHECK-SAME: (<2 x float> [[X:%.*]], <2 x i32> [[Y:%.*]]) {
 ; CHECK-NEXT:    [[Y_CAST:%.*]] = uitofp <2 x i32> [[Y]] to <2 x float>
-; CHECK-NEXT:    [[TMP1:%.*]] = fptosi <2 x float> [[Y_CAST]] to <2 x i32>
+; CHECK-NEXT:    [[TMP1:%.*]] = fptosi afn <2 x float> [[Y_CAST]] to <2 x i32>
 ; CHECK-NEXT:    [[POW:%.*]] = tail call afn <2 x float> @_Z4pownDv2_fDv2_i(<2 x float> [[X]], <2 x i32> [[TMP1]])
 ; CHECK-NEXT:    ret <2 x float> [[POW]]
 ;
@@ -2436,10 +2436,10 @@ define <2 x float> @test_pow_afn_nnan_ninf_v2f32_known_integral_uitofp(<2 x floa
 ; CHECK-LABEL: define <2 x float> @test_pow_afn_nnan_ninf_v2f32_known_integral_uitofp
 ; CHECK-SAME: (<2 x float> [[X:%.*]], <2 x i32> [[Y:%.*]]) {
 ; CHECK-NEXT:    [[Y_CAST:%.*]] = uitofp <2 x i32> [[Y]] to <2 x float>
-; CHECK-NEXT:    [[TMP1:%.*]] = fptosi <2 x float> [[Y_CAST]] to <2 x i32>
+; CHECK-NEXT:    [[TMP1:%.*]] = fptosi nnan ninf afn <2 x float> [[Y_CAST]] to <2 x i32>
 ; CHECK-NEXT:    [[__FABS:%.*]] = call nnan ninf afn <2 x float> @llvm.fabs.v2f32(<2 x float> [[X]])
 ; CHECK-NEXT:    [[__LOG2:%.*]] = call nnan ninf afn <2 x float> @llvm.log2.v2f32(<2 x float> [[__FABS]])
-; CHECK-NEXT:    [[POWNI2F:%.*]] = sitofp <2 x i32> [[TMP1]] to <2 x float>
+; CHECK-NEXT:    [[POWNI2F:%.*]] = sitofp nnan ninf afn <2 x i32> [[TMP1]] to <2 x float>
 ; CHECK-NEXT:    [[__YLOGX:%.*]] = fmul nnan ninf afn <2 x float> [[__LOG2]], [[POWNI2F]]
 ; CHECK-NEXT:    [[__EXP2:%.*]] = call nnan ninf afn <2 x float> @llvm.exp2.v2f32(<2 x float> [[__YLOGX]])
 ; CHECK-NEXT:    [[__YEVEN:%.*]] = shl <2 x i32> [[TMP1]], splat (i32 31)
@@ -2548,10 +2548,10 @@ define float @test_pow_afn_f32_nnan_ninf__y_known_integral_trunc(float %x, float
 ; CHECK-LABEL: define float @test_pow_afn_f32_nnan_ninf__y_known_integral_trunc
 ; CHECK-SAME: (float [[X:%.*]], float [[Y_ARG:%.*]]) {
 ; CHECK-NEXT:    [[Y:%.*]] = call float @llvm.trunc.f32(float [[Y_ARG]])
-; CHECK-NEXT:    [[TMP1:%.*]] = fptosi float [[Y]] to i32
+; CHECK-NEXT:    [[TMP1:%.*]] = fptosi nnan ninf afn float [[Y]] to i32
 ; CHECK-NEXT:    [[__FABS:%.*]] = call nnan ninf afn float @llvm.fabs.f32(float [[X]])
 ; CHECK-NEXT:    [[__LOG2:%.*]] = call nnan ninf afn float @llvm.log2.f32(float [[__FABS]])
-; CHECK-NEXT:    [[POWNI2F:%.*]] = sitofp i32 [[TMP1]] to float
+; CHECK-NEXT:    [[POWNI2F:%.*]] = sitofp nnan ninf afn i32 [[TMP1]] to float
 ; CHECK-NEXT:    [[__YLOGX:%.*]] = fmul nnan ninf afn float [[__LOG2]], [[POWNI2F]]
 ; CHECK-NEXT:    [[__EXP2:%.*]] = call nnan ninf afn float @llvm.exp2.f32(float [[__YLOGX]])
 ; CHECK-NEXT:    [[__YEVEN:%.*]] = shl i32 [[TMP1]], 31
@@ -2571,7 +2571,7 @@ define float @test_pow_afn_f32__y_known_integral_trunc(float %x, float nofpclass
 ; CHECK-LABEL: define float @test_pow_afn_f32__y_known_integral_trunc
 ; CHECK-SAME: (float [[X:%.*]], float nofpclass(nan inf) [[Y_ARG:%.*]]) {
 ; CHECK-NEXT:    [[Y:%.*]] = call float @llvm.trunc.f32(float [[Y_ARG]])
-; CHECK-NEXT:    [[TMP1:%.*]] = fptosi float [[Y]] to i32
+; CHECK-NEXT:    [[TMP1:%.*]] = fptosi afn float [[Y]] to i32
 ; CHECK-NEXT:    [[POW:%.*]] = tail call afn float @_Z4pownfi(float [[X]], i32 [[TMP1]])
 ; CHECK-NEXT:    ret float [[POW]]
 ;
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pown.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pown.ll
index 27d204c1a253f..b7e6272761c1b 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pown.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pown.ll
@@ -671,7 +671,7 @@ define float @test_pown_afn_nnan_ninf_f32(float %x, i32 %y) {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[__FABS:%.*]] = call nnan ninf afn float @llvm.fabs.f32(float [[X]])
 ; CHECK-NEXT:    [[__LOG2:%.*]] = call nnan ninf afn float @llvm.log2.f32(float [[__FABS]])
-; CHECK-NEXT:    [[POWNI2F:%.*]] = sitofp i32 [[Y]] to float
+; CHECK-NEXT:    [[POWNI2F:%.*]] = sitofp nnan ninf afn i32 [[Y]] to float
 ; CHECK-NEXT:    [[__YLOGX:%.*]] = fmul nnan ninf afn float [[__LOG2]], [[POWNI2F]]
 ; CHECK-NEXT:    [[__EXP2:%.*]] = call nnan ninf afn float @llvm.exp2.f32(float [[__YLOGX]])
 ; CHECK-NEXT:    [[__YEVEN:%.*]] = shl i32 [[Y]], 31
@@ -693,7 +693,7 @@ define <2 x float> @test_pown_afn_nnan_ninf_v2f32(<2 x float> %x, <2 x i32> %y)
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[__FABS:%.*]] = call nnan ninf afn <2 x float> @llvm.fabs.v2f32(<2 x float> [[X]])
 ; CHECK-NEXT:    [[__LOG2:%.*]] = call nnan ninf afn <2 x float> @llvm.log2.v2f32(<2 x float> [[__FABS]])
-; CHECK-NEXT:    [[POWNI2F:%.*]] = sitofp <2 x i32> [[Y]] to <2 x float>
+; CHECK-NEXT:    [[POWNI2F:%.*]] = sitofp nnan ninf afn <2 x i32> [[Y]] to <2 x float>
 ; CHECK-NEXT:    [[__YLOGX:%.*]] = fmul nnan ninf afn <2 x float> [[__LOG2]], [[POWNI2F]]
 ; CHECK-NEXT:    [[__EXP2:%.*]] = call nnan ninf afn <2 x float> @llvm.exp2.v2f32(<2 x float> [[__YLOGX]])
 ; CHECK-NEXT:    [[__YEVEN:%.*]] = shl <2 x i32> [[Y]], splat (i32 31)
@@ -715,7 +715,7 @@ define double @test_pown_afn_nnan_ninf_f64(double %x, i32 %y) {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[__FABS:%.*]] = call nnan ninf afn double @llvm.fabs.f64(double [[X]])
 ; CHECK-NEXT:    [[__LOG2:%.*]] = call nnan ninf afn double @_Z4log2d(double [[__FABS]])
-; CHECK-NEXT:    [[POWNI2F:%.*]] = sitofp i32 [[Y]] to double
+; CHECK-NEXT:    [[POWNI2F:%.*]] = sitofp nnan ninf afn i32 [[Y]] to double
 ; CHECK-NEXT:    [[__YLOGX:%.*]] = fmul nnan ninf afn double [[__LOG2]], [[POWNI2F]]
 ; CHECK-NEXT:    [[__EXP2:%.*]] = call nnan ninf afn double @_Z4exp2d(double [[__YLOGX]])
 ; CHECK-NEXT:    [[__YTOU:%.*]] = zext i32 [[Y]] to i64
@@ -738,7 +738,7 @@ define <2 x double> @test_pown_afn_nnan_ninf_v2f64(<2 x double> %x, <2 x i32> %y
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[__FABS:%.*]] = call nnan ninf afn <2 x double> @llvm.fabs.v2f64(<2 x double> [[X]])
 ; CHECK-NEXT:    [[__LOG2:%.*]] = call nnan ninf afn <2 x double> @_Z4log2Dv2_d(<2 x double> [[__FABS]])
-; CHECK-NEXT:    [[POWNI2F:%.*]] = sitofp <2 x i32> [[Y]] to <2 x double>
+; CHECK-NEXT:    [[POWNI2F:%.*]] = sitofp nnan ninf afn <2 x i32> [[Y]] to <2 x double>
 ; CHECK-NEXT:    [[__YLOGX:%.*]] = fmul nnan ninf afn <2 x double> [[__LOG2]], [[POWNI2F]]
 ; CHECK-NEXT:    [[__EXP2:%.*]] = call nnan ninf afn <2 x double> @_Z4exp2Dv2_d(<2 x double> [[__YLOGX]])
 ; CHECK-NEXT:    [[__YTOU:%.*]] = zext <2 x i32> [[Y]] to <2 x i64>
@@ -761,7 +761,7 @@ define half @test_pown_afn_nnan_ninf_f16(half %x, i32 %y) {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[__FABS:%.*]] = call nnan ninf afn half @llvm.fabs.f16(half [[X]])
 ; CHECK-NEXT:    [[__LOG2:%.*]] = call nnan ninf afn half @llvm.log2.f16(half [[__FABS]])
-; CHECK-NEXT:    [[POWNI2F:%.*]] = sitofp i32 [[Y]] to half
+; CHECK-NEXT:    [[POWNI2F:%.*]] = sitofp nnan ninf afn i32 [[Y]] to half
 ; CHECK-NEXT:    [[__YLOGX:%.*]] = fmul nnan ninf afn half [[__LOG2]], [[POWNI2F]]
 ; CHECK-NEXT:    [[__EXP2:%.*]] = call nnan ninf afn half @llvm.exp2.f16(half [[__YLOGX]])
 ; CHECK-NEXT:    [[__YTOU:%.*]] = trunc i32 [[Y]] to i16
@@ -784,7 +784,7 @@ define <2 x half> @test_pown_afn_nnan_ninf_v2f16(<2 x half> %x, <2 x i32> %y) {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[__FABS:%.*]] = call nnan ninf afn <2 x half> @llvm.fabs.v2f16(<2 x half> [[X]])
 ; CHECK-NEXT:    [[__LOG2:%.*]] = call nnan ninf afn <2 x half> @llvm.log2.v2f16(<2 x half> [[__FABS]])
-; CHECK-NEXT:    [[POWNI2F:%.*]] = sitofp <2 x i32> [[Y]] to <2 x half>
+; CHECK-NEXT:    [[POWNI2F:%.*]] = sitofp nnan ninf afn <2 x i32> [[Y]] to <2 x half>
 ; CHECK-NEXT:    [[__YLOGX:%.*]] = fmul nnan ninf afn <2 x half> [[__LOG2]], [[POWNI2F]]
 ; CHECK-NEXT:    [[__EXP2:%.*]] = call nnan ninf afn <2 x half> @llvm.exp2.v2f16(<2 x half> [[__YLOGX]])
 ; CHECK-NEXT:    [[__YTOU:%.*]] = trunc <2 x i32> [[Y]] to <2 x i16>
@@ -1065,7 +1065,7 @@ define float @test_pown_afn_ninf_nnan_f32__x_known_positive(float nofpclass(ninf
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[__FABS:%.*]] = call nnan ninf afn float @llvm.fabs.f32(float [[X]])
 ; CHECK-NEXT:    [[__LOG2:%.*]] = call nnan ninf afn float @llvm.log2.f32(float [[__FABS]])
-; CHECK-NEXT:    [[POWNI2F:%.*]] = sitofp i32 [[Y]] to float
+; CHECK-NEXT:    [[POWNI2F:%.*]] = sitofp nnan ninf afn i32 [[Y]] to float
 ; CHECK-NEXT:    [[__YLOGX:%.*]] = fmul nnan ninf afn float [[__LOG2]], [[POWNI2F]]
 ; CHECK-NEXT:    [[__EXP2:%.*]] = call nnan ninf afn float @llvm.exp2.f32(float [[__YLOGX]])
 ; CHECK-NEXT:    [[__YEVEN:%.*]] = shl i32 [[Y]], 31
@@ -1126,7 +1126,7 @@ define float @test_fast_pown_f32_y_known_even(float %x, i32 %y.arg) {
 ; CHECK-NEXT:    [[Y:%.*]] = shl i32 [[Y_ARG]], 1
 ; CHECK-NEXT:    [[__FABS:%.*]] = call fast float @llvm.fabs.f32(float [[X]])
 ; CHECK-NEXT:    [[__LOG2:%.*]] = call fast float @llvm.log2.f32(float [[__FABS]])
-; CHECK-NEXT:    [[POWNI2F:%.*]] = sitofp i32 [[Y]] to float
+; CHECK-NEXT:    [[POWNI2F:%.*]] = sitofp fast i32 [[Y]] to float
 ; CHECK-NEXT:    [[__YLOGX:%.*]] = fmul fast float [[__LOG2]], [[POWNI2F]]
 ; CHECK-NEXT:    [[__EXP2:%.*]] = call fast float @llvm.exp2.f32(float [[__YLOGX]])
 ; CHECK-NEXT:    ret float [[__EXP2]]
@@ -1144,7 +1144,7 @@ define float @test_fast_pown_f32_known_positive_y_known_even(float nofpclass(nin
 ; CHECK-NEXT:    [[Y:%.*]] = shl i32 [[Y_ARG]], 1
 ; CHECK-NEXT:    [[__FABS:%.*]] = call fast float @llvm.fabs.f32(float [[X]])
 ; CHECK-NEXT:    [[__LOG2:%.*]] = call fast float @llvm.log2.f32(float [[__FABS]])
-; CHECK-NEXT:    [[POWNI2F:%.*]] = sitofp i32 [[Y]] to float
+; CHECK-NEXT:    [[POWNI2F:%.*]] = sitofp fast i32 [[Y]] to float
 ; CHECK-NEXT:    [[__YLOGX:%.*]] = fmul fast float [[__LOG2]], [[POWNI2F]]
 ; CHECK-NEXT:    [[__EXP2:%.*]] = call fast float @llvm.exp2.f32(float [[__YLOGX]])
 ; CHECK-NEXT:    ret float [[__EXP2]]
diff --git a/llvm/test/CodeGen/AMDGPU/divrem24-assume.ll b/llvm/test/CodeGen/AMDGPU/divrem24-assume.ll
index dc79385d9eaca..46ff0f61b31a8 100644
--- a/llvm/test/CodeGen/AMDGPU/divrem24-assume.ll
+++ b/llvm/test/CodeGen/AMDGPU/divrem24-assume.ll
@@ -4,7 +4,7 @@
 define amdgpu_kernel void @divrem24_assume(ptr addrspace(1) %arg, i32 %arg1) {
 ; CHECK-LABEL: @divrem24_assume(
 ; CHECK-NEXT:  bb:
-; CHECK-NEXT:    [[TMP:%.*]] = tail call i32 @llvm.amdgcn.workitem.id.x(), !range !0
+; CHECK-NEXT:    [[TMP:%.*]] = tail call i32 @llvm.amdgcn.workitem.id.x(), !range [[RNG0:![0-9]+]]
 ; CHECK-NEXT:    [[TMP2:%.*]] = icmp ult i32 [[ARG1:%.*]], 42
 ; CHECK-NEXT:    tail call void @llvm.assume(i1 [[TMP2]])
 ; CHECK-NEXT:    [[TMP0:%.*]] = uitofp i32 [[TMP]] to float
@@ -14,7 +14,7 @@ define amdgpu_kernel void @divrem24_assume(ptr addrspace(1) %arg, i32 %arg1) {
 ; CHECK-NEXT:    [[TMP4:%.*]] = call fast float @llvm.trunc.f32(float [[TMP3]])
 ; CHECK-NEXT:    [[TMP5:%.*]] = fneg fast float [[TMP4]]
 ; CHECK-NEXT:    [[TMP6:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP5]], float [[TMP1]], float [[TMP0]])
-; CHECK-NEXT:    [[TMP7:%.*]] = fptoui float [[TMP4]] to i32
+; CHECK-NEXT:    [[TMP7:%.*]] = fptoui fast float [[TMP4]] to i32
 ; CHECK-NEXT:    [[TMP8:%.*]] = call fast float @llvm.fabs.f32(float [[TMP6]])
 ; CHECK-NEXT:    [[TMP9:%.*]] = call fast float @llvm.fabs.f32(float [[TMP1]])
 ; CHECK-NEXT:    [[TMP10:%.*]] = fcmp fast oge float [[TMP8]], [[TMP9]]
diff --git a/llvm/test/CodeGen/AMDGPU/simplify-libcalls.ll b/llvm/test/CodeGen/AMDGPU/simplify-libcalls.ll
index 6a88be6e55859..02d42f1774c26 100644
--- a/llvm/test/CodeGen/AMDGPU/simplify-libcalls.ll
+++ b/llvm/test/CodeGen/AMDGPU/simplify-libcalls.ll
@@ -352,7 +352,7 @@ declare half @_Z4pownDhi(half, i32)
 ; GCN-LABEL: {{^}}define half @test_pown_f16(
 ; GCN-NATIVE: %__fabs = tail call fast half @llvm.fabs.f16(half %x)
 ; GCN-NATIVE: %__log2 = tail call fast half @llvm.log2.f16(half %__fabs)
-; GCN-NATIVE: %pownI2F = sitofp i32 %y to half
+; GCN-NATIVE: %pownI2F = sitofp fast i32 %y to half
 ; GCN-NATIVE: %__ylogx = fmul fast half %__log2, %pownI2F
 ; GCN-NATIVE: %__exp2 = tail call fast half @llvm.exp2.f16(half %__ylogx)
 ; GCN-NATIVE: %__ytou = trunc i32 %y to i16
@@ -404,7 +404,7 @@ entry:
 ; GCN: %conv = fptosi float %tmp1 to i32
 ; GCN: %__fabs = tail call fast float @llvm.fabs.f32(float %tmp)
 ; GCN: %__log2 = tail call fast float @llvm.log2.f32(float %__fabs)
-; GCN: %pownI2F = sitofp i32 %conv to float
+; GCN: %pownI2F = sitofp fast i32 %conv to float
 ; GCN: %__ylogx = fmul fast float %__log2, %pownI2F
 ; GCN: %__exp2 = tail call fast float @llvm.exp2.f32(float %__ylogx)
 ; GCN: %__yeven = shl i32 %conv, 31
diff --git a/llvm/test/Transforms/InstCombine/log-pow.ll b/llvm/test/Transforms/InstCombine/log-pow.ll
index 374115953145d..f5091c4a21b7b 100644
--- a/llvm/test/Transforms/InstCombine/log-pow.ll
+++ b/llvm/test/Transforms/InstCombine/log-pow.ll
@@ -26,7 +26,7 @@ define double @log_powi_const(double %x) {
 define double @log_powi_nonconst(double %x, i32 %y) {
 ; CHECK-LABEL: @log_powi_nonconst(
 ; CHECK-NEXT:    [[LOG1:%.*]] = call fast double @llvm.log.f64(double [[X:%.*]])
-; CHECK-NEXT:    [[CAST:%.*]] = sitofp i32 [[Y:%.*]] to double
+; CHECK-NEXT:    [[CAST:%.*]] = sitofp fast i32 [[Y:%.*]] to double
 ; CHECK-NEXT:    [[MUL:%.*]] = fmul fast double [[LOG1]], [[CAST]]
 ; CHECK-NEXT:    ret double [[MUL]]
 ;
@@ -38,7 +38,7 @@ define double @log_powi_nonconst(double %x, i32 %y) {
 define double @logf64_powi_nonconst(double %x, i32 %y) {
 ; CHECK-LABEL: @logf64_powi_nonconst(
 ; CHECK-NEXT:    [[LOG1:%.*]] = call fast double @llvm.log.f64(double [[X:%.*]])
-; CHECK-NEXT:    [[CAST:%.*]] = sitofp i32 [[Y:%.*]] to double
+; CHECK-NEXT:    [[CAST:%.*]] = sitofp fast i32 [[Y:%.*]] to double
 ; CHECK-NEXT:    [[MUL:%.*]] = fmul fast double [[LOG1]], [[CAST]]
 ; CHECK-NEXT:    ret double [[MUL]]
 ;
@@ -61,7 +61,7 @@ define float @logf_powfi_const(float %x) {
 define float @logf_powfi_nonconst(float %x, i32 %y) {
 ; CHECK-LABEL: @logf_powfi_nonconst(
 ; CHECK-NEXT:    [[LOG1:%.*]] = call fast float @llvm.log.f32(float [[X:%.*]])
-; CHECK-NEXT:    [[CAST:%.*]] = sitofp i32 [[Y:%.*]] to float
+; CHECK-NEXT:    [[CAST:%.*]] = sitofp fast i32 [[Y:%.*]] to float
 ; CHECK-NEXT:    [[MUL:%.*]] = fmul fast float [[LOG1]], [[CAST]]
 ; CHECK-NEXT:    ret float [[MUL]]
 ;
diff --git a/llvm/test/Transforms/LoopVectorize/PowerPC/vplan-scalarivsext-crash.ll b/llvm/test/Transforms/LoopVectorize/PowerPC/vplan-scalarivsext-crash.ll
index 34b8deaa8de03..11f4a7bc81b4c 100644
--- a/llvm/test/Transforms/LoopVectorize/PowerPC/vplan-scalarivsext-crash.ll
+++ b/llvm/test/Transforms/LoopVectorize/PowerPC/vplan-scalarivsext-crash.ll
@@ -31,13 +31,13 @@ define void @test_iv_trunc_crash(ptr %a, ptr %b, i32 %n) {
 ; CHECK-NEXT:    [[TMP10:%.*]] = icmp eq i32 [[N_MOD_VF]], 0
 ; CHECK-NEXT:    [[TMP11:%.*]] = select i1 [[TMP10]], i32 8, i32 [[N_MOD_VF]]
 ; CHECK-NEXT:    [[N_VEC:%.*]] = sub i32 [[TMP3]], [[TMP11]]
-; CHECK-NEXT:    [[DOTCAST:%.*]] = sitofp i32 [[N_VEC]] to double
+; CHECK-NEXT:    [[DOTCAST:%.*]] = sitofp reassoc i32 [[N_VEC]] to double
 ; CHECK-NEXT:    [[TMP12:%.*]] = fmul reassoc double [[X]], [[DOTCAST]]
 ; CHECK-NEXT:    [[TMP13:%.*]] = fadd reassoc double [[SUM_0]], [[TMP12]]
 ; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
 ; CHECK:       [[VECTOR_BODY]]:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[DOTCAST2:%.*]] = sitofp i32 [[INDEX]] to double
+; CHECK-NEXT:    [[DOTCAST2:%.*]] = sitofp reassoc i32 [[INDEX]] to double
 ; CHECK-NEXT:    [[TMP14:%.*]] = fmul reassoc double [[X]], [[DOTCAST2]]
 ; CHECK-NEXT:    [[OFFSET_IDX:%.*]] = fadd reassoc double [[SUM_0]], [[TMP14]]
 ; CHECK-NEXT:    [[TMP15:%.*]] = fmul reassoc double 7.000000e+00, [[X]]
diff --git a/llvm/test/Transforms/LoopVectorize/X86/float-induction-x86.ll b/llvm/test/Transforms/LoopVectorize/X86/float-induction-x86.ll
index c0ff8816c2543..2e1698a3940c8 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/float-induction-x86.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/float-induction-x86.ll
@@ -23,14 +23,14 @@ define void @fp_iv_loop1(ptr noalias nocapture %A, i32 %N) #0 {
 ; AUTO_VEC:       [[ITER_CHECK]]:
 ; AUTO_VEC-NEXT:    [[TMP0:%.*]] = zext i32 [[N]] to i64
 ; AUTO_VEC-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 4
-; AUTO_VEC-NEXT:    br i1 [[MIN_ITERS_CHECK]], label %[[FOR_BODY:.*]], label %[[VECTOR_MAIN_LOOP_ITER_CHECK:.*]]
+; AUTO_VEC-NEXT:    br i1 [[MIN_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH:.*]], label %[[VECTOR_MAIN_LOOP_ITER_CHECK:.*]]
 ; AUTO_VEC:       [[VECTOR_MAIN_LOOP_ITER_CHECK]]:
 ; AUTO_VEC-NEXT:    [[MIN_ITERS_CHECK1:%.*]] = icmp ult i64 [[TMP0]], 32
 ; AUTO_VEC-NEXT:    br i1 [[MIN_ITERS_CHECK1]], label %[[VEC_EPILOG_PH:.*]], label %[[VECTOR_PH:.*]]
 ; AUTO_VEC:       [[VECTOR_PH]]:
 ; AUTO_VEC-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], 32
 ; AUTO_VEC-NEXT:    [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]]
-; AUTO_VEC-NEXT:    [[DOTCAST:%.*]] = sitofp i64 [[N_VEC]] to float
+; AUTO_VEC-NEXT:    [[DOTCAST:%.*]] = sitofp fast i64 [[N_VEC]] to float
 ; AUTO_VEC-NEXT:    [[TMP6:%.*]] = fmul fast float 5.000000e-01, [[DOTCAST]]
 ; AUTO_VEC-NEXT:    [[IND_END:%.*]] = fadd fast float 1.000000e+00, [[TMP6]]
 ; AUTO_VEC-NEXT:    br label %[[VECTOR_BODY:.*]]
@@ -56,18 +56,18 @@ define void @fp_iv_loop1(ptr noalias nocapture %A, i32 %N) #0 {
 ; AUTO_VEC-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]]
 ; AUTO_VEC-NEXT:    br i1 [[CMP_N]], label %[[FOR_END_LOOPEXIT:.*]], label %[[VEC_EPILOG_ITER_CHECK:.*]]
 ; AUTO_VEC:       [[VEC_EPILOG_ITER_CHECK]]:
-; AUTO_VEC-NEXT:    [[DOTCAST12:%.*]] = sitofp i64 [[N_VEC]] to float
+; AUTO_VEC-NEXT:    [[DOTCAST12:%.*]] = sitofp fast i64 [[N_VEC]] to float
 ; AUTO_VEC-NEXT:    [[TMP11:%.*]] = fmul fast float 5.000000e-01, [[DOTCAST12]]
 ; AUTO_VEC-NEXT:    [[IND_END1:%.*]] = fadd fast float 1.000000e+00, [[TMP11]]
 ; AUTO_VEC-NEXT:    [[N_VEC_REMAINING:%.*]] = sub i64 [[TMP0]], [[N_VEC]]
 ; AUTO_VEC-NEXT:    [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_VEC_REMAINING]], 4
-; AUTO_VEC-NEXT:    br i1 [[MIN_EPILOG_ITERS_CHECK]], label %[[FOR_BODY]], label %[[VEC_EPILOG_PH]], !prof [[PROF3:![0-9]+]]
+; AUTO_VEC-NEXT:    br i1 [[MIN_EPILOG_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]], !prof [[PROF3:![0-9]+]]
 ; AUTO_VEC:       [[VEC_EPILOG_PH]]:
 ; AUTO_VEC-NEXT:    [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
 ; AUTO_VEC-NEXT:    [[BC_RESUME_VAL:%.*]] = phi float [ [[IND_END]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 1.000000e+00, %[[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
 ; AUTO_VEC-NEXT:    [[N_MOD_VF2:%.*]] = urem i64 [[TMP0]], 4
 ; AUTO_VEC-NEXT:    [[N_VEC3:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF2]]
-; AUTO_VEC-NEXT:    [[DOTCAST4:%.*]] = sitofp i64 [[N_VEC3]] to float
+; AUTO_VEC-NEXT:    [[DOTCAST4:%.*]] = sitofp fast i64 [[N_VEC3]] to float
 ; AUTO_VEC-NEXT:    [[TMP12:%.*]] = fmul fast float 5.000000e-01, [[DOTCAST4]]
 ; AUTO_VEC-NEXT:    [[TMP10:%.*]] = fadd fast float 1.000000e+00, [[TMP12]]
 ; AUTO_VEC-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[BC_RESUME_VAL]], i64 0
@@ -85,14 +85,14 @@ define void @fp_iv_loop1(ptr noalias nocapture %A, i32 %N) #0 {
 ; AUTO_VEC-NEXT:    br i1 [[TMP9]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
 ; AUTO_VEC:       [[VEC_EPILOG_MIDDLE_BLOCK]]:
 ; AUTO_VEC-NEXT:    [[CMP_N9:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC3]]
-; AUTO_VEC-NEXT:    br i1 [[CMP_N9]], label %[[FOR_END_LOOPEXIT]], label %[[FOR_BODY]]
-; AUTO_VEC:       [[FOR_BODY]]:
+; AUTO_VEC-NEXT:    br i1 [[CMP_N9]], label %[[FOR_END_LOOPEXIT]], label %[[VEC_EPILOG_SCALAR_PH]]
+; AUTO_VEC:       [[VEC_EPILOG_SCALAR_PH]]:
 ; AUTO_VEC-NEXT:    [[BC_RESUME_VAL10:%.*]] = phi i64 [ [[N_VEC3]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[ITER_CHECK]] ]
 ; AUTO_VEC-NEXT:    [[BC_RESUME_VAL11:%.*]] = phi float [ [[TMP10]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END1]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 1.000000e+00, %[[ITER_CHECK]] ]
 ; AUTO_VEC-NEXT:    br label %[[LOOP:.*]]
 ; AUTO_VEC:       [[LOOP]]:
-; AUTO_VEC-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[LOOP]] ], [ [[BC_RESUME_VAL10]], %[[FOR_BODY]] ]
-; AUTO_VEC-NEXT:    [[X_06:%.*]] = phi float [ [[CONV1:%.*]], %[[LOOP]] ], [ [[BC_RESUME_VAL11]], %[[FOR_BODY]] ]
+; AUTO_VEC-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[LOOP]] ], [ [[BC_RESUME_VAL10]], %[[VEC_EPILOG_SCALAR_PH]] ]
+; AUTO_VEC-NEXT:    [[X_06:%.*]] = phi float [ [[CONV1:%.*]], %[[LOOP]] ], [ [[BC_RESUME_VAL11]], %[[VEC_EPILOG_SCALAR_PH]] ]
 ; AUTO_VEC-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]]
 ; AUTO_VEC-NEXT:    store float [[X_06]], ptr [[ARRAYIDX]], align 4
 ; AUTO_VEC-NEXT:    [[CONV1]] = fadd fast float [[X_06]], 5.000000e-01
@@ -145,19 +145,19 @@ define void @fp_iv_loop2(ptr noalias nocapture %A, i32 %N) {
 ; AUTO_VEC-SAME: ptr noalias captures(none) [[A:%.*]], i32 [[N:%.*]]) #[[ATTR0]] {
 ; AUTO_VEC-NEXT:  [[ENTRY:.*:]]
 ; AUTO_VEC-NEXT:    [[CMP4:%.*]] = icmp sgt i32 [[N]], 0
-; AUTO_VEC-NEXT:    br i1 [[CMP4]], label %[[FOR_BODY_PREHEADER:.*]], label %[[FOR_END:.*]]
-; AUTO_VEC:       [[FOR_BODY_PREHEADER]]:
-; AUTO_VEC-NEXT:    br label %[[FOR_BODY:.*]]
-; AUTO_VEC:       [[FOR_BODY]]:
-; AUTO_VEC-NEXT:    [[INDVARS_IV_EPIL:%.*]] = phi i64 [ [[INDVARS_IV_NEXT_EPIL:%.*]], %[[FOR_BODY]] ], [ 0, %[[FOR_BODY_PREHEADER]] ]
-; AUTO_VEC-NEXT:    [[X_06_EPIL:%.*]] = phi float [ [[CONV1_EPIL:%.*]], %[[FOR_BODY]] ], [ 1.000000e+00, %[[FOR_BODY_PREHEADER]] ]
+; AUTO_VEC-NEXT:    br i1 [[CMP4]], label %[[LOOP_PREHEADER:.*]], label %[[FOR_END:.*]]
+; AUTO_VEC:       [[LOOP_PREHEADER]]:
+; AUTO_VEC-NEXT:    br label %[[LOOP:.*]]
+; AUTO_VEC:       [[LOOP]]:
+; AUTO_VEC-NEXT:    [[INDVARS_IV_EPIL:%.*]] = phi i64 [ [[INDVARS_IV_NEXT_EPIL:%.*]], %[[LOOP]] ], [ 0, %[[LOOP_PREHEADER]] ]
+; AUTO_VEC-NEXT:    [[X_06_EPIL:%.*]] = phi float [ [[CONV1_EPIL:%.*]], %[[LOOP]] ], [ 1.000000e+00, %[[LOOP_PREHEADER]] ]
 ; AUTO_VEC-NEXT:    [[ARRAYIDX_EPIL:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV_EPIL]]
 ; AUTO_VEC-NEXT:    store float [[X_06_EPIL]], ptr [[ARRAYIDX_EPIL]], align 4
 ; AUTO_VEC-NEXT:    [[CONV1_EPIL]] = fadd float [[X_06_EPIL]], 5.000000e-01
 ; AUTO_VEC-NEXT:    [[INDVARS_IV_NEXT_EPIL]] = add nuw nsw i64 [[INDVARS_IV_EPIL]], 1
 ; AUTO_VEC-NEXT:    [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT_EPIL]] to i32
 ; AUTO_VEC-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]]
-; AUTO_VEC-NEXT:    br i1 [[EXITCOND]], label %[[FOR_END_LOOPEXIT:.*]], label %[[FOR_BODY]]
+; AUTO_VEC-NEXT:    br i1 [[EXITCOND]], label %[[FOR_END_LOOPEXIT:.*]], label %[[LOOP]]
 ; AUTO_VEC:       [[FOR_END_LOOPEXIT]]:
 ; AUTO_VEC-NEXT:    br label %[[FOR_END]]
 ; AUTO_VEC:       [[FOR_END]]:
@@ -194,11 +194,11 @@ define double @external_use_with_fast_math(ptr %a, i64 %n) {
 ; AUTO_VEC-NEXT:  [[ENTRY:.*]]:
 ; AUTO_VEC-NEXT:    [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[N]], i64 1)
 ; AUTO_VEC-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[SMAX]], 16
-; AUTO_VEC-NEXT:    br i1 [[MIN_ITERS_CHECK]], label %[[FOR_BODY:.*]], label %[[VECTOR_PH:.*]]
+; AUTO_VEC-NEXT:    br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
 ; AUTO_VEC:       [[VECTOR_PH]]:
 ; AUTO_VEC-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[SMAX]], 16
 ; AUTO_VEC-NEXT:    [[N_VEC:%.*]] = sub i64 [[SMAX]], [[N_MOD_VF]]
-; AUTO_VEC-NEXT:    [[DOTCAST:%.*]] = sitofp i64 [[N_VEC]] to double
+; AUTO_VEC-NEXT:    [[DOTCAST:%.*]] = sitofp fast i64 [[N_VEC]] to double
 ; AUTO_VEC-NEXT:    [[TMP0:%.*]] = fmul fast double 3.000000e+00, [[DOTCAST]]
 ; AUTO_VEC-NEXT:    [[TMP6:%.*]] = fadd fast double 0.000000e+00, [[TMP0]]
 ; AUTO_VEC-NEXT:    br label %[[VECTOR_BODY:.*]]
@@ -223,14 +223,14 @@ define double @external_use_with_fast_math(ptr %a, i64 %n) {
 ; AUTO_VEC:       [[MIDDLE_BLOCK]]:
 ; AUTO_VEC-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[SMAX]], [[N_VEC]]
 ; AUTO_VEC-NEXT:    [[TMP7:%.*]] = fsub fast double [[TMP6]], 3.000000e+00
-; AUTO_VEC-NEXT:    br i1 [[CMP_N]], label %[[FOR_END:.*]], label %[[FOR_BODY]]
-; AUTO_VEC:       [[FOR_BODY]]:
+; AUTO_VEC-NEXT:    br i1 [[CMP_N]], label %[[FOR_END:.*]], label %[[SCALAR_PH]]
+; AUTO_VEC:       [[SCALAR_PH]]:
 ; AUTO_VEC-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
 ; AUTO_VEC-NEXT:    [[BC_RESUME_VAL1:%.*]] = phi double [ [[TMP6]], %[[MIDDLE_BLOCK]] ], [ 0.000000e+00, %[[ENTRY]] ]
 ; AUTO_VEC-NEXT:    br label %[[LOOP:.*]]
 ; AUTO_VEC:       [[LOOP]]:
-; AUTO_VEC-NEXT:    [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[FOR_BODY]] ], [ [[I_NEXT:%.*]], %[[LOOP]] ]
-; AUTO_VEC-NEXT:    [[J:%.*]] = phi double [ [[BC_RESUME_VAL1]], %[[FOR_BODY]] ], [ [[J_NEXT:%.*]], %[[LOOP]] ]
+; AUTO_VEC-NEXT:    [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[I_NEXT:%.*]], %[[LOOP]] ]
+; AUTO_VEC-NEXT:    [[J:%.*]] = phi double [ [[BC_RESUME_VAL1]], %[[SCALAR_PH]] ], [ [[J_NEXT:%.*]], %[[LOOP]] ]
 ; AUTO_VEC-NEXT:    [[T0:%.*]] = getelementptr double, ptr [[A]], i64 [[I]]
 ; AUTO_VEC-NEXT:    store double [[J]], ptr [[T0]], align 8
 ; AUTO_VEC-NEXT:    [[I_NEXT]] = add i64 [[I]], 1
@@ -262,19 +262,19 @@ for.end:
 define double @external_use_without_fast_math(ptr %a, i64 %n) {
 ; AUTO_VEC-LABEL: define double @external_use_without_fast_math(
 ; AUTO_VEC-SAME: ptr [[A:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
-; AUTO_VEC-NEXT:  [[ENTRY_NEW:.*]]:
-; AUTO_VEC-NEXT:    br label %[[FOR_BODY:.*]]
-; AUTO_VEC:       [[FOR_BODY]]:
-; AUTO_VEC-NEXT:    [[I:%.*]] = phi i64 [ 0, %[[ENTRY_NEW]] ], [ [[I_NEXT_7:%.*]], %[[FOR_BODY]] ]
-; AUTO_VEC-NEXT:    [[J:%.*]] = phi double [ 0.000000e+00, %[[ENTRY_NEW]] ], [ [[J_NEXT_7:%.*]], %[[FOR_BODY]] ]
+; AUTO_VEC-NEXT:  [[ENTRY:.*]]:
+; AUTO_VEC-NEXT:    br label %[[LOOP:.*]]
+; AUTO_VEC:       [[LOOP]]:
+; AUTO_VEC-NEXT:    [[I:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[I_NEXT_7:%.*]], %[[LOOP]] ]
+; AUTO_VEC-NEXT:    [[J:%.*]] = phi double [ 0.000000e+00, %[[ENTRY]] ], [ [[J_NEXT_7:%.*]], %[[LOOP]] ]
 ; AUTO_VEC-NEXT:    [[TMP7:%.*]] = getelementptr double, ptr [[A]], i64 [[I]]
 ; AUTO_VEC-NEXT:    store double [[J]], ptr [[TMP7]], align 8
 ; AUTO_VEC-NEXT:    [[I_NEXT_7]] = add i64 [[I]], 1
 ; AUTO_VEC-NEXT:    [[J_NEXT_7]] = fadd double [[J]], 3.000000e+00
 ; AUTO_VEC-NEXT:    [[COND:%.*]] = icmp slt i64 [[I_NEXT_7]], [[N]]
-; AUTO_VEC-NEXT:    br i1 [[COND]], label %[[FOR_BODY]], label %[[FOR_END:.*]]
+; AUTO_VEC-NEXT:    br i1 [[COND]], label %[[LOOP]], label %[[FOR_END:.*]]
 ; AUTO_VEC:       [[FOR_END]]:
-; AUTO_VEC-NEXT:    [[J_LCSSA:%.*]] = phi double [ [[J]], %[[FOR_BODY]] ]
+; AUTO_VEC-NEXT:    [[J_LCSSA:%.*]] = phi double [ [[J]], %[[LOOP]] ]
 ; AUTO_VEC-NEXT:    ret double [[J_LCSSA]]
 ;
 entry:
@@ -309,14 +309,14 @@ define void @fadd_reassoc_FMF(ptr nocapture %p, i32 %N) {
 ; AUTO_VEC-NEXT:  [[ITER_CHECK:.*]]:
 ; AUTO_VEC-NEXT:    [[TMP0:%.*]] = zext i32 [[N]] to i64
 ; AUTO_VEC-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 4
-; AUTO_VEC-NEXT:    br i1 [[MIN_ITERS_CHECK]], label %[[FOR_BODY:.*]], label %[[VECTOR_MAIN_LOOP_ITER_CHECK:.*]]
+; AUTO_VEC-NEXT:    br i1 [[MIN_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH:.*]], label %[[VECTOR_MAIN_LOOP_ITER_CHECK:.*]]
 ; AUTO_VEC:       [[VECTOR_MAIN_LOOP_ITER_CHECK]]:
 ; AUTO_VEC-NEXT:    [[MIN_ITERS_CHECK1:%.*]] = icmp ult i64 [[TMP0]], 32
 ; AUTO_VEC-NEXT:    br i1 [[MIN_ITERS_CHECK1]], label %[[VEC_EPILOG_PH:.*]], label %[[VECTOR_PH:.*]]
 ; AUTO_VEC:       [[VECTOR_PH]]:
 ; AUTO_VEC-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], 32
 ; AUTO_VEC-NEXT:    [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]]
-; AUTO_VEC-NEXT:    [[DOTCAST:%.*]] = sitofp i64 [[N_VEC]] to float
+; AUTO_VEC-NEXT:    [[DOTCAST:%.*]] = sitofp reassoc i64 [[N_VEC]] to float
 ; AUTO_VEC-NEXT:    [[TMP1:%.*]] = fmul reassoc float 4.200000e+01, [[DOTCAST]]
 ; AUTO_VEC-NEXT:    [[IND_END:%.*]] = fadd reassoc float 1.000000e+00, [[TMP1]]
 ; AUTO_VEC-NEXT:    br label %[[VECTOR_BODY:.*]]
@@ -350,18 +350,18 @@ define void @fadd_reassoc_FMF(ptr nocapture %p, i32 %N) {
 ; AUTO_VEC-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]]
 ; AUTO_VEC-NEXT:    br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[VEC_EPILOG_ITER_CHECK:.*]]
 ; AUTO_VEC:       [[VEC_EPILOG_ITER_CHECK]]:
-; AUTO_VEC-NEXT:    [[DOTCAST16:%.*]] = sitofp i64 [[N_VEC]] to float
+; AUTO_VEC-NEXT:    [[DOTCAST16:%.*]] = sitofp reassoc i64 [[N_VEC]] to float
 ; AUTO_VEC-NEXT:    [[TMP12:%.*]] = fmul reassoc float 4.200000e+01, [[DOTCAST16]]
 ; AUTO_VEC-NEXT:    [[IND_END1:%.*]] = fadd reassoc float 1.000000e+00, [[TMP12]]
 ; AUTO_VEC-NEXT:    [[N_VEC_REMAINING:%.*]] = sub i64 [[TMP0]], [[N_VEC]]
 ; AUTO_VEC-NEXT:    [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_VEC_REMAINING]], 4
-; AUTO_VEC-NEXT:    br i1 [[MIN_EPILOG_ITERS_CHECK]], label %[[FOR_BODY]], label %[[VEC_EPILOG_PH]], !prof [[PROF3]]
+; AUTO_VEC-NEXT:    br i1 [[MIN_EPILOG_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]], !prof [[PROF3]]
 ; AUTO_VEC:       [[VEC_EPILOG_PH]]:
 ; AUTO_VEC-NEXT:    [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
 ; AUTO_VEC-NEXT:    [[BC_RESUME_VAL:%.*]] = phi float [ [[IND_END]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 1.000000e+00, %[[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
 ; AUTO_VEC-NEXT:    [[N_MOD_VF5:%.*]] = urem i64 [[TMP0]], 4
 ; AUTO_VEC-NEXT:    [[N_VEC6:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF5]]
-; AUTO_VEC-NEXT:    [[DOTCAST7:%.*]] = sitofp i64 [[N_VEC6]] to float
+; AUTO_VEC-NEXT:    [[DOTCAST7:%.*]] = sitofp reassoc i64 [[N_VEC6]] to float
 ; AUTO_VEC-NEXT:    [[TMP17:%.*]] = fmul reassoc float 4.200000e+01, [[DOTCAST7]]
 ; AUTO_VEC-NEXT:    [[TMP18:%.*]] = fadd reassoc float 1.000000e+00, [[TMP17]]
 ; AUTO_VEC-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[BC_RESUME_VAL]], i64 0
@@ -381,14 +381,14 @@ define void @fadd_reassoc_FMF(ptr nocapture %p, i32 %N) {
 ; AUTO_VEC-NEXT:    br i1 [[TMP15]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
 ; AUTO_VEC:       [[VEC_EPILOG_MIDDLE_BLOCK]]:
 ; AUTO_VEC-NEXT:    [[CMP_N18:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC6]]
-; AUTO_VEC-NEXT:    br i1 [[CMP_N18]], label %[[EXIT]], label %[[FOR_BODY]]
-; AUTO_VEC:       [[FOR_BODY]]:
+; AUTO_VEC-NEXT:    br i1 [[CMP_N18]], label %[[EXIT]], label %[[VEC_EPILOG_SCALAR_PH]]
+; AUTO_VEC:       [[VEC_EPILOG_SCALAR_PH]]:
 ; AUTO_VEC-NEXT:    [[BC_RESUME_VAL14:%.*]] = phi i64 [ [[N_VEC6]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[ITER_CHECK]] ]
 ; AUTO_VEC-NEXT:    [[BC_RESUME_VAL15:%.*]] = phi float [ [[TMP18]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END1]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 1.000000e+00, %[[ITER_CHECK]] ]
 ; AUTO_VEC-NEXT:    br label %[[LOOP:.*]]
 ; AUTO_VEC:       [[LOOP]]:
-; AUTO_VEC-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL14]], %[[FOR_BODY]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[LOOP]] ]
-; AUTO_VEC-NEXT:    [[X_012:%.*]] = phi float [ [[BC_RESUME_VAL15]], %[[FOR_BODY]] ], [ [[ADD3:%.*]], %[[LOOP]] ]
+; AUTO_VEC-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL14]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[LOOP]] ]
+; AUTO_VEC-NEXT:    [[X_012:%.*]] = phi float [ [[BC_RESUME_VAL15]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[ADD3:%.*]], %[[LOOP]] ]
 ; AUTO_VEC-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[P]], i64 [[INDVARS_IV]]
 ; AUTO_VEC-NEXT:    [[TMP16:%.*]] = load float, ptr [[ARRAYIDX]], align 4
 ; AUTO_VEC-NEXT:    [[ADD:%.*]] = fadd reassoc float [[X_012]], [[TMP16]]
diff --git a/llvm/test/Transforms/LoopVectorize/float-induction.ll b/llvm/test/Transforms/LoopVectorize/float-induction.ll
index 2b15aae628274..f000fceaf6e50 100644
--- a/llvm/test/Transforms/LoopVectorize/float-induction.ll
+++ b/llvm/test/Transforms/LoopVectorize/float-induction.ll
@@ -148,7 +148,7 @@ define void @fp_iv_loop1_fast_FMF(float %init, ptr noalias nocapture %A, i32 %N)
 ; VEC1_INTERL2-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; VEC1_INTERL2:       vector.body:
 ; VEC1_INTERL2-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; VEC1_INTERL2-NEXT:    [[DOTCAST2:%.*]] = sitofp i64 [[INDEX]] to float
+; VEC1_INTERL2-NEXT:    [[DOTCAST2:%.*]] = sitofp fast i64 [[INDEX]] to float
 ; VEC1_INTERL2-NEXT:    [[TMP3:%.*]] = fmul fast float [[FPINC]], [[DOTCAST2]]
 ; VEC1_INTERL2-NEXT:    [[OFFSET_IDX:%.*]] = fsub fast float [[INIT]], [[TMP3]]
 ; VEC1_INTERL2-NEXT:    [[TMP4:%.*]] = fsub fast float [[OFFSET_IDX]], [[FPINC]]
@@ -395,7 +395,7 @@ define void @fp_iv_loop1_reassoc_FMF(float %init, ptr noalias nocapture %A, i32
 ; VEC1_INTERL2-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; VEC1_INTERL2:       vector.body:
 ; VEC1_INTERL2-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; VEC1_INTERL2-NEXT:    [[DOTCAST2:%.*]] = sitofp i64 [[INDEX]] to float
+; VEC1_INTERL2-NEXT:    [[DOTCAST2:%.*]] = sitofp reassoc i64 [[INDEX]] to float
 ; VEC1_INTERL2-NEXT:    [[TMP3:%.*]] = fmul reassoc float [[FPINC]], [[DOTCAST2]]
 ; VEC1_INTERL2-NEXT:    [[OFFSET_IDX:%.*]] = fsub reassoc float [[INIT]], [[TMP3]]
 ; VEC1_INTERL2-NEXT:    [[TMP6:%.*]] = fsub reassoc float [[OFFSET_IDX]], [[FPINC]]
@@ -629,7 +629,7 @@ define void @fp_iv_loop2(float %init, ptr noalias nocapture %A, i32 %N) #0 {
 ; VEC1_INTERL2-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; VEC1_INTERL2:       vector.body:
 ; VEC1_INTERL2-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; VEC1_INTERL2-NEXT:    [[DOTCAST2:%.*]] = sitofp i64 [[INDEX]] to float
+; VEC1_INTERL2-NEXT:    [[DOTCAST2:%.*]] = sitofp fast i64 [[INDEX]] to float
 ; VEC1_INTERL2-NEXT:    [[TMP3:%.*]] = fmul fast float [[DOTCAST2]], 5.000000e-01
 ; VEC1_INTERL2-NEXT:    [[OFFSET_IDX:%.*]] = fadd fast float [[INIT]], [[TMP3]]
 ; VEC1_INTERL2-NEXT:    [[TMP4:%.*]] = fadd fast float [[OFFSET_IDX]], 5.000000e-01
@@ -930,9 +930,9 @@ define void @fp_iv_loop3(float %init, ptr noalias nocapture %A, ptr noalias noca
 ; VEC1_INTERL2:       vector.body:
 ; VEC1_INTERL2-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; VEC1_INTERL2-NEXT:    [[TMP4:%.*]] = or disjoint i64 [[INDEX]], 1
-; VEC1_INTERL2-NEXT:    [[DOTCAST5:%.*]] = sitofp i64 [[INDEX]] to float
+; VEC1_INTERL2-NEXT:    [[DOTCAST5:%.*]] = sitofp fast i64 [[INDEX]] to float
 ; VEC1_INTERL2-NEXT:    [[TMP5:%.*]] = fmul fast float [[DOTCAST5]], -5.000000e-01
-; VEC1_INTERL2-NEXT:    [[DOTCAST6:%.*]] = sitofp i64 [[INDEX]] to float
+; VEC1_INTERL2-NEXT:    [[DOTCAST6:%.*]] = sitofp fast i64 [[INDEX]] to float
 ; VEC1_INTERL2-NEXT:    [[TMP6:%.*]] = fmul fast float [[TMP0]], [[DOTCAST6]]
 ; VEC1_INTERL2-NEXT:    [[OFFSET_IDX7:%.*]] = fadd fast float [[INIT]], [[TMP6]]
 ; VEC1_INTERL2-NEXT:    [[TMP7:%.*]] = fadd fast float [[OFFSET_IDX7]], [[TMP0]]
@@ -1210,7 +1210,7 @@ define void @fp_iv_loop4(ptr noalias nocapture %A, i32 %N) {
 ; VEC1_INTERL2-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; VEC1_INTERL2:       vector.body:
 ; VEC1_INTERL2-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; VEC1_INTERL2-NEXT:    [[DOTCAST2:%.*]] = sitofp i64 [[INDEX]] to float
+; VEC1_INTERL2-NEXT:    [[DOTCAST2:%.*]] = sitofp fast i64 [[INDEX]] to float
 ; VEC1_INTERL2-NEXT:    [[TMP3:%.*]] = fmul fast float [[DOTCAST2]], 5.000000e-01
 ; VEC1_INTERL2-NEXT:    [[OFFSET_IDX:%.*]] = fadd fast float [[TMP3]], 1.000000e+00
 ; VEC1_INTERL2-NEXT:    [[TMP4:%.*]] = fadd fast float [[TMP3]], 1.500000e+00
@@ -1321,7 +1321,7 @@ define void @non_primary_iv_float_scalar(ptr %A, i64 %N) {
 ; VEC4_INTERL1-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; VEC4_INTERL1:       vector.body:
 ; VEC4_INTERL1-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE7:%.*]] ]
-; VEC4_INTERL1-NEXT:    [[DOTCAST2:%.*]] = sitofp i64 [[INDEX]] to float
+; VEC4_INTERL1-NEXT:    [[DOTCAST2:%.*]] = sitofp fast i64 [[INDEX]] to float
 ; VEC4_INTERL1-NEXT:    [[TMP0:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]]
 ; VEC4_INTERL1-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP0]], align 4
 ; VEC4_INTERL1-NEXT:    [[TMP1:%.*]] = fcmp fast oeq <4 x float> [[WIDE_LOAD]], zeroinitializer
@@ -1398,7 +1398,7 @@ define void @non_primary_iv_float_scalar(ptr %A, i64 %N) {
 ; VEC4_INTERL2-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; VEC4_INTERL2:       vector.body:
 ; VEC4_INTERL2-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE16:%.*]] ]
-; VEC4_INTERL2-NEXT:    [[DOTCAST2:%.*]] = sitofp i64 [[INDEX]] to float
+; VEC4_INTERL2-NEXT:    [[DOTCAST2:%.*]] = sitofp fast i64 [[INDEX]] to float
 ; VEC4_INTERL2-NEXT:    [[TMP1:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]]
 ; VEC4_INTERL2-NEXT:    [[TMP2:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 16
 ; VEC4_INTERL2-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP1]], align 4
@@ -1514,7 +1514,7 @@ define void @non_primary_iv_float_scalar(ptr %A, i64 %N) {
 ; VEC1_INTERL2-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; VEC1_INTERL2:       vector.body:
 ; VEC1_INTERL2-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE3:%.*]] ]
-; VEC1_INTERL2-NEXT:    [[DOTCAST1:%.*]] = sitofp i64 [[INDEX]] to float
+; VEC1_INTERL2-NEXT:    [[DOTCAST1:%.*]] = sitofp fast i64 [[INDEX]] to float
 ; VEC1_INTERL2-NEXT:    [[TMP0:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]]
 ; VEC1_INTERL2-NEXT:    [[TMP1:%.*]] = getelementptr float, ptr [[A]], i64 [[INDEX]]
 ; VEC1_INTERL2-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[TMP1]], i64 4
@@ -1572,7 +1572,7 @@ define void @non_primary_iv_float_scalar(ptr %A, i64 %N) {
 ; VEC2_INTERL1_PRED_STORE-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; VEC2_INTERL1_PRED_STORE:       vector.body:
 ; VEC2_INTERL1_PRED_STORE-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE3:%.*]] ]
-; VEC2_INTERL1_PRED_STORE-NEXT:    [[DOTCAST2:%.*]] = sitofp i64 [[INDEX]] to float
+; VEC2_INTERL1_PRED_STORE-NEXT:    [[DOTCAST2:%.*]] = sitofp fast i64 [[INDEX]] to float
 ; VEC2_INTERL1_PRED_STORE-NEXT:    [[TMP0:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]]
 ; VEC2_INTERL1_PRED_STORE-NEXT:    [[WIDE_LOAD:%.*]] = load <2 x float>, ptr [[TMP0]], align 4
 ; VEC2_INTERL1_PRED_STORE-NEXT:    [[TMP1:%.*]] = fcmp fast oeq <2 x float> [[WIDE_LOAD]], zeroinitializer
@@ -1693,7 +1693,7 @@ define i32 @float_induction_with_dbg_on_fadd(ptr %dst) {
 ; VEC1_INTERL2-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; VEC1_INTERL2:       vector.body:
 ; VEC1_INTERL2-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; VEC1_INTERL2-NEXT:    [[DOTCAST:%.*]] = sitofp i64 [[INDEX]] to float
+; VEC1_INTERL2-NEXT:    [[DOTCAST:%.*]] = sitofp reassoc i64 [[INDEX]] to float
 ; VEC1_INTERL2-NEXT:    [[TMP0:%.*]] = call reassoc float @llvm.copysign.f32(float 0.000000e+00, float [[DOTCAST]])
 ; VEC1_INTERL2-NEXT:    [[OFFSET_IDX:%.*]] = fadd reassoc float [[TMP0]], 0.000000e+00
 ; VEC1_INTERL2-NEXT:    [[TMP1:%.*]] = getelementptr float, ptr null, i64 [[INDEX]]
diff --git a/llvm/test/Transforms/LoopVectorize/iv_outside_user.ll b/llvm/test/Transforms/LoopVectorize/iv_outside_user.ll
index 3f91baa117b7f..3d0feb12f68b0 100644
--- a/llvm/test/Transforms/LoopVectorize/iv_outside_user.ll
+++ b/llvm/test/Transforms/LoopVectorize/iv_outside_user.ll
@@ -734,7 +734,7 @@ define float @fp_postinc_use_fadd(float %init, ptr noalias nocapture %A, i64 %N,
 ; VEC:       [[VECTOR_PH]]:
 ; VEC-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N]], 2
 ; VEC-NEXT:    [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
-; VEC-NEXT:    [[DOTCAST:%.*]] = sitofp i64 [[N_VEC]] to float
+; VEC-NEXT:    [[DOTCAST:%.*]] = sitofp fast i64 [[N_VEC]] to float
 ; VEC-NEXT:    [[TMP0:%.*]] = fmul fast float [[FPINC]], [[DOTCAST]]
 ; VEC-NEXT:    [[TMP1:%.*]] = fadd fast float [[INIT]], [[TMP0]]
 ; VEC-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <2 x float> poison, float [[INIT]], i64 0
@@ -784,14 +784,14 @@ define float @fp_postinc_use_fadd(float %init, ptr noalias nocapture %A, i64 %N,
 ; INTERLEAVE:       [[VECTOR_PH]]:
 ; INTERLEAVE-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N]], 2
 ; INTERLEAVE-NEXT:    [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
-; INTERLEAVE-NEXT:    [[DOTCAST:%.*]] = sitofp i64 [[N_VEC]] to float
+; INTERLEAVE-NEXT:    [[DOTCAST:%.*]] = sitofp fast i64 [[N_VEC]] to float
 ; INTERLEAVE-NEXT:    [[TMP0:%.*]] = fmul fast float [[FPINC]], [[DOTCAST]]
 ; INTERLEAVE-NEXT:    [[TMP1:%.*]] = fadd fast float [[INIT]], [[TMP0]]
 ; INTERLEAVE-NEXT:    br label %[[VECTOR_BODY:.*]]
 ; INTERLEAVE:       [[VECTOR_BODY]]:
 ; INTERLEAVE-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; INTERLEAVE-NEXT:    [[TMP3:%.*]] = add i64 [[INDEX]], 1
-; INTERLEAVE-NEXT:    [[DOTCAST1:%.*]] = sitofp i64 [[INDEX]] to float
+; INTERLEAVE-NEXT:    [[DOTCAST1:%.*]] = sitofp fast i64 [[INDEX]] to float
 ; INTERLEAVE-NEXT:    [[TMP4:%.*]] = fmul fast float [[FPINC]], [[DOTCAST1]]
 ; INTERLEAVE-NEXT:    [[OFFSET_IDX:%.*]] = fadd fast float [[INIT]], [[TMP4]]
 ; INTERLEAVE-NEXT:    [[TMP7:%.*]] = fmul fast float 1.000000e+00, [[FPINC]]
@@ -849,7 +849,7 @@ define float @fp_postinc_use_fadd_ops_swapped(float %init, ptr noalias nocapture
 ; VEC:       [[VECTOR_PH]]:
 ; VEC-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N]], 2
 ; VEC-NEXT:    [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
-; VEC-NEXT:    [[DOTCAST:%.*]] = sitofp i64 [[N_VEC]] to float
+; VEC-NEXT:    [[DOTCAST:%.*]] = sitofp fast i64 [[N_VEC]] to float
 ; VEC-NEXT:    [[TMP0:%.*]] = fmul fast float [[FPINC]], [[DOTCAST]]
 ; VEC-NEXT:    [[TMP1:%.*]] = fadd fast float [[INIT]], [[TMP0]]
 ; VEC-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <2 x float> poison, float [[INIT]], i64 0
@@ -899,14 +899,14 @@ define float @fp_postinc_use_fadd_ops_swapped(float %init, ptr noalias nocapture
 ; INTERLEAVE:       [[VECTOR_PH]]:
 ; INTERLEAVE-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N]], 2
 ; INTERLEAVE-NEXT:    [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
-; INTERLEAVE-NEXT:    [[DOTCAST:%.*]] = sitofp i64 [[N_VEC]] to float
+; INTERLEAVE-NEXT:    [[DOTCAST:%.*]] = sitofp fast i64 [[N_VEC]] to float
 ; INTERLEAVE-NEXT:    [[TMP0:%.*]] = fmul fast float [[FPINC]], [[DOTCAST]]
 ; INTERLEAVE-NEXT:    [[TMP1:%.*]] = fadd fast float [[INIT]], [[TMP0]]
 ; INTERLEAVE-NEXT:    br label %[[VECTOR_BODY:.*]]
 ; INTERLEAVE:       [[VECTOR_BODY]]:
 ; INTERLEAVE-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; INTERLEAVE-NEXT:    [[TMP3:%.*]] = add i64 [[INDEX]], 1
-; INTERLEAVE-NEXT:    [[DOTCAST1:%.*]] = sitofp i64 [[INDEX]] to float
+; INTERLEAVE-NEXT:    [[DOTCAST1:%.*]] = sitofp fast i64 [[INDEX]] to float
 ; INTERLEAVE-NEXT:    [[TMP4:%.*]] = fmul fast float [[FPINC]], [[DOTCAST1]]
 ; INTERLEAVE-NEXT:    [[OFFSET_IDX:%.*]] = fadd fast float [[INIT]], [[TMP4]]
 ; INTERLEAVE-NEXT:    [[TMP7:%.*]] = fmul fast float 1.000000e+00, [[FPINC]]
@@ -964,7 +964,7 @@ define float @fp_postinc_use_fsub(float %init, ptr noalias nocapture %A, i64 %N,
 ; VEC:       [[VECTOR_PH]]:
 ; VEC-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N]], 2
 ; VEC-NEXT:    [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
-; VEC-NEXT:    [[DOTCAST:%.*]] = sitofp i64 [[N_VEC]] to float
+; VEC-NEXT:    [[DOTCAST:%.*]] = sitofp fast i64 [[N_VEC]] to float
 ; VEC-NEXT:    [[TMP0:%.*]] = fmul fast float [[FPINC]], [[DOTCAST]]
 ; VEC-NEXT:    [[TMP1:%.*]] = fsub fast float [[INIT]], [[TMP0]]
 ; VEC-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <2 x float> poison, float [[INIT]], i64 0
@@ -1014,14 +1014,14 @@ define float @fp_postinc_use_fsub(float %init, ptr noalias nocapture %A, i64 %N,
 ; INTERLEAVE:       [[VECTOR_PH]]:
 ; INTERLEAVE-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N]], 2
 ; INTERLEAVE-NEXT:    [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
-; INTERLEAVE-NEXT:    [[DOTCAST:%.*]] = sitofp i64 [[N_VEC]] to float
+; INTERLEAVE-NEXT:    [[DOTCAST:%.*]] = sitofp fast i64 [[N_VEC]] to float
 ; INTERLEAVE-NEXT:    [[TMP0:%.*]] = fmul fast float [[FPINC]], [[DOTCAST]]
 ; INTERLEAVE-NEXT:    [[TMP1:%.*]] = fsub fast float [[INIT]], [[TMP0]]
 ; INTERLEAVE-NEXT:    br label %[[VECTOR_BODY:.*]]
 ; INTERLEAVE:       [[VECTOR_BODY]]:
 ; INTERLEAVE-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; INTERLEAVE-NEXT:    [[TMP3:%.*]] = add i64 [[INDEX]], 1
-; INTERLEAVE-NEXT:    [[DOTCAST1:%.*]] = sitofp i64 [[INDEX]] to float
+; INTERLEAVE-NEXT:    [[DOTCAST1:%.*]] = sitofp fast i64 [[INDEX]] to float
 ; INTERLEAVE-NEXT:    [[TMP4:%.*]] = fmul fast float [[FPINC]], [[DOTCAST1]]
 ; INTERLEAVE-NEXT:    [[OFFSET_IDX:%.*]] = fsub fast float [[INIT]], [[TMP4]]
 ; INTERLEAVE-NEXT:    [[TMP7:%.*]] = fmul fast float 1.000000e+00, [[FPINC]]
diff --git a/llvm/test/Transforms/LoopVectorize/single_early_exit_live_outs.ll b/llvm/test/Transforms/LoopVectorize/single_early_exit_live_outs.ll
index 54408b24db114..c0767ac153af4 100644
--- a/llvm/test/Transforms/LoopVectorize/single_early_exit_live_outs.ll
+++ b/llvm/test/Transforms/LoopVectorize/single_early_exit_live_outs.ll
@@ -273,7 +273,7 @@ define float @same_exit_block_pre_inc_use1_iv64_endf32() {
 ; CHECK:       vector.early.exit:
 ; CHECK-NEXT:    [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP6]], i1 true)
 ; CHECK-NEXT:    [[TMP10:%.*]] = add i64 [[INDEX1]], [[FIRST_ACTIVE_LANE]]
-; CHECK-NEXT:    [[DOTCAST:%.*]] = sitofp i64 [[TMP10]] to float
+; CHECK-NEXT:    [[DOTCAST:%.*]] = sitofp fast i64 [[TMP10]] to float
 ; CHECK-NEXT:    [[TMP11:%.*]] = fmul fast float 1.000000e+00, [[DOTCAST]]
 ; CHECK-NEXT:    [[EARLY_EXIT_VALUE:%.*]] = fadd fast float 9.000000e+00, [[TMP11]]
 ; CHECK-NEXT:    br label [[LOOP_END]]
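For readers skimming the archived patch: a minimal hand-written sketch of the syntax this change enables. It is not taken from the patch itself; the @fmf_casts function and its particular flag choices are illustrative assumptions, based on the PR title and on the updated `sitofp fast`/`sitofp reassoc`/`sitofp contract` CHECK lines above.

  define double @fmf_casts(i64 %n, double %x) {
    ; sitofp may now carry fast-math flags, as the updated tests show.
    %d = sitofp fast i64 %n to double
    ; per the PR title, fptosi and fptoui accept them as well (illustrative).
    %s = fptosi reassoc double %x to i64
    %u = fptoui contract double %x to i64
    %t = add i64 %s, %u
    %r = sitofp contract i64 %t to double
    %sum = fadd fast double %d, %r
    ret double %sum
  }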