[clang] [llvm] [IR] Allow fast math flags on fptosi, fptoui and sitofp (PR #160475)
via cfe-commits
cfe-commits at lists.llvm.org
Wed Sep 24 07:17:16 PDT 2025
https://github.com/paperchalice updated https://github.com/llvm/llvm-project/pull/160475
>From 8bf6ae5d44b8adb37886b076f84a9ecd571ab3f4 Mon Sep 17 00:00:00 2001
From: PaperChalice <liujunchang97 at outlook.com>
Date: Wed, 24 Sep 2025 17:26:32 +0800
Subject: [PATCH] [IR] Allow fast math flags on fptosi, fptoui and sitofp
---
clang/test/CodeGenCUDA/amdgpu-atomic-ops.cu | 2 +-
clang/test/CodeGenCUDA/managed-var.cu | 2 +-
.../BasicFeatures/AggregateSplatCast.hlsl | 6 +-
.../BasicFeatures/ArrayElementwiseCast.hlsl | 4 +-
.../CodeGenHLSL/BasicFeatures/InitLists.hlsl | 16 +-
.../BasicFeatures/OutputArguments.hlsl | 14 +-
.../BasicFeatures/StructElementwiseCast.hlsl | 8 +-
.../BasicFeatures/VectorElementwiseCast.hlsl | 2 +-
.../standard_conversion_sequences.hlsl | 6 +-
clang/test/CodeGenHLSL/builtins/dot2add.hlsl | 12 +-
.../CodeGenHLSL/builtins/lerp-overloads.hlsl | 48 ++--
.../CodeGenHLSL/builtins/pow-overloads.hlsl | 32 +--
clang/test/Headers/__clang_hip_math.hip | 80 +++---
.../test/Headers/openmp_device_math_isnan.cpp | 4 +-
.../SemaHLSL/VectorOverloadResolution.hlsl | 6 +-
llvm/docs/LangRef.rst | 9 +-
llvm/include/llvm/IR/Operator.h | 3 +
llvm/lib/AsmParser/LLParser.cpp | 6 +-
llvm/test/Assembler/fast-math-flags.ll | 66 +++++
...amdgpu-codegenprepare-fold-binop-select.ll | 4 +-
.../AMDGPU/amdgpu-codegenprepare-idiv.ll | 264 +++++++++---------
.../AMDGPU/amdgpu-simplify-libcall-pow.ll | 40 +--
.../AMDGPU/amdgpu-simplify-libcall-pown.ll | 18 +-
llvm/test/CodeGen/AMDGPU/divrem24-assume.ll | 4 +-
llvm/test/CodeGen/AMDGPU/simplify-libcalls.ll | 4 +-
llvm/test/Transforms/InstCombine/log-pow.ll | 6 +-
.../PowerPC/vplan-scalarivsext-crash.ll | 4 +-
.../LoopVectorize/X86/float-induction-x86.ll | 76 ++---
.../LoopVectorize/float-induction.ll | 22 +-
.../LoopVectorize/iv_outside_user.ll | 18 +-
.../single_early_exit_live_outs.ll | 2 +-
31 files changed, 432 insertions(+), 356 deletions(-)
diff --git a/clang/test/CodeGenCUDA/amdgpu-atomic-ops.cu b/clang/test/CodeGenCUDA/amdgpu-atomic-ops.cu
index 22c40e6d38ea2..b8819b8e550ea 100644
--- a/clang/test/CodeGenCUDA/amdgpu-atomic-ops.cu
+++ b/clang/test/CodeGenCUDA/amdgpu-atomic-ops.cu
@@ -164,7 +164,7 @@ __device__ double ffp4(double *p, float f) {
__device__ double ffp5(double *p, int i) {
// FUN-LABEL: @_Z4ffp5Pdi
- // CHECK: sitofp i32 {{.*}} to double
+ // CHECK: sitofp contract i32 {{.*}} to double
// SAFEIR: atomicrmw fsub ptr {{.*}} monotonic, align 8, [[DEFMD]]
// UNSAFEIR: atomicrmw fsub ptr {{.*}} monotonic, align 8, [[DEFMD]]
__atomic_fetch_sub(p, i, memory_order_relaxed);
diff --git a/clang/test/CodeGenCUDA/managed-var.cu b/clang/test/CodeGenCUDA/managed-var.cu
index 0e7a7be85ac8e..368adece297b7 100644
--- a/clang/test/CodeGenCUDA/managed-var.cu
+++ b/clang/test/CodeGenCUDA/managed-var.cu
@@ -145,7 +145,7 @@ float load3() {
// HOST: %4 = ptrtoint ptr %3 to i64
// HOST: %5 = sub i64 %4, %1
// HOST: %sub.ptr.div = sdiv exact i64 %5, 4
-// HOST: %conv = sitofp i64 %sub.ptr.div to float
+// HOST: %conv = sitofp contract i64 %sub.ptr.div to float
// HOST: ret float %conv
float addr_taken2() {
return (float)reinterpret_cast<long>(&(v2[1].y)-&(v[1].x));
diff --git a/clang/test/CodeGenHLSL/BasicFeatures/AggregateSplatCast.hlsl b/clang/test/CodeGenHLSL/BasicFeatures/AggregateSplatCast.hlsl
index 512fcd435191a..64fda1301d8cb 100644
--- a/clang/test/CodeGenHLSL/BasicFeatures/AggregateSplatCast.hlsl
+++ b/clang/test/CodeGenHLSL/BasicFeatures/AggregateSplatCast.hlsl
@@ -38,7 +38,7 @@ export void call8() {
// CHECK-NEXT: store <1 x float> splat (float 1.000000e+00), ptr [[B]], align 4
// CHECK-NEXT: [[L:%.*]] = load <1 x float>, ptr [[B]], align 4
// CHECK-NEXT: [[VL:%.*]] = extractelement <1 x float> [[L]], i32 0
-// CHECK-NEXT: [[C:%.*]] = fptosi float [[VL]] to i32
+// CHECK-NEXT: [[C:%.*]] = fptosi reassoc nnan ninf nsz arcp afn float [[VL]] to i32
// CHECK-NEXT: [[SI:%.*]] = insertelement <4 x i32> poison, i32 [[C]], i64 0
// CHECK-NEXT: [[S:%.*]] = shufflevector <4 x i32> [[SI]], <4 x i32> poison, <4 x i32> zeroinitializer
// CHECK-NEXT: store <4 x i32> [[S]], ptr [[A]], align 16
@@ -62,7 +62,7 @@ struct S {
// CHECK-NEXT: [[G1:%.*]] = getelementptr inbounds %struct.S, ptr [[s]], i32 0, i32 0
// CHECK-NEXT: [[G2:%.*]] = getelementptr inbounds %struct.S, ptr [[s]], i32 0, i32 1
// CHECK-NEXT: store i32 [[VL]], ptr [[G1]], align 4
-// CHECK-NEXT: [[C:%.*]] = sitofp i32 [[VL]] to float
+// CHECK-NEXT: [[C:%.*]] = sitofp reassoc nnan ninf nsz arcp afn i32 [[VL]] to float
// CHECK-NEXT: store float [[C]], ptr [[G2]], align 4
export void call3() {
int1 A = {1};
@@ -79,7 +79,7 @@ export void call3() {
// CHECK-NEXT: [[G1:%.*]] = getelementptr inbounds %struct.S, ptr [[s]], i32 0, i32 0
// CHECK-NEXT: [[G2:%.*]] = getelementptr inbounds %struct.S, ptr [[s]], i32 0, i32 1
// CHECK-NEXT: store i32 [[VL]], ptr [[G1]], align 4
-// CHECK-NEXT: [[C:%.*]] = sitofp i32 [[VL]] to float
+// CHECK-NEXT: [[C:%.*]] = sitofp reassoc nnan ninf nsz arcp afn i32 [[VL]] to float
// CHECK-NEXT: store float [[C]], ptr [[G2]], align 4
export void call5() {
int1 A = {1};
diff --git a/clang/test/CodeGenHLSL/BasicFeatures/ArrayElementwiseCast.hlsl b/clang/test/CodeGenHLSL/BasicFeatures/ArrayElementwiseCast.hlsl
index ac02ddf5765ed..e743a978e7122 100644
--- a/clang/test/CodeGenHLSL/BasicFeatures/ArrayElementwiseCast.hlsl
+++ b/clang/test/CodeGenHLSL/BasicFeatures/ArrayElementwiseCast.hlsl
@@ -63,7 +63,7 @@ export void call2() {
// CHECK-NEXT: [[C:%.*]] = load <1 x float>, ptr [[A]], align 4
// CHECK-NEXT: [[G1:%.*]] = getelementptr inbounds [1 x i32], ptr [[B]], i32 0, i32 0
// CHECK-NEXT: [[V:%.*]] = extractelement <1 x float> [[C]], i64 0
-// CHECK-NEXT: [[C:%.*]] = fptosi float [[V]] to i32
+// CHECK-NEXT: [[C:%.*]] = fptosi reassoc nnan ninf nsz arcp afn float [[V]] to i32
// CHECK-NEXT: store i32 [[C]], ptr [[G1]], align 4
export void call3() {
float1 A = {1.2};
@@ -84,7 +84,7 @@ export void call3() {
// CHECK-NEXT: [[VG:%.*]] = getelementptr inbounds [1 x <2 x float>], ptr [[Tmp]], i32 0, i32 0
// CHECK-NEXT: [[L:%.*]] = load <2 x float>, ptr [[VG]], align 8
// CHECK-NEXT: [[VL:%.*]] = extractelement <2 x float> [[L]], i32 0
-// CHECK-NEXT: [[C:%.*]] = fptosi float [[VL]] to i32
+// CHECK-NEXT: [[C:%.*]] = fptosi reassoc nnan ninf nsz arcp afn float [[VL]] to i32
// CHECK-NEXT: store i32 [[C]], ptr [[G1]], align 4
// CHECK-NEXT: [[L4:%.*]] = load <2 x float>, ptr [[VG]], align 8
// CHECK-NEXT: [[VL5:%.*]] = extractelement <2 x float> [[L4]], i32 1
diff --git a/clang/test/CodeGenHLSL/BasicFeatures/InitLists.hlsl b/clang/test/CodeGenHLSL/BasicFeatures/InitLists.hlsl
index c30c640519cda..8376131b62149 100644
--- a/clang/test/CodeGenHLSL/BasicFeatures/InitLists.hlsl
+++ b/clang/test/CodeGenHLSL/BasicFeatures/InitLists.hlsl
@@ -98,12 +98,12 @@ TwoFloats case3(int Val) {
// CHECK-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_TWOFLOATS]], ptr [[AGG_RESULT]], i32 0, i32 0
// CHECK-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr [[TWOVALS_ADDR]], align 8
// CHECK-NEXT: [[VECEXT:%.*]] = extractelement <2 x i32> [[TMP0]], i64 0
-// CHECK-NEXT: [[CONV:%.*]] = sitofp i32 [[VECEXT]] to float
+// CHECK-NEXT: [[CONV:%.*]] = sitofp reassoc nnan ninf nsz arcp afn i32 [[VECEXT]] to float
// CHECK-NEXT: store float [[CONV]], ptr [[X]], align 1
// CHECK-NEXT: [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_TWOFLOATS]], ptr [[AGG_RESULT]], i32 0, i32 1
// CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[TWOVALS_ADDR]], align 8
// CHECK-NEXT: [[VECEXT1:%.*]] = extractelement <2 x i32> [[TMP1]], i64 1
-// CHECK-NEXT: [[CONV2:%.*]] = sitofp i32 [[VECEXT1]] to float
+// CHECK-NEXT: [[CONV2:%.*]] = sitofp reassoc nnan ninf nsz arcp afn i32 [[VECEXT1]] to float
// CHECK-NEXT: store float [[CONV2]], ptr [[Y]], align 1
// CHECK-NEXT: ret void
//
@@ -181,7 +181,7 @@ TwoInts case6(TwoFloats TF4) {
// CHECK-NEXT: store i32 [[TMP4]], ptr [[TAILSTATE]], align 1
// CHECK-NEXT: [[HAIRCOUNT:%.*]] = getelementptr inbounds nuw [[STRUCT_DOGGO]], ptr [[AGG_RESULT]], i32 0, i32 2
// CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[VAL_ADDR]], align 4
-// CHECK-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP5]] to float
+// CHECK-NEXT: [[CONV:%.*]] = sitofp reassoc nnan ninf nsz arcp afn i32 [[TMP5]] to float
// CHECK-NEXT: store float [[CONV]], ptr [[HAIRCOUNT]], align 1
// CHECK-NEXT: [[EARDIRECTION:%.*]] = getelementptr inbounds nuw [[STRUCT_DOGGO]], ptr [[AGG_RESULT]], i32 0, i32 3
// CHECK-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_TWOFLOATS]], ptr [[TF1]], i32 0, i32 0
@@ -407,7 +407,7 @@ AnimalBits case8(Doggo D1) {
// CHECK-NEXT: [[HAIRCOUNT58:%.*]] = getelementptr inbounds nuw [[STRUCT_DOGGO]], ptr [[ARRAYINIT_ELEMENT44]], i32 0, i32 2
// CHECK-NEXT: [[COUNTER:%.*]] = getelementptr inbounds nuw [[STRUCT_ANIMALBITS]], ptr [[A1]], i32 0, i32 2
// CHECK-NEXT: [[TMP19:%.*]] = load i64, ptr [[COUNTER]], align 1
-// CHECK-NEXT: [[CONV:%.*]] = sitofp i64 [[TMP19]] to float
+// CHECK-NEXT: [[CONV:%.*]] = sitofp reassoc nnan ninf nsz arcp afn i64 [[TMP19]] to float
// CHECK-NEXT: store float [[CONV]], ptr [[HAIRCOUNT58]], align 1
// CHECK-NEXT: [[EARDIRECTION59:%.*]] = getelementptr inbounds nuw [[STRUCT_DOGGO]], ptr [[ARRAYINIT_ELEMENT44]], i32 0, i32 3
// CHECK-NEXT: [[LEFTDIR:%.*]] = getelementptr inbounds nuw [[STRUCT_ANIMALBITS]], ptr [[A1]], i32 0, i32 3
@@ -542,7 +542,7 @@ AnimalBits case8(Doggo D1) {
// CHECK-NEXT: [[HAIRCOUNT149:%.*]] = getelementptr inbounds nuw [[STRUCT_KITTEH]], ptr [[ARRAYINIT_ELEMENT133]], i32 0, i32 2
// CHECK-NEXT: [[COUNTER150:%.*]] = getelementptr inbounds nuw [[STRUCT_ANIMALBITS]], ptr [[A1]], i32 0, i32 2
// CHECK-NEXT: [[TMP47:%.*]] = load i64, ptr [[COUNTER150]], align 1
-// CHECK-NEXT: [[CONV151:%.*]] = sitofp i64 [[TMP47]] to float
+// CHECK-NEXT: [[CONV151:%.*]] = sitofp reassoc nnan ninf nsz arcp afn i64 [[TMP47]] to float
// CHECK-NEXT: store float [[CONV151]], ptr [[HAIRCOUNT149]], align 1
// CHECK-NEXT: [[CLAWS152:%.*]] = getelementptr inbounds nuw [[STRUCT_KITTEH]], ptr [[ARRAYINIT_ELEMENT133]], i32 0, i32 3
// CHECK-NEXT: [[LEFTDIR153:%.*]] = getelementptr inbounds nuw [[STRUCT_ANIMALBITS]], ptr [[A1]], i32 0, i32 3
@@ -677,7 +677,7 @@ AnimalBits case8(Doggo D1) {
// CHECK-NEXT: [[HAIRCOUNT246:%.*]] = getelementptr inbounds nuw [[STRUCT_KITTEH]], ptr [[ARRAYINIT_ELEMENT230]], i32 0, i32 2
// CHECK-NEXT: [[COUNTER247:%.*]] = getelementptr inbounds nuw [[STRUCT_ANIMALBITS]], ptr [[A1]], i32 0, i32 2
// CHECK-NEXT: [[TMP75:%.*]] = load i64, ptr [[COUNTER247]], align 1
-// CHECK-NEXT: [[CONV248:%.*]] = sitofp i64 [[TMP75]] to float
+// CHECK-NEXT: [[CONV248:%.*]] = sitofp reassoc nnan ninf nsz arcp afn i64 [[TMP75]] to float
// CHECK-NEXT: store float [[CONV248]], ptr [[HAIRCOUNT246]], align 1
// CHECK-NEXT: [[CLAWS249:%.*]] = getelementptr inbounds nuw [[STRUCT_KITTEH]], ptr [[ARRAYINIT_ELEMENT230]], i32 0, i32 3
// CHECK-NEXT: [[LEFTDIR250:%.*]] = getelementptr inbounds nuw [[STRUCT_ANIMALBITS]], ptr [[A1]], i32 0, i32 3
@@ -867,13 +867,13 @@ TwoInts case14(SlicyBits SB) {
// CHECK-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_TWOFLOATS]], ptr [[AGG_RESULT]], i32 0, i32 0
// CHECK-NEXT: [[BF_LOAD:%.*]] = load i8, ptr [[SB]], align 1
// CHECK-NEXT: [[BF_CAST:%.*]] = sext i8 [[BF_LOAD]] to i32
-// CHECK-NEXT: [[CONV:%.*]] = sitofp i32 [[BF_CAST]] to float
+// CHECK-NEXT: [[CONV:%.*]] = sitofp reassoc nnan ninf nsz arcp afn i32 [[BF_CAST]] to float
// CHECK-NEXT: store float [[CONV]], ptr [[X]], align 1
// CHECK-NEXT: [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_TWOFLOATS]], ptr [[AGG_RESULT]], i32 0, i32 1
// CHECK-NEXT: [[W:%.*]] = getelementptr inbounds nuw [[STRUCT_SLICYBITS]], ptr [[SB]], i32 0, i32 1
// CHECK-NEXT: [[BF_LOAD1:%.*]] = load i8, ptr [[W]], align 1
// CHECK-NEXT: [[BF_CAST2:%.*]] = sext i8 [[BF_LOAD1]] to i32
-// CHECK-NEXT: [[CONV3:%.*]] = sitofp i32 [[BF_CAST2]] to float
+// CHECK-NEXT: [[CONV3:%.*]] = sitofp reassoc nnan ninf nsz arcp afn i32 [[BF_CAST2]] to float
// CHECK-NEXT: store float [[CONV3]], ptr [[Y]], align 1
// CHECK-NEXT: ret void
//
diff --git a/clang/test/CodeGenHLSL/BasicFeatures/OutputArguments.hlsl b/clang/test/CodeGenHLSL/BasicFeatures/OutputArguments.hlsl
index d0ba8f447b732..cf73f86474bd5 100644
--- a/clang/test/CodeGenHLSL/BasicFeatures/OutputArguments.hlsl
+++ b/clang/test/CodeGenHLSL/BasicFeatures/OutputArguments.hlsl
@@ -13,14 +13,14 @@ void trunc_Param(inout int X) {}
// CHECK: [[F:%.*]] = alloca float
// CHECK: [[ArgTmp:%.*]] = alloca i32
// CHECK: [[FVal:%.*]] = load float, ptr {{.*}}
-// CHECK: [[IVal:%.*]] = fptosi float [[FVal]] to i32
+// CHECK: [[IVal:%.*]] = fptosi reassoc nnan ninf nsz arcp afn float [[FVal]] to i32
// CHECK: store i32 [[IVal]], ptr [[ArgTmp]]
// CHECK: call void {{.*}}trunc_Param{{.*}}(ptr noalias noundef nonnull align 4 dereferenceable(4) [[ArgTmp]])
// CHECK: [[IRet:%.*]] = load i32, ptr [[ArgTmp]]
-// CHECK: [[FRet:%.*]] = sitofp i32 [[IRet]] to float
+// CHECK: [[FRet:%.*]] = sitofp reassoc nnan ninf nsz arcp afn i32 [[IRet]] to float
// CHECK: store float [[FRet]], ptr [[F]]
-// OPT: [[IVal:%.*]] = fptosi float {{.*}} to i32
-// OPT: [[FVal:%.*]] = sitofp i32 [[IVal]] to float
+// OPT: [[IVal:%.*]] = fptosi reassoc nnan ninf nsz arcp afn float {{.*}} to i32
+// OPT: [[FVal:%.*]] = sitofp reassoc nnan ninf nsz arcp afn i32 [[IVal]] to float
// OPT: ret float [[FVal]]
export float case1(float F) {
trunc_Param(F);
@@ -202,14 +202,14 @@ void trunc_vec(inout int3 V) {}
// CHECK: [[V:%.*]] = alloca <3 x float>
// CHECK: [[Tmp:%.*]] = alloca <3 x i32>
// CHECK: [[FVal:%.*]] = load <3 x float>, ptr [[V]]
-// CHECK: [[IVal:%.*]] = fptosi <3 x float> [[FVal]] to <3 x i32>
+// CHECK: [[IVal:%.*]] = fptosi reassoc nnan ninf nsz arcp afn <3 x float> [[FVal]] to <3 x i32>
// CHECK: store <3 x i32> [[IVal]], ptr [[Tmp]]
// CHECK: call void {{.*}}trunc_vec{{.*}}(ptr noalias noundef nonnull align 16 dereferenceable(16) [[Tmp]])
// CHECK: [[IRet:%.*]] = load <3 x i32>, ptr [[Tmp]]
-// CHECK: [[FRet:%.*]] = sitofp <3 x i32> [[IRet]] to <3 x float>
+// CHECK: [[FRet:%.*]] = sitofp reassoc nnan ninf nsz arcp afn <3 x i32> [[IRet]] to <3 x float>
// CHECK: store <3 x float> [[FRet]], ptr [[V]]
-// OPT: [[IVal:%.*]] = fptosi <3 x float> {{.*}} to <3 x i32>
+// OPT: [[IVal:%.*]] = fptosi reassoc nnan ninf nsz arcp afn <3 x float> {{.*}} to <3 x i32>
// OPT: [[FVal:%.*]] = sitofp <3 x i32> [[IVal]] to <3 x float>
// OPT: ret <3 x float> [[FVal]]
diff --git a/clang/test/CodeGenHLSL/BasicFeatures/StructElementwiseCast.hlsl b/clang/test/CodeGenHLSL/BasicFeatures/StructElementwiseCast.hlsl
index 81b9f5b28cc7e..e4229951e58b5 100644
--- a/clang/test/CodeGenHLSL/BasicFeatures/StructElementwiseCast.hlsl
+++ b/clang/test/CodeGenHLSL/BasicFeatures/StructElementwiseCast.hlsl
@@ -32,7 +32,7 @@ export void call0() {
// CHECK-NEXT: [[VL:%.*]] = extractelement <2 x i32> [[L]], i64 0
// CHECK-NEXT: store i32 [[VL]], ptr [[G1]], align 4
// CHECK-NEXT: [[VL2:%.*]] = extractelement <2 x i32> [[L]], i64 1
-// CHECK-NEXT: [[C:%.*]] = sitofp i32 [[VL2]] to float
+// CHECK-NEXT: [[C:%.*]] = sitofp reassoc nnan ninf nsz arcp afn i32 [[VL2]] to float
// CHECK-NEXT: store float [[C]], ptr [[G2]], align 4
export void call1() {
int2 A = {1,2};
@@ -54,7 +54,7 @@ export void call1() {
// CHECK-NEXT: [[L:%.*]] = load i32, ptr [[G3]], align 4
// CHECK-NEXT: store i32 [[L]], ptr [[G1]], align 4
// CHECK-NEXT: [[L4:%.*]] = load i32, ptr [[G4]], align 4
-// CHECK-NEXT: [[C:%.*]] = sitofp i32 [[L4]] to float
+// CHECK-NEXT: [[C:%.*]] = sitofp reassoc nnan ninf nsz arcp afn i32 [[L4]] to float
// CHECK-NEXT: store float [[C]], ptr [[G2]], align 4
export void call2() {
int A[2] = {1,2};
@@ -104,7 +104,7 @@ export void call6() {
// CHECK-NEXT: [[L:%.*]] = load i32, ptr [[G3]], align 4
// CHECK-NEXT: store i32 [[L]], ptr [[G1]], align 4
// CHECK-NEXT: [[L4:%.*]] = load i32, ptr [[G4]], align 4
-// CHECK-NEXT: [[C:%.*]] = sitofp i32 [[L4]] to float
+// CHECK-NEXT: [[C:%.*]] = sitofp reassoc nnan ninf nsz arcp afn i32 [[L4]] to float
// CHECK-NEXT: store float [[C]], ptr [[G2]], align 4
export void call7() {
int A[2] = {1,2};
@@ -132,7 +132,7 @@ struct T {
// CHECK-NEXT: %load = load i32, ptr %gep2, align 4
// CHECK-NEXT: store i32 %load, ptr %gep, align 4
// CHECK-NEXT: %load5 = load i32, ptr %gep3, align 4
-// CHECK-NEXT: %conv = sitofp i32 %load5 to float
+// CHECK-NEXT: %conv = sitofp reassoc nnan ninf nsz arcp afn i32 %load5 to float
// CHECK-NEXT: store float %conv, ptr %gep1, align 4
export void call8() {
T t = {1,2,3};
diff --git a/clang/test/CodeGenHLSL/BasicFeatures/VectorElementwiseCast.hlsl b/clang/test/CodeGenHLSL/BasicFeatures/VectorElementwiseCast.hlsl
index 253b38a7ca072..a5843b0a95642 100644
--- a/clang/test/CodeGenHLSL/BasicFeatures/VectorElementwiseCast.hlsl
+++ b/clang/test/CodeGenHLSL/BasicFeatures/VectorElementwiseCast.hlsl
@@ -40,7 +40,7 @@ struct S {
// CHECK-NEXT: [[L:%.*]] = load i32, ptr [[G1]], align 4
// CHECK-NEXT: [[C:%.*]] = insertelement <2 x i32> [[B]], i32 [[L]], i64 0
// CHECK-NEXT: [[L2:%.*]] = load float, ptr [[G2]], align 4
-// CHECK-NEXT: [[D:%.*]] = fptosi float [[L2]] to i32
+// CHECK-NEXT: [[D:%.*]] = fptosi reassoc nnan ninf nsz arcp afn float [[L2]] to i32
// CHECK-NEXT: [[E:%.*]] = insertelement <2 x i32> [[C]], i32 [[D]], i64 1
// CHECK-NEXT: store <2 x i32> [[E]], ptr [[A]], align 8
export void call3() {
diff --git a/clang/test/CodeGenHLSL/BasicFeatures/standard_conversion_sequences.hlsl b/clang/test/CodeGenHLSL/BasicFeatures/standard_conversion_sequences.hlsl
index 6770efefe94fe..3e8c06ed074af 100644
--- a/clang/test/CodeGenHLSL/BasicFeatures/standard_conversion_sequences.hlsl
+++ b/clang/test/CodeGenHLSL/BasicFeatures/standard_conversion_sequences.hlsl
@@ -43,7 +43,7 @@ void d4_to_f2() {
// CHECK: [[i2:%.*]] = alloca <2 x i32>
// CHECK: store <2 x float> splat (float 4.000000e+00), ptr [[f2]]
// CHECK: [[vecf2:%.*]] = load <2 x float>, ptr [[f2]]
-// CHECK: [[veci2:%.*]] = fptosi <2 x float> [[vecf2]] to <2 x i32>
+// CHECK: [[veci2:%.*]] = fptosi reassoc nnan ninf nsz arcp afn <2 x float> [[vecf2]] to <2 x i32>
// CHECK: store <2 x i32> [[veci2]], ptr [[i2]]
void f2_to_i2() {
vector<float,2> f2 = 4.0;
@@ -55,7 +55,7 @@ void f2_to_i2() {
// CHECK: [[i2:%.*]] = alloca <2 x i32>
// CHECK: store <4 x double> splat (double 5.000000e+00), ptr [[d4]]
// CHECK: [[vecd4:%.*]] = load <4 x double>, ptr [[d4]]
-// CHECK: [[veci4:%.*]] = fptosi <4 x double> [[vecd4]] to <4 x i32>
+// CHECK: [[veci4:%.*]] = fptosi reassoc nnan ninf nsz arcp afn <4 x double> [[vecd4]] to <4 x i32>
// CHECK: [[veci2:%.*]] = shufflevector <4 x i32> [[veci4]], <4 x i32> poison, <2 x i32> <i32 0, i32 1>
// CHECK: store <2 x i32> [[veci2]], ptr [[i2]]
void d4_to_i2() {
@@ -68,7 +68,7 @@ void d4_to_i2() {
// CHECK: [[l4:%.*]] = alloca <4 x i64>
// CHECK: store <4 x double> splat (double 6.000000e+00), ptr [[d4]]
// CHECK: [[vecd4:%.*]] = load <4 x double>, ptr [[d4]]
-// CHECK: [[vecl4:%.*]] = fptosi <4 x double> [[vecd4]] to <4 x i64>
+// CHECK: [[vecl4:%.*]] = fptosi reassoc nnan ninf nsz arcp afn <4 x double> [[vecd4]] to <4 x i64>
// CHECK: store <4 x i64> [[vecl4]], ptr [[l4]]
void d4_to_l4() {
vector<double,4> d4 = 6.0;
diff --git a/clang/test/CodeGenHLSL/builtins/dot2add.hlsl b/clang/test/CodeGenHLSL/builtins/dot2add.hlsl
index e80ffba2bcfdb..29031c2bddec3 100644
--- a/clang/test/CodeGenHLSL/builtins/dot2add.hlsl
+++ b/clang/test/CodeGenHLSL/builtins/dot2add.hlsl
@@ -106,8 +106,8 @@ float test_double_arg1_arg2_type(double2 p1, double2 p2, float p3) {
// CHECK-LABEL: define {{.*}}test_int16_arg1_arg2_type
float test_int16_arg1_arg2_type(int16_t2 p1, int16_t2 p2, float p3) {
- // CHECK: %conv = sitofp <2 x i16> %{{.*}} to <2 x half>
- // CHECK: %conv1 = sitofp <2 x i16> %{{.*}} to <2 x half>
+ // CHECK: %conv = sitofp reassoc nnan ninf nsz arcp afn <2 x i16> %{{.*}} to <2 x half>
+ // CHECK: %conv1 = sitofp reassoc nnan ninf nsz arcp afn <2 x i16> %{{.*}} to <2 x half>
// CHECK-SPIRV: %[[MUL:.*]] = call reassoc nnan ninf nsz arcp afn half @llvm.spv.fdot.v2f16(<2 x half> %{{.*}}, <2 x half> %{{.*}})
// CHECK-SPIRV: %[[CONV:.*]] = fpext reassoc nnan ninf nsz arcp afn half %[[MUL]] to float
// CHECK-SPIRV: %[[C:.*]] = load float, ptr %c.addr.i, align 4
@@ -123,8 +123,8 @@ float test_int16_arg1_arg2_type(int16_t2 p1, int16_t2 p2, float p3) {
// CHECK-LABEL: define {{.*}}test_int32_arg1_arg2_type
float test_int32_arg1_arg2_type(int32_t2 p1, int32_t2 p2, float p3) {
- // CHECK: %conv = sitofp <2 x i32> %{{.*}} to <2 x half>
- // CHECK: %conv1 = sitofp <2 x i32> %{{.*}} to <2 x half>
+ // CHECK: %conv = sitofp reassoc nnan ninf nsz arcp afn <2 x i32> %{{.*}} to <2 x half>
+ // CHECK: %conv1 = sitofp reassoc nnan ninf nsz arcp afn <2 x i32> %{{.*}} to <2 x half>
// CHECK-SPIRV: %[[MUL:.*]] = call reassoc nnan ninf nsz arcp afn half @llvm.spv.fdot.v2f16(<2 x half> %{{.*}}, <2 x half> %{{.*}})
// CHECK-SPIRV: %[[CONV:.*]] = fpext reassoc nnan ninf nsz arcp afn half %[[MUL]] to float
// CHECK-SPIRV: %[[C:.*]] = load float, ptr %c.addr.i, align 4
@@ -140,8 +140,8 @@ float test_int32_arg1_arg2_type(int32_t2 p1, int32_t2 p2, float p3) {
// CHECK-LABEL: define {{.*}}test_int64_arg1_arg2_type
float test_int64_arg1_arg2_type(int64_t2 p1, int64_t2 p2, float p3) {
- // CHECK: %conv = sitofp <2 x i64> %{{.*}} to <2 x half>
- // CHECK: %conv1 = sitofp <2 x i64> %{{.*}} to <2 x half>
+ // CHECK: %conv = sitofp reassoc nnan ninf nsz arcp afn <2 x i64> %{{.*}} to <2 x half>
+ // CHECK: %conv1 = sitofp reassoc nnan ninf nsz arcp afn <2 x i64> %{{.*}} to <2 x half>
// CHECK-SPIRV: %[[MUL:.*]] = call reassoc nnan ninf nsz arcp afn half @llvm.spv.fdot.v2f16(<2 x half> %{{.*}}, <2 x half> %{{.*}})
// CHECK-SPIRV: %[[CONV:.*]] = fpext reassoc nnan ninf nsz arcp afn half %[[MUL]] to float
// CHECK-SPIRV: %[[C:.*]] = load float, ptr %c.addr.i, align 4
diff --git a/clang/test/CodeGenHLSL/builtins/lerp-overloads.hlsl b/clang/test/CodeGenHLSL/builtins/lerp-overloads.hlsl
index 3b13e43873c77..589f18e67deb8 100644
--- a/clang/test/CodeGenHLSL/builtins/lerp-overloads.hlsl
+++ b/clang/test/CodeGenHLSL/builtins/lerp-overloads.hlsl
@@ -36,33 +36,33 @@ float3 test_lerp_double3(double3 p0) { return lerp(p0, p0, p0); }
float4 test_lerp_double4(double4 p0) { return lerp(p0, p0, p0); }
// CHECK: define [[FNATTRS]] float @_Z13test_lerp_inti(
-// CHECK: [[CONV0:%.*]] = sitofp i32 %{{.*}} to float
-// CHECK: [[CONV1:%.*]] = sitofp i32 %{{.*}} to float
-// CHECK: [[CONV2:%.*]] = sitofp i32 %{{.*}} to float
+// CHECK: [[CONV0:%.*]] = sitofp reassoc nnan ninf nsz arcp afn i32 %{{.*}} to float
+// CHECK: [[CONV1:%.*]] = sitofp reassoc nnan ninf nsz arcp afn i32 %{{.*}} to float
+// CHECK: [[CONV2:%.*]] = sitofp reassoc nnan ninf nsz arcp afn i32 %{{.*}} to float
// CHECK: [[LERP:%.*]] = call {{.*}} float @llvm.[[TARGET]].lerp.f32(float [[CONV0]], float [[CONV1]], float [[CONV2]])
// CHECK: ret float [[LERP]]
float test_lerp_int(int p0) { return lerp(p0, p0, p0); }
// CHECK: define [[FNATTRS]] <2 x float> @_Z14test_lerp_int2Dv2_i(
-// CHECK: [[CONV0:%.*]] = sitofp <2 x i32> %{{.*}} to <2 x float>
-// CHECK: [[CONV1:%.*]] = sitofp <2 x i32> %{{.*}} to <2 x float>
-// CHECK: [[CONV2:%.*]] = sitofp <2 x i32> %{{.*}} to <2 x float>
+// CHECK: [[CONV0:%.*]] = sitofp reassoc nnan ninf nsz arcp afn <2 x i32> %{{.*}} to <2 x float>
+// CHECK: [[CONV1:%.*]] = sitofp reassoc nnan ninf nsz arcp afn <2 x i32> %{{.*}} to <2 x float>
+// CHECK: [[CONV2:%.*]] = sitofp reassoc nnan ninf nsz arcp afn <2 x i32> %{{.*}} to <2 x float>
// CHECK: [[LERP:%.*]] = call {{.*}} <2 x float> @llvm.[[TARGET]].lerp.v2f32(<2 x float> [[CONV0]], <2 x float> [[CONV1]], <2 x float> [[CONV2]])
// CHECK: ret <2 x float> [[LERP]]
float2 test_lerp_int2(int2 p0) { return lerp(p0, p0, p0); }
// CHECK: define [[FNATTRS]] <3 x float> @_Z14test_lerp_int3Dv3_i(
-// CHECK: [[CONV0:%.*]] = sitofp <3 x i32> %{{.*}} to <3 x float>
-// CHECK: [[CONV1:%.*]] = sitofp <3 x i32> %{{.*}} to <3 x float>
-// CHECK: [[CONV2:%.*]] = sitofp <3 x i32> %{{.*}} to <3 x float>
+// CHECK: [[CONV0:%.*]] = sitofp reassoc nnan ninf nsz arcp afn <3 x i32> %{{.*}} to <3 x float>
+// CHECK: [[CONV1:%.*]] = sitofp reassoc nnan ninf nsz arcp afn <3 x i32> %{{.*}} to <3 x float>
+// CHECK: [[CONV2:%.*]] = sitofp reassoc nnan ninf nsz arcp afn <3 x i32> %{{.*}} to <3 x float>
// CHECK: [[LERP:%.*]] = call {{.*}} <3 x float> @llvm.[[TARGET]].lerp.v3f32(<3 x float> [[CONV0]], <3 x float> [[CONV1]], <3 x float> [[CONV2]])
// CHECK: ret <3 x float> [[LERP]]
float3 test_lerp_int3(int3 p0) { return lerp(p0, p0, p0); }
// CHECK: define [[FNATTRS]] <4 x float> @_Z14test_lerp_int4Dv4_i(
-// CHECK: [[CONV0:%.*]] = sitofp <4 x i32> %{{.*}} to <4 x float>
-// CHECK: [[CONV1:%.*]] = sitofp <4 x i32> %{{.*}} to <4 x float>
-// CHECK: [[CONV2:%.*]] = sitofp <4 x i32> %{{.*}} to <4 x float>
+// CHECK: [[CONV0:%.*]] = sitofp reassoc nnan ninf nsz arcp afn <4 x i32> %{{.*}} to <4 x float>
+// CHECK: [[CONV1:%.*]] = sitofp reassoc nnan ninf nsz arcp afn <4 x i32> %{{.*}} to <4 x float>
+// CHECK: [[CONV2:%.*]] = sitofp reassoc nnan ninf nsz arcp afn <4 x i32> %{{.*}} to <4 x float>
// CHECK: [[LERP:%.*]] = call {{.*}} <4 x float> @llvm.[[TARGET]].lerp.v4f32(<4 x float> [[CONV0]], <4 x float> [[CONV1]], <4 x float> [[CONV2]])
// CHECK: ret <4 x float> [[LERP]]
float4 test_lerp_int4(int4 p0) { return lerp(p0, p0, p0); }
@@ -100,33 +100,33 @@ float3 test_lerp_uint3(uint3 p0) { return lerp(p0, p0, p0); }
float4 test_lerp_uint4(uint4 p0) { return lerp(p0, p0, p0); }
// CHECK: define [[FNATTRS]] float @_Z17test_lerp_int64_tl(
-// CHECK: [[CONV0:%.*]] = sitofp i64 %{{.*}} to float
-// CHECK: [[CONV1:%.*]] = sitofp i64 %{{.*}} to float
-// CHECK: [[CONV2:%.*]] = sitofp i64 %{{.*}} to float
+// CHECK: [[CONV0:%.*]] = sitofp reassoc nnan ninf nsz arcp afn i64 %{{.*}} to float
+// CHECK: [[CONV1:%.*]] = sitofp reassoc nnan ninf nsz arcp afn i64 %{{.*}} to float
+// CHECK: [[CONV2:%.*]] = sitofp reassoc nnan ninf nsz arcp afn i64 %{{.*}} to float
// CHECK: [[LERP:%.*]] = call {{.*}} float @llvm.[[TARGET]].lerp.f32(float [[CONV0]], float [[CONV1]], float [[CONV2]])
// CHECK: ret float [[LERP]]
float test_lerp_int64_t(int64_t p0) { return lerp(p0, p0, p0); }
// CHECK: define [[FNATTRS]] <2 x float> @_Z18test_lerp_int64_t2Dv2_l(
-// CHECK: [[CONV0:%.*]] = sitofp <2 x i64> %{{.*}} to <2 x float>
-// CHECK: [[CONV1:%.*]] = sitofp <2 x i64> %{{.*}} to <2 x float>
-// CHECK: [[CONV2:%.*]] = sitofp <2 x i64> %{{.*}} to <2 x float>
+// CHECK: [[CONV0:%.*]] = sitofp reassoc nnan ninf nsz arcp afn <2 x i64> %{{.*}} to <2 x float>
+// CHECK: [[CONV1:%.*]] = sitofp reassoc nnan ninf nsz arcp afn <2 x i64> %{{.*}} to <2 x float>
+// CHECK: [[CONV2:%.*]] = sitofp reassoc nnan ninf nsz arcp afn <2 x i64> %{{.*}} to <2 x float>
// CHECK: [[LERP:%.*]] = call {{.*}} <2 x float> @llvm.[[TARGET]].lerp.v2f32(<2 x float> [[CONV0]], <2 x float> [[CONV1]], <2 x float> [[CONV2]])
// CHECK: ret <2 x float> [[LERP]]
float2 test_lerp_int64_t2(int64_t2 p0) { return lerp(p0, p0, p0); }
// CHECK: define [[FNATTRS]] <3 x float> @_Z18test_lerp_int64_t3Dv3_l(
-// CHECK: [[CONV0:%.*]] = sitofp <3 x i64> %{{.*}} to <3 x float>
-// CHECK: [[CONV1:%.*]] = sitofp <3 x i64> %{{.*}} to <3 x float>
-// CHECK: [[CONV2:%.*]] = sitofp <3 x i64> %{{.*}} to <3 x float>
+// CHECK: [[CONV0:%.*]] = sitofp reassoc nnan ninf nsz arcp afn <3 x i64> %{{.*}} to <3 x float>
+// CHECK: [[CONV1:%.*]] = sitofp reassoc nnan ninf nsz arcp afn <3 x i64> %{{.*}} to <3 x float>
+// CHECK: [[CONV2:%.*]] = sitofp reassoc nnan ninf nsz arcp afn <3 x i64> %{{.*}} to <3 x float>
// CHECK: [[LERP:%.*]] = call {{.*}} <3 x float> @llvm.[[TARGET]].lerp.v3f32(<3 x float> [[CONV0]], <3 x float> [[CONV1]], <3 x float> [[CONV2]])
// CHECK: ret <3 x float> [[LERP]]
float3 test_lerp_int64_t3(int64_t3 p0) { return lerp(p0, p0, p0); }
// CHECK: define [[FNATTRS]] <4 x float> @_Z18test_lerp_int64_t4Dv4_l(
-// CHECK: [[CONV0:%.*]] = sitofp <4 x i64> %{{.*}} to <4 x float>
-// CHECK: [[CONV1:%.*]] = sitofp <4 x i64> %{{.*}} to <4 x float>
-// CHECK: [[CONV2:%.*]] = sitofp <4 x i64> %{{.*}} to <4 x float>
+// CHECK: [[CONV0:%.*]] = sitofp reassoc nnan ninf nsz arcp afn <4 x i64> %{{.*}} to <4 x float>
+// CHECK: [[CONV1:%.*]] = sitofp reassoc nnan ninf nsz arcp afn <4 x i64> %{{.*}} to <4 x float>
+// CHECK: [[CONV2:%.*]] = sitofp reassoc nnan ninf nsz arcp afn <4 x i64> %{{.*}} to <4 x float>
// CHECK: [[LERP:%.*]] = call {{.*}} <4 x float> @llvm.[[TARGET]].lerp.v4f32(<4 x float> [[CONV0]], <4 x float> [[CONV1]], <4 x float> [[CONV2]])
// CHECK: ret <4 x float> [[LERP]]
float4 test_lerp_int64_t4(int64_t4 p0) { return lerp(p0, p0, p0); }
diff --git a/clang/test/CodeGenHLSL/builtins/pow-overloads.hlsl b/clang/test/CodeGenHLSL/builtins/pow-overloads.hlsl
index 0d1f3d3546a33..bcf8997196d2e 100644
--- a/clang/test/CodeGenHLSL/builtins/pow-overloads.hlsl
+++ b/clang/test/CodeGenHLSL/builtins/pow-overloads.hlsl
@@ -28,26 +28,26 @@ float3 test_pow_double3(double3 p0, double3 p1) { return pow(p0, p1); }
float4 test_pow_double4(double4 p0, double4 p1) { return pow(p0, p1); }
// CHECK-LABEL: define hidden noundef nofpclass(nan inf) float {{.*}}test_pow_int
-// CHECK: [[CONV0:%.*]] = sitofp i32 %{{.*}} to float
-// CHECK: [[CONV1:%.*]] = sitofp i32 %{{.*}} to float
+// CHECK: [[CONV0:%.*]] = sitofp reassoc nnan ninf nsz arcp afn i32 %{{.*}} to float
+// CHECK: [[CONV1:%.*]] = sitofp reassoc nnan ninf nsz arcp afn i32 %{{.*}} to float
// CHECK: [[POW:%.*]] = call [[FLOATATTRS]] noundef float @llvm.pow.f32(float [[CONV0]], float [[CONV1]])
// CHECK: ret float [[POW]]
float test_pow_int(int p0, int p1) { return pow(p0, p1); }
// CHECK-LABEL: define hidden noundef nofpclass(nan inf) <2 x float> {{.*}}test_pow_int2
-// CHECK: [[CONV0:%.*]] = sitofp <2 x i32> %{{.*}} to <2 x float>
-// CHECK: [[CONV1:%.*]] = sitofp <2 x i32> %{{.*}} to <2 x float>
+// CHECK: [[CONV0:%.*]] = sitofp reassoc nnan ninf nsz arcp afn <2 x i32> %{{.*}} to <2 x float>
+// CHECK: [[CONV1:%.*]] = sitofp reassoc nnan ninf nsz arcp afn <2 x i32> %{{.*}} to <2 x float>
// CHECK: [[POW:%.*]] = call [[FLOATATTRS]] noundef <2 x float> @llvm.pow.v2f32(<2 x float> [[CONV0]], <2 x float> [[CONV1]])
// CHECK: ret <2 x float> [[POW]]
float2 test_pow_int2(int2 p0, int2 p1) { return pow(p0, p1); }
// CHECK-LABEL: define hidden noundef nofpclass(nan inf) <3 x float> {{.*}}test_pow_int3
-// CHECK: [[CONV0:%.*]] = sitofp <3 x i32> %{{.*}} to <3 x float>
-// CHECK: [[CONV1:%.*]] = sitofp <3 x i32> %{{.*}} to <3 x float>
+// CHECK: [[CONV0:%.*]] = sitofp reassoc nnan ninf nsz arcp afn <3 x i32> %{{.*}} to <3 x float>
+// CHECK: [[CONV1:%.*]] = sitofp reassoc nnan ninf nsz arcp afn <3 x i32> %{{.*}} to <3 x float>
// CHECK: [[POW:%.*]] = call [[FLOATATTRS]] noundef <3 x float> @llvm.pow.v3f32(<3 x float> [[CONV0]], <3 x float> [[CONV1]])
// CHECK: ret <3 x float> [[POW]]
float3 test_pow_int3(int3 p0, int3 p1) { return pow(p0, p1); }
// CHECK-LABEL: define hidden noundef nofpclass(nan inf) <4 x float> {{.*}}test_pow_int4
-// CHECK: [[CONV0:%.*]] = sitofp <4 x i32> %{{.*}} to <4 x float>
-// CHECK: [[CONV1:%.*]] = sitofp <4 x i32> %{{.*}} to <4 x float>
+// CHECK: [[CONV0:%.*]] = sitofp reassoc nnan ninf nsz arcp afn <4 x i32> %{{.*}} to <4 x float>
+// CHECK: [[CONV1:%.*]] = sitofp reassoc nnan ninf nsz arcp afn <4 x i32> %{{.*}} to <4 x float>
// CHECK: [[POW:%.*]] = call [[FLOATATTRS]] noundef <4 x float> @llvm.pow.v4f32(<4 x float> [[CONV0]], <4 x float> [[CONV1]])
// CHECK: ret <4 x float> [[POW]]
float4 test_pow_int4(int4 p0, int4 p1) { return pow(p0, p1); }
@@ -78,26 +78,26 @@ float3 test_pow_uint3(uint3 p0, uint3 p1) { return pow(p0, p1); }
float4 test_pow_uint4(uint4 p0, uint4 p1) { return pow(p0, p1); }
// CHECK-LABEL: define hidden noundef nofpclass(nan inf) float {{.*}}test_pow_int64_t
-// CHECK: [[CONV0:%.*]] = sitofp i64 %{{.*}} to float
-// CHECK: [[CONV1:%.*]] = sitofp i64 %{{.*}} to float
+// CHECK: [[CONV0:%.*]] = sitofp reassoc nnan ninf nsz arcp afn i64 %{{.*}} to float
+// CHECK: [[CONV1:%.*]] = sitofp reassoc nnan ninf nsz arcp afn i64 %{{.*}} to float
// CHECK: [[POW:%.*]] = call [[FLOATATTRS]] noundef float @llvm.pow.f32(float [[CONV0]], float [[CONV1]])
// CHECK: ret float [[POW]]
float test_pow_int64_t(int64_t p0, int64_t p1) { return pow(p0, p1); }
// CHECK-LABEL: define hidden noundef nofpclass(nan inf) <2 x float> {{.*}}test_pow_int64_t2
-// CHECK: [[CONV0:%.*]] = sitofp <2 x i64> %{{.*}} to <2 x float>
-// CHECK: [[CONV1:%.*]] = sitofp <2 x i64> %{{.*}} to <2 x float>
+// CHECK: [[CONV0:%.*]] = sitofp reassoc nnan ninf nsz arcp afn <2 x i64> %{{.*}} to <2 x float>
+// CHECK: [[CONV1:%.*]] = sitofp reassoc nnan ninf nsz arcp afn <2 x i64> %{{.*}} to <2 x float>
// CHECK: [[POW:%.*]] = call [[FLOATATTRS]] noundef <2 x float> @llvm.pow.v2f32(<2 x float> [[CONV0]], <2 x float> [[CONV1]])
// CHECK: ret <2 x float> [[POW]]
float2 test_pow_int64_t2(int64_t2 p0, int64_t2 p1) { return pow(p0, p1); }
// CHECK-LABEL: define hidden noundef nofpclass(nan inf) <3 x float> {{.*}}test_pow_int64_t3
-// CHECK: [[CONV0:%.*]] = sitofp <3 x i64> %{{.*}} to <3 x float>
-// CHECK: [[CONV1:%.*]] = sitofp <3 x i64> %{{.*}} to <3 x float>
+// CHECK: [[CONV0:%.*]] = sitofp reassoc nnan ninf nsz arcp afn <3 x i64> %{{.*}} to <3 x float>
+// CHECK: [[CONV1:%.*]] = sitofp reassoc nnan ninf nsz arcp afn <3 x i64> %{{.*}} to <3 x float>
// CHECK: [[POW:%.*]] = call [[FLOATATTRS]] noundef <3 x float> @llvm.pow.v3f32(<3 x float> [[CONV0]], <3 x float> [[CONV1]])
// CHECK: ret <3 x float> [[POW]]
float3 test_pow_int64_t3(int64_t3 p0, int64_t3 p1) { return pow(p0, p1); }
// CHECK-LABEL: define hidden noundef nofpclass(nan inf) <4 x float> {{.*}}test_pow_int64_t4
-// CHECK: [[CONV0:%.*]] = sitofp <4 x i64> %{{.*}} to <4 x float>
-// CHECK: [[CONV1:%.*]] = sitofp <4 x i64> %{{.*}} to <4 x float>
+// CHECK: [[CONV0:%.*]] = sitofp reassoc nnan ninf nsz arcp afn <4 x i64> %{{.*}} to <4 x float>
+// CHECK: [[CONV1:%.*]] = sitofp reassoc nnan ninf nsz arcp afn <4 x i64> %{{.*}} to <4 x float>
// CHECK: [[POW:%.*]] = call [[FLOATATTRS]] noundef <4 x float> @llvm.pow.v4f32(<4 x float> [[CONV0]], <4 x float> [[CONV1]])
// CHECK: ret <4 x float> [[POW]]
float4 test_pow_int64_t4(int64_t4 p0, int64_t4 p1) { return pow(p0, p1); }
diff --git a/clang/test/Headers/__clang_hip_math.hip b/clang/test/Headers/__clang_hip_math.hip
index b88aa3cc18207..87967557dd671 100644
--- a/clang/test/Headers/__clang_hip_math.hip
+++ b/clang/test/Headers/__clang_hip_math.hip
@@ -3518,35 +3518,35 @@ extern "C" __device__ double test_lgamma(double x) {
// DEFAULT-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
// DEFAULT-NEXT: [[ENTRY:.*:]]
// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract float @llvm.rint.f32(float [[X]])
-// DEFAULT-NEXT: [[CONV_I:%.*]] = fptosi float [[TMP0]] to i64
+// DEFAULT-NEXT: [[CONV_I:%.*]] = fptosi contract float [[TMP0]] to i64
// DEFAULT-NEXT: ret i64 [[CONV_I]]
//
// FINITEONLY-LABEL: define dso_local i64 @test_llrintf(
// FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
// FINITEONLY-NEXT: [[ENTRY:.*:]]
// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract float @llvm.rint.f32(float nofpclass(nan inf) [[X]])
-// FINITEONLY-NEXT: [[CONV_I:%.*]] = fptosi float [[TMP0]] to i64
+// FINITEONLY-NEXT: [[CONV_I:%.*]] = fptosi contract float [[TMP0]] to i64
// FINITEONLY-NEXT: ret i64 [[CONV_I]]
//
// APPROX-LABEL: define dso_local i64 @test_llrintf(
// APPROX-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
// APPROX-NEXT: [[ENTRY:.*:]]
// APPROX-NEXT: [[TMP0:%.*]] = tail call contract float @llvm.rint.f32(float [[X]])
-// APPROX-NEXT: [[CONV_I:%.*]] = fptosi float [[TMP0]] to i64
+// APPROX-NEXT: [[CONV_I:%.*]] = fptosi contract float [[TMP0]] to i64
// APPROX-NEXT: ret i64 [[CONV_I]]
//
// NCRDIV-LABEL: define dso_local i64 @test_llrintf(
// NCRDIV-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
// NCRDIV-NEXT: [[ENTRY:.*:]]
// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract float @llvm.rint.f32(float [[X]])
-// NCRDIV-NEXT: [[CONV_I:%.*]] = fptosi float [[TMP0]] to i64
+// NCRDIV-NEXT: [[CONV_I:%.*]] = fptosi contract float [[TMP0]] to i64
// NCRDIV-NEXT: ret i64 [[CONV_I]]
//
// AMDGCNSPIRV-LABEL: define spir_func i64 @test_llrintf(
// AMDGCNSPIRV-SAME: float noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR3]] {
// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract addrspace(4) float @llvm.rint.f32(float [[X]])
-// AMDGCNSPIRV-NEXT: [[CONV_I:%.*]] = fptosi float [[TMP0]] to i64
+// AMDGCNSPIRV-NEXT: [[CONV_I:%.*]] = fptosi contract float [[TMP0]] to i64
// AMDGCNSPIRV-NEXT: ret i64 [[CONV_I]]
//
extern "C" __device__ long long int test_llrintf(float x) {
@@ -3557,35 +3557,35 @@ extern "C" __device__ long long int test_llrintf(float x) {
// DEFAULT-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
// DEFAULT-NEXT: [[ENTRY:.*:]]
// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract double @llvm.rint.f64(double [[X]])
-// DEFAULT-NEXT: [[CONV_I:%.*]] = fptosi double [[TMP0]] to i64
+// DEFAULT-NEXT: [[CONV_I:%.*]] = fptosi contract double [[TMP0]] to i64
// DEFAULT-NEXT: ret i64 [[CONV_I]]
//
// FINITEONLY-LABEL: define dso_local i64 @test_llrint(
// FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
// FINITEONLY-NEXT: [[ENTRY:.*:]]
// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract double @llvm.rint.f64(double nofpclass(nan inf) [[X]])
-// FINITEONLY-NEXT: [[CONV_I:%.*]] = fptosi double [[TMP0]] to i64
+// FINITEONLY-NEXT: [[CONV_I:%.*]] = fptosi contract double [[TMP0]] to i64
// FINITEONLY-NEXT: ret i64 [[CONV_I]]
//
// APPROX-LABEL: define dso_local i64 @test_llrint(
// APPROX-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
// APPROX-NEXT: [[ENTRY:.*:]]
// APPROX-NEXT: [[TMP0:%.*]] = tail call contract double @llvm.rint.f64(double [[X]])
-// APPROX-NEXT: [[CONV_I:%.*]] = fptosi double [[TMP0]] to i64
+// APPROX-NEXT: [[CONV_I:%.*]] = fptosi contract double [[TMP0]] to i64
// APPROX-NEXT: ret i64 [[CONV_I]]
//
// NCRDIV-LABEL: define dso_local i64 @test_llrint(
// NCRDIV-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
// NCRDIV-NEXT: [[ENTRY:.*:]]
// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract double @llvm.rint.f64(double [[X]])
-// NCRDIV-NEXT: [[CONV_I:%.*]] = fptosi double [[TMP0]] to i64
+// NCRDIV-NEXT: [[CONV_I:%.*]] = fptosi contract double [[TMP0]] to i64
// NCRDIV-NEXT: ret i64 [[CONV_I]]
//
// AMDGCNSPIRV-LABEL: define spir_func i64 @test_llrint(
// AMDGCNSPIRV-SAME: double noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR3]] {
// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract addrspace(4) double @llvm.rint.f64(double [[X]])
-// AMDGCNSPIRV-NEXT: [[CONV_I:%.*]] = fptosi double [[TMP0]] to i64
+// AMDGCNSPIRV-NEXT: [[CONV_I:%.*]] = fptosi contract double [[TMP0]] to i64
// AMDGCNSPIRV-NEXT: ret i64 [[CONV_I]]
//
extern "C" __device__ long long int test_llrint(double x) {
@@ -3596,35 +3596,35 @@ extern "C" __device__ long long int test_llrint(double x) {
// DEFAULT-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
// DEFAULT-NEXT: [[ENTRY:.*:]]
// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract float @llvm.round.f32(float [[X]])
-// DEFAULT-NEXT: [[CONV_I:%.*]] = fptosi float [[TMP0]] to i64
+// DEFAULT-NEXT: [[CONV_I:%.*]] = fptosi contract float [[TMP0]] to i64
// DEFAULT-NEXT: ret i64 [[CONV_I]]
//
// FINITEONLY-LABEL: define dso_local i64 @test_llroundf(
// FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
// FINITEONLY-NEXT: [[ENTRY:.*:]]
// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract float @llvm.round.f32(float nofpclass(nan inf) [[X]])
-// FINITEONLY-NEXT: [[CONV_I:%.*]] = fptosi float [[TMP0]] to i64
+// FINITEONLY-NEXT: [[CONV_I:%.*]] = fptosi contract float [[TMP0]] to i64
// FINITEONLY-NEXT: ret i64 [[CONV_I]]
//
// APPROX-LABEL: define dso_local i64 @test_llroundf(
// APPROX-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
// APPROX-NEXT: [[ENTRY:.*:]]
// APPROX-NEXT: [[TMP0:%.*]] = tail call contract float @llvm.round.f32(float [[X]])
-// APPROX-NEXT: [[CONV_I:%.*]] = fptosi float [[TMP0]] to i64
+// APPROX-NEXT: [[CONV_I:%.*]] = fptosi contract float [[TMP0]] to i64
// APPROX-NEXT: ret i64 [[CONV_I]]
//
// NCRDIV-LABEL: define dso_local i64 @test_llroundf(
// NCRDIV-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
// NCRDIV-NEXT: [[ENTRY:.*:]]
// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract float @llvm.round.f32(float [[X]])
-// NCRDIV-NEXT: [[CONV_I:%.*]] = fptosi float [[TMP0]] to i64
+// NCRDIV-NEXT: [[CONV_I:%.*]] = fptosi contract float [[TMP0]] to i64
// NCRDIV-NEXT: ret i64 [[CONV_I]]
//
// AMDGCNSPIRV-LABEL: define spir_func i64 @test_llroundf(
// AMDGCNSPIRV-SAME: float noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR3]] {
// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract addrspace(4) float @llvm.round.f32(float [[X]])
-// AMDGCNSPIRV-NEXT: [[CONV_I:%.*]] = fptosi float [[TMP0]] to i64
+// AMDGCNSPIRV-NEXT: [[CONV_I:%.*]] = fptosi contract float [[TMP0]] to i64
// AMDGCNSPIRV-NEXT: ret i64 [[CONV_I]]
//
extern "C" __device__ long long int test_llroundf(float x) {
@@ -3635,35 +3635,35 @@ extern "C" __device__ long long int test_llroundf(float x) {
// DEFAULT-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
// DEFAULT-NEXT: [[ENTRY:.*:]]
// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract double @llvm.round.f64(double [[X]])
-// DEFAULT-NEXT: [[CONV_I:%.*]] = fptosi double [[TMP0]] to i64
+// DEFAULT-NEXT: [[CONV_I:%.*]] = fptosi contract double [[TMP0]] to i64
// DEFAULT-NEXT: ret i64 [[CONV_I]]
//
// FINITEONLY-LABEL: define dso_local i64 @test_llround(
// FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
// FINITEONLY-NEXT: [[ENTRY:.*:]]
// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract double @llvm.round.f64(double nofpclass(nan inf) [[X]])
-// FINITEONLY-NEXT: [[CONV_I:%.*]] = fptosi double [[TMP0]] to i64
+// FINITEONLY-NEXT: [[CONV_I:%.*]] = fptosi contract double [[TMP0]] to i64
// FINITEONLY-NEXT: ret i64 [[CONV_I]]
//
// APPROX-LABEL: define dso_local i64 @test_llround(
// APPROX-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
// APPROX-NEXT: [[ENTRY:.*:]]
// APPROX-NEXT: [[TMP0:%.*]] = tail call contract double @llvm.round.f64(double [[X]])
-// APPROX-NEXT: [[CONV_I:%.*]] = fptosi double [[TMP0]] to i64
+// APPROX-NEXT: [[CONV_I:%.*]] = fptosi contract double [[TMP0]] to i64
// APPROX-NEXT: ret i64 [[CONV_I]]
//
// NCRDIV-LABEL: define dso_local i64 @test_llround(
// NCRDIV-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
// NCRDIV-NEXT: [[ENTRY:.*:]]
// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract double @llvm.round.f64(double [[X]])
-// NCRDIV-NEXT: [[CONV_I:%.*]] = fptosi double [[TMP0]] to i64
+// NCRDIV-NEXT: [[CONV_I:%.*]] = fptosi contract double [[TMP0]] to i64
// NCRDIV-NEXT: ret i64 [[CONV_I]]
//
// AMDGCNSPIRV-LABEL: define spir_func i64 @test_llround(
// AMDGCNSPIRV-SAME: double noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR3]] {
// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract addrspace(4) double @llvm.round.f64(double [[X]])
-// AMDGCNSPIRV-NEXT: [[CONV_I:%.*]] = fptosi double [[TMP0]] to i64
+// AMDGCNSPIRV-NEXT: [[CONV_I:%.*]] = fptosi contract double [[TMP0]] to i64
// AMDGCNSPIRV-NEXT: ret i64 [[CONV_I]]
//
extern "C" __device__ long long int test_llround(double x) {
@@ -3980,35 +3980,35 @@ extern "C" __device__ float test_logf(float x) {
// DEFAULT-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
// DEFAULT-NEXT: [[ENTRY:.*:]]
// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract float @llvm.rint.f32(float [[X]])
-// DEFAULT-NEXT: [[CONV_I:%.*]] = fptosi float [[TMP0]] to i64
+// DEFAULT-NEXT: [[CONV_I:%.*]] = fptosi contract float [[TMP0]] to i64
// DEFAULT-NEXT: ret i64 [[CONV_I]]
//
// FINITEONLY-LABEL: define dso_local i64 @test_lrintf(
// FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
// FINITEONLY-NEXT: [[ENTRY:.*:]]
// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract float @llvm.rint.f32(float nofpclass(nan inf) [[X]])
-// FINITEONLY-NEXT: [[CONV_I:%.*]] = fptosi float [[TMP0]] to i64
+// FINITEONLY-NEXT: [[CONV_I:%.*]] = fptosi contract float [[TMP0]] to i64
// FINITEONLY-NEXT: ret i64 [[CONV_I]]
//
// APPROX-LABEL: define dso_local i64 @test_lrintf(
// APPROX-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
// APPROX-NEXT: [[ENTRY:.*:]]
// APPROX-NEXT: [[TMP0:%.*]] = tail call contract float @llvm.rint.f32(float [[X]])
-// APPROX-NEXT: [[CONV_I:%.*]] = fptosi float [[TMP0]] to i64
+// APPROX-NEXT: [[CONV_I:%.*]] = fptosi contract float [[TMP0]] to i64
// APPROX-NEXT: ret i64 [[CONV_I]]
//
// NCRDIV-LABEL: define dso_local i64 @test_lrintf(
// NCRDIV-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
// NCRDIV-NEXT: [[ENTRY:.*:]]
// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract float @llvm.rint.f32(float [[X]])
-// NCRDIV-NEXT: [[CONV_I:%.*]] = fptosi float [[TMP0]] to i64
+// NCRDIV-NEXT: [[CONV_I:%.*]] = fptosi contract float [[TMP0]] to i64
// NCRDIV-NEXT: ret i64 [[CONV_I]]
//
// AMDGCNSPIRV-LABEL: define spir_func i64 @test_lrintf(
// AMDGCNSPIRV-SAME: float noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR3]] {
// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract addrspace(4) float @llvm.rint.f32(float [[X]])
-// AMDGCNSPIRV-NEXT: [[CONV_I:%.*]] = fptosi float [[TMP0]] to i64
+// AMDGCNSPIRV-NEXT: [[CONV_I:%.*]] = fptosi contract float [[TMP0]] to i64
// AMDGCNSPIRV-NEXT: ret i64 [[CONV_I]]
//
extern "C" __device__ long int test_lrintf(float x) {
@@ -4019,35 +4019,35 @@ extern "C" __device__ long int test_lrintf(float x) {
// DEFAULT-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
// DEFAULT-NEXT: [[ENTRY:.*:]]
// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract double @llvm.rint.f64(double [[X]])
-// DEFAULT-NEXT: [[CONV_I:%.*]] = fptosi double [[TMP0]] to i64
+// DEFAULT-NEXT: [[CONV_I:%.*]] = fptosi contract double [[TMP0]] to i64
// DEFAULT-NEXT: ret i64 [[CONV_I]]
//
// FINITEONLY-LABEL: define dso_local i64 @test_lrint(
// FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
// FINITEONLY-NEXT: [[ENTRY:.*:]]
// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract double @llvm.rint.f64(double nofpclass(nan inf) [[X]])
-// FINITEONLY-NEXT: [[CONV_I:%.*]] = fptosi double [[TMP0]] to i64
+// FINITEONLY-NEXT: [[CONV_I:%.*]] = fptosi contract double [[TMP0]] to i64
// FINITEONLY-NEXT: ret i64 [[CONV_I]]
//
// APPROX-LABEL: define dso_local i64 @test_lrint(
// APPROX-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
// APPROX-NEXT: [[ENTRY:.*:]]
// APPROX-NEXT: [[TMP0:%.*]] = tail call contract double @llvm.rint.f64(double [[X]])
-// APPROX-NEXT: [[CONV_I:%.*]] = fptosi double [[TMP0]] to i64
+// APPROX-NEXT: [[CONV_I:%.*]] = fptosi contract double [[TMP0]] to i64
// APPROX-NEXT: ret i64 [[CONV_I]]
//
// NCRDIV-LABEL: define dso_local i64 @test_lrint(
// NCRDIV-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
// NCRDIV-NEXT: [[ENTRY:.*:]]
// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract double @llvm.rint.f64(double [[X]])
-// NCRDIV-NEXT: [[CONV_I:%.*]] = fptosi double [[TMP0]] to i64
+// NCRDIV-NEXT: [[CONV_I:%.*]] = fptosi contract double [[TMP0]] to i64
// NCRDIV-NEXT: ret i64 [[CONV_I]]
//
// AMDGCNSPIRV-LABEL: define spir_func i64 @test_lrint(
// AMDGCNSPIRV-SAME: double noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR3]] {
// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract addrspace(4) double @llvm.rint.f64(double [[X]])
-// AMDGCNSPIRV-NEXT: [[CONV_I:%.*]] = fptosi double [[TMP0]] to i64
+// AMDGCNSPIRV-NEXT: [[CONV_I:%.*]] = fptosi contract double [[TMP0]] to i64
// AMDGCNSPIRV-NEXT: ret i64 [[CONV_I]]
//
extern "C" __device__ long int test_lrint(double x) {
@@ -4058,35 +4058,35 @@ extern "C" __device__ long int test_lrint(double x) {
// DEFAULT-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
// DEFAULT-NEXT: [[ENTRY:.*:]]
// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract float @llvm.round.f32(float [[X]])
-// DEFAULT-NEXT: [[CONV_I:%.*]] = fptosi float [[TMP0]] to i64
+// DEFAULT-NEXT: [[CONV_I:%.*]] = fptosi contract float [[TMP0]] to i64
// DEFAULT-NEXT: ret i64 [[CONV_I]]
//
// FINITEONLY-LABEL: define dso_local i64 @test_lroundf(
// FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
// FINITEONLY-NEXT: [[ENTRY:.*:]]
// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract float @llvm.round.f32(float nofpclass(nan inf) [[X]])
-// FINITEONLY-NEXT: [[CONV_I:%.*]] = fptosi float [[TMP0]] to i64
+// FINITEONLY-NEXT: [[CONV_I:%.*]] = fptosi contract float [[TMP0]] to i64
// FINITEONLY-NEXT: ret i64 [[CONV_I]]
//
// APPROX-LABEL: define dso_local i64 @test_lroundf(
// APPROX-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
// APPROX-NEXT: [[ENTRY:.*:]]
// APPROX-NEXT: [[TMP0:%.*]] = tail call contract float @llvm.round.f32(float [[X]])
-// APPROX-NEXT: [[CONV_I:%.*]] = fptosi float [[TMP0]] to i64
+// APPROX-NEXT: [[CONV_I:%.*]] = fptosi contract float [[TMP0]] to i64
// APPROX-NEXT: ret i64 [[CONV_I]]
//
// NCRDIV-LABEL: define dso_local i64 @test_lroundf(
// NCRDIV-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
// NCRDIV-NEXT: [[ENTRY:.*:]]
// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract float @llvm.round.f32(float [[X]])
-// NCRDIV-NEXT: [[CONV_I:%.*]] = fptosi float [[TMP0]] to i64
+// NCRDIV-NEXT: [[CONV_I:%.*]] = fptosi contract float [[TMP0]] to i64
// NCRDIV-NEXT: ret i64 [[CONV_I]]
//
// AMDGCNSPIRV-LABEL: define spir_func i64 @test_lroundf(
// AMDGCNSPIRV-SAME: float noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR3]] {
// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract addrspace(4) float @llvm.round.f32(float [[X]])
-// AMDGCNSPIRV-NEXT: [[CONV_I:%.*]] = fptosi float [[TMP0]] to i64
+// AMDGCNSPIRV-NEXT: [[CONV_I:%.*]] = fptosi contract float [[TMP0]] to i64
// AMDGCNSPIRV-NEXT: ret i64 [[CONV_I]]
//
extern "C" __device__ long int test_lroundf(float x) {
@@ -4097,35 +4097,35 @@ extern "C" __device__ long int test_lroundf(float x) {
// DEFAULT-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
// DEFAULT-NEXT: [[ENTRY:.*:]]
// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract double @llvm.round.f64(double [[X]])
-// DEFAULT-NEXT: [[CONV_I:%.*]] = fptosi double [[TMP0]] to i64
+// DEFAULT-NEXT: [[CONV_I:%.*]] = fptosi contract double [[TMP0]] to i64
// DEFAULT-NEXT: ret i64 [[CONV_I]]
//
// FINITEONLY-LABEL: define dso_local i64 @test_lround(
// FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
// FINITEONLY-NEXT: [[ENTRY:.*:]]
// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract double @llvm.round.f64(double nofpclass(nan inf) [[X]])
-// FINITEONLY-NEXT: [[CONV_I:%.*]] = fptosi double [[TMP0]] to i64
+// FINITEONLY-NEXT: [[CONV_I:%.*]] = fptosi contract double [[TMP0]] to i64
// FINITEONLY-NEXT: ret i64 [[CONV_I]]
//
// APPROX-LABEL: define dso_local i64 @test_lround(
// APPROX-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
// APPROX-NEXT: [[ENTRY:.*:]]
// APPROX-NEXT: [[TMP0:%.*]] = tail call contract double @llvm.round.f64(double [[X]])
-// APPROX-NEXT: [[CONV_I:%.*]] = fptosi double [[TMP0]] to i64
+// APPROX-NEXT: [[CONV_I:%.*]] = fptosi contract double [[TMP0]] to i64
// APPROX-NEXT: ret i64 [[CONV_I]]
//
// NCRDIV-LABEL: define dso_local i64 @test_lround(
// NCRDIV-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
// NCRDIV-NEXT: [[ENTRY:.*:]]
// NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract double @llvm.round.f64(double [[X]])
-// NCRDIV-NEXT: [[CONV_I:%.*]] = fptosi double [[TMP0]] to i64
+// NCRDIV-NEXT: [[CONV_I:%.*]] = fptosi contract double [[TMP0]] to i64
// NCRDIV-NEXT: ret i64 [[CONV_I]]
//
// AMDGCNSPIRV-LABEL: define spir_func i64 @test_lround(
// AMDGCNSPIRV-SAME: double noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR3]] {
// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract addrspace(4) double @llvm.round.f64(double [[X]])
-// AMDGCNSPIRV-NEXT: [[CONV_I:%.*]] = fptosi double [[TMP0]] to i64
+// AMDGCNSPIRV-NEXT: [[CONV_I:%.*]] = fptosi contract double [[TMP0]] to i64
// AMDGCNSPIRV-NEXT: ret i64 [[CONV_I]]
//
extern "C" __device__ long int test_lround(double x) {
diff --git a/clang/test/Headers/openmp_device_math_isnan.cpp b/clang/test/Headers/openmp_device_math_isnan.cpp
index 3fd98813f2480..ccc4717af881e 100644
--- a/clang/test/Headers/openmp_device_math_isnan.cpp
+++ b/clang/test/Headers/openmp_device_math_isnan.cpp
@@ -29,7 +29,7 @@ double math(float f, double d) {
double r = 0;
// INT_RETURN: call noundef i32 @__nv_isnanf(float
// AMD_INT_RETURN_SAFE: call i1 @llvm.is.fpclass.f32(float{{.*}}, i32 3)
- // AMD_INT_RETURN_FAST: sitofp i32 {{.*}} to double
+ // AMD_INT_RETURN_FAST: sitofp contract i32 {{.*}} to double
// SPIRV_INT_RETURN: call spir_func noundef i32 @_Z5isnanf(float
// BOOL_RETURN: call noundef i32 @__nv_isnanf(float
// SPIRV_BOOL_RETURN: call spir_func noundef zeroext i1 @_Z5isnanf(float
@@ -39,7 +39,7 @@ double math(float f, double d) {
// INT_RETURN: call noundef i32 @__nv_isnand(double
// SPIRV_INT_RETURN: call spir_func noundef i32 @_Z5isnand(double
// AMD_INT_RETURN_SAFE: call i1 @llvm.is.fpclass.f64(double{{.*}}, i32 3)
- // AMD_INT_RETURN_FAST: sitofp i32 {{.*}} to double
+ // AMD_INT_RETURN_FAST: sitofp contract i32 {{.*}} to double
// BOOL_RETURN: call noundef i32 @__nv_isnand(double
// SPIRV_BOOL_RETURN: call spir_func noundef zeroext i1 @_Z5isnand(double
// AMD_BOOL_RETURN_SAFE: call i1 @llvm.is.fpclass.f64(double{{.*}}, i32 3)
diff --git a/clang/test/SemaHLSL/VectorOverloadResolution.hlsl b/clang/test/SemaHLSL/VectorOverloadResolution.hlsl
index b320abdd81182..0201e6807038d 100644
--- a/clang/test/SemaHLSL/VectorOverloadResolution.hlsl
+++ b/clang/test/SemaHLSL/VectorOverloadResolution.hlsl
@@ -40,7 +40,7 @@ void Fn3( int64_t2 p0);
// CHECK-NEXT: ImplicitCastExpr {{.*}} 'half2':'vector<half, 2>' <LValueToRValue>
// CHECK-NEXT: DeclRefExpr {{.*}} 'half2':'vector<half, 2>' lvalue ParmVar {{.*}} 'p0' 'half2':'vector<half, 2>'
// CHECKIR-LABEL: Call3
-// CHECKIR: {{.*}} = fptosi <2 x half> {{.*}} to <2 x i64>
+// CHECKIR: {{.*}} = fptosi reassoc nnan ninf nsz arcp afn <2 x half> {{.*}} to <2 x i64>
void Call3(half2 p0) {
Fn3(p0);
}
@@ -53,7 +53,7 @@ void Call3(half2 p0) {
// CHECK-NEXT: ImplicitCastExpr {{.*}} 'float2':'vector<float, 2>' <LValueToRValue>
// CHECK-NEXT: DeclRefExpr {{.*}} 'float2':'vector<float, 2>' lvalue ParmVar {{.*}} 'p0' 'float2':'vector<float, 2>'
// CHECKIR-LABEL: Call4
-// CHECKIR: {{.*}} = fptosi <2 x float> {{.*}} to <2 x i64>
+// CHECKIR: {{.*}} = fptosi reassoc nnan ninf nsz arcp afn <2 x float> {{.*}} to <2 x i64>
void Call4(float2 p0) {
Fn3(p0);
}
@@ -68,7 +68,7 @@ void Fn4( float2 p0);
// CHECK-NEXT: ImplicitCastExpr {{.*}} 'int64_t2':'vector<int64_t, 2>' <LValueToRValue>
// CHECK-NEXT: DeclRefExpr {{.*}} 'int64_t2':'vector<int64_t, 2>' lvalue ParmVar {{.*}} 'p0' 'int64_t2':'vector<int64_t, 2>'
// CHECKIR-LABEL: Call5
-// CHECKIR: {{.*}} = sitofp <2 x i64> {{.*}} to <2 x float>
+// CHECKIR: {{.*}} = sitofp reassoc nnan ninf nsz arcp afn <2 x i64> {{.*}} to <2 x float>
void Call5(int64_t2 p0) {
Fn4(p0);
}
diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst
index e6713c827d6ab..b6822af87735c 100644
--- a/llvm/docs/LangRef.rst
+++ b/llvm/docs/LangRef.rst
@@ -3960,7 +3960,8 @@ Fast-Math Flags
LLVM IR floating-point operations (:ref:`fneg <i_fneg>`, :ref:`fadd <i_fadd>`,
:ref:`fsub <i_fsub>`, :ref:`fmul <i_fmul>`, :ref:`fdiv <i_fdiv>`,
:ref:`frem <i_frem>`, :ref:`fcmp <i_fcmp>`, :ref:`fptrunc <i_fptrunc>`,
-:ref:`fpext <i_fpext>`), and :ref:`phi <i_phi>`, :ref:`select <i_select>`, or
+:ref:`fpext <i_fpext>`), :ref:`fptoui <i_fptoui>`, :ref:`fptosi <i_fptosi>`,
+:ref:`sitofp <i_sitofp>`, and :ref:`phi <i_phi>`, :ref:`select <i_select>`, or
:ref:`call <i_call>` instructions that return floating-point types may use the
following flags to enable otherwise unsafe floating-point transformations.
@@ -12271,6 +12272,8 @@ Example:
%X = fpext float 3.125 to double ; yields double:3.125000e+00
%Y = fpext double %X to fp128 ; yields fp128:0xL00000000000000004000900000000000
+.. _i_fptoui:
+
'``fptoui .. to``' Instruction
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
@@ -12313,6 +12316,8 @@ Example:
%Y = fptoui float 1.0E+300 to i1 ; yields undefined:1
%Z = fptoui float 1.04E+17 to i8 ; yields undefined:1
+.. _i_fptosi:
+
'``fptosi .. to``' Instruction
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
@@ -12407,6 +12412,8 @@ Example:
%a = uitofp nneg i32 256 to i32 ; yields float:256.0
%b = uitofp nneg i32 -256 to i32 ; yields i32 poison
+.. _i_sitofp:
+
'``sitofp .. to``' Instruction
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
diff --git a/llvm/include/llvm/IR/Operator.h b/llvm/include/llvm/IR/Operator.h
index 10816c0e62c29..b725e01462e70 100644
--- a/llvm/include/llvm/IR/Operator.h
+++ b/llvm/include/llvm/IR/Operator.h
@@ -362,6 +362,9 @@ class FPMathOperator : public Operator {
case Instruction::FRem:
case Instruction::FPTrunc:
case Instruction::FPExt:
+ case Instruction::FPToUI:
+ case Instruction::FPToSI:
+ case Instruction::SIToFP:
// FIXME: To clean up and correct the semantics of fast-math-flags, FCmp
// should not be treated as a math op, but the other opcodes should.
// This would make things consistent with Select/PHI (FP value type
diff --git a/llvm/lib/AsmParser/LLParser.cpp b/llvm/lib/AsmParser/LLParser.cpp
index 897e679095906..c2f856aa65bf6 100644
--- a/llvm/lib/AsmParser/LLParser.cpp
+++ b/llvm/lib/AsmParser/LLParser.cpp
@@ -7319,13 +7319,13 @@ int LLParser::parseInstruction(Instruction *&Inst, BasicBlock *BB,
case lltok::kw_sext:
case lltok::kw_bitcast:
case lltok::kw_addrspacecast:
- case lltok::kw_sitofp:
- case lltok::kw_fptoui:
- case lltok::kw_fptosi:
case lltok::kw_inttoptr:
case lltok::kw_ptrtoaddr:
case lltok::kw_ptrtoint:
return parseCast(Inst, PFS, KeywordVal);
+ case lltok::kw_sitofp:
+ case lltok::kw_fptoui:
+ case lltok::kw_fptosi:
case lltok::kw_fptrunc:
case lltok::kw_fpext: {
FastMathFlags FMF = EatFastMathFlagsIfPresent();
diff --git a/llvm/test/Assembler/fast-math-flags.ll b/llvm/test/Assembler/fast-math-flags.ll
index 9c08e9da1d19e..2a725cd445182 100644
--- a/llvm/test/Assembler/fast-math-flags.ll
+++ b/llvm/test/Assembler/fast-math-flags.ll
@@ -56,6 +56,24 @@ entry:
%h_vec = fptrunc <3 x float> %vec to <3 x half>
; CHECK: %h_scalable = fptrunc <vscale x 3 x float> %scalable to <vscale x 3 x half>
%h_scalable = fptrunc <vscale x 3 x float> %scalable to <vscale x 3 x half>
+; CHECK: %i = fptoui float %x to i32
+ %i = fptoui float %x to i32
+; CHECK: %i_vec = fptoui <3 x float> %vec to <3 x i32>
+ %i_vec = fptoui <3 x float> %vec to <3 x i32>
+; CHECK: %i_scalable = fptoui <vscale x 3 x float> %scalable to <vscale x 3 x i32>
+ %i_scalable = fptoui <vscale x 3 x float> %scalable to <vscale x 3 x i32>
+; CHECK: %j = fptosi float %x to i32
+ %j = fptosi float %x to i32
+; CHECK: %j_vec = fptosi <3 x float> %vec to <3 x i32>
+ %j_vec = fptosi <3 x float> %vec to <3 x i32>
+; CHECK: %j_scalable = fptosi <vscale x 3 x float> %scalable to <vscale x 3 x i32>
+ %j_scalable = fptosi <vscale x 3 x float> %scalable to <vscale x 3 x i32>
+; CHECK: %k = sitofp i32 %j to float
+ %k = sitofp i32 %j to float
+; CHECK: %k_vec = sitofp <3 x i32> %j_vec to <3 x float>
+ %k_vec = sitofp <3 x i32> %j_vec to <3 x float>
+; CHECK: %k_scalable = sitofp <vscale x 3 x i32> %j_scalable to <vscale x 3 x float>
+ %k_scalable = sitofp <vscale x 3 x i32> %j_scalable to <vscale x 3 x float>
; CHECK: ret float %f
ret float %f
}
@@ -108,6 +126,24 @@ entry:
%h_vec = fptrunc nnan <3 x float> %vec to <3 x half>
; CHECK: %h_scalable = fptrunc nnan <vscale x 3 x float> %scalable to <vscale x 3 x half>
%h_scalable = fptrunc nnan <vscale x 3 x float> %scalable to <vscale x 3 x half>
+; CHECK: %i = fptoui nnan float %x to i32
+ %i = fptoui nnan float %x to i32
+; CHECK: %i_vec = fptoui nnan <3 x float> %vec to <3 x i32>
+ %i_vec = fptoui nnan <3 x float> %vec to <3 x i32>
+; CHECK: %i_scalable = fptoui nnan <vscale x 3 x float> %scalable to <vscale x 3 x i32>
+ %i_scalable = fptoui nnan <vscale x 3 x float> %scalable to <vscale x 3 x i32>
+; CHECK: %j = fptosi nnan float %x to i32
+ %j = fptosi nnan float %x to i32
+; CHECK: %j_vec = fptosi nnan <3 x float> %vec to <3 x i32>
+ %j_vec = fptosi nnan <3 x float> %vec to <3 x i32>
+; CHECK: %j_scalable = fptosi nnan <vscale x 3 x float> %scalable to <vscale x 3 x i32>
+ %j_scalable = fptosi nnan <vscale x 3 x float> %scalable to <vscale x 3 x i32>
+; CHECK: %k = sitofp nnan i32 %j to float
+ %k = sitofp nnan i32 %j to float
+; CHECK: %k_vec = sitofp nnan <3 x i32> %j_vec to <3 x float>
+ %k_vec = sitofp nnan <3 x i32> %j_vec to <3 x float>
+; CHECK: %k_scalable = sitofp nnan <vscale x 3 x i32> %j_scalable to <vscale x 3 x float>
+ %k_scalable = sitofp nnan <vscale x 3 x i32> %j_scalable to <vscale x 3 x float>
; CHECK: ret float %f
ret float %f
}
@@ -125,6 +161,12 @@ entry:
%d = fpext contract float %x to double
; CHECK: %e = fptrunc contract float %x to half
%e = fptrunc contract float %x to half
+; CHECK: %f = fptoui contract float %x to i32
+ %f = fptoui contract float %x to i32
+; CHECK: %g = fptosi contract float %x to i32
+ %g = fptosi contract float %x to i32
+; CHECK: %h = sitofp contract i32 %g to float
+ %h = sitofp contract i32 %g to float
ret float %c
}
@@ -140,6 +182,12 @@ define float @reassoc(float %x, float %y) {
%d = fpext reassoc float %x to double
; CHECK: %e = fptrunc reassoc float %x to half
%e = fptrunc reassoc float %x to half
+; CHECK: %f = fptoui reassoc float %x to i32
+ %f = fptoui reassoc float %x to i32
+; CHECK: %g = fptosi reassoc float %x to i32
+ %g = fptosi reassoc float %x to i32
+; CHECK: %h = sitofp reassoc i32 %g to float
+ %h = sitofp reassoc i32 %g to float
ret float %c
}
@@ -198,6 +246,24 @@ entry:
%g_vec = fptrunc ninf nnan <3 x float> %vec to <3 x half>
; CHECK: %g_scalable = fptrunc nnan ninf <vscale x 3 x float> %scalable to <vscale x 3 x half>
%g_scalable = fptrunc ninf nnan <vscale x 3 x float> %scalable to <vscale x 3 x half>
+; CHECK: %i = fptoui nnan ninf float %x to i32
+ %i = fptoui ninf nnan float %x to i32
+; CHECK: %i_vec = fptoui nnan ninf <3 x float> %vec to <3 x i32>
+ %i_vec = fptoui ninf nnan <3 x float> %vec to <3 x i32>
+; CHECK: %i_scalable = fptoui nnan ninf <vscale x 3 x float> %scalable to <vscale x 3 x i32>
+ %i_scalable = fptoui ninf nnan <vscale x 3 x float> %scalable to <vscale x 3 x i32>
+; CHECK: %j = fptosi nnan ninf float %x to i32
+ %j = fptosi ninf nnan float %x to i32
+; CHECK: %j_vec = fptosi nnan ninf <3 x float> %vec to <3 x i32>
+ %j_vec = fptosi ninf nnan <3 x float> %vec to <3 x i32>
+; CHECK: %j_scalable = fptosi nnan ninf <vscale x 3 x float> %scalable to <vscale x 3 x i32>
+ %j_scalable = fptosi ninf nnan <vscale x 3 x float> %scalable to <vscale x 3 x i32>
+; CHECK: %k = sitofp nnan ninf i32 %j to float
+ %k = sitofp ninf nnan i32 %j to float
+; CHECK: %k_vec = sitofp nnan ninf <3 x i32> %j_vec to <3 x float>
+ %k_vec = sitofp ninf nnan <3 x i32> %j_vec to <3 x float>
+; CHECK: %k_scalable = sitofp nnan ninf <vscale x 3 x i32> %j_scalable to <vscale x 3 x float>
+ %k_scalable = sitofp ninf nnan <vscale x 3 x i32> %j_scalable to <vscale x 3 x float>
; CHECK: ret float %e
ret float %e
}
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-fold-binop-select.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-fold-binop-select.ll
index e71bf15384727..152b5b37c8308 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-fold-binop-select.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-fold-binop-select.ll
@@ -94,7 +94,7 @@ define i32 @select_sdiv_lhs_opaque_const0_i32(i1 %cond) {
; IR-NEXT: [[TMP5:%.*]] = uitofp i32 [[TMP4]] to float
; IR-NEXT: [[TMP6:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP5]])
; IR-NEXT: [[TMP7:%.*]] = fmul fast float [[TMP6]], 0x41EFFFFFC0000000
-; IR-NEXT: [[TMP8:%.*]] = fptoui float [[TMP7]] to i32
+; IR-NEXT: [[TMP8:%.*]] = fptoui fast float [[TMP7]] to i32
; IR-NEXT: [[TMP9:%.*]] = sub i32 0, [[TMP4]]
; IR-NEXT: [[TMP10:%.*]] = mul i32 [[TMP9]], [[TMP8]]
; IR-NEXT: [[TMP11:%.*]] = zext i32 [[TMP8]] to i64
@@ -176,7 +176,7 @@ define i32 @select_sdiv_lhs_opaque_const1_i32(i1 %cond) {
; IR-NEXT: [[TMP5:%.*]] = uitofp i32 [[TMP4]] to float
; IR-NEXT: [[TMP6:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP5]])
; IR-NEXT: [[TMP7:%.*]] = fmul fast float [[TMP6]], 0x41EFFFFFC0000000
-; IR-NEXT: [[TMP8:%.*]] = fptoui float [[TMP7]] to i32
+; IR-NEXT: [[TMP8:%.*]] = fptoui fast float [[TMP7]] to i32
; IR-NEXT: [[TMP9:%.*]] = sub i32 0, [[TMP4]]
; IR-NEXT: [[TMP10:%.*]] = mul i32 [[TMP9]], [[TMP8]]
; IR-NEXT: [[TMP11:%.*]] = zext i32 [[TMP8]] to i64
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-idiv.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-idiv.ll
index b2dcd77274989..0b447c59eb858 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-idiv.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-idiv.ll
@@ -8,7 +8,7 @@ define amdgpu_kernel void @udiv_i32(ptr addrspace(1) %out, i32 %x, i32 %y) {
; CHECK-NEXT: [[TMP1:%.*]] = uitofp i32 [[Y:%.*]] to float
; CHECK-NEXT: [[TMP2:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP1]])
; CHECK-NEXT: [[TMP3:%.*]] = fmul fast float [[TMP2]], 0x41EFFFFFC0000000
-; CHECK-NEXT: [[TMP4:%.*]] = fptoui float [[TMP3]] to i32
+; CHECK-NEXT: [[TMP4:%.*]] = fptoui fast float [[TMP3]] to i32
; CHECK-NEXT: [[TMP5:%.*]] = sub i32 0, [[Y]]
; CHECK-NEXT: [[TMP6:%.*]] = mul i32 [[TMP5]], [[TMP4]]
; CHECK-NEXT: [[TMP7:%.*]] = zext i32 [[TMP4]] to i64
@@ -108,7 +108,7 @@ define amdgpu_kernel void @urem_i32(ptr addrspace(1) %out, i32 %x, i32 %y) {
; CHECK-NEXT: [[TMP1:%.*]] = uitofp i32 [[Y:%.*]] to float
; CHECK-NEXT: [[TMP2:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP1]])
; CHECK-NEXT: [[TMP3:%.*]] = fmul fast float [[TMP2]], 0x41EFFFFFC0000000
-; CHECK-NEXT: [[TMP4:%.*]] = fptoui float [[TMP3]] to i32
+; CHECK-NEXT: [[TMP4:%.*]] = fptoui fast float [[TMP3]] to i32
; CHECK-NEXT: [[TMP5:%.*]] = sub i32 0, [[Y]]
; CHECK-NEXT: [[TMP6:%.*]] = mul i32 [[TMP5]], [[TMP4]]
; CHECK-NEXT: [[TMP7:%.*]] = zext i32 [[TMP4]] to i64
@@ -208,7 +208,7 @@ define amdgpu_kernel void @sdiv_i32(ptr addrspace(1) %out, i32 %x, i32 %y) {
; CHECK-NEXT: [[TMP8:%.*]] = uitofp i32 [[TMP7]] to float
; CHECK-NEXT: [[TMP9:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP8]])
; CHECK-NEXT: [[TMP10:%.*]] = fmul fast float [[TMP9]], 0x41EFFFFFC0000000
-; CHECK-NEXT: [[TMP11:%.*]] = fptoui float [[TMP10]] to i32
+; CHECK-NEXT: [[TMP11:%.*]] = fptoui fast float [[TMP10]] to i32
; CHECK-NEXT: [[TMP12:%.*]] = sub i32 0, [[TMP7]]
; CHECK-NEXT: [[TMP13:%.*]] = mul i32 [[TMP12]], [[TMP11]]
; CHECK-NEXT: [[TMP14:%.*]] = zext i32 [[TMP11]] to i64
@@ -328,7 +328,7 @@ define amdgpu_kernel void @srem_i32(ptr addrspace(1) %out, i32 %x, i32 %y) {
; CHECK-NEXT: [[TMP7:%.*]] = uitofp i32 [[TMP6]] to float
; CHECK-NEXT: [[TMP8:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP7]])
; CHECK-NEXT: [[TMP9:%.*]] = fmul fast float [[TMP8]], 0x41EFFFFFC0000000
-; CHECK-NEXT: [[TMP10:%.*]] = fptoui float [[TMP9]] to i32
+; CHECK-NEXT: [[TMP10:%.*]] = fptoui fast float [[TMP9]] to i32
; CHECK-NEXT: [[TMP11:%.*]] = sub i32 0, [[TMP6]]
; CHECK-NEXT: [[TMP12:%.*]] = mul i32 [[TMP11]], [[TMP10]]
; CHECK-NEXT: [[TMP13:%.*]] = zext i32 [[TMP10]] to i64
@@ -439,7 +439,7 @@ define amdgpu_kernel void @udiv_i16(ptr addrspace(1) %out, i16 %x, i16 %y) {
; CHECK-NEXT: [[TMP7:%.*]] = call fast float @llvm.trunc.f32(float [[TMP6]])
; CHECK-NEXT: [[TMP8:%.*]] = fneg fast float [[TMP7]]
; CHECK-NEXT: [[TMP9:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP8]], float [[TMP4]], float [[TMP3]])
-; CHECK-NEXT: [[TMP10:%.*]] = fptoui float [[TMP7]] to i32
+; CHECK-NEXT: [[TMP10:%.*]] = fptoui fast float [[TMP7]] to i32
; CHECK-NEXT: [[TMP11:%.*]] = call fast float @llvm.fabs.f32(float [[TMP9]])
; CHECK-NEXT: [[TMP12:%.*]] = call fast float @llvm.fabs.f32(float [[TMP4]])
; CHECK-NEXT: [[TMP13:%.*]] = fcmp fast oge float [[TMP11]], [[TMP12]]
@@ -508,7 +508,7 @@ define amdgpu_kernel void @urem_i16(ptr addrspace(1) %out, i16 %x, i16 %y) {
; CHECK-NEXT: [[TMP7:%.*]] = call fast float @llvm.trunc.f32(float [[TMP6]])
; CHECK-NEXT: [[TMP8:%.*]] = fneg fast float [[TMP7]]
; CHECK-NEXT: [[TMP9:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP8]], float [[TMP4]], float [[TMP3]])
-; CHECK-NEXT: [[TMP10:%.*]] = fptoui float [[TMP7]] to i32
+; CHECK-NEXT: [[TMP10:%.*]] = fptoui fast float [[TMP7]] to i32
; CHECK-NEXT: [[TMP11:%.*]] = call fast float @llvm.fabs.f32(float [[TMP9]])
; CHECK-NEXT: [[TMP12:%.*]] = call fast float @llvm.fabs.f32(float [[TMP4]])
; CHECK-NEXT: [[TMP13:%.*]] = fcmp fast oge float [[TMP11]], [[TMP12]]
@@ -579,14 +579,14 @@ define amdgpu_kernel void @sdiv_i16(ptr addrspace(1) %out, i16 %x, i16 %y) {
; CHECK-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]]
; CHECK-NEXT: [[TMP4:%.*]] = ashr i32 [[TMP3]], 30
; CHECK-NEXT: [[TMP5:%.*]] = or i32 [[TMP4]], 1
-; CHECK-NEXT: [[TMP6:%.*]] = sitofp i32 [[TMP1]] to float
-; CHECK-NEXT: [[TMP7:%.*]] = sitofp i32 [[TMP2]] to float
+; CHECK-NEXT: [[TMP6:%.*]] = sitofp fast i32 [[TMP1]] to float
+; CHECK-NEXT: [[TMP7:%.*]] = sitofp fast i32 [[TMP2]] to float
; CHECK-NEXT: [[TMP8:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP7]])
; CHECK-NEXT: [[TMP9:%.*]] = fmul fast float [[TMP6]], [[TMP8]]
; CHECK-NEXT: [[TMP10:%.*]] = call fast float @llvm.trunc.f32(float [[TMP9]])
; CHECK-NEXT: [[TMP11:%.*]] = fneg fast float [[TMP10]]
; CHECK-NEXT: [[TMP12:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP11]], float [[TMP7]], float [[TMP6]])
-; CHECK-NEXT: [[TMP13:%.*]] = fptosi float [[TMP10]] to i32
+; CHECK-NEXT: [[TMP13:%.*]] = fptosi fast float [[TMP10]] to i32
; CHECK-NEXT: [[TMP14:%.*]] = call fast float @llvm.fabs.f32(float [[TMP12]])
; CHECK-NEXT: [[TMP15:%.*]] = call fast float @llvm.fabs.f32(float [[TMP7]])
; CHECK-NEXT: [[TMP16:%.*]] = fcmp fast oge float [[TMP14]], [[TMP15]]
@@ -660,14 +660,14 @@ define amdgpu_kernel void @srem_i16(ptr addrspace(1) %out, i16 %x, i16 %y) {
; CHECK-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]]
; CHECK-NEXT: [[TMP4:%.*]] = ashr i32 [[TMP3]], 30
; CHECK-NEXT: [[TMP5:%.*]] = or i32 [[TMP4]], 1
-; CHECK-NEXT: [[TMP6:%.*]] = sitofp i32 [[TMP1]] to float
-; CHECK-NEXT: [[TMP7:%.*]] = sitofp i32 [[TMP2]] to float
+; CHECK-NEXT: [[TMP6:%.*]] = sitofp fast i32 [[TMP1]] to float
+; CHECK-NEXT: [[TMP7:%.*]] = sitofp fast i32 [[TMP2]] to float
; CHECK-NEXT: [[TMP8:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP7]])
; CHECK-NEXT: [[TMP9:%.*]] = fmul fast float [[TMP6]], [[TMP8]]
; CHECK-NEXT: [[TMP10:%.*]] = call fast float @llvm.trunc.f32(float [[TMP9]])
; CHECK-NEXT: [[TMP11:%.*]] = fneg fast float [[TMP10]]
; CHECK-NEXT: [[TMP12:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP11]], float [[TMP7]], float [[TMP6]])
-; CHECK-NEXT: [[TMP13:%.*]] = fptosi float [[TMP10]] to i32
+; CHECK-NEXT: [[TMP13:%.*]] = fptosi fast float [[TMP10]] to i32
; CHECK-NEXT: [[TMP14:%.*]] = call fast float @llvm.fabs.f32(float [[TMP12]])
; CHECK-NEXT: [[TMP15:%.*]] = call fast float @llvm.fabs.f32(float [[TMP7]])
; CHECK-NEXT: [[TMP16:%.*]] = fcmp fast oge float [[TMP14]], [[TMP15]]
@@ -751,7 +751,7 @@ define amdgpu_kernel void @udiv_i8(ptr addrspace(1) %out, i8 %x, i8 %y) {
; CHECK-NEXT: [[TMP7:%.*]] = call fast float @llvm.trunc.f32(float [[TMP6]])
; CHECK-NEXT: [[TMP8:%.*]] = fneg fast float [[TMP7]]
; CHECK-NEXT: [[TMP9:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP8]], float [[TMP4]], float [[TMP3]])
-; CHECK-NEXT: [[TMP10:%.*]] = fptoui float [[TMP7]] to i32
+; CHECK-NEXT: [[TMP10:%.*]] = fptoui fast float [[TMP7]] to i32
; CHECK-NEXT: [[TMP11:%.*]] = call fast float @llvm.fabs.f32(float [[TMP9]])
; CHECK-NEXT: [[TMP12:%.*]] = call fast float @llvm.fabs.f32(float [[TMP4]])
; CHECK-NEXT: [[TMP13:%.*]] = fcmp fast oge float [[TMP11]], [[TMP12]]
@@ -814,7 +814,7 @@ define amdgpu_kernel void @urem_i8(ptr addrspace(1) %out, i8 %x, i8 %y) {
; CHECK-NEXT: [[TMP7:%.*]] = call fast float @llvm.trunc.f32(float [[TMP6]])
; CHECK-NEXT: [[TMP8:%.*]] = fneg fast float [[TMP7]]
; CHECK-NEXT: [[TMP9:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP8]], float [[TMP4]], float [[TMP3]])
-; CHECK-NEXT: [[TMP10:%.*]] = fptoui float [[TMP7]] to i32
+; CHECK-NEXT: [[TMP10:%.*]] = fptoui fast float [[TMP7]] to i32
; CHECK-NEXT: [[TMP11:%.*]] = call fast float @llvm.fabs.f32(float [[TMP9]])
; CHECK-NEXT: [[TMP12:%.*]] = call fast float @llvm.fabs.f32(float [[TMP4]])
; CHECK-NEXT: [[TMP13:%.*]] = fcmp fast oge float [[TMP11]], [[TMP12]]
@@ -881,14 +881,14 @@ define amdgpu_kernel void @sdiv_i8(ptr addrspace(1) %out, i8 %x, i8 %y) {
; CHECK-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]]
; CHECK-NEXT: [[TMP4:%.*]] = ashr i32 [[TMP3]], 30
; CHECK-NEXT: [[TMP5:%.*]] = or i32 [[TMP4]], 1
-; CHECK-NEXT: [[TMP6:%.*]] = sitofp i32 [[TMP1]] to float
-; CHECK-NEXT: [[TMP7:%.*]] = sitofp i32 [[TMP2]] to float
+; CHECK-NEXT: [[TMP6:%.*]] = sitofp fast i32 [[TMP1]] to float
+; CHECK-NEXT: [[TMP7:%.*]] = sitofp fast i32 [[TMP2]] to float
; CHECK-NEXT: [[TMP8:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP7]])
; CHECK-NEXT: [[TMP9:%.*]] = fmul fast float [[TMP6]], [[TMP8]]
; CHECK-NEXT: [[TMP10:%.*]] = call fast float @llvm.trunc.f32(float [[TMP9]])
; CHECK-NEXT: [[TMP11:%.*]] = fneg fast float [[TMP10]]
; CHECK-NEXT: [[TMP12:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP11]], float [[TMP7]], float [[TMP6]])
-; CHECK-NEXT: [[TMP13:%.*]] = fptosi float [[TMP10]] to i32
+; CHECK-NEXT: [[TMP13:%.*]] = fptosi fast float [[TMP10]] to i32
; CHECK-NEXT: [[TMP14:%.*]] = call fast float @llvm.fabs.f32(float [[TMP12]])
; CHECK-NEXT: [[TMP15:%.*]] = call fast float @llvm.fabs.f32(float [[TMP7]])
; CHECK-NEXT: [[TMP16:%.*]] = fcmp fast oge float [[TMP14]], [[TMP15]]
@@ -962,14 +962,14 @@ define amdgpu_kernel void @srem_i8(ptr addrspace(1) %out, i8 %x, i8 %y) {
; CHECK-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]]
; CHECK-NEXT: [[TMP4:%.*]] = ashr i32 [[TMP3]], 30
; CHECK-NEXT: [[TMP5:%.*]] = or i32 [[TMP4]], 1
-; CHECK-NEXT: [[TMP6:%.*]] = sitofp i32 [[TMP1]] to float
-; CHECK-NEXT: [[TMP7:%.*]] = sitofp i32 [[TMP2]] to float
+; CHECK-NEXT: [[TMP6:%.*]] = sitofp fast i32 [[TMP1]] to float
+; CHECK-NEXT: [[TMP7:%.*]] = sitofp fast i32 [[TMP2]] to float
; CHECK-NEXT: [[TMP8:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP7]])
; CHECK-NEXT: [[TMP9:%.*]] = fmul fast float [[TMP6]], [[TMP8]]
; CHECK-NEXT: [[TMP10:%.*]] = call fast float @llvm.trunc.f32(float [[TMP9]])
; CHECK-NEXT: [[TMP11:%.*]] = fneg fast float [[TMP10]]
; CHECK-NEXT: [[TMP12:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP11]], float [[TMP7]], float [[TMP6]])
-; CHECK-NEXT: [[TMP13:%.*]] = fptosi float [[TMP10]] to i32
+; CHECK-NEXT: [[TMP13:%.*]] = fptosi fast float [[TMP10]] to i32
; CHECK-NEXT: [[TMP14:%.*]] = call fast float @llvm.fabs.f32(float [[TMP12]])
; CHECK-NEXT: [[TMP15:%.*]] = call fast float @llvm.fabs.f32(float [[TMP7]])
; CHECK-NEXT: [[TMP16:%.*]] = fcmp fast oge float [[TMP14]], [[TMP15]]
@@ -1051,7 +1051,7 @@ define amdgpu_kernel void @udiv_v4i32(ptr addrspace(1) %out, <4 x i32> %x, <4 x
; CHECK-NEXT: [[TMP3:%.*]] = uitofp i32 [[TMP2]] to float
; CHECK-NEXT: [[TMP4:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP3]])
; CHECK-NEXT: [[TMP5:%.*]] = fmul fast float [[TMP4]], 0x41EFFFFFC0000000
-; CHECK-NEXT: [[TMP6:%.*]] = fptoui float [[TMP5]] to i32
+; CHECK-NEXT: [[TMP6:%.*]] = fptoui fast float [[TMP5]] to i32
; CHECK-NEXT: [[TMP7:%.*]] = sub i32 0, [[TMP2]]
; CHECK-NEXT: [[TMP8:%.*]] = mul i32 [[TMP7]], [[TMP6]]
; CHECK-NEXT: [[TMP9:%.*]] = zext i32 [[TMP6]] to i64
@@ -1083,7 +1083,7 @@ define amdgpu_kernel void @udiv_v4i32(ptr addrspace(1) %out, <4 x i32> %x, <4 x
; CHECK-NEXT: [[TMP35:%.*]] = uitofp i32 [[TMP34]] to float
; CHECK-NEXT: [[TMP36:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP35]])
; CHECK-NEXT: [[TMP37:%.*]] = fmul fast float [[TMP36]], 0x41EFFFFFC0000000
-; CHECK-NEXT: [[TMP38:%.*]] = fptoui float [[TMP37]] to i32
+; CHECK-NEXT: [[TMP38:%.*]] = fptoui fast float [[TMP37]] to i32
; CHECK-NEXT: [[TMP39:%.*]] = sub i32 0, [[TMP34]]
; CHECK-NEXT: [[TMP40:%.*]] = mul i32 [[TMP39]], [[TMP38]]
; CHECK-NEXT: [[TMP41:%.*]] = zext i32 [[TMP38]] to i64
@@ -1115,7 +1115,7 @@ define amdgpu_kernel void @udiv_v4i32(ptr addrspace(1) %out, <4 x i32> %x, <4 x
; CHECK-NEXT: [[TMP67:%.*]] = uitofp i32 [[TMP66]] to float
; CHECK-NEXT: [[TMP68:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP67]])
; CHECK-NEXT: [[TMP69:%.*]] = fmul fast float [[TMP68]], 0x41EFFFFFC0000000
-; CHECK-NEXT: [[TMP70:%.*]] = fptoui float [[TMP69]] to i32
+; CHECK-NEXT: [[TMP70:%.*]] = fptoui fast float [[TMP69]] to i32
; CHECK-NEXT: [[TMP71:%.*]] = sub i32 0, [[TMP66]]
; CHECK-NEXT: [[TMP72:%.*]] = mul i32 [[TMP71]], [[TMP70]]
; CHECK-NEXT: [[TMP73:%.*]] = zext i32 [[TMP70]] to i64
@@ -1147,7 +1147,7 @@ define amdgpu_kernel void @udiv_v4i32(ptr addrspace(1) %out, <4 x i32> %x, <4 x
; CHECK-NEXT: [[TMP99:%.*]] = uitofp i32 [[TMP98]] to float
; CHECK-NEXT: [[TMP100:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP99]])
; CHECK-NEXT: [[TMP101:%.*]] = fmul fast float [[TMP100]], 0x41EFFFFFC0000000
-; CHECK-NEXT: [[TMP102:%.*]] = fptoui float [[TMP101]] to i32
+; CHECK-NEXT: [[TMP102:%.*]] = fptoui fast float [[TMP101]] to i32
; CHECK-NEXT: [[TMP103:%.*]] = sub i32 0, [[TMP98]]
; CHECK-NEXT: [[TMP104:%.*]] = mul i32 [[TMP103]], [[TMP102]]
; CHECK-NEXT: [[TMP105:%.*]] = zext i32 [[TMP102]] to i64
@@ -1379,7 +1379,7 @@ define amdgpu_kernel void @urem_v4i32(ptr addrspace(1) %out, <4 x i32> %x, <4 x
; CHECK-NEXT: [[TMP3:%.*]] = uitofp i32 [[TMP2]] to float
; CHECK-NEXT: [[TMP4:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP3]])
; CHECK-NEXT: [[TMP5:%.*]] = fmul fast float [[TMP4]], 0x41EFFFFFC0000000
-; CHECK-NEXT: [[TMP6:%.*]] = fptoui float [[TMP5]] to i32
+; CHECK-NEXT: [[TMP6:%.*]] = fptoui fast float [[TMP5]] to i32
; CHECK-NEXT: [[TMP7:%.*]] = sub i32 0, [[TMP2]]
; CHECK-NEXT: [[TMP8:%.*]] = mul i32 [[TMP7]], [[TMP6]]
; CHECK-NEXT: [[TMP9:%.*]] = zext i32 [[TMP6]] to i64
@@ -1409,7 +1409,7 @@ define amdgpu_kernel void @urem_v4i32(ptr addrspace(1) %out, <4 x i32> %x, <4 x
; CHECK-NEXT: [[TMP33:%.*]] = uitofp i32 [[TMP32]] to float
; CHECK-NEXT: [[TMP34:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP33]])
; CHECK-NEXT: [[TMP35:%.*]] = fmul fast float [[TMP34]], 0x41EFFFFFC0000000
-; CHECK-NEXT: [[TMP36:%.*]] = fptoui float [[TMP35]] to i32
+; CHECK-NEXT: [[TMP36:%.*]] = fptoui fast float [[TMP35]] to i32
; CHECK-NEXT: [[TMP37:%.*]] = sub i32 0, [[TMP32]]
; CHECK-NEXT: [[TMP38:%.*]] = mul i32 [[TMP37]], [[TMP36]]
; CHECK-NEXT: [[TMP39:%.*]] = zext i32 [[TMP36]] to i64
@@ -1439,7 +1439,7 @@ define amdgpu_kernel void @urem_v4i32(ptr addrspace(1) %out, <4 x i32> %x, <4 x
; CHECK-NEXT: [[TMP63:%.*]] = uitofp i32 [[TMP62]] to float
; CHECK-NEXT: [[TMP64:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP63]])
; CHECK-NEXT: [[TMP65:%.*]] = fmul fast float [[TMP64]], 0x41EFFFFFC0000000
-; CHECK-NEXT: [[TMP66:%.*]] = fptoui float [[TMP65]] to i32
+; CHECK-NEXT: [[TMP66:%.*]] = fptoui fast float [[TMP65]] to i32
; CHECK-NEXT: [[TMP67:%.*]] = sub i32 0, [[TMP62]]
; CHECK-NEXT: [[TMP68:%.*]] = mul i32 [[TMP67]], [[TMP66]]
; CHECK-NEXT: [[TMP69:%.*]] = zext i32 [[TMP66]] to i64
@@ -1469,7 +1469,7 @@ define amdgpu_kernel void @urem_v4i32(ptr addrspace(1) %out, <4 x i32> %x, <4 x
; CHECK-NEXT: [[TMP93:%.*]] = uitofp i32 [[TMP92]] to float
; CHECK-NEXT: [[TMP94:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP93]])
; CHECK-NEXT: [[TMP95:%.*]] = fmul fast float [[TMP94]], 0x41EFFFFFC0000000
-; CHECK-NEXT: [[TMP96:%.*]] = fptoui float [[TMP95]] to i32
+; CHECK-NEXT: [[TMP96:%.*]] = fptoui fast float [[TMP95]] to i32
; CHECK-NEXT: [[TMP97:%.*]] = sub i32 0, [[TMP92]]
; CHECK-NEXT: [[TMP98:%.*]] = mul i32 [[TMP97]], [[TMP96]]
; CHECK-NEXT: [[TMP99:%.*]] = zext i32 [[TMP96]] to i64
@@ -1687,7 +1687,7 @@ define amdgpu_kernel void @sdiv_v4i32(ptr addrspace(1) %out, <4 x i32> %x, <4 x
; CHECK-NEXT: [[TMP10:%.*]] = uitofp i32 [[TMP9]] to float
; CHECK-NEXT: [[TMP11:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP10]])
; CHECK-NEXT: [[TMP12:%.*]] = fmul fast float [[TMP11]], 0x41EFFFFFC0000000
-; CHECK-NEXT: [[TMP13:%.*]] = fptoui float [[TMP12]] to i32
+; CHECK-NEXT: [[TMP13:%.*]] = fptoui fast float [[TMP12]] to i32
; CHECK-NEXT: [[TMP14:%.*]] = sub i32 0, [[TMP9]]
; CHECK-NEXT: [[TMP15:%.*]] = mul i32 [[TMP14]], [[TMP13]]
; CHECK-NEXT: [[TMP16:%.*]] = zext i32 [[TMP13]] to i64
@@ -1728,7 +1728,7 @@ define amdgpu_kernel void @sdiv_v4i32(ptr addrspace(1) %out, <4 x i32> %x, <4 x
; CHECK-NEXT: [[TMP51:%.*]] = uitofp i32 [[TMP50]] to float
; CHECK-NEXT: [[TMP52:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP51]])
; CHECK-NEXT: [[TMP53:%.*]] = fmul fast float [[TMP52]], 0x41EFFFFFC0000000
-; CHECK-NEXT: [[TMP54:%.*]] = fptoui float [[TMP53]] to i32
+; CHECK-NEXT: [[TMP54:%.*]] = fptoui fast float [[TMP53]] to i32
; CHECK-NEXT: [[TMP55:%.*]] = sub i32 0, [[TMP50]]
; CHECK-NEXT: [[TMP56:%.*]] = mul i32 [[TMP55]], [[TMP54]]
; CHECK-NEXT: [[TMP57:%.*]] = zext i32 [[TMP54]] to i64
@@ -1769,7 +1769,7 @@ define amdgpu_kernel void @sdiv_v4i32(ptr addrspace(1) %out, <4 x i32> %x, <4 x
; CHECK-NEXT: [[TMP92:%.*]] = uitofp i32 [[TMP91]] to float
; CHECK-NEXT: [[TMP93:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP92]])
; CHECK-NEXT: [[TMP94:%.*]] = fmul fast float [[TMP93]], 0x41EFFFFFC0000000
-; CHECK-NEXT: [[TMP95:%.*]] = fptoui float [[TMP94]] to i32
+; CHECK-NEXT: [[TMP95:%.*]] = fptoui fast float [[TMP94]] to i32
; CHECK-NEXT: [[TMP96:%.*]] = sub i32 0, [[TMP91]]
; CHECK-NEXT: [[TMP97:%.*]] = mul i32 [[TMP96]], [[TMP95]]
; CHECK-NEXT: [[TMP98:%.*]] = zext i32 [[TMP95]] to i64
@@ -1810,7 +1810,7 @@ define amdgpu_kernel void @sdiv_v4i32(ptr addrspace(1) %out, <4 x i32> %x, <4 x
; CHECK-NEXT: [[TMP133:%.*]] = uitofp i32 [[TMP132]] to float
; CHECK-NEXT: [[TMP134:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP133]])
; CHECK-NEXT: [[TMP135:%.*]] = fmul fast float [[TMP134]], 0x41EFFFFFC0000000
-; CHECK-NEXT: [[TMP136:%.*]] = fptoui float [[TMP135]] to i32
+; CHECK-NEXT: [[TMP136:%.*]] = fptoui fast float [[TMP135]] to i32
; CHECK-NEXT: [[TMP137:%.*]] = sub i32 0, [[TMP132]]
; CHECK-NEXT: [[TMP138:%.*]] = mul i32 [[TMP137]], [[TMP136]]
; CHECK-NEXT: [[TMP139:%.*]] = zext i32 [[TMP136]] to i64
@@ -2099,7 +2099,7 @@ define amdgpu_kernel void @srem_v4i32(ptr addrspace(1) %out, <4 x i32> %x, <4 x
; CHECK-NEXT: [[TMP9:%.*]] = uitofp i32 [[TMP8]] to float
; CHECK-NEXT: [[TMP10:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP9]])
; CHECK-NEXT: [[TMP11:%.*]] = fmul fast float [[TMP10]], 0x41EFFFFFC0000000
-; CHECK-NEXT: [[TMP12:%.*]] = fptoui float [[TMP11]] to i32
+; CHECK-NEXT: [[TMP12:%.*]] = fptoui fast float [[TMP11]] to i32
; CHECK-NEXT: [[TMP13:%.*]] = sub i32 0, [[TMP8]]
; CHECK-NEXT: [[TMP14:%.*]] = mul i32 [[TMP13]], [[TMP12]]
; CHECK-NEXT: [[TMP15:%.*]] = zext i32 [[TMP12]] to i64
@@ -2137,7 +2137,7 @@ define amdgpu_kernel void @srem_v4i32(ptr addrspace(1) %out, <4 x i32> %x, <4 x
; CHECK-NEXT: [[TMP47:%.*]] = uitofp i32 [[TMP46]] to float
; CHECK-NEXT: [[TMP48:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP47]])
; CHECK-NEXT: [[TMP49:%.*]] = fmul fast float [[TMP48]], 0x41EFFFFFC0000000
-; CHECK-NEXT: [[TMP50:%.*]] = fptoui float [[TMP49]] to i32
+; CHECK-NEXT: [[TMP50:%.*]] = fptoui fast float [[TMP49]] to i32
; CHECK-NEXT: [[TMP51:%.*]] = sub i32 0, [[TMP46]]
; CHECK-NEXT: [[TMP52:%.*]] = mul i32 [[TMP51]], [[TMP50]]
; CHECK-NEXT: [[TMP53:%.*]] = zext i32 [[TMP50]] to i64
@@ -2175,7 +2175,7 @@ define amdgpu_kernel void @srem_v4i32(ptr addrspace(1) %out, <4 x i32> %x, <4 x
; CHECK-NEXT: [[TMP85:%.*]] = uitofp i32 [[TMP84]] to float
; CHECK-NEXT: [[TMP86:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP85]])
; CHECK-NEXT: [[TMP87:%.*]] = fmul fast float [[TMP86]], 0x41EFFFFFC0000000
-; CHECK-NEXT: [[TMP88:%.*]] = fptoui float [[TMP87]] to i32
+; CHECK-NEXT: [[TMP88:%.*]] = fptoui fast float [[TMP87]] to i32
; CHECK-NEXT: [[TMP89:%.*]] = sub i32 0, [[TMP84]]
; CHECK-NEXT: [[TMP90:%.*]] = mul i32 [[TMP89]], [[TMP88]]
; CHECK-NEXT: [[TMP91:%.*]] = zext i32 [[TMP88]] to i64
@@ -2213,7 +2213,7 @@ define amdgpu_kernel void @srem_v4i32(ptr addrspace(1) %out, <4 x i32> %x, <4 x
; CHECK-NEXT: [[TMP123:%.*]] = uitofp i32 [[TMP122]] to float
; CHECK-NEXT: [[TMP124:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP123]])
; CHECK-NEXT: [[TMP125:%.*]] = fmul fast float [[TMP124]], 0x41EFFFFFC0000000
-; CHECK-NEXT: [[TMP126:%.*]] = fptoui float [[TMP125]] to i32
+; CHECK-NEXT: [[TMP126:%.*]] = fptoui fast float [[TMP125]] to i32
; CHECK-NEXT: [[TMP127:%.*]] = sub i32 0, [[TMP122]]
; CHECK-NEXT: [[TMP128:%.*]] = mul i32 [[TMP127]], [[TMP126]]
; CHECK-NEXT: [[TMP129:%.*]] = zext i32 [[TMP126]] to i64
@@ -2473,7 +2473,7 @@ define amdgpu_kernel void @udiv_v4i16(ptr addrspace(1) %out, <4 x i16> %x, <4 x
; CHECK-NEXT: [[TMP9:%.*]] = call fast float @llvm.trunc.f32(float [[TMP8]])
; CHECK-NEXT: [[TMP10:%.*]] = fneg fast float [[TMP9]]
; CHECK-NEXT: [[TMP11:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP10]], float [[TMP6]], float [[TMP5]])
-; CHECK-NEXT: [[TMP12:%.*]] = fptoui float [[TMP9]] to i32
+; CHECK-NEXT: [[TMP12:%.*]] = fptoui fast float [[TMP9]] to i32
; CHECK-NEXT: [[TMP13:%.*]] = call fast float @llvm.fabs.f32(float [[TMP11]])
; CHECK-NEXT: [[TMP14:%.*]] = call fast float @llvm.fabs.f32(float [[TMP6]])
; CHECK-NEXT: [[TMP15:%.*]] = fcmp fast oge float [[TMP13]], [[TMP14]]
@@ -2493,7 +2493,7 @@ define amdgpu_kernel void @udiv_v4i16(ptr addrspace(1) %out, <4 x i16> %x, <4 x
; CHECK-NEXT: [[TMP29:%.*]] = call fast float @llvm.trunc.f32(float [[TMP28]])
; CHECK-NEXT: [[TMP30:%.*]] = fneg fast float [[TMP29]]
; CHECK-NEXT: [[TMP31:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP30]], float [[TMP26]], float [[TMP25]])
-; CHECK-NEXT: [[TMP32:%.*]] = fptoui float [[TMP29]] to i32
+; CHECK-NEXT: [[TMP32:%.*]] = fptoui fast float [[TMP29]] to i32
; CHECK-NEXT: [[TMP33:%.*]] = call fast float @llvm.fabs.f32(float [[TMP31]])
; CHECK-NEXT: [[TMP34:%.*]] = call fast float @llvm.fabs.f32(float [[TMP26]])
; CHECK-NEXT: [[TMP35:%.*]] = fcmp fast oge float [[TMP33]], [[TMP34]]
@@ -2513,7 +2513,7 @@ define amdgpu_kernel void @udiv_v4i16(ptr addrspace(1) %out, <4 x i16> %x, <4 x
; CHECK-NEXT: [[TMP49:%.*]] = call fast float @llvm.trunc.f32(float [[TMP48]])
; CHECK-NEXT: [[TMP50:%.*]] = fneg fast float [[TMP49]]
; CHECK-NEXT: [[TMP51:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP50]], float [[TMP46]], float [[TMP45]])
-; CHECK-NEXT: [[TMP52:%.*]] = fptoui float [[TMP49]] to i32
+; CHECK-NEXT: [[TMP52:%.*]] = fptoui fast float [[TMP49]] to i32
; CHECK-NEXT: [[TMP53:%.*]] = call fast float @llvm.fabs.f32(float [[TMP51]])
; CHECK-NEXT: [[TMP54:%.*]] = call fast float @llvm.fabs.f32(float [[TMP46]])
; CHECK-NEXT: [[TMP55:%.*]] = fcmp fast oge float [[TMP53]], [[TMP54]]
@@ -2533,7 +2533,7 @@ define amdgpu_kernel void @udiv_v4i16(ptr addrspace(1) %out, <4 x i16> %x, <4 x
; CHECK-NEXT: [[TMP69:%.*]] = call fast float @llvm.trunc.f32(float [[TMP68]])
; CHECK-NEXT: [[TMP70:%.*]] = fneg fast float [[TMP69]]
; CHECK-NEXT: [[TMP71:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP70]], float [[TMP66]], float [[TMP65]])
-; CHECK-NEXT: [[TMP72:%.*]] = fptoui float [[TMP69]] to i32
+; CHECK-NEXT: [[TMP72:%.*]] = fptoui fast float [[TMP69]] to i32
; CHECK-NEXT: [[TMP73:%.*]] = call fast float @llvm.fabs.f32(float [[TMP71]])
; CHECK-NEXT: [[TMP74:%.*]] = call fast float @llvm.fabs.f32(float [[TMP66]])
; CHECK-NEXT: [[TMP75:%.*]] = fcmp fast oge float [[TMP73]], [[TMP74]]
@@ -2680,7 +2680,7 @@ define amdgpu_kernel void @urem_v4i16(ptr addrspace(1) %out, <4 x i16> %x, <4 x
; CHECK-NEXT: [[TMP9:%.*]] = call fast float @llvm.trunc.f32(float [[TMP8]])
; CHECK-NEXT: [[TMP10:%.*]] = fneg fast float [[TMP9]]
; CHECK-NEXT: [[TMP11:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP10]], float [[TMP6]], float [[TMP5]])
-; CHECK-NEXT: [[TMP12:%.*]] = fptoui float [[TMP9]] to i32
+; CHECK-NEXT: [[TMP12:%.*]] = fptoui fast float [[TMP9]] to i32
; CHECK-NEXT: [[TMP13:%.*]] = call fast float @llvm.fabs.f32(float [[TMP11]])
; CHECK-NEXT: [[TMP14:%.*]] = call fast float @llvm.fabs.f32(float [[TMP6]])
; CHECK-NEXT: [[TMP15:%.*]] = fcmp fast oge float [[TMP13]], [[TMP14]]
@@ -2702,7 +2702,7 @@ define amdgpu_kernel void @urem_v4i16(ptr addrspace(1) %out, <4 x i16> %x, <4 x
; CHECK-NEXT: [[TMP31:%.*]] = call fast float @llvm.trunc.f32(float [[TMP30]])
; CHECK-NEXT: [[TMP32:%.*]] = fneg fast float [[TMP31]]
; CHECK-NEXT: [[TMP33:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP32]], float [[TMP28]], float [[TMP27]])
-; CHECK-NEXT: [[TMP34:%.*]] = fptoui float [[TMP31]] to i32
+; CHECK-NEXT: [[TMP34:%.*]] = fptoui fast float [[TMP31]] to i32
; CHECK-NEXT: [[TMP35:%.*]] = call fast float @llvm.fabs.f32(float [[TMP33]])
; CHECK-NEXT: [[TMP36:%.*]] = call fast float @llvm.fabs.f32(float [[TMP28]])
; CHECK-NEXT: [[TMP37:%.*]] = fcmp fast oge float [[TMP35]], [[TMP36]]
@@ -2724,7 +2724,7 @@ define amdgpu_kernel void @urem_v4i16(ptr addrspace(1) %out, <4 x i16> %x, <4 x
; CHECK-NEXT: [[TMP53:%.*]] = call fast float @llvm.trunc.f32(float [[TMP52]])
; CHECK-NEXT: [[TMP54:%.*]] = fneg fast float [[TMP53]]
; CHECK-NEXT: [[TMP55:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP54]], float [[TMP50]], float [[TMP49]])
-; CHECK-NEXT: [[TMP56:%.*]] = fptoui float [[TMP53]] to i32
+; CHECK-NEXT: [[TMP56:%.*]] = fptoui fast float [[TMP53]] to i32
; CHECK-NEXT: [[TMP57:%.*]] = call fast float @llvm.fabs.f32(float [[TMP55]])
; CHECK-NEXT: [[TMP58:%.*]] = call fast float @llvm.fabs.f32(float [[TMP50]])
; CHECK-NEXT: [[TMP59:%.*]] = fcmp fast oge float [[TMP57]], [[TMP58]]
@@ -2746,7 +2746,7 @@ define amdgpu_kernel void @urem_v4i16(ptr addrspace(1) %out, <4 x i16> %x, <4 x
; CHECK-NEXT: [[TMP75:%.*]] = call fast float @llvm.trunc.f32(float [[TMP74]])
; CHECK-NEXT: [[TMP76:%.*]] = fneg fast float [[TMP75]]
; CHECK-NEXT: [[TMP77:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP76]], float [[TMP72]], float [[TMP71]])
-; CHECK-NEXT: [[TMP78:%.*]] = fptoui float [[TMP75]] to i32
+; CHECK-NEXT: [[TMP78:%.*]] = fptoui fast float [[TMP75]] to i32
; CHECK-NEXT: [[TMP79:%.*]] = call fast float @llvm.fabs.f32(float [[TMP77]])
; CHECK-NEXT: [[TMP80:%.*]] = call fast float @llvm.fabs.f32(float [[TMP72]])
; CHECK-NEXT: [[TMP81:%.*]] = fcmp fast oge float [[TMP79]], [[TMP80]]
@@ -2906,14 +2906,14 @@ define amdgpu_kernel void @sdiv_v4i16(ptr addrspace(1) %out, <4 x i16> %x, <4 x
; CHECK-NEXT: [[TMP5:%.*]] = xor i32 [[TMP3]], [[TMP4]]
; CHECK-NEXT: [[TMP6:%.*]] = ashr i32 [[TMP5]], 30
; CHECK-NEXT: [[TMP7:%.*]] = or i32 [[TMP6]], 1
-; CHECK-NEXT: [[TMP8:%.*]] = sitofp i32 [[TMP3]] to float
-; CHECK-NEXT: [[TMP9:%.*]] = sitofp i32 [[TMP4]] to float
+; CHECK-NEXT: [[TMP8:%.*]] = sitofp fast i32 [[TMP3]] to float
+; CHECK-NEXT: [[TMP9:%.*]] = sitofp fast i32 [[TMP4]] to float
; CHECK-NEXT: [[TMP10:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP9]])
; CHECK-NEXT: [[TMP11:%.*]] = fmul fast float [[TMP8]], [[TMP10]]
; CHECK-NEXT: [[TMP12:%.*]] = call fast float @llvm.trunc.f32(float [[TMP11]])
; CHECK-NEXT: [[TMP13:%.*]] = fneg fast float [[TMP12]]
; CHECK-NEXT: [[TMP14:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP13]], float [[TMP9]], float [[TMP8]])
-; CHECK-NEXT: [[TMP15:%.*]] = fptosi float [[TMP12]] to i32
+; CHECK-NEXT: [[TMP15:%.*]] = fptosi fast float [[TMP12]] to i32
; CHECK-NEXT: [[TMP16:%.*]] = call fast float @llvm.fabs.f32(float [[TMP14]])
; CHECK-NEXT: [[TMP17:%.*]] = call fast float @llvm.fabs.f32(float [[TMP9]])
; CHECK-NEXT: [[TMP18:%.*]] = fcmp fast oge float [[TMP16]], [[TMP17]]
@@ -2930,14 +2930,14 @@ define amdgpu_kernel void @sdiv_v4i16(ptr addrspace(1) %out, <4 x i16> %x, <4 x
; CHECK-NEXT: [[TMP29:%.*]] = xor i32 [[TMP27]], [[TMP28]]
; CHECK-NEXT: [[TMP30:%.*]] = ashr i32 [[TMP29]], 30
; CHECK-NEXT: [[TMP31:%.*]] = or i32 [[TMP30]], 1
-; CHECK-NEXT: [[TMP32:%.*]] = sitofp i32 [[TMP27]] to float
-; CHECK-NEXT: [[TMP33:%.*]] = sitofp i32 [[TMP28]] to float
+; CHECK-NEXT: [[TMP32:%.*]] = sitofp fast i32 [[TMP27]] to float
+; CHECK-NEXT: [[TMP33:%.*]] = sitofp fast i32 [[TMP28]] to float
; CHECK-NEXT: [[TMP34:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP33]])
; CHECK-NEXT: [[TMP35:%.*]] = fmul fast float [[TMP32]], [[TMP34]]
; CHECK-NEXT: [[TMP36:%.*]] = call fast float @llvm.trunc.f32(float [[TMP35]])
; CHECK-NEXT: [[TMP37:%.*]] = fneg fast float [[TMP36]]
; CHECK-NEXT: [[TMP38:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP37]], float [[TMP33]], float [[TMP32]])
-; CHECK-NEXT: [[TMP39:%.*]] = fptosi float [[TMP36]] to i32
+; CHECK-NEXT: [[TMP39:%.*]] = fptosi fast float [[TMP36]] to i32
; CHECK-NEXT: [[TMP40:%.*]] = call fast float @llvm.fabs.f32(float [[TMP38]])
; CHECK-NEXT: [[TMP41:%.*]] = call fast float @llvm.fabs.f32(float [[TMP33]])
; CHECK-NEXT: [[TMP42:%.*]] = fcmp fast oge float [[TMP40]], [[TMP41]]
@@ -2954,14 +2954,14 @@ define amdgpu_kernel void @sdiv_v4i16(ptr addrspace(1) %out, <4 x i16> %x, <4 x
; CHECK-NEXT: [[TMP53:%.*]] = xor i32 [[TMP51]], [[TMP52]]
; CHECK-NEXT: [[TMP54:%.*]] = ashr i32 [[TMP53]], 30
; CHECK-NEXT: [[TMP55:%.*]] = or i32 [[TMP54]], 1
-; CHECK-NEXT: [[TMP56:%.*]] = sitofp i32 [[TMP51]] to float
-; CHECK-NEXT: [[TMP57:%.*]] = sitofp i32 [[TMP52]] to float
+; CHECK-NEXT: [[TMP56:%.*]] = sitofp fast i32 [[TMP51]] to float
+; CHECK-NEXT: [[TMP57:%.*]] = sitofp fast i32 [[TMP52]] to float
; CHECK-NEXT: [[TMP58:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP57]])
; CHECK-NEXT: [[TMP59:%.*]] = fmul fast float [[TMP56]], [[TMP58]]
; CHECK-NEXT: [[TMP60:%.*]] = call fast float @llvm.trunc.f32(float [[TMP59]])
; CHECK-NEXT: [[TMP61:%.*]] = fneg fast float [[TMP60]]
; CHECK-NEXT: [[TMP62:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP61]], float [[TMP57]], float [[TMP56]])
-; CHECK-NEXT: [[TMP63:%.*]] = fptosi float [[TMP60]] to i32
+; CHECK-NEXT: [[TMP63:%.*]] = fptosi fast float [[TMP60]] to i32
; CHECK-NEXT: [[TMP64:%.*]] = call fast float @llvm.fabs.f32(float [[TMP62]])
; CHECK-NEXT: [[TMP65:%.*]] = call fast float @llvm.fabs.f32(float [[TMP57]])
; CHECK-NEXT: [[TMP66:%.*]] = fcmp fast oge float [[TMP64]], [[TMP65]]
@@ -2978,14 +2978,14 @@ define amdgpu_kernel void @sdiv_v4i16(ptr addrspace(1) %out, <4 x i16> %x, <4 x
; CHECK-NEXT: [[TMP77:%.*]] = xor i32 [[TMP75]], [[TMP76]]
; CHECK-NEXT: [[TMP78:%.*]] = ashr i32 [[TMP77]], 30
; CHECK-NEXT: [[TMP79:%.*]] = or i32 [[TMP78]], 1
-; CHECK-NEXT: [[TMP80:%.*]] = sitofp i32 [[TMP75]] to float
-; CHECK-NEXT: [[TMP81:%.*]] = sitofp i32 [[TMP76]] to float
+; CHECK-NEXT: [[TMP80:%.*]] = sitofp fast i32 [[TMP75]] to float
+; CHECK-NEXT: [[TMP81:%.*]] = sitofp fast i32 [[TMP76]] to float
; CHECK-NEXT: [[TMP82:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP81]])
; CHECK-NEXT: [[TMP83:%.*]] = fmul fast float [[TMP80]], [[TMP82]]
; CHECK-NEXT: [[TMP84:%.*]] = call fast float @llvm.trunc.f32(float [[TMP83]])
; CHECK-NEXT: [[TMP85:%.*]] = fneg fast float [[TMP84]]
; CHECK-NEXT: [[TMP86:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP85]], float [[TMP81]], float [[TMP80]])
-; CHECK-NEXT: [[TMP87:%.*]] = fptosi float [[TMP84]] to i32
+; CHECK-NEXT: [[TMP87:%.*]] = fptosi fast float [[TMP84]] to i32
; CHECK-NEXT: [[TMP88:%.*]] = call fast float @llvm.fabs.f32(float [[TMP86]])
; CHECK-NEXT: [[TMP89:%.*]] = call fast float @llvm.fabs.f32(float [[TMP81]])
; CHECK-NEXT: [[TMP90:%.*]] = fcmp fast oge float [[TMP88]], [[TMP89]]
@@ -3168,14 +3168,14 @@ define amdgpu_kernel void @srem_v4i16(ptr addrspace(1) %out, <4 x i16> %x, <4 x
; CHECK-NEXT: [[TMP5:%.*]] = xor i32 [[TMP3]], [[TMP4]]
; CHECK-NEXT: [[TMP6:%.*]] = ashr i32 [[TMP5]], 30
; CHECK-NEXT: [[TMP7:%.*]] = or i32 [[TMP6]], 1
-; CHECK-NEXT: [[TMP8:%.*]] = sitofp i32 [[TMP3]] to float
-; CHECK-NEXT: [[TMP9:%.*]] = sitofp i32 [[TMP4]] to float
+; CHECK-NEXT: [[TMP8:%.*]] = sitofp fast i32 [[TMP3]] to float
+; CHECK-NEXT: [[TMP9:%.*]] = sitofp fast i32 [[TMP4]] to float
; CHECK-NEXT: [[TMP10:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP9]])
; CHECK-NEXT: [[TMP11:%.*]] = fmul fast float [[TMP8]], [[TMP10]]
; CHECK-NEXT: [[TMP12:%.*]] = call fast float @llvm.trunc.f32(float [[TMP11]])
; CHECK-NEXT: [[TMP13:%.*]] = fneg fast float [[TMP12]]
; CHECK-NEXT: [[TMP14:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP13]], float [[TMP9]], float [[TMP8]])
-; CHECK-NEXT: [[TMP15:%.*]] = fptosi float [[TMP12]] to i32
+; CHECK-NEXT: [[TMP15:%.*]] = fptosi fast float [[TMP12]] to i32
; CHECK-NEXT: [[TMP16:%.*]] = call fast float @llvm.fabs.f32(float [[TMP14]])
; CHECK-NEXT: [[TMP17:%.*]] = call fast float @llvm.fabs.f32(float [[TMP9]])
; CHECK-NEXT: [[TMP18:%.*]] = fcmp fast oge float [[TMP16]], [[TMP17]]
@@ -3194,14 +3194,14 @@ define amdgpu_kernel void @srem_v4i16(ptr addrspace(1) %out, <4 x i16> %x, <4 x
; CHECK-NEXT: [[TMP31:%.*]] = xor i32 [[TMP29]], [[TMP30]]
; CHECK-NEXT: [[TMP32:%.*]] = ashr i32 [[TMP31]], 30
; CHECK-NEXT: [[TMP33:%.*]] = or i32 [[TMP32]], 1
-; CHECK-NEXT: [[TMP34:%.*]] = sitofp i32 [[TMP29]] to float
-; CHECK-NEXT: [[TMP35:%.*]] = sitofp i32 [[TMP30]] to float
+; CHECK-NEXT: [[TMP34:%.*]] = sitofp fast i32 [[TMP29]] to float
+; CHECK-NEXT: [[TMP35:%.*]] = sitofp fast i32 [[TMP30]] to float
; CHECK-NEXT: [[TMP36:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP35]])
; CHECK-NEXT: [[TMP37:%.*]] = fmul fast float [[TMP34]], [[TMP36]]
; CHECK-NEXT: [[TMP38:%.*]] = call fast float @llvm.trunc.f32(float [[TMP37]])
; CHECK-NEXT: [[TMP39:%.*]] = fneg fast float [[TMP38]]
; CHECK-NEXT: [[TMP40:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP39]], float [[TMP35]], float [[TMP34]])
-; CHECK-NEXT: [[TMP41:%.*]] = fptosi float [[TMP38]] to i32
+; CHECK-NEXT: [[TMP41:%.*]] = fptosi fast float [[TMP38]] to i32
; CHECK-NEXT: [[TMP42:%.*]] = call fast float @llvm.fabs.f32(float [[TMP40]])
; CHECK-NEXT: [[TMP43:%.*]] = call fast float @llvm.fabs.f32(float [[TMP35]])
; CHECK-NEXT: [[TMP44:%.*]] = fcmp fast oge float [[TMP42]], [[TMP43]]
@@ -3220,14 +3220,14 @@ define amdgpu_kernel void @srem_v4i16(ptr addrspace(1) %out, <4 x i16> %x, <4 x
; CHECK-NEXT: [[TMP57:%.*]] = xor i32 [[TMP55]], [[TMP56]]
; CHECK-NEXT: [[TMP58:%.*]] = ashr i32 [[TMP57]], 30
; CHECK-NEXT: [[TMP59:%.*]] = or i32 [[TMP58]], 1
-; CHECK-NEXT: [[TMP60:%.*]] = sitofp i32 [[TMP55]] to float
-; CHECK-NEXT: [[TMP61:%.*]] = sitofp i32 [[TMP56]] to float
+; CHECK-NEXT: [[TMP60:%.*]] = sitofp fast i32 [[TMP55]] to float
+; CHECK-NEXT: [[TMP61:%.*]] = sitofp fast i32 [[TMP56]] to float
; CHECK-NEXT: [[TMP62:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP61]])
; CHECK-NEXT: [[TMP63:%.*]] = fmul fast float [[TMP60]], [[TMP62]]
; CHECK-NEXT: [[TMP64:%.*]] = call fast float @llvm.trunc.f32(float [[TMP63]])
; CHECK-NEXT: [[TMP65:%.*]] = fneg fast float [[TMP64]]
; CHECK-NEXT: [[TMP66:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP65]], float [[TMP61]], float [[TMP60]])
-; CHECK-NEXT: [[TMP67:%.*]] = fptosi float [[TMP64]] to i32
+; CHECK-NEXT: [[TMP67:%.*]] = fptosi fast float [[TMP64]] to i32
; CHECK-NEXT: [[TMP68:%.*]] = call fast float @llvm.fabs.f32(float [[TMP66]])
; CHECK-NEXT: [[TMP69:%.*]] = call fast float @llvm.fabs.f32(float [[TMP61]])
; CHECK-NEXT: [[TMP70:%.*]] = fcmp fast oge float [[TMP68]], [[TMP69]]
@@ -3246,14 +3246,14 @@ define amdgpu_kernel void @srem_v4i16(ptr addrspace(1) %out, <4 x i16> %x, <4 x
; CHECK-NEXT: [[TMP83:%.*]] = xor i32 [[TMP81]], [[TMP82]]
; CHECK-NEXT: [[TMP84:%.*]] = ashr i32 [[TMP83]], 30
; CHECK-NEXT: [[TMP85:%.*]] = or i32 [[TMP84]], 1
-; CHECK-NEXT: [[TMP86:%.*]] = sitofp i32 [[TMP81]] to float
-; CHECK-NEXT: [[TMP87:%.*]] = sitofp i32 [[TMP82]] to float
+; CHECK-NEXT: [[TMP86:%.*]] = sitofp fast i32 [[TMP81]] to float
+; CHECK-NEXT: [[TMP87:%.*]] = sitofp fast i32 [[TMP82]] to float
; CHECK-NEXT: [[TMP88:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP87]])
; CHECK-NEXT: [[TMP89:%.*]] = fmul fast float [[TMP86]], [[TMP88]]
; CHECK-NEXT: [[TMP90:%.*]] = call fast float @llvm.trunc.f32(float [[TMP89]])
; CHECK-NEXT: [[TMP91:%.*]] = fneg fast float [[TMP90]]
; CHECK-NEXT: [[TMP92:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP91]], float [[TMP87]], float [[TMP86]])
-; CHECK-NEXT: [[TMP93:%.*]] = fptosi float [[TMP90]] to i32
+; CHECK-NEXT: [[TMP93:%.*]] = fptosi fast float [[TMP90]] to i32
; CHECK-NEXT: [[TMP94:%.*]] = call fast float @llvm.fabs.f32(float [[TMP92]])
; CHECK-NEXT: [[TMP95:%.*]] = call fast float @llvm.fabs.f32(float [[TMP87]])
; CHECK-NEXT: [[TMP96:%.*]] = fcmp fast oge float [[TMP94]], [[TMP95]]
@@ -3460,7 +3460,7 @@ define amdgpu_kernel void @udiv_i3(ptr addrspace(1) %out, i3 %x, i3 %y) {
; CHECK-NEXT: [[TMP7:%.*]] = call fast float @llvm.trunc.f32(float [[TMP6]])
; CHECK-NEXT: [[TMP8:%.*]] = fneg fast float [[TMP7]]
; CHECK-NEXT: [[TMP9:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP8]], float [[TMP4]], float [[TMP3]])
-; CHECK-NEXT: [[TMP10:%.*]] = fptoui float [[TMP7]] to i32
+; CHECK-NEXT: [[TMP10:%.*]] = fptoui fast float [[TMP7]] to i32
; CHECK-NEXT: [[TMP11:%.*]] = call fast float @llvm.fabs.f32(float [[TMP9]])
; CHECK-NEXT: [[TMP12:%.*]] = call fast float @llvm.fabs.f32(float [[TMP4]])
; CHECK-NEXT: [[TMP13:%.*]] = fcmp fast oge float [[TMP11]], [[TMP12]]
@@ -3529,7 +3529,7 @@ define amdgpu_kernel void @urem_i3(ptr addrspace(1) %out, i3 %x, i3 %y) {
; CHECK-NEXT: [[TMP7:%.*]] = call fast float @llvm.trunc.f32(float [[TMP6]])
; CHECK-NEXT: [[TMP8:%.*]] = fneg fast float [[TMP7]]
; CHECK-NEXT: [[TMP9:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP8]], float [[TMP4]], float [[TMP3]])
-; CHECK-NEXT: [[TMP10:%.*]] = fptoui float [[TMP7]] to i32
+; CHECK-NEXT: [[TMP10:%.*]] = fptoui fast float [[TMP7]] to i32
; CHECK-NEXT: [[TMP11:%.*]] = call fast float @llvm.fabs.f32(float [[TMP9]])
; CHECK-NEXT: [[TMP12:%.*]] = call fast float @llvm.fabs.f32(float [[TMP4]])
; CHECK-NEXT: [[TMP13:%.*]] = fcmp fast oge float [[TMP11]], [[TMP12]]
@@ -3603,14 +3603,14 @@ define amdgpu_kernel void @sdiv_i3(ptr addrspace(1) %out, i3 %x, i3 %y) {
; CHECK-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]]
; CHECK-NEXT: [[TMP4:%.*]] = ashr i32 [[TMP3]], 30
; CHECK-NEXT: [[TMP5:%.*]] = or i32 [[TMP4]], 1
-; CHECK-NEXT: [[TMP6:%.*]] = sitofp i32 [[TMP1]] to float
-; CHECK-NEXT: [[TMP7:%.*]] = sitofp i32 [[TMP2]] to float
+; CHECK-NEXT: [[TMP6:%.*]] = sitofp fast i32 [[TMP1]] to float
+; CHECK-NEXT: [[TMP7:%.*]] = sitofp fast i32 [[TMP2]] to float
; CHECK-NEXT: [[TMP8:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP7]])
; CHECK-NEXT: [[TMP9:%.*]] = fmul fast float [[TMP6]], [[TMP8]]
; CHECK-NEXT: [[TMP10:%.*]] = call fast float @llvm.trunc.f32(float [[TMP9]])
; CHECK-NEXT: [[TMP11:%.*]] = fneg fast float [[TMP10]]
; CHECK-NEXT: [[TMP12:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP11]], float [[TMP7]], float [[TMP6]])
-; CHECK-NEXT: [[TMP13:%.*]] = fptosi float [[TMP10]] to i32
+; CHECK-NEXT: [[TMP13:%.*]] = fptosi fast float [[TMP10]] to i32
; CHECK-NEXT: [[TMP14:%.*]] = call fast float @llvm.fabs.f32(float [[TMP12]])
; CHECK-NEXT: [[TMP15:%.*]] = call fast float @llvm.fabs.f32(float [[TMP7]])
; CHECK-NEXT: [[TMP16:%.*]] = fcmp fast oge float [[TMP14]], [[TMP15]]
@@ -3686,14 +3686,14 @@ define amdgpu_kernel void @srem_i3(ptr addrspace(1) %out, i3 %x, i3 %y) {
; CHECK-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]]
; CHECK-NEXT: [[TMP4:%.*]] = ashr i32 [[TMP3]], 30
; CHECK-NEXT: [[TMP5:%.*]] = or i32 [[TMP4]], 1
-; CHECK-NEXT: [[TMP6:%.*]] = sitofp i32 [[TMP1]] to float
-; CHECK-NEXT: [[TMP7:%.*]] = sitofp i32 [[TMP2]] to float
+; CHECK-NEXT: [[TMP6:%.*]] = sitofp fast i32 [[TMP1]] to float
+; CHECK-NEXT: [[TMP7:%.*]] = sitofp fast i32 [[TMP2]] to float
; CHECK-NEXT: [[TMP8:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP7]])
; CHECK-NEXT: [[TMP9:%.*]] = fmul fast float [[TMP6]], [[TMP8]]
; CHECK-NEXT: [[TMP10:%.*]] = call fast float @llvm.trunc.f32(float [[TMP9]])
; CHECK-NEXT: [[TMP11:%.*]] = fneg fast float [[TMP10]]
; CHECK-NEXT: [[TMP12:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP11]], float [[TMP7]], float [[TMP6]])
-; CHECK-NEXT: [[TMP13:%.*]] = fptosi float [[TMP10]] to i32
+; CHECK-NEXT: [[TMP13:%.*]] = fptosi fast float [[TMP10]] to i32
; CHECK-NEXT: [[TMP14:%.*]] = call fast float @llvm.fabs.f32(float [[TMP12]])
; CHECK-NEXT: [[TMP15:%.*]] = call fast float @llvm.fabs.f32(float [[TMP7]])
; CHECK-NEXT: [[TMP16:%.*]] = fcmp fast oge float [[TMP14]], [[TMP15]]
@@ -3784,7 +3784,7 @@ define amdgpu_kernel void @udiv_v3i16(ptr addrspace(1) %out, <3 x i16> %x, <3 x
; CHECK-NEXT: [[TMP9:%.*]] = call fast float @llvm.trunc.f32(float [[TMP8]])
; CHECK-NEXT: [[TMP10:%.*]] = fneg fast float [[TMP9]]
; CHECK-NEXT: [[TMP11:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP10]], float [[TMP6]], float [[TMP5]])
-; CHECK-NEXT: [[TMP12:%.*]] = fptoui float [[TMP9]] to i32
+; CHECK-NEXT: [[TMP12:%.*]] = fptoui fast float [[TMP9]] to i32
; CHECK-NEXT: [[TMP13:%.*]] = call fast float @llvm.fabs.f32(float [[TMP11]])
; CHECK-NEXT: [[TMP14:%.*]] = call fast float @llvm.fabs.f32(float [[TMP6]])
; CHECK-NEXT: [[TMP15:%.*]] = fcmp fast oge float [[TMP13]], [[TMP14]]
@@ -3804,7 +3804,7 @@ define amdgpu_kernel void @udiv_v3i16(ptr addrspace(1) %out, <3 x i16> %x, <3 x
; CHECK-NEXT: [[TMP29:%.*]] = call fast float @llvm.trunc.f32(float [[TMP28]])
; CHECK-NEXT: [[TMP30:%.*]] = fneg fast float [[TMP29]]
; CHECK-NEXT: [[TMP31:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP30]], float [[TMP26]], float [[TMP25]])
-; CHECK-NEXT: [[TMP32:%.*]] = fptoui float [[TMP29]] to i32
+; CHECK-NEXT: [[TMP32:%.*]] = fptoui fast float [[TMP29]] to i32
; CHECK-NEXT: [[TMP33:%.*]] = call fast float @llvm.fabs.f32(float [[TMP31]])
; CHECK-NEXT: [[TMP34:%.*]] = call fast float @llvm.fabs.f32(float [[TMP26]])
; CHECK-NEXT: [[TMP35:%.*]] = fcmp fast oge float [[TMP33]], [[TMP34]]
@@ -3824,7 +3824,7 @@ define amdgpu_kernel void @udiv_v3i16(ptr addrspace(1) %out, <3 x i16> %x, <3 x
; CHECK-NEXT: [[TMP49:%.*]] = call fast float @llvm.trunc.f32(float [[TMP48]])
; CHECK-NEXT: [[TMP50:%.*]] = fneg fast float [[TMP49]]
; CHECK-NEXT: [[TMP51:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP50]], float [[TMP46]], float [[TMP45]])
-; CHECK-NEXT: [[TMP52:%.*]] = fptoui float [[TMP49]] to i32
+; CHECK-NEXT: [[TMP52:%.*]] = fptoui fast float [[TMP49]] to i32
; CHECK-NEXT: [[TMP53:%.*]] = call fast float @llvm.fabs.f32(float [[TMP51]])
; CHECK-NEXT: [[TMP54:%.*]] = call fast float @llvm.fabs.f32(float [[TMP46]])
; CHECK-NEXT: [[TMP55:%.*]] = fcmp fast oge float [[TMP53]], [[TMP54]]
@@ -3946,7 +3946,7 @@ define amdgpu_kernel void @urem_v3i16(ptr addrspace(1) %out, <3 x i16> %x, <3 x
; CHECK-NEXT: [[TMP9:%.*]] = call fast float @llvm.trunc.f32(float [[TMP8]])
; CHECK-NEXT: [[TMP10:%.*]] = fneg fast float [[TMP9]]
; CHECK-NEXT: [[TMP11:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP10]], float [[TMP6]], float [[TMP5]])
-; CHECK-NEXT: [[TMP12:%.*]] = fptoui float [[TMP9]] to i32
+; CHECK-NEXT: [[TMP12:%.*]] = fptoui fast float [[TMP9]] to i32
; CHECK-NEXT: [[TMP13:%.*]] = call fast float @llvm.fabs.f32(float [[TMP11]])
; CHECK-NEXT: [[TMP14:%.*]] = call fast float @llvm.fabs.f32(float [[TMP6]])
; CHECK-NEXT: [[TMP15:%.*]] = fcmp fast oge float [[TMP13]], [[TMP14]]
@@ -3968,7 +3968,7 @@ define amdgpu_kernel void @urem_v3i16(ptr addrspace(1) %out, <3 x i16> %x, <3 x
; CHECK-NEXT: [[TMP31:%.*]] = call fast float @llvm.trunc.f32(float [[TMP30]])
; CHECK-NEXT: [[TMP32:%.*]] = fneg fast float [[TMP31]]
; CHECK-NEXT: [[TMP33:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP32]], float [[TMP28]], float [[TMP27]])
-; CHECK-NEXT: [[TMP34:%.*]] = fptoui float [[TMP31]] to i32
+; CHECK-NEXT: [[TMP34:%.*]] = fptoui fast float [[TMP31]] to i32
; CHECK-NEXT: [[TMP35:%.*]] = call fast float @llvm.fabs.f32(float [[TMP33]])
; CHECK-NEXT: [[TMP36:%.*]] = call fast float @llvm.fabs.f32(float [[TMP28]])
; CHECK-NEXT: [[TMP37:%.*]] = fcmp fast oge float [[TMP35]], [[TMP36]]
@@ -3990,7 +3990,7 @@ define amdgpu_kernel void @urem_v3i16(ptr addrspace(1) %out, <3 x i16> %x, <3 x
; CHECK-NEXT: [[TMP53:%.*]] = call fast float @llvm.trunc.f32(float [[TMP52]])
; CHECK-NEXT: [[TMP54:%.*]] = fneg fast float [[TMP53]]
; CHECK-NEXT: [[TMP55:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP54]], float [[TMP50]], float [[TMP49]])
-; CHECK-NEXT: [[TMP56:%.*]] = fptoui float [[TMP53]] to i32
+; CHECK-NEXT: [[TMP56:%.*]] = fptoui fast float [[TMP53]] to i32
; CHECK-NEXT: [[TMP57:%.*]] = call fast float @llvm.fabs.f32(float [[TMP55]])
; CHECK-NEXT: [[TMP58:%.*]] = call fast float @llvm.fabs.f32(float [[TMP50]])
; CHECK-NEXT: [[TMP59:%.*]] = fcmp fast oge float [[TMP57]], [[TMP58]]
@@ -4121,14 +4121,14 @@ define amdgpu_kernel void @sdiv_v3i16(ptr addrspace(1) %out, <3 x i16> %x, <3 x
; CHECK-NEXT: [[TMP5:%.*]] = xor i32 [[TMP3]], [[TMP4]]
; CHECK-NEXT: [[TMP6:%.*]] = ashr i32 [[TMP5]], 30
; CHECK-NEXT: [[TMP7:%.*]] = or i32 [[TMP6]], 1
-; CHECK-NEXT: [[TMP8:%.*]] = sitofp i32 [[TMP3]] to float
-; CHECK-NEXT: [[TMP9:%.*]] = sitofp i32 [[TMP4]] to float
+; CHECK-NEXT: [[TMP8:%.*]] = sitofp fast i32 [[TMP3]] to float
+; CHECK-NEXT: [[TMP9:%.*]] = sitofp fast i32 [[TMP4]] to float
; CHECK-NEXT: [[TMP10:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP9]])
; CHECK-NEXT: [[TMP11:%.*]] = fmul fast float [[TMP8]], [[TMP10]]
; CHECK-NEXT: [[TMP12:%.*]] = call fast float @llvm.trunc.f32(float [[TMP11]])
; CHECK-NEXT: [[TMP13:%.*]] = fneg fast float [[TMP12]]
; CHECK-NEXT: [[TMP14:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP13]], float [[TMP9]], float [[TMP8]])
-; CHECK-NEXT: [[TMP15:%.*]] = fptosi float [[TMP12]] to i32
+; CHECK-NEXT: [[TMP15:%.*]] = fptosi fast float [[TMP12]] to i32
; CHECK-NEXT: [[TMP16:%.*]] = call fast float @llvm.fabs.f32(float [[TMP14]])
; CHECK-NEXT: [[TMP17:%.*]] = call fast float @llvm.fabs.f32(float [[TMP9]])
; CHECK-NEXT: [[TMP18:%.*]] = fcmp fast oge float [[TMP16]], [[TMP17]]
@@ -4145,14 +4145,14 @@ define amdgpu_kernel void @sdiv_v3i16(ptr addrspace(1) %out, <3 x i16> %x, <3 x
; CHECK-NEXT: [[TMP29:%.*]] = xor i32 [[TMP27]], [[TMP28]]
; CHECK-NEXT: [[TMP30:%.*]] = ashr i32 [[TMP29]], 30
; CHECK-NEXT: [[TMP31:%.*]] = or i32 [[TMP30]], 1
-; CHECK-NEXT: [[TMP32:%.*]] = sitofp i32 [[TMP27]] to float
-; CHECK-NEXT: [[TMP33:%.*]] = sitofp i32 [[TMP28]] to float
+; CHECK-NEXT: [[TMP32:%.*]] = sitofp fast i32 [[TMP27]] to float
+; CHECK-NEXT: [[TMP33:%.*]] = sitofp fast i32 [[TMP28]] to float
; CHECK-NEXT: [[TMP34:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP33]])
; CHECK-NEXT: [[TMP35:%.*]] = fmul fast float [[TMP32]], [[TMP34]]
; CHECK-NEXT: [[TMP36:%.*]] = call fast float @llvm.trunc.f32(float [[TMP35]])
; CHECK-NEXT: [[TMP37:%.*]] = fneg fast float [[TMP36]]
; CHECK-NEXT: [[TMP38:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP37]], float [[TMP33]], float [[TMP32]])
-; CHECK-NEXT: [[TMP39:%.*]] = fptosi float [[TMP36]] to i32
+; CHECK-NEXT: [[TMP39:%.*]] = fptosi fast float [[TMP36]] to i32
; CHECK-NEXT: [[TMP40:%.*]] = call fast float @llvm.fabs.f32(float [[TMP38]])
; CHECK-NEXT: [[TMP41:%.*]] = call fast float @llvm.fabs.f32(float [[TMP33]])
; CHECK-NEXT: [[TMP42:%.*]] = fcmp fast oge float [[TMP40]], [[TMP41]]
@@ -4169,14 +4169,14 @@ define amdgpu_kernel void @sdiv_v3i16(ptr addrspace(1) %out, <3 x i16> %x, <3 x
; CHECK-NEXT: [[TMP53:%.*]] = xor i32 [[TMP51]], [[TMP52]]
; CHECK-NEXT: [[TMP54:%.*]] = ashr i32 [[TMP53]], 30
; CHECK-NEXT: [[TMP55:%.*]] = or i32 [[TMP54]], 1
-; CHECK-NEXT: [[TMP56:%.*]] = sitofp i32 [[TMP51]] to float
-; CHECK-NEXT: [[TMP57:%.*]] = sitofp i32 [[TMP52]] to float
+; CHECK-NEXT: [[TMP56:%.*]] = sitofp fast i32 [[TMP51]] to float
+; CHECK-NEXT: [[TMP57:%.*]] = sitofp fast i32 [[TMP52]] to float
; CHECK-NEXT: [[TMP58:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP57]])
; CHECK-NEXT: [[TMP59:%.*]] = fmul fast float [[TMP56]], [[TMP58]]
; CHECK-NEXT: [[TMP60:%.*]] = call fast float @llvm.trunc.f32(float [[TMP59]])
; CHECK-NEXT: [[TMP61:%.*]] = fneg fast float [[TMP60]]
; CHECK-NEXT: [[TMP62:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP61]], float [[TMP57]], float [[TMP56]])
-; CHECK-NEXT: [[TMP63:%.*]] = fptosi float [[TMP60]] to i32
+; CHECK-NEXT: [[TMP63:%.*]] = fptosi fast float [[TMP60]] to i32
; CHECK-NEXT: [[TMP64:%.*]] = call fast float @llvm.fabs.f32(float [[TMP62]])
; CHECK-NEXT: [[TMP65:%.*]] = call fast float @llvm.fabs.f32(float [[TMP57]])
; CHECK-NEXT: [[TMP66:%.*]] = fcmp fast oge float [[TMP64]], [[TMP65]]
@@ -4324,14 +4324,14 @@ define amdgpu_kernel void @srem_v3i16(ptr addrspace(1) %out, <3 x i16> %x, <3 x
; CHECK-NEXT: [[TMP5:%.*]] = xor i32 [[TMP3]], [[TMP4]]
; CHECK-NEXT: [[TMP6:%.*]] = ashr i32 [[TMP5]], 30
; CHECK-NEXT: [[TMP7:%.*]] = or i32 [[TMP6]], 1
-; CHECK-NEXT: [[TMP8:%.*]] = sitofp i32 [[TMP3]] to float
-; CHECK-NEXT: [[TMP9:%.*]] = sitofp i32 [[TMP4]] to float
+; CHECK-NEXT: [[TMP8:%.*]] = sitofp fast i32 [[TMP3]] to float
+; CHECK-NEXT: [[TMP9:%.*]] = sitofp fast i32 [[TMP4]] to float
; CHECK-NEXT: [[TMP10:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP9]])
; CHECK-NEXT: [[TMP11:%.*]] = fmul fast float [[TMP8]], [[TMP10]]
; CHECK-NEXT: [[TMP12:%.*]] = call fast float @llvm.trunc.f32(float [[TMP11]])
; CHECK-NEXT: [[TMP13:%.*]] = fneg fast float [[TMP12]]
; CHECK-NEXT: [[TMP14:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP13]], float [[TMP9]], float [[TMP8]])
-; CHECK-NEXT: [[TMP15:%.*]] = fptosi float [[TMP12]] to i32
+; CHECK-NEXT: [[TMP15:%.*]] = fptosi fast float [[TMP12]] to i32
; CHECK-NEXT: [[TMP16:%.*]] = call fast float @llvm.fabs.f32(float [[TMP14]])
; CHECK-NEXT: [[TMP17:%.*]] = call fast float @llvm.fabs.f32(float [[TMP9]])
; CHECK-NEXT: [[TMP18:%.*]] = fcmp fast oge float [[TMP16]], [[TMP17]]
@@ -4350,14 +4350,14 @@ define amdgpu_kernel void @srem_v3i16(ptr addrspace(1) %out, <3 x i16> %x, <3 x
; CHECK-NEXT: [[TMP31:%.*]] = xor i32 [[TMP29]], [[TMP30]]
; CHECK-NEXT: [[TMP32:%.*]] = ashr i32 [[TMP31]], 30
; CHECK-NEXT: [[TMP33:%.*]] = or i32 [[TMP32]], 1
-; CHECK-NEXT: [[TMP34:%.*]] = sitofp i32 [[TMP29]] to float
-; CHECK-NEXT: [[TMP35:%.*]] = sitofp i32 [[TMP30]] to float
+; CHECK-NEXT: [[TMP34:%.*]] = sitofp fast i32 [[TMP29]] to float
+; CHECK-NEXT: [[TMP35:%.*]] = sitofp fast i32 [[TMP30]] to float
; CHECK-NEXT: [[TMP36:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP35]])
; CHECK-NEXT: [[TMP37:%.*]] = fmul fast float [[TMP34]], [[TMP36]]
; CHECK-NEXT: [[TMP38:%.*]] = call fast float @llvm.trunc.f32(float [[TMP37]])
; CHECK-NEXT: [[TMP39:%.*]] = fneg fast float [[TMP38]]
; CHECK-NEXT: [[TMP40:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP39]], float [[TMP35]], float [[TMP34]])
-; CHECK-NEXT: [[TMP41:%.*]] = fptosi float [[TMP38]] to i32
+; CHECK-NEXT: [[TMP41:%.*]] = fptosi fast float [[TMP38]] to i32
; CHECK-NEXT: [[TMP42:%.*]] = call fast float @llvm.fabs.f32(float [[TMP40]])
; CHECK-NEXT: [[TMP43:%.*]] = call fast float @llvm.fabs.f32(float [[TMP35]])
; CHECK-NEXT: [[TMP44:%.*]] = fcmp fast oge float [[TMP42]], [[TMP43]]
@@ -4376,14 +4376,14 @@ define amdgpu_kernel void @srem_v3i16(ptr addrspace(1) %out, <3 x i16> %x, <3 x
; CHECK-NEXT: [[TMP57:%.*]] = xor i32 [[TMP55]], [[TMP56]]
; CHECK-NEXT: [[TMP58:%.*]] = ashr i32 [[TMP57]], 30
; CHECK-NEXT: [[TMP59:%.*]] = or i32 [[TMP58]], 1
-; CHECK-NEXT: [[TMP60:%.*]] = sitofp i32 [[TMP55]] to float
-; CHECK-NEXT: [[TMP61:%.*]] = sitofp i32 [[TMP56]] to float
+; CHECK-NEXT: [[TMP60:%.*]] = sitofp fast i32 [[TMP55]] to float
+; CHECK-NEXT: [[TMP61:%.*]] = sitofp fast i32 [[TMP56]] to float
; CHECK-NEXT: [[TMP62:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP61]])
; CHECK-NEXT: [[TMP63:%.*]] = fmul fast float [[TMP60]], [[TMP62]]
; CHECK-NEXT: [[TMP64:%.*]] = call fast float @llvm.trunc.f32(float [[TMP63]])
; CHECK-NEXT: [[TMP65:%.*]] = fneg fast float [[TMP64]]
; CHECK-NEXT: [[TMP66:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP65]], float [[TMP61]], float [[TMP60]])
-; CHECK-NEXT: [[TMP67:%.*]] = fptosi float [[TMP64]] to i32
+; CHECK-NEXT: [[TMP67:%.*]] = fptosi fast float [[TMP64]] to i32
; CHECK-NEXT: [[TMP68:%.*]] = call fast float @llvm.fabs.f32(float [[TMP66]])
; CHECK-NEXT: [[TMP69:%.*]] = call fast float @llvm.fabs.f32(float [[TMP61]])
; CHECK-NEXT: [[TMP70:%.*]] = fcmp fast oge float [[TMP68]], [[TMP69]]
@@ -4551,7 +4551,7 @@ define amdgpu_kernel void @udiv_v3i15(ptr addrspace(1) %out, <3 x i15> %x, <3 x
; CHECK-NEXT: [[TMP9:%.*]] = call fast float @llvm.trunc.f32(float [[TMP8]])
; CHECK-NEXT: [[TMP10:%.*]] = fneg fast float [[TMP9]]
; CHECK-NEXT: [[TMP11:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP10]], float [[TMP6]], float [[TMP5]])
-; CHECK-NEXT: [[TMP12:%.*]] = fptoui float [[TMP9]] to i32
+; CHECK-NEXT: [[TMP12:%.*]] = fptoui fast float [[TMP9]] to i32
; CHECK-NEXT: [[TMP13:%.*]] = call fast float @llvm.fabs.f32(float [[TMP11]])
; CHECK-NEXT: [[TMP14:%.*]] = call fast float @llvm.fabs.f32(float [[TMP6]])
; CHECK-NEXT: [[TMP15:%.*]] = fcmp fast oge float [[TMP13]], [[TMP14]]
@@ -4571,7 +4571,7 @@ define amdgpu_kernel void @udiv_v3i15(ptr addrspace(1) %out, <3 x i15> %x, <3 x
; CHECK-NEXT: [[TMP29:%.*]] = call fast float @llvm.trunc.f32(float [[TMP28]])
; CHECK-NEXT: [[TMP30:%.*]] = fneg fast float [[TMP29]]
; CHECK-NEXT: [[TMP31:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP30]], float [[TMP26]], float [[TMP25]])
-; CHECK-NEXT: [[TMP32:%.*]] = fptoui float [[TMP29]] to i32
+; CHECK-NEXT: [[TMP32:%.*]] = fptoui fast float [[TMP29]] to i32
; CHECK-NEXT: [[TMP33:%.*]] = call fast float @llvm.fabs.f32(float [[TMP31]])
; CHECK-NEXT: [[TMP34:%.*]] = call fast float @llvm.fabs.f32(float [[TMP26]])
; CHECK-NEXT: [[TMP35:%.*]] = fcmp fast oge float [[TMP33]], [[TMP34]]
@@ -4591,7 +4591,7 @@ define amdgpu_kernel void @udiv_v3i15(ptr addrspace(1) %out, <3 x i15> %x, <3 x
; CHECK-NEXT: [[TMP49:%.*]] = call fast float @llvm.trunc.f32(float [[TMP48]])
; CHECK-NEXT: [[TMP50:%.*]] = fneg fast float [[TMP49]]
; CHECK-NEXT: [[TMP51:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP50]], float [[TMP46]], float [[TMP45]])
-; CHECK-NEXT: [[TMP52:%.*]] = fptoui float [[TMP49]] to i32
+; CHECK-NEXT: [[TMP52:%.*]] = fptoui fast float [[TMP49]] to i32
; CHECK-NEXT: [[TMP53:%.*]] = call fast float @llvm.fabs.f32(float [[TMP51]])
; CHECK-NEXT: [[TMP54:%.*]] = call fast float @llvm.fabs.f32(float [[TMP46]])
; CHECK-NEXT: [[TMP55:%.*]] = fcmp fast oge float [[TMP53]], [[TMP54]]
@@ -4732,7 +4732,7 @@ define amdgpu_kernel void @urem_v3i15(ptr addrspace(1) %out, <3 x i15> %x, <3 x
; CHECK-NEXT: [[TMP9:%.*]] = call fast float @llvm.trunc.f32(float [[TMP8]])
; CHECK-NEXT: [[TMP10:%.*]] = fneg fast float [[TMP9]]
; CHECK-NEXT: [[TMP11:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP10]], float [[TMP6]], float [[TMP5]])
-; CHECK-NEXT: [[TMP12:%.*]] = fptoui float [[TMP9]] to i32
+; CHECK-NEXT: [[TMP12:%.*]] = fptoui fast float [[TMP9]] to i32
; CHECK-NEXT: [[TMP13:%.*]] = call fast float @llvm.fabs.f32(float [[TMP11]])
; CHECK-NEXT: [[TMP14:%.*]] = call fast float @llvm.fabs.f32(float [[TMP6]])
; CHECK-NEXT: [[TMP15:%.*]] = fcmp fast oge float [[TMP13]], [[TMP14]]
@@ -4754,7 +4754,7 @@ define amdgpu_kernel void @urem_v3i15(ptr addrspace(1) %out, <3 x i15> %x, <3 x
; CHECK-NEXT: [[TMP31:%.*]] = call fast float @llvm.trunc.f32(float [[TMP30]])
; CHECK-NEXT: [[TMP32:%.*]] = fneg fast float [[TMP31]]
; CHECK-NEXT: [[TMP33:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP32]], float [[TMP28]], float [[TMP27]])
-; CHECK-NEXT: [[TMP34:%.*]] = fptoui float [[TMP31]] to i32
+; CHECK-NEXT: [[TMP34:%.*]] = fptoui fast float [[TMP31]] to i32
; CHECK-NEXT: [[TMP35:%.*]] = call fast float @llvm.fabs.f32(float [[TMP33]])
; CHECK-NEXT: [[TMP36:%.*]] = call fast float @llvm.fabs.f32(float [[TMP28]])
; CHECK-NEXT: [[TMP37:%.*]] = fcmp fast oge float [[TMP35]], [[TMP36]]
@@ -4776,7 +4776,7 @@ define amdgpu_kernel void @urem_v3i15(ptr addrspace(1) %out, <3 x i15> %x, <3 x
; CHECK-NEXT: [[TMP53:%.*]] = call fast float @llvm.trunc.f32(float [[TMP52]])
; CHECK-NEXT: [[TMP54:%.*]] = fneg fast float [[TMP53]]
; CHECK-NEXT: [[TMP55:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP54]], float [[TMP50]], float [[TMP49]])
-; CHECK-NEXT: [[TMP56:%.*]] = fptoui float [[TMP53]] to i32
+; CHECK-NEXT: [[TMP56:%.*]] = fptoui fast float [[TMP53]] to i32
; CHECK-NEXT: [[TMP57:%.*]] = call fast float @llvm.fabs.f32(float [[TMP55]])
; CHECK-NEXT: [[TMP58:%.*]] = call fast float @llvm.fabs.f32(float [[TMP50]])
; CHECK-NEXT: [[TMP59:%.*]] = fcmp fast oge float [[TMP57]], [[TMP58]]
@@ -4931,14 +4931,14 @@ define amdgpu_kernel void @sdiv_v3i15(ptr addrspace(1) %out, <3 x i15> %x, <3 x
; CHECK-NEXT: [[TMP5:%.*]] = xor i32 [[TMP3]], [[TMP4]]
; CHECK-NEXT: [[TMP6:%.*]] = ashr i32 [[TMP5]], 30
; CHECK-NEXT: [[TMP7:%.*]] = or i32 [[TMP6]], 1
-; CHECK-NEXT: [[TMP8:%.*]] = sitofp i32 [[TMP3]] to float
-; CHECK-NEXT: [[TMP9:%.*]] = sitofp i32 [[TMP4]] to float
+; CHECK-NEXT: [[TMP8:%.*]] = sitofp fast i32 [[TMP3]] to float
+; CHECK-NEXT: [[TMP9:%.*]] = sitofp fast i32 [[TMP4]] to float
; CHECK-NEXT: [[TMP10:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP9]])
; CHECK-NEXT: [[TMP11:%.*]] = fmul fast float [[TMP8]], [[TMP10]]
; CHECK-NEXT: [[TMP12:%.*]] = call fast float @llvm.trunc.f32(float [[TMP11]])
; CHECK-NEXT: [[TMP13:%.*]] = fneg fast float [[TMP12]]
; CHECK-NEXT: [[TMP14:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP13]], float [[TMP9]], float [[TMP8]])
-; CHECK-NEXT: [[TMP15:%.*]] = fptosi float [[TMP12]] to i32
+; CHECK-NEXT: [[TMP15:%.*]] = fptosi fast float [[TMP12]] to i32
; CHECK-NEXT: [[TMP16:%.*]] = call fast float @llvm.fabs.f32(float [[TMP14]])
; CHECK-NEXT: [[TMP17:%.*]] = call fast float @llvm.fabs.f32(float [[TMP9]])
; CHECK-NEXT: [[TMP18:%.*]] = fcmp fast oge float [[TMP16]], [[TMP17]]
@@ -4955,14 +4955,14 @@ define amdgpu_kernel void @sdiv_v3i15(ptr addrspace(1) %out, <3 x i15> %x, <3 x
; CHECK-NEXT: [[TMP29:%.*]] = xor i32 [[TMP27]], [[TMP28]]
; CHECK-NEXT: [[TMP30:%.*]] = ashr i32 [[TMP29]], 30
; CHECK-NEXT: [[TMP31:%.*]] = or i32 [[TMP30]], 1
-; CHECK-NEXT: [[TMP32:%.*]] = sitofp i32 [[TMP27]] to float
-; CHECK-NEXT: [[TMP33:%.*]] = sitofp i32 [[TMP28]] to float
+; CHECK-NEXT: [[TMP32:%.*]] = sitofp fast i32 [[TMP27]] to float
+; CHECK-NEXT: [[TMP33:%.*]] = sitofp fast i32 [[TMP28]] to float
; CHECK-NEXT: [[TMP34:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP33]])
; CHECK-NEXT: [[TMP35:%.*]] = fmul fast float [[TMP32]], [[TMP34]]
; CHECK-NEXT: [[TMP36:%.*]] = call fast float @llvm.trunc.f32(float [[TMP35]])
; CHECK-NEXT: [[TMP37:%.*]] = fneg fast float [[TMP36]]
; CHECK-NEXT: [[TMP38:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP37]], float [[TMP33]], float [[TMP32]])
-; CHECK-NEXT: [[TMP39:%.*]] = fptosi float [[TMP36]] to i32
+; CHECK-NEXT: [[TMP39:%.*]] = fptosi fast float [[TMP36]] to i32
; CHECK-NEXT: [[TMP40:%.*]] = call fast float @llvm.fabs.f32(float [[TMP38]])
; CHECK-NEXT: [[TMP41:%.*]] = call fast float @llvm.fabs.f32(float [[TMP33]])
; CHECK-NEXT: [[TMP42:%.*]] = fcmp fast oge float [[TMP40]], [[TMP41]]
@@ -4979,14 +4979,14 @@ define amdgpu_kernel void @sdiv_v3i15(ptr addrspace(1) %out, <3 x i15> %x, <3 x
; CHECK-NEXT: [[TMP53:%.*]] = xor i32 [[TMP51]], [[TMP52]]
; CHECK-NEXT: [[TMP54:%.*]] = ashr i32 [[TMP53]], 30
; CHECK-NEXT: [[TMP55:%.*]] = or i32 [[TMP54]], 1
-; CHECK-NEXT: [[TMP56:%.*]] = sitofp i32 [[TMP51]] to float
-; CHECK-NEXT: [[TMP57:%.*]] = sitofp i32 [[TMP52]] to float
+; CHECK-NEXT: [[TMP56:%.*]] = sitofp fast i32 [[TMP51]] to float
+; CHECK-NEXT: [[TMP57:%.*]] = sitofp fast i32 [[TMP52]] to float
; CHECK-NEXT: [[TMP58:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP57]])
; CHECK-NEXT: [[TMP59:%.*]] = fmul fast float [[TMP56]], [[TMP58]]
; CHECK-NEXT: [[TMP60:%.*]] = call fast float @llvm.trunc.f32(float [[TMP59]])
; CHECK-NEXT: [[TMP61:%.*]] = fneg fast float [[TMP60]]
; CHECK-NEXT: [[TMP62:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP61]], float [[TMP57]], float [[TMP56]])
-; CHECK-NEXT: [[TMP63:%.*]] = fptosi float [[TMP60]] to i32
+; CHECK-NEXT: [[TMP63:%.*]] = fptosi fast float [[TMP60]] to i32
; CHECK-NEXT: [[TMP64:%.*]] = call fast float @llvm.fabs.f32(float [[TMP62]])
; CHECK-NEXT: [[TMP65:%.*]] = call fast float @llvm.fabs.f32(float [[TMP57]])
; CHECK-NEXT: [[TMP66:%.*]] = fcmp fast oge float [[TMP64]], [[TMP65]]
@@ -5152,14 +5152,14 @@ define amdgpu_kernel void @srem_v3i15(ptr addrspace(1) %out, <3 x i15> %x, <3 x
; CHECK-NEXT: [[TMP5:%.*]] = xor i32 [[TMP3]], [[TMP4]]
; CHECK-NEXT: [[TMP6:%.*]] = ashr i32 [[TMP5]], 30
; CHECK-NEXT: [[TMP7:%.*]] = or i32 [[TMP6]], 1
-; CHECK-NEXT: [[TMP8:%.*]] = sitofp i32 [[TMP3]] to float
-; CHECK-NEXT: [[TMP9:%.*]] = sitofp i32 [[TMP4]] to float
+; CHECK-NEXT: [[TMP8:%.*]] = sitofp fast i32 [[TMP3]] to float
+; CHECK-NEXT: [[TMP9:%.*]] = sitofp fast i32 [[TMP4]] to float
; CHECK-NEXT: [[TMP10:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP9]])
; CHECK-NEXT: [[TMP11:%.*]] = fmul fast float [[TMP8]], [[TMP10]]
; CHECK-NEXT: [[TMP12:%.*]] = call fast float @llvm.trunc.f32(float [[TMP11]])
; CHECK-NEXT: [[TMP13:%.*]] = fneg fast float [[TMP12]]
; CHECK-NEXT: [[TMP14:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP13]], float [[TMP9]], float [[TMP8]])
-; CHECK-NEXT: [[TMP15:%.*]] = fptosi float [[TMP12]] to i32
+; CHECK-NEXT: [[TMP15:%.*]] = fptosi fast float [[TMP12]] to i32
; CHECK-NEXT: [[TMP16:%.*]] = call fast float @llvm.fabs.f32(float [[TMP14]])
; CHECK-NEXT: [[TMP17:%.*]] = call fast float @llvm.fabs.f32(float [[TMP9]])
; CHECK-NEXT: [[TMP18:%.*]] = fcmp fast oge float [[TMP16]], [[TMP17]]
@@ -5178,14 +5178,14 @@ define amdgpu_kernel void @srem_v3i15(ptr addrspace(1) %out, <3 x i15> %x, <3 x
; CHECK-NEXT: [[TMP31:%.*]] = xor i32 [[TMP29]], [[TMP30]]
; CHECK-NEXT: [[TMP32:%.*]] = ashr i32 [[TMP31]], 30
; CHECK-NEXT: [[TMP33:%.*]] = or i32 [[TMP32]], 1
-; CHECK-NEXT: [[TMP34:%.*]] = sitofp i32 [[TMP29]] to float
-; CHECK-NEXT: [[TMP35:%.*]] = sitofp i32 [[TMP30]] to float
+; CHECK-NEXT: [[TMP34:%.*]] = sitofp fast i32 [[TMP29]] to float
+; CHECK-NEXT: [[TMP35:%.*]] = sitofp fast i32 [[TMP30]] to float
; CHECK-NEXT: [[TMP36:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP35]])
; CHECK-NEXT: [[TMP37:%.*]] = fmul fast float [[TMP34]], [[TMP36]]
; CHECK-NEXT: [[TMP38:%.*]] = call fast float @llvm.trunc.f32(float [[TMP37]])
; CHECK-NEXT: [[TMP39:%.*]] = fneg fast float [[TMP38]]
; CHECK-NEXT: [[TMP40:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP39]], float [[TMP35]], float [[TMP34]])
-; CHECK-NEXT: [[TMP41:%.*]] = fptosi float [[TMP38]] to i32
+; CHECK-NEXT: [[TMP41:%.*]] = fptosi fast float [[TMP38]] to i32
; CHECK-NEXT: [[TMP42:%.*]] = call fast float @llvm.fabs.f32(float [[TMP40]])
; CHECK-NEXT: [[TMP43:%.*]] = call fast float @llvm.fabs.f32(float [[TMP35]])
; CHECK-NEXT: [[TMP44:%.*]] = fcmp fast oge float [[TMP42]], [[TMP43]]
@@ -5204,14 +5204,14 @@ define amdgpu_kernel void @srem_v3i15(ptr addrspace(1) %out, <3 x i15> %x, <3 x
; CHECK-NEXT: [[TMP57:%.*]] = xor i32 [[TMP55]], [[TMP56]]
; CHECK-NEXT: [[TMP58:%.*]] = ashr i32 [[TMP57]], 30
; CHECK-NEXT: [[TMP59:%.*]] = or i32 [[TMP58]], 1
-; CHECK-NEXT: [[TMP60:%.*]] = sitofp i32 [[TMP55]] to float
-; CHECK-NEXT: [[TMP61:%.*]] = sitofp i32 [[TMP56]] to float
+; CHECK-NEXT: [[TMP60:%.*]] = sitofp fast i32 [[TMP55]] to float
+; CHECK-NEXT: [[TMP61:%.*]] = sitofp fast i32 [[TMP56]] to float
; CHECK-NEXT: [[TMP62:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP61]])
; CHECK-NEXT: [[TMP63:%.*]] = fmul fast float [[TMP60]], [[TMP62]]
; CHECK-NEXT: [[TMP64:%.*]] = call fast float @llvm.trunc.f32(float [[TMP63]])
; CHECK-NEXT: [[TMP65:%.*]] = fneg fast float [[TMP64]]
; CHECK-NEXT: [[TMP66:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP65]], float [[TMP61]], float [[TMP60]])
-; CHECK-NEXT: [[TMP67:%.*]] = fptosi float [[TMP64]] to i32
+; CHECK-NEXT: [[TMP67:%.*]] = fptosi fast float [[TMP64]] to i32
; CHECK-NEXT: [[TMP68:%.*]] = call fast float @llvm.fabs.f32(float [[TMP66]])
; CHECK-NEXT: [[TMP69:%.*]] = call fast float @llvm.fabs.f32(float [[TMP61]])
; CHECK-NEXT: [[TMP70:%.*]] = fcmp fast oge float [[TMP68]], [[TMP69]]
@@ -5601,7 +5601,7 @@ define amdgpu_kernel void @udiv_v2i32_pow2_shl_denom(ptr addrspace(1) %out, <2 x
; CHECK-NEXT: [[TMP3:%.*]] = uitofp i32 [[TMP2]] to float
; CHECK-NEXT: [[TMP4:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP3]])
; CHECK-NEXT: [[TMP5:%.*]] = fmul fast float [[TMP4]], 0x41EFFFFFC0000000
-; CHECK-NEXT: [[TMP6:%.*]] = fptoui float [[TMP5]] to i32
+; CHECK-NEXT: [[TMP6:%.*]] = fptoui fast float [[TMP5]] to i32
; CHECK-NEXT: [[TMP7:%.*]] = sub i32 0, [[TMP2]]
; CHECK-NEXT: [[TMP8:%.*]] = mul i32 [[TMP7]], [[TMP6]]
; CHECK-NEXT: [[TMP9:%.*]] = zext i32 [[TMP6]] to i64
@@ -5633,7 +5633,7 @@ define amdgpu_kernel void @udiv_v2i32_pow2_shl_denom(ptr addrspace(1) %out, <2 x
; CHECK-NEXT: [[TMP35:%.*]] = uitofp i32 [[TMP34]] to float
; CHECK-NEXT: [[TMP36:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP35]])
; CHECK-NEXT: [[TMP37:%.*]] = fmul fast float [[TMP36]], 0x41EFFFFFC0000000
-; CHECK-NEXT: [[TMP38:%.*]] = fptoui float [[TMP37]] to i32
+; CHECK-NEXT: [[TMP38:%.*]] = fptoui fast float [[TMP37]] to i32
; CHECK-NEXT: [[TMP39:%.*]] = sub i32 0, [[TMP34]]
; CHECK-NEXT: [[TMP40:%.*]] = mul i32 [[TMP39]], [[TMP38]]
; CHECK-NEXT: [[TMP41:%.*]] = zext i32 [[TMP38]] to i64
@@ -5946,7 +5946,7 @@ define amdgpu_kernel void @urem_v2i32_pow2_shl_denom(ptr addrspace(1) %out, <2 x
; CHECK-NEXT: [[TMP3:%.*]] = uitofp i32 [[TMP2]] to float
; CHECK-NEXT: [[TMP4:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP3]])
; CHECK-NEXT: [[TMP5:%.*]] = fmul fast float [[TMP4]], 0x41EFFFFFC0000000
-; CHECK-NEXT: [[TMP6:%.*]] = fptoui float [[TMP5]] to i32
+; CHECK-NEXT: [[TMP6:%.*]] = fptoui fast float [[TMP5]] to i32
; CHECK-NEXT: [[TMP7:%.*]] = sub i32 0, [[TMP2]]
; CHECK-NEXT: [[TMP8:%.*]] = mul i32 [[TMP7]], [[TMP6]]
; CHECK-NEXT: [[TMP9:%.*]] = zext i32 [[TMP6]] to i64
@@ -5976,7 +5976,7 @@ define amdgpu_kernel void @urem_v2i32_pow2_shl_denom(ptr addrspace(1) %out, <2 x
; CHECK-NEXT: [[TMP33:%.*]] = uitofp i32 [[TMP32]] to float
; CHECK-NEXT: [[TMP34:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP33]])
; CHECK-NEXT: [[TMP35:%.*]] = fmul fast float [[TMP34]], 0x41EFFFFFC0000000
-; CHECK-NEXT: [[TMP36:%.*]] = fptoui float [[TMP35]] to i32
+; CHECK-NEXT: [[TMP36:%.*]] = fptoui fast float [[TMP35]] to i32
; CHECK-NEXT: [[TMP37:%.*]] = sub i32 0, [[TMP32]]
; CHECK-NEXT: [[TMP38:%.*]] = mul i32 [[TMP37]], [[TMP36]]
; CHECK-NEXT: [[TMP39:%.*]] = zext i32 [[TMP36]] to i64
@@ -6408,7 +6408,7 @@ define amdgpu_kernel void @sdiv_v2i32_pow2_shl_denom(ptr addrspace(1) %out, <2 x
; CHECK-NEXT: [[TMP10:%.*]] = uitofp i32 [[TMP9]] to float
; CHECK-NEXT: [[TMP11:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP10]])
; CHECK-NEXT: [[TMP12:%.*]] = fmul fast float [[TMP11]], 0x41EFFFFFC0000000
-; CHECK-NEXT: [[TMP13:%.*]] = fptoui float [[TMP12]] to i32
+; CHECK-NEXT: [[TMP13:%.*]] = fptoui fast float [[TMP12]] to i32
; CHECK-NEXT: [[TMP14:%.*]] = sub i32 0, [[TMP9]]
; CHECK-NEXT: [[TMP15:%.*]] = mul i32 [[TMP14]], [[TMP13]]
; CHECK-NEXT: [[TMP16:%.*]] = zext i32 [[TMP13]] to i64
@@ -6449,7 +6449,7 @@ define amdgpu_kernel void @sdiv_v2i32_pow2_shl_denom(ptr addrspace(1) %out, <2 x
; CHECK-NEXT: [[TMP51:%.*]] = uitofp i32 [[TMP50]] to float
; CHECK-NEXT: [[TMP52:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP51]])
; CHECK-NEXT: [[TMP53:%.*]] = fmul fast float [[TMP52]], 0x41EFFFFFC0000000
-; CHECK-NEXT: [[TMP54:%.*]] = fptoui float [[TMP53]] to i32
+; CHECK-NEXT: [[TMP54:%.*]] = fptoui fast float [[TMP53]] to i32
; CHECK-NEXT: [[TMP55:%.*]] = sub i32 0, [[TMP50]]
; CHECK-NEXT: [[TMP56:%.*]] = mul i32 [[TMP55]], [[TMP54]]
; CHECK-NEXT: [[TMP57:%.*]] = zext i32 [[TMP54]] to i64
@@ -6865,7 +6865,7 @@ define amdgpu_kernel void @srem_v2i32_pow2_shl_denom(ptr addrspace(1) %out, <2 x
; CHECK-NEXT: [[TMP9:%.*]] = uitofp i32 [[TMP8]] to float
; CHECK-NEXT: [[TMP10:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP9]])
; CHECK-NEXT: [[TMP11:%.*]] = fmul fast float [[TMP10]], 0x41EFFFFFC0000000
-; CHECK-NEXT: [[TMP12:%.*]] = fptoui float [[TMP11]] to i32
+; CHECK-NEXT: [[TMP12:%.*]] = fptoui fast float [[TMP11]] to i32
; CHECK-NEXT: [[TMP13:%.*]] = sub i32 0, [[TMP8]]
; CHECK-NEXT: [[TMP14:%.*]] = mul i32 [[TMP13]], [[TMP12]]
; CHECK-NEXT: [[TMP15:%.*]] = zext i32 [[TMP12]] to i64
@@ -6903,7 +6903,7 @@ define amdgpu_kernel void @srem_v2i32_pow2_shl_denom(ptr addrspace(1) %out, <2 x
; CHECK-NEXT: [[TMP47:%.*]] = uitofp i32 [[TMP46]] to float
; CHECK-NEXT: [[TMP48:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP47]])
; CHECK-NEXT: [[TMP49:%.*]] = fmul fast float [[TMP48]], 0x41EFFFFFC0000000
-; CHECK-NEXT: [[TMP50:%.*]] = fptoui float [[TMP49]] to i32
+; CHECK-NEXT: [[TMP50:%.*]] = fptoui fast float [[TMP49]] to i32
; CHECK-NEXT: [[TMP51:%.*]] = sub i32 0, [[TMP46]]
; CHECK-NEXT: [[TMP52:%.*]] = mul i32 [[TMP51]], [[TMP50]]
; CHECK-NEXT: [[TMP53:%.*]] = zext i32 [[TMP50]] to i64
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pow.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pow.ll
index 091e5a67799a9..84977cede80bd 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pow.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pow.ll
@@ -2191,7 +2191,7 @@ define float @test_pow_afn_f32_known_integral_sitofp(float %x, i32 %y) {
; CHECK-LABEL: define float @test_pow_afn_f32_known_integral_sitofp
; CHECK-SAME: (float [[X:%.*]], i32 [[Y:%.*]]) {
; CHECK-NEXT: [[Y_CAST:%.*]] = sitofp i32 [[Y]] to float
-; CHECK-NEXT: [[TMP1:%.*]] = fptosi float [[Y_CAST]] to i32
+; CHECK-NEXT: [[TMP1:%.*]] = fptosi afn float [[Y_CAST]] to i32
; CHECK-NEXT: [[POW:%.*]] = tail call afn float @_Z4pownfi(float [[X]], i32 [[TMP1]])
; CHECK-NEXT: ret float [[POW]]
;
@@ -2204,10 +2204,10 @@ define float @test_pow_afn_nnan_ninf_f32_known_integral_sitofp(float %x, i32 %y)
; CHECK-LABEL: define float @test_pow_afn_nnan_ninf_f32_known_integral_sitofp
; CHECK-SAME: (float [[X:%.*]], i32 [[Y:%.*]]) {
; CHECK-NEXT: [[Y_CAST:%.*]] = sitofp i32 [[Y]] to float
-; CHECK-NEXT: [[TMP1:%.*]] = fptosi float [[Y_CAST]] to i32
+; CHECK-NEXT: [[TMP1:%.*]] = fptosi nnan ninf afn float [[Y_CAST]] to i32
; CHECK-NEXT: [[__FABS:%.*]] = call nnan ninf afn float @llvm.fabs.f32(float [[X]])
; CHECK-NEXT: [[__LOG2:%.*]] = call nnan ninf afn float @llvm.log2.f32(float [[__FABS]])
-; CHECK-NEXT: [[POWNI2F:%.*]] = sitofp i32 [[TMP1]] to float
+; CHECK-NEXT: [[POWNI2F:%.*]] = sitofp nnan ninf afn i32 [[TMP1]] to float
; CHECK-NEXT: [[__YLOGX:%.*]] = fmul nnan ninf afn float [[__LOG2]], [[POWNI2F]]
; CHECK-NEXT: [[__EXP2:%.*]] = call nnan ninf afn float @llvm.exp2.f32(float [[__YLOGX]])
; CHECK-NEXT: [[__YEVEN:%.*]] = shl i32 [[TMP1]], 31
@@ -2227,7 +2227,7 @@ define float @test_pow_afn_nnan_f32_known_integral_sitofp(float %x, i32 %y) {
; CHECK-LABEL: define float @test_pow_afn_nnan_f32_known_integral_sitofp
; CHECK-SAME: (float [[X:%.*]], i32 [[Y:%.*]]) {
; CHECK-NEXT: [[Y_CAST:%.*]] = sitofp i32 [[Y]] to float
-; CHECK-NEXT: [[TMP1:%.*]] = fptosi float [[Y_CAST]] to i32
+; CHECK-NEXT: [[TMP1:%.*]] = fptosi nnan afn float [[Y_CAST]] to i32
; CHECK-NEXT: [[POW:%.*]] = tail call nnan afn float @_Z4pownfi(float [[X]], i32 [[TMP1]])
; CHECK-NEXT: ret float [[POW]]
;
@@ -2240,7 +2240,7 @@ define float @test_pow_afn_ninf_f32_known_integral_sitofp(float %x, i32 %y) {
; CHECK-LABEL: define float @test_pow_afn_ninf_f32_known_integral_sitofp
; CHECK-SAME: (float [[X:%.*]], i32 [[Y:%.*]]) {
; CHECK-NEXT: [[Y_CAST:%.*]] = sitofp i32 [[Y]] to float
-; CHECK-NEXT: [[TMP1:%.*]] = fptosi float [[Y_CAST]] to i32
+; CHECK-NEXT: [[TMP1:%.*]] = fptosi ninf afn float [[Y_CAST]] to i32
; CHECK-NEXT: [[POW:%.*]] = tail call ninf afn float @_Z4pownfi(float [[X]], i32 [[TMP1]])
; CHECK-NEXT: ret float [[POW]]
;
@@ -2279,7 +2279,7 @@ define float @test_pow_afn_f32_known_integral_uitofp(float %x, i32 %y) {
; CHECK-LABEL: define float @test_pow_afn_f32_known_integral_uitofp
; CHECK-SAME: (float [[X:%.*]], i32 [[Y:%.*]]) {
; CHECK-NEXT: [[Y_CAST:%.*]] = uitofp i32 [[Y]] to float
-; CHECK-NEXT: [[TMP1:%.*]] = fptosi float [[Y_CAST]] to i32
+; CHECK-NEXT: [[TMP1:%.*]] = fptosi afn float [[Y_CAST]] to i32
; CHECK-NEXT: [[POW:%.*]] = tail call afn float @_Z4pownfi(float [[X]], i32 [[TMP1]])
; CHECK-NEXT: ret float [[POW]]
;
@@ -2292,10 +2292,10 @@ define float @test_pow_afn_nnan_ninf_f32_known_integral_uitofp(float %x, i32 %y)
; CHECK-LABEL: define float @test_pow_afn_nnan_ninf_f32_known_integral_uitofp
; CHECK-SAME: (float [[X:%.*]], i32 [[Y:%.*]]) {
; CHECK-NEXT: [[Y_CAST:%.*]] = uitofp i32 [[Y]] to float
-; CHECK-NEXT: [[TMP1:%.*]] = fptosi float [[Y_CAST]] to i32
+; CHECK-NEXT: [[TMP1:%.*]] = fptosi nnan ninf afn float [[Y_CAST]] to i32
; CHECK-NEXT: [[__FABS:%.*]] = call nnan ninf afn float @llvm.fabs.f32(float [[X]])
; CHECK-NEXT: [[__LOG2:%.*]] = call nnan ninf afn float @llvm.log2.f32(float [[__FABS]])
-; CHECK-NEXT: [[POWNI2F:%.*]] = sitofp i32 [[TMP1]] to float
+; CHECK-NEXT: [[POWNI2F:%.*]] = sitofp nnan ninf afn i32 [[TMP1]] to float
; CHECK-NEXT: [[__YLOGX:%.*]] = fmul nnan ninf afn float [[__LOG2]], [[POWNI2F]]
; CHECK-NEXT: [[__EXP2:%.*]] = call nnan ninf afn float @llvm.exp2.f32(float [[__YLOGX]])
; CHECK-NEXT: [[__YEVEN:%.*]] = shl i32 [[TMP1]], 31
@@ -2341,10 +2341,10 @@ define float @test_pow_afn_nnan_ninf_f32_known_integral_uitofp_i256(float %x, i2
; CHECK-LABEL: define float @test_pow_afn_nnan_ninf_f32_known_integral_uitofp_i256
; CHECK-SAME: (float [[X:%.*]], i256 [[Y:%.*]]) {
; CHECK-NEXT: [[Y_CAST:%.*]] = uitofp i256 [[Y]] to float
-; CHECK-NEXT: [[TMP1:%.*]] = fptosi float [[Y_CAST]] to i32
+; CHECK-NEXT: [[TMP1:%.*]] = fptosi nnan ninf afn float [[Y_CAST]] to i32
; CHECK-NEXT: [[__FABS:%.*]] = call nnan ninf afn float @llvm.fabs.f32(float [[X]])
; CHECK-NEXT: [[__LOG2:%.*]] = call nnan ninf afn float @llvm.log2.f32(float [[__FABS]])
-; CHECK-NEXT: [[POWNI2F:%.*]] = sitofp i32 [[TMP1]] to float
+; CHECK-NEXT: [[POWNI2F:%.*]] = sitofp nnan ninf afn i32 [[TMP1]] to float
; CHECK-NEXT: [[__YLOGX:%.*]] = fmul nnan ninf afn float [[__LOG2]], [[POWNI2F]]
; CHECK-NEXT: [[__EXP2:%.*]] = call nnan ninf afn float @llvm.exp2.f32(float [[__YLOGX]])
; CHECK-NEXT: [[__YEVEN:%.*]] = shl i32 [[TMP1]], 31
@@ -2364,10 +2364,10 @@ define float @test_pow_afn_nnan_ninf_f32_known_integral_sitofp_i256(float %x, i2
; CHECK-LABEL: define float @test_pow_afn_nnan_ninf_f32_known_integral_sitofp_i256
; CHECK-SAME: (float [[X:%.*]], i256 [[Y:%.*]]) {
; CHECK-NEXT: [[Y_CAST:%.*]] = sitofp i256 [[Y]] to float
-; CHECK-NEXT: [[TMP1:%.*]] = fptosi float [[Y_CAST]] to i32
+; CHECK-NEXT: [[TMP1:%.*]] = fptosi nnan ninf afn float [[Y_CAST]] to i32
; CHECK-NEXT: [[__FABS:%.*]] = call nnan ninf afn float @llvm.fabs.f32(float [[X]])
; CHECK-NEXT: [[__LOG2:%.*]] = call nnan ninf afn float @llvm.log2.f32(float [[__FABS]])
-; CHECK-NEXT: [[POWNI2F:%.*]] = sitofp i32 [[TMP1]] to float
+; CHECK-NEXT: [[POWNI2F:%.*]] = sitofp nnan ninf afn i32 [[TMP1]] to float
; CHECK-NEXT: [[__YLOGX:%.*]] = fmul nnan ninf afn float [[__LOG2]], [[POWNI2F]]
; CHECK-NEXT: [[__EXP2:%.*]] = call nnan ninf afn float @llvm.exp2.f32(float [[__YLOGX]])
; CHECK-NEXT: [[__YEVEN:%.*]] = shl i32 [[TMP1]], 31
@@ -2387,10 +2387,10 @@ define <2 x float> @test_pow_afn_nnan_ninf_v2f32_known_integral_sitofp(<2 x floa
; CHECK-LABEL: define <2 x float> @test_pow_afn_nnan_ninf_v2f32_known_integral_sitofp
; CHECK-SAME: (<2 x float> [[X:%.*]], <2 x i32> [[Y:%.*]]) {
; CHECK-NEXT: [[Y_CAST:%.*]] = sitofp <2 x i32> [[Y]] to <2 x float>
-; CHECK-NEXT: [[TMP1:%.*]] = fptosi <2 x float> [[Y_CAST]] to <2 x i32>
+; CHECK-NEXT: [[TMP1:%.*]] = fptosi nnan ninf afn <2 x float> [[Y_CAST]] to <2 x i32>
; CHECK-NEXT: [[__FABS:%.*]] = call nnan ninf afn <2 x float> @llvm.fabs.v2f32(<2 x float> [[X]])
; CHECK-NEXT: [[__LOG2:%.*]] = call nnan ninf afn <2 x float> @llvm.log2.v2f32(<2 x float> [[__FABS]])
-; CHECK-NEXT: [[POWNI2F:%.*]] = sitofp <2 x i32> [[TMP1]] to <2 x float>
+; CHECK-NEXT: [[POWNI2F:%.*]] = sitofp nnan ninf afn <2 x i32> [[TMP1]] to <2 x float>
; CHECK-NEXT: [[__YLOGX:%.*]] = fmul nnan ninf afn <2 x float> [[__LOG2]], [[POWNI2F]]
; CHECK-NEXT: [[__EXP2:%.*]] = call nnan ninf afn <2 x float> @llvm.exp2.v2f32(<2 x float> [[__YLOGX]])
; CHECK-NEXT: [[__YEVEN:%.*]] = shl <2 x i32> [[TMP1]], splat (i32 31)
@@ -2423,7 +2423,7 @@ define <2 x float> @test_pow_afn_v2f32_known_integral_uitofp(<2 x float> %x, <2
; CHECK-LABEL: define <2 x float> @test_pow_afn_v2f32_known_integral_uitofp
; CHECK-SAME: (<2 x float> [[X:%.*]], <2 x i32> [[Y:%.*]]) {
; CHECK-NEXT: [[Y_CAST:%.*]] = uitofp <2 x i32> [[Y]] to <2 x float>
-; CHECK-NEXT: [[TMP1:%.*]] = fptosi <2 x float> [[Y_CAST]] to <2 x i32>
+; CHECK-NEXT: [[TMP1:%.*]] = fptosi afn <2 x float> [[Y_CAST]] to <2 x i32>
; CHECK-NEXT: [[POW:%.*]] = tail call afn <2 x float> @_Z4pownDv2_fDv2_i(<2 x float> [[X]], <2 x i32> [[TMP1]])
; CHECK-NEXT: ret <2 x float> [[POW]]
;
@@ -2436,10 +2436,10 @@ define <2 x float> @test_pow_afn_nnan_ninf_v2f32_known_integral_uitofp(<2 x floa
; CHECK-LABEL: define <2 x float> @test_pow_afn_nnan_ninf_v2f32_known_integral_uitofp
; CHECK-SAME: (<2 x float> [[X:%.*]], <2 x i32> [[Y:%.*]]) {
; CHECK-NEXT: [[Y_CAST:%.*]] = uitofp <2 x i32> [[Y]] to <2 x float>
-; CHECK-NEXT: [[TMP1:%.*]] = fptosi <2 x float> [[Y_CAST]] to <2 x i32>
+; CHECK-NEXT: [[TMP1:%.*]] = fptosi nnan ninf afn <2 x float> [[Y_CAST]] to <2 x i32>
; CHECK-NEXT: [[__FABS:%.*]] = call nnan ninf afn <2 x float> @llvm.fabs.v2f32(<2 x float> [[X]])
; CHECK-NEXT: [[__LOG2:%.*]] = call nnan ninf afn <2 x float> @llvm.log2.v2f32(<2 x float> [[__FABS]])
-; CHECK-NEXT: [[POWNI2F:%.*]] = sitofp <2 x i32> [[TMP1]] to <2 x float>
+; CHECK-NEXT: [[POWNI2F:%.*]] = sitofp nnan ninf afn <2 x i32> [[TMP1]] to <2 x float>
; CHECK-NEXT: [[__YLOGX:%.*]] = fmul nnan ninf afn <2 x float> [[__LOG2]], [[POWNI2F]]
; CHECK-NEXT: [[__EXP2:%.*]] = call nnan ninf afn <2 x float> @llvm.exp2.v2f32(<2 x float> [[__YLOGX]])
; CHECK-NEXT: [[__YEVEN:%.*]] = shl <2 x i32> [[TMP1]], splat (i32 31)
@@ -2548,10 +2548,10 @@ define float @test_pow_afn_f32_nnan_ninf__y_known_integral_trunc(float %x, float
; CHECK-LABEL: define float @test_pow_afn_f32_nnan_ninf__y_known_integral_trunc
; CHECK-SAME: (float [[X:%.*]], float [[Y_ARG:%.*]]) {
; CHECK-NEXT: [[Y:%.*]] = call float @llvm.trunc.f32(float [[Y_ARG]])
-; CHECK-NEXT: [[TMP1:%.*]] = fptosi float [[Y]] to i32
+; CHECK-NEXT: [[TMP1:%.*]] = fptosi nnan ninf afn float [[Y]] to i32
; CHECK-NEXT: [[__FABS:%.*]] = call nnan ninf afn float @llvm.fabs.f32(float [[X]])
; CHECK-NEXT: [[__LOG2:%.*]] = call nnan ninf afn float @llvm.log2.f32(float [[__FABS]])
-; CHECK-NEXT: [[POWNI2F:%.*]] = sitofp i32 [[TMP1]] to float
+; CHECK-NEXT: [[POWNI2F:%.*]] = sitofp nnan ninf afn i32 [[TMP1]] to float
; CHECK-NEXT: [[__YLOGX:%.*]] = fmul nnan ninf afn float [[__LOG2]], [[POWNI2F]]
; CHECK-NEXT: [[__EXP2:%.*]] = call nnan ninf afn float @llvm.exp2.f32(float [[__YLOGX]])
; CHECK-NEXT: [[__YEVEN:%.*]] = shl i32 [[TMP1]], 31
@@ -2571,7 +2571,7 @@ define float @test_pow_afn_f32__y_known_integral_trunc(float %x, float nofpclass
; CHECK-LABEL: define float @test_pow_afn_f32__y_known_integral_trunc
; CHECK-SAME: (float [[X:%.*]], float nofpclass(nan inf) [[Y_ARG:%.*]]) {
; CHECK-NEXT: [[Y:%.*]] = call float @llvm.trunc.f32(float [[Y_ARG]])
-; CHECK-NEXT: [[TMP1:%.*]] = fptosi float [[Y]] to i32
+; CHECK-NEXT: [[TMP1:%.*]] = fptosi afn float [[Y]] to i32
; CHECK-NEXT: [[POW:%.*]] = tail call afn float @_Z4pownfi(float [[X]], i32 [[TMP1]])
; CHECK-NEXT: ret float [[POW]]
;
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pown.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pown.ll
index 27d204c1a253f..b7e6272761c1b 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pown.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pown.ll
@@ -671,7 +671,7 @@ define float @test_pown_afn_nnan_ninf_f32(float %x, i32 %y) {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[__FABS:%.*]] = call nnan ninf afn float @llvm.fabs.f32(float [[X]])
; CHECK-NEXT: [[__LOG2:%.*]] = call nnan ninf afn float @llvm.log2.f32(float [[__FABS]])
-; CHECK-NEXT: [[POWNI2F:%.*]] = sitofp i32 [[Y]] to float
+; CHECK-NEXT: [[POWNI2F:%.*]] = sitofp nnan ninf afn i32 [[Y]] to float
; CHECK-NEXT: [[__YLOGX:%.*]] = fmul nnan ninf afn float [[__LOG2]], [[POWNI2F]]
; CHECK-NEXT: [[__EXP2:%.*]] = call nnan ninf afn float @llvm.exp2.f32(float [[__YLOGX]])
; CHECK-NEXT: [[__YEVEN:%.*]] = shl i32 [[Y]], 31
@@ -693,7 +693,7 @@ define <2 x float> @test_pown_afn_nnan_ninf_v2f32(<2 x float> %x, <2 x i32> %y)
; CHECK-NEXT: entry:
; CHECK-NEXT: [[__FABS:%.*]] = call nnan ninf afn <2 x float> @llvm.fabs.v2f32(<2 x float> [[X]])
; CHECK-NEXT: [[__LOG2:%.*]] = call nnan ninf afn <2 x float> @llvm.log2.v2f32(<2 x float> [[__FABS]])
-; CHECK-NEXT: [[POWNI2F:%.*]] = sitofp <2 x i32> [[Y]] to <2 x float>
+; CHECK-NEXT: [[POWNI2F:%.*]] = sitofp nnan ninf afn <2 x i32> [[Y]] to <2 x float>
; CHECK-NEXT: [[__YLOGX:%.*]] = fmul nnan ninf afn <2 x float> [[__LOG2]], [[POWNI2F]]
; CHECK-NEXT: [[__EXP2:%.*]] = call nnan ninf afn <2 x float> @llvm.exp2.v2f32(<2 x float> [[__YLOGX]])
; CHECK-NEXT: [[__YEVEN:%.*]] = shl <2 x i32> [[Y]], splat (i32 31)
@@ -715,7 +715,7 @@ define double @test_pown_afn_nnan_ninf_f64(double %x, i32 %y) {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[__FABS:%.*]] = call nnan ninf afn double @llvm.fabs.f64(double [[X]])
; CHECK-NEXT: [[__LOG2:%.*]] = call nnan ninf afn double @_Z4log2d(double [[__FABS]])
-; CHECK-NEXT: [[POWNI2F:%.*]] = sitofp i32 [[Y]] to double
+; CHECK-NEXT: [[POWNI2F:%.*]] = sitofp nnan ninf afn i32 [[Y]] to double
; CHECK-NEXT: [[__YLOGX:%.*]] = fmul nnan ninf afn double [[__LOG2]], [[POWNI2F]]
; CHECK-NEXT: [[__EXP2:%.*]] = call nnan ninf afn double @_Z4exp2d(double [[__YLOGX]])
; CHECK-NEXT: [[__YTOU:%.*]] = zext i32 [[Y]] to i64
@@ -738,7 +738,7 @@ define <2 x double> @test_pown_afn_nnan_ninf_v2f64(<2 x double> %x, <2 x i32> %y
; CHECK-NEXT: entry:
; CHECK-NEXT: [[__FABS:%.*]] = call nnan ninf afn <2 x double> @llvm.fabs.v2f64(<2 x double> [[X]])
; CHECK-NEXT: [[__LOG2:%.*]] = call nnan ninf afn <2 x double> @_Z4log2Dv2_d(<2 x double> [[__FABS]])
-; CHECK-NEXT: [[POWNI2F:%.*]] = sitofp <2 x i32> [[Y]] to <2 x double>
+; CHECK-NEXT: [[POWNI2F:%.*]] = sitofp nnan ninf afn <2 x i32> [[Y]] to <2 x double>
; CHECK-NEXT: [[__YLOGX:%.*]] = fmul nnan ninf afn <2 x double> [[__LOG2]], [[POWNI2F]]
; CHECK-NEXT: [[__EXP2:%.*]] = call nnan ninf afn <2 x double> @_Z4exp2Dv2_d(<2 x double> [[__YLOGX]])
; CHECK-NEXT: [[__YTOU:%.*]] = zext <2 x i32> [[Y]] to <2 x i64>
@@ -761,7 +761,7 @@ define half @test_pown_afn_nnan_ninf_f16(half %x, i32 %y) {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[__FABS:%.*]] = call nnan ninf afn half @llvm.fabs.f16(half [[X]])
; CHECK-NEXT: [[__LOG2:%.*]] = call nnan ninf afn half @llvm.log2.f16(half [[__FABS]])
-; CHECK-NEXT: [[POWNI2F:%.*]] = sitofp i32 [[Y]] to half
+; CHECK-NEXT: [[POWNI2F:%.*]] = sitofp nnan ninf afn i32 [[Y]] to half
; CHECK-NEXT: [[__YLOGX:%.*]] = fmul nnan ninf afn half [[__LOG2]], [[POWNI2F]]
; CHECK-NEXT: [[__EXP2:%.*]] = call nnan ninf afn half @llvm.exp2.f16(half [[__YLOGX]])
; CHECK-NEXT: [[__YTOU:%.*]] = trunc i32 [[Y]] to i16
@@ -784,7 +784,7 @@ define <2 x half> @test_pown_afn_nnan_ninf_v2f16(<2 x half> %x, <2 x i32> %y) {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[__FABS:%.*]] = call nnan ninf afn <2 x half> @llvm.fabs.v2f16(<2 x half> [[X]])
; CHECK-NEXT: [[__LOG2:%.*]] = call nnan ninf afn <2 x half> @llvm.log2.v2f16(<2 x half> [[__FABS]])
-; CHECK-NEXT: [[POWNI2F:%.*]] = sitofp <2 x i32> [[Y]] to <2 x half>
+; CHECK-NEXT: [[POWNI2F:%.*]] = sitofp nnan ninf afn <2 x i32> [[Y]] to <2 x half>
; CHECK-NEXT: [[__YLOGX:%.*]] = fmul nnan ninf afn <2 x half> [[__LOG2]], [[POWNI2F]]
; CHECK-NEXT: [[__EXP2:%.*]] = call nnan ninf afn <2 x half> @llvm.exp2.v2f16(<2 x half> [[__YLOGX]])
; CHECK-NEXT: [[__YTOU:%.*]] = trunc <2 x i32> [[Y]] to <2 x i16>
@@ -1065,7 +1065,7 @@ define float @test_pown_afn_ninf_nnan_f32__x_known_positive(float nofpclass(ninf
; CHECK-NEXT: entry:
; CHECK-NEXT: [[__FABS:%.*]] = call nnan ninf afn float @llvm.fabs.f32(float [[X]])
; CHECK-NEXT: [[__LOG2:%.*]] = call nnan ninf afn float @llvm.log2.f32(float [[__FABS]])
-; CHECK-NEXT: [[POWNI2F:%.*]] = sitofp i32 [[Y]] to float
+; CHECK-NEXT: [[POWNI2F:%.*]] = sitofp nnan ninf afn i32 [[Y]] to float
; CHECK-NEXT: [[__YLOGX:%.*]] = fmul nnan ninf afn float [[__LOG2]], [[POWNI2F]]
; CHECK-NEXT: [[__EXP2:%.*]] = call nnan ninf afn float @llvm.exp2.f32(float [[__YLOGX]])
; CHECK-NEXT: [[__YEVEN:%.*]] = shl i32 [[Y]], 31
@@ -1126,7 +1126,7 @@ define float @test_fast_pown_f32_y_known_even(float %x, i32 %y.arg) {
; CHECK-NEXT: [[Y:%.*]] = shl i32 [[Y_ARG]], 1
; CHECK-NEXT: [[__FABS:%.*]] = call fast float @llvm.fabs.f32(float [[X]])
; CHECK-NEXT: [[__LOG2:%.*]] = call fast float @llvm.log2.f32(float [[__FABS]])
-; CHECK-NEXT: [[POWNI2F:%.*]] = sitofp i32 [[Y]] to float
+; CHECK-NEXT: [[POWNI2F:%.*]] = sitofp fast i32 [[Y]] to float
; CHECK-NEXT: [[__YLOGX:%.*]] = fmul fast float [[__LOG2]], [[POWNI2F]]
; CHECK-NEXT: [[__EXP2:%.*]] = call fast float @llvm.exp2.f32(float [[__YLOGX]])
; CHECK-NEXT: ret float [[__EXP2]]
@@ -1144,7 +1144,7 @@ define float @test_fast_pown_f32_known_positive_y_known_even(float nofpclass(nin
; CHECK-NEXT: [[Y:%.*]] = shl i32 [[Y_ARG]], 1
; CHECK-NEXT: [[__FABS:%.*]] = call fast float @llvm.fabs.f32(float [[X]])
; CHECK-NEXT: [[__LOG2:%.*]] = call fast float @llvm.log2.f32(float [[__FABS]])
-; CHECK-NEXT: [[POWNI2F:%.*]] = sitofp i32 [[Y]] to float
+; CHECK-NEXT: [[POWNI2F:%.*]] = sitofp fast i32 [[Y]] to float
; CHECK-NEXT: [[__YLOGX:%.*]] = fmul fast float [[__LOG2]], [[POWNI2F]]
; CHECK-NEXT: [[__EXP2:%.*]] = call fast float @llvm.exp2.f32(float [[__YLOGX]])
; CHECK-NEXT: ret float [[__EXP2]]
diff --git a/llvm/test/CodeGen/AMDGPU/divrem24-assume.ll b/llvm/test/CodeGen/AMDGPU/divrem24-assume.ll
index dc79385d9eaca..46ff0f61b31a8 100644
--- a/llvm/test/CodeGen/AMDGPU/divrem24-assume.ll
+++ b/llvm/test/CodeGen/AMDGPU/divrem24-assume.ll
@@ -4,7 +4,7 @@
define amdgpu_kernel void @divrem24_assume(ptr addrspace(1) %arg, i32 %arg1) {
; CHECK-LABEL: @divrem24_assume(
; CHECK-NEXT: bb:
-; CHECK-NEXT: [[TMP:%.*]] = tail call i32 @llvm.amdgcn.workitem.id.x(), !range !0
+; CHECK-NEXT: [[TMP:%.*]] = tail call i32 @llvm.amdgcn.workitem.id.x(), !range [[RNG0:![0-9]+]]
; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i32 [[ARG1:%.*]], 42
; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP2]])
; CHECK-NEXT: [[TMP0:%.*]] = uitofp i32 [[TMP]] to float
@@ -14,7 +14,7 @@ define amdgpu_kernel void @divrem24_assume(ptr addrspace(1) %arg, i32 %arg1) {
; CHECK-NEXT: [[TMP4:%.*]] = call fast float @llvm.trunc.f32(float [[TMP3]])
; CHECK-NEXT: [[TMP5:%.*]] = fneg fast float [[TMP4]]
; CHECK-NEXT: [[TMP6:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP5]], float [[TMP1]], float [[TMP0]])
-; CHECK-NEXT: [[TMP7:%.*]] = fptoui float [[TMP4]] to i32
+; CHECK-NEXT: [[TMP7:%.*]] = fptoui fast float [[TMP4]] to i32
; CHECK-NEXT: [[TMP8:%.*]] = call fast float @llvm.fabs.f32(float [[TMP6]])
; CHECK-NEXT: [[TMP9:%.*]] = call fast float @llvm.fabs.f32(float [[TMP1]])
; CHECK-NEXT: [[TMP10:%.*]] = fcmp fast oge float [[TMP8]], [[TMP9]]
diff --git a/llvm/test/CodeGen/AMDGPU/simplify-libcalls.ll b/llvm/test/CodeGen/AMDGPU/simplify-libcalls.ll
index 6a88be6e55859..02d42f1774c26 100644
--- a/llvm/test/CodeGen/AMDGPU/simplify-libcalls.ll
+++ b/llvm/test/CodeGen/AMDGPU/simplify-libcalls.ll
@@ -352,7 +352,7 @@ declare half @_Z4pownDhi(half, i32)
; GCN-LABEL: {{^}}define half @test_pown_f16(
; GCN-NATIVE: %__fabs = tail call fast half @llvm.fabs.f16(half %x)
; GCN-NATIVE: %__log2 = tail call fast half @llvm.log2.f16(half %__fabs)
-; GCN-NATIVE: %pownI2F = sitofp i32 %y to half
+; GCN-NATIVE: %pownI2F = sitofp fast i32 %y to half
; GCN-NATIVE: %__ylogx = fmul fast half %__log2, %pownI2F
; GCN-NATIVE: %__exp2 = tail call fast half @llvm.exp2.f16(half %__ylogx)
; GCN-NATIVE: %__ytou = trunc i32 %y to i16
@@ -404,7 +404,7 @@ entry:
; GCN: %conv = fptosi float %tmp1 to i32
; GCN: %__fabs = tail call fast float @llvm.fabs.f32(float %tmp)
; GCN: %__log2 = tail call fast float @llvm.log2.f32(float %__fabs)
-; GCN: %pownI2F = sitofp i32 %conv to float
+; GCN: %pownI2F = sitofp fast i32 %conv to float
; GCN: %__ylogx = fmul fast float %__log2, %pownI2F
; GCN: %__exp2 = tail call fast float @llvm.exp2.f32(float %__ylogx)
; GCN: %__yeven = shl i32 %conv, 31
diff --git a/llvm/test/Transforms/InstCombine/log-pow.ll b/llvm/test/Transforms/InstCombine/log-pow.ll
index 374115953145d..f5091c4a21b7b 100644
--- a/llvm/test/Transforms/InstCombine/log-pow.ll
+++ b/llvm/test/Transforms/InstCombine/log-pow.ll
@@ -26,7 +26,7 @@ define double @log_powi_const(double %x) {
define double @log_powi_nonconst(double %x, i32 %y) {
; CHECK-LABEL: @log_powi_nonconst(
; CHECK-NEXT: [[LOG1:%.*]] = call fast double @llvm.log.f64(double [[X:%.*]])
-; CHECK-NEXT: [[CAST:%.*]] = sitofp i32 [[Y:%.*]] to double
+; CHECK-NEXT: [[CAST:%.*]] = sitofp fast i32 [[Y:%.*]] to double
; CHECK-NEXT: [[MUL:%.*]] = fmul fast double [[LOG1]], [[CAST]]
; CHECK-NEXT: ret double [[MUL]]
;
@@ -38,7 +38,7 @@ define double @log_powi_nonconst(double %x, i32 %y) {
define double @logf64_powi_nonconst(double %x, i32 %y) {
; CHECK-LABEL: @logf64_powi_nonconst(
; CHECK-NEXT: [[LOG1:%.*]] = call fast double @llvm.log.f64(double [[X:%.*]])
-; CHECK-NEXT: [[CAST:%.*]] = sitofp i32 [[Y:%.*]] to double
+; CHECK-NEXT: [[CAST:%.*]] = sitofp fast i32 [[Y:%.*]] to double
; CHECK-NEXT: [[MUL:%.*]] = fmul fast double [[LOG1]], [[CAST]]
; CHECK-NEXT: ret double [[MUL]]
;
@@ -61,7 +61,7 @@ define float @logf_powfi_const(float %x) {
define float @logf_powfi_nonconst(float %x, i32 %y) {
; CHECK-LABEL: @logf_powfi_nonconst(
; CHECK-NEXT: [[LOG1:%.*]] = call fast float @llvm.log.f32(float [[X:%.*]])
-; CHECK-NEXT: [[CAST:%.*]] = sitofp i32 [[Y:%.*]] to float
+; CHECK-NEXT: [[CAST:%.*]] = sitofp fast i32 [[Y:%.*]] to float
; CHECK-NEXT: [[MUL:%.*]] = fmul fast float [[LOG1]], [[CAST]]
; CHECK-NEXT: ret float [[MUL]]
;
diff --git a/llvm/test/Transforms/LoopVectorize/PowerPC/vplan-scalarivsext-crash.ll b/llvm/test/Transforms/LoopVectorize/PowerPC/vplan-scalarivsext-crash.ll
index 34b8deaa8de03..11f4a7bc81b4c 100644
--- a/llvm/test/Transforms/LoopVectorize/PowerPC/vplan-scalarivsext-crash.ll
+++ b/llvm/test/Transforms/LoopVectorize/PowerPC/vplan-scalarivsext-crash.ll
@@ -31,13 +31,13 @@ define void @test_iv_trunc_crash(ptr %a, ptr %b, i32 %n) {
; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i32 [[N_MOD_VF]], 0
; CHECK-NEXT: [[TMP11:%.*]] = select i1 [[TMP10]], i32 8, i32 [[N_MOD_VF]]
; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP3]], [[TMP11]]
-; CHECK-NEXT: [[DOTCAST:%.*]] = sitofp i32 [[N_VEC]] to double
+; CHECK-NEXT: [[DOTCAST:%.*]] = sitofp reassoc i32 [[N_VEC]] to double
; CHECK-NEXT: [[TMP12:%.*]] = fmul reassoc double [[X]], [[DOTCAST]]
; CHECK-NEXT: [[TMP13:%.*]] = fadd reassoc double [[SUM_0]], [[TMP12]]
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT: [[DOTCAST2:%.*]] = sitofp i32 [[INDEX]] to double
+; CHECK-NEXT: [[DOTCAST2:%.*]] = sitofp reassoc i32 [[INDEX]] to double
; CHECK-NEXT: [[TMP14:%.*]] = fmul reassoc double [[X]], [[DOTCAST2]]
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = fadd reassoc double [[SUM_0]], [[TMP14]]
; CHECK-NEXT: [[TMP15:%.*]] = fmul reassoc double 7.000000e+00, [[X]]
diff --git a/llvm/test/Transforms/LoopVectorize/X86/float-induction-x86.ll b/llvm/test/Transforms/LoopVectorize/X86/float-induction-x86.ll
index c0ff8816c2543..2e1698a3940c8 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/float-induction-x86.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/float-induction-x86.ll
@@ -23,14 +23,14 @@ define void @fp_iv_loop1(ptr noalias nocapture %A, i32 %N) #0 {
; AUTO_VEC: [[ITER_CHECK]]:
; AUTO_VEC-NEXT: [[TMP0:%.*]] = zext i32 [[N]] to i64
; AUTO_VEC-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 4
-; AUTO_VEC-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[FOR_BODY:.*]], label %[[VECTOR_MAIN_LOOP_ITER_CHECK:.*]]
+; AUTO_VEC-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH:.*]], label %[[VECTOR_MAIN_LOOP_ITER_CHECK:.*]]
; AUTO_VEC: [[VECTOR_MAIN_LOOP_ITER_CHECK]]:
; AUTO_VEC-NEXT: [[MIN_ITERS_CHECK1:%.*]] = icmp ult i64 [[TMP0]], 32
; AUTO_VEC-NEXT: br i1 [[MIN_ITERS_CHECK1]], label %[[VEC_EPILOG_PH:.*]], label %[[VECTOR_PH:.*]]
; AUTO_VEC: [[VECTOR_PH]]:
; AUTO_VEC-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], 32
; AUTO_VEC-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]]
-; AUTO_VEC-NEXT: [[DOTCAST:%.*]] = sitofp i64 [[N_VEC]] to float
+; AUTO_VEC-NEXT: [[DOTCAST:%.*]] = sitofp fast i64 [[N_VEC]] to float
; AUTO_VEC-NEXT: [[TMP6:%.*]] = fmul fast float 5.000000e-01, [[DOTCAST]]
; AUTO_VEC-NEXT: [[IND_END:%.*]] = fadd fast float 1.000000e+00, [[TMP6]]
; AUTO_VEC-NEXT: br label %[[VECTOR_BODY:.*]]
@@ -56,18 +56,18 @@ define void @fp_iv_loop1(ptr noalias nocapture %A, i32 %N) #0 {
; AUTO_VEC-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]]
; AUTO_VEC-NEXT: br i1 [[CMP_N]], label %[[FOR_END_LOOPEXIT:.*]], label %[[VEC_EPILOG_ITER_CHECK:.*]]
; AUTO_VEC: [[VEC_EPILOG_ITER_CHECK]]:
-; AUTO_VEC-NEXT: [[DOTCAST12:%.*]] = sitofp i64 [[N_VEC]] to float
+; AUTO_VEC-NEXT: [[DOTCAST12:%.*]] = sitofp fast i64 [[N_VEC]] to float
; AUTO_VEC-NEXT: [[TMP11:%.*]] = fmul fast float 5.000000e-01, [[DOTCAST12]]
; AUTO_VEC-NEXT: [[IND_END1:%.*]] = fadd fast float 1.000000e+00, [[TMP11]]
; AUTO_VEC-NEXT: [[N_VEC_REMAINING:%.*]] = sub i64 [[TMP0]], [[N_VEC]]
; AUTO_VEC-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_VEC_REMAINING]], 4
-; AUTO_VEC-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label %[[FOR_BODY]], label %[[VEC_EPILOG_PH]], !prof [[PROF3:![0-9]+]]
+; AUTO_VEC-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]], !prof [[PROF3:![0-9]+]]
; AUTO_VEC: [[VEC_EPILOG_PH]]:
; AUTO_VEC-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
; AUTO_VEC-NEXT: [[BC_RESUME_VAL:%.*]] = phi float [ [[IND_END]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 1.000000e+00, %[[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
; AUTO_VEC-NEXT: [[N_MOD_VF2:%.*]] = urem i64 [[TMP0]], 4
; AUTO_VEC-NEXT: [[N_VEC3:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF2]]
-; AUTO_VEC-NEXT: [[DOTCAST4:%.*]] = sitofp i64 [[N_VEC3]] to float
+; AUTO_VEC-NEXT: [[DOTCAST4:%.*]] = sitofp fast i64 [[N_VEC3]] to float
; AUTO_VEC-NEXT: [[TMP12:%.*]] = fmul fast float 5.000000e-01, [[DOTCAST4]]
; AUTO_VEC-NEXT: [[TMP10:%.*]] = fadd fast float 1.000000e+00, [[TMP12]]
; AUTO_VEC-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[BC_RESUME_VAL]], i64 0
@@ -85,14 +85,14 @@ define void @fp_iv_loop1(ptr noalias nocapture %A, i32 %N) #0 {
; AUTO_VEC-NEXT: br i1 [[TMP9]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; AUTO_VEC: [[VEC_EPILOG_MIDDLE_BLOCK]]:
; AUTO_VEC-NEXT: [[CMP_N9:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC3]]
-; AUTO_VEC-NEXT: br i1 [[CMP_N9]], label %[[FOR_END_LOOPEXIT]], label %[[FOR_BODY]]
-; AUTO_VEC: [[FOR_BODY]]:
+; AUTO_VEC-NEXT: br i1 [[CMP_N9]], label %[[FOR_END_LOOPEXIT]], label %[[VEC_EPILOG_SCALAR_PH]]
+; AUTO_VEC: [[VEC_EPILOG_SCALAR_PH]]:
; AUTO_VEC-NEXT: [[BC_RESUME_VAL10:%.*]] = phi i64 [ [[N_VEC3]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[ITER_CHECK]] ]
; AUTO_VEC-NEXT: [[BC_RESUME_VAL11:%.*]] = phi float [ [[TMP10]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END1]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 1.000000e+00, %[[ITER_CHECK]] ]
; AUTO_VEC-NEXT: br label %[[LOOP:.*]]
; AUTO_VEC: [[LOOP]]:
-; AUTO_VEC-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[LOOP]] ], [ [[BC_RESUME_VAL10]], %[[FOR_BODY]] ]
-; AUTO_VEC-NEXT: [[X_06:%.*]] = phi float [ [[CONV1:%.*]], %[[LOOP]] ], [ [[BC_RESUME_VAL11]], %[[FOR_BODY]] ]
+; AUTO_VEC-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[LOOP]] ], [ [[BC_RESUME_VAL10]], %[[VEC_EPILOG_SCALAR_PH]] ]
+; AUTO_VEC-NEXT: [[X_06:%.*]] = phi float [ [[CONV1:%.*]], %[[LOOP]] ], [ [[BC_RESUME_VAL11]], %[[VEC_EPILOG_SCALAR_PH]] ]
; AUTO_VEC-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]]
; AUTO_VEC-NEXT: store float [[X_06]], ptr [[ARRAYIDX]], align 4
; AUTO_VEC-NEXT: [[CONV1]] = fadd fast float [[X_06]], 5.000000e-01
@@ -145,19 +145,19 @@ define void @fp_iv_loop2(ptr noalias nocapture %A, i32 %N) {
; AUTO_VEC-SAME: ptr noalias captures(none) [[A:%.*]], i32 [[N:%.*]]) #[[ATTR0]] {
; AUTO_VEC-NEXT: [[ENTRY:.*:]]
; AUTO_VEC-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[N]], 0
-; AUTO_VEC-NEXT: br i1 [[CMP4]], label %[[FOR_BODY_PREHEADER:.*]], label %[[FOR_END:.*]]
-; AUTO_VEC: [[FOR_BODY_PREHEADER]]:
-; AUTO_VEC-NEXT: br label %[[FOR_BODY:.*]]
-; AUTO_VEC: [[FOR_BODY]]:
-; AUTO_VEC-NEXT: [[INDVARS_IV_EPIL:%.*]] = phi i64 [ [[INDVARS_IV_NEXT_EPIL:%.*]], %[[FOR_BODY]] ], [ 0, %[[FOR_BODY_PREHEADER]] ]
-; AUTO_VEC-NEXT: [[X_06_EPIL:%.*]] = phi float [ [[CONV1_EPIL:%.*]], %[[FOR_BODY]] ], [ 1.000000e+00, %[[FOR_BODY_PREHEADER]] ]
+; AUTO_VEC-NEXT: br i1 [[CMP4]], label %[[LOOP_PREHEADER:.*]], label %[[FOR_END:.*]]
+; AUTO_VEC: [[LOOP_PREHEADER]]:
+; AUTO_VEC-NEXT: br label %[[LOOP:.*]]
+; AUTO_VEC: [[LOOP]]:
+; AUTO_VEC-NEXT: [[INDVARS_IV_EPIL:%.*]] = phi i64 [ [[INDVARS_IV_NEXT_EPIL:%.*]], %[[LOOP]] ], [ 0, %[[LOOP_PREHEADER]] ]
+; AUTO_VEC-NEXT: [[X_06_EPIL:%.*]] = phi float [ [[CONV1_EPIL:%.*]], %[[LOOP]] ], [ 1.000000e+00, %[[LOOP_PREHEADER]] ]
; AUTO_VEC-NEXT: [[ARRAYIDX_EPIL:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV_EPIL]]
; AUTO_VEC-NEXT: store float [[X_06_EPIL]], ptr [[ARRAYIDX_EPIL]], align 4
; AUTO_VEC-NEXT: [[CONV1_EPIL]] = fadd float [[X_06_EPIL]], 5.000000e-01
; AUTO_VEC-NEXT: [[INDVARS_IV_NEXT_EPIL]] = add nuw nsw i64 [[INDVARS_IV_EPIL]], 1
; AUTO_VEC-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT_EPIL]] to i32
; AUTO_VEC-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]]
-; AUTO_VEC-NEXT: br i1 [[EXITCOND]], label %[[FOR_END_LOOPEXIT:.*]], label %[[FOR_BODY]]
+; AUTO_VEC-NEXT: br i1 [[EXITCOND]], label %[[FOR_END_LOOPEXIT:.*]], label %[[LOOP]]
; AUTO_VEC: [[FOR_END_LOOPEXIT]]:
; AUTO_VEC-NEXT: br label %[[FOR_END]]
; AUTO_VEC: [[FOR_END]]:
@@ -194,11 +194,11 @@ define double @external_use_with_fast_math(ptr %a, i64 %n) {
; AUTO_VEC-NEXT: [[ENTRY:.*]]:
; AUTO_VEC-NEXT: [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[N]], i64 1)
; AUTO_VEC-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[SMAX]], 16
-; AUTO_VEC-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[FOR_BODY:.*]], label %[[VECTOR_PH:.*]]
+; AUTO_VEC-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; AUTO_VEC: [[VECTOR_PH]]:
; AUTO_VEC-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[SMAX]], 16
; AUTO_VEC-NEXT: [[N_VEC:%.*]] = sub i64 [[SMAX]], [[N_MOD_VF]]
-; AUTO_VEC-NEXT: [[DOTCAST:%.*]] = sitofp i64 [[N_VEC]] to double
+; AUTO_VEC-NEXT: [[DOTCAST:%.*]] = sitofp fast i64 [[N_VEC]] to double
; AUTO_VEC-NEXT: [[TMP0:%.*]] = fmul fast double 3.000000e+00, [[DOTCAST]]
; AUTO_VEC-NEXT: [[TMP6:%.*]] = fadd fast double 0.000000e+00, [[TMP0]]
; AUTO_VEC-NEXT: br label %[[VECTOR_BODY:.*]]
@@ -223,14 +223,14 @@ define double @external_use_with_fast_math(ptr %a, i64 %n) {
; AUTO_VEC: [[MIDDLE_BLOCK]]:
; AUTO_VEC-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[SMAX]], [[N_VEC]]
; AUTO_VEC-NEXT: [[TMP7:%.*]] = fsub fast double [[TMP6]], 3.000000e+00
-; AUTO_VEC-NEXT: br i1 [[CMP_N]], label %[[FOR_END:.*]], label %[[FOR_BODY]]
-; AUTO_VEC: [[FOR_BODY]]:
+; AUTO_VEC-NEXT: br i1 [[CMP_N]], label %[[FOR_END:.*]], label %[[SCALAR_PH]]
+; AUTO_VEC: [[SCALAR_PH]]:
; AUTO_VEC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
; AUTO_VEC-NEXT: [[BC_RESUME_VAL1:%.*]] = phi double [ [[TMP6]], %[[MIDDLE_BLOCK]] ], [ 0.000000e+00, %[[ENTRY]] ]
; AUTO_VEC-NEXT: br label %[[LOOP:.*]]
; AUTO_VEC: [[LOOP]]:
-; AUTO_VEC-NEXT: [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[FOR_BODY]] ], [ [[I_NEXT:%.*]], %[[LOOP]] ]
-; AUTO_VEC-NEXT: [[J:%.*]] = phi double [ [[BC_RESUME_VAL1]], %[[FOR_BODY]] ], [ [[J_NEXT:%.*]], %[[LOOP]] ]
+; AUTO_VEC-NEXT: [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[I_NEXT:%.*]], %[[LOOP]] ]
+; AUTO_VEC-NEXT: [[J:%.*]] = phi double [ [[BC_RESUME_VAL1]], %[[SCALAR_PH]] ], [ [[J_NEXT:%.*]], %[[LOOP]] ]
; AUTO_VEC-NEXT: [[T0:%.*]] = getelementptr double, ptr [[A]], i64 [[I]]
; AUTO_VEC-NEXT: store double [[J]], ptr [[T0]], align 8
; AUTO_VEC-NEXT: [[I_NEXT]] = add i64 [[I]], 1
@@ -262,19 +262,19 @@ for.end:
define double @external_use_without_fast_math(ptr %a, i64 %n) {
; AUTO_VEC-LABEL: define double @external_use_without_fast_math(
; AUTO_VEC-SAME: ptr [[A:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
-; AUTO_VEC-NEXT: [[ENTRY_NEW:.*]]:
-; AUTO_VEC-NEXT: br label %[[FOR_BODY:.*]]
-; AUTO_VEC: [[FOR_BODY]]:
-; AUTO_VEC-NEXT: [[I:%.*]] = phi i64 [ 0, %[[ENTRY_NEW]] ], [ [[I_NEXT_7:%.*]], %[[FOR_BODY]] ]
-; AUTO_VEC-NEXT: [[J:%.*]] = phi double [ 0.000000e+00, %[[ENTRY_NEW]] ], [ [[J_NEXT_7:%.*]], %[[FOR_BODY]] ]
+; AUTO_VEC-NEXT: [[ENTRY:.*]]:
+; AUTO_VEC-NEXT: br label %[[LOOP:.*]]
+; AUTO_VEC: [[LOOP]]:
+; AUTO_VEC-NEXT: [[I:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[I_NEXT_7:%.*]], %[[LOOP]] ]
+; AUTO_VEC-NEXT: [[J:%.*]] = phi double [ 0.000000e+00, %[[ENTRY]] ], [ [[J_NEXT_7:%.*]], %[[LOOP]] ]
; AUTO_VEC-NEXT: [[TMP7:%.*]] = getelementptr double, ptr [[A]], i64 [[I]]
; AUTO_VEC-NEXT: store double [[J]], ptr [[TMP7]], align 8
; AUTO_VEC-NEXT: [[I_NEXT_7]] = add i64 [[I]], 1
; AUTO_VEC-NEXT: [[J_NEXT_7]] = fadd double [[J]], 3.000000e+00
; AUTO_VEC-NEXT: [[COND:%.*]] = icmp slt i64 [[I_NEXT_7]], [[N]]
-; AUTO_VEC-NEXT: br i1 [[COND]], label %[[FOR_BODY]], label %[[FOR_END:.*]]
+; AUTO_VEC-NEXT: br i1 [[COND]], label %[[LOOP]], label %[[FOR_END:.*]]
; AUTO_VEC: [[FOR_END]]:
-; AUTO_VEC-NEXT: [[J_LCSSA:%.*]] = phi double [ [[J]], %[[FOR_BODY]] ]
+; AUTO_VEC-NEXT: [[J_LCSSA:%.*]] = phi double [ [[J]], %[[LOOP]] ]
; AUTO_VEC-NEXT: ret double [[J_LCSSA]]
;
entry:
@@ -309,14 +309,14 @@ define void @fadd_reassoc_FMF(ptr nocapture %p, i32 %N) {
; AUTO_VEC-NEXT: [[ITER_CHECK:.*]]:
; AUTO_VEC-NEXT: [[TMP0:%.*]] = zext i32 [[N]] to i64
; AUTO_VEC-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 4
-; AUTO_VEC-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[FOR_BODY:.*]], label %[[VECTOR_MAIN_LOOP_ITER_CHECK:.*]]
+; AUTO_VEC-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH:.*]], label %[[VECTOR_MAIN_LOOP_ITER_CHECK:.*]]
; AUTO_VEC: [[VECTOR_MAIN_LOOP_ITER_CHECK]]:
; AUTO_VEC-NEXT: [[MIN_ITERS_CHECK1:%.*]] = icmp ult i64 [[TMP0]], 32
; AUTO_VEC-NEXT: br i1 [[MIN_ITERS_CHECK1]], label %[[VEC_EPILOG_PH:.*]], label %[[VECTOR_PH:.*]]
; AUTO_VEC: [[VECTOR_PH]]:
; AUTO_VEC-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], 32
; AUTO_VEC-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]]
-; AUTO_VEC-NEXT: [[DOTCAST:%.*]] = sitofp i64 [[N_VEC]] to float
+; AUTO_VEC-NEXT: [[DOTCAST:%.*]] = sitofp reassoc i64 [[N_VEC]] to float
; AUTO_VEC-NEXT: [[TMP1:%.*]] = fmul reassoc float 4.200000e+01, [[DOTCAST]]
; AUTO_VEC-NEXT: [[IND_END:%.*]] = fadd reassoc float 1.000000e+00, [[TMP1]]
; AUTO_VEC-NEXT: br label %[[VECTOR_BODY:.*]]
@@ -350,18 +350,18 @@ define void @fadd_reassoc_FMF(ptr nocapture %p, i32 %N) {
; AUTO_VEC-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]]
; AUTO_VEC-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[VEC_EPILOG_ITER_CHECK:.*]]
; AUTO_VEC: [[VEC_EPILOG_ITER_CHECK]]:
-; AUTO_VEC-NEXT: [[DOTCAST16:%.*]] = sitofp i64 [[N_VEC]] to float
+; AUTO_VEC-NEXT: [[DOTCAST16:%.*]] = sitofp reassoc i64 [[N_VEC]] to float
; AUTO_VEC-NEXT: [[TMP12:%.*]] = fmul reassoc float 4.200000e+01, [[DOTCAST16]]
; AUTO_VEC-NEXT: [[IND_END1:%.*]] = fadd reassoc float 1.000000e+00, [[TMP12]]
; AUTO_VEC-NEXT: [[N_VEC_REMAINING:%.*]] = sub i64 [[TMP0]], [[N_VEC]]
; AUTO_VEC-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_VEC_REMAINING]], 4
-; AUTO_VEC-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label %[[FOR_BODY]], label %[[VEC_EPILOG_PH]], !prof [[PROF3]]
+; AUTO_VEC-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]], !prof [[PROF3]]
; AUTO_VEC: [[VEC_EPILOG_PH]]:
; AUTO_VEC-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
; AUTO_VEC-NEXT: [[BC_RESUME_VAL:%.*]] = phi float [ [[IND_END]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 1.000000e+00, %[[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
; AUTO_VEC-NEXT: [[N_MOD_VF5:%.*]] = urem i64 [[TMP0]], 4
; AUTO_VEC-NEXT: [[N_VEC6:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF5]]
-; AUTO_VEC-NEXT: [[DOTCAST7:%.*]] = sitofp i64 [[N_VEC6]] to float
+; AUTO_VEC-NEXT: [[DOTCAST7:%.*]] = sitofp reassoc i64 [[N_VEC6]] to float
; AUTO_VEC-NEXT: [[TMP17:%.*]] = fmul reassoc float 4.200000e+01, [[DOTCAST7]]
; AUTO_VEC-NEXT: [[TMP18:%.*]] = fadd reassoc float 1.000000e+00, [[TMP17]]
; AUTO_VEC-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[BC_RESUME_VAL]], i64 0
@@ -381,14 +381,14 @@ define void @fadd_reassoc_FMF(ptr nocapture %p, i32 %N) {
; AUTO_VEC-NEXT: br i1 [[TMP15]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
; AUTO_VEC: [[VEC_EPILOG_MIDDLE_BLOCK]]:
; AUTO_VEC-NEXT: [[CMP_N18:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC6]]
-; AUTO_VEC-NEXT: br i1 [[CMP_N18]], label %[[EXIT]], label %[[FOR_BODY]]
-; AUTO_VEC: [[FOR_BODY]]:
+; AUTO_VEC-NEXT: br i1 [[CMP_N18]], label %[[EXIT]], label %[[VEC_EPILOG_SCALAR_PH]]
+; AUTO_VEC: [[VEC_EPILOG_SCALAR_PH]]:
; AUTO_VEC-NEXT: [[BC_RESUME_VAL14:%.*]] = phi i64 [ [[N_VEC6]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[ITER_CHECK]] ]
; AUTO_VEC-NEXT: [[BC_RESUME_VAL15:%.*]] = phi float [ [[TMP18]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END1]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 1.000000e+00, %[[ITER_CHECK]] ]
; AUTO_VEC-NEXT: br label %[[LOOP:.*]]
; AUTO_VEC: [[LOOP]]:
-; AUTO_VEC-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL14]], %[[FOR_BODY]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[LOOP]] ]
-; AUTO_VEC-NEXT: [[X_012:%.*]] = phi float [ [[BC_RESUME_VAL15]], %[[FOR_BODY]] ], [ [[ADD3:%.*]], %[[LOOP]] ]
+; AUTO_VEC-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL14]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[LOOP]] ]
+; AUTO_VEC-NEXT: [[X_012:%.*]] = phi float [ [[BC_RESUME_VAL15]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[ADD3:%.*]], %[[LOOP]] ]
; AUTO_VEC-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[P]], i64 [[INDVARS_IV]]
; AUTO_VEC-NEXT: [[TMP16:%.*]] = load float, ptr [[ARRAYIDX]], align 4
; AUTO_VEC-NEXT: [[ADD:%.*]] = fadd reassoc float [[X_012]], [[TMP16]]
diff --git a/llvm/test/Transforms/LoopVectorize/float-induction.ll b/llvm/test/Transforms/LoopVectorize/float-induction.ll
index 2b15aae628274..f000fceaf6e50 100644
--- a/llvm/test/Transforms/LoopVectorize/float-induction.ll
+++ b/llvm/test/Transforms/LoopVectorize/float-induction.ll
@@ -148,7 +148,7 @@ define void @fp_iv_loop1_fast_FMF(float %init, ptr noalias nocapture %A, i32 %N)
; VEC1_INTERL2-NEXT: br label [[VECTOR_BODY:%.*]]
; VEC1_INTERL2: vector.body:
; VEC1_INTERL2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; VEC1_INTERL2-NEXT: [[DOTCAST2:%.*]] = sitofp i64 [[INDEX]] to float
+; VEC1_INTERL2-NEXT: [[DOTCAST2:%.*]] = sitofp fast i64 [[INDEX]] to float
; VEC1_INTERL2-NEXT: [[TMP3:%.*]] = fmul fast float [[FPINC]], [[DOTCAST2]]
; VEC1_INTERL2-NEXT: [[OFFSET_IDX:%.*]] = fsub fast float [[INIT]], [[TMP3]]
; VEC1_INTERL2-NEXT: [[TMP4:%.*]] = fsub fast float [[OFFSET_IDX]], [[FPINC]]
@@ -395,7 +395,7 @@ define void @fp_iv_loop1_reassoc_FMF(float %init, ptr noalias nocapture %A, i32
; VEC1_INTERL2-NEXT: br label [[VECTOR_BODY:%.*]]
; VEC1_INTERL2: vector.body:
; VEC1_INTERL2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; VEC1_INTERL2-NEXT: [[DOTCAST2:%.*]] = sitofp i64 [[INDEX]] to float
+; VEC1_INTERL2-NEXT: [[DOTCAST2:%.*]] = sitofp reassoc i64 [[INDEX]] to float
; VEC1_INTERL2-NEXT: [[TMP3:%.*]] = fmul reassoc float [[FPINC]], [[DOTCAST2]]
; VEC1_INTERL2-NEXT: [[OFFSET_IDX:%.*]] = fsub reassoc float [[INIT]], [[TMP3]]
; VEC1_INTERL2-NEXT: [[TMP6:%.*]] = fsub reassoc float [[OFFSET_IDX]], [[FPINC]]
@@ -629,7 +629,7 @@ define void @fp_iv_loop2(float %init, ptr noalias nocapture %A, i32 %N) #0 {
; VEC1_INTERL2-NEXT: br label [[VECTOR_BODY:%.*]]
; VEC1_INTERL2: vector.body:
; VEC1_INTERL2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; VEC1_INTERL2-NEXT: [[DOTCAST2:%.*]] = sitofp i64 [[INDEX]] to float
+; VEC1_INTERL2-NEXT: [[DOTCAST2:%.*]] = sitofp fast i64 [[INDEX]] to float
; VEC1_INTERL2-NEXT: [[TMP3:%.*]] = fmul fast float [[DOTCAST2]], 5.000000e-01
; VEC1_INTERL2-NEXT: [[OFFSET_IDX:%.*]] = fadd fast float [[INIT]], [[TMP3]]
; VEC1_INTERL2-NEXT: [[TMP4:%.*]] = fadd fast float [[OFFSET_IDX]], 5.000000e-01
@@ -930,9 +930,9 @@ define void @fp_iv_loop3(float %init, ptr noalias nocapture %A, ptr noalias noca
; VEC1_INTERL2: vector.body:
; VEC1_INTERL2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; VEC1_INTERL2-NEXT: [[TMP4:%.*]] = or disjoint i64 [[INDEX]], 1
-; VEC1_INTERL2-NEXT: [[DOTCAST5:%.*]] = sitofp i64 [[INDEX]] to float
+; VEC1_INTERL2-NEXT: [[DOTCAST5:%.*]] = sitofp fast i64 [[INDEX]] to float
; VEC1_INTERL2-NEXT: [[TMP5:%.*]] = fmul fast float [[DOTCAST5]], -5.000000e-01
-; VEC1_INTERL2-NEXT: [[DOTCAST6:%.*]] = sitofp i64 [[INDEX]] to float
+; VEC1_INTERL2-NEXT: [[DOTCAST6:%.*]] = sitofp fast i64 [[INDEX]] to float
; VEC1_INTERL2-NEXT: [[TMP6:%.*]] = fmul fast float [[TMP0]], [[DOTCAST6]]
; VEC1_INTERL2-NEXT: [[OFFSET_IDX7:%.*]] = fadd fast float [[INIT]], [[TMP6]]
; VEC1_INTERL2-NEXT: [[TMP7:%.*]] = fadd fast float [[OFFSET_IDX7]], [[TMP0]]
@@ -1210,7 +1210,7 @@ define void @fp_iv_loop4(ptr noalias nocapture %A, i32 %N) {
; VEC1_INTERL2-NEXT: br label [[VECTOR_BODY:%.*]]
; VEC1_INTERL2: vector.body:
; VEC1_INTERL2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; VEC1_INTERL2-NEXT: [[DOTCAST2:%.*]] = sitofp i64 [[INDEX]] to float
+; VEC1_INTERL2-NEXT: [[DOTCAST2:%.*]] = sitofp fast i64 [[INDEX]] to float
; VEC1_INTERL2-NEXT: [[TMP3:%.*]] = fmul fast float [[DOTCAST2]], 5.000000e-01
; VEC1_INTERL2-NEXT: [[OFFSET_IDX:%.*]] = fadd fast float [[TMP3]], 1.000000e+00
; VEC1_INTERL2-NEXT: [[TMP4:%.*]] = fadd fast float [[TMP3]], 1.500000e+00
@@ -1321,7 +1321,7 @@ define void @non_primary_iv_float_scalar(ptr %A, i64 %N) {
; VEC4_INTERL1-NEXT: br label [[VECTOR_BODY:%.*]]
; VEC4_INTERL1: vector.body:
; VEC4_INTERL1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE7:%.*]] ]
-; VEC4_INTERL1-NEXT: [[DOTCAST2:%.*]] = sitofp i64 [[INDEX]] to float
+; VEC4_INTERL1-NEXT: [[DOTCAST2:%.*]] = sitofp fast i64 [[INDEX]] to float
; VEC4_INTERL1-NEXT: [[TMP0:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]]
; VEC4_INTERL1-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP0]], align 4
; VEC4_INTERL1-NEXT: [[TMP1:%.*]] = fcmp fast oeq <4 x float> [[WIDE_LOAD]], zeroinitializer
@@ -1398,7 +1398,7 @@ define void @non_primary_iv_float_scalar(ptr %A, i64 %N) {
; VEC4_INTERL2-NEXT: br label [[VECTOR_BODY:%.*]]
; VEC4_INTERL2: vector.body:
; VEC4_INTERL2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE16:%.*]] ]
-; VEC4_INTERL2-NEXT: [[DOTCAST2:%.*]] = sitofp i64 [[INDEX]] to float
+; VEC4_INTERL2-NEXT: [[DOTCAST2:%.*]] = sitofp fast i64 [[INDEX]] to float
; VEC4_INTERL2-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]]
; VEC4_INTERL2-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 16
; VEC4_INTERL2-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP1]], align 4
@@ -1514,7 +1514,7 @@ define void @non_primary_iv_float_scalar(ptr %A, i64 %N) {
; VEC1_INTERL2-NEXT: br label [[VECTOR_BODY:%.*]]
; VEC1_INTERL2: vector.body:
; VEC1_INTERL2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE3:%.*]] ]
-; VEC1_INTERL2-NEXT: [[DOTCAST1:%.*]] = sitofp i64 [[INDEX]] to float
+; VEC1_INTERL2-NEXT: [[DOTCAST1:%.*]] = sitofp fast i64 [[INDEX]] to float
; VEC1_INTERL2-NEXT: [[TMP0:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]]
; VEC1_INTERL2-NEXT: [[TMP1:%.*]] = getelementptr float, ptr [[A]], i64 [[INDEX]]
; VEC1_INTERL2-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[TMP1]], i64 4
@@ -1572,7 +1572,7 @@ define void @non_primary_iv_float_scalar(ptr %A, i64 %N) {
; VEC2_INTERL1_PRED_STORE-NEXT: br label [[VECTOR_BODY:%.*]]
; VEC2_INTERL1_PRED_STORE: vector.body:
; VEC2_INTERL1_PRED_STORE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE3:%.*]] ]
-; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTCAST2:%.*]] = sitofp i64 [[INDEX]] to float
+; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTCAST2:%.*]] = sitofp fast i64 [[INDEX]] to float
; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP0:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]]
; VEC2_INTERL1_PRED_STORE-NEXT: [[WIDE_LOAD:%.*]] = load <2 x float>, ptr [[TMP0]], align 4
; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP1:%.*]] = fcmp fast oeq <2 x float> [[WIDE_LOAD]], zeroinitializer
@@ -1693,7 +1693,7 @@ define i32 @float_induction_with_dbg_on_fadd(ptr %dst) {
; VEC1_INTERL2-NEXT: br label [[VECTOR_BODY:%.*]]
; VEC1_INTERL2: vector.body:
; VEC1_INTERL2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; VEC1_INTERL2-NEXT: [[DOTCAST:%.*]] = sitofp i64 [[INDEX]] to float
+; VEC1_INTERL2-NEXT: [[DOTCAST:%.*]] = sitofp reassoc i64 [[INDEX]] to float
; VEC1_INTERL2-NEXT: [[TMP0:%.*]] = call reassoc float @llvm.copysign.f32(float 0.000000e+00, float [[DOTCAST]])
; VEC1_INTERL2-NEXT: [[OFFSET_IDX:%.*]] = fadd reassoc float [[TMP0]], 0.000000e+00
; VEC1_INTERL2-NEXT: [[TMP1:%.*]] = getelementptr float, ptr null, i64 [[INDEX]]
diff --git a/llvm/test/Transforms/LoopVectorize/iv_outside_user.ll b/llvm/test/Transforms/LoopVectorize/iv_outside_user.ll
index 3f91baa117b7f..3d0feb12f68b0 100644
--- a/llvm/test/Transforms/LoopVectorize/iv_outside_user.ll
+++ b/llvm/test/Transforms/LoopVectorize/iv_outside_user.ll
@@ -734,7 +734,7 @@ define float @fp_postinc_use_fadd(float %init, ptr noalias nocapture %A, i64 %N,
; VEC: [[VECTOR_PH]]:
; VEC-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 2
; VEC-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
-; VEC-NEXT: [[DOTCAST:%.*]] = sitofp i64 [[N_VEC]] to float
+; VEC-NEXT: [[DOTCAST:%.*]] = sitofp fast i64 [[N_VEC]] to float
; VEC-NEXT: [[TMP0:%.*]] = fmul fast float [[FPINC]], [[DOTCAST]]
; VEC-NEXT: [[TMP1:%.*]] = fadd fast float [[INIT]], [[TMP0]]
; VEC-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x float> poison, float [[INIT]], i64 0
@@ -784,14 +784,14 @@ define float @fp_postinc_use_fadd(float %init, ptr noalias nocapture %A, i64 %N,
; INTERLEAVE: [[VECTOR_PH]]:
; INTERLEAVE-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 2
; INTERLEAVE-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
-; INTERLEAVE-NEXT: [[DOTCAST:%.*]] = sitofp i64 [[N_VEC]] to float
+; INTERLEAVE-NEXT: [[DOTCAST:%.*]] = sitofp fast i64 [[N_VEC]] to float
; INTERLEAVE-NEXT: [[TMP0:%.*]] = fmul fast float [[FPINC]], [[DOTCAST]]
; INTERLEAVE-NEXT: [[TMP1:%.*]] = fadd fast float [[INIT]], [[TMP0]]
; INTERLEAVE-NEXT: br label %[[VECTOR_BODY:.*]]
; INTERLEAVE: [[VECTOR_BODY]]:
; INTERLEAVE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; INTERLEAVE-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 1
-; INTERLEAVE-NEXT: [[DOTCAST1:%.*]] = sitofp i64 [[INDEX]] to float
+; INTERLEAVE-NEXT: [[DOTCAST1:%.*]] = sitofp fast i64 [[INDEX]] to float
; INTERLEAVE-NEXT: [[TMP4:%.*]] = fmul fast float [[FPINC]], [[DOTCAST1]]
; INTERLEAVE-NEXT: [[OFFSET_IDX:%.*]] = fadd fast float [[INIT]], [[TMP4]]
; INTERLEAVE-NEXT: [[TMP7:%.*]] = fmul fast float 1.000000e+00, [[FPINC]]
@@ -849,7 +849,7 @@ define float @fp_postinc_use_fadd_ops_swapped(float %init, ptr noalias nocapture
; VEC: [[VECTOR_PH]]:
; VEC-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 2
; VEC-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
-; VEC-NEXT: [[DOTCAST:%.*]] = sitofp i64 [[N_VEC]] to float
+; VEC-NEXT: [[DOTCAST:%.*]] = sitofp fast i64 [[N_VEC]] to float
; VEC-NEXT: [[TMP0:%.*]] = fmul fast float [[FPINC]], [[DOTCAST]]
; VEC-NEXT: [[TMP1:%.*]] = fadd fast float [[INIT]], [[TMP0]]
; VEC-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x float> poison, float [[INIT]], i64 0
@@ -899,14 +899,14 @@ define float @fp_postinc_use_fadd_ops_swapped(float %init, ptr noalias nocapture
; INTERLEAVE: [[VECTOR_PH]]:
; INTERLEAVE-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 2
; INTERLEAVE-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
-; INTERLEAVE-NEXT: [[DOTCAST:%.*]] = sitofp i64 [[N_VEC]] to float
+; INTERLEAVE-NEXT: [[DOTCAST:%.*]] = sitofp fast i64 [[N_VEC]] to float
; INTERLEAVE-NEXT: [[TMP0:%.*]] = fmul fast float [[FPINC]], [[DOTCAST]]
; INTERLEAVE-NEXT: [[TMP1:%.*]] = fadd fast float [[INIT]], [[TMP0]]
; INTERLEAVE-NEXT: br label %[[VECTOR_BODY:.*]]
; INTERLEAVE: [[VECTOR_BODY]]:
; INTERLEAVE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; INTERLEAVE-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 1
-; INTERLEAVE-NEXT: [[DOTCAST1:%.*]] = sitofp i64 [[INDEX]] to float
+; INTERLEAVE-NEXT: [[DOTCAST1:%.*]] = sitofp fast i64 [[INDEX]] to float
; INTERLEAVE-NEXT: [[TMP4:%.*]] = fmul fast float [[FPINC]], [[DOTCAST1]]
; INTERLEAVE-NEXT: [[OFFSET_IDX:%.*]] = fadd fast float [[INIT]], [[TMP4]]
; INTERLEAVE-NEXT: [[TMP7:%.*]] = fmul fast float 1.000000e+00, [[FPINC]]
@@ -964,7 +964,7 @@ define float @fp_postinc_use_fsub(float %init, ptr noalias nocapture %A, i64 %N,
; VEC: [[VECTOR_PH]]:
; VEC-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 2
; VEC-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
-; VEC-NEXT: [[DOTCAST:%.*]] = sitofp i64 [[N_VEC]] to float
+; VEC-NEXT: [[DOTCAST:%.*]] = sitofp fast i64 [[N_VEC]] to float
; VEC-NEXT: [[TMP0:%.*]] = fmul fast float [[FPINC]], [[DOTCAST]]
; VEC-NEXT: [[TMP1:%.*]] = fsub fast float [[INIT]], [[TMP0]]
; VEC-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x float> poison, float [[INIT]], i64 0
@@ -1014,14 +1014,14 @@ define float @fp_postinc_use_fsub(float %init, ptr noalias nocapture %A, i64 %N,
; INTERLEAVE: [[VECTOR_PH]]:
; INTERLEAVE-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 2
; INTERLEAVE-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
-; INTERLEAVE-NEXT: [[DOTCAST:%.*]] = sitofp i64 [[N_VEC]] to float
+; INTERLEAVE-NEXT: [[DOTCAST:%.*]] = sitofp fast i64 [[N_VEC]] to float
; INTERLEAVE-NEXT: [[TMP0:%.*]] = fmul fast float [[FPINC]], [[DOTCAST]]
; INTERLEAVE-NEXT: [[TMP1:%.*]] = fsub fast float [[INIT]], [[TMP0]]
; INTERLEAVE-NEXT: br label %[[VECTOR_BODY:.*]]
; INTERLEAVE: [[VECTOR_BODY]]:
; INTERLEAVE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; INTERLEAVE-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 1
-; INTERLEAVE-NEXT: [[DOTCAST1:%.*]] = sitofp i64 [[INDEX]] to float
+; INTERLEAVE-NEXT: [[DOTCAST1:%.*]] = sitofp fast i64 [[INDEX]] to float
; INTERLEAVE-NEXT: [[TMP4:%.*]] = fmul fast float [[FPINC]], [[DOTCAST1]]
; INTERLEAVE-NEXT: [[OFFSET_IDX:%.*]] = fsub fast float [[INIT]], [[TMP4]]
; INTERLEAVE-NEXT: [[TMP7:%.*]] = fmul fast float 1.000000e+00, [[FPINC]]
diff --git a/llvm/test/Transforms/LoopVectorize/single_early_exit_live_outs.ll b/llvm/test/Transforms/LoopVectorize/single_early_exit_live_outs.ll
index 54408b24db114..c0767ac153af4 100644
--- a/llvm/test/Transforms/LoopVectorize/single_early_exit_live_outs.ll
+++ b/llvm/test/Transforms/LoopVectorize/single_early_exit_live_outs.ll
@@ -273,7 +273,7 @@ define float @same_exit_block_pre_inc_use1_iv64_endf32() {
; CHECK: vector.early.exit:
; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP6]], i1 true)
; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[INDEX1]], [[FIRST_ACTIVE_LANE]]
-; CHECK-NEXT: [[DOTCAST:%.*]] = sitofp i64 [[TMP10]] to float
+; CHECK-NEXT: [[DOTCAST:%.*]] = sitofp fast i64 [[TMP10]] to float
; CHECK-NEXT: [[TMP11:%.*]] = fmul fast float 1.000000e+00, [[DOTCAST]]
; CHECK-NEXT: [[EARLY_EXIT_VALUE:%.*]] = fadd fast float 9.000000e+00, [[TMP11]]
; CHECK-NEXT: br label [[LOOP_END]]
More information about the cfe-commits
mailing list