[llvm] b671979 - [NVPTX] Remove `UnsafeFPMath` uses (#151479)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Aug 13 17:42:32 PDT 2025
Author: paperchalice
Date: 2025-08-14T08:42:29+08:00
New Revision: b671979b7ec5e83859e158fc19d23b8e5b178083
URL: https://github.com/llvm/llvm-project/commit/b671979b7ec5e83859e158fc19d23b8e5b178083
DIFF: https://github.com/llvm/llvm-project/commit/b671979b7ec5e83859e158fc19d23b8e5b178083.diff
LOG: [NVPTX] Remove `UnsafeFPMath` uses (#151479)
Remove `UnsafeFPMath` uses in the NVPTX backend. It blocks some bugfixes
related to clang, and the ultimate goal is to remove the `resetTargetOptions`
method in `TargetMachine`; see the FIXME in `resetTargetOptions`.
See also
https://discourse.llvm.org/t/rfc-honor-pragmas-with-ffp-contract-fast
https://discourse.llvm.org/t/allowfpopfusion-vs-sdnodeflags-hasallowcontract
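In practice, approximate NVPTX lowerings are now keyed off per-instruction
fast-math flags (`afn`, `ninf`, `contract`) rather than
`--enable-unsafe-fp-math` or the `"unsafe-fp-math"` function attribute. A
minimal IR sketch (hypothetical function names, mirroring the updated tests
in the diff below):

; Approximate forms now require per-call flags (sketch, not part of the patch).
define float @sin_approx(float %x) {
  ; 'afn' alone selects sin.approx.f32: UnaryOpAllowsApproxFn now checks
  ; hasApproximateFuncs() instead of allowUnsafeFPMath().
  %r = call afn float @llvm.sin.f32(float %x)
  ret float %r
}

define float @frem_fast(float %a, float %b) {
  ; 'afn' selects div.approx.f32 for the division inside the frem expansion;
  ; 'ninf' drops the infinite-divisor select that lowerFREM otherwise emits.
  %r = frem afn ninf float %a, %b
  ret float %r
}

declare float @llvm.sin.f32(float)

Run through llc with an NVPTX triple, these produce the same PTX as the
CHECK lines added to fast-math.ll and frem.ll in this patch.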
Added:
Modified:
llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
llvm/lib/Target/NVPTX/NVPTXISelLowering.h
llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
llvm/test/CodeGen/NVPTX/bf16x2-instructions-approx.ll
llvm/test/CodeGen/NVPTX/f16-instructions.ll
llvm/test/CodeGen/NVPTX/f16x2-instructions.ll
llvm/test/CodeGen/NVPTX/f32x2-instructions.ll
llvm/test/CodeGen/NVPTX/fast-math.ll
llvm/test/CodeGen/NVPTX/fma-relu-fma-intrinsic.ll
llvm/test/CodeGen/NVPTX/frem.ll
llvm/test/CodeGen/NVPTX/sqrt-approx.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
index 18aeda6a7935a..2445005bf98ce 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
@@ -70,7 +70,7 @@ NVPTXDAGToDAGISel::getDivF32Level(const SDNode *N) const {
}
bool NVPTXDAGToDAGISel::usePrecSqrtF32(const SDNode *N) const {
- return Subtarget->getTargetLowering()->usePrecSqrtF32(*MF, N);
+ return Subtarget->getTargetLowering()->usePrecSqrtF32(N);
}
bool NVPTXDAGToDAGISel::useF32FTZ() const {
@@ -82,11 +82,6 @@ bool NVPTXDAGToDAGISel::allowFMA() const {
return TL->allowFMA(*MF, OptLevel);
}
-bool NVPTXDAGToDAGISel::allowUnsafeFPMath() const {
- const NVPTXTargetLowering *TL = Subtarget->getTargetLowering();
- return TL->allowUnsafeFPMath(*MF);
-}
-
bool NVPTXDAGToDAGISel::doRsqrtOpt() const { return EnableRsqrtOpt; }
/// Select - Select instructions not customized! Used for
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
index 357e915fd077e..65731722f5343 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
+++ b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
@@ -44,7 +44,6 @@ class LLVM_LIBRARY_VISIBILITY NVPTXDAGToDAGISel : public SelectionDAGISel {
bool usePrecSqrtF32(const SDNode *N) const;
bool useF32FTZ() const;
bool allowFMA() const;
- bool allowUnsafeFPMath() const;
bool doRsqrtOpt() const;
NVPTXScopes Scopes{};
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
index 3daf25d551520..b94cbd0bd9c16 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
@@ -125,10 +125,6 @@ NVPTXTargetLowering::getDivF32Level(const MachineFunction &MF,
if (UsePrecDivF32.getNumOccurrences() > 0)
return UsePrecDivF32;
- // Otherwise, use div.approx if fast math is enabled
- if (allowUnsafeFPMath(MF))
- return NVPTX::DivPrecisionLevel::Approx;
-
const SDNodeFlags Flags = N.getFlags();
if (Flags.hasApproximateFuncs())
return NVPTX::DivPrecisionLevel::Approx;
@@ -136,16 +132,11 @@ NVPTXTargetLowering::getDivF32Level(const MachineFunction &MF,
return NVPTX::DivPrecisionLevel::IEEE754;
}
-bool NVPTXTargetLowering::usePrecSqrtF32(const MachineFunction &MF,
- const SDNode *N) const {
+bool NVPTXTargetLowering::usePrecSqrtF32(const SDNode *N) const {
// If nvptx-prec-sqrtf32 is used on the command-line, always honor it
if (UsePrecSqrtF32.getNumOccurrences() > 0)
return UsePrecSqrtF32;
- // Otherwise, use sqrt.approx if fast math is enabled
- if (allowUnsafeFPMath(MF))
- return false;
-
if (N) {
const SDNodeFlags Flags = N->getFlags();
if (Flags.hasApproximateFuncs())
@@ -1193,8 +1184,7 @@ SDValue NVPTXTargetLowering::getSqrtEstimate(SDValue Operand, SelectionDAG &DAG,
bool &UseOneConst,
bool Reciprocal) const {
if (!(Enabled == ReciprocalEstimate::Enabled ||
- (Enabled == ReciprocalEstimate::Unspecified &&
- !usePrecSqrtF32(DAG.getMachineFunction()))))
+ (Enabled == ReciprocalEstimate::Unspecified && !usePrecSqrtF32())))
return SDValue();
if (ExtraSteps == ReciprocalEstimate::Unspecified)
@@ -2851,8 +2841,7 @@ static SDValue lowerROT(SDValue Op, SelectionDAG &DAG) {
SDLoc(Op), Opcode, DAG);
}
-static SDValue lowerFREM(SDValue Op, SelectionDAG &DAG,
- bool AllowUnsafeFPMath) {
+static SDValue lowerFREM(SDValue Op, SelectionDAG &DAG) {
// Lower (frem x, y) into (sub x, (mul (ftrunc (div x, y)) y)),
// i.e. "poor man's fmod()". When y is infinite, x is returned. This matches
// the semantics of LLVM's frem.
@@ -2869,7 +2858,7 @@ static SDValue lowerFREM(SDValue Op, SelectionDAG &DAG,
SDValue Sub = DAG.getNode(ISD::FSUB, DL, Ty, X, Mul,
Flags | SDNodeFlags::AllowContract);
- if (AllowUnsafeFPMath || Flags.hasNoInfs())
+ if (Flags.hasNoInfs())
return Sub;
// If Y is infinite, return X
@@ -3014,7 +3003,7 @@ NVPTXTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::CTLZ:
return lowerCTLZCTPOP(Op, DAG);
case ISD::FREM:
- return lowerFREM(Op, DAG, allowUnsafeFPMath(DAG.getMachineFunction()));
+ return lowerFREM(Op, DAG);
default:
llvm_unreachable("Custom lowering not defined for operation");
@@ -4868,17 +4857,7 @@ bool NVPTXTargetLowering::allowFMA(MachineFunction &MF,
if (MF.getTarget().Options.AllowFPOpFusion == FPOpFusion::Fast)
return true;
- return allowUnsafeFPMath(MF);
-}
-
-bool NVPTXTargetLowering::allowUnsafeFPMath(const MachineFunction &MF) const {
- // Honor TargetOptions flags that explicitly say unsafe math is okay.
- if (MF.getTarget().Options.UnsafeFPMath)
- return true;
-
- // Allow unsafe math if unsafe-fp-math attribute explicitly says so.
- const Function &F = MF.getFunction();
- return F.getFnAttribute("unsafe-fp-math").getValueAsBool();
+ return false;
}
static bool isConstZero(const SDValue &Operand) {
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.h b/llvm/lib/Target/NVPTX/NVPTXISelLowering.h
index 43e721a9c2a4c..27f099e220976 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.h
+++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.h
@@ -206,8 +206,7 @@ class NVPTXTargetLowering : public TargetLowering {
// Get whether we should use a precise or approximate 32-bit floating point
// sqrt instruction.
- bool usePrecSqrtF32(const MachineFunction &MF,
- const SDNode *N = nullptr) const;
+ bool usePrecSqrtF32(const SDNode *N = nullptr) const;
// Get whether we should use instructions that flush floating-point denormals
// to sign-preserving zero.
@@ -220,7 +219,6 @@ class NVPTXTargetLowering : public TargetLowering {
unsigned combineRepeatedFPDivisors() const override { return 2; }
bool allowFMA(MachineFunction &MF, CodeGenOptLevel OptLevel) const;
- bool allowUnsafeFPMath(const MachineFunction &MF) const;
bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
EVT) const override {
diff --git a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
index bd54d1db9156f..ebb5e32f5e6fc 100644
--- a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
+++ b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
@@ -1133,9 +1133,8 @@ defm FMA_F64 : FMA<F64RT, allow_ftz = false>;
// sin/cos/tanh
class UnaryOpAllowsApproxFn<SDPatternOperator operator>
- : PatFrag<(ops node:$A),
- (operator node:$A), [{
- return allowUnsafeFPMath() || N->getFlags().hasApproximateFuncs();
+ : PatFrag<(ops node:$A), (operator node:$A), [{
+ return N->getFlags().hasApproximateFuncs();
}]>;
def SIN_APPROX_f32 :
diff --git a/llvm/test/CodeGen/NVPTX/bf16x2-instructions-approx.ll b/llvm/test/CodeGen/NVPTX/bf16x2-instructions-approx.ll
index 80627a03354a0..e1d4ef1073a78 100644
--- a/llvm/test/CodeGen/NVPTX/bf16x2-instructions-approx.ll
+++ b/llvm/test/CodeGen/NVPTX/bf16x2-instructions-approx.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
-; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_80 -mattr=+ptx71 --enable-unsafe-fp-math | FileCheck --check-prefixes=CHECK %s
-; RUN: %if ptxas-11.8 %{ llc < %s -mtriple=nvptx64 -mcpu=sm_80 -mattr=+ptx71 --enable-unsafe-fp-math | %ptxas-verify -arch=sm_80 %}
+; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_80 -mattr=+ptx71 | FileCheck --check-prefixes=CHECK %s
+; RUN: %if ptxas-11.8 %{ llc < %s -mtriple=nvptx64 -mcpu=sm_80 -mattr=+ptx71 | %ptxas-verify -arch=sm_80 %}
target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
@@ -22,7 +22,7 @@ define <2 x bfloat> @test_sin(<2 x bfloat> %a) #0 #1 {
; CHECK-NEXT: cvt.rn.bf16x2.f32 %r5, %r4, %r2;
; CHECK-NEXT: st.param.b32 [func_retval0], %r5;
; CHECK-NEXT: ret;
- %r = call <2 x bfloat> @llvm.sin.f16(<2 x bfloat> %a)
+ %r = call afn <2 x bfloat> @llvm.sin.f16(<2 x bfloat> %a)
ret <2 x bfloat> %r
}
@@ -41,7 +41,7 @@ define <2 x bfloat> @test_cos(<2 x bfloat> %a) #0 #1 {
; CHECK-NEXT: cvt.rn.bf16x2.f32 %r5, %r4, %r2;
; CHECK-NEXT: st.param.b32 [func_retval0], %r5;
; CHECK-NEXT: ret;
- %r = call <2 x bfloat> @llvm.cos.f16(<2 x bfloat> %a)
+ %r = call afn <2 x bfloat> @llvm.cos.f16(<2 x bfloat> %a)
ret <2 x bfloat> %r
}
diff --git a/llvm/test/CodeGen/NVPTX/f16-instructions.ll b/llvm/test/CodeGen/NVPTX/f16-instructions.ll
index 2b7e4184670c7..d4aec4f16f1ab 100644
--- a/llvm/test/CodeGen/NVPTX/f16-instructions.ll
+++ b/llvm/test/CodeGen/NVPTX/f16-instructions.ll
@@ -886,8 +886,8 @@ define half @test_sqrt(half %a) #0 {
; CHECK: cvt.rn.f16.f32 [[R:%rs[0-9]+]], [[RF]];
; CHECK: st.param.b16 [func_retval0], [[R]];
; CHECK: ret;
-define half @test_sin(half %a) #0 #1 {
- %r = call half @llvm.sin.f16(half %a)
+define half @test_sin(half %a) #0 {
+ %r = call afn half @llvm.sin.f16(half %a)
ret half %r
}
@@ -900,8 +900,8 @@ define half @test_sin(half %a) #0 #1 {
; CHECK: cvt.rn.f16.f32 [[R:%rs[0-9]+]], [[RF]];
; CHECK: st.param.b16 [func_retval0], [[R]];
; CHECK: ret;
-define half @test_cos(half %a) #0 #1 {
- %r = call half @llvm.cos.f16(half %a)
+define half @test_cos(half %a) #0 {
+ %r = call afn half @llvm.cos.f16(half %a)
ret half %r
}
@@ -1183,4 +1183,3 @@ define <2 x half> @test_neg_f16x2(<2 x half> noundef %arg) #0 {
}
attributes #0 = { nounwind }
-attributes #1 = { "unsafe-fp-math" = "true" }
diff --git a/llvm/test/CodeGen/NVPTX/f16x2-instructions.ll b/llvm/test/CodeGen/NVPTX/f16x2-instructions.ll
index d4fcea320f3ad..991311f9492b9 100644
--- a/llvm/test/CodeGen/NVPTX/f16x2-instructions.ll
+++ b/llvm/test/CodeGen/NVPTX/f16x2-instructions.ll
@@ -1674,7 +1674,7 @@ define <2 x half> @test_sqrt(<2 x half> %a) #0 {
; ret <2 x half> %r
;}
-define <2 x half> @test_sin(<2 x half> %a) #0 #1 {
+define <2 x half> @test_sin(<2 x half> %a) #0 {
; CHECK-LABEL: test_sin(
; CHECK: {
; CHECK-NEXT: .reg .b16 %rs<5>;
@@ -1692,11 +1692,11 @@ define <2 x half> @test_sin(<2 x half> %a) #0 #1 {
; CHECK-NEXT: mov.b32 %r6, {%rs4, %rs3};
; CHECK-NEXT: st.param.b32 [func_retval0], %r6;
; CHECK-NEXT: ret;
- %r = call <2 x half> @llvm.sin.f16(<2 x half> %a)
+ %r = call afn <2 x half> @llvm.sin.f16(<2 x half> %a)
ret <2 x half> %r
}
-define <2 x half> @test_cos(<2 x half> %a) #0 #1 {
+define <2 x half> @test_cos(<2 x half> %a) #0 {
; CHECK-LABEL: test_cos(
; CHECK: {
; CHECK-NEXT: .reg .b16 %rs<5>;
@@ -1714,7 +1714,7 @@ define <2 x half> @test_cos(<2 x half> %a) #0 #1 {
; CHECK-NEXT: mov.b32 %r6, {%rs4, %rs3};
; CHECK-NEXT: st.param.b32 [func_retval0], %r6;
; CHECK-NEXT: ret;
- %r = call <2 x half> @llvm.cos.f16(<2 x half> %a)
+ %r = call afn <2 x half> @llvm.cos.f16(<2 x half> %a)
ret <2 x half> %r
}
@@ -2330,4 +2330,3 @@ define void @test_store_2xhalf(ptr %p1, ptr %p2, <2 x half> %v) {
attributes #0 = { nounwind }
-attributes #1 = { "unsafe-fp-math" = "true" }
diff --git a/llvm/test/CodeGen/NVPTX/f32x2-instructions.ll b/llvm/test/CodeGen/NVPTX/f32x2-instructions.ll
index 47b7c9a09be4a..467459759c42c 100644
--- a/llvm/test/CodeGen/NVPTX/f32x2-instructions.ll
+++ b/llvm/test/CodeGen/NVPTX/f32x2-instructions.ll
@@ -1638,7 +1638,7 @@ define <2 x float> @test_sqrt(<2 x float> %a) #0 {
; ret <2 x float> %r
;}
-define <2 x float> @test_sin(<2 x float> %a) #0 #1 {
+define <2 x float> @test_sin(<2 x float> %a) #0 {
; CHECK-LABEL: test_sin(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<5>;
@@ -1651,11 +1651,11 @@ define <2 x float> @test_sin(<2 x float> %a) #0 #1 {
; CHECK-NEXT: sin.approx.f32 %r4, %r1;
; CHECK-NEXT: st.param.v2.b32 [func_retval0], {%r4, %r3};
; CHECK-NEXT: ret;
- %r = call <2 x float> @llvm.sin(<2 x float> %a)
+ %r = call afn <2 x float> @llvm.sin(<2 x float> %a)
ret <2 x float> %r
}
-define <2 x float> @test_cos(<2 x float> %a) #0 #1 {
+define <2 x float> @test_cos(<2 x float> %a) #0 {
; CHECK-LABEL: test_cos(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<5>;
@@ -1668,7 +1668,7 @@ define <2 x float> @test_cos(<2 x float> %a) #0 #1 {
; CHECK-NEXT: cos.approx.f32 %r4, %r1;
; CHECK-NEXT: st.param.v2.b32 [func_retval0], {%r4, %r3};
; CHECK-NEXT: ret;
- %r = call <2 x float> @llvm.cos(<2 x float> %a)
+ %r = call afn <2 x float> @llvm.cos(<2 x float> %a)
ret <2 x float> %r
}
@@ -2157,5 +2157,4 @@ define void @test_trunc_to_v2f16(<2 x float> %a, ptr %p) {
attributes #0 = { nounwind }
-attributes #1 = { "unsafe-fp-math" = "true" }
attributes #2 = { "denormal-fp-math"="preserve-sign" }
diff --git a/llvm/test/CodeGen/NVPTX/fast-math.ll b/llvm/test/CodeGen/NVPTX/fast-math.ll
index 5eda3a1e2dda1..8561c60a46948 100644
--- a/llvm/test/CodeGen/NVPTX/fast-math.ll
+++ b/llvm/test/CodeGen/NVPTX/fast-math.ll
@@ -22,7 +22,7 @@ define float @sqrt_div(float %a, float %b) {
ret float %t2
}
-define float @sqrt_div_fast(float %a, float %b) #0 {
+define float @sqrt_div_fast(float %a, float %b) {
; CHECK-LABEL: sqrt_div_fast(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<5>;
@@ -34,29 +34,25 @@ define float @sqrt_div_fast(float %a, float %b) #0 {
; CHECK-NEXT: div.approx.f32 %r4, %r2, %r3;
; CHECK-NEXT: st.param.b32 [func_retval0], %r4;
; CHECK-NEXT: ret;
- %t1 = tail call float @llvm.sqrt.f32(float %a)
- %t2 = fdiv float %t1, %b
+ %t1 = tail call afn float @llvm.sqrt.f32(float %a)
+ %t2 = fdiv afn float %t1, %b
ret float %t2
}
-define float @sqrt_div_fast_ninf(float %a, float %b) #0 {
+define float @sqrt_div_fast_ninf(float %a, float %b) {
; CHECK-LABEL: sqrt_div_fast_ninf(
; CHECK: {
-; CHECK-NEXT: .reg .pred %p<2>;
-; CHECK-NEXT: .reg .b32 %r<7>;
+; CHECK-NEXT: .reg .b32 %r<5>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b32 %r1, [sqrt_div_fast_ninf_param_0];
; CHECK-NEXT: sqrt.approx.f32 %r2, %r1;
-; CHECK-NEXT: abs.f32 %r3, %r1;
-; CHECK-NEXT: setp.lt.f32 %p1, %r3, 0f00800000;
-; CHECK-NEXT: selp.f32 %r4, 0f00000000, %r2, %p1;
-; CHECK-NEXT: ld.param.b32 %r5, [sqrt_div_fast_ninf_param_1];
-; CHECK-NEXT: div.approx.f32 %r6, %r4, %r5;
-; CHECK-NEXT: st.param.b32 [func_retval0], %r6;
+; CHECK-NEXT: ld.param.b32 %r3, [sqrt_div_fast_ninf_param_1];
+; CHECK-NEXT: div.approx.f32 %r4, %r2, %r3;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r4;
; CHECK-NEXT: ret;
%t1 = tail call ninf afn float @llvm.sqrt.f32(float %a)
- %t2 = fdiv float %t1, %b
+ %t2 = fdiv afn float %t1, %b
ret float %t2
}
@@ -77,7 +73,7 @@ define float @sqrt_div_ftz(float %a, float %b) #1 {
ret float %t2
}
-define float @sqrt_div_fast_ftz(float %a, float %b) #0 #1 {
+define float @sqrt_div_fast_ftz(float %a, float %b) #1 {
; CHECK-LABEL: sqrt_div_fast_ftz(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<5>;
@@ -89,35 +85,32 @@ define float @sqrt_div_fast_ftz(float %a, float %b) #0 #1 {
; CHECK-NEXT: div.approx.ftz.f32 %r4, %r2, %r3;
; CHECK-NEXT: st.param.b32 [func_retval0], %r4;
; CHECK-NEXT: ret;
- %t1 = tail call float @llvm.sqrt.f32(float %a)
- %t2 = fdiv float %t1, %b
+ %t1 = tail call afn float @llvm.sqrt.f32(float %a)
+ %t2 = fdiv afn float %t1, %b
ret float %t2
}
-define float @sqrt_div_fast_ftz_ninf(float %a, float %b) #0 #1 {
+define float @sqrt_div_fast_ftz_ninf(float %a, float %b) #1 {
; CHECK-LABEL: sqrt_div_fast_ftz_ninf(
; CHECK: {
-; CHECK-NEXT: .reg .pred %p<2>;
-; CHECK-NEXT: .reg .b32 %r<6>;
+; CHECK-NEXT: .reg .b32 %r<5>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b32 %r1, [sqrt_div_fast_ftz_ninf_param_0];
-; CHECK-NEXT: setp.eq.ftz.f32 %p1, %r1, 0f00000000;
; CHECK-NEXT: sqrt.approx.ftz.f32 %r2, %r1;
-; CHECK-NEXT: selp.f32 %r3, 0f00000000, %r2, %p1;
-; CHECK-NEXT: ld.param.b32 %r4, [sqrt_div_fast_ftz_ninf_param_1];
-; CHECK-NEXT: div.approx.ftz.f32 %r5, %r3, %r4;
-; CHECK-NEXT: st.param.b32 [func_retval0], %r5;
+; CHECK-NEXT: ld.param.b32 %r3, [sqrt_div_fast_ftz_ninf_param_1];
+; CHECK-NEXT: div.approx.ftz.f32 %r4, %r2, %r3;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r4;
; CHECK-NEXT: ret;
%t1 = tail call ninf afn float @llvm.sqrt.f32(float %a)
- %t2 = fdiv float %t1, %b
+ %t2 = fdiv afn float %t1, %b
ret float %t2
}
; There are no fast-math or ftz versions of sqrt and div for f64. We use
; reciprocal(rsqrt(x)) for sqrt(x), and emit a vanilla divide.
-define double @sqrt_div_fast_ftz_f64(double %a, double %b) #0 #1 {
+define double @sqrt_div_fast_ftz_f64(double %a, double %b) #1 {
; CHECK-LABEL: sqrt_div_fast_ftz_f64(
; CHECK: {
; CHECK-NEXT: .reg .b64 %rd<5>;
@@ -134,22 +127,17 @@ define double @sqrt_div_fast_ftz_f64(double %a, double %b) #0 #1 {
ret double %t2
}
-define double @sqrt_div_fast_ftz_f64_ninf(double %a, double %b) #0 #1 {
+define double @sqrt_div_fast_ftz_f64_ninf(double %a, double %b) #1 {
; CHECK-LABEL: sqrt_div_fast_ftz_f64_ninf(
; CHECK: {
-; CHECK-NEXT: .reg .pred %p<2>;
-; CHECK-NEXT: .reg .b64 %rd<8>;
+; CHECK-NEXT: .reg .b64 %rd<5>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [sqrt_div_fast_ftz_f64_ninf_param_0];
-; CHECK-NEXT: abs.f64 %rd2, %rd1;
-; CHECK-NEXT: setp.lt.f64 %p1, %rd2, 0d0010000000000000;
-; CHECK-NEXT: rsqrt.approx.f64 %rd3, %rd1;
-; CHECK-NEXT: rcp.approx.ftz.f64 %rd4, %rd3;
-; CHECK-NEXT: selp.f64 %rd5, 0d0000000000000000, %rd4, %p1;
-; CHECK-NEXT: ld.param.b64 %rd6, [sqrt_div_fast_ftz_f64_ninf_param_1];
-; CHECK-NEXT: div.rn.f64 %rd7, %rd5, %rd6;
-; CHECK-NEXT: st.param.b64 [func_retval0], %rd7;
+; CHECK-NEXT: sqrt.rn.f64 %rd2, %rd1;
+; CHECK-NEXT: ld.param.b64 %rd3, [sqrt_div_fast_ftz_f64_ninf_param_1];
+; CHECK-NEXT: div.rn.f64 %rd4, %rd2, %rd3;
+; CHECK-NEXT: st.param.b64 [func_retval0], %rd4;
; CHECK-NEXT: ret;
%t1 = tail call ninf afn double @llvm.sqrt.f64(double %a)
%t2 = fdiv double %t1, %b
@@ -172,7 +160,7 @@ define float @rsqrt(float %a) {
ret float %ret
}
-define float @rsqrt_fast(float %a) #0 {
+define float @rsqrt_fast(float %a) {
; CHECK-LABEL: rsqrt_fast(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<3>;
@@ -182,12 +170,12 @@ define float @rsqrt_fast(float %a) #0 {
; CHECK-NEXT: rsqrt.approx.f32 %r2, %r1;
; CHECK-NEXT: st.param.b32 [func_retval0], %r2;
; CHECK-NEXT: ret;
- %b = tail call float @llvm.sqrt.f32(float %a)
- %ret = fdiv float 1.0, %b
+ %b = tail call afn float @llvm.sqrt.f32(float %a)
+ %ret = fdiv afn float 1.0, %b
ret float %ret
}
-define float @rsqrt_fast_ftz(float %a) #0 #1 {
+define float @rsqrt_fast_ftz(float %a) #1 {
; CHECK-LABEL: rsqrt_fast_ftz(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<3>;
@@ -197,8 +185,8 @@ define float @rsqrt_fast_ftz(float %a) #0 #1 {
; CHECK-NEXT: rsqrt.approx.ftz.f32 %r2, %r1;
; CHECK-NEXT: st.param.b32 [func_retval0], %r2;
; CHECK-NEXT: ret;
- %b = tail call float @llvm.sqrt.f32(float %a)
- %ret = fdiv float 1.0, %b
+ %b = tail call afn float @llvm.sqrt.f32(float %a)
+ %ret = fdiv afn float 1.0, %b
ret float %ret
}
@@ -263,35 +251,7 @@ define float @fcos_approx_afn(float %a) {
ret float %r
}
-define float @fsin_approx(float %a) #0 {
-; CHECK-LABEL: fsin_approx(
-; CHECK: {
-; CHECK-NEXT: .reg .b32 %r<3>;
-; CHECK-EMPTY:
-; CHECK-NEXT: // %bb.0:
-; CHECK-NEXT: ld.param.b32 %r1, [fsin_approx_param_0];
-; CHECK-NEXT: sin.approx.f32 %r2, %r1;
-; CHECK-NEXT: st.param.b32 [func_retval0], %r2;
-; CHECK-NEXT: ret;
- %r = tail call float @llvm.sin.f32(float %a)
- ret float %r
-}
-
-define float @fcos_approx(float %a) #0 {
-; CHECK-LABEL: fcos_approx(
-; CHECK: {
-; CHECK-NEXT: .reg .b32 %r<3>;
-; CHECK-EMPTY:
-; CHECK-NEXT: // %bb.0:
-; CHECK-NEXT: ld.param.b32 %r1, [fcos_approx_param_0];
-; CHECK-NEXT: cos.approx.f32 %r2, %r1;
-; CHECK-NEXT: st.param.b32 [func_retval0], %r2;
-; CHECK-NEXT: ret;
- %r = tail call float @llvm.cos.f32(float %a)
- ret float %r
-}
-
-define float @fsin_approx_ftz(float %a) #0 #1 {
+define float @fsin_approx_ftz(float %a) #1 {
; CHECK-LABEL: fsin_approx_ftz(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<3>;
@@ -301,11 +261,11 @@ define float @fsin_approx_ftz(float %a) #0 #1 {
; CHECK-NEXT: sin.approx.ftz.f32 %r2, %r1;
; CHECK-NEXT: st.param.b32 [func_retval0], %r2;
; CHECK-NEXT: ret;
- %r = tail call float @llvm.sin.f32(float %a)
+ %r = tail call afn float @llvm.sin.f32(float %a)
ret float %r
}
-define float @fcos_approx_ftz(float %a) #0 #1 {
+define float @fcos_approx_ftz(float %a) #1 {
; CHECK-LABEL: fcos_approx_ftz(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<3>;
@@ -315,7 +275,7 @@ define float @fcos_approx_ftz(float %a) #0 #1 {
; CHECK-NEXT: cos.approx.ftz.f32 %r2, %r1;
; CHECK-NEXT: st.param.b32 [func_retval0], %r2;
; CHECK-NEXT: ret;
- %r = tail call float @llvm.cos.f32(float %a)
+ %r = tail call afn float @llvm.cos.f32(float %a)
ret float %r
}
@@ -423,7 +383,7 @@ define float @repeated_div_recip_allowed_ftz_sel(i1 %pred, float %a, float %b, f
ret float %w
}
-define float @repeated_div_fast(i1 %pred, float %a, float %b, float %divisor) #0 {
+define float @repeated_div_fast(i1 %pred, float %a, float %b, float %divisor) {
; CHECK-LABEL: repeated_div_fast(
; CHECK: {
; CHECK-NEXT: .reg .pred %p<2>;
@@ -444,14 +404,14 @@ define float @repeated_div_fast(i1 %pred, float %a, float %b, float %divisor) #0
; CHECK-NEXT: selp.f32 %r8, %r7, %r6, %p1;
; CHECK-NEXT: st.param.b32 [func_retval0], %r8;
; CHECK-NEXT: ret;
- %x = fdiv float %a, %divisor
- %y = fdiv float %b, %divisor
- %z = fmul float %x, %y
+ %x = fdiv afn arcp float %a, %divisor
+ %y = fdiv afn arcp contract float %b, %divisor
+ %z = fmul contract float %x, %y
%w = select i1 %pred, float %z, float %y
ret float %w
}
-define float @repeated_div_fast_sel(i1 %pred, float %a, float %b, float %divisor) #0 {
+define float @repeated_div_fast_sel(i1 %pred, float %a, float %b, float %divisor) {
; CHECK-LABEL: repeated_div_fast_sel(
; CHECK: {
; CHECK-NEXT: .reg .pred %p<2>;
@@ -469,13 +429,13 @@ define float @repeated_div_fast_sel(i1 %pred, float %a, float %b, float %divisor
; CHECK-NEXT: div.approx.f32 %r5, %r3, %r4;
; CHECK-NEXT: st.param.b32 [func_retval0], %r5;
; CHECK-NEXT: ret;
- %x = fdiv float %a, %divisor
- %y = fdiv float %b, %divisor
+ %x = fdiv afn float %a, %divisor
+ %y = fdiv afn float %b, %divisor
%w = select i1 %pred, float %x, float %y
ret float %w
}
-define float @repeated_div_fast_ftz(i1 %pred, float %a, float %b, float %divisor) #0 #1 {
+define float @repeated_div_fast_ftz(i1 %pred, float %a, float %b, float %divisor) #1 {
; CHECK-LABEL: repeated_div_fast_ftz(
; CHECK: {
; CHECK-NEXT: .reg .pred %p<2>;
@@ -496,14 +456,14 @@ define float @repeated_div_fast_ftz(i1 %pred, float %a, float %b, float %divisor
; CHECK-NEXT: selp.f32 %r8, %r7, %r6, %p1;
; CHECK-NEXT: st.param.b32 [func_retval0], %r8;
; CHECK-NEXT: ret;
- %x = fdiv float %a, %divisor
- %y = fdiv float %b, %divisor
- %z = fmul float %x, %y
+ %x = fdiv afn arcp float %a, %divisor
+ %y = fdiv afn arcp contract float %b, %divisor
+ %z = fmul contract float %x, %y
%w = select i1 %pred, float %z, float %y
ret float %w
}
-define float @repeated_div_fast_ftz_sel(i1 %pred, float %a, float %b, float %divisor) #0 #1 {
+define float @repeated_div_fast_ftz_sel(i1 %pred, float %a, float %b, float %divisor) #1 {
; CHECK-LABEL: repeated_div_fast_ftz_sel(
; CHECK: {
; CHECK-NEXT: .reg .pred %p<2>;
@@ -521,13 +481,13 @@ define float @repeated_div_fast_ftz_sel(i1 %pred, float %a, float %b, float %div
; CHECK-NEXT: div.approx.ftz.f32 %r5, %r3, %r4;
; CHECK-NEXT: st.param.b32 [func_retval0], %r5;
; CHECK-NEXT: ret;
- %x = fdiv float %a, %divisor
- %y = fdiv float %b, %divisor
+ %x = fdiv afn float %a, %divisor
+ %y = fdiv afn float %b, %divisor
%w = select i1 %pred, float %x, float %y
ret float %w
}
-define float @frem(float %a, float %b) #0 {
+define float @frem(float %a, float %b) {
; CHECK-LABEL: frem(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<7>;
@@ -541,11 +501,11 @@ define float @frem(float %a, float %b) #0 {
; CHECK-NEXT: fma.rn.f32 %r6, %r5, %r2, %r1;
; CHECK-NEXT: st.param.b32 [func_retval0], %r6;
; CHECK-NEXT: ret;
- %rem = frem float %a, %b
+ %rem = frem afn arcp contract ninf float %a, %b
ret float %rem
}
-define float @frem_ftz(float %a, float %b) #0 #1 {
+define float @frem_ftz(float %a, float %b) #1 {
; CHECK-LABEL: frem_ftz(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<7>;
@@ -559,11 +519,11 @@ define float @frem_ftz(float %a, float %b) #0 #1 {
; CHECK-NEXT: fma.rn.ftz.f32 %r6, %r5, %r2, %r1;
; CHECK-NEXT: st.param.b32 [func_retval0], %r6;
; CHECK-NEXT: ret;
- %rem = frem float %a, %b
+ %rem = frem afn contract ninf float %a, %b
ret float %rem
}
-define double @frem_f64(double %a, double %b) #0 {
+define double @frem_f64(double %a, double %b) {
; CHECK-LABEL: frem_f64(
; CHECK: {
; CHECK-NEXT: .reg .b64 %rd<7>;
@@ -577,9 +537,8 @@ define double @frem_f64(double %a, double %b) #0 {
; CHECK-NEXT: fma.rn.f64 %rd6, %rd5, %rd2, %rd1;
; CHECK-NEXT: st.param.b64 [func_retval0], %rd6;
; CHECK-NEXT: ret;
- %rem = frem double %a, %b
+ %rem = frem ninf double %a, %b
ret double %rem
}
-attributes #0 = { "unsafe-fp-math" = "true" }
attributes #1 = { "denormal-fp-math-f32" = "preserve-sign" }
diff --git a/llvm/test/CodeGen/NVPTX/fma-relu-fma-intrinsic.ll b/llvm/test/CodeGen/NVPTX/fma-relu-fma-intrinsic.ll
index 2f1d7d6321438..6d983ba6bf0ff 100644
--- a/llvm/test/CodeGen/NVPTX/fma-relu-fma-intrinsic.ll
+++ b/llvm/test/CodeGen/NVPTX/fma-relu-fma-intrinsic.ll
@@ -9,7 +9,7 @@
; SM < 80 or (which needs PTX version >= 70) should not emit fma{.ftz}.relu
; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_70 | FileCheck %s --check-prefixes=CHECK-SM70
-define half @fma_f16_no_nans(half %a, half %b, half %c) #0 {
+define half @fma_f16_no_nans(half %a, half %b, half %c) {
; CHECK-LABEL: fma_f16_no_nans(
; CHECK: {
; CHECK-NEXT: .reg .b16 %rs<5>;
@@ -49,14 +49,14 @@ define half @fma_f16_no_nans(half %a, half %b, half %c) #0 {
; CHECK-SM70-NEXT: selp.b16 %rs6, %rs4, 0x0000, %p1;
; CHECK-SM70-NEXT: st.param.b16 [func_retval0], %rs6;
; CHECK-SM70-NEXT: ret;
- %1 = call half @llvm.fma.f16(half %a, half %b, half %c)
+ %1 = call nnan half @llvm.fma.f16(half %a, half %b, half %c)
%2 = fcmp ogt half %1, 0.0
- %3 = select i1 %2, half %1, half 0.0
+ %3 = select nsz i1 %2, half %1, half 0.0
ret half %3
}
; FMA relu shouldn't be selected if the FMA operation has multiple uses
-define half @fma_f16_no_nans_multiple_uses_of_fma(half %a, half %b, half %c) #0 {
+define half @fma_f16_no_nans_multiple_uses_of_fma(half %a, half %b, half %c) {
; CHECK-LABEL: fma_f16_no_nans_multiple_uses_of_fma(
; CHECK: {
; CHECK-NEXT: .reg .b16 %rs<8>;
@@ -103,13 +103,13 @@ define half @fma_f16_no_nans_multiple_uses_of_fma(half %a, half %b, half %c) #0
; CHECK-SM70-NEXT: ret;
%1 = call half @llvm.fma.f16(half %a, half %b, half %c)
%2 = fcmp ogt half %1, 0.0
- %3 = select i1 %2, half %1, half 0.0
- %4 = fadd half %1, 7.0
- %5 = fadd half %4, %1
+ %3 = select i1 %2, half %1, half 0.0
+ %4 = fadd contract half %1, 7.0
+ %5 = fadd contract half %4, %1
ret half %5
}
-define half @fma_f16_maxnum_no_nans(half %a, half %b, half %c) #0 {
+define half @fma_f16_maxnum_no_nans(half %a, half %b, half %c) {
; CHECK-LABEL: fma_f16_maxnum_no_nans(
; CHECK: {
; CHECK-NEXT: .reg .b16 %rs<5>;
@@ -149,12 +149,12 @@ define half @fma_f16_maxnum_no_nans(half %a, half %b, half %c) #0 {
; CHECK-SM70-NEXT: cvt.rn.f16.f32 %rs5, %r2;
; CHECK-SM70-NEXT: st.param.b16 [func_retval0], %rs5;
; CHECK-SM70-NEXT: ret;
- %1 = call half @llvm.fma.f16(half %a, half %b, half %c)
- %2 = call half @llvm.maxnum.f16(half %1, half 0.0)
+ %1 = call nnan half @llvm.fma.f16(half %a, half %b, half %c)
+ %2 = call nsz half @llvm.maxnum.f16(half %1, half 0.0)
ret half %2
}
-define bfloat @fma_bf16_no_nans(bfloat %a, bfloat %b, bfloat %c) #0 {
+define bfloat @fma_bf16_no_nans(bfloat %a, bfloat %b, bfloat %c) {
; CHECK-LABEL: fma_bf16_no_nans(
; CHECK: {
; CHECK-NEXT: .reg .b16 %rs<5>;
@@ -205,14 +205,14 @@ define bfloat @fma_bf16_no_nans(bfloat %a, bfloat %b, bfloat %c) #0 {
; CHECK-SM70-NEXT: selp.b16 %rs2, %rs1, 0x0000, %p2;
; CHECK-SM70-NEXT: st.param.b16 [func_retval0], %rs2;
; CHECK-SM70-NEXT: ret;
- %1 = call bfloat @llvm.fma.bf16(bfloat %a, bfloat %b, bfloat %c)
+ %1 = call nnan bfloat @llvm.fma.bf16(bfloat %a, bfloat %b, bfloat %c)
%2 = fcmp ogt bfloat %1, 0.0
- %3 = select i1 %2, bfloat %1, bfloat 0.0
+ %3 = select nsz i1 %2, bfloat %1, bfloat 0.0
ret bfloat %3
}
; FMA_relu shouldn't be selected if the FMA operation has multiple uses
-define bfloat @fma_bf16_no_nans_multiple_uses_of_fma(bfloat %a, bfloat %b, bfloat %c) #0 {
+define bfloat @fma_bf16_no_nans_multiple_uses_of_fma(bfloat %a, bfloat %b, bfloat %c) {
; CHECK-LABEL: fma_bf16_no_nans_multiple_uses_of_fma(
; CHECK: {
; CHECK-NEXT: .reg .b16 %rs<9>;
@@ -291,12 +291,12 @@ define bfloat @fma_bf16_no_nans_multiple_uses_of_fma(bfloat %a, bfloat %b, bfloa
%1 = call bfloat @llvm.fma.bf16(bfloat %a, bfloat %b, bfloat %c)
%2 = fcmp ogt bfloat %1, 0.0
%3 = select i1 %2, bfloat %1, bfloat 0.0
- %4 = fadd bfloat %1, 7.0
- %5 = fadd bfloat %4, %1
+ %4 = fadd contract bfloat %1, 7.0
+ %5 = fadd contract bfloat %4, %1
ret bfloat %5
}
-define bfloat @fma_bf16_maxnum_no_nans(bfloat %a, bfloat %b, bfloat %c) #0 {
+define bfloat @fma_bf16_maxnum_no_nans(bfloat %a, bfloat %b, bfloat %c) {
; CHECK-LABEL: fma_bf16_maxnum_no_nans(
; CHECK: {
; CHECK-NEXT: .reg .b16 %rs<5>;
@@ -351,12 +351,12 @@ define bfloat @fma_bf16_maxnum_no_nans(bfloat %a, bfloat %b, bfloat %c) #0 {
; CHECK-SM70-NEXT: shr.u32 %r20, %r19, 16;
; CHECK-SM70-NEXT: st.param.b16 [func_retval0], %r20;
; CHECK-SM70-NEXT: ret;
- %1 = call bfloat @llvm.fma.bf16(bfloat %a, bfloat %b, bfloat %c)
- %2 = call bfloat @llvm.maxnum.bf16(bfloat %1, bfloat 0.0)
+ %1 = call nnan bfloat @llvm.fma.bf16(bfloat %a, bfloat %b, bfloat %c)
+ %2 = call nsz bfloat @llvm.maxnum.bf16(bfloat %1, bfloat 0.0)
ret bfloat %2
}
-define <2 x half> @fma_f16x2_no_nans(<2 x half> %a, <2 x half> %b, <2 x half> %c) #0 {
+define <2 x half> @fma_f16x2_no_nans(<2 x half> %a, <2 x half> %b, <2 x half> %c) {
; CHECK-LABEL: fma_f16x2_no_nans(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<5>;
@@ -399,14 +399,14 @@ define <2 x half> @fma_f16x2_no_nans(<2 x half> %a, <2 x half> %b, <2 x half> %c
; CHECK-SM70-NEXT: selp.b16 %rs4, %rs1, 0x0000, %p1;
; CHECK-SM70-NEXT: st.param.v2.b16 [func_retval0], {%rs4, %rs3};
; CHECK-SM70-NEXT: ret;
- %1 = call <2 x half> @llvm.fma.f16x2(<2 x half> %a, <2 x half> %b, <2 x half> %c)
+ %1 = call nnan <2 x half> @llvm.fma.f16x2(<2 x half> %a, <2 x half> %b, <2 x half> %c)
%2 = fcmp ogt <2 x half> %1, <half 0.0, half 0.0>
- %3 = select <2 x i1> %2, <2 x half> %1, <2 x half> <half 0.0, half 0.0>
+ %3 = select nsz <2 x i1> %2, <2 x half> %1, <2 x half> <half 0.0, half 0.0>
ret <2 x half> %3
}
; FMA relu shouldn't be selected if the FMA operation has multiple uses
-define <2 x half> @fma_f16x2_no_nans_multiple_uses_of_fma(<2 x half> %a, <2 x half> %b, <2 x half> %c) #0 {
+define <2 x half> @fma_f16x2_no_nans_multiple_uses_of_fma(<2 x half> %a, <2 x half> %b, <2 x half> %c) {
; CHECK-LABEL: fma_f16x2_no_nans_multiple_uses_of_fma(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<8>;
@@ -454,12 +454,12 @@ define <2 x half> @fma_f16x2_no_nans_multiple_uses_of_fma(<2 x half> %a, <2 x ha
%1 = call <2 x half> @llvm.fma.f16x2(<2 x half> %a, <2 x half> %b, <2 x half> %c)
%2 = fcmp ogt <2 x half> %1, <half 0.0, half 0.0>
%3 = select <2 x i1> %2, <2 x half> %1, <2 x half> <half 0.0, half 0.0>
- %4 = fadd <2 x half> %1, <half 7.0, half 7.0>
- %5 = fadd <2 x half> %4, %1
+ %4 = fadd contract <2 x half> %1, <half 7.0, half 7.0>
+ %5 = fadd contract <2 x half> %4, %1
ret <2 x half> %5
}
-define <2 x half> @fma_f16x2_maxnum_no_nans(<2 x half> %a, <2 x half> %b, <2 x half> %c) #0 {
+define <2 x half> @fma_f16x2_maxnum_no_nans(<2 x half> %a, <2 x half> %b, <2 x half> %c) {
; CHECK-LABEL: fma_f16x2_maxnum_no_nans(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<5>;
@@ -504,12 +504,12 @@ define <2 x half> @fma_f16x2_maxnum_no_nans(<2 x half> %a, <2 x half> %b, <2 x h
; CHECK-SM70-NEXT: mov.b32 %r9, {%rs4, %rs3};
; CHECK-SM70-NEXT: st.param.b32 [func_retval0], %r9;
; CHECK-SM70-NEXT: ret;
- %1 = call <2 x half> @llvm.fma.f16x2(<2 x half> %a, <2 x half> %b, <2 x half> %c)
- %2 = call <2 x half> @llvm.maxnum.f16x2(<2 x half> %1, <2 x half> <half 0.0, half 0.0>)
+ %1 = call nnan <2 x half> @llvm.fma.f16x2(<2 x half> %a, <2 x half> %b, <2 x half> %c)
+ %2 = call nsz <2 x half> @llvm.maxnum.f16x2(<2 x half> %1, <2 x half> <half 0.0, half 0.0>)
ret <2 x half> %2
}
-define <2 x bfloat> @fma_bf16x2_no_nans(<2 x bfloat> %a, <2 x bfloat> %b, <2 x bfloat> %c) #0 {
+define <2 x bfloat> @fma_bf16x2_no_nans(<2 x bfloat> %a, <2 x bfloat> %b, <2 x bfloat> %c) {
; CHECK-LABEL: fma_bf16x2_no_nans(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<5>;
@@ -580,14 +580,14 @@ define <2 x bfloat> @fma_bf16x2_no_nans(<2 x bfloat> %a, <2 x bfloat> %b, <2 x b
; CHECK-SM70-NEXT: selp.b16 %rs10, %rs7, 0x0000, %p3;
; CHECK-SM70-NEXT: st.param.v2.b16 [func_retval0], {%rs10, %rs9};
; CHECK-SM70-NEXT: ret;
- %1 = call <2 x bfloat> @llvm.fma.bf16x2(<2 x bfloat> %a, <2 x bfloat> %b, <2 x bfloat> %c)
+ %1 = call nnan <2 x bfloat> @llvm.fma.bf16x2(<2 x bfloat> %a, <2 x bfloat> %b, <2 x bfloat> %c)
%2 = fcmp ogt <2 x bfloat> %1, <bfloat 0.0, bfloat 0.0>
- %3 = select <2 x i1> %2, <2 x bfloat> %1, <2 x bfloat> <bfloat 0.0, bfloat 0.0>
+ %3 = select nsz <2 x i1> %2, <2 x bfloat> %1, <2 x bfloat> <bfloat 0.0, bfloat 0.0>
ret <2 x bfloat> %3
}
; FMA_relu shouldn't be selected if the FMA operation has multiple uses
-define <2 x bfloat> @fma_bf16x2_no_nans_multiple_uses_of_fma(<2 x bfloat> %a, <2 x bfloat> %b, <2 x bfloat> %c) #0 {
+define <2 x bfloat> @fma_bf16x2_no_nans_multiple_uses_of_fma(<2 x bfloat> %a, <2 x bfloat> %b, <2 x bfloat> %c) {
; CHECK-LABEL: fma_bf16x2_no_nans_multiple_uses_of_fma(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<9>;
@@ -707,12 +707,12 @@ define <2 x bfloat> @fma_bf16x2_no_nans_multiple_uses_of_fma(<2 x bfloat> %a, <2
%1 = call <2 x bfloat> @llvm.fma.bf16x2(<2 x bfloat> %a, <2 x bfloat> %b, <2 x bfloat> %c)
%2 = fcmp ogt <2 x bfloat> %1, <bfloat 0.0, bfloat 0.0>
%3 = select <2 x i1> %2, <2 x bfloat> %1, <2 x bfloat> <bfloat 0.0, bfloat 0.0>
- %4 = fadd <2 x bfloat> %1, <bfloat 7.0, bfloat 7.0>
- %5 = fadd <2 x bfloat> %4, %1
+ %4 = fadd contract <2 x bfloat> %1, <bfloat 7.0, bfloat 7.0>
+ %5 = fadd contract <2 x bfloat> %4, %1
ret <2 x bfloat> %5
}
-define <2 x bfloat> @fma_bf16x2_maxnum_no_nans(<2 x bfloat> %a, <2 x bfloat> %b, <2 x bfloat> %c) #0 {
+define <2 x bfloat> @fma_bf16x2_maxnum_no_nans(<2 x bfloat> %a, <2 x bfloat> %b, <2 x bfloat> %c) {
; CHECK-LABEL: fma_bf16x2_maxnum_no_nans(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<5>;
@@ -792,10 +792,7 @@ define <2 x bfloat> @fma_bf16x2_maxnum_no_nans(<2 x bfloat> %a, <2 x bfloat> %b,
; CHECK-SM70-NEXT: prmt.b32 %r39, %r38, %r31, 0x7632U;
; CHECK-SM70-NEXT: st.param.b32 [func_retval0], %r39;
; CHECK-SM70-NEXT: ret;
- %1 = call <2 x bfloat> @llvm.fma.bf16x2(<2 x bfloat> %a, <2 x bfloat> %b, <2 x bfloat> %c)
- %2 = call <2 x bfloat> @llvm.maxnum.bf16x2(<2 x bfloat> %1, <2 x bfloat> <bfloat 0.0, bfloat 0.0>)
+ %1 = call nnan <2 x bfloat> @llvm.fma.bf16x2(<2 x bfloat> %a, <2 x bfloat> %b, <2 x bfloat> %c)
+ %2 = call nsz <2 x bfloat> @llvm.maxnum.bf16x2(<2 x bfloat> %1, <2 x bfloat> <bfloat 0.0, bfloat 0.0>)
ret <2 x bfloat> %2
}
-
-attributes #0 = { "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "unsafe-fp-math"="true" }
-attributes #1 = { "unsafe-fp-math"="true" }
diff --git a/llvm/test/CodeGen/NVPTX/frem.ll b/llvm/test/CodeGen/NVPTX/frem.ll
index 5805aed1bebe6..d30c72cef83d5 100644
--- a/llvm/test/CodeGen/NVPTX/frem.ll
+++ b/llvm/test/CodeGen/NVPTX/frem.ll
@@ -1,313 +1,316 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
-; RUN: llc < %s --enable-unsafe-fp-math -mcpu=sm_60 | FileCheck %s --check-prefixes=FAST
-; RUN: llc < %s -mcpu=sm_60 | FileCheck %s --check-prefixes=NORMAL
+; RUN: llc < %s -mcpu=sm_60 | FileCheck %s
target triple = "nvptx64-unknown-cuda"
define half @frem_f16(half %a, half %b) {
-; FAST-LABEL: frem_f16(
-; FAST: {
-; FAST-NEXT: .reg .b16 %rs<4>;
-; FAST-NEXT: .reg .b32 %r<7>;
-; FAST-EMPTY:
-; FAST-NEXT: // %bb.0:
-; FAST-NEXT: ld.param.b16 %rs1, [frem_f16_param_0];
-; FAST-NEXT: ld.param.b16 %rs2, [frem_f16_param_1];
-; FAST-NEXT: cvt.f32.f16 %r1, %rs2;
-; FAST-NEXT: cvt.f32.f16 %r2, %rs1;
-; FAST-NEXT: div.approx.f32 %r3, %r2, %r1;
-; FAST-NEXT: cvt.rzi.f32.f32 %r4, %r3;
-; FAST-NEXT: neg.f32 %r5, %r4;
-; FAST-NEXT: fma.rn.f32 %r6, %r5, %r1, %r2;
-; FAST-NEXT: cvt.rn.f16.f32 %rs3, %r6;
-; FAST-NEXT: st.param.b16 [func_retval0], %rs3;
-; FAST-NEXT: ret;
-;
-; NORMAL-LABEL: frem_f16(
-; NORMAL: {
-; NORMAL-NEXT: .reg .pred %p<2>;
-; NORMAL-NEXT: .reg .b16 %rs<4>;
-; NORMAL-NEXT: .reg .b32 %r<8>;
-; NORMAL-EMPTY:
-; NORMAL-NEXT: // %bb.0:
-; NORMAL-NEXT: ld.param.b16 %rs1, [frem_f16_param_0];
-; NORMAL-NEXT: ld.param.b16 %rs2, [frem_f16_param_1];
-; NORMAL-NEXT: cvt.f32.f16 %r1, %rs2;
-; NORMAL-NEXT: cvt.f32.f16 %r2, %rs1;
-; NORMAL-NEXT: div.rn.f32 %r3, %r2, %r1;
-; NORMAL-NEXT: cvt.rzi.f32.f32 %r4, %r3;
-; NORMAL-NEXT: neg.f32 %r5, %r4;
-; NORMAL-NEXT: fma.rn.f32 %r6, %r5, %r1, %r2;
-; NORMAL-NEXT: testp.infinite.f32 %p1, %r1;
-; NORMAL-NEXT: selp.f32 %r7, %r2, %r6, %p1;
-; NORMAL-NEXT: cvt.rn.f16.f32 %rs3, %r7;
-; NORMAL-NEXT: st.param.b16 [func_retval0], %rs3;
-; NORMAL-NEXT: ret;
+; CHECK-LABEL: frem_f16(
+; CHECK: {
+; CHECK-NEXT: .reg .pred %p<2>;
+; CHECK-NEXT: .reg .b16 %rs<4>;
+; CHECK-NEXT: .reg .b32 %r<8>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b16 %rs1, [frem_f16_param_0];
+; CHECK-NEXT: ld.param.b16 %rs2, [frem_f16_param_1];
+; CHECK-NEXT: cvt.f32.f16 %r1, %rs2;
+; CHECK-NEXT: cvt.f32.f16 %r2, %rs1;
+; CHECK-NEXT: div.rn.f32 %r3, %r2, %r1;
+; CHECK-NEXT: cvt.rzi.f32.f32 %r4, %r3;
+; CHECK-NEXT: neg.f32 %r5, %r4;
+; CHECK-NEXT: fma.rn.f32 %r6, %r5, %r1, %r2;
+; CHECK-NEXT: testp.infinite.f32 %p1, %r1;
+; CHECK-NEXT: selp.f32 %r7, %r2, %r6, %p1;
+; CHECK-NEXT: cvt.rn.f16.f32 %rs3, %r7;
+; CHECK-NEXT: st.param.b16 [func_retval0], %rs3;
+; CHECK-NEXT: ret;
%r = frem half %a, %b
ret half %r
}
+define half @frem_f16_fast(half %a, half %b) {
+; CHECK-LABEL: frem_f16_fast(
+; CHECK: {
+; CHECK-NEXT: .reg .b16 %rs<4>;
+; CHECK-NEXT: .reg .b32 %r<7>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b16 %rs1, [frem_f16_fast_param_0];
+; CHECK-NEXT: ld.param.b16 %rs2, [frem_f16_fast_param_1];
+; CHECK-NEXT: cvt.f32.f16 %r1, %rs2;
+; CHECK-NEXT: cvt.f32.f16 %r2, %rs1;
+; CHECK-NEXT: div.approx.f32 %r3, %r2, %r1;
+; CHECK-NEXT: cvt.rzi.f32.f32 %r4, %r3;
+; CHECK-NEXT: neg.f32 %r5, %r4;
+; CHECK-NEXT: fma.rn.f32 %r6, %r5, %r1, %r2;
+; CHECK-NEXT: cvt.rn.f16.f32 %rs3, %r6;
+; CHECK-NEXT: st.param.b16 [func_retval0], %rs3;
+; CHECK-NEXT: ret;
+ %r = frem afn ninf half %a, %b
+ ret half %r
+}
+
define float @frem_f32(float %a, float %b) {
-; FAST-LABEL: frem_f32(
-; FAST: {
-; FAST-NEXT: .reg .b32 %r<7>;
-; FAST-EMPTY:
-; FAST-NEXT: // %bb.0:
-; FAST-NEXT: ld.param.b32 %r1, [frem_f32_param_0];
-; FAST-NEXT: ld.param.b32 %r2, [frem_f32_param_1];
-; FAST-NEXT: div.approx.f32 %r3, %r1, %r2;
-; FAST-NEXT: cvt.rzi.f32.f32 %r4, %r3;
-; FAST-NEXT: neg.f32 %r5, %r4;
-; FAST-NEXT: fma.rn.f32 %r6, %r5, %r2, %r1;
-; FAST-NEXT: st.param.b32 [func_retval0], %r6;
-; FAST-NEXT: ret;
-;
-; NORMAL-LABEL: frem_f32(
-; NORMAL: {
-; NORMAL-NEXT: .reg .pred %p<2>;
-; NORMAL-NEXT: .reg .b32 %r<8>;
-; NORMAL-EMPTY:
-; NORMAL-NEXT: // %bb.0:
-; NORMAL-NEXT: ld.param.b32 %r1, [frem_f32_param_0];
-; NORMAL-NEXT: ld.param.b32 %r2, [frem_f32_param_1];
-; NORMAL-NEXT: div.rn.f32 %r3, %r1, %r2;
-; NORMAL-NEXT: cvt.rzi.f32.f32 %r4, %r3;
-; NORMAL-NEXT: neg.f32 %r5, %r4;
-; NORMAL-NEXT: fma.rn.f32 %r6, %r5, %r2, %r1;
-; NORMAL-NEXT: testp.infinite.f32 %p1, %r2;
-; NORMAL-NEXT: selp.f32 %r7, %r1, %r6, %p1;
-; NORMAL-NEXT: st.param.b32 [func_retval0], %r7;
-; NORMAL-NEXT: ret;
+; CHECK-LABEL: frem_f32(
+; CHECK: {
+; CHECK-NEXT: .reg .pred %p<2>;
+; CHECK-NEXT: .reg .b32 %r<8>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b32 %r1, [frem_f32_param_0];
+; CHECK-NEXT: ld.param.b32 %r2, [frem_f32_param_1];
+; CHECK-NEXT: div.rn.f32 %r3, %r1, %r2;
+; CHECK-NEXT: cvt.rzi.f32.f32 %r4, %r3;
+; CHECK-NEXT: neg.f32 %r5, %r4;
+; CHECK-NEXT: fma.rn.f32 %r6, %r5, %r2, %r1;
+; CHECK-NEXT: testp.infinite.f32 %p1, %r2;
+; CHECK-NEXT: selp.f32 %r7, %r1, %r6, %p1;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r7;
+; CHECK-NEXT: ret;
%r = frem float %a, %b
ret float %r
}
+define float @frem_f32_fast(float %a, float %b) {
+; CHECK-LABEL: frem_f32_fast(
+; CHECK: {
+; CHECK-NEXT: .reg .b32 %r<7>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b32 %r1, [frem_f32_fast_param_0];
+; CHECK-NEXT: ld.param.b32 %r2, [frem_f32_fast_param_1];
+; CHECK-NEXT: div.approx.f32 %r3, %r1, %r2;
+; CHECK-NEXT: cvt.rzi.f32.f32 %r4, %r3;
+; CHECK-NEXT: neg.f32 %r5, %r4;
+; CHECK-NEXT: fma.rn.f32 %r6, %r5, %r2, %r1;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r6;
+; CHECK-NEXT: ret;
+ %r = frem afn ninf float %a, %b
+ ret float %r
+}
+
define double @frem_f64(double %a, double %b) {
-; FAST-LABEL: frem_f64(
-; FAST: {
-; FAST-NEXT: .reg .b64 %rd<7>;
-; FAST-EMPTY:
-; FAST-NEXT: // %bb.0:
-; FAST-NEXT: ld.param.b64 %rd1, [frem_f64_param_0];
-; FAST-NEXT: ld.param.b64 %rd2, [frem_f64_param_1];
-; FAST-NEXT: div.rn.f64 %rd3, %rd1, %rd2;
-; FAST-NEXT: cvt.rzi.f64.f64 %rd4, %rd3;
-; FAST-NEXT: neg.f64 %rd5, %rd4;
-; FAST-NEXT: fma.rn.f64 %rd6, %rd5, %rd2, %rd1;
-; FAST-NEXT: st.param.b64 [func_retval0], %rd6;
-; FAST-NEXT: ret;
-;
-; NORMAL-LABEL: frem_f64(
-; NORMAL: {
-; NORMAL-NEXT: .reg .pred %p<2>;
-; NORMAL-NEXT: .reg .b64 %rd<8>;
-; NORMAL-EMPTY:
-; NORMAL-NEXT: // %bb.0:
-; NORMAL-NEXT: ld.param.b64 %rd1, [frem_f64_param_0];
-; NORMAL-NEXT: ld.param.b64 %rd2, [frem_f64_param_1];
-; NORMAL-NEXT: div.rn.f64 %rd3, %rd1, %rd2;
-; NORMAL-NEXT: cvt.rzi.f64.f64 %rd4, %rd3;
-; NORMAL-NEXT: neg.f64 %rd5, %rd4;
-; NORMAL-NEXT: fma.rn.f64 %rd6, %rd5, %rd2, %rd1;
-; NORMAL-NEXT: testp.infinite.f64 %p1, %rd2;
-; NORMAL-NEXT: selp.f64 %rd7, %rd1, %rd6, %p1;
-; NORMAL-NEXT: st.param.b64 [func_retval0], %rd7;
-; NORMAL-NEXT: ret;
+; CHECK-LABEL: frem_f64(
+; CHECK: {
+; CHECK-NEXT: .reg .pred %p<2>;
+; CHECK-NEXT: .reg .b64 %rd<8>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b64 %rd1, [frem_f64_param_0];
+; CHECK-NEXT: ld.param.b64 %rd2, [frem_f64_param_1];
+; CHECK-NEXT: div.rn.f64 %rd3, %rd1, %rd2;
+; CHECK-NEXT: cvt.rzi.f64.f64 %rd4, %rd3;
+; CHECK-NEXT: neg.f64 %rd5, %rd4;
+; CHECK-NEXT: fma.rn.f64 %rd6, %rd5, %rd2, %rd1;
+; CHECK-NEXT: testp.infinite.f64 %p1, %rd2;
+; CHECK-NEXT: selp.f64 %rd7, %rd1, %rd6, %p1;
+; CHECK-NEXT: st.param.b64 [func_retval0], %rd7;
+; CHECK-NEXT: ret;
%r = frem double %a, %b
ret double %r
}
+define double @frem_f64_fast(double %a, double %b) {
+; CHECK-LABEL: frem_f64_fast(
+; CHECK: {
+; CHECK-NEXT: .reg .b64 %rd<7>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b64 %rd1, [frem_f64_fast_param_0];
+; CHECK-NEXT: ld.param.b64 %rd2, [frem_f64_fast_param_1];
+; CHECK-NEXT: div.rn.f64 %rd3, %rd1, %rd2;
+; CHECK-NEXT: cvt.rzi.f64.f64 %rd4, %rd3;
+; CHECK-NEXT: neg.f64 %rd5, %rd4;
+; CHECK-NEXT: fma.rn.f64 %rd6, %rd5, %rd2, %rd1;
+; CHECK-NEXT: st.param.b64 [func_retval0], %rd6;
+; CHECK-NEXT: ret;
+ %r = frem afn ninf double %a, %b
+ ret double %r
+}
+
define half @frem_f16_ninf(half %a, half %b) {
-; FAST-LABEL: frem_f16_ninf(
-; FAST: {
-; FAST-NEXT: .reg .b16 %rs<4>;
-; FAST-NEXT: .reg .b32 %r<7>;
-; FAST-EMPTY:
-; FAST-NEXT: // %bb.0:
-; FAST-NEXT: ld.param.b16 %rs1, [frem_f16_ninf_param_0];
-; FAST-NEXT: ld.param.b16 %rs2, [frem_f16_ninf_param_1];
-; FAST-NEXT: cvt.f32.f16 %r1, %rs2;
-; FAST-NEXT: cvt.f32.f16 %r2, %rs1;
-; FAST-NEXT: div.approx.f32 %r3, %r2, %r1;
-; FAST-NEXT: cvt.rzi.f32.f32 %r4, %r3;
-; FAST-NEXT: neg.f32 %r5, %r4;
-; FAST-NEXT: fma.rn.f32 %r6, %r5, %r1, %r2;
-; FAST-NEXT: cvt.rn.f16.f32 %rs3, %r6;
-; FAST-NEXT: st.param.b16 [func_retval0], %rs3;
-; FAST-NEXT: ret;
-;
-; NORMAL-LABEL: frem_f16_ninf(
-; NORMAL: {
-; NORMAL-NEXT: .reg .b16 %rs<4>;
-; NORMAL-NEXT: .reg .b32 %r<7>;
-; NORMAL-EMPTY:
-; NORMAL-NEXT: // %bb.0:
-; NORMAL-NEXT: ld.param.b16 %rs1, [frem_f16_ninf_param_0];
-; NORMAL-NEXT: ld.param.b16 %rs2, [frem_f16_ninf_param_1];
-; NORMAL-NEXT: cvt.f32.f16 %r1, %rs2;
-; NORMAL-NEXT: cvt.f32.f16 %r2, %rs1;
-; NORMAL-NEXT: div.rn.f32 %r3, %r2, %r1;
-; NORMAL-NEXT: cvt.rzi.f32.f32 %r4, %r3;
-; NORMAL-NEXT: neg.f32 %r5, %r4;
-; NORMAL-NEXT: fma.rn.f32 %r6, %r5, %r1, %r2;
-; NORMAL-NEXT: cvt.rn.f16.f32 %rs3, %r6;
-; NORMAL-NEXT: st.param.b16 [func_retval0], %rs3;
-; NORMAL-NEXT: ret;
+; CHECK-LABEL: frem_f16_ninf(
+; CHECK: {
+; CHECK-NEXT: .reg .b16 %rs<4>;
+; CHECK-NEXT: .reg .b32 %r<7>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b16 %rs1, [frem_f16_ninf_param_0];
+; CHECK-NEXT: ld.param.b16 %rs2, [frem_f16_ninf_param_1];
+; CHECK-NEXT: cvt.f32.f16 %r1, %rs2;
+; CHECK-NEXT: cvt.f32.f16 %r2, %rs1;
+; CHECK-NEXT: div.rn.f32 %r3, %r2, %r1;
+; CHECK-NEXT: cvt.rzi.f32.f32 %r4, %r3;
+; CHECK-NEXT: neg.f32 %r5, %r4;
+; CHECK-NEXT: fma.rn.f32 %r6, %r5, %r1, %r2;
+; CHECK-NEXT: cvt.rn.f16.f32 %rs3, %r6;
+; CHECK-NEXT: st.param.b16 [func_retval0], %rs3;
+; CHECK-NEXT: ret;
%r = frem ninf half %a, %b
ret half %r
}
+define half @frem_f16_ninf_fast(half %a, half %b) {
+; CHECK-LABEL: frem_f16_ninf_fast(
+; CHECK: {
+; CHECK-NEXT: .reg .b16 %rs<4>;
+; CHECK-NEXT: .reg .b32 %r<7>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b16 %rs1, [frem_f16_ninf_fast_param_0];
+; CHECK-NEXT: ld.param.b16 %rs2, [frem_f16_ninf_fast_param_1];
+; CHECK-NEXT: cvt.f32.f16 %r1, %rs2;
+; CHECK-NEXT: cvt.f32.f16 %r2, %rs1;
+; CHECK-NEXT: div.approx.f32 %r3, %r2, %r1;
+; CHECK-NEXT: cvt.rzi.f32.f32 %r4, %r3;
+; CHECK-NEXT: neg.f32 %r5, %r4;
+; CHECK-NEXT: fma.rn.f32 %r6, %r5, %r1, %r2;
+; CHECK-NEXT: cvt.rn.f16.f32 %rs3, %r6;
+; CHECK-NEXT: st.param.b16 [func_retval0], %rs3;
+; CHECK-NEXT: ret;
+ %r = frem afn ninf half %a, %b
+ ret half %r
+}
+
define float @frem_f32_ninf(float %a, float %b) {
-; FAST-LABEL: frem_f32_ninf(
-; FAST: {
-; FAST-NEXT: .reg .b32 %r<7>;
-; FAST-EMPTY:
-; FAST-NEXT: // %bb.0:
-; FAST-NEXT: ld.param.b32 %r1, [frem_f32_ninf_param_0];
-; FAST-NEXT: ld.param.b32 %r2, [frem_f32_ninf_param_1];
-; FAST-NEXT: div.approx.f32 %r3, %r1, %r2;
-; FAST-NEXT: cvt.rzi.f32.f32 %r4, %r3;
-; FAST-NEXT: neg.f32 %r5, %r4;
-; FAST-NEXT: fma.rn.f32 %r6, %r5, %r2, %r1;
-; FAST-NEXT: st.param.b32 [func_retval0], %r6;
-; FAST-NEXT: ret;
-;
-; NORMAL-LABEL: frem_f32_ninf(
-; NORMAL: {
-; NORMAL-NEXT: .reg .b32 %r<7>;
-; NORMAL-EMPTY:
-; NORMAL-NEXT: // %bb.0:
-; NORMAL-NEXT: ld.param.b32 %r1, [frem_f32_ninf_param_0];
-; NORMAL-NEXT: ld.param.b32 %r2, [frem_f32_ninf_param_1];
-; NORMAL-NEXT: div.rn.f32 %r3, %r1, %r2;
-; NORMAL-NEXT: cvt.rzi.f32.f32 %r4, %r3;
-; NORMAL-NEXT: neg.f32 %r5, %r4;
-; NORMAL-NEXT: fma.rn.f32 %r6, %r5, %r2, %r1;
-; NORMAL-NEXT: st.param.b32 [func_retval0], %r6;
-; NORMAL-NEXT: ret;
+; CHECK-LABEL: frem_f32_ninf(
+; CHECK: {
+; CHECK-NEXT: .reg .b32 %r<7>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b32 %r1, [frem_f32_ninf_param_0];
+; CHECK-NEXT: ld.param.b32 %r2, [frem_f32_ninf_param_1];
+; CHECK-NEXT: div.rn.f32 %r3, %r1, %r2;
+; CHECK-NEXT: cvt.rzi.f32.f32 %r4, %r3;
+; CHECK-NEXT: neg.f32 %r5, %r4;
+; CHECK-NEXT: fma.rn.f32 %r6, %r5, %r2, %r1;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r6;
+; CHECK-NEXT: ret;
%r = frem ninf float %a, %b
ret float %r
}
+define float @frem_f32_ninf_fast(float %a, float %b) {
+; CHECK-LABEL: frem_f32_ninf_fast(
+; CHECK: {
+; CHECK-NEXT: .reg .b32 %r<7>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b32 %r1, [frem_f32_ninf_fast_param_0];
+; CHECK-NEXT: ld.param.b32 %r2, [frem_f32_ninf_fast_param_1];
+; CHECK-NEXT: div.approx.f32 %r3, %r1, %r2;
+; CHECK-NEXT: cvt.rzi.f32.f32 %r4, %r3;
+; CHECK-NEXT: neg.f32 %r5, %r4;
+; CHECK-NEXT: fma.rn.f32 %r6, %r5, %r2, %r1;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r6;
+; CHECK-NEXT: ret;
+ %r = frem afn ninf float %a, %b
+ ret float %r
+}
+
define double @frem_f64_ninf(double %a, double %b) {
-; FAST-LABEL: frem_f64_ninf(
-; FAST: {
-; FAST-NEXT: .reg .b64 %rd<7>;
-; FAST-EMPTY:
-; FAST-NEXT: // %bb.0:
-; FAST-NEXT: ld.param.b64 %rd1, [frem_f64_ninf_param_0];
-; FAST-NEXT: ld.param.b64 %rd2, [frem_f64_ninf_param_1];
-; FAST-NEXT: div.rn.f64 %rd3, %rd1, %rd2;
-; FAST-NEXT: cvt.rzi.f64.f64 %rd4, %rd3;
-; FAST-NEXT: neg.f64 %rd5, %rd4;
-; FAST-NEXT: fma.rn.f64 %rd6, %rd5, %rd2, %rd1;
-; FAST-NEXT: st.param.b64 [func_retval0], %rd6;
-; FAST-NEXT: ret;
-;
-; NORMAL-LABEL: frem_f64_ninf(
-; NORMAL: {
-; NORMAL-NEXT: .reg .b64 %rd<7>;
-; NORMAL-EMPTY:
-; NORMAL-NEXT: // %bb.0:
-; NORMAL-NEXT: ld.param.b64 %rd1, [frem_f64_ninf_param_0];
-; NORMAL-NEXT: ld.param.b64 %rd2, [frem_f64_ninf_param_1];
-; NORMAL-NEXT: div.rn.f64 %rd3, %rd1, %rd2;
-; NORMAL-NEXT: cvt.rzi.f64.f64 %rd4, %rd3;
-; NORMAL-NEXT: neg.f64 %rd5, %rd4;
-; NORMAL-NEXT: fma.rn.f64 %rd6, %rd5, %rd2, %rd1;
-; NORMAL-NEXT: st.param.b64 [func_retval0], %rd6;
-; NORMAL-NEXT: ret;
+; CHECK-LABEL: frem_f64_ninf(
+; CHECK: {
+; CHECK-NEXT: .reg .b64 %rd<7>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b64 %rd1, [frem_f64_ninf_param_0];
+; CHECK-NEXT: ld.param.b64 %rd2, [frem_f64_ninf_param_1];
+; CHECK-NEXT: div.rn.f64 %rd3, %rd1, %rd2;
+; CHECK-NEXT: cvt.rzi.f64.f64 %rd4, %rd3;
+; CHECK-NEXT: neg.f64 %rd5, %rd4;
+; CHECK-NEXT: fma.rn.f64 %rd6, %rd5, %rd2, %rd1;
+; CHECK-NEXT: st.param.b64 [func_retval0], %rd6;
+; CHECK-NEXT: ret;
%r = frem ninf double %a, %b
ret double %r
}
+define double @frem_f64_ninf_fast(double %a, double %b) {
+; CHECK-LABEL: frem_f64_ninf_fast(
+; CHECK: {
+; CHECK-NEXT: .reg .b64 %rd<7>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b64 %rd1, [frem_f64_ninf_fast_param_0];
+; CHECK-NEXT: ld.param.b64 %rd2, [frem_f64_ninf_fast_param_1];
+; CHECK-NEXT: div.rn.f64 %rd3, %rd1, %rd2;
+; CHECK-NEXT: cvt.rzi.f64.f64 %rd4, %rd3;
+; CHECK-NEXT: neg.f64 %rd5, %rd4;
+; CHECK-NEXT: fma.rn.f64 %rd6, %rd5, %rd2, %rd1;
+; CHECK-NEXT: st.param.b64 [func_retval0], %rd6;
+; CHECK-NEXT: ret;
+ %r = frem afn ninf double %a, %b
+ ret double %r
+}
+
define float @frem_f32_imm1_fast(float %a) {
-; FAST-LABEL: frem_f32_imm1_fast(
-; FAST: {
-; FAST-NEXT: .reg .b32 %r<5>;
-; FAST-EMPTY:
-; FAST-NEXT: // %bb.0:
-; FAST-NEXT: ld.param.b32 %r1, [frem_f32_imm1_fast_param_0];
-; FAST-NEXT: mul.f32 %r2, %r1, 0f3E124925;
-; FAST-NEXT: cvt.rzi.f32.f32 %r3, %r2;
-; FAST-NEXT: fma.rn.f32 %r4, %r3, 0fC0E00000, %r1;
-; FAST-NEXT: st.param.b32 [func_retval0], %r4;
-; FAST-NEXT: ret;
-;
-; NORMAL-LABEL: frem_f32_imm1_fast(
-; NORMAL: {
-; NORMAL-NEXT: .reg .b32 %r<5>;
-; NORMAL-EMPTY:
-; NORMAL-NEXT: // %bb.0:
-; NORMAL-NEXT: ld.param.b32 %r1, [frem_f32_imm1_fast_param_0];
-; NORMAL-NEXT: mul.rn.f32 %r2, %r1, 0f3E124925;
-; NORMAL-NEXT: cvt.rzi.f32.f32 %r3, %r2;
-; NORMAL-NEXT: fma.rn.f32 %r4, %r3, 0fC0E00000, %r1;
-; NORMAL-NEXT: st.param.b32 [func_retval0], %r4;
-; NORMAL-NEXT: ret;
+; CHECK-LABEL: frem_f32_imm1_fast(
+; CHECK: {
+; CHECK-NEXT: .reg .b32 %r<5>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b32 %r1, [frem_f32_imm1_fast_param_0];
+; CHECK-NEXT: mul.rn.f32 %r2, %r1, 0f3E124925;
+; CHECK-NEXT: cvt.rzi.f32.f32 %r3, %r2;
+; CHECK-NEXT: fma.rn.f32 %r4, %r3, 0fC0E00000, %r1;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r4;
+; CHECK-NEXT: ret;
%r = frem arcp float %a, 7.0
ret float %r
}
define float @frem_f32_imm1_normal(float %a) {
-; FAST-LABEL: frem_f32_imm1_normal(
-; FAST: {
-; FAST-NEXT: .reg .b32 %r<5>;
-; FAST-EMPTY:
-; FAST-NEXT: // %bb.0:
-; FAST-NEXT: ld.param.b32 %r1, [frem_f32_imm1_normal_param_0];
-; FAST-NEXT: div.approx.f32 %r2, %r1, 0f40E00000;
-; FAST-NEXT: cvt.rzi.f32.f32 %r3, %r2;
-; FAST-NEXT: fma.rn.f32 %r4, %r3, 0fC0E00000, %r1;
-; FAST-NEXT: st.param.b32 [func_retval0], %r4;
-; FAST-NEXT: ret;
-;
-; NORMAL-LABEL: frem_f32_imm1_normal(
-; NORMAL: {
-; NORMAL-NEXT: .reg .b32 %r<5>;
-; NORMAL-EMPTY:
-; NORMAL-NEXT: // %bb.0:
-; NORMAL-NEXT: ld.param.b32 %r1, [frem_f32_imm1_normal_param_0];
-; NORMAL-NEXT: div.rn.f32 %r2, %r1, 0f40E00000;
-; NORMAL-NEXT: cvt.rzi.f32.f32 %r3, %r2;
-; NORMAL-NEXT: fma.rn.f32 %r4, %r3, 0fC0E00000, %r1;
-; NORMAL-NEXT: st.param.b32 [func_retval0], %r4;
-; NORMAL-NEXT: ret;
+; CHECK-LABEL: frem_f32_imm1_normal(
+; CHECK: {
+; CHECK-NEXT: .reg .b32 %r<5>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b32 %r1, [frem_f32_imm1_normal_param_0];
+; CHECK-NEXT: div.rn.f32 %r2, %r1, 0f40E00000;
+; CHECK-NEXT: cvt.rzi.f32.f32 %r3, %r2;
+; CHECK-NEXT: fma.rn.f32 %r4, %r3, 0fC0E00000, %r1;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r4;
+; CHECK-NEXT: ret;
%r = frem float %a, 7.0
ret float %r
}
define float @frem_f32_imm2(float %a) {
-; FAST-LABEL: frem_f32_imm2(
-; FAST: {
-; FAST-NEXT: .reg .b32 %r<7>;
-; FAST-EMPTY:
-; FAST-NEXT: // %bb.0:
-; FAST-NEXT: ld.param.b32 %r1, [frem_f32_imm2_param_0];
-; FAST-NEXT: mov.b32 %r2, 0f40E00000;
-; FAST-NEXT: div.approx.f32 %r3, %r2, %r1;
-; FAST-NEXT: cvt.rzi.f32.f32 %r4, %r3;
-; FAST-NEXT: neg.f32 %r5, %r4;
-; FAST-NEXT: fma.rn.f32 %r6, %r5, %r1, 0f40E00000;
-; FAST-NEXT: st.param.b32 [func_retval0], %r6;
-; FAST-NEXT: ret;
-;
-; NORMAL-LABEL: frem_f32_imm2(
-; NORMAL: {
-; NORMAL-NEXT: .reg .pred %p<2>;
-; NORMAL-NEXT: .reg .b32 %r<8>;
-; NORMAL-EMPTY:
-; NORMAL-NEXT: // %bb.0:
-; NORMAL-NEXT: ld.param.b32 %r1, [frem_f32_imm2_param_0];
-; NORMAL-NEXT: mov.b32 %r2, 0f40E00000;
-; NORMAL-NEXT: div.rn.f32 %r3, %r2, %r1;
-; NORMAL-NEXT: cvt.rzi.f32.f32 %r4, %r3;
-; NORMAL-NEXT: neg.f32 %r5, %r4;
-; NORMAL-NEXT: fma.rn.f32 %r6, %r5, %r1, 0f40E00000;
-; NORMAL-NEXT: testp.infinite.f32 %p1, %r1;
-; NORMAL-NEXT: selp.f32 %r7, 0f40E00000, %r6, %p1;
-; NORMAL-NEXT: st.param.b32 [func_retval0], %r7;
-; NORMAL-NEXT: ret;
+; CHECK-LABEL: frem_f32_imm2(
+; CHECK: {
+; CHECK-NEXT: .reg .pred %p<2>;
+; CHECK-NEXT: .reg .b32 %r<8>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b32 %r1, [frem_f32_imm2_param_0];
+; CHECK-NEXT: mov.b32 %r2, 0f40E00000;
+; CHECK-NEXT: div.rn.f32 %r3, %r2, %r1;
+; CHECK-NEXT: cvt.rzi.f32.f32 %r4, %r3;
+; CHECK-NEXT: neg.f32 %r5, %r4;
+; CHECK-NEXT: fma.rn.f32 %r6, %r5, %r1, 0f40E00000;
+; CHECK-NEXT: testp.infinite.f32 %p1, %r1;
+; CHECK-NEXT: selp.f32 %r7, 0f40E00000, %r6, %p1;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r7;
+; CHECK-NEXT: ret;
%r = frem float 7.0, %a
ret float %r
}
+
+define float @frem_f32_imm2_fast(float %a) {
+; CHECK-LABEL: frem_f32_imm2_fast(
+; CHECK: {
+; CHECK-NEXT: .reg .b32 %r<7>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b32 %r1, [frem_f32_imm2_fast_param_0];
+; CHECK-NEXT: mov.b32 %r2, 0f40E00000;
+; CHECK-NEXT: div.approx.f32 %r3, %r2, %r1;
+; CHECK-NEXT: cvt.rzi.f32.f32 %r4, %r3;
+; CHECK-NEXT: neg.f32 %r5, %r4;
+; CHECK-NEXT: fma.rn.f32 %r6, %r5, %r1, 0f40E00000;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r6;
+; CHECK-NEXT: ret;
+ %r = frem afn ninf float 7.0, %a
+ ret float %r
+}
diff --git a/llvm/test/CodeGen/NVPTX/sqrt-approx.ll b/llvm/test/CodeGen/NVPTX/sqrt-approx.ll
index 3989c8e32e458..7e4e701af4cd1 100644
--- a/llvm/test/CodeGen/NVPTX/sqrt-approx.ll
+++ b/llvm/test/CodeGen/NVPTX/sqrt-approx.ll
@@ -13,7 +13,7 @@ declare double @llvm.sqrt.f64(double)
; -- reciprocal sqrt --
-define float @test_rsqrt32(float %a) #0 {
+define float @test_rsqrt32(float %a) {
; CHECK-LABEL: test_rsqrt32(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<3>;
@@ -28,7 +28,7 @@ define float @test_rsqrt32(float %a) #0 {
ret float %ret
}
-define float @test_rsqrt_ftz(float %a) #0 #1 {
+define float @test_rsqrt_ftz(float %a) #1 {
; CHECK-LABEL: test_rsqrt_ftz(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<3>;
@@ -76,7 +76,7 @@ define double @test_rsqrt64_ftz(double %a) #1 {
; -- sqrt --
-define float @test_sqrt32(float %a) #0 {
+define float @test_sqrt32(float %a) {
; CHECK-LABEL: test_sqrt32(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<3>;
@@ -90,7 +90,7 @@ define float @test_sqrt32(float %a) #0 {
ret float %ret
}
-define float @test_sqrt32_ninf(float %a) #0 {
+define float @test_sqrt32_ninf(float %a) {
; CHECK-LABEL: test_sqrt32_ninf(
; CHECK: {
; CHECK-NEXT: .reg .pred %p<2>;
@@ -108,7 +108,7 @@ define float @test_sqrt32_ninf(float %a) #0 {
ret float %ret
}
-define float @test_sqrt_ftz(float %a) #0 #1 {
+define float @test_sqrt_ftz(float %a) #1 {
; CHECK-LABEL: test_sqrt_ftz(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<3>;
@@ -122,7 +122,7 @@ define float @test_sqrt_ftz(float %a) #0 #1 {
ret float %ret
}
-define float @test_sqrt_ftz_ninf(float %a) #0 #1 {
+define float @test_sqrt_ftz_ninf(float %a) #1 {
; CHECK-LABEL: test_sqrt_ftz_ninf(
; CHECK: {
; CHECK-NEXT: .reg .pred %p<2>;
@@ -139,7 +139,7 @@ define float @test_sqrt_ftz_ninf(float %a) #0 #1 {
ret float %ret
}
-define double @test_sqrt64(double %a) #0 {
+define double @test_sqrt64(double %a) {
; CHECK-LABEL: test_sqrt64(
; CHECK: {
; CHECK-NEXT: .reg .b64 %rd<3>;
@@ -156,7 +156,7 @@ define double @test_sqrt64(double %a) #0 {
; There's no sqrt.approx.f64 instruction; we emit
; reciprocal(rsqrt.approx.f64(x)). There's no non-ftz approximate reciprocal,
; so we just use the ftz version.
-define double @test_sqrt64_ninf(double %a) #0 {
+define double @test_sqrt64_ninf(double %a) {
; CHECK-LABEL: test_sqrt64_ninf(
; CHECK: {
; CHECK-NEXT: .reg .pred %p<2>;
@@ -175,7 +175,7 @@ define double @test_sqrt64_ninf(double %a) #0 {
ret double %ret
}
-define double @test_sqrt64_ftz(double %a) #0 #1 {
+define double @test_sqrt64_ftz(double %a) #1 {
; CHECK-LABEL: test_sqrt64_ftz(
; CHECK: {
; CHECK-NEXT: .reg .b64 %rd<3>;
@@ -190,7 +190,7 @@ define double @test_sqrt64_ftz(double %a) #0 #1 {
}
; There's no sqrt.approx.ftz.f64 instruction; we just use the non-ftz version.
-define double @test_sqrt64_ftz_ninf(double %a) #0 #1 {
+define double @test_sqrt64_ftz_ninf(double %a) #1 {
; CHECK-LABEL: test_sqrt64_ftz_ninf(
; CHECK: {
; CHECK-NEXT: .reg .pred %p<2>;
@@ -214,7 +214,7 @@ define double @test_sqrt64_ftz_ninf(double %a) #0 #1 {
; The sqrt and rsqrt refinement algorithms both emit an rsqrt.approx, followed
; by some math.
-define float @test_rsqrt32_refined(float %a) #0 #2 {
+define float @test_rsqrt32_refined(float %a) #2 {
; CHECK-LABEL: test_rsqrt32_refined(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<7>;
@@ -229,11 +229,11 @@ define float @test_rsqrt32_refined(float %a) #0 #2 {
; CHECK-NEXT: st.param.b32 [func_retval0], %r6;
; CHECK-NEXT: ret;
%val = tail call float @llvm.sqrt.f32(float %a)
- %ret = fdiv arcp float 1.0, %val
+ %ret = fdiv arcp contract float 1.0, %val
ret float %ret
}
-define float @test_sqrt32_refined(float %a) #0 #2 {
+define float @test_sqrt32_refined(float %a) #2 {
; CHECK-LABEL: test_sqrt32_refined(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<3>;
@@ -247,7 +247,7 @@ define float @test_sqrt32_refined(float %a) #0 #2 {
ret float %ret
}
-define float @test_sqrt32_refined_ninf(float %a) #0 #2 {
+define float @test_sqrt32_refined_ninf(float %a) #2 {
; CHECK-LABEL: test_sqrt32_refined_ninf(
; CHECK: {
; CHECK-NEXT: .reg .pred %p<2>;
@@ -265,11 +265,11 @@ define float @test_sqrt32_refined_ninf(float %a) #0 #2 {
; CHECK-NEXT: selp.f32 %r8, 0f00000000, %r6, %p1;
; CHECK-NEXT: st.param.b32 [func_retval0], %r8;
; CHECK-NEXT: ret;
- %ret = tail call ninf afn float @llvm.sqrt.f32(float %a)
+ %ret = tail call ninf afn contract float @llvm.sqrt.f32(float %a)
ret float %ret
}
-define double @test_rsqrt64_refined(double %a) #0 #2 {
+define double @test_rsqrt64_refined(double %a) #2 {
; CHECK-LABEL: test_rsqrt64_refined(
; CHECK: {
; CHECK-NEXT: .reg .b64 %rd<7>;
@@ -284,11 +284,11 @@ define double @test_rsqrt64_refined(double %a) #0 #2 {
; CHECK-NEXT: st.param.b64 [func_retval0], %rd6;
; CHECK-NEXT: ret;
%val = tail call double @llvm.sqrt.f64(double %a)
- %ret = fdiv arcp double 1.0, %val
+ %ret = fdiv arcp contract double 1.0, %val
ret double %ret
}
-define double @test_sqrt64_refined(double %a) #0 #2 {
+define double @test_sqrt64_refined(double %a) #2 {
; CHECK-LABEL: test_sqrt64_refined(
; CHECK: {
; CHECK-NEXT: .reg .b64 %rd<3>;
@@ -302,7 +302,7 @@ define double @test_sqrt64_refined(double %a) #0 #2 {
ret double %ret
}
-define double @test_sqrt64_refined_ninf(double %a) #0 #2 {
+define double @test_sqrt64_refined_ninf(double %a) #2 {
; CHECK-LABEL: test_sqrt64_refined_ninf(
; CHECK: {
; CHECK-NEXT: .reg .pred %p<2>;
@@ -320,13 +320,13 @@ define double @test_sqrt64_refined_ninf(double %a) #0 #2 {
; CHECK-NEXT: selp.f64 %rd8, 0d0000000000000000, %rd6, %p1;
; CHECK-NEXT: st.param.b64 [func_retval0], %rd8;
; CHECK-NEXT: ret;
- %ret = tail call ninf afn double @llvm.sqrt.f64(double %a)
+ %ret = tail call ninf afn contract double @llvm.sqrt.f64(double %a)
ret double %ret
}
; -- refined sqrt and rsqrt with ftz enabled --
-define float @test_rsqrt32_refined_ftz(float %a) #0 #1 #2 {
+define float @test_rsqrt32_refined_ftz(float %a) #1 #2 {
; CHECK-LABEL: test_rsqrt32_refined_ftz(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<7>;
@@ -341,11 +341,11 @@ define float @test_rsqrt32_refined_ftz(float %a) #0 #1 #2 {
; CHECK-NEXT: st.param.b32 [func_retval0], %r6;
; CHECK-NEXT: ret;
%val = tail call float @llvm.sqrt.f32(float %a)
- %ret = fdiv arcp float 1.0, %val
+ %ret = fdiv arcp contract float 1.0, %val
ret float %ret
}
-define float @test_sqrt32_refined_ftz(float %a) #0 #1 #2 {
+define float @test_sqrt32_refined_ftz(float %a) #1 #2 {
; CHECK-LABEL: test_sqrt32_refined_ftz(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<3>;
@@ -359,7 +359,7 @@ define float @test_sqrt32_refined_ftz(float %a) #0 #1 #2 {
ret float %ret
}
-define float @test_sqrt32_refined_ftz_ninf(float %a) #0 #1 #2 {
+define float @test_sqrt32_refined_ftz_ninf(float %a) #1 #2 {
; CHECK-LABEL: test_sqrt32_refined_ftz_ninf(
; CHECK: {
; CHECK-NEXT: .reg .pred %p<2>;
@@ -376,12 +376,12 @@ define float @test_sqrt32_refined_ftz_ninf(float %a) #0 #1 #2 {
; CHECK-NEXT: selp.f32 %r7, 0f00000000, %r6, %p1;
; CHECK-NEXT: st.param.b32 [func_retval0], %r7;
; CHECK-NEXT: ret;
- %ret = tail call ninf afn float @llvm.sqrt.f32(float %a)
+ %ret = tail call ninf afn contract float @llvm.sqrt.f32(float %a)
ret float %ret
}
; There's no rsqrt.approx.ftz.f64, so we just use the non-ftz version.
-define double @test_rsqrt64_refined_ftz(double %a) #0 #1 #2 {
+define double @test_rsqrt64_refined_ftz(double %a) #1 #2 {
; CHECK-LABEL: test_rsqrt64_refined_ftz(
; CHECK: {
; CHECK-NEXT: .reg .b64 %rd<7>;
@@ -396,11 +396,11 @@ define double @test_rsqrt64_refined_ftz(double %a) #0 #1 #2 {
; CHECK-NEXT: st.param.b64 [func_retval0], %rd6;
; CHECK-NEXT: ret;
%val = tail call double @llvm.sqrt.f64(double %a)
- %ret = fdiv arcp double 1.0, %val
+ %ret = fdiv arcp contract double 1.0, %val
ret double %ret
}
-define double @test_sqrt64_refined_ftz(double %a) #0 #1 #2 {
+define double @test_sqrt64_refined_ftz(double %a) #1 #2 {
; CHECK-LABEL: test_sqrt64_refined_ftz(
; CHECK: {
; CHECK-NEXT: .reg .b64 %rd<3>;
@@ -414,7 +414,7 @@ define double @test_sqrt64_refined_ftz(double %a) #0 #1 #2 {
ret double %ret
}
-define double @test_sqrt64_refined_ftz_ninf(double %a) #0 #1 #2 {
+define double @test_sqrt64_refined_ftz_ninf(double %a) #1 #2 {
; CHECK-LABEL: test_sqrt64_refined_ftz_ninf(
; CHECK: {
; CHECK-NEXT: .reg .pred %p<2>;
@@ -432,10 +432,9 @@ define double @test_sqrt64_refined_ftz_ninf(double %a) #0 #1 #2 {
; CHECK-NEXT: selp.f64 %rd8, 0d0000000000000000, %rd6, %p1;
; CHECK-NEXT: st.param.b64 [func_retval0], %rd8;
; CHECK-NEXT: ret;
- %ret = tail call ninf afn double @llvm.sqrt.f64(double %a)
+ %ret = tail call ninf afn contract double @llvm.sqrt.f64(double %a)
ret double %ret
}
-attributes #0 = { "unsafe-fp-math" = "true" }
attributes #1 = { "denormal-fp-math-f32" = "preserve-sign,preserve-sign" }
attributes #2 = { "reciprocal-estimates" = "rsqrtf:1,rsqrtd:1,sqrtf:1,sqrtd:1" }
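Taken together, the updated tests show the migration made on the test side: approximate sqrt/rsqrt lowering is no longer keyed off the function-level "unsafe-fp-math" attribute (the removed #0 group) but off fast-math flags carried by the individual instructions. A minimal sketch of the flag-driven form (function name is illustrative, not from the commit):

define float @sqrt_approx_flags_sketch(float %a) {
  ; ninf+afn on the call is what now requests the approximate lowering;
  ; the refined test variants additionally carry contract so the
  ; refinement math may be fused.
  %r = call ninf afn float @llvm.sqrt.f32(float %a)
  ret float %r
}
declare float @llvm.sqrt.f32(float)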