[llvm-branch-commits] [llvm] AMDGPU: Introduce f64 rsq pattern in AMDGPUCodeGenPrepare (PR #172053)
via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Fri Dec 12 09:41:34 PST 2025
llvmbot wrote:
@llvm/pr-subscribers-backend-amdgpu
Author: Matt Arsenault (arsenm)
Handle this here instead of in DAGCombine, mostly because the f32
case is handled here due to its dependency on !fpmath. This also lets
us take advantage of computeKnownFPClass.
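For illustration, here is a minimal host-side sketch of the refinement math the new expansion emits (see emitRsqF64 in the diff below). This is not the patch's code: `rsqSeed` is a hypothetical stand-in for the v_rsq_f64 instruction, and the select that keeps x == 0.0 and x == +inf propagating correctly is omitted; only the fma/fmul sequence and the 0.375/0.5 constants are taken from the patch.

```cpp
// Numerical sketch of the f64 rsq correction sequence, assuming an
// approximate seed in place of v_rsq_f64 (which can't run on the host).
#include <cmath>
#include <cstdio>

// Hypothetical stand-in for the hardware seed: truncate a correctly
// rounded 1/sqrt(x) to float precision so the refinement has work to do.
static double rsqSeed(double X) {
  return static_cast<float>(1.0 / std::sqrt(X));
}

static double rsqF64(double X) {
  double Y0 = rsqSeed(X);
  // E = 1 - X*Y0^2 is the relative error term of the seed.
  double E = std::fma(-Y0 * X, Y0, 1.0);
  // Y0 * (1 + E/2 + 3*E^2/8) is the second-order Taylor expansion of
  // Y0 * (1 - E)^(-1/2), which equals 1/sqrt(X) exactly.
  return std::fma(Y0 * E, std::fma(E, 0.375, 0.5), Y0);
}

int main() {
  for (double X : {0.5, 2.0, 3.141592653589793, 1.0e10}) {
    double Ref = 1.0 / std::sqrt(X);
    std::printf("x=%-8g seed rel err=% .3e  refined rel err=% .3e\n", X,
                rsqSeed(X) / Ref - 1.0, rsqF64(X) / Ref - 1.0);
  }
  return 0;
}
```

Keeping the E^2 term means one step reduces the seed's relative error to roughly its cube, which is what makes a single iteration on top of v_rsq_f64 enough for the 1 ulp target.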
---
Patch is 604.88 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/172053.diff
3 Files Affected:
- (modified) llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp (+129-12)
- (modified) llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-fdiv.f64.ll (+309-56)
- (modified) llvm/test/CodeGen/AMDGPU/rsq.f64.ll (+5716-4375)
``````````diff
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
index 71ea9ef6fc050..e45d0652a65ef 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
@@ -236,6 +236,9 @@ class AMDGPUCodeGenPrepareImpl
FastMathFlags FMF) const;
Value *emitSqrtIEEE2ULP(IRBuilder<> &Builder, Value *Src,
FastMathFlags FMF) const;
+ Value *emitRsqF64(IRBuilder<> &Builder, Value *X, FastMathFlags SqrtFMF,
+ FastMathFlags DivFMF, const Instruction *CtxI,
+ bool IsNegative) const;
bool tryNarrowMathIfNoOverflow(Instruction *I);
@@ -605,6 +608,94 @@ static Value *emitRsqIEEE1ULP(IRBuilder<> &Builder, Value *Src,
return Builder.CreateFMul(Rsq, OutputScaleFactor);
}
+/// Emit inverse sqrt expansion for f64 with a correction sequence on top of
+/// v_rsq_f64. This should give a 1ulp result.
+Value *AMDGPUCodeGenPrepareImpl::emitRsqF64(IRBuilder<> &Builder, Value *X,
+ FastMathFlags SqrtFMF,
+ FastMathFlags DivFMF,
+ const Instruction *CtxI,
+ bool IsNegative) const {
+ // rsq(x):
+ // double y0 = BUILTIN_AMDGPU_RSQRT_F64(x);
+ // double e = MATH_MAD(-y0 * (x == PINF_F64 || x == 0.0 ? y0 : x), y0, 1.0);
+ // return MATH_MAD(y0*e, MATH_MAD(e, 0.375, 0.5), y0);
+ //
+ // The rsq instruction handles the special cases correctly. We need to check
+ // for the edge case conditions to ensure the special case propagates through
+ // the later instructions.
+
+ Value *Y0 = Builder.CreateUnaryIntrinsic(Intrinsic::amdgcn_rsq, X);
+
+ // Try to elide the edge case check.
+ //
+ // Fast math flags imply:
+ // sqrt ninf => !isinf(x)
+ // sqrt nnan => not helpful
+ // fdiv ninf => x != 0, !isinf(x)
+ // fdiv nnan => x != 0
+ bool MaybePosInf = !SqrtFMF.noInfs() && !DivFMF.noInfs();
+ bool MaybeZero = !DivFMF.noInfs() && !DivFMF.noNaNs();
+
+ DenormalMode DenormMode;
+ FPClassTest Interested = fcNone;
+ if (MaybeZero)
+ Interested = fcZero;
+ if (MaybePosInf)
+ Interested |= fcPosInf;
+
+ if (Interested != fcNone) {
+ KnownFPClass KnownSrc = computeKnownFPClass(X, Interested, CtxI);
+ if (KnownSrc.isKnownNeverPosInfinity())
+ MaybePosInf = false;
+
+ DenormMode = F.getDenormalMode(X->getType()->getFltSemantics());
+ if (KnownSrc.isKnownNeverLogicalZero(DenormMode))
+ MaybeZero = false;
+ }
+
+ Value *SpecialOrRsq = Y0;
+ if (MaybeZero || MaybePosInf) {
+ Value *Cond;
+ if (MaybePosInf && MaybeZero) {
+ if (DenormMode.Input != DenormalMode::DenormalModeKind::Dynamic) {
+ FPClassTest TestMask = fcPosInf | fcZero;
+ if (DenormMode.inputsAreZero())
+ TestMask |= fcSubnormal;
+
+ Cond = Builder.createIsFPClass(X, TestMask);
+ } else {
+ // Avoid using llvm.is.fpclass for dynamic denormal mode, since it
+ // doesn't respect the floating-point environment.
+ Value *IsZero =
+ Builder.CreateFCmpOEQ(X, ConstantFP::getZero(X->getType()));
+ Value *IsInf =
+ Builder.CreateFCmpOEQ(X, ConstantFP::getInfinity(X->getType()));
+ Cond = Builder.CreateOr(IsZero, IsInf);
+ }
+ } else if (MaybeZero) {
+ Cond = Builder.CreateFCmpOEQ(X, ConstantFP::getZero(X->getType()));
+ } else {
+ Cond = Builder.CreateFCmpOEQ(X, ConstantFP::getInfinity(X->getType()));
+ }
+
+ SpecialOrRsq = Builder.CreateSelect(Cond, Y0, X);
+ }
+
+ Value *NegY0 = Builder.CreateFNeg(Y0);
+ Value *NegXY0 = Builder.CreateFMul(NegY0, SpecialOrRsq);
+
+ // Could be fmuladd, but isFMAFasterThanFMulAndFAdd is always true for f64.
+ Value *E = Builder.CreateFMA(NegXY0, Y0, ConstantFP::get(X->getType(), 1.0));
+ Value *Y0E = Builder.CreateFMul(Y0, E);
+
+ Value *EFMA = Builder.CreateFMA(E, ConstantFP::get(X->getType(), 0.375),
+ ConstantFP::get(X->getType(), 0.5));
+ if (IsNegative)
+ EFMA = Builder.CreateFNeg(EFMA);
+
+ return Builder.CreateFMA(Y0E, EFMA, Y0);
+}
+
bool AMDGPUCodeGenPrepareImpl::canOptimizeWithRsq(const FPMathOperator *SqrtOp,
FastMathFlags DivFMF,
FastMathFlags SqrtFMF) const {
@@ -612,8 +703,22 @@ bool AMDGPUCodeGenPrepareImpl::canOptimizeWithRsq(const FPMathOperator *SqrtOp,
if (!DivFMF.allowContract() || !SqrtFMF.allowContract())
return false;
- // v_rsq_f32 gives 1ulp
- return SqrtFMF.approxFunc() || SqrtOp->getFPAccuracy() >= 1.0f;
+ Type *EltTy = SqrtOp->getType()->getScalarType();
+ switch (EltTy->getTypeID()) {
+ case Type::FloatTyID:
+ // v_rsq_f32 gives 1ulp
+ // Separate correctly rounded fdiv + sqrt give ~1.81 ulp.
+
+ // FIXME: rsq formation should not depend on approx func or the fpmath
+ // accuracy. This strictly improves precision.
+ return SqrtFMF.approxFunc() || SqrtOp->getFPAccuracy() >= 1.0f;
+ case Type::DoubleTyID:
+ return true;
+ default:
+ return false;
+ }
+
+ llvm_unreachable("covered switch");
}
Value *AMDGPUCodeGenPrepareImpl::optimizeWithRsq(
@@ -629,8 +734,6 @@ Value *AMDGPUCodeGenPrepareImpl::optimizeWithRsq(
if (!CLHS)
return nullptr;
- assert(Den->getType()->isFloatTy());
-
bool IsNegative = false;
// TODO: Handle other numerator values with arcp.
@@ -639,14 +742,20 @@ Value *AMDGPUCodeGenPrepareImpl::optimizeWithRsq(
IRBuilder<>::FastMathFlagGuard Guard(Builder);
Builder.setFastMathFlags(DivFMF | SqrtFMF);
- if ((DivFMF.approxFunc() && SqrtFMF.approxFunc()) ||
- canIgnoreDenormalInput(Den, CtxI)) {
- Value *Result = Builder.CreateUnaryIntrinsic(Intrinsic::amdgcn_rsq, Den);
- // -1.0 / sqrt(x) -> fneg(rsq(x))
- return IsNegative ? Builder.CreateFNeg(Result) : Result;
+ if (Den->getType()->isFloatTy()) {
+ if ((DivFMF.approxFunc() && SqrtFMF.approxFunc()) ||
+ canIgnoreDenormalInput(Den, CtxI)) {
+ Value *Result =
+ Builder.CreateUnaryIntrinsic(Intrinsic::amdgcn_rsq, Den);
+ // -1.0 / sqrt(x) -> fneg(rsq(x))
+ return IsNegative ? Builder.CreateFNeg(Result) : Result;
+ }
+
+ return emitRsqIEEE1ULP(Builder, Den, IsNegative);
}
- return emitRsqIEEE1ULP(Builder, Den, IsNegative);
+ if (Den->getType()->isDoubleTy())
+ return emitRsqF64(Builder, Den, SqrtFMF, DivFMF, CtxI, IsNegative);
}
return nullptr;
@@ -758,6 +867,9 @@ Value *AMDGPUCodeGenPrepareImpl::visitFDivElement(
return Rsq;
}
+ if (!Num->getType()->isFloatTy())
+ return nullptr;
+
Value *Rcp = optimizeWithRcp(Builder, Num, Den, DivFMF, FDivInst);
if (Rcp)
return Rcp;
@@ -793,7 +905,8 @@ bool AMDGPUCodeGenPrepareImpl::visitFDiv(BinaryOperator &FDiv) {
return false;
Type *Ty = FDiv.getType()->getScalarType();
- if (!Ty->isFloatTy())
+ const bool IsFloat = Ty->isFloatTy();
+ if (!IsFloat && !Ty->isDoubleTy())
return false;
// The f64 rcp/rsq approximations are pretty inaccurate. We can do an
@@ -818,6 +931,10 @@ bool AMDGPUCodeGenPrepareImpl::visitFDiv(BinaryOperator &FDiv) {
RsqOp = SqrtOp->getOperand(0);
}
+ // rcp path not yet implemented for f64.
+ if (!IsFloat && !RsqOp)
+ return false;
+
// Inaccurate rcp is allowed with afn.
//
// Defer to codegen to handle this.
@@ -832,7 +949,7 @@ bool AMDGPUCodeGenPrepareImpl::visitFDiv(BinaryOperator &FDiv) {
return false;
// Defer the correct implementations to codegen.
- if (ReqdAccuracy < 1.0f)
+ if (IsFloat && ReqdAccuracy < 1.0f)
return false;
IRBuilder<> Builder(FDiv.getParent(), std::next(FDiv.getIterator()));
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-fdiv.f64.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-fdiv.f64.ll
index b97cd91f2ab32..764b10a7d1987 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-fdiv.f64.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-fdiv.f64.ll
@@ -4,8 +4,15 @@
define double @rsq_f64(double %x) {
; CHECK-LABEL: define double @rsq_f64(
; CHECK-SAME: double [[X:%.*]]) {
-; CHECK-NEXT: [[SQRT_X:%.*]] = call contract double @llvm.sqrt.f64(double [[X]])
-; CHECK-NEXT: [[FDIV:%.*]] = fdiv contract double 1.000000e+00, [[SQRT_X]]
+; CHECK-NEXT: [[TMP1:%.*]] = call contract double @llvm.amdgcn.rsq.f64(double [[X]])
+; CHECK-NEXT: [[TMP2:%.*]] = call i1 @llvm.is.fpclass.f64(double [[X]], i32 608)
+; CHECK-NEXT: [[TMP3:%.*]] = select contract i1 [[TMP2]], double [[TMP1]], double [[X]]
+; CHECK-NEXT: [[TMP4:%.*]] = fneg contract double [[TMP1]]
+; CHECK-NEXT: [[TMP5:%.*]] = fmul contract double [[TMP4]], [[TMP3]]
+; CHECK-NEXT: [[TMP6:%.*]] = call contract double @llvm.fma.f64(double [[TMP5]], double [[TMP1]], double 1.000000e+00)
+; CHECK-NEXT: [[TMP7:%.*]] = fmul contract double [[TMP1]], [[TMP6]]
+; CHECK-NEXT: [[TMP8:%.*]] = call contract double @llvm.fma.f64(double [[TMP6]], double 3.750000e-01, double 5.000000e-01)
+; CHECK-NEXT: [[FDIV:%.*]] = call contract double @llvm.fma.f64(double [[TMP7]], double [[TMP8]], double [[TMP1]])
; CHECK-NEXT: ret double [[FDIV]]
;
%sqrt.x = call contract double @llvm.sqrt.f64(double %x)
@@ -16,8 +23,16 @@ define double @rsq_f64(double %x) {
define double @neg_rsq_f64(double %x) {
; CHECK-LABEL: define double @neg_rsq_f64(
; CHECK-SAME: double [[X:%.*]]) {
-; CHECK-NEXT: [[SQRT_X:%.*]] = call contract double @llvm.sqrt.f64(double [[X]])
-; CHECK-NEXT: [[FDIV:%.*]] = fdiv contract double -1.000000e+00, [[SQRT_X]]
+; CHECK-NEXT: [[TMP1:%.*]] = call contract double @llvm.amdgcn.rsq.f64(double [[X]])
+; CHECK-NEXT: [[TMP2:%.*]] = call i1 @llvm.is.fpclass.f64(double [[X]], i32 608)
+; CHECK-NEXT: [[TMP3:%.*]] = select contract i1 [[TMP2]], double [[TMP1]], double [[X]]
+; CHECK-NEXT: [[TMP4:%.*]] = fneg contract double [[TMP1]]
+; CHECK-NEXT: [[TMP5:%.*]] = fmul contract double [[TMP4]], [[TMP3]]
+; CHECK-NEXT: [[TMP6:%.*]] = call contract double @llvm.fma.f64(double [[TMP5]], double [[TMP1]], double 1.000000e+00)
+; CHECK-NEXT: [[TMP7:%.*]] = fmul contract double [[TMP1]], [[TMP6]]
+; CHECK-NEXT: [[TMP8:%.*]] = call contract double @llvm.fma.f64(double [[TMP6]], double 3.750000e-01, double 5.000000e-01)
+; CHECK-NEXT: [[TMP9:%.*]] = fneg contract double [[TMP8]]
+; CHECK-NEXT: [[FDIV:%.*]] = call contract double @llvm.fma.f64(double [[TMP7]], double [[TMP9]], double [[TMP1]])
; CHECK-NEXT: ret double [[FDIV]]
;
%sqrt.x = call contract double @llvm.sqrt.f64(double %x)
@@ -28,8 +43,15 @@ define double @neg_rsq_f64(double %x) {
define double @rsq_f64_nnan(double %x) {
; CHECK-LABEL: define double @rsq_f64_nnan(
; CHECK-SAME: double [[X:%.*]]) {
-; CHECK-NEXT: [[SQRT_X:%.*]] = call nnan contract double @llvm.sqrt.f64(double [[X]])
-; CHECK-NEXT: [[FDIV:%.*]] = fdiv nnan contract double 1.000000e+00, [[SQRT_X]]
+; CHECK-NEXT: [[TMP1:%.*]] = call nnan contract double @llvm.amdgcn.rsq.f64(double [[X]])
+; CHECK-NEXT: [[TMP2:%.*]] = fcmp nnan contract oeq double [[X]], 0x7FF0000000000000
+; CHECK-NEXT: [[TMP3:%.*]] = select nnan contract i1 [[TMP2]], double [[TMP1]], double [[X]]
+; CHECK-NEXT: [[TMP4:%.*]] = fneg nnan contract double [[TMP1]]
+; CHECK-NEXT: [[TMP5:%.*]] = fmul nnan contract double [[TMP4]], [[TMP3]]
+; CHECK-NEXT: [[TMP6:%.*]] = call nnan contract double @llvm.fma.f64(double [[TMP5]], double [[TMP1]], double 1.000000e+00)
+; CHECK-NEXT: [[TMP7:%.*]] = fmul nnan contract double [[TMP1]], [[TMP6]]
+; CHECK-NEXT: [[TMP8:%.*]] = call nnan contract double @llvm.fma.f64(double [[TMP6]], double 3.750000e-01, double 5.000000e-01)
+; CHECK-NEXT: [[FDIV:%.*]] = call nnan contract double @llvm.fma.f64(double [[TMP7]], double [[TMP8]], double [[TMP1]])
; CHECK-NEXT: ret double [[FDIV]]
;
%sqrt.x = call contract nnan double @llvm.sqrt.f64(double %x)
@@ -40,8 +62,16 @@ define double @rsq_f64_nnan(double %x) {
define double @neg_rsq_f64_nnan(double %x) {
; CHECK-LABEL: define double @neg_rsq_f64_nnan(
; CHECK-SAME: double [[X:%.*]]) {
-; CHECK-NEXT: [[SQRT_X:%.*]] = call nnan contract double @llvm.sqrt.f64(double [[X]])
-; CHECK-NEXT: [[FDIV:%.*]] = fdiv nnan contract double -1.000000e+00, [[SQRT_X]]
+; CHECK-NEXT: [[TMP1:%.*]] = call nnan contract double @llvm.amdgcn.rsq.f64(double [[X]])
+; CHECK-NEXT: [[TMP2:%.*]] = fcmp nnan contract oeq double [[X]], 0x7FF0000000000000
+; CHECK-NEXT: [[TMP3:%.*]] = select nnan contract i1 [[TMP2]], double [[TMP1]], double [[X]]
+; CHECK-NEXT: [[TMP4:%.*]] = fneg nnan contract double [[TMP1]]
+; CHECK-NEXT: [[TMP5:%.*]] = fmul nnan contract double [[TMP4]], [[TMP3]]
+; CHECK-NEXT: [[TMP6:%.*]] = call nnan contract double @llvm.fma.f64(double [[TMP5]], double [[TMP1]], double 1.000000e+00)
+; CHECK-NEXT: [[TMP7:%.*]] = fmul nnan contract double [[TMP1]], [[TMP6]]
+; CHECK-NEXT: [[TMP8:%.*]] = call nnan contract double @llvm.fma.f64(double [[TMP6]], double 3.750000e-01, double 5.000000e-01)
+; CHECK-NEXT: [[TMP9:%.*]] = fneg nnan contract double [[TMP8]]
+; CHECK-NEXT: [[FDIV:%.*]] = call nnan contract double @llvm.fma.f64(double [[TMP7]], double [[TMP9]], double [[TMP1]])
; CHECK-NEXT: ret double [[FDIV]]
;
%sqrt.x = call contract nnan double @llvm.sqrt.f64(double %x)
@@ -52,8 +82,13 @@ define double @neg_rsq_f64_nnan(double %x) {
define double @rsq_f64_ninf(double %x) {
; CHECK-LABEL: define double @rsq_f64_ninf(
; CHECK-SAME: double [[X:%.*]]) {
-; CHECK-NEXT: [[SQRT_X:%.*]] = call ninf contract double @llvm.sqrt.f64(double [[X]])
-; CHECK-NEXT: [[FDIV:%.*]] = fdiv ninf contract double 1.000000e+00, [[SQRT_X]]
+; CHECK-NEXT: [[TMP1:%.*]] = call ninf contract double @llvm.amdgcn.rsq.f64(double [[X]])
+; CHECK-NEXT: [[TMP2:%.*]] = fneg ninf contract double [[TMP1]]
+; CHECK-NEXT: [[TMP3:%.*]] = fmul ninf contract double [[TMP2]], [[TMP1]]
+; CHECK-NEXT: [[TMP4:%.*]] = call ninf contract double @llvm.fma.f64(double [[TMP3]], double [[TMP1]], double 1.000000e+00)
+; CHECK-NEXT: [[TMP5:%.*]] = fmul ninf contract double [[TMP1]], [[TMP4]]
+; CHECK-NEXT: [[TMP6:%.*]] = call ninf contract double @llvm.fma.f64(double [[TMP4]], double 3.750000e-01, double 5.000000e-01)
+; CHECK-NEXT: [[FDIV:%.*]] = call ninf contract double @llvm.fma.f64(double [[TMP5]], double [[TMP6]], double [[TMP1]])
; CHECK-NEXT: ret double [[FDIV]]
;
%sqrt.x = call contract ninf double @llvm.sqrt.f64(double %x)
@@ -64,8 +99,14 @@ define double @rsq_f64_ninf(double %x) {
define double @neg_rsq_f64_ninf(double %x) {
; CHECK-LABEL: define double @neg_rsq_f64_ninf(
; CHECK-SAME: double [[X:%.*]]) {
-; CHECK-NEXT: [[SQRT_X:%.*]] = call ninf contract double @llvm.sqrt.f64(double [[X]])
-; CHECK-NEXT: [[FDIV:%.*]] = fdiv ninf contract double -1.000000e+00, [[SQRT_X]]
+; CHECK-NEXT: [[TMP1:%.*]] = call ninf contract double @llvm.amdgcn.rsq.f64(double [[X]])
+; CHECK-NEXT: [[TMP2:%.*]] = fneg ninf contract double [[TMP1]]
+; CHECK-NEXT: [[TMP3:%.*]] = fmul ninf contract double [[TMP2]], [[TMP1]]
+; CHECK-NEXT: [[TMP4:%.*]] = call ninf contract double @llvm.fma.f64(double [[TMP3]], double [[TMP1]], double 1.000000e+00)
+; CHECK-NEXT: [[TMP5:%.*]] = fmul ninf contract double [[TMP1]], [[TMP4]]
+; CHECK-NEXT: [[TMP6:%.*]] = call ninf contract double @llvm.fma.f64(double [[TMP4]], double 3.750000e-01, double 5.000000e-01)
+; CHECK-NEXT: [[TMP7:%.*]] = fneg ninf contract double [[TMP6]]
+; CHECK-NEXT: [[FDIV:%.*]] = call ninf contract double @llvm.fma.f64(double [[TMP5]], double [[TMP7]], double [[TMP1]])
; CHECK-NEXT: ret double [[FDIV]]
;
%sqrt.x = call contract ninf double @llvm.sqrt.f64(double %x)
@@ -76,8 +117,13 @@ define double @neg_rsq_f64_ninf(double %x) {
define double @rsq_f64_nnan_ninf(double %x) {
; CHECK-LABEL: define double @rsq_f64_nnan_ninf(
; CHECK-SAME: double [[X:%.*]]) {
-; CHECK-NEXT: [[SQRT_X:%.*]] = call nnan ninf contract double @llvm.sqrt.f64(double [[X]])
-; CHECK-NEXT: [[FDIV:%.*]] = fdiv nnan ninf contract double 1.000000e+00, [[SQRT_X]]
+; CHECK-NEXT: [[TMP1:%.*]] = call nnan ninf contract double @llvm.amdgcn.rsq.f64(double [[X]])
+; CHECK-NEXT: [[TMP2:%.*]] = fneg nnan ninf contract double [[TMP1]]
+; CHECK-NEXT: [[TMP3:%.*]] = fmul nnan ninf contract double [[TMP2]], [[TMP1]]
+; CHECK-NEXT: [[TMP4:%.*]] = call nnan ninf contract double @llvm.fma.f64(double [[TMP3]], double [[TMP1]], double 1.000000e+00)
+; CHECK-NEXT: [[TMP5:%.*]] = fmul nnan ninf contract double [[TMP1]], [[TMP4]]
+; CHECK-NEXT: [[TMP6:%.*]] = call nnan ninf contract double @llvm.fma.f64(double [[TMP4]], double 3.750000e-01, double 5.000000e-01)
+; CHECK-NEXT: [[FDIV:%.*]] = call nnan ninf contract double @llvm.fma.f64(double [[TMP5]], double [[TMP6]], double [[TMP1]])
; CHECK-NEXT: ret double [[FDIV]]
;
%sqrt.x = call contract nnan ninf double @llvm.sqrt.f64(double %x)
@@ -88,8 +134,14 @@ define double @rsq_f64_nnan_ninf(double %x) {
define double @neg_rsq_f64_nnan_ninf(double %x) {
; CHECK-LABEL: define double @neg_rsq_f64_nnan_ninf(
; CHECK-SAME: double [[X:%.*]]) {
-; CHECK-NEXT: [[SQRT_X:%.*]] = call nnan ninf contract double @llvm.sqrt.f64(double [[X]])
-; CHECK-NEXT: [[FDIV:%.*]] = fdiv nnan ninf contract double -1.000000e+00, [[SQRT_X]]
+; CHECK-NEXT: [[TMP1:%.*]] = call nnan ninf contract double @llvm.amdgcn.rsq.f64(double [[X]])
+; CHECK-NEXT: [[TMP2:%.*]] = fneg nnan ninf contract double [[TMP1]]
+; CHECK-NEXT: [[TMP3:%.*]] = fmul nnan ninf contract double [[TMP2]], [[TMP1]]
+; CHECK-NEXT: [[TMP4:%.*]] = call nnan ninf contract double @llvm.fma.f64(double [[TMP3]], double [[TMP1]], double 1.000000e+00)
+; CHECK-NEXT: [[TMP5:%.*]] = fmul nnan ninf contract double [[TMP1]], [[TMP4]]
+; CHECK-NEXT: [[TMP6:%.*]] = call nnan ninf contract double @llvm.fma.f64(double [[TMP4]], double 3.750000e-01, double 5.000000e-01)
+; CHECK-NEXT: [[TMP7:%.*]] = fneg nnan ninf contract double [[TMP6]]
+; CHECK-NEXT: [[FDIV:%.*]] = call nnan ninf contract double @llvm.fma.f64(double [[TMP5]], double [[TMP7]], double [[TMP1]])
; CHECK-NEXT: ret double [[FDIV]]
;
%sqrt.x = call contract nnan ninf double @llvm.sqrt.f64(double %x)
@@ -100,8 +152,15 @@ define double @neg_rsq_f64_nnan_ninf(double %x) {
define double @rsq_f64_sqrt_nnan_ninf(double %x) {
; CHECK-LABEL: define double @rsq_f64_sqrt_nnan_ninf(
; CHECK-SAME: double [[X:%.*]]) {
-; CHECK-NEXT: [[SQRT_X:%.*]] = call nnan ninf contract double @llvm.sqrt.f64(double [[X]])
-; CHECK-NEXT: [[FDIV:%.*]] = fdiv contract double 1.000000e+00, [[SQRT_X]]
+; CHECK-NEXT: [[TMP1:%.*]] = call nnan ninf contract double @llvm.amdgcn.rsq.f64(double [[X]])
+; CHECK-NEXT: [[TMP2:%.*]] = fcmp nnan ninf contract oeq double [[X]], 0.000000e+00
+; CHECK-NEXT: [[TMP3:%.*]] = select nnan ninf contract i1 [[TMP2]], double [[TMP1]], double [[X]]
+; CHECK-NEXT: [[TMP4:%.*]] = fneg nnan ninf contract double [[TMP1]]
+; CHECK-NEXT: [[TMP5:%.*]] = fmul nnan ninf contract double [[TMP4]], [[TMP3]]
+; CHECK-NEXT: [[TMP6:%.*]] = call nnan ninf contract double @llvm.fma.f64(double [[TMP5]], double [[TMP1]], double 1.000000e+00)
+; CHECK-NEXT: [[TMP7:%.*]] = fmul nnan ninf contract double [[TMP1]], [[TMP6]]
+; CHECK-NEXT: [[TMP8:%.*]] = call nnan ninf contract double @llvm.fma.f64(double [[TMP6]], double 3.750000e-01, double 5.000000e-01)
+; CHECK-NEXT: [[FDIV:%.*]] = call nnan ninf contract double @llvm.fma.f64(double [[TMP7]], double [[TMP8]], double [[TMP1]])
; CHECK-NEXT: ret double [[FDIV]]
;
%sqrt.x = call contract nnan ninf double @llvm.sqrt.f64(double %x)
@@ -112,8 +171,13 @@ define double @rsq_f64_sqrt_nnan_ninf(double %x) {
define double @rsq_f64_fdiv_nnan_ninf(double %x) {
; CHECK-LABEL: define double @rsq_f64_fdiv_nnan_ninf(
; CHECK-SAME: double [[X:%.*]]) {
-; CHECK-NEXT: [[SQRT_X:%.*]] = call contract double @llvm.sqrt.f64(double [[X]])
-; CHECK-NEXT: [[FDIV:%.*]] = fdiv nnan ninf contract double 1.000000e+00, [[SQRT_X]]
+; CHECK-NEXT: [[TMP1:%.*]] = call nnan ninf contract double @llvm.amdgcn.rsq.f64(double [[X]])
+; CHECK-NEXT: [[TMP2:%.*]] = fneg nnan ...
[truncated]
``````````
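A note for reading the CHECK lines above: the `i32 608` mask in the llvm.is.fpclass calls is fcPosInf | fcZero (0x200 | 0x60) in LLVM's FPClassTest bit encoding, i.e. the combined +inf/zero edge-case test from emitRsqF64. The sketch below mirrors the MaybePosInf/MaybeZero derivation from the patch to show how the fast-math flags shrink or remove that mask; the function itself is illustrative, not part of the patch.

```cpp
#include <cstdio>

// Bit values matching LLVM's FPClassTest encoding.
enum FPClassTest : unsigned {
  fcNegZero = 0x20,
  fcPosZero = 0x40,
  fcZero = fcNegZero | fcPosZero,
  fcPosInf = 0x200,
};

// Mirrors the MaybePosInf/MaybeZero logic in emitRsqF64: each flag rules
// out an input class, shrinking the fpclass mask (and with it the select).
unsigned rsqSpecialCaseMask(bool SqrtNInf, bool SqrtNNan, bool DivNInf,
                            bool DivNNan) {
  (void)SqrtNNan; // nnan on the sqrt does not constrain the input.
  bool MaybePosInf = !SqrtNInf && !DivNInf; // either ninf proves x != +inf
  bool MaybeZero = !DivNInf && !DivNNan;    // either fdiv flag proves x != 0
  unsigned Mask = 0;
  if (MaybeZero)
    Mask |= fcZero;
  if (MaybePosInf)
    Mask |= fcPosInf;
  return Mask;
}

int main() {
  // No flags: the full fcPosInf|fcZero check is needed -> 0x260 == 608,
  // the mask seen in the llvm.is.fpclass CHECK lines.
  std::printf("%u\n", rsqSpecialCaseMask(false, false, false, false));
  // nnan on the fdiv: only the +inf check remains (0x200 == 512); the
  // patch emits a single remaining class as a plain fcmp.
  std::printf("%u\n", rsqSpecialCaseMask(false, false, false, true));
  // ninf on both: the special-case select disappears entirely.
  std::printf("%u\n", rsqSpecialCaseMask(true, false, true, false));
  return 0;
}
```

When the denormal mode is dynamic, the patch instead emits two fcmp oeq compares, since llvm.is.fpclass looks only at the bit pattern and would miss subnormal inputs that the runtime flush mode treats as zero.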
https://github.com/llvm/llvm-project/pull/172053