[llvm] [GlobalIsel] Canonicalize G_FCMP (PR #108891)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Sep 16 14:59:59 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-aarch64
Author: Thorsten Schütt (tschuett)
<details>
<summary>Changes</summary>
As a side-effect, we start constant folding fcmps.
---
Patch is 64.79 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/108891.diff
11 Files Affected:
- (modified) llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h (+3)
- (modified) llvm/include/llvm/CodeGen/GlobalISel/Utils.h (+39)
- (modified) llvm/include/llvm/Target/GlobalISel/Combine.td (+9-2)
- (modified) llvm/lib/CodeGen/GlobalISel/CombinerHelperCompares.cpp (+59)
- (modified) llvm/lib/CodeGen/GlobalISel/Utils.cpp (+40)
- (added) llvm/test/CodeGen/AArch64/GlobalISel/combine-cannonicalize-fcmp.mir (+119)
- (modified) llvm/test/CodeGen/AMDGPU/fdiv_flags.f32.ll (+22-22)
- (modified) llvm/test/CodeGen/AMDGPU/fsqrt.f32.ll (+66-66)
- (modified) llvm/test/CodeGen/AMDGPU/llvm.log.ll (+27-41)
- (modified) llvm/test/CodeGen/AMDGPU/llvm.log10.ll (+27-41)
- (modified) llvm/test/CodeGen/AMDGPU/pseudo-scalar-transcendental.ll (+1-1)
``````````diff
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
index 37c9422d192754..3261b26e74cd71 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -911,6 +911,7 @@ class CombinerHelper {
const MachineInstr &BVMI, BuildFnTy &MatchInfo);
bool matchCanonicalizeICmp(const MachineInstr &MI, BuildFnTy &MatchInfo);
+ bool matchCanonicalizeFCmp(const MachineInstr &MI, BuildFnTy &MatchInfo);
private:
/// Checks for legality of an indexed variant of \p LdSt.
@@ -1029,6 +1030,8 @@ class CombinerHelper {
bool constantFoldICmp(const GICmp &ICmp, const GIConstant &LHSCst,
const GIConstant &RHSCst, BuildFnTy &MatchInfo);
+ bool constantFoldFCmp(const GFCmp &FCmp, const GFConstant &LHSCst,
+ const GFConstant &RHSCst, BuildFnTy &MatchInfo);
};
} // namespace llvm
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/Utils.h b/llvm/include/llvm/CodeGen/GlobalISel/Utils.h
index 76e0954357a5d7..95a8234d3c6080 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/Utils.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/Utils.h
@@ -632,5 +632,44 @@ class GIConstant {
const MachineRegisterInfo &MRI);
};
+/// A floating-point-like constant.
+///
+/// It abstracts over scalar, fixed-length vectors, and scalable vectors.
+/// In the common case, it provides a common API and feels like an APFloat,
+/// while still providing low-level access.
+/// It can be used for constant-folding.
+///
+/// bool isZero()
+/// abstracts over the kind.
+///
+/// switch(const.getKind())
+/// {
+/// }
+/// provides low-level access.
+class GFConstant {
+public:
+ enum class GFConstantKind { Scalar, FixedVector, ScalableVector };
+
+private:
+ GFConstantKind Kind;
+ SmallVector<APFloat> Values;
+
+public:
+ GFConstant(ArrayRef<APFloat> Values)
+ : Kind(GFConstantKind::FixedVector), Values(Values) {};
+ GFConstant(const APFloat &Value, GFConstantKind Kind) : Kind(Kind) {
+ Values.push_back(Value);
+ }
+
+ /// Returns the kind of this constant, e.g., Scalar.
+ GFConstantKind getKind() const { return Kind; }
+
+ /// Returns the value, if this constant is a scalar.
+ APFloat getScalarValue() const;
+
+ static std::optional<GFConstant> getConstant(Register Const,
+ const MachineRegisterInfo &MRI);
+};
+
} // End namespace llvm.
#endif
diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
index c66212d2ab12c8..e75cf0b7d4afc1 100644
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -1921,8 +1921,15 @@ def canonicalize_icmp : GICombineRule<
[{ return Helper.matchCanonicalizeICmp(*${cmp}, ${matchinfo}); }]),
(apply [{ Helper.applyBuildFn(*${cmp}, ${matchinfo}); }])>;
-def icmp_combines: GICombineGroup<[
+def canonicalize_fcmp : GICombineRule<
+ (defs root:$root, build_fn_matchinfo:$matchinfo),
+ (match (G_FCMP $root, $pred, $lhs, $rhs):$cmp,
+ [{ return Helper.matchCanonicalizeFCmp(*${cmp}, ${matchinfo}); }]),
+ (apply [{ Helper.applyBuildFn(*${cmp}, ${matchinfo}); }])>;
+
+def cmp_combines: GICombineGroup<[
canonicalize_icmp,
+ canonicalize_fcmp,
icmp_to_true_false_known_bits,
icmp_to_lhs_known_bits,
double_icmp_zero_and_combine,
@@ -1995,7 +2002,7 @@ def all_combines : GICombineGroup<[integer_reassoc_combines, trivial_combines,
combine_extracted_vector_load,
undef_combines, identity_combines, phi_combines,
simplify_add_to_sub, hoist_logic_op_with_same_opcode_hands, shifts_too_big,
- reassocs, ptr_add_immed_chain, icmp_combines,
+ reassocs, ptr_add_immed_chain, cmp_combines,
shl_ashr_to_sext_inreg, sext_inreg_of_load,
width_reduction_combines, select_combines,
known_bits_simplifications,
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelperCompares.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelperCompares.cpp
index 025cd2dc9f87f1..7a4cfd4b1a7bb5 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelperCompares.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelperCompares.cpp
@@ -60,6 +60,37 @@ bool CombinerHelper::constantFoldICmp(const GICmp &ICmp,
return true;
}
+bool CombinerHelper::constantFoldFCmp(const GFCmp &FCmp,
+ const GFConstant &LHSCst,
+ const GFConstant &RHSCst,
+ BuildFnTy &MatchInfo) {
+ if (LHSCst.getKind() != GFConstant::GFConstantKind::Scalar)
+ return false;
+
+ Register Dst = FCmp.getReg(0);
+ LLT DstTy = MRI.getType(Dst);
+
+ if (!isConstantLegalOrBeforeLegalizer(DstTy))
+ return false;
+
+ CmpInst::Predicate Pred = FCmp.getCond();
+ APFloat LHS = LHSCst.getScalarValue();
+ APFloat RHS = RHSCst.getScalarValue();
+
+ bool Result = FCmpInst::compare(LHS, RHS, Pred);
+
+ MatchInfo = [=](MachineIRBuilder &B) {
+ if (Result)
+ B.buildConstant(Dst, getICmpTrueVal(getTargetLowering(),
+ /*IsVector=*/DstTy.isVector(),
+ /*IsFP=*/true));
+ else
+ B.buildConstant(Dst, 0);
+ };
+
+ return true;
+}
+
bool CombinerHelper::matchCanonicalizeICmp(const MachineInstr &MI,
BuildFnTy &MatchInfo) {
const GICmp *Cmp = cast<GICmp>(&MI);
@@ -84,3 +115,31 @@ bool CombinerHelper::matchCanonicalizeICmp(const MachineInstr &MI,
return false;
}
+
+bool CombinerHelper::matchCanonicalizeFCmp(const MachineInstr &MI,
+ BuildFnTy &MatchInfo) {
+ const GFCmp *Cmp = cast<GFCmp>(&MI);
+
+ Register Dst = Cmp->getReg(0);
+ Register LHS = Cmp->getLHSReg();
+ Register RHS = Cmp->getRHSReg();
+
+ CmpInst::Predicate Pred = Cmp->getCond();
+ assert(CmpInst::isFPPredicate(Pred) && "Not an FP compare!");
+
+ if (auto CLHS = GFConstant::getConstant(LHS, MRI)) {
+ if (auto CRHS = GFConstant::getConstant(RHS, MRI))
+ return constantFoldFCmp(*Cmp, *CLHS, *CRHS, MatchInfo);
+
+ // If we have a constant, make sure it is on the RHS.
+ std::swap(LHS, RHS);
+ Pred = CmpInst::getSwappedPredicate(Pred);
+
+ MatchInfo = [=](MachineIRBuilder &B) {
+ B.buildFCmp(Pred, Dst, LHS, RHS, Cmp->getFlags());
+ };
+ return true;
+ }
+
+ return false;
+}
diff --git a/llvm/lib/CodeGen/GlobalISel/Utils.cpp b/llvm/lib/CodeGen/GlobalISel/Utils.cpp
index 15d3aa427d568d..9574464207d99f 100644
--- a/llvm/lib/CodeGen/GlobalISel/Utils.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/Utils.cpp
@@ -2008,3 +2008,43 @@ llvm::GIConstant::getConstant(Register Const, const MachineRegisterInfo &MRI) {
return GIConstant(MayBeConstant->Value, GIConstantKind::Scalar);
}
+
+APFloat llvm::GFConstant::getScalarValue() const {
+ assert(Kind == GFConstantKind::Scalar && "Expected scalar constant");
+
+ return Values[0];
+}
+
+std::optional<GFConstant>
+llvm::GFConstant::getConstant(Register Const, const MachineRegisterInfo &MRI) {
+ MachineInstr *Constant = getDefIgnoringCopies(Const, MRI);
+
+ if (GSplatVector *Splat = dyn_cast<GSplatVector>(Constant)) {
+ std::optional<FPValueAndVReg> MayBeConstant =
+ getFConstantVRegValWithLookThrough(Splat->getScalarReg(), MRI);
+ if (!MayBeConstant)
+ return std::nullopt;
+ return GFConstant(MayBeConstant->Value, GFConstantKind::ScalableVector);
+ }
+
+ if (GBuildVector *Build = dyn_cast<GBuildVector>(Constant)) {
+ SmallVector<APFloat> Values;
+ unsigned NumSources = Build->getNumSources();
+ for (unsigned I = 0; I < NumSources; ++I) {
+ Register SrcReg = Build->getSourceReg(I);
+ std::optional<FPValueAndVReg> MayBeConstant =
+ getFConstantVRegValWithLookThrough(SrcReg, MRI);
+ if (!MayBeConstant)
+ return std::nullopt;
+ Values.push_back(MayBeConstant->Value);
+ }
+ return GFConstant(Values);
+ }
+
+ std::optional<FPValueAndVReg> MayBeConstant =
+ getFConstantVRegValWithLookThrough(Const, MRI);
+ if (!MayBeConstant)
+ return std::nullopt;
+
+ return GFConstant(MayBeConstant->Value, GFConstantKind::Scalar);
+}
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-cannonicalize-fcmp.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-cannonicalize-fcmp.mir
new file mode 100644
index 00000000000000..94204611095db0
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-cannonicalize-fcmp.mir
@@ -0,0 +1,119 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -o - -mtriple=aarch64-unknown-unknown -run-pass=aarch64-prelegalizer-combiner -verify-machineinstrs %s | FileCheck %s --check-prefixes=CHECK
+
+---
+name: test_fcmp_canon
+body: |
+ bb.1:
+ ; CHECK-LABEL: name: test_fcmp_canon
+ ; CHECK: %lhs:_(s64) = G_FCONSTANT double 1.000000e+00
+ ; CHECK-NEXT: %rhs:_(s64) = COPY $x0
+ ; CHECK-NEXT: %res:_(s32) = afn G_FCMP floatpred(ole), %rhs(s64), %lhs
+ ; CHECK-NEXT: $w0 = COPY %res(s32)
+ %lhs:_(s64) = G_FCONSTANT double 1.0
+ %rhs:_(s64) = COPY $x0
+ %res:_(s32) = afn G_FCMP floatpred(oge), %lhs(s64), %rhs
+ $w0 = COPY %res(s32)
+...
+---
+name: test_fcmp_no_canon
+body: |
+ bb.1:
+ ; CHECK-LABEL: name: test_fcmp_no_canon
+ ; CHECK: %lhs:_(s64) = COPY $x0
+ ; CHECK-NEXT: %rhs:_(s64) = G_FCONSTANT double 1.000000e+00
+ ; CHECK-NEXT: %res:_(s32) = afn G_FCMP floatpred(oge), %lhs(s64), %rhs
+ ; CHECK-NEXT: $w0 = COPY %res(s32)
+ %lhs:_(s64) = COPY $x0
+ %rhs:_(s64) = G_FCONSTANT double 1.0
+ %res:_(s32) = afn G_FCMP floatpred(oge), %lhs(s64), %rhs
+ $w0 = COPY %res(s32)
+...
+---
+name: test_fcmp_no_canon_bv
+body: |
+ bb.1:
+ ; CHECK-LABEL: name: test_fcmp_no_canon_bv
+ ; CHECK: %opaque1:_(s64) = COPY $x0
+ ; CHECK-NEXT: %opaque2:_(s64) = COPY $x0
+ ; CHECK-NEXT: %const1:_(s64) = G_FCONSTANT double 1.000000e+00
+ ; CHECK-NEXT: %const2:_(s64) = G_FCONSTANT double 2.000000e+00
+ ; CHECK-NEXT: %lhs:_(<2 x s64>) = G_BUILD_VECTOR %const1(s64), %opaque2(s64)
+ ; CHECK-NEXT: %rhs:_(<2 x s64>) = G_BUILD_VECTOR %opaque1(s64), %const2(s64)
+ ; CHECK-NEXT: %res:_(<2 x s32>) = afn G_FCMP floatpred(oge), %lhs(<2 x s64>), %rhs
+ ; CHECK-NEXT: $x0 = COPY %res(<2 x s32>)
+ %opaque1:_(s64) = COPY $x0
+ %opaque2:_(s64) = COPY $x0
+ %const1:_(s64) = G_FCONSTANT double 1.0
+ %const2:_(s64) = G_FCONSTANT double 2.0
+ %lhs:_(<2 x s64>) = G_BUILD_VECTOR %const1(s64), %opaque2(s64)
+ %rhs:_(<2 x s64>) = G_BUILD_VECTOR %opaque1(s64), %const2(s64)
+ %res:_(<2 x s32>) = afn G_FCMP floatpred(oge), %lhs(<2 x s64>), %rhs
+ $x0 = COPY %res(<2 x s32>)
+...
+---
+name: test_fcmp_canon_bv
+body: |
+ bb.1:
+ ; CHECK-LABEL: name: test_fcmp_canon_bv
+ ; CHECK: %opaque1:_(s64) = COPY $x0
+ ; CHECK-NEXT: %opaque2:_(s64) = COPY $x0
+ ; CHECK-NEXT: %const1:_(s64) = G_FCONSTANT double 1.000000e+00
+ ; CHECK-NEXT: %const2:_(s64) = G_FCONSTANT double 2.000000e+00
+ ; CHECK-NEXT: %lhs:_(<2 x s64>) = G_BUILD_VECTOR %const1(s64), %const2(s64)
+ ; CHECK-NEXT: %rhs:_(<2 x s64>) = G_BUILD_VECTOR %opaque1(s64), %opaque2(s64)
+ ; CHECK-NEXT: %res:_(<2 x s32>) = afn G_FCMP floatpred(ole), %rhs(<2 x s64>), %lhs
+ ; CHECK-NEXT: $x0 = COPY %res(<2 x s32>)
+ %opaque1:_(s64) = COPY $x0
+ %opaque2:_(s64) = COPY $x0
+ %const1:_(s64) = G_FCONSTANT double 1.0
+ %const2:_(s64) = G_FCONSTANT double 2.0
+ %lhs:_(<2 x s64>) = G_BUILD_VECTOR %const1(s64), %const2(s64)
+ %rhs:_(<2 x s64>) = G_BUILD_VECTOR %opaque1(s64), %opaque2(s64)
+ %res:_(<2 x s32>) = afn G_FCMP floatpred(oge), %lhs(<2 x s64>), %rhs
+ $x0 = COPY %res(<2 x s32>)
+...
+---
+name: test_fcmp_canon_splat
+body: |
+ bb.1:
+ ; CHECK-LABEL: name: test_fcmp_canon_splat
+ ; CHECK: %const:_(s64) = G_FCONSTANT double 1.000000e+00
+ ; CHECK-NEXT: %lhs:_(<vscale x 2 x s64>) = G_SPLAT_VECTOR %const(s64)
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x1
+ ; CHECK-NEXT: %rhs:_(<vscale x 2 x s64>) = G_SPLAT_VECTOR [[COPY]](s64)
+ ; CHECK-NEXT: %res:_(<vscale x 2 x s32>) = afn G_FCMP floatpred(ole), %rhs(<vscale x 2 x s64>), %lhs
+ ; CHECK-NEXT: %z:_(<vscale x 2 x s64>) = G_ZEXT %res(<vscale x 2 x s32>)
+ ; CHECK-NEXT: $z0 = COPY %z(<vscale x 2 x s64>)
+ %const:_(s64) = G_FCONSTANT double 1.0
+ %lhs:_(<vscale x 2 x s64>) = G_SPLAT_VECTOR %const:_(s64)
+ %1:_(s64) = COPY $x1
+ %rhs:_(<vscale x 2 x s64>) = G_SPLAT_VECTOR %1:_(s64)
+ %res:_(<vscale x 2 x s32>) = afn G_FCMP floatpred(oge), %lhs(<vscale x 2 x s64>), %rhs
+ %z:_(<vscale x 2 x s64>) = G_ZEXT %res
+ $z0 = COPY %z(<vscale x 2 x s64>)
+...
+---
+name: test_fcmp_const
+body: |
+ bb.1:
+ ; CHECK-LABEL: name: test_fcmp_const
+ ; CHECK: %res:_(s32) = G_CONSTANT i32 0
+ ; CHECK-NEXT: $w0 = COPY %res(s32)
+ %lhs:_(s64) = G_FCONSTANT double 1.0
+ %rhs:_(s64) = G_FCONSTANT double 2.0
+ %res:_(s32) = afn G_FCMP floatpred(oge), %lhs(s64), %rhs
+ $w0 = COPY %res(s32)
+...
+---
+name: test_fcmp_const_other
+body: |
+ bb.1:
+ ; CHECK-LABEL: name: test_fcmp_const_other
+ ; CHECK: %res:_(s32) = G_CONSTANT i32 1
+ ; CHECK-NEXT: $w0 = COPY %res(s32)
+ %lhs:_(s64) = G_FCONSTANT double 2.0
+ %rhs:_(s64) = G_FCONSTANT double 1.0
+ %res:_(s32) = afn G_FCMP floatpred(oge), %lhs(s64), %rhs
+ $w0 = COPY %res(s32)
+...
diff --git a/llvm/test/CodeGen/AMDGPU/fdiv_flags.f32.ll b/llvm/test/CodeGen/AMDGPU/fdiv_flags.f32.ll
index 5ae989603b31ab..2140f50611d711 100644
--- a/llvm/test/CodeGen/AMDGPU/fdiv_flags.f32.ll
+++ b/llvm/test/CodeGen/AMDGPU/fdiv_flags.f32.ll
@@ -342,7 +342,7 @@ define float @v_fdiv_recip_sqrt_f32(float %x) {
; CODEGEN-IEEE-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CODEGEN-IEEE-GISEL-NEXT: v_mov_b32_e32 v1, 0xf800000
; CODEGEN-IEEE-GISEL-NEXT: v_mul_f32_e32 v2, 0x4f800000, v0
-; CODEGEN-IEEE-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v1, v0
+; CODEGEN-IEEE-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
; CODEGEN-IEEE-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
; CODEGEN-IEEE-GISEL-NEXT: v_sqrt_f32_e32 v1, v0
; CODEGEN-IEEE-GISEL-NEXT: v_add_i32_e64 v2, s[4:5], -1, v1
@@ -410,7 +410,7 @@ define float @v_fdiv_recip_sqrt_f32(float %x) {
; IR-IEEE-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; IR-IEEE-GISEL-NEXT: v_mov_b32_e32 v1, 0xf800000
; IR-IEEE-GISEL-NEXT: v_mul_f32_e32 v2, 0x4f800000, v0
-; IR-IEEE-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v1, v0
+; IR-IEEE-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
; IR-IEEE-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
; IR-IEEE-GISEL-NEXT: v_sqrt_f32_e32 v1, v0
; IR-IEEE-GISEL-NEXT: v_add_i32_e64 v2, s[4:5], -1, v1
@@ -479,7 +479,7 @@ define float @v_fdiv_recip_sqrt_f32(float %x) {
; CODEGEN-DAZ-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CODEGEN-DAZ-GISEL-NEXT: v_mov_b32_e32 v1, 0xf800000
; CODEGEN-DAZ-GISEL-NEXT: v_mul_f32_e32 v2, 0x4f800000, v0
-; CODEGEN-DAZ-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v1, v0
+; CODEGEN-DAZ-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
; CODEGEN-DAZ-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
; CODEGEN-DAZ-GISEL-NEXT: v_rsq_f32_e32 v1, v0
; CODEGEN-DAZ-GISEL-NEXT: v_mul_f32_e32 v2, v0, v1
@@ -549,7 +549,7 @@ define float @v_fdiv_recip_sqrt_f32(float %x) {
; IR-DAZ-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; IR-DAZ-GISEL-NEXT: v_mov_b32_e32 v1, 0xf800000
; IR-DAZ-GISEL-NEXT: v_mul_f32_e32 v2, 0x4f800000, v0
-; IR-DAZ-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v1, v0
+; IR-DAZ-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
; IR-DAZ-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
; IR-DAZ-GISEL-NEXT: v_rsq_f32_e32 v1, v0
; IR-DAZ-GISEL-NEXT: v_mul_f32_e32 v2, v0, v1
@@ -607,7 +607,7 @@ define float @v_fdiv_recip_sqrt_f32_arcp(float %x) {
; CODEGEN-IEEE-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CODEGEN-IEEE-GISEL-NEXT: v_mov_b32_e32 v1, 0xf800000
; CODEGEN-IEEE-GISEL-NEXT: v_mul_f32_e32 v2, 0x4f800000, v0
-; CODEGEN-IEEE-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v1, v0
+; CODEGEN-IEEE-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
; CODEGEN-IEEE-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
; CODEGEN-IEEE-GISEL-NEXT: v_sqrt_f32_e32 v1, v0
; CODEGEN-IEEE-GISEL-NEXT: v_add_i32_e64 v2, s[4:5], -1, v1
@@ -647,7 +647,7 @@ define float @v_fdiv_recip_sqrt_f32_arcp(float %x) {
; IR-IEEE-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; IR-IEEE-GISEL-NEXT: v_mov_b32_e32 v1, 0xf800000
; IR-IEEE-GISEL-NEXT: v_mul_f32_e32 v2, 0x4f800000, v0
-; IR-IEEE-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v1, v0
+; IR-IEEE-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
; IR-IEEE-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
; IR-IEEE-GISEL-NEXT: v_sqrt_f32_e32 v1, v0
; IR-IEEE-GISEL-NEXT: v_add_i32_e64 v2, s[4:5], -1, v1
@@ -687,7 +687,7 @@ define float @v_fdiv_recip_sqrt_f32_arcp(float %x) {
; CODEGEN-DAZ-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CODEGEN-DAZ-GISEL-NEXT: v_mov_b32_e32 v1, 0xf800000
; CODEGEN-DAZ-GISEL-NEXT: v_mul_f32_e32 v2, 0x4f800000, v0
-; CODEGEN-DAZ-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v1, v0
+; CODEGEN-DAZ-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
; CODEGEN-DAZ-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
; CODEGEN-DAZ-GISEL-NEXT: v_rsq_f32_e32 v1, v0
; CODEGEN-DAZ-GISEL-NEXT: v_mul_f32_e32 v2, v0, v1
@@ -728,7 +728,7 @@ define float @v_fdiv_recip_sqrt_f32_arcp(float %x) {
; IR-DAZ-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; IR-DAZ-GISEL-NEXT: v_mov_b32_e32 v1, 0xf800000
; IR-DAZ-GISEL-NEXT: v_mul_f32_e32 v2, 0x4f800000, v0
-; IR-DAZ-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v1, v0
+; IR-DAZ-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
; IR-DAZ-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
; IR-DAZ-GISEL-NEXT: v_rsq_f32_e32 v1, v0
; IR-DAZ-GISEL-NEXT: v_mul_f32_e32 v2, v0, v1
@@ -831,7 +831,7 @@ define float @v_fdiv_recip_sqrt_f32_arcp_fdiv_only(float %x) {
; CODEGEN-IEEE-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CODEGEN-IEEE-GISEL-NEXT: v_mov_b32_e32 v1, 0xf800000
; CODEGEN-IEEE-GISEL-NEXT: v_mul_f32_e32 v2, 0x4f800000, v0
-; CODEGEN-IEEE-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v1, v0
+; CODEGEN-IEEE-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
; CODEGEN-IEEE-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
; CODEGEN-IEEE-GISEL-NEXT: v_sqrt_f32_e32 v1, v0
; CODEGEN-IEEE-GISEL-NEXT: v_add_i32_e64 v2, s[4:5], -1, v1
@@ -871,7 +871,7 @@ define float @v_fdiv_recip_sqrt_f32_arcp_fdiv_only(float %x) {
; IR-IEEE-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; IR-IEEE-GISEL-NEXT: v_mov_b32_e32 v1, 0xf800000
; IR-IEEE-GISEL-NEXT: v_mul_f32_e32 v2, 0x4f800000, v0
-; IR-IEEE-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v1, v0
+; IR-IEEE-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
; IR-IEEE-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
; IR-IEEE-GISEL-NEXT: v_sqrt_f32_e32 v1, v0
; IR-IEEE-GISEL-NEXT: v_add_i32_e64 v2, s[4:5], -1, v1
@@ -911,7 +911,7 @@ define float @v_fdiv_recip_sqrt_f32_arcp_fdiv_only(float %x) {
; CODEGEN-DAZ-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CODEGEN-DAZ-GISEL-NEXT: v_mov_b32_e32 v1, 0xf800000
; CODEGEN-DAZ-GISEL-NEXT: v_mul_f32_e32 v2, 0x4f800000, v0
-; CODEGEN-DAZ-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v1, v0
+; CODEGEN-DAZ-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
; CODEGEN-DAZ-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
; CODEGEN-DAZ-GISEL-NEXT: v_rsq_f32_e32 v1, v0
; CODEGEN-DAZ-GISEL-NEXT: v_mul_f32_e32 v2, v0, v1
@@ -952,7 +952,7 @@ define float @v_fdiv_recip_sqrt_f32_arcp_fdiv_only(float %x) {
; IR-DAZ-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; IR-DAZ-GISEL-NEXT: v_mov_b32_e32 v1, 0xf800000
; IR-DAZ-GISEL-NEXT: v_mul_f32_e32 v2, 0x4f800000, v0
-; IR-DAZ-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v1, v0
+; IR-DAZ-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
; IR-DAZ-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
; IR-DAZ-GISEL-NEXT: v_rsq_f32_e32 v1, v0
; IR-DAZ-GISEL-NEXT: v_mul_f32_e32 v2, ...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/108891
More information about the llvm-commits
mailing list