[llvm] AMDGPU: Match fract from compare and select and minimum (PR #189082)
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Tue Mar 31 00:17:56 PDT 2026
https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/189082
>From 0aa937f256bc9486b961f778c69bad34734d077b Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Fri, 27 Mar 2026 16:18:32 +0100
Subject: [PATCH] AMDGPU: Match fract from compare and select and minimum
Implementing this with any of the minnum variants is overconstraining
for the actual use. Existing patterns use fmin, then have to manually
clamp nan inputs to get nan propagating behavior. It's cleaner to express
this with a nan propagating operation to start with.
---
.../Target/AMDGPU/AMDGPUCodeGenPrepare.cpp | 217 +++++----
llvm/test/CodeGen/AMDGPU/fract-match.ll | 461 ++++++++----------
2 files changed, 325 insertions(+), 353 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
index 88418522e856a..d049df810c476 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
@@ -208,7 +208,8 @@ class AMDGPUCodeGenPrepareImpl
bool canWidenScalarExtLoad(LoadInst &I) const;
- Value *matchFractPat(Value &V);
+ Value *matchFractPatImpl(Value &V, const APFloat &C) const;
+ Value *matchFractPatNanAvoidant(Value &V);
Value *applyFractPat(IRBuilder<> &Builder, Value *FractArg);
bool canOptimizeWithRsq(FastMathFlags DivFMF, FastMathFlags SqrtFMF) const;
@@ -1591,63 +1592,85 @@ bool AMDGPUCodeGenPrepareImpl::visitLoadInst(LoadInst &I) {
}
bool AMDGPUCodeGenPrepareImpl::visitSelectInst(SelectInst &I) {
- Value *Cond = I.getCondition();
- Value *TrueVal = I.getTrueValue();
- Value *FalseVal = I.getFalseValue();
- Value *CmpVal;
- CmpPredicate IsNanPred;
-
- // Match fract pattern with nan check.
- if (!match(Cond, m_FCmp(IsNanPred, m_Value(CmpVal), m_NonNaN())))
- return false;
-
FPMathOperator *FPOp = dyn_cast<FPMathOperator>(&I);
if (!FPOp)
return false;
- IRBuilder<> Builder(&I);
- Builder.setFastMathFlags(FPOp->getFastMathFlags());
-
+ Value *X;
Value *Fract = nullptr;
- if (IsNanPred == FCmpInst::FCMP_UNO && TrueVal == CmpVal &&
- CmpVal == matchFractPat(*FalseVal)) {
- // isnan(x) ? x : fract(x)
- Fract = applyFractPat(Builder, CmpVal);
- } else if (IsNanPred == FCmpInst::FCMP_ORD && FalseVal == CmpVal) {
- if (CmpVal == matchFractPat(*TrueVal)) {
- // !isnan(x) ? fract(x) : x
- Fract = applyFractPat(Builder, CmpVal);
- } else {
- // Match an intermediate clamp infinity to 0 pattern. i.e.
- // !isnan(x) ? (!isinf(x) ? fract(x) : 0.0) : x
- CmpPredicate PredInf;
- Value *IfNotInf;
-
- if (!match(TrueVal, m_Select(m_FCmp(PredInf, m_FAbs(m_Specific(CmpVal)),
- m_PosInf()),
- m_Value(IfNotInf), m_PosZeroFP())) ||
- PredInf != FCmpInst::FCMP_UNE || CmpVal != matchFractPat(*IfNotInf))
- return false;
-
- SelectInst *ClampInfSelect = cast<SelectInst>(TrueVal);
- // Insert before the fabs
- Value *InsertPt =
- cast<Instruction>(ClampInfSelect->getCondition())->getOperand(0);
-
- Builder.SetInsertPoint(cast<Instruction>(InsertPt));
- Value *NewFract = applyFractPat(Builder, CmpVal);
- NewFract->takeName(TrueVal);
+ // Match:
+ // (x - floor(x)) >= MIN_CONSTANT ? MIN_CONSTANT : (x - floor(x))
+ //
+ // This is the preferred way to implement fract.
+ // TODO: Could also match with compare against 1.0
+ const APFloat *C;
+ if (match(&I, m_UnordFMin(m_Value(X), m_APFloatAllowPoison(C)))) {
+ Value *FractSrc = matchFractPatImpl(*X, *C);
+ if (!FractSrc)
+ return false;
+ IRBuilder<> Builder(&I);
+ Builder.setFastMathFlags(FPOp->getFastMathFlags());
+ Fract = applyFractPat(Builder, FractSrc);
+ } else {
+ // Match patterns which may appear in legacy implementations of the fract()
+ // function, built around the nan-avoidant minnum intrinsic. These are the
+ // core pattern plus additional clamping of inf and nan values on the
+ // result.
+ Value *Cond = I.getCondition();
+ Value *TrueVal = I.getTrueValue();
+ Value *FalseVal = I.getFalseValue();
+ Value *CmpVal;
+ CmpPredicate IsNanPred;
+
+ // Match fract pattern with nan check.
+ if (!match(Cond, m_FCmp(IsNanPred, m_Value(CmpVal), m_NonNaN())))
+ return false;
- // Thread the new fract into the inf clamping sequence.
- DeadVals.push_back(ClampInfSelect->getOperand(1));
- ClampInfSelect->setOperand(1, NewFract);
+ IRBuilder<> Builder(&I);
+ Builder.setFastMathFlags(FPOp->getFastMathFlags());
- // The outer select nan handling is also absorbed into the fract.
- Fract = ClampInfSelect;
- }
- } else
- return false;
+ if (IsNanPred == FCmpInst::FCMP_UNO && TrueVal == CmpVal &&
+ CmpVal == matchFractPatNanAvoidant(*FalseVal)) {
+ // isnan(x) ? x : fract(x)
+ Fract = applyFractPat(Builder, CmpVal);
+ } else if (IsNanPred == FCmpInst::FCMP_ORD && FalseVal == CmpVal) {
+ if (CmpVal == matchFractPatNanAvoidant(*TrueVal)) {
+ // !isnan(x) ? fract(x) : x
+ Fract = applyFractPat(Builder, CmpVal);
+ } else {
+ // Match an intermediate clamp infinity to 0 pattern. i.e.
+ // !isnan(x) ? (!isinf(x) ? fract(x) : 0.0) : x
+ CmpPredicate PredInf;
+ Value *IfNotInf;
+
+ if (!match(TrueVal, m_Select(m_FCmp(PredInf, m_FAbs(m_Specific(CmpVal)),
+ m_PosInf()),
+ m_Value(IfNotInf), m_PosZeroFP())) ||
+ PredInf != FCmpInst::FCMP_UNE ||
+ CmpVal != matchFractPatNanAvoidant(*IfNotInf))
+ return false;
+
+ SelectInst *ClampInfSelect = cast<SelectInst>(TrueVal);
+
+ // Insert before the fabs
+ Value *InsertPt =
+ cast<Instruction>(ClampInfSelect->getCondition())->getOperand(0);
+
+ Builder.SetInsertPoint(cast<Instruction>(InsertPt));
+ Value *NewFract = applyFractPat(Builder, CmpVal);
+ NewFract->takeName(TrueVal);
+
+ // Thread the new fract into the inf clamping sequence.
+ DeadVals.push_back(ClampInfSelect->getOperand(1));
+ ClampInfSelect->setOperand(1, NewFract);
+
+ // The outer select nan handling is also absorbed into the fract.
+ Fract = ClampInfSelect;
+ }
+ } else
+ return false;
+ }
Fract->takeName(&I);
I.replaceAllUsesWith(Fract);
@@ -2048,54 +2071,56 @@ bool AMDGPUCodeGenPrepareImpl::visitIntrinsicInst(IntrinsicInst &I) {
}
}
-/// Match non-nan fract pattern.
-/// minnum(fsub(x, floor(x)), nextafter(1.0, -1.0))
-/// minimumnum(fsub(x, floor(x)), nextafter(1.0, -1.0))
-/// minimum(fsub(x, floor(x)), nextafter(1.0, -1.0))
-///
-/// If fract is a useful instruction for the subtarget. Does not account for the
-/// nan handling; the instruction has a nan check on the input value.
-Value *AMDGPUCodeGenPrepareImpl::matchFractPat(Value &V) {
+/// Match the core sequence in the fract pattern (x - floor(x)), which doesn't
+/// need to consider edge case handling.
+Value *AMDGPUCodeGenPrepareImpl::matchFractPatImpl(Value &FractSrc,
+ const APFloat &C) const {
if (ST.hasFractBug())
return nullptr;
- IntrinsicInst *II = dyn_cast<IntrinsicInst>(&V);
- if (!II)
- return nullptr;
-
- Intrinsic::ID IID = II->getIntrinsicID();
-
- // The value is only used in contexts where we know the input isn't a nan, so
- // any of the fmin variants are fine.
- if (IID != Intrinsic::minnum && IID != Intrinsic::minimum &&
- IID != Intrinsic::minimumnum)
- return nullptr;
-
- Type *Ty = V.getType();
+ Type *Ty = FractSrc.getType();
if (!isLegalFloatingTy(Ty->getScalarType()))
return nullptr;
- Value *Arg0 = II->getArgOperand(0);
- Value *Arg1 = II->getArgOperand(1);
-
- const APFloat *C;
- if (!match(Arg1, m_APFloatAllowPoison(C)))
- return nullptr;
-
- APFloat OneNextDown = APFloat::getOne(C->getSemantics());
+ APFloat OneNextDown = APFloat::getOne(C.getSemantics());
OneNextDown.next(true);
// Match nextafter(1.0, -1)
- if (OneNextDown != *C)
+ if (OneNextDown != C)
return nullptr;
Value *FloorSrc;
- if (match(Arg0, m_FSub(m_Value(FloorSrc),
- m_Intrinsic<Intrinsic::floor>(m_Deferred(FloorSrc)))))
+ if (match(&FractSrc, m_FSub(m_Value(FloorSrc), m_Intrinsic<Intrinsic::floor>(
+ m_Deferred(FloorSrc)))))
return FloorSrc;
return nullptr;
}
+/// Match non-nan fract pattern.
+/// MIN_CONSTANT = nextafter(1.0, -1.0)
+/// minnum(fsub(x, floor(x)), MIN_CONSTANT)
+/// minimumnum(fsub(x, floor(x)), MIN_CONSTANT)
+/// minimum(fsub(x, floor(x)), MIN_CONSTANT)
+///
+/// x_sub_floor >= MIN_CONSTANT ? MIN_CONSTANT : x_sub_floor;
+///
+/// If fract is a useful instruction for the subtarget. Does not account for the
+/// nan handling; the instruction has a nan check on the input value.
+Value *AMDGPUCodeGenPrepareImpl::matchFractPatNanAvoidant(Value &V) {
+ Value *Arg0;
+ const APFloat *C;
+
+ // The value is only used in contexts where we know the input isn't a nan, so
+ // any of the fmin variants are fine.
+ if (!match(&V,
+ m_CombineOr(m_FMinNum_or_FMinimumNum(m_Value(Arg0),
+ m_APFloatAllowPoison(C)),
+ m_FMinimum(m_Value(Arg0), m_APFloatAllowPoison(C)))))
+ return nullptr;
+
+ return matchFractPatImpl(*Arg0, *C);
+}
+
Value *AMDGPUCodeGenPrepareImpl::applyFractPat(IRBuilder<> &Builder,
Value *FractArg) {
SmallVector<Value *, 4> FractVals;
@@ -2113,14 +2138,28 @@ Value *AMDGPUCodeGenPrepareImpl::applyFractPat(IRBuilder<> &Builder,
}
bool AMDGPUCodeGenPrepareImpl::visitFMinLike(IntrinsicInst &I) {
- Value *FractArg = matchFractPat(I);
- if (!FractArg)
- return false;
+ const APFloat *C;
+ Value *FractArg;
+
+ // minimum(x - floor(x), MIN_CONSTANT)
+ Value *X;
+ if (!ST.hasFractBug() &&
+ match(&I, m_FMinimum(m_Value(X), m_APFloatAllowPoison(C)))) {
+ FractArg = matchFractPatImpl(*X, *C);
+ if (!FractArg)
+ return false;
+ } else {
+ // minnum(x - floor(x), MIN_CONSTANT)
+ FractArg = matchFractPatNanAvoidant(I);
+ if (!FractArg)
+ return false;
- // Match pattern for fract intrinsic in contexts where the nan check has been
- // optimized out (and hope the knowledge the source can't be nan wasn't lost).
- if (!I.hasNoNaNs() && !isKnownNeverNaN(FractArg, SQ.getWithInstruction(&I)))
- return false;
+ // Match pattern for fract intrinsic in contexts where the nan check has
+ // been optimized out (and hope the knowledge the source can't be nan wasn't
+ // lost).
+ if (!I.hasNoNaNs() && !isKnownNeverNaN(FractArg, SQ.getWithInstruction(&I)))
+ return false;
+ }
IRBuilder<> Builder(&I);
FastMathFlags FMF = I.getFastMathFlags();
diff --git a/llvm/test/CodeGen/AMDGPU/fract-match.ll b/llvm/test/CodeGen/AMDGPU/fract-match.ll
index eac2a29bda3ec..da538744fb8d2 100644
--- a/llvm/test/CodeGen/AMDGPU/fract-match.ll
+++ b/llvm/test/CodeGen/AMDGPU/fract-match.ll
@@ -4084,13 +4084,19 @@ entry:
}
define float @basic_fract_f32_flags_minimum(float %x) {
-; IR-LABEL: define float @basic_fract_f32_flags_minimum(
-; IR-SAME: float [[X:%.*]]) #[[ATTR0]] {
-; IR-NEXT: [[ENTRY:.*:]]
-; IR-NEXT: [[FLOOR:%.*]] = tail call float @llvm.floor.f32(float [[X]])
-; IR-NEXT: [[SUB:%.*]] = fsub float [[X]], [[FLOOR]]
-; IR-NEXT: [[MIN:%.*]] = tail call nsz float @llvm.minimum.f32(float [[SUB]], float 0x3FEFFFFFE0000000)
-; IR-NEXT: ret float [[MIN]]
+; GFX6-IR-LABEL: define float @basic_fract_f32_flags_minimum(
+; GFX6-IR-SAME: float [[X:%.*]]) #[[ATTR0]] {
+; GFX6-IR-NEXT: [[ENTRY:.*:]]
+; GFX6-IR-NEXT: [[FLOOR:%.*]] = tail call float @llvm.floor.f32(float [[X]])
+; GFX6-IR-NEXT: [[SUB:%.*]] = fsub float [[X]], [[FLOOR]]
+; GFX6-IR-NEXT: [[MIN:%.*]] = tail call nsz float @llvm.minimum.f32(float [[SUB]], float 0x3FEFFFFFE0000000)
+; GFX6-IR-NEXT: ret float [[MIN]]
+;
+; IR-FRACT-LABEL: define float @basic_fract_f32_flags_minimum(
+; IR-FRACT-SAME: float [[X:%.*]]) #[[ATTR0]] {
+; IR-FRACT-NEXT: [[ENTRY:.*:]]
+; IR-FRACT-NEXT: [[MIN:%.*]] = call nnan nsz float @llvm.amdgcn.fract.f32(float [[X]])
+; IR-FRACT-NEXT: ret float [[MIN]]
;
; GFX6-LABEL: basic_fract_f32_flags_minimum:
; GFX6: ; %bb.0: ; %entry
@@ -4106,35 +4112,19 @@ define float @basic_fract_f32_flags_minimum(float %x) {
; GFX7-LABEL: basic_fract_f32_flags_minimum:
; GFX7: ; %bb.0: ; %entry
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_floor_f32_e32 v1, v0
-; GFX7-NEXT: v_sub_f32_e32 v0, v0, v1
-; GFX7-NEXT: v_min_f32_e32 v1, 0x3f7fffff, v0
-; GFX7-NEXT: v_mov_b32_e32 v2, 0x7fc00000
-; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v0, v0
-; GFX7-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
+; GFX7-NEXT: v_fract_f32_e32 v0, v0
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: basic_fract_f32_flags_minimum:
; GFX8: ; %bb.0: ; %entry
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_floor_f32_e32 v1, v0
-; GFX8-NEXT: v_sub_f32_e32 v0, v0, v1
-; GFX8-NEXT: v_min_f32_e32 v1, 0x3f7fffff, v0
-; GFX8-NEXT: v_mov_b32_e32 v2, 0x7fc00000
-; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v0, v0
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
+; GFX8-NEXT: v_fract_f32_e32 v0, v0
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: basic_fract_f32_flags_minimum:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_floor_f32_e32 v1, v0
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT: v_sub_f32_e32 v0, v0, v1
-; GFX11-NEXT: v_min_f32_e32 v1, 0x3f7fffff, v0
-; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v0, v0
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v1, vcc_lo
+; GFX11-NEXT: v_fract_f32_e32 v0, v0
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: basic_fract_f32_flags_minimum:
@@ -4144,10 +4134,7 @@ define float @basic_fract_f32_flags_minimum(float %x) {
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
-; GFX12-NEXT: v_floor_f32_e32 v1, v0
-; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX12-NEXT: v_sub_f32_e32 v0, v0, v1
-; GFX12-NEXT: v_minimum_f32 v0, 0x3f7fffff, v0
+; GFX12-NEXT: v_fract_f32_e32 v0, v0
; GFX12-NEXT: s_setpc_b64 s[30:31]
entry:
%floor = tail call float @llvm.floor.f32(float %x)
@@ -5684,9 +5671,8 @@ define float @fract_pat_fcmp_oge_select(float %x, ptr addrspace(1) %iptr) #0 {
; GFX7-NEXT: s_mov_b32 s5, s6
; GFX7-NEXT: v_floor_f32_e32 v3, v0
; GFX7-NEXT: buffer_store_dword v3, v[1:2], s[4:7], 0 addr64
-; GFX7-NEXT: v_sub_f32_e32 v1, v0, v3
; GFX7-NEXT: s_mov_b32 s4, 0x7f800000
-; GFX7-NEXT: v_min_legacy_f32_e32 v1, 0x3f7fffff, v1
+; GFX7-NEXT: v_fract_f32_e32 v1, v0
; GFX7-NEXT: v_cmp_neq_f32_e64 vcc, |v0|, s4
; GFX7-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
; GFX7-NEXT: s_waitcnt vmcnt(0)
@@ -5696,13 +5682,9 @@ define float @fract_pat_fcmp_oge_select(float %x, ptr addrspace(1) %iptr) #0 {
; GFX8: ; %bb.0: ; %entry
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_floor_f32_e32 v3, v0
-; GFX8-NEXT: global_store_dword v[1:2], v3, off
-; GFX8-NEXT: v_sub_f32_e32 v1, v0, v3
-; GFX8-NEXT: s_mov_b32 s4, 0x3f7fffff
-; GFX8-NEXT: v_not_b32_e32 v2, -4.0
-; GFX8-NEXT: v_cmp_nle_f32_e32 vcc, s4, v1
; GFX8-NEXT: s_mov_b32 s4, 0x7f800000
-; GFX8-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
+; GFX8-NEXT: global_store_dword v[1:2], v3, off
+; GFX8-NEXT: v_fract_f32_e32 v1, v0
; GFX8-NEXT: v_cmp_neq_f32_e64 vcc, |v0|, s4
; GFX8-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
; GFX8-NEXT: s_waitcnt vmcnt(0)
@@ -5711,14 +5693,12 @@ define float @fract_pat_fcmp_oge_select(float %x, ptr addrspace(1) %iptr) #0 {
; GFX11-LABEL: fract_pat_fcmp_oge_select:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_floor_f32_e32 v3, v0
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_4) | instid1(VALU_DEP_2)
-; GFX11-NEXT: v_sub_f32_e32 v4, v0, v3
-; GFX11-NEXT: global_store_b32 v[1:2], v3, off
-; GFX11-NEXT: v_cmp_nle_f32_e32 vcc_lo, 0x3f7fffff, v4
-; GFX11-NEXT: v_cndmask_b32_e32 v4, 0x3f7fffff, v4, vcc_lo
+; GFX11-NEXT: v_fract_f32_e32 v3, v0
; GFX11-NEXT: v_cmp_neq_f32_e64 vcc_lo, 0x7f800000, |v0|
-; GFX11-NEXT: v_cndmask_b32_e32 v0, 0, v4, vcc_lo
+; GFX11-NEXT: v_floor_f32_e32 v4, v0
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3)
+; GFX11-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc_lo
+; GFX11-NEXT: global_store_b32 v[1:2], v4, off
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: fract_pat_fcmp_oge_select:
@@ -5728,30 +5708,36 @@ define float @fract_pat_fcmp_oge_select(float %x, ptr addrspace(1) %iptr) #0 {
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
-; GFX12-NEXT: v_floor_f32_e32 v3, v0
-; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX12-NEXT: v_sub_f32_e32 v4, v0, v3
-; GFX12-NEXT: global_store_b32 v[1:2], v3, off
-; GFX12-NEXT: v_cmp_nle_f32_e32 vcc_lo, 0x3f7fffff, v4
-; GFX12-NEXT: s_wait_alu depctr_va_vcc(0)
-; GFX12-NEXT: v_cndmask_b32_e32 v4, 0x3f7fffff, v4, vcc_lo
+; GFX12-NEXT: v_fract_f32_e32 v3, v0
; GFX12-NEXT: v_cmp_neq_f32_e64 vcc_lo, 0x7f800000, |v0|
+; GFX12-NEXT: v_floor_f32_e32 v4, v0
; GFX12-NEXT: s_wait_alu depctr_va_vcc(0)
-; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GFX12-NEXT: v_cndmask_b32_e32 v0, 0, v4, vcc_lo
+; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_3)
+; GFX12-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc_lo
+; GFX12-NEXT: global_store_b32 v[1:2], v4, off
; GFX12-NEXT: s_setpc_b64 s[30:31]
-; IR-LABEL: define float @fract_pat_fcmp_oge_select(
-; IR-SAME: float [[X:%.*]], ptr addrspace(1) [[IPTR:%.*]]) #[[ATTR1]] {
-; IR-NEXT: [[ENTRY:.*:]]
-; IR-NEXT: [[CALL:%.*]] = call float @llvm.floor.f32(float [[X]])
-; IR-NEXT: store float [[CALL]], ptr addrspace(1) [[IPTR]], align 4
-; IR-NEXT: [[SUB1:%.*]] = fsub float [[X]], [[CALL]]
-; IR-NEXT: [[CMP:%.*]] = fcmp oge float [[SUB1]], 0x3FEFFFFFE0000000
-; IR-NEXT: [[COND:%.*]] = select i1 [[CMP]], float 0x3FEFFFFFE0000000, float [[SUB1]]
-; IR-NEXT: [[FABS_X:%.*]] = call float @llvm.fabs.f32(float [[X]])
-; IR-NEXT: [[NOT_INF:%.*]] = fcmp une float [[FABS_X]], 0x7FF0000000000000
-; IR-NEXT: [[COND6:%.*]] = select i1 [[NOT_INF]], float [[COND]], float 0.000000e+00
-; IR-NEXT: ret float [[COND6]]
+; GFX6-IR-LABEL: define float @fract_pat_fcmp_oge_select(
+; GFX6-IR-SAME: float [[X:%.*]], ptr addrspace(1) [[IPTR:%.*]]) #[[ATTR1]] {
+; GFX6-IR-NEXT: [[ENTRY:.*:]]
+; GFX6-IR-NEXT: [[CALL:%.*]] = call float @llvm.floor.f32(float [[X]])
+; GFX6-IR-NEXT: store float [[CALL]], ptr addrspace(1) [[IPTR]], align 4
+; GFX6-IR-NEXT: [[SUB1:%.*]] = fsub float [[X]], [[CALL]]
+; GFX6-IR-NEXT: [[CMP:%.*]] = fcmp oge float [[SUB1]], 0x3FEFFFFFE0000000
+; GFX6-IR-NEXT: [[COND:%.*]] = select i1 [[CMP]], float 0x3FEFFFFFE0000000, float [[SUB1]]
+; GFX6-IR-NEXT: [[FABS_X:%.*]] = call float @llvm.fabs.f32(float [[X]])
+; GFX6-IR-NEXT: [[NOT_INF:%.*]] = fcmp une float [[FABS_X]], 0x7FF0000000000000
+; GFX6-IR-NEXT: [[COND6:%.*]] = select i1 [[NOT_INF]], float [[COND]], float 0.000000e+00
+; GFX6-IR-NEXT: ret float [[COND6]]
+; IR-FRACT-LABEL: define float @fract_pat_fcmp_oge_select(
+; IR-FRACT-SAME: float [[X:%.*]], ptr addrspace(1) [[IPTR:%.*]]) #[[ATTR1]] {
+; IR-FRACT-NEXT: [[ENTRY:.*:]]
+; IR-FRACT-NEXT: [[CALL:%.*]] = call float @llvm.floor.f32(float [[X]])
+; IR-FRACT-NEXT: store float [[CALL]], ptr addrspace(1) [[IPTR]], align 4
+; IR-FRACT-NEXT: [[COND:%.*]] = call float @llvm.amdgcn.fract.f32(float [[X]])
+; IR-FRACT-NEXT: [[FABS_X:%.*]] = call float @llvm.fabs.f32(float [[X]])
+; IR-FRACT-NEXT: [[NOT_INF:%.*]] = fcmp une float [[FABS_X]], 0x7FF0000000000000
+; IR-FRACT-NEXT: [[COND6:%.*]] = select i1 [[NOT_INF]], float [[COND]], float 0.000000e+00
+; IR-FRACT-NEXT: ret float [[COND6]]
entry:
%call = call float @llvm.floor.f32(float %x)
store float %call, ptr addrspace(1) %iptr, align 4
@@ -5791,9 +5777,8 @@ define float @fract_pat_fcmp_ogt_select(float %x, ptr addrspace(1) %iptr) #0 {
; GFX7-NEXT: s_mov_b32 s5, s6
; GFX7-NEXT: v_floor_f32_e32 v3, v0
; GFX7-NEXT: buffer_store_dword v3, v[1:2], s[4:7], 0 addr64
-; GFX7-NEXT: v_sub_f32_e32 v1, v0, v3
; GFX7-NEXT: s_mov_b32 s4, 0x7f800000
-; GFX7-NEXT: v_min_legacy_f32_e32 v1, 0x3f7fffff, v1
+; GFX7-NEXT: v_fract_f32_e32 v1, v0
; GFX7-NEXT: v_cmp_neq_f32_e64 vcc, |v0|, s4
; GFX7-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
; GFX7-NEXT: s_waitcnt vmcnt(0)
@@ -5803,13 +5788,9 @@ define float @fract_pat_fcmp_ogt_select(float %x, ptr addrspace(1) %iptr) #0 {
; GFX8: ; %bb.0: ; %entry
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_floor_f32_e32 v3, v0
-; GFX8-NEXT: global_store_dword v[1:2], v3, off
-; GFX8-NEXT: v_sub_f32_e32 v1, v0, v3
-; GFX8-NEXT: s_mov_b32 s4, 0x3f7fffff
-; GFX8-NEXT: v_not_b32_e32 v2, -4.0
-; GFX8-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v1
; GFX8-NEXT: s_mov_b32 s4, 0x7f800000
-; GFX8-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
+; GFX8-NEXT: global_store_dword v[1:2], v3, off
+; GFX8-NEXT: v_fract_f32_e32 v1, v0
; GFX8-NEXT: v_cmp_neq_f32_e64 vcc, |v0|, s4
; GFX8-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
; GFX8-NEXT: s_waitcnt vmcnt(0)
@@ -5818,14 +5799,12 @@ define float @fract_pat_fcmp_ogt_select(float %x, ptr addrspace(1) %iptr) #0 {
; GFX11-LABEL: fract_pat_fcmp_ogt_select:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_floor_f32_e32 v3, v0
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_4) | instid1(VALU_DEP_2)
-; GFX11-NEXT: v_sub_f32_e32 v4, v0, v3
-; GFX11-NEXT: global_store_b32 v[1:2], v3, off
-; GFX11-NEXT: v_cmp_nlt_f32_e32 vcc_lo, 0x3f7fffff, v4
-; GFX11-NEXT: v_cndmask_b32_e32 v4, 0x3f7fffff, v4, vcc_lo
+; GFX11-NEXT: v_fract_f32_e32 v3, v0
; GFX11-NEXT: v_cmp_neq_f32_e64 vcc_lo, 0x7f800000, |v0|
-; GFX11-NEXT: v_cndmask_b32_e32 v0, 0, v4, vcc_lo
+; GFX11-NEXT: v_floor_f32_e32 v4, v0
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3)
+; GFX11-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc_lo
+; GFX11-NEXT: global_store_b32 v[1:2], v4, off
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: fract_pat_fcmp_ogt_select:
@@ -5835,30 +5814,36 @@ define float @fract_pat_fcmp_ogt_select(float %x, ptr addrspace(1) %iptr) #0 {
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
-; GFX12-NEXT: v_floor_f32_e32 v3, v0
-; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX12-NEXT: v_sub_f32_e32 v4, v0, v3
-; GFX12-NEXT: global_store_b32 v[1:2], v3, off
-; GFX12-NEXT: v_cmp_nlt_f32_e32 vcc_lo, 0x3f7fffff, v4
-; GFX12-NEXT: s_wait_alu depctr_va_vcc(0)
-; GFX12-NEXT: v_cndmask_b32_e32 v4, 0x3f7fffff, v4, vcc_lo
+; GFX12-NEXT: v_fract_f32_e32 v3, v0
; GFX12-NEXT: v_cmp_neq_f32_e64 vcc_lo, 0x7f800000, |v0|
+; GFX12-NEXT: v_floor_f32_e32 v4, v0
; GFX12-NEXT: s_wait_alu depctr_va_vcc(0)
-; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GFX12-NEXT: v_cndmask_b32_e32 v0, 0, v4, vcc_lo
+; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_3)
+; GFX12-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc_lo
+; GFX12-NEXT: global_store_b32 v[1:2], v4, off
; GFX12-NEXT: s_setpc_b64 s[30:31]
-; IR-LABEL: define float @fract_pat_fcmp_ogt_select(
-; IR-SAME: float [[X:%.*]], ptr addrspace(1) [[IPTR:%.*]]) #[[ATTR1]] {
-; IR-NEXT: [[ENTRY:.*:]]
-; IR-NEXT: [[CALL:%.*]] = call float @llvm.floor.f32(float [[X]])
-; IR-NEXT: store float [[CALL]], ptr addrspace(1) [[IPTR]], align 4
-; IR-NEXT: [[SUB1:%.*]] = fsub float [[X]], [[CALL]]
-; IR-NEXT: [[CMP:%.*]] = fcmp ogt float [[SUB1]], 0x3FEFFFFFE0000000
-; IR-NEXT: [[COND:%.*]] = select i1 [[CMP]], float 0x3FEFFFFFE0000000, float [[SUB1]]
-; IR-NEXT: [[FABS_X:%.*]] = call float @llvm.fabs.f32(float [[X]])
-; IR-NEXT: [[NOT_INF:%.*]] = fcmp une float [[FABS_X]], 0x7FF0000000000000
-; IR-NEXT: [[COND6:%.*]] = select i1 [[NOT_INF]], float [[COND]], float 0.000000e+00
-; IR-NEXT: ret float [[COND6]]
+; GFX6-IR-LABEL: define float @fract_pat_fcmp_ogt_select(
+; GFX6-IR-SAME: float [[X:%.*]], ptr addrspace(1) [[IPTR:%.*]]) #[[ATTR1]] {
+; GFX6-IR-NEXT: [[ENTRY:.*:]]
+; GFX6-IR-NEXT: [[CALL:%.*]] = call float @llvm.floor.f32(float [[X]])
+; GFX6-IR-NEXT: store float [[CALL]], ptr addrspace(1) [[IPTR]], align 4
+; GFX6-IR-NEXT: [[SUB1:%.*]] = fsub float [[X]], [[CALL]]
+; GFX6-IR-NEXT: [[CMP:%.*]] = fcmp ogt float [[SUB1]], 0x3FEFFFFFE0000000
+; GFX6-IR-NEXT: [[COND:%.*]] = select i1 [[CMP]], float 0x3FEFFFFFE0000000, float [[SUB1]]
+; GFX6-IR-NEXT: [[FABS_X:%.*]] = call float @llvm.fabs.f32(float [[X]])
+; GFX6-IR-NEXT: [[NOT_INF:%.*]] = fcmp une float [[FABS_X]], 0x7FF0000000000000
+; GFX6-IR-NEXT: [[COND6:%.*]] = select i1 [[NOT_INF]], float [[COND]], float 0.000000e+00
+; GFX6-IR-NEXT: ret float [[COND6]]
+; IR-FRACT-LABEL: define float @fract_pat_fcmp_ogt_select(
+; IR-FRACT-SAME: float [[X:%.*]], ptr addrspace(1) [[IPTR:%.*]]) #[[ATTR1]] {
+; IR-FRACT-NEXT: [[ENTRY:.*:]]
+; IR-FRACT-NEXT: [[CALL:%.*]] = call float @llvm.floor.f32(float [[X]])
+; IR-FRACT-NEXT: store float [[CALL]], ptr addrspace(1) [[IPTR]], align 4
+; IR-FRACT-NEXT: [[COND:%.*]] = call float @llvm.amdgcn.fract.f32(float [[X]])
+; IR-FRACT-NEXT: [[FABS_X:%.*]] = call float @llvm.fabs.f32(float [[X]])
+; IR-FRACT-NEXT: [[NOT_INF:%.*]] = fcmp une float [[FABS_X]], 0x7FF0000000000000
+; IR-FRACT-NEXT: [[COND6:%.*]] = select i1 [[NOT_INF]], float [[COND]], float 0.000000e+00
+; IR-FRACT-NEXT: ret float [[COND6]]
entry:
%call = call float @llvm.floor.f32(float %x)
store float %call, ptr addrspace(1) %iptr, align 4
@@ -6103,12 +6088,8 @@ define float @fract_pat_minimum(float %x, ptr addrspace(1) %iptr) #0 {
; GFX7-NEXT: s_mov_b32 s5, s6
; GFX7-NEXT: v_floor_f32_e32 v3, v0
; GFX7-NEXT: buffer_store_dword v3, v[1:2], s[4:7], 0 addr64
-; GFX7-NEXT: v_sub_f32_e32 v1, v0, v3
-; GFX7-NEXT: v_min_f32_e32 v2, 0x3f7fffff, v1
-; GFX7-NEXT: v_mov_b32_e32 v3, 0x7fc00000
-; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v1, v1
; GFX7-NEXT: s_mov_b32 s4, 0x7f800000
-; GFX7-NEXT: v_cndmask_b32_e32 v1, v3, v2, vcc
+; GFX7-NEXT: v_fract_f32_e32 v1, v0
; GFX7-NEXT: v_cmp_neq_f32_e64 vcc, |v0|, s4
; GFX7-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
; GFX7-NEXT: s_waitcnt vmcnt(0)
@@ -6118,13 +6099,9 @@ define float @fract_pat_minimum(float %x, ptr addrspace(1) %iptr) #0 {
; GFX8: ; %bb.0: ; %entry
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_floor_f32_e32 v3, v0
-; GFX8-NEXT: global_store_dword v[1:2], v3, off
-; GFX8-NEXT: v_sub_f32_e32 v1, v0, v3
-; GFX8-NEXT: v_min_f32_e32 v2, 0x3f7fffff, v1
-; GFX8-NEXT: v_mov_b32_e32 v3, 0x7fc00000
-; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v1, v1
; GFX8-NEXT: s_mov_b32 s4, 0x7f800000
-; GFX8-NEXT: v_cndmask_b32_e32 v1, v3, v2, vcc
+; GFX8-NEXT: global_store_dword v[1:2], v3, off
+; GFX8-NEXT: v_fract_f32_e32 v1, v0
; GFX8-NEXT: v_cmp_neq_f32_e64 vcc, |v0|, s4
; GFX8-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
; GFX8-NEXT: s_waitcnt vmcnt(0)
@@ -6133,16 +6110,12 @@ define float @fract_pat_minimum(float %x, ptr addrspace(1) %iptr) #0 {
; GFX11-LABEL: fract_pat_minimum:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_floor_f32_e32 v3, v0
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2)
-; GFX11-NEXT: v_sub_f32_e32 v4, v0, v3
-; GFX11-NEXT: global_store_b32 v[1:2], v3, off
-; GFX11-NEXT: v_min_f32_e32 v5, 0x3f7fffff, v4
-; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v4, v4
-; GFX11-NEXT: v_cndmask_b32_e32 v4, 0x7fc00000, v5, vcc_lo
+; GFX11-NEXT: v_fract_f32_e32 v3, v0
; GFX11-NEXT: v_cmp_neq_f32_e64 vcc_lo, 0x7f800000, |v0|
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GFX11-NEXT: v_cndmask_b32_e32 v0, 0, v4, vcc_lo
+; GFX11-NEXT: v_floor_f32_e32 v4, v0
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3)
+; GFX11-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc_lo
+; GFX11-NEXT: global_store_b32 v[1:2], v4, off
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: fract_pat_minimum:
@@ -6152,26 +6125,35 @@ define float @fract_pat_minimum(float %x, ptr addrspace(1) %iptr) #0 {
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
-; GFX12-NEXT: v_floor_f32_e32 v3, v0
+; GFX12-NEXT: v_fract_f32_e32 v3, v0
; GFX12-NEXT: v_cmp_neq_f32_e64 vcc_lo, 0x7f800000, |v0|
-; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_3) | instid1(VALU_DEP_1)
-; GFX12-NEXT: v_sub_f32_e32 v4, v0, v3
-; GFX12-NEXT: global_store_b32 v[1:2], v3, off
-; GFX12-NEXT: v_minimum_f32 v4, 0x3f7fffff, v4
+; GFX12-NEXT: v_floor_f32_e32 v4, v0
; GFX12-NEXT: s_wait_alu depctr_va_vcc(0)
-; GFX12-NEXT: v_cndmask_b32_e32 v0, 0, v4, vcc_lo
+; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_3)
+; GFX12-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc_lo
+; GFX12-NEXT: global_store_b32 v[1:2], v4, off
; GFX12-NEXT: s_setpc_b64 s[30:31]
-; IR-LABEL: define float @fract_pat_minimum(
-; IR-SAME: float [[X:%.*]], ptr addrspace(1) [[IPTR:%.*]]) #[[ATTR1]] {
-; IR-NEXT: [[ENTRY:.*:]]
-; IR-NEXT: [[CALL:%.*]] = call float @llvm.floor.f32(float [[X]])
-; IR-NEXT: store float [[CALL]], ptr addrspace(1) [[IPTR]], align 4
-; IR-NEXT: [[SUB1:%.*]] = fsub float [[X]], [[CALL]]
-; IR-NEXT: [[MIN:%.*]] = call float @llvm.minimum.f32(float [[SUB1]], float 0x3FEFFFFFE0000000)
-; IR-NEXT: [[FABS_X:%.*]] = call float @llvm.fabs.f32(float [[X]])
-; IR-NEXT: [[NOT_INF:%.*]] = fcmp une float [[FABS_X]], 0x7FF0000000000000
-; IR-NEXT: [[COND6:%.*]] = select i1 [[NOT_INF]], float [[MIN]], float 0.000000e+00
-; IR-NEXT: ret float [[COND6]]
+; GFX6-IR-LABEL: define float @fract_pat_minimum(
+; GFX6-IR-SAME: float [[X:%.*]], ptr addrspace(1) [[IPTR:%.*]]) #[[ATTR1]] {
+; GFX6-IR-NEXT: [[ENTRY:.*:]]
+; GFX6-IR-NEXT: [[CALL:%.*]] = call float @llvm.floor.f32(float [[X]])
+; GFX6-IR-NEXT: store float [[CALL]], ptr addrspace(1) [[IPTR]], align 4
+; GFX6-IR-NEXT: [[SUB1:%.*]] = fsub float [[X]], [[CALL]]
+; GFX6-IR-NEXT: [[MIN:%.*]] = call float @llvm.minimum.f32(float [[SUB1]], float 0x3FEFFFFFE0000000)
+; GFX6-IR-NEXT: [[FABS_X:%.*]] = call float @llvm.fabs.f32(float [[X]])
+; GFX6-IR-NEXT: [[NOT_INF:%.*]] = fcmp une float [[FABS_X]], 0x7FF0000000000000
+; GFX6-IR-NEXT: [[COND6:%.*]] = select i1 [[NOT_INF]], float [[MIN]], float 0.000000e+00
+; GFX6-IR-NEXT: ret float [[COND6]]
+; IR-FRACT-LABEL: define float @fract_pat_minimum(
+; IR-FRACT-SAME: float [[X:%.*]], ptr addrspace(1) [[IPTR:%.*]]) #[[ATTR1]] {
+; IR-FRACT-NEXT: [[ENTRY:.*:]]
+; IR-FRACT-NEXT: [[CALL:%.*]] = call float @llvm.floor.f32(float [[X]])
+; IR-FRACT-NEXT: store float [[CALL]], ptr addrspace(1) [[IPTR]], align 4
+; IR-FRACT-NEXT: [[MIN:%.*]] = call nnan float @llvm.amdgcn.fract.f32(float [[X]])
+; IR-FRACT-NEXT: [[FABS_X:%.*]] = call float @llvm.fabs.f32(float [[X]])
+; IR-FRACT-NEXT: [[NOT_INF:%.*]] = fcmp une float [[FABS_X]], 0x7FF0000000000000
+; IR-FRACT-NEXT: [[COND6:%.*]] = select i1 [[NOT_INF]], float [[MIN]], float 0.000000e+00
+; IR-FRACT-NEXT: ret float [[COND6]]
entry:
%call = call float @llvm.floor.f32(float %x)
store float %call, ptr addrspace(1) %iptr, align 4
@@ -6195,30 +6177,19 @@ define float @core_fract_pat_fcmp_oge_select(float %x) #0 {
; GFX7-LABEL: core_fract_pat_fcmp_oge_select:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_floor_f32_e32 v1, v0
-; GFX7-NEXT: v_sub_f32_e32 v0, v0, v1
-; GFX7-NEXT: v_min_legacy_f32_e32 v0, 0x3f7fffff, v0
+; GFX7-NEXT: v_fract_f32_e32 v0, v0
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: core_fract_pat_fcmp_oge_select:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_floor_f32_e32 v1, v0
-; GFX8-NEXT: v_sub_f32_e32 v0, v0, v1
-; GFX8-NEXT: s_mov_b32 s4, 0x3f7fffff
-; GFX8-NEXT: v_not_b32_e32 v1, -4.0
-; GFX8-NEXT: v_cmp_nle_f32_e32 vcc, s4, v0
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
+; GFX8-NEXT: v_fract_f32_e32 v0, v0
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: core_fract_pat_fcmp_oge_select:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_floor_f32_e32 v1, v0
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT: v_sub_f32_e32 v0, v0, v1
-; GFX11-NEXT: v_cmp_nle_f32_e32 vcc_lo, 0x3f7fffff, v0
-; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x3f7fffff, v0, vcc_lo
+; GFX11-NEXT: v_fract_f32_e32 v0, v0
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: core_fract_pat_fcmp_oge_select:
@@ -6228,20 +6199,19 @@ define float @core_fract_pat_fcmp_oge_select(float %x) #0 {
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
-; GFX12-NEXT: v_floor_f32_e32 v1, v0
-; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX12-NEXT: v_sub_f32_e32 v0, v0, v1
-; GFX12-NEXT: v_cmp_nle_f32_e32 vcc_lo, 0x3f7fffff, v0
-; GFX12-NEXT: s_wait_alu depctr_va_vcc(0)
-; GFX12-NEXT: v_cndmask_b32_e32 v0, 0x3f7fffff, v0, vcc_lo
+; GFX12-NEXT: v_fract_f32_e32 v0, v0
; GFX12-NEXT: s_setpc_b64 s[30:31]
-; IR-LABEL: define float @core_fract_pat_fcmp_oge_select(
-; IR-SAME: float [[X:%.*]]) #[[ATTR1]] {
-; IR-NEXT: [[FLOOR:%.*]] = call float @llvm.floor.f32(float [[X]])
-; IR-NEXT: [[SUB_FLOOR:%.*]] = fsub float [[X]], [[FLOOR]]
-; IR-NEXT: [[OGE_MIN_CONST:%.*]] = fcmp oge float [[SUB_FLOOR]], 0x3FEFFFFFE0000000
-; IR-NEXT: [[SELECT:%.*]] = select i1 [[OGE_MIN_CONST]], float 0x3FEFFFFFE0000000, float [[SUB_FLOOR]]
-; IR-NEXT: ret float [[SELECT]]
+; GFX6-IR-LABEL: define float @core_fract_pat_fcmp_oge_select(
+; GFX6-IR-SAME: float [[X:%.*]]) #[[ATTR1]] {
+; GFX6-IR-NEXT: [[FLOOR:%.*]] = call float @llvm.floor.f32(float [[X]])
+; GFX6-IR-NEXT: [[SUB_FLOOR:%.*]] = fsub float [[X]], [[FLOOR]]
+; GFX6-IR-NEXT: [[OGE_MIN_CONST:%.*]] = fcmp oge float [[SUB_FLOOR]], 0x3FEFFFFFE0000000
+; GFX6-IR-NEXT: [[SELECT:%.*]] = select i1 [[OGE_MIN_CONST]], float 0x3FEFFFFFE0000000, float [[SUB_FLOOR]]
+; GFX6-IR-NEXT: ret float [[SELECT]]
+; IR-FRACT-LABEL: define float @core_fract_pat_fcmp_oge_select(
+; IR-FRACT-SAME: float [[X:%.*]]) #[[ATTR1]] {
+; IR-FRACT-NEXT: [[SELECT:%.*]] = call float @llvm.amdgcn.fract.f32(float [[X]])
+; IR-FRACT-NEXT: ret float [[SELECT]]
%floor = call float @llvm.floor.f32(float %x)
%sub.floor = fsub float %x, %floor
%oge.min.const = fcmp oge float %sub.floor, 0x3FEFFFFFE0000000
@@ -6264,35 +6234,19 @@ define float @core_fract_pat_minimum(float %x) #0 {
; GFX7-LABEL: core_fract_pat_minimum:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_floor_f32_e32 v1, v0
-; GFX7-NEXT: v_sub_f32_e32 v0, v0, v1
-; GFX7-NEXT: v_min_f32_e32 v1, 0x3f7fffff, v0
-; GFX7-NEXT: v_mov_b32_e32 v2, 0x7fc00000
-; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v0, v0
-; GFX7-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
+; GFX7-NEXT: v_fract_f32_e32 v0, v0
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: core_fract_pat_minimum:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_floor_f32_e32 v1, v0
-; GFX8-NEXT: v_sub_f32_e32 v0, v0, v1
-; GFX8-NEXT: v_min_f32_e32 v1, 0x3f7fffff, v0
-; GFX8-NEXT: v_mov_b32_e32 v2, 0x7fc00000
-; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v0, v0
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
+; GFX8-NEXT: v_fract_f32_e32 v0, v0
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: core_fract_pat_minimum:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_floor_f32_e32 v1, v0
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT: v_sub_f32_e32 v0, v0, v1
-; GFX11-NEXT: v_min_f32_e32 v1, 0x3f7fffff, v0
-; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v0, v0
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v1, vcc_lo
+; GFX11-NEXT: v_fract_f32_e32 v0, v0
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: core_fract_pat_minimum:
@@ -6302,17 +6256,18 @@ define float @core_fract_pat_minimum(float %x) #0 {
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
-; GFX12-NEXT: v_floor_f32_e32 v1, v0
-; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX12-NEXT: v_sub_f32_e32 v0, v0, v1
-; GFX12-NEXT: v_minimum_f32 v0, 0x3f7fffff, v0
+; GFX12-NEXT: v_fract_f32_e32 v0, v0
; GFX12-NEXT: s_setpc_b64 s[30:31]
-; IR-LABEL: define float @core_fract_pat_minimum(
-; IR-SAME: float [[X:%.*]]) #[[ATTR1]] {
-; IR-NEXT: [[FLOOR:%.*]] = call float @llvm.floor.f32(float [[X]])
-; IR-NEXT: [[SUB_FLOOR:%.*]] = fsub float [[X]], [[FLOOR]]
-; IR-NEXT: [[MIN:%.*]] = call float @llvm.minimum.f32(float [[SUB_FLOOR]], float 0x3FEFFFFFE0000000)
-; IR-NEXT: ret float [[MIN]]
+; GFX6-IR-LABEL: define float @core_fract_pat_minimum(
+; GFX6-IR-SAME: float [[X:%.*]]) #[[ATTR1]] {
+; GFX6-IR-NEXT: [[FLOOR:%.*]] = call float @llvm.floor.f32(float [[X]])
+; GFX6-IR-NEXT: [[SUB_FLOOR:%.*]] = fsub float [[X]], [[FLOOR]]
+; GFX6-IR-NEXT: [[MIN:%.*]] = call float @llvm.minimum.f32(float [[SUB_FLOOR]], float 0x3FEFFFFFE0000000)
+; GFX6-IR-NEXT: ret float [[MIN]]
+; IR-FRACT-LABEL: define float @core_fract_pat_minimum(
+; IR-FRACT-SAME: float [[X:%.*]]) #[[ATTR1]] {
+; IR-FRACT-NEXT: [[MIN:%.*]] = call nnan float @llvm.amdgcn.fract.f32(float [[X]])
+; IR-FRACT-NEXT: ret float [[MIN]]
%floor = call float @llvm.floor.f32(float %x)
%sub.floor = fsub float %x, %floor
%min = call float @llvm.minimum.f32(float %sub.floor, float 0x3FEFFFFFE0000000)
@@ -6333,36 +6288,22 @@ define <2 x float> @core_fract_pat_fcmp_oge_select_v2f32(<2 x float> %x) #0 {
; GFX7-LABEL: core_fract_pat_fcmp_oge_select_v2f32:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_floor_f32_e32 v2, v0
-; GFX7-NEXT: v_floor_f32_e32 v3, v1
-; GFX7-NEXT: v_sub_f32_e32 v0, v0, v2
-; GFX7-NEXT: v_sub_f32_e32 v1, v1, v3
-; GFX7-NEXT: v_min_legacy_f32_e32 v0, 0x3f7fffff, v0
+; GFX7-NEXT: v_fract_f32_e32 v0, v0
+; GFX7-NEXT: v_fract_f32_e32 v1, v1
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: core_fract_pat_fcmp_oge_select_v2f32:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_floor_f32_e32 v2, v0
-; GFX8-NEXT: v_sub_f32_e32 v0, v0, v2
-; GFX8-NEXT: s_mov_b32 s4, 0x3f7fffff
-; GFX8-NEXT: v_floor_f32_e32 v3, v1
-; GFX8-NEXT: v_not_b32_e32 v2, -4.0
-; GFX8-NEXT: v_cmp_nle_f32_e32 vcc, s4, v0
-; GFX8-NEXT: v_sub_f32_e32 v1, v1, v3
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
+; GFX8-NEXT: v_fract_f32_e32 v0, v0
+; GFX8-NEXT: v_fract_f32_e32 v1, v1
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: core_fract_pat_fcmp_oge_select_v2f32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_floor_f32_e32 v2, v0
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-NEXT: v_sub_f32_e32 v0, v0, v2
-; GFX11-NEXT: v_floor_f32_e32 v2, v1
-; GFX11-NEXT: v_cmp_nle_f32_e32 vcc_lo, 0x3f7fffff, v0
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GFX11-NEXT: v_dual_sub_f32 v1, v1, v2 :: v_dual_cndmask_b32 v0, 0x3f7fffff, v0
+; GFX11-NEXT: v_fract_f32_e32 v0, v0
+; GFX11-NEXT: v_fract_f32_e32 v1, v1
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: core_fract_pat_fcmp_oge_select_v2f32:
@@ -6372,22 +6313,25 @@ define <2 x float> @core_fract_pat_fcmp_oge_select_v2f32(<2 x float> %x) #0 {
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
-; GFX12-NEXT: v_floor_f32_e32 v2, v0
-; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX12-NEXT: v_sub_f32_e32 v0, v0, v2
-; GFX12-NEXT: v_floor_f32_e32 v2, v1
-; GFX12-NEXT: v_cmp_nle_f32_e32 vcc_lo, 0x3f7fffff, v0
-; GFX12-NEXT: s_wait_alu depctr_va_vcc(0)
-; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GFX12-NEXT: v_dual_sub_f32 v1, v1, v2 :: v_dual_cndmask_b32 v0, 0x3f7fffff, v0
+; GFX12-NEXT: v_fract_f32_e32 v0, v0
+; GFX12-NEXT: v_fract_f32_e32 v1, v1
; GFX12-NEXT: s_setpc_b64 s[30:31]
-; IR-LABEL: define <2 x float> @core_fract_pat_fcmp_oge_select_v2f32(
-; IR-SAME: <2 x float> [[X:%.*]]) #[[ATTR1]] {
-; IR-NEXT: [[FLOOR:%.*]] = call <2 x float> @llvm.floor.v2f32(<2 x float> [[X]])
-; IR-NEXT: [[SUB_FLOOR:%.*]] = fsub <2 x float> [[X]], [[FLOOR]]
-; IR-NEXT: [[OGE_MIN_CONST:%.*]] = fcmp oge <2 x float> [[SUB_FLOOR]], <float 0x3FEFFFFFE0000000, float poison>
-; IR-NEXT: [[SELECT:%.*]] = select <2 x i1> [[OGE_MIN_CONST]], <2 x float> <float 0x3FEFFFFFE0000000, float poison>, <2 x float> [[SUB_FLOOR]]
-; IR-NEXT: ret <2 x float> [[SELECT]]
+; GFX6-IR-LABEL: define <2 x float> @core_fract_pat_fcmp_oge_select_v2f32(
+; GFX6-IR-SAME: <2 x float> [[X:%.*]]) #[[ATTR1]] {
+; GFX6-IR-NEXT: [[FLOOR:%.*]] = call <2 x float> @llvm.floor.v2f32(<2 x float> [[X]])
+; GFX6-IR-NEXT: [[SUB_FLOOR:%.*]] = fsub <2 x float> [[X]], [[FLOOR]]
+; GFX6-IR-NEXT: [[OGE_MIN_CONST:%.*]] = fcmp oge <2 x float> [[SUB_FLOOR]], <float 0x3FEFFFFFE0000000, float poison>
+; GFX6-IR-NEXT: [[SELECT:%.*]] = select <2 x i1> [[OGE_MIN_CONST]], <2 x float> <float 0x3FEFFFFFE0000000, float poison>, <2 x float> [[SUB_FLOOR]]
+; GFX6-IR-NEXT: ret <2 x float> [[SELECT]]
+; IR-FRACT-LABEL: define <2 x float> @core_fract_pat_fcmp_oge_select_v2f32(
+; IR-FRACT-SAME: <2 x float> [[X:%.*]]) #[[ATTR1]] {
+; IR-FRACT-NEXT: [[TMP1:%.*]] = extractelement <2 x float> [[X]], i64 0
+; IR-FRACT-NEXT: [[TMP2:%.*]] = extractelement <2 x float> [[X]], i64 1
+; IR-FRACT-NEXT: [[TMP3:%.*]] = call float @llvm.amdgcn.fract.f32(float [[TMP1]])
+; IR-FRACT-NEXT: [[TMP4:%.*]] = call float @llvm.amdgcn.fract.f32(float [[TMP2]])
+; IR-FRACT-NEXT: [[TMP5:%.*]] = insertelement <2 x float> poison, float [[TMP3]], i64 0
+; IR-FRACT-NEXT: [[SELECT:%.*]] = insertelement <2 x float> [[TMP5]], float [[TMP4]], i64 1
+; IR-FRACT-NEXT: ret <2 x float> [[SELECT]]
%floor = call <2 x float> @llvm.floor.v2f32(<2 x float> %x)
%sub.floor = fsub <2 x float> %x, %floor
%oge.min.const = fcmp oge <2 x float> %sub.floor, <float 0x3FEFFFFFE0000000, float poison>
@@ -6411,37 +6355,22 @@ define <2 x float> @core_fract_pat_minimum_v2f32(<2 x float> %x) #0 {
; GFX7-LABEL: core_fract_pat_minimum_v2f32:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_floor_f32_e32 v1, v0
-; GFX7-NEXT: v_sub_f32_e32 v0, v0, v1
-; GFX7-NEXT: v_min_f32_e32 v1, 0x3f7fffff, v0
-; GFX7-NEXT: v_mov_b32_e32 v2, 0x7fc00000
-; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v0, v0
-; GFX7-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
-; GFX7-NEXT: v_mov_b32_e32 v1, 0x7fc00000
+; GFX7-NEXT: v_fract_f32_e32 v0, v0
+; GFX7-NEXT: v_fract_f32_e32 v1, v1
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: core_fract_pat_minimum_v2f32:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_floor_f32_e32 v1, v0
-; GFX8-NEXT: v_sub_f32_e32 v0, v0, v1
-; GFX8-NEXT: v_min_f32_e32 v1, 0x3f7fffff, v0
-; GFX8-NEXT: v_mov_b32_e32 v2, 0x7fc00000
-; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v0, v0
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
-; GFX8-NEXT: v_mov_b32_e32 v1, 0x7fc00000
+; GFX8-NEXT: v_fract_f32_e32 v0, v0
+; GFX8-NEXT: v_fract_f32_e32 v1, v1
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: core_fract_pat_minimum_v2f32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_floor_f32_e32 v1, v0
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT: v_sub_f32_e32 v0, v0, v1
-; GFX11-NEXT: v_min_f32_e32 v1, 0x3f7fffff, v0
-; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v0, v0
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GFX11-NEXT: v_dual_cndmask_b32 v0, 0x7fc00000, v1 :: v_dual_mov_b32 v1, 0x7fc00000
+; GFX11-NEXT: v_fract_f32_e32 v0, v0
+; GFX11-NEXT: v_fract_f32_e32 v1, v1
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: core_fract_pat_minimum_v2f32:
@@ -6451,20 +6380,24 @@ define <2 x float> @core_fract_pat_minimum_v2f32(<2 x float> %x) #0 {
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
-; GFX12-NEXT: v_floor_f32_e32 v2, v0
-; GFX12-NEXT: v_floor_f32_e32 v3, v1
-; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX12-NEXT: v_dual_sub_f32 v0, v0, v2 :: v_dual_sub_f32 v1, v1, v3
-; GFX12-NEXT: v_minimum_f32 v0, 0x3f7fffff, v0
-; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GFX12-NEXT: v_minimum_f32 v1, v1, s0
+; GFX12-NEXT: v_fract_f32_e32 v0, v0
+; GFX12-NEXT: v_fract_f32_e32 v1, v1
; GFX12-NEXT: s_setpc_b64 s[30:31]
-; IR-LABEL: define <2 x float> @core_fract_pat_minimum_v2f32(
-; IR-SAME: <2 x float> [[X:%.*]]) #[[ATTR1]] {
-; IR-NEXT: [[FLOOR:%.*]] = call <2 x float> @llvm.floor.v2f32(<2 x float> [[X]])
-; IR-NEXT: [[SUB_FLOOR:%.*]] = fsub <2 x float> [[X]], [[FLOOR]]
-; IR-NEXT: [[MIN:%.*]] = call <2 x float> @llvm.minimum.v2f32(<2 x float> [[SUB_FLOOR]], <2 x float> <float 0x3FEFFFFFE0000000, float poison>)
-; IR-NEXT: ret <2 x float> [[MIN]]
+; GFX6-IR-LABEL: define <2 x float> @core_fract_pat_minimum_v2f32(
+; GFX6-IR-SAME: <2 x float> [[X:%.*]]) #[[ATTR1]] {
+; GFX6-IR-NEXT: [[FLOOR:%.*]] = call <2 x float> @llvm.floor.v2f32(<2 x float> [[X]])
+; GFX6-IR-NEXT: [[SUB_FLOOR:%.*]] = fsub <2 x float> [[X]], [[FLOOR]]
+; GFX6-IR-NEXT: [[MIN:%.*]] = call <2 x float> @llvm.minimum.v2f32(<2 x float> [[SUB_FLOOR]], <2 x float> <float 0x3FEFFFFFE0000000, float poison>)
+; GFX6-IR-NEXT: ret <2 x float> [[MIN]]
+; IR-FRACT-LABEL: define <2 x float> @core_fract_pat_minimum_v2f32(
+; IR-FRACT-SAME: <2 x float> [[X:%.*]]) #[[ATTR1]] {
+; IR-FRACT-NEXT: [[TMP1:%.*]] = extractelement <2 x float> [[X]], i64 0
+; IR-FRACT-NEXT: [[TMP2:%.*]] = extractelement <2 x float> [[X]], i64 1
+; IR-FRACT-NEXT: [[TMP3:%.*]] = call nnan float @llvm.amdgcn.fract.f32(float [[TMP1]])
+; IR-FRACT-NEXT: [[TMP4:%.*]] = call nnan float @llvm.amdgcn.fract.f32(float [[TMP2]])
+; IR-FRACT-NEXT: [[TMP5:%.*]] = insertelement <2 x float> poison, float [[TMP3]], i64 0
+; IR-FRACT-NEXT: [[MIN:%.*]] = insertelement <2 x float> [[TMP5]], float [[TMP4]], i64 1
+; IR-FRACT-NEXT: ret <2 x float> [[MIN]]
%floor = call <2 x float> @llvm.floor.v2f32(<2 x float> %x)
%sub.floor = fsub <2 x float> %x, %floor
%min = call <2 x float> @llvm.minimum.v2f32(<2 x float> %sub.floor, <2 x float> <float 0x3FEFFFFFE0000000, float poison>)
More information about the llvm-commits mailing list