[llvm] 5aa6e24 - AMDGPU/GlobalISel: Legalize f64 G_FFLOOR for SI
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Wed Feb 5 11:32:31 PST 2020
Author: Matt Arsenault
Date: 2020-02-05T14:32:01-05:00
New Revision: 5aa6e246a1e44655a66581bc2ca6a20e3051e7e9
URL: https://github.com/llvm/llvm-project/commit/5aa6e246a1e44655a66581bc2ca6a20e3051e7e9
DIFF: https://github.com/llvm/llvm-project/commit/5aa6e246a1e44655a66581bc2ca6a20e3051e7e9.diff
LOG: AMDGPU/GlobalISel: Legalize f64 G_FFLOOR for SI
Compared to the DAG version, use cmp ord instead of cmp_class for the
nan check, but mostly try to match the existing pattern.
I think the sign doesn't matter for fract, so we could do a little
better with the source modifier matching.
I think this is also still broken as in D22898, but I'm leaving it
as-is for now since I don't have an SI system to test on.
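
For reference, the expansion is equivalent to this scalar sketch (a
minimal sketch only: hw_fract stands in for the raw V_FRACT_F64 result
and is not a real API; the constant is the largest double below 1.0,
bit pattern 0x3fefffffffffffff):

  #include <cmath>

  // floor(x) = x - fract(x), with the buggy hardware fract clamped
  // below 1.0 and nan inputs passed through by the compare + select.
  double si_floor_f64(double x, double hw_fract /* = V_FRACT_F64(x) */) {
    double clamped = std::fmin(hw_fract, 0x1.fffffffffffffp-1);
    double corrected = std::isnan(x) ? x : clamped;
    return x - corrected;
  }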
Added:
llvm/test/CodeGen/AMDGPU/GlobalISel/floor.f64.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ffloor.s32.mir
llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ffloor.s64.mir
Modified:
llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h
llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
llvm/lib/Target/AMDGPU/SIInstructions.td
llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ffloor.mir
llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fptosi.mir
llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fptoui.mir
Removed:
llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ffloor.mir
################################################################################
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h b/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h
index 1823f283e005..003903e7c480 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h
@@ -1285,6 +1285,30 @@ class MachineIRBuilder {
return buildInstr(TargetOpcode::G_FMUL, {Dst}, {Src0, Src1}, Flags);
}
+ MachineInstrBuilder buildFMinNum(const DstOp &Dst, const SrcOp &Src0,
+ const SrcOp &Src1,
+ Optional<unsigned> Flags = None) {
+ return buildInstr(TargetOpcode::G_FMINNUM, {Dst}, {Src0, Src1}, Flags);
+ }
+
+ MachineInstrBuilder buildFMaxNum(const DstOp &Dst, const SrcOp &Src0,
+ const SrcOp &Src1,
+ Optional<unsigned> Flags = None) {
+ return buildInstr(TargetOpcode::G_FMAXNUM, {Dst}, {Src0, Src1}, Flags);
+ }
+
+ MachineInstrBuilder buildFMinNumIEEE(const DstOp &Dst, const SrcOp &Src0,
+ const SrcOp &Src1,
+ Optional<unsigned> Flags = None) {
+ return buildInstr(TargetOpcode::G_FMINNUM_IEEE, {Dst}, {Src0, Src1}, Flags);
+ }
+
+ MachineInstrBuilder buildFMaxNumIEEE(const DstOp &Dst, const SrcOp &Src0,
+ const SrcOp &Src1,
+ Optional<unsigned> Flags = None) {
+ return buildInstr(TargetOpcode::G_FMAXNUM_IEEE, {Dst}, {Src0, Src1}, Flags);
+ }
+
MachineInstrBuilder buildShl(const DstOp &Dst, const SrcOp &Src0,
const SrcOp &Src1,
Optional<unsigned> Flags = None) {
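
These follow the existing build* wrapper pattern; a hedged usage sketch
(B is a MachineIRBuilder; Fract, Const and IsIEEE are illustrative
stand-ins for values from the lowering below):

  // Pick the IEEE-aware min when the function runs in IEEE mode, so
  // the G_FMINNUM_IEEE can select directly to V_MIN_F64.
  LLT S64 = LLT::scalar(64);
  auto Min = IsIEEE ? B.buildFMinNumIEEE(S64, Fract, Const)
                    : B.buildFMinNum(S64, Fract, Const);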
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index 8cca8e446798..c3790fdb359c 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -417,10 +417,23 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
.scalarize(0)
.clampScalar(0, S16, S64);
} else {
- getActionDefinitionsBuilder({G_FSQRT, G_FFLOOR})
+ getActionDefinitionsBuilder(G_FSQRT)
.legalFor({S32, S64})
.scalarize(0)
.clampScalar(0, S32, S64);
+
+ if (ST.hasFractBug()) {
+ getActionDefinitionsBuilder(G_FFLOOR)
+ .customFor({S64})
+ .legalFor({S32, S64})
+ .scalarize(0)
+ .clampScalar(0, S32, S64);
+ } else {
+ getActionDefinitionsBuilder(G_FFLOOR)
+ .legalFor({S32, S64})
+ .scalarize(0)
+ .clampScalar(0, S32, S64);
+ }
}
getActionDefinitionsBuilder(G_FPTRUNC)
@@ -1249,6 +1262,8 @@ bool AMDGPULegalizerInfo::legalizeCustom(MachineInstr &MI,
return legalizeFlog(MI, B, numbers::ln2f / numbers::ln10f);
case TargetOpcode::G_FEXP:
return legalizeFExp(MI, B);
+ case TargetOpcode::G_FFLOOR:
+ return legalizeFFloor(MI, MRI, B);
case TargetOpcode::G_BUILD_VECTOR:
return legalizeBuildVector(MI, MRI, B);
default:
@@ -1973,6 +1988,75 @@ bool AMDGPULegalizerInfo::legalizeFExp(MachineInstr &MI,
return true;
}
+// Find a source register, ignoring any possible source modifiers.
+static Register stripAnySourceMods(Register OrigSrc, MachineRegisterInfo &MRI) {
+ Register ModSrc = OrigSrc;
+ if (MachineInstr *SrcFNeg = getOpcodeDef(AMDGPU::G_FNEG, ModSrc, MRI)) {
+ ModSrc = SrcFNeg->getOperand(1).getReg();
+ if (MachineInstr *SrcFAbs = getOpcodeDef(AMDGPU::G_FABS, ModSrc, MRI))
+ ModSrc = SrcFAbs->getOperand(1).getReg();
+ } else if (MachineInstr *SrcFAbs = getOpcodeDef(AMDGPU::G_FABS, ModSrc, MRI))
+ ModSrc = SrcFAbs->getOperand(1).getReg();
+ return ModSrc;
+}
+
+bool AMDGPULegalizerInfo::legalizeFFloor(MachineInstr &MI,
+ MachineRegisterInfo &MRI,
+ MachineIRBuilder &B) const {
+ B.setInstr(MI);
+
+ const LLT S1 = LLT::scalar(1);
+ const LLT S64 = LLT::scalar(64);
+ Register Dst = MI.getOperand(0).getReg();
+ Register OrigSrc = MI.getOperand(1).getReg();
+ unsigned Flags = MI.getFlags();
+ assert(ST.hasFractBug() && MRI.getType(Dst) == S64 &&
+ "this should not have been custom lowered");
+
+ // V_FRACT is buggy on SI, so the F32 version is never used and (x-floor(x))
+ // is used instead. However, SI doesn't have V_FLOOR_F64, so the most
+ // efficient way to implement it is using V_FRACT_F64. The workaround for the
+ // V_FRACT bug is:
+ // fract(x) = isnan(x) ? x : min(V_FRACT(x), 0.99999999999999999)
+ //
+ // Convert floor(x) to (x - fract(x))
+
+ auto Fract = B.buildIntrinsic(Intrinsic::amdgcn_fract, {S64}, false)
+ .addUse(OrigSrc)
+ .setMIFlags(Flags);
+
+ // Give source modifier matching some assistance before obscuring a foldable
+ // pattern.
+
+ // TODO: We can avoid the neg on the fract? The input sign to fract
+ // shouldn't matter?
+ Register ModSrc = stripAnySourceMods(OrigSrc, MRI);
+
+ auto Const = B.buildFConstant(S64, BitsToDouble(0x3fefffffffffffff));
+
+ Register Min = MRI.createGenericVirtualRegister(S64);
+
+ // We don't need to concern ourselves with the snan handling difference, so
+ // use the one which will directly select.
+ const SIMachineFunctionInfo *MFI = B.getMF().getInfo<SIMachineFunctionInfo>();
+ if (MFI->getMode().IEEE)
+ B.buildFMinNumIEEE(Min, Fract, Const, Flags);
+ else
+ B.buildFMinNum(Min, Fract, Const, Flags);
+
+ Register CorrectedFract = Min;
+ if (!MI.getFlag(MachineInstr::FmNoNans)) {
+ auto IsNan = B.buildFCmp(CmpInst::FCMP_ORD, S1, ModSrc, ModSrc, Flags);
+ CorrectedFract = B.buildSelect(S64, IsNan, ModSrc, Min, Flags).getReg(0);
+ }
+
+ auto NegFract = B.buildFNeg(S64, CorrectedFract, Flags);
+ B.buildFAdd(Dst, OrigSrc, NegFract, Flags);
+
+ MI.eraseFromParent();
+ return true;
+}
+
// Turn an illegal packed v2s16 build vector into bit operations.
// TODO: This should probably be a bitcast action in LegalizerHelper.
bool AMDGPULegalizerInfo::legalizeBuildVector(
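
The block comment in legalizeFFloor gives the identity being used; as a
hand-worked spot check with exactly representable values (my arithmetic,
not from a test run):

  floor(2.75):  fract = 0.75, min(0.75, 0.99...) = 0.75, and 2.75 - 0.75 = 2.0
  floor(-1.25): fract = x - floor(x) = 0.75, and -1.25 - 0.75 = -2.0
  floor(nan):   the select has to take the raw input in the unordered
                (isnan) case, so the final subtract yields nan.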
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
index 421641e19fb0..184f4bda62f8 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
@@ -82,6 +82,9 @@ class AMDGPULegalizerInfo : public LegalizerInfo {
bool legalizeFlog(MachineInstr &MI, MachineIRBuilder &B,
double Log2BaseInverted) const;
bool legalizeFExp(MachineInstr &MI, MachineIRBuilder &B) const;
+ bool legalizeFFloor(MachineInstr &MI, MachineRegisterInfo &MRI,
+ MachineIRBuilder &B) const;
+
bool legalizeBuildVector(MachineInstr &MI, MachineRegisterInfo &MRI,
MachineIRBuilder &B) const;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
index 99cb4e9e5d4a..6f3ca01cec65 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
@@ -506,6 +506,10 @@ class GCNSubtarget : public AMDGPUGenSubtargetInfo,
return getGeneration() >= VOLCANIC_ISLANDS;
}
+ bool hasFractBug() const {
+ return getGeneration() == SOUTHERN_ISLANDS;
+ }
+
bool hasBFE() const {
return true;
}
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index d15a9a05776e..2dbc668b3440 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -1990,6 +1990,11 @@ let SubtargetPredicate = isGFX6 in {
// fract(x) = isnan(x) ? x : min(V_FRACT(x), 0.99999999999999999)
// Convert floor(x) to (x - fract(x))
+
+// Don't bother handling this for GlobalISel, it's handled during
+// lowering.
+//
+// FIXME: DAG should also custom lower this.
def : GCNPat <
(f64 (ffloor (f64 (VOP3Mods f64:$x, i32:$mods)))),
(V_ADD_F64
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/floor.f64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/floor.f64.ll
new file mode 100644
index 000000000000..a4f5948da2c6
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/floor.f64.ll
@@ -0,0 +1,298 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti < %s | FileCheck -check-prefix=GFX6 %s
+; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=bonaire < %s | FileCheck -check-prefix=GFX78 %s
+; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji < %s | FileCheck -check-prefix=GFX78 %s
+
+define double @v_floor_f64_ieee(double %x) {
+; GFX6-LABEL: v_floor_f64_ieee:
+; GFX6: ; %bb.0:
+; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-NEXT: v_fract_f64_e32 v[2:3], v[0:1]
+; GFX6-NEXT: s_mov_b32 s4, -1
+; GFX6-NEXT: s_mov_b32 s5, 0x3fefffff
+; GFX6-NEXT: v_min_f64 v[2:3], v[2:3], s[4:5]
+; GFX6-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[0:1]
+; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v0, vcc
+; GFX6-NEXT: v_cndmask_b32_e32 v3, v3, v1, vcc
+; GFX6-NEXT: v_add_f64 v[0:1], v[0:1], -v[2:3]
+; GFX6-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX78-LABEL: v_floor_f64_ieee:
+; GFX78: ; %bb.0:
+; GFX78-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX78-NEXT: v_floor_f64_e32 v[0:1], v[0:1]
+; GFX78-NEXT: s_setpc_b64 s[30:31]
+ %result = call double @llvm.floor.f64(double %x)
+ ret double %result
+}
+
+define double @v_floor_f64_ieee_nnan(double %x) {
+; GFX6-LABEL: v_floor_f64_ieee_nnan:
+; GFX6: ; %bb.0:
+; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-NEXT: v_fract_f64_e32 v[2:3], v[0:1]
+; GFX6-NEXT: s_mov_b32 s4, -1
+; GFX6-NEXT: s_mov_b32 s5, 0x3fefffff
+; GFX6-NEXT: v_min_f64 v[2:3], v[2:3], s[4:5]
+; GFX6-NEXT: v_add_f64 v[0:1], v[0:1], -v[2:3]
+; GFX6-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX78-LABEL: v_floor_f64_ieee_nnan:
+; GFX78: ; %bb.0:
+; GFX78-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX78-NEXT: v_floor_f64_e32 v[0:1], v[0:1]
+; GFX78-NEXT: s_setpc_b64 s[30:31]
+ %result = call nnan double @llvm.floor.f64(double %x)
+ ret double %result
+}
+
+define double @v_floor_f64_ieee_fneg(double %x) {
+; GFX6-LABEL: v_floor_f64_ieee_fneg:
+; GFX6: ; %bb.0:
+; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-NEXT: v_fract_f64_e64 v[2:3], -v[0:1]
+; GFX6-NEXT: s_mov_b32 s4, -1
+; GFX6-NEXT: s_mov_b32 s5, 0x3fefffff
+; GFX6-NEXT: v_min_f64 v[2:3], v[2:3], s[4:5]
+; GFX6-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[0:1]
+; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v0, vcc
+; GFX6-NEXT: v_cndmask_b32_e32 v3, v3, v1, vcc
+; GFX6-NEXT: v_add_f64 v[0:1], -v[0:1], -v[2:3]
+; GFX6-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX78-LABEL: v_floor_f64_ieee_fneg:
+; GFX78: ; %bb.0:
+; GFX78-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX78-NEXT: v_floor_f64_e64 v[0:1], -v[0:1]
+; GFX78-NEXT: s_setpc_b64 s[30:31]
+ %neg.x = fneg double %x
+ %result = call double @llvm.floor.f64(double %neg.x)
+ ret double %result
+}
+
+define double @v_floor_f64_nonieee(double %x) #1 {
+; GFX6-LABEL: v_floor_f64_nonieee:
+; GFX6: ; %bb.0:
+; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-NEXT: v_fract_f64_e32 v[2:3], v[0:1]
+; GFX6-NEXT: s_mov_b32 s4, -1
+; GFX6-NEXT: s_mov_b32 s5, 0x3fefffff
+; GFX6-NEXT: v_min_f64 v[2:3], v[2:3], s[4:5]
+; GFX6-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[0:1]
+; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v0, vcc
+; GFX6-NEXT: v_cndmask_b32_e32 v3, v3, v1, vcc
+; GFX6-NEXT: v_add_f64 v[0:1], v[0:1], -v[2:3]
+; GFX6-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX78-LABEL: v_floor_f64_nonieee:
+; GFX78: ; %bb.0:
+; GFX78-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX78-NEXT: v_floor_f64_e32 v[0:1], v[0:1]
+; GFX78-NEXT: s_setpc_b64 s[30:31]
+ %result = call double @llvm.floor.f64(double %x)
+ ret double %result
+}
+
+define double @v_floor_f64_nonieee_nnan(double %x) #1 {
+; GFX6-LABEL: v_floor_f64_nonieee_nnan:
+; GFX6: ; %bb.0:
+; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-NEXT: v_fract_f64_e32 v[2:3], v[0:1]
+; GFX6-NEXT: s_mov_b32 s4, -1
+; GFX6-NEXT: s_mov_b32 s5, 0x3fefffff
+; GFX6-NEXT: v_min_f64 v[2:3], v[2:3], s[4:5]
+; GFX6-NEXT: v_add_f64 v[0:1], v[0:1], -v[2:3]
+; GFX6-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX78-LABEL: v_floor_f64_nonieee_nnan:
+; GFX78: ; %bb.0:
+; GFX78-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX78-NEXT: v_floor_f64_e32 v[0:1], v[0:1]
+; GFX78-NEXT: s_setpc_b64 s[30:31]
+ %result = call nnan double @llvm.floor.f64(double %x)
+ ret double %result
+}
+
+define double @v_floor_f64_non_ieee_fneg(double %x) #1 {
+; GFX6-LABEL: v_floor_f64_non_ieee_fneg:
+; GFX6: ; %bb.0:
+; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-NEXT: v_fract_f64_e64 v[2:3], -v[0:1]
+; GFX6-NEXT: s_mov_b32 s4, -1
+; GFX6-NEXT: s_mov_b32 s5, 0x3fefffff
+; GFX6-NEXT: v_min_f64 v[2:3], v[2:3], s[4:5]
+; GFX6-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[0:1]
+; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v0, vcc
+; GFX6-NEXT: v_cndmask_b32_e32 v3, v3, v1, vcc
+; GFX6-NEXT: v_add_f64 v[0:1], -v[0:1], -v[2:3]
+; GFX6-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX78-LABEL: v_floor_f64_non_ieee_fneg:
+; GFX78: ; %bb.0:
+; GFX78-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX78-NEXT: v_floor_f64_e64 v[0:1], -v[0:1]
+; GFX78-NEXT: s_setpc_b64 s[30:31]
+ %neg.x = fneg double %x
+ %result = call double @llvm.floor.f64(double %neg.x)
+ ret double %result
+}
+
+define double @v_floor_f64_fabs(double %x) {
+; GFX6-LABEL: v_floor_f64_fabs:
+; GFX6: ; %bb.0:
+; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-NEXT: v_fract_f64_e64 v[2:3], |v[0:1]|
+; GFX6-NEXT: s_mov_b32 s4, -1
+; GFX6-NEXT: s_mov_b32 s5, 0x3fefffff
+; GFX6-NEXT: v_min_f64 v[2:3], v[2:3], s[4:5]
+; GFX6-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[0:1]
+; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v0, vcc
+; GFX6-NEXT: v_cndmask_b32_e32 v3, v3, v1, vcc
+; GFX6-NEXT: v_add_f64 v[0:1], |v[0:1]|, -v[2:3]
+; GFX6-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX78-LABEL: v_floor_f64_fabs:
+; GFX78: ; %bb.0:
+; GFX78-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX78-NEXT: v_floor_f64_e64 v[0:1], |v[0:1]|
+; GFX78-NEXT: s_setpc_b64 s[30:31]
+; GFX7-LABEL: v_floor_f64_fabs:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_floor_f64_e64 v[0:1], |v[0:1]|
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+ %abs.x = call double @llvm.fabs.f64(double %x)
+ %result = call double @llvm.floor.f64(double %abs.x)
+ ret double %result
+}
+
+define double @v_floor_f64_fneg_fabs(double %x) {
+; GFX6-LABEL: v_floor_f64_fneg_fabs:
+; GFX6: ; %bb.0:
+; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-NEXT: v_fract_f64_e64 v[2:3], -|v[0:1]|
+; GFX6-NEXT: s_mov_b32 s4, -1
+; GFX6-NEXT: s_mov_b32 s5, 0x3fefffff
+; GFX6-NEXT: v_min_f64 v[2:3], v[2:3], s[4:5]
+; GFX6-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[0:1]
+; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v0, vcc
+; GFX6-NEXT: v_cndmask_b32_e32 v3, v3, v1, vcc
+; GFX6-NEXT: v_add_f64 v[0:1], -|v[0:1]|, -v[2:3]
+; GFX6-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX78-LABEL: v_floor_f64_fneg_fabs:
+; GFX78: ; %bb.0:
+; GFX78-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX78-NEXT: v_floor_f64_e64 v[0:1], -|v[0:1]|
+; GFX78-NEXT: s_setpc_b64 s[30:31]
+ %abs.x = call double @llvm.fabs.f64(double %x)
+ %neg.abs.x = fneg double %abs.x
+ %result = call double @llvm.floor.f64(double %neg.abs.x)
+ ret double %result
+}
+
+define amdgpu_ps <2 x float> @s_floor_f64(double inreg %x) {
+; GFX6-LABEL: s_floor_f64:
+; GFX6: ; %bb.0:
+; GFX6-NEXT: v_fract_f64_e32 v[0:1], s[2:3]
+; GFX6-NEXT: s_mov_b32 s0, -1
+; GFX6-NEXT: s_mov_b32 s1, 0x3fefffff
+; GFX6-NEXT: v_min_f64 v[0:1], v[0:1], s[0:1]
+; GFX6-NEXT: v_cmp_o_f64_e64 vcc, s[2:3], s[2:3]
+; GFX6-NEXT: v_mov_b32_e32 v2, s2
+; GFX6-NEXT: v_mov_b32_e32 v3, s3
+; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX6-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
+; GFX6-NEXT: v_add_f64 v[0:1], s[2:3], -v[0:1]
+; GFX6-NEXT: ; return to shader part epilog
+;
+; GFX78-LABEL: s_floor_f64:
+; GFX78: ; %bb.0:
+; GFX78-NEXT: v_floor_f64_e32 v[0:1], s[2:3]
+; GFX78-NEXT: ; return to shader part epilog
+ %result = call double @llvm.floor.f64(double %x)
+ %cast = bitcast double %result to <2 x float>
+ ret <2 x float> %cast
+}
+
+define amdgpu_ps <2 x float> @s_floor_f64_fneg(double inreg %x) {
+; GFX6-LABEL: s_floor_f64_fneg:
+; GFX6: ; %bb.0:
+; GFX6-NEXT: v_fract_f64_e64 v[0:1], -s[2:3]
+; GFX6-NEXT: s_mov_b32 s0, -1
+; GFX6-NEXT: s_mov_b32 s1, 0x3fefffff
+; GFX6-NEXT: v_min_f64 v[0:1], v[0:1], s[0:1]
+; GFX6-NEXT: v_cmp_o_f64_e64 vcc, s[2:3], s[2:3]
+; GFX6-NEXT: v_mov_b32_e32 v2, s2
+; GFX6-NEXT: v_mov_b32_e32 v3, s3
+; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX6-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
+; GFX6-NEXT: v_add_f64 v[0:1], -s[2:3], -v[0:1]
+; GFX6-NEXT: ; return to shader part epilog
+;
+; GFX78-LABEL: s_floor_f64_fneg:
+; GFX78: ; %bb.0:
+; GFX78-NEXT: v_floor_f64_e64 v[0:1], -s[2:3]
+; GFX78-NEXT: ; return to shader part epilog
+ %neg.x = fneg double %x
+ %result = call double @llvm.floor.f64(double %neg.x)
+ %cast = bitcast double %result to <2 x float>
+ ret <2 x float> %cast
+}
+
+define amdgpu_ps <2 x float> @s_floor_f64_fabs(double inreg %x) {
+; GFX6-LABEL: s_floor_f64_fabs:
+; GFX6: ; %bb.0:
+; GFX6-NEXT: v_fract_f64_e64 v[0:1], |s[2:3]|
+; GFX6-NEXT: s_mov_b32 s0, -1
+; GFX6-NEXT: s_mov_b32 s1, 0x3fefffff
+; GFX6-NEXT: v_min_f64 v[0:1], v[0:1], s[0:1]
+; GFX6-NEXT: v_cmp_o_f64_e64 vcc, s[2:3], s[2:3]
+; GFX6-NEXT: v_mov_b32_e32 v2, s2
+; GFX6-NEXT: v_mov_b32_e32 v3, s3
+; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX6-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
+; GFX6-NEXT: v_add_f64 v[0:1], |s[2:3]|, -v[0:1]
+; GFX6-NEXT: ; return to shader part epilog
+;
+; GFX78-LABEL: s_floor_f64_fabs:
+; GFX78: ; %bb.0:
+; GFX78-NEXT: v_floor_f64_e64 v[0:1], |s[2:3]|
+; GFX78-NEXT: ; return to shader part epilog
+ %abs.x = call double @llvm.fabs.f64(double %x)
+ %result = call double @llvm.floor.f64(double %abs.x)
+ %cast = bitcast double %result to <2 x float>
+ ret <2 x float> %cast
+}
+
+define amdgpu_ps <2 x float> @s_floor_f64_fneg_fabs(double inreg %x) {
+; GFX6-LABEL: s_floor_f64_fneg_fabs:
+; GFX6: ; %bb.0:
+; GFX6-NEXT: v_fract_f64_e64 v[0:1], -|s[2:3]|
+; GFX6-NEXT: s_mov_b32 s0, -1
+; GFX6-NEXT: s_mov_b32 s1, 0x3fefffff
+; GFX6-NEXT: v_min_f64 v[0:1], v[0:1], s[0:1]
+; GFX6-NEXT: v_cmp_o_f64_e64 vcc, s[2:3], s[2:3]
+; GFX6-NEXT: v_mov_b32_e32 v2, s2
+; GFX6-NEXT: v_mov_b32_e32 v3, s3
+; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX6-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
+; GFX6-NEXT: v_add_f64 v[0:1], -|s[2:3]|, -v[0:1]
+; GFX6-NEXT: ; return to shader part epilog
+;
+; GFX78-LABEL: s_floor_f64_fneg_fabs:
+; GFX78: ; %bb.0:
+; GFX78-NEXT: v_floor_f64_e64 v[0:1], -|s[2:3]|
+; GFX78-NEXT: ; return to shader part epilog
+ %abs.x = call double @llvm.fabs.f64(double %x)
+ %neg.abs.x = fneg double %abs.x
+ %result = call double @llvm.floor.f64(double %neg.abs.x)
+ %cast = bitcast double %result to <2 x float>
+ ret <2 x float> %cast
+}
+
+declare double @llvm.floor.f64(double) #0
+declare double @llvm.fabs.f64(double) #0
+
+attributes #0 = { nounwind readnone speculatable willreturn }
+attributes #1 = { "amdgpu-ieee"="false" }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ffloor.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ffloor.mir
deleted file mode 100644
index 76f16eb2898f..000000000000
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ffloor.mir
+++ /dev/null
@@ -1,150 +0,0 @@
-# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -march=amdgcn -mcpu=tahiti -run-pass=instruction-select -global-isel-abort=0 -verify-machineinstrs -o - %s | FileCheck %s
-
----
-name: ffloor_s32_vv
-legalized: true
-regBankSelected: true
-tracksRegLiveness: true
-
-body: |
- bb.0:
- liveins: $vgpr0
-
- ; CHECK-LABEL: name: ffloor_s32_vv
- ; CHECK: liveins: $vgpr0
- ; CHECK: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; CHECK: [[V_FLOOR_F32_e64_:%[0-9]+]]:vgpr_32 = V_FLOOR_F32_e64 0, [[COPY]], 0, 0, implicit $exec
- ; CHECK: $vgpr0 = COPY [[V_FLOOR_F32_e64_]]
- %0:vgpr(s32) = COPY $vgpr0
- %1:vgpr(s32) = G_FFLOOR %0
- $vgpr0 = COPY %1
-...
-
----
-name: ffloor_s32_vs
-legalized: true
-regBankSelected: true
-tracksRegLiveness: true
-
-body: |
- bb.0:
- liveins: $sgpr0
-
- ; CHECK-LABEL: name: ffloor_s32_vs
- ; CHECK: liveins: $sgpr0
- ; CHECK: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
- ; CHECK: [[V_FLOOR_F32_e64_:%[0-9]+]]:vgpr_32 = V_FLOOR_F32_e64 0, [[COPY]], 0, 0, implicit $exec
- ; CHECK: $vgpr0 = COPY [[V_FLOOR_F32_e64_]]
- %0:sgpr(s32) = COPY $sgpr0
- %1:vgpr(s32) = G_FFLOOR %0
- $vgpr0 = COPY %1
-...
-
----
-name: ffloor_s64_vv
-legalized: true
-regBankSelected: true
-tracksRegLiveness: true
-
-body: |
- bb.0:
- liveins: $vgpr0_vgpr1
-
- ; CHECK-LABEL: name: ffloor_s64_vv
- ; CHECK: liveins: $vgpr0_vgpr1
- ; CHECK: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; CHECK: [[V_CMP_CLASS_F64_e64_:%[0-9]+]]:sreg_64 = V_CMP_CLASS_F64_e64 0, [[COPY]], 3, implicit $exec
- ; CHECK: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 4607182418800017407, implicit $exec
- ; CHECK: [[V_FRACT_F64_e64_:%[0-9]+]]:vreg_64 = V_FRACT_F64_e64 0, [[COPY]], 0, 0, implicit $exec
- ; CHECK: [[V_MIN_F64_:%[0-9]+]]:vreg_64 = V_MIN_F64 0, [[V_FRACT_F64_e64_]], 0, [[V_MOV_B]], 0, 0, implicit $exec
- ; CHECK: [[V_CNDMA:%[0-9]+]]:vreg_64 = V_CNDMASK_B64_PSEUDO [[V_MIN_F64_]], [[COPY]], [[V_CMP_CLASS_F64_e64_]], implicit $exec
- ; CHECK: [[V_ADD_F64_:%[0-9]+]]:vreg_64 = V_ADD_F64 0, [[COPY]], 1, [[V_CNDMA]], 0, 0, implicit $exec
- ; CHECK: $vgpr0_vgpr1 = COPY [[V_ADD_F64_]]
- %0:vgpr(s64) = COPY $vgpr0_vgpr1
- %1:vgpr(s64) = G_FFLOOR %0
- $vgpr0_vgpr1 = COPY %1
-...
-
-# FIXME: Constant bus restriction
-# ---
-# name: ffloor_s64_vs
-# legalized: true
-# regBankSelected: true
-# tracksRegLiveness: true
-
-# body: |
-# bb.0:
-# liveins: $sgpr0_sgpr1
-
-# %0:sgpr(s64) = COPY $sgpr0_sgpr1
-# %1:vgpr(s64) = G_FFLOOR %0
-# $vgpr0_vgpr1 = COPY %1
-# ...
-
----
-name: ffloor_fneg_s32_vs
-legalized: true
-regBankSelected: true
-tracksRegLiveness: true
-
-body: |
- bb.0:
- liveins: $sgpr0
-
- ; CHECK-LABEL: name: ffloor_fneg_s32_vs
- ; CHECK: liveins: $sgpr0
- ; CHECK: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
- ; CHECK: [[V_FLOOR_F32_e64_:%[0-9]+]]:vgpr_32 = V_FLOOR_F32_e64 1, [[COPY]], 0, 0, implicit $exec
- ; CHECK: $vgpr0 = COPY [[V_FLOOR_F32_e64_]]
- %0:sgpr(s32) = COPY $sgpr0
- %1:sgpr(s32) = G_FNEG %0
- %2:vgpr(s32) = G_FFLOOR %1
- $vgpr0 = COPY %2
-...
-
----
-name: ffloor_fneg_s32_vv
-legalized: true
-regBankSelected: true
-tracksRegLiveness: true
-
-body: |
- bb.0:
- liveins: $vgpr0
- ; CHECK-LABEL: name: ffloor_fneg_s32_vv
- ; CHECK: liveins: $vgpr0
- ; CHECK: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; CHECK: [[V_FLOOR_F32_e64_:%[0-9]+]]:vgpr_32 = V_FLOOR_F32_e64 1, [[COPY]], 0, 0, implicit $exec
- ; CHECK: $vgpr0 = COPY [[V_FLOOR_F32_e64_]]
- %0:vgpr(s32) = COPY $vgpr0
- %1:vgpr(s32) = G_FNEG %0
- %2:vgpr(s32) = G_FFLOOR %1
- $vgpr0 = COPY %2
-...
-
----
-name: ffloor_fneg_s64_vv
-legalized: true
-regBankSelected: true
-tracksRegLiveness: true
-
-body: |
- bb.0:
- liveins: $vgpr0_vgpr1
-
- ; CHECK-LABEL: name: ffloor_fneg_s64_vv
- ; CHECK: liveins: $vgpr0_vgpr1
- ; CHECK: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; CHECK: [[V_CMP_CLASS_F64_e64_:%[0-9]+]]:sreg_64 = V_CMP_CLASS_F64_e64 0, [[COPY]], 3, implicit $exec
- ; CHECK: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 4607182418800017407, implicit $exec
- ; CHECK: [[V_FRACT_F64_e64_:%[0-9]+]]:vreg_64 = V_FRACT_F64_e64 1, [[COPY]], 0, 0, implicit $exec
- ; CHECK: [[V_MIN_F64_:%[0-9]+]]:vreg_64 = V_MIN_F64 0, [[V_FRACT_F64_e64_]], 0, [[V_MOV_B]], 0, 0, implicit $exec
- ; CHECK: [[V_CNDMA:%[0-9]+]]:vreg_64 = V_CNDMASK_B64_PSEUDO [[V_MIN_F64_]], [[COPY]], [[V_CMP_CLASS_F64_e64_]], implicit $exec
- ; CHECK: [[V_ADD_F64_:%[0-9]+]]:vreg_64 = V_ADD_F64 1, [[COPY]], 1, [[V_CNDMA]], 0, 0, implicit $exec
- ; CHECK: $vgpr0_vgpr1 = COPY [[V_ADD_F64_]]
- %0:vgpr(s64) = COPY $vgpr0_vgpr1
- %1:vgpr(s64) = G_FNEG %0
- %2:vgpr(s64) = G_FFLOOR %1
- $vgpr0_vgpr1 = COPY %2
-...
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ffloor.s32.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ffloor.s32.mir
new file mode 100644
index 000000000000..e7d27af6481b
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ffloor.s32.mir
@@ -0,0 +1,83 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -march=amdgcn -mcpu=tahiti -run-pass=instruction-select -global-isel-abort=0 -verify-machineinstrs -o - %s | FileCheck %s
+
+---
+name: ffloor_s32_vv
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0
+
+ ; CHECK-LABEL: name: ffloor_s32_vv
+ ; CHECK: liveins: $vgpr0
+ ; CHECK: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; CHECK: [[V_FLOOR_F32_e64_:%[0-9]+]]:vgpr_32 = V_FLOOR_F32_e64 0, [[COPY]], 0, 0, implicit $exec
+ ; CHECK: $vgpr0 = COPY [[V_FLOOR_F32_e64_]]
+ %0:vgpr(s32) = COPY $vgpr0
+ %1:vgpr(s32) = G_FFLOOR %0
+ $vgpr0 = COPY %1
+...
+
+---
+name: ffloor_s32_vs
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0
+
+ ; CHECK-LABEL: name: ffloor_s32_vs
+ ; CHECK: liveins: $sgpr0
+ ; CHECK: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+ ; CHECK: [[V_FLOOR_F32_e64_:%[0-9]+]]:vgpr_32 = V_FLOOR_F32_e64 0, [[COPY]], 0, 0, implicit $exec
+ ; CHECK: $vgpr0 = COPY [[V_FLOOR_F32_e64_]]
+ %0:sgpr(s32) = COPY $sgpr0
+ %1:vgpr(s32) = G_FFLOOR %0
+ $vgpr0 = COPY %1
+...
+
+---
+name: ffloor_fneg_s32_vs
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0
+
+ ; CHECK-LABEL: name: ffloor_fneg_s32_vs
+ ; CHECK: liveins: $sgpr0
+ ; CHECK: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+ ; CHECK: [[V_FLOOR_F32_e64_:%[0-9]+]]:vgpr_32 = V_FLOOR_F32_e64 1, [[COPY]], 0, 0, implicit $exec
+ ; CHECK: $vgpr0 = COPY [[V_FLOOR_F32_e64_]]
+ %0:sgpr(s32) = COPY $sgpr0
+ %1:sgpr(s32) = G_FNEG %0
+ %2:vgpr(s32) = G_FFLOOR %1
+ $vgpr0 = COPY %2
+...
+
+---
+name: ffloor_fneg_s32_vv
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0
+ ; CHECK-LABEL: name: ffloor_fneg_s32_vv
+ ; CHECK: liveins: $vgpr0
+ ; CHECK: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; CHECK: [[V_FLOOR_F32_e64_:%[0-9]+]]:vgpr_32 = V_FLOOR_F32_e64 1, [[COPY]], 0, 0, implicit $exec
+ ; CHECK: $vgpr0 = COPY [[V_FLOOR_F32_e64_]]
+ %0:vgpr(s32) = COPY $vgpr0
+ %1:vgpr(s32) = G_FNEG %0
+ %2:vgpr(s32) = G_FFLOOR %1
+ $vgpr0 = COPY %2
+...
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ffloor.s64.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ffloor.s64.mir
new file mode 100644
index 000000000000..1af481c27a97
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ffloor.s64.mir
@@ -0,0 +1,59 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -march=amdgcn -mcpu=hawaii -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck %s
+
+---
+name: ffloor_s64_vv
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1
+
+ ; CHECK-LABEL: name: ffloor_s64_vv
+ ; CHECK: liveins: $vgpr0_vgpr1
+ ; CHECK: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; CHECK: [[V_FLOOR_F64_e64_:%[0-9]+]]:vreg_64 = V_FLOOR_F64_e64 0, [[COPY]], 0, 0, implicit $exec
+ ; CHECK: $vgpr0_vgpr1 = COPY [[V_FLOOR_F64_e64_]]
+ %0:vgpr(s64) = COPY $vgpr0_vgpr1
+ %1:vgpr(s64) = G_FFLOOR %0
+ $vgpr0_vgpr1 = COPY %1
+...
+
+# FIXME: Constant bus restriction
+# ---
+# name: ffloor_s64_vs
+# legalized: true
+# regBankSelected: true
+# tracksRegLiveness: true
+
+# body: |
+# bb.0:
+# liveins: $sgpr0_sgpr1
+
+# %0:sgpr(s64) = COPY $sgpr0_sgpr1
+# %1:vgpr(s64) = G_FFLOOR %0
+# $vgpr0_vgpr1 = COPY %1
+# ...
+
+---
+name: ffloor_fneg_s64_vv
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1
+
+ ; CHECK-LABEL: name: ffloor_fneg_s64_vv
+ ; CHECK: liveins: $vgpr0_vgpr1
+ ; CHECK: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; CHECK: [[V_FLOOR_F64_e64_:%[0-9]+]]:vreg_64 = V_FLOOR_F64_e64 1, [[COPY]], 0, 0, implicit $exec
+ ; CHECK: $vgpr0_vgpr1 = COPY [[V_FLOOR_F64_e64_]]
+ %0:vgpr(s64) = COPY $vgpr0_vgpr1
+ %1:vgpr(s64) = G_FNEG %0
+ %2:vgpr(s64) = G_FFLOOR %1
+ $vgpr0_vgpr1 = COPY %2
+...
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ffloor.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ffloor.mir
index 2abe2c9ab5db..4140e89bc119 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ffloor.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ffloor.mir
@@ -34,8 +34,14 @@ body: |
; SI-LABEL: name: test_ffloor_s64
; SI: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
- ; SI: [[FFLOOR:%[0-9]+]]:_(s64) = G_FFLOOR [[COPY]]
- ; SI: $vgpr0_vgpr1 = COPY [[FFLOOR]](s64)
+ ; SI: [[INT:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[COPY]](s64)
+ ; SI: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 0x3FEFFFFFFFFFFFFF
+ ; SI: [[FMINNUM_IEEE:%[0-9]+]]:_(s64) = G_FMINNUM_IEEE [[INT]], [[C]]
+ ; SI: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(ord), [[COPY]](s64), [[COPY]]
+ ; SI: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[FCMP]](s1), [[COPY]], [[FMINNUM_IEEE]]
+ ; SI: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[SELECT]]
+ ; SI: [[FADD:%[0-9]+]]:_(s64) = G_FADD [[COPY]], [[FNEG]]
+ ; SI: $vgpr0_vgpr1 = COPY [[FADD]](s64)
; VI-LABEL: name: test_ffloor_s64
; VI: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
; VI: [[FFLOOR:%[0-9]+]]:_(s64) = G_FFLOOR [[COPY]]
@@ -49,6 +55,65 @@ body: |
$vgpr0_vgpr1 = COPY %1
...
+
+---
+name: test_ffloor_s64_nnan
+body: |
+ bb.0:
+ liveins: $vgpr0
+
+ ; SI-LABEL: name: test_ffloor_s64_nnan
+ ; SI: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
+ ; SI: [[INT:%[0-9]+]]:_(s64) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[COPY]](s64)
+ ; SI: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 0x3FEFFFFFFFFFFFFF
+ ; SI: [[FMINNUM_IEEE:%[0-9]+]]:_(s64) = nnan G_FMINNUM_IEEE [[INT]], [[C]]
+ ; SI: [[FNEG:%[0-9]+]]:_(s64) = nnan G_FNEG [[FMINNUM_IEEE]]
+ ; SI: [[FADD:%[0-9]+]]:_(s64) = nnan G_FADD [[COPY]], [[FNEG]]
+ ; SI: $vgpr0_vgpr1 = COPY [[FADD]](s64)
+ ; VI-LABEL: name: test_ffloor_s64_nnan
+ ; VI: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
+ ; VI: [[FFLOOR:%[0-9]+]]:_(s64) = nnan G_FFLOOR [[COPY]]
+ ; VI: $vgpr0_vgpr1 = COPY [[FFLOOR]](s64)
+ ; GFX9-LABEL: name: test_ffloor_s64_nnan
+ ; GFX9: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
+ ; GFX9: [[FFLOOR:%[0-9]+]]:_(s64) = nnan G_FFLOOR [[COPY]]
+ ; GFX9: $vgpr0_vgpr1 = COPY [[FFLOOR]](s64)
+ %0:_(s64) = COPY $vgpr0_vgpr1
+ %1:_(s64) = nnan G_FFLOOR %0
+ $vgpr0_vgpr1 = COPY %1
+
+...
+
+---
+name: test_ffloor_s64_nssaz
+body: |
+ bb.0:
+ liveins: $vgpr0
+
+ ; SI-LABEL: name: test_ffloor_s64_nssaz
+ ; SI: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
+ ; SI: [[INT:%[0-9]+]]:_(s64) = nsz G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[COPY]](s64)
+ ; SI: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 0x3FEFFFFFFFFFFFFF
+ ; SI: [[FMINNUM_IEEE:%[0-9]+]]:_(s64) = nsz G_FMINNUM_IEEE [[INT]], [[C]]
+ ; SI: [[FCMP:%[0-9]+]]:_(s1) = nsz G_FCMP floatpred(ord), [[COPY]](s64), [[COPY]]
+ ; SI: [[SELECT:%[0-9]+]]:_(s64) = nsz G_SELECT [[FCMP]](s1), [[COPY]], [[FMINNUM_IEEE]]
+ ; SI: [[FNEG:%[0-9]+]]:_(s64) = nsz G_FNEG [[SELECT]]
+ ; SI: [[FADD:%[0-9]+]]:_(s64) = nsz G_FADD [[COPY]], [[FNEG]]
+ ; SI: $vgpr0_vgpr1 = COPY [[FADD]](s64)
+ ; VI-LABEL: name: test_ffloor_s64_nssaz
+ ; VI: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
+ ; VI: [[FFLOOR:%[0-9]+]]:_(s64) = nsz G_FFLOOR [[COPY]]
+ ; VI: $vgpr0_vgpr1 = COPY [[FFLOOR]](s64)
+ ; GFX9-LABEL: name: test_ffloor_s64_nssaz
+ ; GFX9: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
+ ; GFX9: [[FFLOOR:%[0-9]+]]:_(s64) = nsz G_FFLOOR [[COPY]]
+ ; GFX9: $vgpr0_vgpr1 = COPY [[FFLOOR]](s64)
+ %0:_(s64) = COPY $vgpr0_vgpr1
+ %1:_(s64) = nsz G_FFLOOR %0
+ $vgpr0_vgpr1 = COPY %1
+
+...
+
---
name: test_ffloor_s16
body: |
@@ -158,9 +223,20 @@ body: |
; SI-LABEL: name: test_ffloor_v2s64
; SI: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
; SI: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>)
- ; SI: [[FFLOOR:%[0-9]+]]:_(s64) = G_FFLOOR [[UV]]
- ; SI: [[FFLOOR1:%[0-9]+]]:_(s64) = G_FFLOOR [[UV1]]
- ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[FFLOOR]](s64), [[FFLOOR1]](s64)
+ ; SI: [[INT:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[UV]](s64)
+ ; SI: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 0x3FEFFFFFFFFFFFFF
+ ; SI: [[FMINNUM_IEEE:%[0-9]+]]:_(s64) = G_FMINNUM_IEEE [[INT]], [[C]]
+ ; SI: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(ord), [[UV]](s64), [[UV]]
+ ; SI: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[FCMP]](s1), [[UV]], [[FMINNUM_IEEE]]
+ ; SI: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[SELECT]]
+ ; SI: [[FADD:%[0-9]+]]:_(s64) = G_FADD [[UV]], [[FNEG]]
+ ; SI: [[INT1:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[UV1]](s64)
+ ; SI: [[FMINNUM_IEEE1:%[0-9]+]]:_(s64) = G_FMINNUM_IEEE [[INT1]], [[C]]
+ ; SI: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(ord), [[UV1]](s64), [[UV1]]
+ ; SI: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[FCMP1]](s1), [[UV1]], [[FMINNUM_IEEE1]]
+ ; SI: [[FNEG1:%[0-9]+]]:_(s64) = G_FNEG [[SELECT1]]
+ ; SI: [[FADD1:%[0-9]+]]:_(s64) = G_FADD [[UV1]], [[FNEG1]]
+ ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[FADD]](s64), [[FADD1]](s64)
; SI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
; VI-LABEL: name: test_ffloor_v2s64
; VI: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fptosi.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fptosi.mir
index 7bc4ef58e668..81056ce92ea0 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fptosi.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fptosi.mir
@@ -192,9 +192,15 @@ body: |
; SI: [[C8:%[0-9]+]]:_(s64) = G_FCONSTANT double 0x3DF0000000000000
; SI: [[C9:%[0-9]+]]:_(s64) = G_FCONSTANT double 0xC1F0000000000000
; SI: [[FMUL:%[0-9]+]]:_(s64) = G_FMUL [[INTRINSIC_TRUNC]], [[C8]]
- ; SI: [[FFLOOR:%[0-9]+]]:_(s64) = G_FFLOOR [[FMUL]]
- ; SI: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[FFLOOR]], [[C9]], [[INTRINSIC_TRUNC]]
- ; SI: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[FFLOOR]](s64)
+ ; SI: [[INT1:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL]](s64)
+ ; SI: [[C10:%[0-9]+]]:_(s64) = G_FCONSTANT double 0x3FEFFFFFFFFFFFFF
+ ; SI: [[FMINNUM_IEEE:%[0-9]+]]:_(s64) = G_FMINNUM_IEEE [[INT1]], [[C10]]
+ ; SI: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(ord), [[FMUL]](s64), [[FMUL]]
+ ; SI: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[FCMP]](s1), [[FMUL]], [[FMINNUM_IEEE]]
+ ; SI: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[SELECT1]]
+ ; SI: [[FADD:%[0-9]+]]:_(s64) = G_FADD [[FMUL]], [[FNEG]]
+ ; SI: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[FADD]], [[C9]], [[INTRINSIC_TRUNC]]
+ ; SI: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[FADD]](s64)
; SI: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMA]](s64)
; SI: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[FPTOUI]](s32), [[FPTOSI]](s32)
; SI: $vgpr0_vgpr1 = COPY [[MV1]](s64)
@@ -247,9 +253,13 @@ body: |
; SI: [[C8:%[0-9]+]]:_(s64) = G_FCONSTANT double 0x3DF0000000000000
; SI: [[C9:%[0-9]+]]:_(s64) = G_FCONSTANT double 0xC1F0000000000000
; SI: [[FMUL:%[0-9]+]]:_(s64) = nnan G_FMUL [[INTRINSIC_TRUNC]], [[C8]]
- ; SI: [[FFLOOR:%[0-9]+]]:_(s64) = nnan G_FFLOOR [[FMUL]]
- ; SI: [[FMA:%[0-9]+]]:_(s64) = nnan G_FMA [[FFLOOR]], [[C9]], [[INTRINSIC_TRUNC]]
- ; SI: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[FFLOOR]](s64)
+ ; SI: [[INT1:%[0-9]+]]:_(s64) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL]](s64)
+ ; SI: [[C10:%[0-9]+]]:_(s64) = G_FCONSTANT double 0x3FEFFFFFFFFFFFFF
+ ; SI: [[FMINNUM_IEEE:%[0-9]+]]:_(s64) = nnan G_FMINNUM_IEEE [[INT1]], [[C10]]
+ ; SI: [[FNEG:%[0-9]+]]:_(s64) = nnan G_FNEG [[FMINNUM_IEEE]]
+ ; SI: [[FADD:%[0-9]+]]:_(s64) = nnan G_FADD [[FMUL]], [[FNEG]]
+ ; SI: [[FMA:%[0-9]+]]:_(s64) = nnan G_FMA [[FADD]], [[C9]], [[INTRINSIC_TRUNC]]
+ ; SI: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[FADD]](s64)
; SI: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMA]](s64)
; SI: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[FPTOUI]](s32), [[FPTOSI]](s32)
; SI: $vgpr0_vgpr1 = COPY [[MV1]](s64)
@@ -303,14 +313,20 @@ body: |
; SI: [[C8:%[0-9]+]]:_(s64) = G_FCONSTANT double 0x3DF0000000000000
; SI: [[C9:%[0-9]+]]:_(s64) = G_FCONSTANT double 0xC1F0000000000000
; SI: [[FMUL:%[0-9]+]]:_(s64) = G_FMUL [[INTRINSIC_TRUNC]], [[C8]]
- ; SI: [[FFLOOR:%[0-9]+]]:_(s64) = G_FFLOOR [[FMUL]]
- ; SI: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[FFLOOR]], [[C9]], [[INTRINSIC_TRUNC]]
- ; SI: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[FFLOOR]](s64)
+ ; SI: [[INT1:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL]](s64)
+ ; SI: [[C10:%[0-9]+]]:_(s64) = G_FCONSTANT double 0x3FEFFFFFFFFFFFFF
+ ; SI: [[FMINNUM_IEEE:%[0-9]+]]:_(s64) = G_FMINNUM_IEEE [[INT1]], [[C10]]
+ ; SI: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(ord), [[FMUL]](s64), [[FMUL]]
+ ; SI: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[FCMP]](s1), [[FMUL]], [[FMINNUM_IEEE]]
+ ; SI: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[SELECT1]]
+ ; SI: [[FADD:%[0-9]+]]:_(s64) = G_FADD [[FMUL]], [[FNEG]]
+ ; SI: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[FADD]], [[C9]], [[INTRINSIC_TRUNC]]
+ ; SI: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[FADD]](s64)
; SI: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMA]](s64)
; SI: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[FPTOUI]](s32), [[FPTOSI]](s32)
; SI: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64)
- ; SI: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.ubfe), [[C]](s32), [[C1]](s32)
- ; SI: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[INT1]], [[C2]]
+ ; SI: [[INT2:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.ubfe), [[C]](s32), [[C1]](s32)
+ ; SI: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[INT2]], [[C2]]
; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV5]], [[C3]]
; SI: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[C5]](s32), [[AND2]](s32)
; SI: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[C4]], [[SUB1]](s32)
@@ -318,13 +334,18 @@ body: |
; SI: [[AND3:%[0-9]+]]:_(s64) = G_AND [[UV1]], [[XOR1]]
; SI: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SUB1]](s32), [[C5]]
; SI: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[SUB1]](s32), [[C7]]
- ; SI: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP2]](s1), [[MV2]], [[AND3]]
- ; SI: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP3]](s1), [[UV1]], [[SELECT1]]
+ ; SI: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP2]](s1), [[MV2]], [[AND3]]
+ ; SI: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP3]](s1), [[UV1]], [[SELECT2]]
; SI: [[INTRINSIC_TRUNC1:%[0-9]+]]:_(s64) = G_INTRINSIC_TRUNC [[UV1]]
; SI: [[FMUL1:%[0-9]+]]:_(s64) = G_FMUL [[INTRINSIC_TRUNC1]], [[C8]]
- ; SI: [[FFLOOR1:%[0-9]+]]:_(s64) = G_FFLOOR [[FMUL1]]
- ; SI: [[FMA1:%[0-9]+]]:_(s64) = G_FMA [[FFLOOR1]], [[C9]], [[INTRINSIC_TRUNC1]]
- ; SI: [[FPTOSI1:%[0-9]+]]:_(s32) = G_FPTOSI [[FFLOOR1]](s64)
+ ; SI: [[INT3:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL1]](s64)
+ ; SI: [[FMINNUM_IEEE1:%[0-9]+]]:_(s64) = G_FMINNUM_IEEE [[INT3]], [[C10]]
+ ; SI: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(ord), [[FMUL1]](s64), [[FMUL1]]
+ ; SI: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[FCMP1]](s1), [[FMUL1]], [[FMINNUM_IEEE1]]
+ ; SI: [[FNEG1:%[0-9]+]]:_(s64) = G_FNEG [[SELECT3]]
+ ; SI: [[FADD1:%[0-9]+]]:_(s64) = G_FADD [[FMUL1]], [[FNEG1]]
+ ; SI: [[FMA1:%[0-9]+]]:_(s64) = G_FMA [[FADD1]], [[C9]], [[INTRINSIC_TRUNC1]]
+ ; SI: [[FPTOSI1:%[0-9]+]]:_(s32) = G_FPTOSI [[FADD1]](s64)
; SI: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[FMA1]](s64)
; SI: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[FPTOUI1]](s32), [[FPTOSI1]](s32)
; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV1]](s64), [[MV3]](s64)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fptoui.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fptoui.mir
index 4a17a96aa528..b895a1a09662 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fptoui.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fptoui.mir
@@ -192,9 +192,15 @@ body: |
; SI: [[C8:%[0-9]+]]:_(s64) = G_FCONSTANT double 0x3DF0000000000000
; SI: [[C9:%[0-9]+]]:_(s64) = G_FCONSTANT double 0xC1F0000000000000
; SI: [[FMUL:%[0-9]+]]:_(s64) = G_FMUL [[INTRINSIC_TRUNC]], [[C8]]
- ; SI: [[FFLOOR:%[0-9]+]]:_(s64) = G_FFLOOR [[FMUL]]
- ; SI: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[FFLOOR]], [[C9]], [[INTRINSIC_TRUNC]]
- ; SI: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FFLOOR]](s64)
+ ; SI: [[INT1:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL]](s64)
+ ; SI: [[C10:%[0-9]+]]:_(s64) = G_FCONSTANT double 0x3FEFFFFFFFFFFFFF
+ ; SI: [[FMINNUM_IEEE:%[0-9]+]]:_(s64) = G_FMINNUM_IEEE [[INT1]], [[C10]]
+ ; SI: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(ord), [[FMUL]](s64), [[FMUL]]
+ ; SI: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[FCMP]](s1), [[FMUL]], [[FMINNUM_IEEE]]
+ ; SI: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[SELECT1]]
+ ; SI: [[FADD:%[0-9]+]]:_(s64) = G_FADD [[FMUL]], [[FNEG]]
+ ; SI: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[FADD]], [[C9]], [[INTRINSIC_TRUNC]]
+ ; SI: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FADD]](s64)
; SI: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[FMA]](s64)
; SI: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[FPTOUI1]](s32), [[FPTOUI]](s32)
; SI: $vgpr0_vgpr1 = COPY [[MV1]](s64)
@@ -247,9 +253,13 @@ body: |
; SI: [[C8:%[0-9]+]]:_(s64) = G_FCONSTANT double 0x3DF0000000000000
; SI: [[C9:%[0-9]+]]:_(s64) = G_FCONSTANT double 0xC1F0000000000000
; SI: [[FMUL:%[0-9]+]]:_(s64) = nnan G_FMUL [[INTRINSIC_TRUNC]], [[C8]]
- ; SI: [[FFLOOR:%[0-9]+]]:_(s64) = nnan G_FFLOOR [[FMUL]]
- ; SI: [[FMA:%[0-9]+]]:_(s64) = nnan G_FMA [[FFLOOR]], [[C9]], [[INTRINSIC_TRUNC]]
- ; SI: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FFLOOR]](s64)
+ ; SI: [[INT1:%[0-9]+]]:_(s64) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL]](s64)
+ ; SI: [[C10:%[0-9]+]]:_(s64) = G_FCONSTANT double 0x3FEFFFFFFFFFFFFF
+ ; SI: [[FMINNUM_IEEE:%[0-9]+]]:_(s64) = nnan G_FMINNUM_IEEE [[INT1]], [[C10]]
+ ; SI: [[FNEG:%[0-9]+]]:_(s64) = nnan G_FNEG [[FMINNUM_IEEE]]
+ ; SI: [[FADD:%[0-9]+]]:_(s64) = nnan G_FADD [[FMUL]], [[FNEG]]
+ ; SI: [[FMA:%[0-9]+]]:_(s64) = nnan G_FMA [[FADD]], [[C9]], [[INTRINSIC_TRUNC]]
+ ; SI: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FADD]](s64)
; SI: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[FMA]](s64)
; SI: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[FPTOUI1]](s32), [[FPTOUI]](s32)
; SI: $vgpr0_vgpr1 = COPY [[MV1]](s64)
@@ -303,14 +313,20 @@ body: |
; SI: [[C8:%[0-9]+]]:_(s64) = G_FCONSTANT double 0x3DF0000000000000
; SI: [[C9:%[0-9]+]]:_(s64) = G_FCONSTANT double 0xC1F0000000000000
; SI: [[FMUL:%[0-9]+]]:_(s64) = G_FMUL [[INTRINSIC_TRUNC]], [[C8]]
- ; SI: [[FFLOOR:%[0-9]+]]:_(s64) = G_FFLOOR [[FMUL]]
- ; SI: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[FFLOOR]], [[C9]], [[INTRINSIC_TRUNC]]
- ; SI: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FFLOOR]](s64)
+ ; SI: [[INT1:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL]](s64)
+ ; SI: [[C10:%[0-9]+]]:_(s64) = G_FCONSTANT double 0x3FEFFFFFFFFFFFFF
+ ; SI: [[FMINNUM_IEEE:%[0-9]+]]:_(s64) = G_FMINNUM_IEEE [[INT1]], [[C10]]
+ ; SI: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(ord), [[FMUL]](s64), [[FMUL]]
+ ; SI: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[FCMP]](s1), [[FMUL]], [[FMINNUM_IEEE]]
+ ; SI: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[SELECT1]]
+ ; SI: [[FADD:%[0-9]+]]:_(s64) = G_FADD [[FMUL]], [[FNEG]]
+ ; SI: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[FADD]], [[C9]], [[INTRINSIC_TRUNC]]
+ ; SI: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FADD]](s64)
; SI: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[FMA]](s64)
; SI: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[FPTOUI1]](s32), [[FPTOUI]](s32)
; SI: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64)
- ; SI: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.ubfe), [[C]](s32), [[C1]](s32)
- ; SI: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[INT1]], [[C2]]
+ ; SI: [[INT2:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.ubfe), [[C]](s32), [[C1]](s32)
+ ; SI: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[INT2]], [[C2]]
; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV5]], [[C3]]
; SI: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[C5]](s32), [[AND2]](s32)
; SI: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[C4]], [[SUB1]](s32)
@@ -318,13 +334,18 @@ body: |
; SI: [[AND3:%[0-9]+]]:_(s64) = G_AND [[UV1]], [[XOR1]]
; SI: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SUB1]](s32), [[C5]]
; SI: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[SUB1]](s32), [[C7]]
- ; SI: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP2]](s1), [[MV2]], [[AND3]]
- ; SI: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP3]](s1), [[UV1]], [[SELECT1]]
+ ; SI: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP2]](s1), [[MV2]], [[AND3]]
+ ; SI: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP3]](s1), [[UV1]], [[SELECT2]]
; SI: [[INTRINSIC_TRUNC1:%[0-9]+]]:_(s64) = G_INTRINSIC_TRUNC [[UV1]]
; SI: [[FMUL1:%[0-9]+]]:_(s64) = G_FMUL [[INTRINSIC_TRUNC1]], [[C8]]
- ; SI: [[FFLOOR1:%[0-9]+]]:_(s64) = G_FFLOOR [[FMUL1]]
- ; SI: [[FMA1:%[0-9]+]]:_(s64) = G_FMA [[FFLOOR1]], [[C9]], [[INTRINSIC_TRUNC1]]
- ; SI: [[FPTOUI2:%[0-9]+]]:_(s32) = G_FPTOUI [[FFLOOR1]](s64)
+ ; SI: [[INT3:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL1]](s64)
+ ; SI: [[FMINNUM_IEEE1:%[0-9]+]]:_(s64) = G_FMINNUM_IEEE [[INT3]], [[C10]]
+ ; SI: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(ord), [[FMUL1]](s64), [[FMUL1]]
+ ; SI: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[FCMP1]](s1), [[FMUL1]], [[FMINNUM_IEEE1]]
+ ; SI: [[FNEG1:%[0-9]+]]:_(s64) = G_FNEG [[SELECT3]]
+ ; SI: [[FADD1:%[0-9]+]]:_(s64) = G_FADD [[FMUL1]], [[FNEG1]]
+ ; SI: [[FMA1:%[0-9]+]]:_(s64) = G_FMA [[FADD1]], [[C9]], [[INTRINSIC_TRUNC1]]
+ ; SI: [[FPTOUI2:%[0-9]+]]:_(s32) = G_FPTOUI [[FADD1]](s64)
; SI: [[FPTOUI3:%[0-9]+]]:_(s32) = G_FPTOUI [[FMA1]](s64)
; SI: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[FPTOUI3]](s32), [[FPTOUI2]](s32)
; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV1]](s64), [[MV3]](s64)