[llvm] [clang] [clang-tools-extra] [AMDGPU] Generic lowering for rint and nearbyint (PR #69596)
via cfe-commits
cfe-commits at lists.llvm.org
Mon Nov 13 07:05:36 PST 2023
https://github.com/Acim-Maravic updated https://github.com/llvm/llvm-project/pull/69596
>From af211dc797ead570ce28ccdb1248f588b15d07bd Mon Sep 17 00:00:00 2001
From: Acim Maravic <acim.maravic at syrmia.com>
Date: Mon, 13 Nov 2023 13:56:10 +0100
Subject: [PATCH] [AMDGPU] Generic lowering for rint and nearbyint
There are three different rounding intrinsics that are lowered to the same instruction.
---
.../CodeGen/GlobalISel/MachineIRBuilder.h | 10 +-
.../CodeGen/GlobalISel/LegalizerHelper.cpp | 4 +-
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 1 +
llvm/lib/CodeGen/TargetLoweringBase.cpp | 7 +-
llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp | 1 +
llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp | 40 ++--
.../lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp | 49 ++--
llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h | 4 +-
.../Target/AMDGPU/AMDGPURegisterBankInfo.cpp | 2 +-
llvm/lib/Target/AMDGPU/R600ISelLowering.cpp | 2 +-
llvm/lib/Target/AMDGPU/R600Instructions.td | 2 +-
llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 5 +-
llvm/lib/Target/AMDGPU/SOPInstructions.td | 4 +-
llvm/lib/Target/AMDGPU/VOP1Instructions.td | 6 +-
llvm/lib/Target/X86/X86ISelLowering.cpp | 5 +
llvm/test/Analysis/CostModel/AMDGPU/fround.ll | 116 +++++++--
.../AMDGPU/GlobalISel/inst-select-frint.mir | 110 ---------
.../GlobalISel/inst-select-frint.s16.mir | 97 --------
.../inst-select-scalar-float-sop1.mir | 4 +-
.../AMDGPU/GlobalISel/legalize-fexp.mir | 126 +++++-----
.../AMDGPU/GlobalISel/legalize-frint.mir | 220 ------------------
.../AMDGPU/GlobalISel/regbankselect-frint.mir | 36 ---
22 files changed, 236 insertions(+), 615 deletions(-)
delete mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-frint.mir
delete mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-frint.s16.mir
delete mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-frint.mir
delete mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-frint.mir
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h b/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h
index bf41c19cd6cc726..634eba76f91210c 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h
@@ -1882,10 +1882,12 @@ class MachineIRBuilder {
return buildInstr(TargetOpcode::G_FPTOSI, {Dst}, {Src0});
}
- /// Build and insert \p Dst = G_FRINT \p Src0, \p Src1
- MachineInstrBuilder buildFRint(const DstOp &Dst, const SrcOp &Src0,
- std::optional<unsigned> Flags = std::nullopt) {
- return buildInstr(TargetOpcode::G_FRINT, {Dst}, {Src0}, Flags);
+ /// Build and insert \p Dst = G_INTRINSIC_ROUNDEVEN \p Src0, \p Src1
+ MachineInstrBuilder
+ buildIntrinsicRoundeven(const DstOp &Dst, const SrcOp &Src0,
+ std::optional<unsigned> Flags = std::nullopt) {
+ return buildInstr(TargetOpcode::G_INTRINSIC_ROUNDEVEN, {Dst}, {Src0},
+ Flags);
}
/// Build and insert \p Res = G_SMIN \p Op0, \p Op1
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index 108768494ccbb28..8c0c18691a368cb 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -3570,10 +3570,10 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) {
return lowerFFloor(MI);
case TargetOpcode::G_INTRINSIC_ROUND:
return lowerIntrinsicRound(MI);
- case TargetOpcode::G_INTRINSIC_ROUNDEVEN: {
+ case TargetOpcode::G_FRINT: {
// Since round even is the assumed rounding mode for unconstrained FP
// operations, rint and roundeven are the same operation.
- changeOpcode(MI, TargetOpcode::G_FRINT);
+ changeOpcode(MI, TargetOpcode::G_INTRINSIC_ROUNDEVEN);
return Legalized;
}
case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 2dfdddad3cc389f..646c8ed2a06e41e 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -17631,6 +17631,7 @@ SDValue DAGCombiner::visitFTRUNC(SDNode *N) {
case ISD::FRINT:
case ISD::FTRUNC:
case ISD::FNEARBYINT:
+ case ISD::FROUNDEVEN:
case ISD::FFLOOR:
case ISD::FCEIL:
return N0;
diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp
index 3e4bff5ddce1264..f5f698aafcd17e8 100644
--- a/llvm/lib/CodeGen/TargetLoweringBase.cpp
+++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp
@@ -867,9 +867,8 @@ void TargetLoweringBase::initActions() {
setOperationAction({ISD::BITREVERSE, ISD::PARITY}, VT, Expand);
// These library functions default to expand.
- setOperationAction(
- {ISD::FROUND, ISD::FROUNDEVEN, ISD::FPOWI, ISD::FLDEXP, ISD::FFREXP},
- VT, Expand);
+ setOperationAction({ISD::FROUND, ISD::FPOWI, ISD::FLDEXP, ISD::FFREXP}, VT,
+ Expand);
// These operations default to expand for vector types.
if (VT.isVector())
@@ -928,7 +927,7 @@ void TargetLoweringBase::initActions() {
setOperationAction({ISD::FCBRT, ISD::FLOG, ISD::FLOG2, ISD::FLOG10, ISD::FEXP,
ISD::FEXP2, ISD::FEXP10, ISD::FFLOOR, ISD::FNEARBYINT,
ISD::FCEIL, ISD::FRINT, ISD::FTRUNC, ISD::LROUND,
- ISD::LLROUND, ISD::LRINT, ISD::LLRINT},
+ ISD::LLROUND, ISD::LRINT, ISD::LLRINT, ISD::FROUNDEVEN},
{MVT::f32, MVT::f64, MVT::f128}, Expand);
// Default ISD::TRAP to expand (which turns it into abort).
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index b5ceaaa14b4fd5e..ba47d3eabea0e58 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -163,6 +163,7 @@ bool AMDGPUDAGToDAGISel::fp16SrcZerosHighBits(unsigned Opc) const {
case ISD::FTRUNC:
case ISD::FRINT:
case ISD::FNEARBYINT:
+ case ISD::FROUNDEVEN:
case ISD::FROUND:
case ISD::FFLOOR:
case ISD::FMINNUM:
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index 607d59db7bcf709..cc16a7d73dfff3b 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -329,8 +329,8 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
// Library functions. These default to Expand, but we have instructions
// for them.
- setOperationAction({ISD::FCEIL, ISD::FPOW, ISD::FABS, ISD::FFLOOR, ISD::FRINT,
- ISD::FTRUNC, ISD::FMINNUM, ISD::FMAXNUM},
+ setOperationAction({ISD::FCEIL, ISD::FPOW, ISD::FABS, ISD::FFLOOR,
+ ISD::FROUNDEVEN, ISD::FTRUNC, ISD::FMINNUM, ISD::FMAXNUM},
MVT::f32, Legal);
setOperationAction(ISD::FLOG2, MVT::f32, Custom);
@@ -341,7 +341,7 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FNEARBYINT, {MVT::f16, MVT::f32, MVT::f64}, Custom);
- setOperationAction(ISD::FROUNDEVEN, {MVT::f16, MVT::f32, MVT::f64}, Custom);
+ setOperationAction(ISD::FRINT, {MVT::f16, MVT::f32, MVT::f64}, Custom);
setOperationAction(ISD::FREM, {MVT::f16, MVT::f32, MVT::f64}, Custom);
@@ -457,14 +457,14 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
for (MVT VT : FloatVectorTypes) {
setOperationAction(
- {ISD::FABS, ISD::FMINNUM, ISD::FMAXNUM, ISD::FADD,
- ISD::FCEIL, ISD::FCOS, ISD::FDIV, ISD::FEXP2,
- ISD::FEXP, ISD::FLOG2, ISD::FREM, ISD::FLOG,
- ISD::FLOG10, ISD::FPOW, ISD::FFLOOR, ISD::FTRUNC,
- ISD::FMUL, ISD::FMA, ISD::FRINT, ISD::FNEARBYINT,
- ISD::FSQRT, ISD::FSIN, ISD::FSUB, ISD::FNEG,
- ISD::VSELECT, ISD::SELECT_CC, ISD::FCOPYSIGN, ISD::VECTOR_SHUFFLE,
- ISD::SETCC, ISD::FCANONICALIZE},
+ {ISD::FABS, ISD::FMINNUM, ISD::FMAXNUM, ISD::FADD,
+ ISD::FCEIL, ISD::FCOS, ISD::FDIV, ISD::FEXP2,
+ ISD::FEXP, ISD::FLOG2, ISD::FREM, ISD::FLOG,
+ ISD::FLOG10, ISD::FPOW, ISD::FFLOOR, ISD::FTRUNC,
+ ISD::FMUL, ISD::FMA, ISD::FRINT, ISD::FNEARBYINT,
+ ISD::FSQRT, ISD::FSIN, ISD::FSUB, ISD::FNEG,
+ ISD::VSELECT, ISD::SELECT_CC, ISD::FCOPYSIGN, ISD::VECTOR_SHUFFLE,
+ ISD::SETCC, ISD::FCANONICALIZE, ISD::FROUNDEVEN},
VT, Expand);
}
@@ -585,6 +585,7 @@ static bool fnegFoldsIntoOpcode(unsigned Opc) {
case ISD::FTRUNC:
case ISD::FRINT:
case ISD::FNEARBYINT:
+ case ISD::FROUNDEVEN:
case ISD::FCANONICALIZE:
case AMDGPUISD::RCP:
case AMDGPUISD::RCP_LEGACY:
@@ -2368,7 +2369,8 @@ SDValue AMDGPUTargetLowering::LowerFTRUNC(SDValue Op, SelectionDAG &DAG) const {
return DAG.getNode(ISD::BITCAST, SL, MVT::f64, Tmp2);
}
-SDValue AMDGPUTargetLowering::LowerFRINT(SDValue Op, SelectionDAG &DAG) const {
+SDValue AMDGPUTargetLowering::LowerFROUNDEVEN(SDValue Op,
+ SelectionDAG &DAG) const {
SDLoc SL(Op);
SDValue Src = Op.getOperand(0);
@@ -2395,18 +2397,19 @@ SDValue AMDGPUTargetLowering::LowerFRINT(SDValue Op, SelectionDAG &DAG) const {
return DAG.getSelect(SL, MVT::f64, Cond, Src, Tmp2);
}
-SDValue AMDGPUTargetLowering::LowerFNEARBYINT(SDValue Op, SelectionDAG &DAG) const {
+SDValue AMDGPUTargetLowering::LowerFNEARBYINT(SDValue Op,
+ SelectionDAG &DAG) const {
// FNEARBYINT and FRINT are the same, except in their handling of FP
// exceptions. Those aren't really meaningful for us, and OpenCL only has
// rint, so just treat them as equivalent.
- return DAG.getNode(ISD::FRINT, SDLoc(Op), Op.getValueType(), Op.getOperand(0));
+ return DAG.getNode(ISD::FROUNDEVEN, SDLoc(Op), Op.getValueType(),
+ Op.getOperand(0));
}
-SDValue AMDGPUTargetLowering::LowerFROUNDEVEN(SDValue Op,
- SelectionDAG &DAG) const {
+SDValue AMDGPUTargetLowering::LowerFRINT(SDValue Op, SelectionDAG &DAG) const {
auto VT = Op.getValueType();
auto Arg = Op.getOperand(0u);
- return DAG.getNode(ISD::FRINT, SDLoc(Op), VT, Arg);
+ return DAG.getNode(ISD::FROUNDEVEN, SDLoc(Op), VT, Arg);
}
// XXX - May require not supporting f32 denormals?
@@ -2936,7 +2939,7 @@ SDValue AMDGPUTargetLowering::lowerFEXP(SDValue Op, SelectionDAG &DAG) const {
PL = getMad(DAG, SL, VT, XH, CL, Mad0, Flags);
}
- SDValue E = DAG.getNode(ISD::FRINT, SL, VT, PH, Flags);
+ SDValue E = DAG.getNode(ISD::FROUNDEVEN, SL, VT, PH, Flags);
// It is unsafe to contract this fsub into the PH multiply.
SDValue PHSubE = DAG.getNode(ISD::FSUB, SL, VT, PH, E, FlagsNoContract);
@@ -4680,6 +4683,7 @@ SDValue AMDGPUTargetLowering::performFNegCombine(SDNode *N,
case ISD::FTRUNC:
case ISD::FRINT:
case ISD::FNEARBYINT: // XXX - Should fround be handled?
+ case ISD::FROUNDEVEN:
case ISD::FSIN:
case ISD::FCANONICALIZE:
case AMDGPUISD::RCP:
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index 3d70ed150df12f8..f0bb83b21a3f87a 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -1076,27 +1076,30 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
.scalarize(0)
.lower();
- // Lower roundeven into G_FRINT
- getActionDefinitionsBuilder({G_INTRINSIC_ROUND, G_INTRINSIC_ROUNDEVEN})
- .scalarize(0)
- .lower();
+ // Lower G_FNEARBYINT and G_FRINT into G_INTRINSIC_ROUNDEVEN
+ getActionDefinitionsBuilder({G_INTRINSIC_ROUND, G_FRINT, G_FNEARBYINT})
+ .scalarize(0)
+ .lower();
if (ST.has16BitInsts()) {
- getActionDefinitionsBuilder({G_INTRINSIC_TRUNC, G_FCEIL, G_FRINT})
- .legalFor({S16, S32, S64})
- .clampScalar(0, S16, S64)
- .scalarize(0);
+ getActionDefinitionsBuilder(
+ {G_INTRINSIC_TRUNC, G_FCEIL, G_INTRINSIC_ROUNDEVEN})
+ .legalFor({S16, S32, S64})
+ .clampScalar(0, S16, S64)
+ .scalarize(0);
} else if (ST.getGeneration() >= AMDGPUSubtarget::SEA_ISLANDS) {
- getActionDefinitionsBuilder({G_INTRINSIC_TRUNC, G_FCEIL, G_FRINT})
- .legalFor({S32, S64})
- .clampScalar(0, S32, S64)
- .scalarize(0);
+ getActionDefinitionsBuilder(
+ {G_INTRINSIC_TRUNC, G_FCEIL, G_INTRINSIC_ROUNDEVEN})
+ .legalFor({S32, S64})
+ .clampScalar(0, S32, S64)
+ .scalarize(0);
} else {
- getActionDefinitionsBuilder({G_INTRINSIC_TRUNC, G_FCEIL, G_FRINT})
- .legalFor({S32})
- .customFor({S64})
- .clampScalar(0, S32, S64)
- .scalarize(0);
+ getActionDefinitionsBuilder(
+ {G_INTRINSIC_TRUNC, G_FCEIL, G_INTRINSIC_ROUNDEVEN})
+ .legalFor({S32})
+ .customFor({S64})
+ .clampScalar(0, S32, S64)
+ .scalarize(0);
}
getActionDefinitionsBuilder(G_PTR_ADD)
@@ -1980,8 +1983,8 @@ bool AMDGPULegalizerInfo::legalizeCustom(LegalizerHelper &Helper,
switch (MI.getOpcode()) {
case TargetOpcode::G_ADDRSPACE_CAST:
return legalizeAddrSpaceCast(MI, MRI, B);
- case TargetOpcode::G_FRINT:
- return legalizeFrint(MI, MRI, B);
+ case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
+ return legalizeFroundeven(MI, MRI, B);
case TargetOpcode::G_FCEIL:
return legalizeFceil(MI, MRI, B);
case TargetOpcode::G_FREM:
@@ -2286,9 +2289,9 @@ bool AMDGPULegalizerInfo::legalizeAddrSpaceCast(
return true;
}
-bool AMDGPULegalizerInfo::legalizeFrint(
- MachineInstr &MI, MachineRegisterInfo &MRI,
- MachineIRBuilder &B) const {
+bool AMDGPULegalizerInfo::legalizeFroundeven(MachineInstr &MI,
+ MachineRegisterInfo &MRI,
+ MachineIRBuilder &B) const {
Register Src = MI.getOperand(1).getReg();
LLT Ty = MRI.getType(Src);
assert(Ty.isScalar() && Ty.getSizeInBits() == 64);
@@ -3567,7 +3570,7 @@ bool AMDGPULegalizerInfo::legalizeFExp(MachineInstr &MI,
PL = getMad(B, Ty, XH.getReg(0), CL.getReg(0), Mad0, Flags);
}
- auto E = B.buildFRint(Ty, PH, Flags);
+ auto E = B.buildIntrinsicRoundeven(Ty, PH, Flags);
// It is unsafe to contract this fsub into the PH multiply.
auto PHSubE = B.buildFSub(Ty, PH, E, FlagsNoContract);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
index b90fb76a4ccda1a..855fa0ddc214fe8 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
@@ -43,8 +43,8 @@ class AMDGPULegalizerInfo final : public LegalizerInfo {
bool legalizeAddrSpaceCast(MachineInstr &MI, MachineRegisterInfo &MRI,
MachineIRBuilder &B) const;
- bool legalizeFrint(MachineInstr &MI, MachineRegisterInfo &MRI,
- MachineIRBuilder &B) const;
+ bool legalizeFroundeven(MachineInstr &MI, MachineRegisterInfo &MRI,
+ MachineIRBuilder &B) const;
bool legalizeFceil(MachineInstr &MI, MachineRegisterInfo &MRI,
MachineIRBuilder &B) const;
bool legalizeFrem(MachineInstr &MI, MachineRegisterInfo &MRI,
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
index 5b056bd9e5dba2c..2f7b42d87a165a2 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
@@ -3718,7 +3718,7 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
case AMDGPU::G_FMA:
case AMDGPU::G_FFLOOR:
case AMDGPU::G_FCEIL:
- case AMDGPU::G_FRINT:
+ case AMDGPU::G_INTRINSIC_ROUNDEVEN:
case AMDGPU::G_FMINNUM:
case AMDGPU::G_FMAXNUM:
case AMDGPU::G_INTRINSIC_TRUNC:
diff --git a/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp b/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp
index ad072cfe23b17d9..c1ba9c514874eb7 100644
--- a/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp
@@ -101,7 +101,7 @@ R600TargetLowering::R600TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FSUB, MVT::f32, Expand);
- setOperationAction({ISD::FCEIL, ISD::FTRUNC, ISD::FRINT, ISD::FFLOOR},
+ setOperationAction({ISD::FCEIL, ISD::FTRUNC, ISD::FROUNDEVEN, ISD::FFLOOR},
MVT::f64, Custom);
setOperationAction(ISD::SELECT_CC, {MVT::f32, MVT::i32}, Custom);
diff --git a/llvm/lib/Target/AMDGPU/R600Instructions.td b/llvm/lib/Target/AMDGPU/R600Instructions.td
index f4dfbe8adc75d25..f82bd55beccc0ce 100644
--- a/llvm/lib/Target/AMDGPU/R600Instructions.td
+++ b/llvm/lib/Target/AMDGPU/R600Instructions.td
@@ -782,7 +782,7 @@ def SETNE_DX10 : R600_2OP <
def FRACT : R600_1OP_Helper <0x10, "FRACT", AMDGPUfract>;
def TRUNC : R600_1OP_Helper <0x11, "TRUNC", ftrunc>;
def CEIL : R600_1OP_Helper <0x12, "CEIL", fceil>;
-def RNDNE : R600_1OP_Helper <0x13, "RNDNE", frint>;
+def RNDNE : R600_1OP_Helper <0x13, "RNDNE", froundeven>;
def FLOOR : R600_1OP_Helper <0x14, "FLOOR", ffloor>;
def MOV : R600_1OP <0x19, "MOV", []>;
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index f8f1e6d6c9097cc..b6648249caa166f 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -479,9 +479,10 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
{MVT::f32, MVT::f64}, Legal);
if (Subtarget->haveRoundOpsF64())
- setOperationAction({ISD::FTRUNC, ISD::FCEIL, ISD::FRINT}, MVT::f64, Legal);
+ setOperationAction({ISD::FTRUNC, ISD::FCEIL, ISD::FROUNDEVEN}, MVT::f64,
+ Legal);
else
- setOperationAction({ISD::FCEIL, ISD::FTRUNC, ISD::FRINT, ISD::FFLOOR},
+ setOperationAction({ISD::FCEIL, ISD::FTRUNC, ISD::FROUNDEVEN, ISD::FFLOOR},
MVT::f64, Custom);
setOperationAction(ISD::FFLOOR, MVT::f64, Legal);
diff --git a/llvm/lib/Target/AMDGPU/SOPInstructions.td b/llvm/lib/Target/AMDGPU/SOPInstructions.td
index f3309049e7a7fd9..375cabc0249c31d 100644
--- a/llvm/lib/Target/AMDGPU/SOPInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SOPInstructions.td
@@ -419,7 +419,7 @@ let SubtargetPredicate = HasSALUFloatInsts, Uses = [MODE],
def S_CEIL_F32 : SOP1_F32_Inst<"s_ceil_f32", fceil>;
def S_FLOOR_F32 : SOP1_F32_Inst<"s_floor_f32", ffloor>;
def S_TRUNC_F32 : SOP1_F32_Inst<"s_trunc_f32", ftrunc>;
- def S_RNDNE_F32 : SOP1_F32_Inst<"s_rndne_f32", frint>;
+ def S_RNDNE_F32 : SOP1_F32_Inst<"s_rndne_f32", froundeven>;
let FPDPRounding = 1 in
def S_CVT_F16_F32 : SOP1_F32_Inst<"s_cvt_f16_f32", fpround, f16, f32>;
@@ -427,7 +427,7 @@ let SubtargetPredicate = HasSALUFloatInsts, Uses = [MODE],
def S_CEIL_F16 : SOP1_F32_Inst<"s_ceil_f16", fceil, f16>;
def S_FLOOR_F16 : SOP1_F32_Inst<"s_floor_f16", ffloor, f16>;
def S_TRUNC_F16 : SOP1_F32_Inst<"s_trunc_f16", ftrunc, f16>;
- def S_RNDNE_F16 : SOP1_F32_Inst<"s_rndne_f16", frint, f16>;
+ def S_RNDNE_F16 : SOP1_F32_Inst<"s_rndne_f16", froundeven, f16>;
} // End mayRaiseFPException = 1
} // End SubtargetPredicate = HasSALUFloatInsts, Uses = [MODE]
// SchedRW = [WriteSFPU], isReMaterializable = 1
diff --git a/llvm/lib/Target/AMDGPU/VOP1Instructions.td b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
index 734db326fb77ddd..317d636c886d0e3 100644
--- a/llvm/lib/Target/AMDGPU/VOP1Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
@@ -317,7 +317,7 @@ defm V_CVT_F32_UBYTE3 : VOP1Inst <"v_cvt_f32_ubyte3", VOP1_F32_I32, AMDGPUcvt_f3
defm V_FRACT_F32 : VOP1Inst <"v_fract_f32", VOP_F32_F32, AMDGPUfract>;
defm V_TRUNC_F32 : VOP1Inst <"v_trunc_f32", VOP_F32_F32, ftrunc>;
defm V_CEIL_F32 : VOP1Inst <"v_ceil_f32", VOP_F32_F32, fceil>;
-defm V_RNDNE_F32 : VOP1Inst <"v_rndne_f32", VOP_F32_F32, frint>;
+defm V_RNDNE_F32 : VOP1Inst <"v_rndne_f32", VOP_F32_F32, froundeven>;
defm V_FLOOR_F32 : VOP1Inst <"v_floor_f32", VOP_F32_F32, ffloor>;
let TRANS = 1, SchedRW = [WriteTrans32] in {
@@ -458,7 +458,7 @@ let SubtargetPredicate = isGFX7Plus in {
let SchedRW = [WriteDoubleAdd] in {
defm V_TRUNC_F64 : VOP1Inst<"v_trunc_f64", VOP_F64_F64, ftrunc>;
defm V_CEIL_F64 : VOP1Inst<"v_ceil_f64", VOP_F64_F64, fceil>;
- defm V_RNDNE_F64 : VOP1Inst<"v_rndne_f64", VOP_F64_F64, frint>;
+ defm V_RNDNE_F64 : VOP1Inst<"v_rndne_f64", VOP_F64_F64, froundeven>;
defm V_FLOOR_F64 : VOP1Inst<"v_floor_f64", VOP_F64_F64, ffloor>;
} // End SchedRW = [WriteDoubleAdd]
} // End SubtargetPredicate = isGFX7Plus
@@ -502,7 +502,7 @@ defm V_FREXP_EXP_I16_F16_t16 : VOP1Inst <"v_frexp_exp_i16_f16_t16", VOP_I16_F16_
defm V_FLOOR_F16 : VOP1Inst_t16 <"v_floor_f16", VOP_F16_F16, ffloor>;
defm V_CEIL_F16 : VOP1Inst_t16 <"v_ceil_f16", VOP_F16_F16, fceil>;
defm V_TRUNC_F16 : VOP1Inst_t16 <"v_trunc_f16", VOP_F16_F16, ftrunc>;
-defm V_RNDNE_F16 : VOP1Inst_t16 <"v_rndne_f16", VOP_F16_F16, frint>;
+defm V_RNDNE_F16 : VOP1Inst_t16 <"v_rndne_f16", VOP_F16_F16, froundeven>;
let FPDPRounding = 1 in {
defm V_FRACT_F16 : VOP1Inst_t16 <"v_fract_f16", VOP_F16_F16, AMDGPUfract>;
} // End FPDPRounding = 1
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 20e0210bcec5b6a..882ff380140a5f6 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -796,6 +796,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::FTRUNC, MVT::f80, Expand);
setOperationAction(ISD::FRINT, MVT::f80, Expand);
setOperationAction(ISD::FNEARBYINT, MVT::f80, Expand);
+ setOperationAction(ISD::FROUNDEVEN, MVT::f80, Expand);
setOperationAction(ISD::FMA, MVT::f80, Expand);
setOperationAction(ISD::LROUND, MVT::f80, Expand);
setOperationAction(ISD::LLROUND, MVT::f80, Expand);
@@ -929,6 +930,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::FTRUNC, VT, Expand);
setOperationAction(ISD::FRINT, VT, Expand);
setOperationAction(ISD::FNEARBYINT, VT, Expand);
+ setOperationAction(ISD::FROUNDEVEN, VT, Expand);
setOperationAction(ISD::SMUL_LOHI, VT, Expand);
setOperationAction(ISD::MULHS, VT, Expand);
setOperationAction(ISD::UMUL_LOHI, VT, Expand);
@@ -2111,6 +2113,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::STRICT_FRINT, VT, Legal);
setOperationAction(ISD::FNEARBYINT, VT, Legal);
setOperationAction(ISD::STRICT_FNEARBYINT, VT, Legal);
+ setOperationAction(ISD::FROUNDEVEN, VT, Legal);
+ setOperationAction(ISD::STRICT_FROUNDEVEN, VT, Legal);
setOperationAction(ISD::FROUND, VT, Custom);
@@ -43796,6 +43800,7 @@ static SDValue scalarizeExtEltFP(SDNode *ExtElt, SelectionDAG &DAG,
case ISD::FCEIL:
case ISD::FTRUNC:
case ISD::FNEARBYINT:
+ case ISD::FROUNDEVEN:
case ISD::FROUND:
case ISD::FFLOOR:
case X86ISD::FRCP:
diff --git a/llvm/test/Analysis/CostModel/AMDGPU/fround.ll b/llvm/test/Analysis/CostModel/AMDGPU/fround.ll
index c4dd524efdd8447..88adabda0bfbb6e 100644
--- a/llvm/test/Analysis/CostModel/AMDGPU/fround.ll
+++ b/llvm/test/Analysis/CostModel/AMDGPU/fround.ll
@@ -142,21 +142,21 @@ define i32 @nearbyint(i32 %arg) {
define i32 @rint(i32 %arg) {
; FAST-LABEL: 'rint'
-; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.rint.f32(float undef)
-; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F32 = call <4 x float> @llvm.rint.v4f32(<4 x float> undef)
-; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F32 = call <8 x float> @llvm.rint.v8f32(<8 x float> undef)
-; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16F32 = call <16 x float> @llvm.rint.v16f32(<16 x float> undef)
-; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.rint.f64(double undef)
-; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = call <2 x double> @llvm.rint.v2f64(<2 x double> undef)
-; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = call <4 x double> @llvm.rint.v4f64(<4 x double> undef)
-; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = call <8 x double> @llvm.rint.v8f64(<8 x double> undef)
+; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F32 = call float @llvm.rint.f32(float undef)
+; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F32 = call <4 x float> @llvm.rint.v4f32(<4 x float> undef)
+; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F32 = call <8 x float> @llvm.rint.v8f32(<8 x float> undef)
+; FAST-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16F32 = call <16 x float> @llvm.rint.v16f32(<16 x float> undef)
+; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F64 = call double @llvm.rint.f64(double undef)
+; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = call <2 x double> @llvm.rint.v2f64(<2 x double> undef)
+; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F64 = call <4 x double> @llvm.rint.v4f64(<4 x double> undef)
+; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F64 = call <8 x double> @llvm.rint.v8f64(<8 x double> undef)
; FAST-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef
;
; SLOW-LABEL: 'rint'
-; SLOW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.rint.f32(float undef)
-; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F32 = call <4 x float> @llvm.rint.v4f32(<4 x float> undef)
-; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F32 = call <8 x float> @llvm.rint.v8f32(<8 x float> undef)
-; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16F32 = call <16 x float> @llvm.rint.v16f32(<16 x float> undef)
+; SLOW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F32 = call float @llvm.rint.f32(float undef)
+; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F32 = call <4 x float> @llvm.rint.v4f32(<4 x float> undef)
+; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F32 = call <8 x float> @llvm.rint.v8f32(<8 x float> undef)
+; SLOW-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16F32 = call <16 x float> @llvm.rint.v16f32(<16 x float> undef)
; SLOW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F64 = call double @llvm.rint.f64(double undef)
; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = call <2 x double> @llvm.rint.v2f64(<2 x double> undef)
; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F64 = call <4 x double> @llvm.rint.v4f64(<4 x double> undef)
@@ -164,21 +164,21 @@ define i32 @rint(i32 %arg) {
; SLOW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef
;
; FAST-SIZE-LABEL: 'rint'
-; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.rint.f32(float undef)
-; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F32 = call <4 x float> @llvm.rint.v4f32(<4 x float> undef)
-; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F32 = call <8 x float> @llvm.rint.v8f32(<8 x float> undef)
-; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16F32 = call <16 x float> @llvm.rint.v16f32(<16 x float> undef)
-; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.rint.f64(double undef)
-; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = call <2 x double> @llvm.rint.v2f64(<2 x double> undef)
-; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = call <4 x double> @llvm.rint.v4f64(<4 x double> undef)
-; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = call <8 x double> @llvm.rint.v8f64(<8 x double> undef)
+; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F32 = call float @llvm.rint.f32(float undef)
+; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F32 = call <4 x float> @llvm.rint.v4f32(<4 x float> undef)
+; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F32 = call <8 x float> @llvm.rint.v8f32(<8 x float> undef)
+; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16F32 = call <16 x float> @llvm.rint.v16f32(<16 x float> undef)
+; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F64 = call double @llvm.rint.f64(double undef)
+; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = call <2 x double> @llvm.rint.v2f64(<2 x double> undef)
+; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F64 = call <4 x double> @llvm.rint.v4f64(<4 x double> undef)
+; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F64 = call <8 x double> @llvm.rint.v8f64(<8 x double> undef)
; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
; SLOW-SIZE-LABEL: 'rint'
-; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.rint.f32(float undef)
-; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F32 = call <4 x float> @llvm.rint.v4f32(<4 x float> undef)
-; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F32 = call <8 x float> @llvm.rint.v8f32(<8 x float> undef)
-; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16F32 = call <16 x float> @llvm.rint.v16f32(<16 x float> undef)
+; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F32 = call float @llvm.rint.f32(float undef)
+; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F32 = call <4 x float> @llvm.rint.v4f32(<4 x float> undef)
+; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F32 = call <8 x float> @llvm.rint.v8f32(<8 x float> undef)
+; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16F32 = call <16 x float> @llvm.rint.v16f32(<16 x float> undef)
; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F64 = call double @llvm.rint.f64(double undef)
; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = call <2 x double> @llvm.rint.v2f64(<2 x double> undef)
; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F64 = call <4 x double> @llvm.rint.v4f64(<4 x double> undef)
@@ -198,6 +198,64 @@ define i32 @rint(i32 %arg) {
ret i32 undef
}
+define i32 @roundeven(i32 %arg) {
+; FAST-LABEL: 'roundeven'
+; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.roundeven.f32(float undef)
+; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F32 = call <4 x float> @llvm.roundeven.v4f32(<4 x float> undef)
+; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F32 = call <8 x float> @llvm.roundeven.v8f32(<8 x float> undef)
+; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16F32 = call <16 x float> @llvm.roundeven.v16f32(<16 x float> undef)
+; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.roundeven.f64(double undef)
+; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = call <2 x double> @llvm.roundeven.v2f64(<2 x double> undef)
+; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = call <4 x double> @llvm.roundeven.v4f64(<4 x double> undef)
+; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = call <8 x double> @llvm.roundeven.v8f64(<8 x double> undef)
+; FAST-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef
+;
+; SLOW-LABEL: 'roundeven'
+; SLOW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.roundeven.f32(float undef)
+; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F32 = call <4 x float> @llvm.roundeven.v4f32(<4 x float> undef)
+; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F32 = call <8 x float> @llvm.roundeven.v8f32(<8 x float> undef)
+; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16F32 = call <16 x float> @llvm.roundeven.v16f32(<16 x float> undef)
+; SLOW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F64 = call double @llvm.roundeven.f64(double undef)
+; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = call <2 x double> @llvm.roundeven.v2f64(<2 x double> undef)
+; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F64 = call <4 x double> @llvm.roundeven.v4f64(<4 x double> undef)
+; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F64 = call <8 x double> @llvm.roundeven.v8f64(<8 x double> undef)
+; SLOW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef
+;
+; FAST-SIZE-LABEL: 'roundeven'
+; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.roundeven.f32(float undef)
+; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F32 = call <4 x float> @llvm.roundeven.v4f32(<4 x float> undef)
+; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F32 = call <8 x float> @llvm.roundeven.v8f32(<8 x float> undef)
+; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16F32 = call <16 x float> @llvm.roundeven.v16f32(<16 x float> undef)
+; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.roundeven.f64(double undef)
+; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = call <2 x double> @llvm.roundeven.v2f64(<2 x double> undef)
+; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = call <4 x double> @llvm.roundeven.v4f64(<4 x double> undef)
+; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = call <8 x double> @llvm.roundeven.v8f64(<8 x double> undef)
+; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; SLOW-SIZE-LABEL: 'roundeven'
+; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.roundeven.f32(float undef)
+; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F32 = call <4 x float> @llvm.roundeven.v4f32(<4 x float> undef)
+; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F32 = call <8 x float> @llvm.roundeven.v8f32(<8 x float> undef)
+; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16F32 = call <16 x float> @llvm.roundeven.v16f32(<16 x float> undef)
+; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F64 = call double @llvm.roundeven.f64(double undef)
+; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = call <2 x double> @llvm.roundeven.v2f64(<2 x double> undef)
+; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F64 = call <4 x double> @llvm.roundeven.v4f64(<4 x double> undef)
+; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F64 = call <8 x double> @llvm.roundeven.v8f64(<8 x double> undef)
+; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+ %F32 = call float @llvm.roundeven.f32(float undef)
+ %V4F32 = call <4 x float> @llvm.roundeven.v4f32(<4 x float> undef)
+ %V8F32 = call <8 x float> @llvm.roundeven.v8f32(<8 x float> undef)
+ %V16F32 = call <16 x float> @llvm.roundeven.v16f32(<16 x float> undef)
+
+ %F64 = call double @llvm.roundeven.f64(double undef)
+ %V2F64 = call <2 x double> @llvm.roundeven.v2f64(<2 x double> undef)
+ %V4F64 = call <4 x double> @llvm.roundeven.v4f64(<4 x double> undef)
+ %V8F64 = call <8 x double> @llvm.roundeven.v8f64(<8 x double> undef)
+
+ ret i32 undef
+}
+
define i32 @trunc(i32 %arg) {
; FAST-LABEL: 'trunc'
; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.trunc.f32(float undef)
@@ -296,6 +354,16 @@ declare <2 x double> @llvm.rint.v2f64(<2 x double>)
declare <4 x double> @llvm.rint.v4f64(<4 x double>)
declare <8 x double> @llvm.rint.v8f64(<8 x double>)
+declare float @llvm.roundeven.f32(float)
+declare <4 x float> @llvm.roundeven.v4f32(<4 x float>)
+declare <8 x float> @llvm.roundeven.v8f32(<8 x float>)
+declare <16 x float> @llvm.roundeven.v16f32(<16 x float>)
+
+declare double @llvm.roundeven.f64(double)
+declare <2 x double> @llvm.roundeven.v2f64(<2 x double>)
+declare <4 x double> @llvm.roundeven.v4f64(<4 x double>)
+declare <8 x double> @llvm.roundeven.v8f64(<8 x double>)
+
declare float @llvm.trunc.f32(float)
declare <4 x float> @llvm.trunc.v4f32(<4 x float>)
declare <8 x float> @llvm.trunc.v8f32(<8 x float>)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-frint.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-frint.mir
deleted file mode 100644
index aaed64f95b08c9d..000000000000000
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-frint.mir
+++ /dev/null
@@ -1,110 +0,0 @@
-# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -march=amdgcn -mcpu=bonaire -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GCN %s
-# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GCN %s
-
----
-name: frint_s32_vv
-legalized: true
-regBankSelected: true
-tracksRegLiveness: true
-
-body: |
- bb.0:
- liveins: $vgpr0
-
- ; GCN-LABEL: name: frint_s32_vv
- ; GCN: liveins: $vgpr0
- ; GCN-NEXT: {{ $}}
- ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GCN-NEXT: %1:vgpr_32 = nofpexcept V_RNDNE_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
- ; GCN-NEXT: $vgpr0 = COPY %1
- %0:vgpr(s32) = COPY $vgpr0
- %1:vgpr(s32) = G_FRINT %0
- $vgpr0 = COPY %1
-...
-
----
-name: frint_s32_vs
-legalized: true
-regBankSelected: true
-tracksRegLiveness: true
-
-body: |
- bb.0:
- liveins: $sgpr0
-
- ; GCN-LABEL: name: frint_s32_vs
- ; GCN: liveins: $sgpr0
- ; GCN-NEXT: {{ $}}
- ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
- ; GCN-NEXT: %1:vgpr_32 = nofpexcept V_RNDNE_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
- ; GCN-NEXT: $vgpr0 = COPY %1
- %0:sgpr(s32) = COPY $sgpr0
- %1:vgpr(s32) = G_FRINT %0
- $vgpr0 = COPY %1
-...
-
----
-name: frint_fneg_s32_vv
-legalized: true
-regBankSelected: true
-tracksRegLiveness: true
-
-body: |
- bb.0:
- liveins: $vgpr0
-
- ; GCN-LABEL: name: frint_fneg_s32_vv
- ; GCN: liveins: $vgpr0
- ; GCN-NEXT: {{ $}}
- ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GCN-NEXT: %2:vgpr_32 = nofpexcept V_RNDNE_F32_e64 1, [[COPY]], 0, 0, implicit $mode, implicit $exec
- ; GCN-NEXT: $vgpr0 = COPY %2
- %0:vgpr(s32) = COPY $vgpr0
- %1:vgpr(s32) = G_FNEG %0
- %2:vgpr(s32) = G_FRINT %1
- $vgpr0 = COPY %2
-...
-
----
-name: frint_s64_vv
-legalized: true
-regBankSelected: true
-tracksRegLiveness: true
-
-body: |
- bb.0:
- liveins: $vgpr0_vgpr1
-
- ; GCN-LABEL: name: frint_s64_vv
- ; GCN: liveins: $vgpr0_vgpr1
- ; GCN-NEXT: {{ $}}
- ; GCN-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GCN-NEXT: %1:vreg_64 = nofpexcept V_RNDNE_F64_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
- ; GCN-NEXT: $vgpr0_vgpr1 = COPY %1
- %0:vgpr(s64) = COPY $vgpr0_vgpr1
- %1:vgpr(s64) = G_FRINT %0
- $vgpr0_vgpr1 = COPY %1
-...
-
----
-name: frint_s64_fneg_vv
-legalized: true
-regBankSelected: true
-tracksRegLiveness: true
-
-body: |
- bb.0:
- liveins: $vgpr0_vgpr1
-
- ; GCN-LABEL: name: frint_s64_fneg_vv
- ; GCN: liveins: $vgpr0_vgpr1
- ; GCN-NEXT: {{ $}}
- ; GCN-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GCN-NEXT: %2:vreg_64 = nofpexcept V_RNDNE_F64_e64 1, [[COPY]], 0, 0, implicit $mode, implicit $exec
- ; GCN-NEXT: $vgpr0_vgpr1 = COPY %2
- %0:vgpr(s64) = COPY $vgpr0_vgpr1
- %1:vgpr(s64) = G_FNEG %0
- %2:vgpr(s64) = G_FRINT %1
- $vgpr0_vgpr1 = COPY %2
-...
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-frint.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-frint.s16.mir
deleted file mode 100644
index 0e6a3ccacd16810..000000000000000
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-frint.s16.mir
+++ /dev/null
@@ -1,97 +0,0 @@
-# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -global-isel-abort=0 -verify-machineinstrs -o - %s | FileCheck -check-prefix=GCN %s
-
----
-name: frint_s16_ss
-legalized: true
-regBankSelected: true
-tracksRegLiveness: true
-
-body: |
- bb.0:
- liveins: $sgpr0
-
- ; GCN-LABEL: name: frint_s16_ss
- ; GCN: liveins: $sgpr0
- ; GCN-NEXT: {{ $}}
- ; GCN-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
- ; GCN-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32)
- ; GCN-NEXT: [[FRINT:%[0-9]+]]:sreg_32(s16) = G_FRINT [[TRUNC]]
- ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_32(s32) = COPY [[FRINT]](s16)
- ; GCN-NEXT: $sgpr0 = COPY [[COPY1]](s32)
- %0:sgpr(s32) = COPY $sgpr0
- %1:sgpr(s16) = G_TRUNC %0
- %2:sgpr(s16) = G_FRINT %1
- %3:sgpr(s32) = G_ANYEXT %2
- $sgpr0 = COPY %3
-...
-
----
-name: frint_s16_vv
-legalized: true
-regBankSelected: true
-tracksRegLiveness: true
-
-body: |
- bb.0:
- liveins: $vgpr0
-
- ; GCN-LABEL: name: frint_s16_vv
- ; GCN: liveins: $vgpr0
- ; GCN-NEXT: {{ $}}
- ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GCN-NEXT: %2:vgpr_32 = nofpexcept V_RNDNE_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
- ; GCN-NEXT: $vgpr0 = COPY %2
- %0:vgpr(s32) = COPY $vgpr0
- %1:vgpr(s16) = G_TRUNC %0
- %2:vgpr(s16) = G_FRINT %1
- %3:vgpr(s32) = G_ANYEXT %2
- $vgpr0 = COPY %3
-...
-
----
-name: frint_s16_vs
-legalized: true
-regBankSelected: true
-tracksRegLiveness: true
-
-body: |
- bb.0:
- liveins: $sgpr0
-
- ; GCN-LABEL: name: frint_s16_vs
- ; GCN: liveins: $sgpr0
- ; GCN-NEXT: {{ $}}
- ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
- ; GCN-NEXT: %2:vgpr_32 = nofpexcept V_RNDNE_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
- ; GCN-NEXT: $vgpr0 = COPY %2
- %0:sgpr(s32) = COPY $sgpr0
- %1:sgpr(s16) = G_TRUNC %0
- %2:vgpr(s16) = G_FRINT %1
- %3:vgpr(s32) = G_ANYEXT %2
- $vgpr0 = COPY %3
-...
-
----
-name: frint_fneg_s16_vv
-legalized: true
-regBankSelected: true
-tracksRegLiveness: true
-
-body: |
- bb.0:
- liveins: $vgpr0
-
- ; GCN-LABEL: name: frint_fneg_s16_vv
- ; GCN: liveins: $vgpr0
- ; GCN-NEXT: {{ $}}
- ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GCN-NEXT: %3:vgpr_32 = nofpexcept V_RNDNE_F16_e64 1, [[COPY]], 0, 0, implicit $mode, implicit $exec
- ; GCN-NEXT: $vgpr0 = COPY %3
- %0:vgpr(s32) = COPY $vgpr0
- %1:vgpr(s16) = G_TRUNC %0
- %2:vgpr(s16) = G_FNEG %1
- %3:vgpr(s16) = G_FRINT %2
- %4:vgpr(s32) = G_ANYEXT %3
- $vgpr0 = COPY %4
-...
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-scalar-float-sop1.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-scalar-float-sop1.mir
index bb86413964098ba..4e2f33b9505d076 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-scalar-float-sop1.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-scalar-float-sop1.mir
@@ -212,7 +212,7 @@ body: |
; GFX1150-NEXT: %1:sreg_32 = nofpexcept S_RNDNE_F32 [[COPY]], implicit $mode
; GFX1150-NEXT: $sgpr0 = COPY %1
%0:sgpr(s32) = COPY $sgpr0
- %1:sgpr(s32) = G_FRINT %0
+ %1:sgpr(s32) = G_INTRINSIC_ROUNDEVEN %0
$sgpr0 = COPY %1(s32)
...
@@ -295,7 +295,7 @@ body: |
; GFX1150-NEXT: $sgpr0 = COPY %2
%0:sgpr(s32) = COPY $sgpr0
%1:sgpr(s16) = G_TRUNC %0(s32)
- %2:sgpr(s16) = G_FRINT %1
+ %2:sgpr(s16) = G_INTRINSIC_ROUNDEVEN %1
%3:sgpr(s32) = G_ANYEXT %2(s16)
$sgpr0 = COPY %3(s32)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fexp.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fexp.mir
index 7bcd4e9d9cf67b6..1b675a83307e866 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fexp.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fexp.mir
@@ -21,10 +21,10 @@ body: |
; GFX6-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[C]], [[FNEG]]
; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3E54AE0BE0000000
; GFX6-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[C1]], [[FMA]]
- ; GFX6-NEXT: [[FRINT:%[0-9]+]]:_(s32) = G_FRINT [[FMUL]]
- ; GFX6-NEXT: [[FSUB:%[0-9]+]]:_(s32) = G_FSUB [[FMUL]], [[FRINT]]
+ ; GFX6-NEXT: [[INTRINSIC_ROUNDEVEN:%[0-9]+]]:_(s32) = G_INTRINSIC_ROUNDEVEN [[FMUL]]
+ ; GFX6-NEXT: [[FSUB:%[0-9]+]]:_(s32) = G_FSUB [[FMUL]], [[INTRINSIC_ROUNDEVEN]]
; GFX6-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FSUB]], [[FMA1]]
- ; GFX6-NEXT: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[FRINT]](s32)
+ ; GFX6-NEXT: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[INTRINSIC_ROUNDEVEN]](s32)
; GFX6-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD]](s32)
; GFX6-NEXT: [[FLDEXP:%[0-9]+]]:_(s32) = G_FLDEXP [[INT]], [[FPTOSI]](s32)
; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC059D1DA00000000
@@ -52,10 +52,10 @@ body: |
; GFX8-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL2]], [[FMUL1]]
; GFX8-NEXT: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[AND]], [[C2]]
; GFX8-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[FADD]]
- ; GFX8-NEXT: [[FRINT:%[0-9]+]]:_(s32) = G_FRINT [[FMUL]]
- ; GFX8-NEXT: [[FSUB1:%[0-9]+]]:_(s32) = G_FSUB [[FMUL]], [[FRINT]]
+ ; GFX8-NEXT: [[INTRINSIC_ROUNDEVEN:%[0-9]+]]:_(s32) = G_INTRINSIC_ROUNDEVEN [[FMUL]]
+ ; GFX8-NEXT: [[FSUB1:%[0-9]+]]:_(s32) = G_FSUB [[FMUL]], [[INTRINSIC_ROUNDEVEN]]
; GFX8-NEXT: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FSUB1]], [[FADD1]]
- ; GFX8-NEXT: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[FRINT]](s32)
+ ; GFX8-NEXT: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[INTRINSIC_ROUNDEVEN]](s32)
; GFX8-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD2]](s32)
; GFX8-NEXT: [[FLDEXP:%[0-9]+]]:_(s32) = G_FLDEXP [[INT]], [[FPTOSI]](s32)
; GFX8-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC059D1DA00000000
@@ -78,10 +78,10 @@ body: |
; GFX9-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[C]], [[FNEG]]
; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3E54AE0BE0000000
; GFX9-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[C1]], [[FMA]]
- ; GFX9-NEXT: [[FRINT:%[0-9]+]]:_(s32) = G_FRINT [[FMUL]]
- ; GFX9-NEXT: [[FSUB:%[0-9]+]]:_(s32) = G_FSUB [[FMUL]], [[FRINT]]
+ ; GFX9-NEXT: [[INTRINSIC_ROUNDEVEN:%[0-9]+]]:_(s32) = G_INTRINSIC_ROUNDEVEN [[FMUL]]
+ ; GFX9-NEXT: [[FSUB:%[0-9]+]]:_(s32) = G_FSUB [[FMUL]], [[INTRINSIC_ROUNDEVEN]]
; GFX9-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FSUB]], [[FMA1]]
- ; GFX9-NEXT: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[FRINT]](s32)
+ ; GFX9-NEXT: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[INTRINSIC_ROUNDEVEN]](s32)
; GFX9-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD]](s32)
; GFX9-NEXT: [[FLDEXP:%[0-9]+]]:_(s32) = G_FLDEXP [[INT]], [[FPTOSI]](s32)
; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC059D1DA00000000
@@ -115,10 +115,10 @@ body: |
; GFX6-NEXT: [[FMA:%[0-9]+]]:_(s32) = nnan G_FMA [[COPY]], [[C]], [[FNEG]]
; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3E54AE0BE0000000
; GFX6-NEXT: [[FMA1:%[0-9]+]]:_(s32) = nnan G_FMA [[COPY]], [[C1]], [[FMA]]
- ; GFX6-NEXT: [[FRINT:%[0-9]+]]:_(s32) = nnan G_FRINT [[FMUL]]
- ; GFX6-NEXT: [[FSUB:%[0-9]+]]:_(s32) = nnan G_FSUB [[FMUL]], [[FRINT]]
+ ; GFX6-NEXT: [[INTRINSIC_ROUNDEVEN:%[0-9]+]]:_(s32) = nnan G_INTRINSIC_ROUNDEVEN [[FMUL]]
+ ; GFX6-NEXT: [[FSUB:%[0-9]+]]:_(s32) = nnan G_FSUB [[FMUL]], [[INTRINSIC_ROUNDEVEN]]
; GFX6-NEXT: [[FADD:%[0-9]+]]:_(s32) = nnan G_FADD [[FSUB]], [[FMA1]]
- ; GFX6-NEXT: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[FRINT]](s32)
+ ; GFX6-NEXT: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[INTRINSIC_ROUNDEVEN]](s32)
; GFX6-NEXT: [[INT:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD]](s32)
; GFX6-NEXT: [[FLDEXP:%[0-9]+]]:_(s32) = nnan G_FLDEXP [[INT]], [[FPTOSI]](s32)
; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC059D1DA00000000
@@ -146,10 +146,10 @@ body: |
; GFX8-NEXT: [[FADD:%[0-9]+]]:_(s32) = nnan G_FADD [[FMUL2]], [[FMUL1]]
; GFX8-NEXT: [[FMUL3:%[0-9]+]]:_(s32) = nnan G_FMUL [[AND]], [[C2]]
; GFX8-NEXT: [[FADD1:%[0-9]+]]:_(s32) = nnan G_FADD [[FMUL3]], [[FADD]]
- ; GFX8-NEXT: [[FRINT:%[0-9]+]]:_(s32) = nnan G_FRINT [[FMUL]]
- ; GFX8-NEXT: [[FSUB1:%[0-9]+]]:_(s32) = nnan G_FSUB [[FMUL]], [[FRINT]]
+ ; GFX8-NEXT: [[INTRINSIC_ROUNDEVEN:%[0-9]+]]:_(s32) = nnan G_INTRINSIC_ROUNDEVEN [[FMUL]]
+ ; GFX8-NEXT: [[FSUB1:%[0-9]+]]:_(s32) = nnan G_FSUB [[FMUL]], [[INTRINSIC_ROUNDEVEN]]
; GFX8-NEXT: [[FADD2:%[0-9]+]]:_(s32) = nnan G_FADD [[FSUB1]], [[FADD1]]
- ; GFX8-NEXT: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[FRINT]](s32)
+ ; GFX8-NEXT: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[INTRINSIC_ROUNDEVEN]](s32)
; GFX8-NEXT: [[INT:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD2]](s32)
; GFX8-NEXT: [[FLDEXP:%[0-9]+]]:_(s32) = nnan G_FLDEXP [[INT]], [[FPTOSI]](s32)
; GFX8-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC059D1DA00000000
@@ -172,10 +172,10 @@ body: |
; GFX9-NEXT: [[FMA:%[0-9]+]]:_(s32) = nnan G_FMA [[COPY]], [[C]], [[FNEG]]
; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3E54AE0BE0000000
; GFX9-NEXT: [[FMA1:%[0-9]+]]:_(s32) = nnan G_FMA [[COPY]], [[C1]], [[FMA]]
- ; GFX9-NEXT: [[FRINT:%[0-9]+]]:_(s32) = nnan G_FRINT [[FMUL]]
- ; GFX9-NEXT: [[FSUB:%[0-9]+]]:_(s32) = nnan G_FSUB [[FMUL]], [[FRINT]]
+ ; GFX9-NEXT: [[INTRINSIC_ROUNDEVEN:%[0-9]+]]:_(s32) = nnan G_INTRINSIC_ROUNDEVEN [[FMUL]]
+ ; GFX9-NEXT: [[FSUB:%[0-9]+]]:_(s32) = nnan G_FSUB [[FMUL]], [[INTRINSIC_ROUNDEVEN]]
; GFX9-NEXT: [[FADD:%[0-9]+]]:_(s32) = nnan G_FADD [[FSUB]], [[FMA1]]
- ; GFX9-NEXT: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[FRINT]](s32)
+ ; GFX9-NEXT: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[INTRINSIC_ROUNDEVEN]](s32)
; GFX9-NEXT: [[INT:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD]](s32)
; GFX9-NEXT: [[FLDEXP:%[0-9]+]]:_(s32) = nnan G_FLDEXP [[INT]], [[FPTOSI]](s32)
; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC059D1DA00000000
@@ -210,10 +210,10 @@ body: |
; GFX6-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[UV]], [[C]], [[FNEG]]
; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3E54AE0BE0000000
; GFX6-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[UV]], [[C1]], [[FMA]]
- ; GFX6-NEXT: [[FRINT:%[0-9]+]]:_(s32) = G_FRINT [[FMUL]]
- ; GFX6-NEXT: [[FSUB:%[0-9]+]]:_(s32) = G_FSUB [[FMUL]], [[FRINT]]
+ ; GFX6-NEXT: [[INTRINSIC_ROUNDEVEN:%[0-9]+]]:_(s32) = G_INTRINSIC_ROUNDEVEN [[FMUL]]
+ ; GFX6-NEXT: [[FSUB:%[0-9]+]]:_(s32) = G_FSUB [[FMUL]], [[INTRINSIC_ROUNDEVEN]]
; GFX6-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FSUB]], [[FMA1]]
- ; GFX6-NEXT: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[FRINT]](s32)
+ ; GFX6-NEXT: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[INTRINSIC_ROUNDEVEN]](s32)
; GFX6-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD]](s32)
; GFX6-NEXT: [[FLDEXP:%[0-9]+]]:_(s32) = G_FLDEXP [[INT]], [[FPTOSI]](s32)
; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC059D1DA00000000
@@ -229,10 +229,10 @@ body: |
; GFX6-NEXT: [[FNEG1:%[0-9]+]]:_(s32) = G_FNEG [[FMUL1]]
; GFX6-NEXT: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[UV1]], [[C]], [[FNEG1]]
; GFX6-NEXT: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[UV1]], [[C1]], [[FMA2]]
- ; GFX6-NEXT: [[FRINT1:%[0-9]+]]:_(s32) = G_FRINT [[FMUL1]]
- ; GFX6-NEXT: [[FSUB1:%[0-9]+]]:_(s32) = G_FSUB [[FMUL1]], [[FRINT1]]
+ ; GFX6-NEXT: [[INTRINSIC_ROUNDEVEN1:%[0-9]+]]:_(s32) = G_INTRINSIC_ROUNDEVEN [[FMUL1]]
+ ; GFX6-NEXT: [[FSUB1:%[0-9]+]]:_(s32) = G_FSUB [[FMUL1]], [[INTRINSIC_ROUNDEVEN1]]
; GFX6-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FSUB1]], [[FMA3]]
- ; GFX6-NEXT: [[FPTOSI1:%[0-9]+]]:_(s32) = G_FPTOSI [[FRINT1]](s32)
+ ; GFX6-NEXT: [[FPTOSI1:%[0-9]+]]:_(s32) = G_FPTOSI [[INTRINSIC_ROUNDEVEN1]](s32)
; GFX6-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD1]](s32)
; GFX6-NEXT: [[FLDEXP1:%[0-9]+]]:_(s32) = G_FLDEXP [[INT1]], [[FPTOSI1]](s32)
; GFX6-NEXT: [[FCMP2:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[UV1]](s32), [[C2]]
@@ -258,10 +258,10 @@ body: |
; GFX8-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL2]], [[FMUL1]]
; GFX8-NEXT: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[AND]], [[C2]]
; GFX8-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[FADD]]
- ; GFX8-NEXT: [[FRINT:%[0-9]+]]:_(s32) = G_FRINT [[FMUL]]
- ; GFX8-NEXT: [[FSUB1:%[0-9]+]]:_(s32) = G_FSUB [[FMUL]], [[FRINT]]
+ ; GFX8-NEXT: [[INTRINSIC_ROUNDEVEN:%[0-9]+]]:_(s32) = G_INTRINSIC_ROUNDEVEN [[FMUL]]
+ ; GFX8-NEXT: [[FSUB1:%[0-9]+]]:_(s32) = G_FSUB [[FMUL]], [[INTRINSIC_ROUNDEVEN]]
; GFX8-NEXT: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FSUB1]], [[FADD1]]
- ; GFX8-NEXT: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[FRINT]](s32)
+ ; GFX8-NEXT: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[INTRINSIC_ROUNDEVEN]](s32)
; GFX8-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD2]](s32)
; GFX8-NEXT: [[FLDEXP:%[0-9]+]]:_(s32) = G_FLDEXP [[INT]], [[FPTOSI]](s32)
; GFX8-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC059D1DA00000000
@@ -281,10 +281,10 @@ body: |
; GFX8-NEXT: [[FADD3:%[0-9]+]]:_(s32) = G_FADD [[FMUL6]], [[FMUL5]]
; GFX8-NEXT: [[FMUL7:%[0-9]+]]:_(s32) = G_FMUL [[AND1]], [[C2]]
; GFX8-NEXT: [[FADD4:%[0-9]+]]:_(s32) = G_FADD [[FMUL7]], [[FADD3]]
- ; GFX8-NEXT: [[FRINT1:%[0-9]+]]:_(s32) = G_FRINT [[FMUL4]]
- ; GFX8-NEXT: [[FSUB3:%[0-9]+]]:_(s32) = G_FSUB [[FMUL4]], [[FRINT1]]
+ ; GFX8-NEXT: [[INTRINSIC_ROUNDEVEN1:%[0-9]+]]:_(s32) = G_INTRINSIC_ROUNDEVEN [[FMUL4]]
+ ; GFX8-NEXT: [[FSUB3:%[0-9]+]]:_(s32) = G_FSUB [[FMUL4]], [[INTRINSIC_ROUNDEVEN1]]
; GFX8-NEXT: [[FADD5:%[0-9]+]]:_(s32) = G_FADD [[FSUB3]], [[FADD4]]
- ; GFX8-NEXT: [[FPTOSI1:%[0-9]+]]:_(s32) = G_FPTOSI [[FRINT1]](s32)
+ ; GFX8-NEXT: [[FPTOSI1:%[0-9]+]]:_(s32) = G_FPTOSI [[INTRINSIC_ROUNDEVEN1]](s32)
; GFX8-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD5]](s32)
; GFX8-NEXT: [[FLDEXP1:%[0-9]+]]:_(s32) = G_FLDEXP [[INT1]], [[FPTOSI1]](s32)
; GFX8-NEXT: [[FCMP2:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[UV1]](s32), [[C3]]
@@ -305,10 +305,10 @@ body: |
; GFX9-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[UV]], [[C]], [[FNEG]]
; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3E54AE0BE0000000
; GFX9-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[UV]], [[C1]], [[FMA]]
- ; GFX9-NEXT: [[FRINT:%[0-9]+]]:_(s32) = G_FRINT [[FMUL]]
- ; GFX9-NEXT: [[FSUB:%[0-9]+]]:_(s32) = G_FSUB [[FMUL]], [[FRINT]]
+ ; GFX9-NEXT: [[INTRINSIC_ROUNDEVEN:%[0-9]+]]:_(s32) = G_INTRINSIC_ROUNDEVEN [[FMUL]]
+ ; GFX9-NEXT: [[FSUB:%[0-9]+]]:_(s32) = G_FSUB [[FMUL]], [[INTRINSIC_ROUNDEVEN]]
; GFX9-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FSUB]], [[FMA1]]
- ; GFX9-NEXT: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[FRINT]](s32)
+ ; GFX9-NEXT: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[INTRINSIC_ROUNDEVEN]](s32)
; GFX9-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD]](s32)
; GFX9-NEXT: [[FLDEXP:%[0-9]+]]:_(s32) = G_FLDEXP [[INT]], [[FPTOSI]](s32)
; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC059D1DA00000000
@@ -324,10 +324,10 @@ body: |
; GFX9-NEXT: [[FNEG1:%[0-9]+]]:_(s32) = G_FNEG [[FMUL1]]
; GFX9-NEXT: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[UV1]], [[C]], [[FNEG1]]
; GFX9-NEXT: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[UV1]], [[C1]], [[FMA2]]
- ; GFX9-NEXT: [[FRINT1:%[0-9]+]]:_(s32) = G_FRINT [[FMUL1]]
- ; GFX9-NEXT: [[FSUB1:%[0-9]+]]:_(s32) = G_FSUB [[FMUL1]], [[FRINT1]]
+ ; GFX9-NEXT: [[INTRINSIC_ROUNDEVEN1:%[0-9]+]]:_(s32) = G_INTRINSIC_ROUNDEVEN [[FMUL1]]
+ ; GFX9-NEXT: [[FSUB1:%[0-9]+]]:_(s32) = G_FSUB [[FMUL1]], [[INTRINSIC_ROUNDEVEN1]]
; GFX9-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FSUB1]], [[FMA3]]
- ; GFX9-NEXT: [[FPTOSI1:%[0-9]+]]:_(s32) = G_FPTOSI [[FRINT1]](s32)
+ ; GFX9-NEXT: [[FPTOSI1:%[0-9]+]]:_(s32) = G_FPTOSI [[INTRINSIC_ROUNDEVEN1]](s32)
; GFX9-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD1]](s32)
; GFX9-NEXT: [[FLDEXP1:%[0-9]+]]:_(s32) = G_FLDEXP [[INT1]], [[FPTOSI1]](s32)
; GFX9-NEXT: [[FCMP2:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[UV1]](s32), [[C2]]
@@ -359,10 +359,10 @@ body: |
; GFX6-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[UV]], [[C]], [[FNEG]]
; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3E54AE0BE0000000
; GFX6-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[UV]], [[C1]], [[FMA]]
- ; GFX6-NEXT: [[FRINT:%[0-9]+]]:_(s32) = G_FRINT [[FMUL]]
- ; GFX6-NEXT: [[FSUB:%[0-9]+]]:_(s32) = G_FSUB [[FMUL]], [[FRINT]]
+ ; GFX6-NEXT: [[INTRINSIC_ROUNDEVEN:%[0-9]+]]:_(s32) = G_INTRINSIC_ROUNDEVEN [[FMUL]]
+ ; GFX6-NEXT: [[FSUB:%[0-9]+]]:_(s32) = G_FSUB [[FMUL]], [[INTRINSIC_ROUNDEVEN]]
; GFX6-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FSUB]], [[FMA1]]
- ; GFX6-NEXT: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[FRINT]](s32)
+ ; GFX6-NEXT: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[INTRINSIC_ROUNDEVEN]](s32)
; GFX6-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD]](s32)
; GFX6-NEXT: [[FLDEXP:%[0-9]+]]:_(s32) = G_FLDEXP [[INT]], [[FPTOSI]](s32)
; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC059D1DA00000000
@@ -378,10 +378,10 @@ body: |
; GFX6-NEXT: [[FNEG1:%[0-9]+]]:_(s32) = G_FNEG [[FMUL1]]
; GFX6-NEXT: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[UV1]], [[C]], [[FNEG1]]
; GFX6-NEXT: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[UV1]], [[C1]], [[FMA2]]
- ; GFX6-NEXT: [[FRINT1:%[0-9]+]]:_(s32) = G_FRINT [[FMUL1]]
- ; GFX6-NEXT: [[FSUB1:%[0-9]+]]:_(s32) = G_FSUB [[FMUL1]], [[FRINT1]]
+ ; GFX6-NEXT: [[INTRINSIC_ROUNDEVEN1:%[0-9]+]]:_(s32) = G_INTRINSIC_ROUNDEVEN [[FMUL1]]
+ ; GFX6-NEXT: [[FSUB1:%[0-9]+]]:_(s32) = G_FSUB [[FMUL1]], [[INTRINSIC_ROUNDEVEN1]]
; GFX6-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FSUB1]], [[FMA3]]
- ; GFX6-NEXT: [[FPTOSI1:%[0-9]+]]:_(s32) = G_FPTOSI [[FRINT1]](s32)
+ ; GFX6-NEXT: [[FPTOSI1:%[0-9]+]]:_(s32) = G_FPTOSI [[INTRINSIC_ROUNDEVEN1]](s32)
; GFX6-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD1]](s32)
; GFX6-NEXT: [[FLDEXP1:%[0-9]+]]:_(s32) = G_FLDEXP [[INT1]], [[FPTOSI1]](s32)
; GFX6-NEXT: [[FCMP2:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[UV1]](s32), [[C2]]
@@ -393,10 +393,10 @@ body: |
; GFX6-NEXT: [[FNEG2:%[0-9]+]]:_(s32) = G_FNEG [[FMUL2]]
; GFX6-NEXT: [[FMA4:%[0-9]+]]:_(s32) = G_FMA [[UV2]], [[C]], [[FNEG2]]
; GFX6-NEXT: [[FMA5:%[0-9]+]]:_(s32) = G_FMA [[UV2]], [[C1]], [[FMA4]]
- ; GFX6-NEXT: [[FRINT2:%[0-9]+]]:_(s32) = G_FRINT [[FMUL2]]
- ; GFX6-NEXT: [[FSUB2:%[0-9]+]]:_(s32) = G_FSUB [[FMUL2]], [[FRINT2]]
+ ; GFX6-NEXT: [[INTRINSIC_ROUNDEVEN2:%[0-9]+]]:_(s32) = G_INTRINSIC_ROUNDEVEN [[FMUL2]]
+ ; GFX6-NEXT: [[FSUB2:%[0-9]+]]:_(s32) = G_FSUB [[FMUL2]], [[INTRINSIC_ROUNDEVEN2]]
; GFX6-NEXT: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FSUB2]], [[FMA5]]
- ; GFX6-NEXT: [[FPTOSI2:%[0-9]+]]:_(s32) = G_FPTOSI [[FRINT2]](s32)
+ ; GFX6-NEXT: [[FPTOSI2:%[0-9]+]]:_(s32) = G_FPTOSI [[INTRINSIC_ROUNDEVEN2]](s32)
; GFX6-NEXT: [[INT2:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD2]](s32)
; GFX6-NEXT: [[FLDEXP2:%[0-9]+]]:_(s32) = G_FLDEXP [[INT2]], [[FPTOSI2]](s32)
; GFX6-NEXT: [[FCMP4:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[UV2]](s32), [[C2]]
@@ -422,10 +422,10 @@ body: |
; GFX8-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL2]], [[FMUL1]]
; GFX8-NEXT: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[AND]], [[C2]]
; GFX8-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[FADD]]
- ; GFX8-NEXT: [[FRINT:%[0-9]+]]:_(s32) = G_FRINT [[FMUL]]
- ; GFX8-NEXT: [[FSUB1:%[0-9]+]]:_(s32) = G_FSUB [[FMUL]], [[FRINT]]
+ ; GFX8-NEXT: [[INTRINSIC_ROUNDEVEN:%[0-9]+]]:_(s32) = G_INTRINSIC_ROUNDEVEN [[FMUL]]
+ ; GFX8-NEXT: [[FSUB1:%[0-9]+]]:_(s32) = G_FSUB [[FMUL]], [[INTRINSIC_ROUNDEVEN]]
; GFX8-NEXT: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FSUB1]], [[FADD1]]
- ; GFX8-NEXT: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[FRINT]](s32)
+ ; GFX8-NEXT: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[INTRINSIC_ROUNDEVEN]](s32)
; GFX8-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD2]](s32)
; GFX8-NEXT: [[FLDEXP:%[0-9]+]]:_(s32) = G_FLDEXP [[INT]], [[FPTOSI]](s32)
; GFX8-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC059D1DA00000000
@@ -445,10 +445,10 @@ body: |
; GFX8-NEXT: [[FADD3:%[0-9]+]]:_(s32) = G_FADD [[FMUL6]], [[FMUL5]]
; GFX8-NEXT: [[FMUL7:%[0-9]+]]:_(s32) = G_FMUL [[AND1]], [[C2]]
; GFX8-NEXT: [[FADD4:%[0-9]+]]:_(s32) = G_FADD [[FMUL7]], [[FADD3]]
- ; GFX8-NEXT: [[FRINT1:%[0-9]+]]:_(s32) = G_FRINT [[FMUL4]]
- ; GFX8-NEXT: [[FSUB3:%[0-9]+]]:_(s32) = G_FSUB [[FMUL4]], [[FRINT1]]
+ ; GFX8-NEXT: [[INTRINSIC_ROUNDEVEN1:%[0-9]+]]:_(s32) = G_INTRINSIC_ROUNDEVEN [[FMUL4]]
+ ; GFX8-NEXT: [[FSUB3:%[0-9]+]]:_(s32) = G_FSUB [[FMUL4]], [[INTRINSIC_ROUNDEVEN1]]
; GFX8-NEXT: [[FADD5:%[0-9]+]]:_(s32) = G_FADD [[FSUB3]], [[FADD4]]
- ; GFX8-NEXT: [[FPTOSI1:%[0-9]+]]:_(s32) = G_FPTOSI [[FRINT1]](s32)
+ ; GFX8-NEXT: [[FPTOSI1:%[0-9]+]]:_(s32) = G_FPTOSI [[INTRINSIC_ROUNDEVEN1]](s32)
; GFX8-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD5]](s32)
; GFX8-NEXT: [[FLDEXP1:%[0-9]+]]:_(s32) = G_FLDEXP [[INT1]], [[FPTOSI1]](s32)
; GFX8-NEXT: [[FCMP2:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[UV1]](s32), [[C3]]
@@ -464,10 +464,10 @@ body: |
; GFX8-NEXT: [[FADD6:%[0-9]+]]:_(s32) = G_FADD [[FMUL10]], [[FMUL9]]
; GFX8-NEXT: [[FMUL11:%[0-9]+]]:_(s32) = G_FMUL [[AND2]], [[C2]]
; GFX8-NEXT: [[FADD7:%[0-9]+]]:_(s32) = G_FADD [[FMUL11]], [[FADD6]]
- ; GFX8-NEXT: [[FRINT2:%[0-9]+]]:_(s32) = G_FRINT [[FMUL8]]
- ; GFX8-NEXT: [[FSUB5:%[0-9]+]]:_(s32) = G_FSUB [[FMUL8]], [[FRINT2]]
+ ; GFX8-NEXT: [[INTRINSIC_ROUNDEVEN2:%[0-9]+]]:_(s32) = G_INTRINSIC_ROUNDEVEN [[FMUL8]]
+ ; GFX8-NEXT: [[FSUB5:%[0-9]+]]:_(s32) = G_FSUB [[FMUL8]], [[INTRINSIC_ROUNDEVEN2]]
; GFX8-NEXT: [[FADD8:%[0-9]+]]:_(s32) = G_FADD [[FSUB5]], [[FADD7]]
- ; GFX8-NEXT: [[FPTOSI2:%[0-9]+]]:_(s32) = G_FPTOSI [[FRINT2]](s32)
+ ; GFX8-NEXT: [[FPTOSI2:%[0-9]+]]:_(s32) = G_FPTOSI [[INTRINSIC_ROUNDEVEN2]](s32)
; GFX8-NEXT: [[INT2:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD8]](s32)
; GFX8-NEXT: [[FLDEXP2:%[0-9]+]]:_(s32) = G_FLDEXP [[INT2]], [[FPTOSI2]](s32)
; GFX8-NEXT: [[FCMP4:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[UV2]](s32), [[C3]]
@@ -488,10 +488,10 @@ body: |
; GFX9-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[UV]], [[C]], [[FNEG]]
; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3E54AE0BE0000000
; GFX9-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[UV]], [[C1]], [[FMA]]
- ; GFX9-NEXT: [[FRINT:%[0-9]+]]:_(s32) = G_FRINT [[FMUL]]
- ; GFX9-NEXT: [[FSUB:%[0-9]+]]:_(s32) = G_FSUB [[FMUL]], [[FRINT]]
+ ; GFX9-NEXT: [[INTRINSIC_ROUNDEVEN:%[0-9]+]]:_(s32) = G_INTRINSIC_ROUNDEVEN [[FMUL]]
+ ; GFX9-NEXT: [[FSUB:%[0-9]+]]:_(s32) = G_FSUB [[FMUL]], [[INTRINSIC_ROUNDEVEN]]
; GFX9-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FSUB]], [[FMA1]]
- ; GFX9-NEXT: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[FRINT]](s32)
+ ; GFX9-NEXT: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[INTRINSIC_ROUNDEVEN]](s32)
; GFX9-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD]](s32)
; GFX9-NEXT: [[FLDEXP:%[0-9]+]]:_(s32) = G_FLDEXP [[INT]], [[FPTOSI]](s32)
; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC059D1DA00000000
@@ -507,10 +507,10 @@ body: |
; GFX9-NEXT: [[FNEG1:%[0-9]+]]:_(s32) = G_FNEG [[FMUL1]]
; GFX9-NEXT: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[UV1]], [[C]], [[FNEG1]]
; GFX9-NEXT: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[UV1]], [[C1]], [[FMA2]]
- ; GFX9-NEXT: [[FRINT1:%[0-9]+]]:_(s32) = G_FRINT [[FMUL1]]
- ; GFX9-NEXT: [[FSUB1:%[0-9]+]]:_(s32) = G_FSUB [[FMUL1]], [[FRINT1]]
+ ; GFX9-NEXT: [[INTRINSIC_ROUNDEVEN1:%[0-9]+]]:_(s32) = G_INTRINSIC_ROUNDEVEN [[FMUL1]]
+ ; GFX9-NEXT: [[FSUB1:%[0-9]+]]:_(s32) = G_FSUB [[FMUL1]], [[INTRINSIC_ROUNDEVEN1]]
; GFX9-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FSUB1]], [[FMA3]]
- ; GFX9-NEXT: [[FPTOSI1:%[0-9]+]]:_(s32) = G_FPTOSI [[FRINT1]](s32)
+ ; GFX9-NEXT: [[FPTOSI1:%[0-9]+]]:_(s32) = G_FPTOSI [[INTRINSIC_ROUNDEVEN1]](s32)
; GFX9-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD1]](s32)
; GFX9-NEXT: [[FLDEXP1:%[0-9]+]]:_(s32) = G_FLDEXP [[INT1]], [[FPTOSI1]](s32)
; GFX9-NEXT: [[FCMP2:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[UV1]](s32), [[C2]]
@@ -522,10 +522,10 @@ body: |
; GFX9-NEXT: [[FNEG2:%[0-9]+]]:_(s32) = G_FNEG [[FMUL2]]
; GFX9-NEXT: [[FMA4:%[0-9]+]]:_(s32) = G_FMA [[UV2]], [[C]], [[FNEG2]]
; GFX9-NEXT: [[FMA5:%[0-9]+]]:_(s32) = G_FMA [[UV2]], [[C1]], [[FMA4]]
- ; GFX9-NEXT: [[FRINT2:%[0-9]+]]:_(s32) = G_FRINT [[FMUL2]]
- ; GFX9-NEXT: [[FSUB2:%[0-9]+]]:_(s32) = G_FSUB [[FMUL2]], [[FRINT2]]
+ ; GFX9-NEXT: [[INTRINSIC_ROUNDEVEN2:%[0-9]+]]:_(s32) = G_INTRINSIC_ROUNDEVEN [[FMUL2]]
+ ; GFX9-NEXT: [[FSUB2:%[0-9]+]]:_(s32) = G_FSUB [[FMUL2]], [[INTRINSIC_ROUNDEVEN2]]
; GFX9-NEXT: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FSUB2]], [[FMA5]]
- ; GFX9-NEXT: [[FPTOSI2:%[0-9]+]]:_(s32) = G_FPTOSI [[FRINT2]](s32)
+ ; GFX9-NEXT: [[FPTOSI2:%[0-9]+]]:_(s32) = G_FPTOSI [[INTRINSIC_ROUNDEVEN2]](s32)
; GFX9-NEXT: [[INT2:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD2]](s32)
; GFX9-NEXT: [[FLDEXP2:%[0-9]+]]:_(s32) = G_FLDEXP [[INT2]], [[FPTOSI2]](s32)
; GFX9-NEXT: [[FCMP4:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[UV2]](s32), [[C2]]
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-frint.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-frint.mir
deleted file mode 100644
index b208c1283f34b49..000000000000000
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-frint.mir
+++ /dev/null
@@ -1,220 +0,0 @@
-# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -run-pass=legalizer -o - %s | FileCheck -check-prefix=SI %s
-# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=hawaii -run-pass=legalizer -o - %s | FileCheck -check-prefix=CI %s
-
----
-name: test_frint_s16
-body: |
- bb.0:
- liveins: $vgpr0
-
- ; SI-LABEL: name: test_frint_s16
- ; SI: liveins: $vgpr0
- ; SI-NEXT: {{ $}}
- ; SI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
- ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16)
- ; SI-NEXT: [[FRINT:%[0-9]+]]:_(s32) = G_FRINT [[FPEXT]]
- ; SI-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FRINT]](s32)
- ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16)
- ; SI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
- ; CI-LABEL: name: test_frint_s16
- ; CI: liveins: $vgpr0
- ; CI-NEXT: {{ $}}
- ; CI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; CI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
- ; CI-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16)
- ; CI-NEXT: [[FRINT:%[0-9]+]]:_(s32) = G_FRINT [[FPEXT]]
- ; CI-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FRINT]](s32)
- ; CI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16)
- ; CI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
- %0:_(s32) = COPY $vgpr0
- %1:_(s16) = G_TRUNC %0
- %2:_(s16) = G_FRINT %1
- %3:_(s32) = G_ANYEXT %2
- $vgpr0 = COPY %3
-...
-
----
-name: test_frint_s32
-body: |
- bb.0:
- liveins: $vgpr0
-
- ; SI-LABEL: name: test_frint_s32
- ; SI: liveins: $vgpr0
- ; SI-NEXT: {{ $}}
- ; SI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; SI-NEXT: [[FRINT:%[0-9]+]]:_(s32) = G_FRINT [[COPY]]
- ; SI-NEXT: $vgpr0 = COPY [[FRINT]](s32)
- ; CI-LABEL: name: test_frint_s32
- ; CI: liveins: $vgpr0
- ; CI-NEXT: {{ $}}
- ; CI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; CI-NEXT: [[FRINT:%[0-9]+]]:_(s32) = G_FRINT [[COPY]]
- ; CI-NEXT: $vgpr0 = COPY [[FRINT]](s32)
- %0:_(s32) = COPY $vgpr0
- %1:_(s32) = G_FRINT %0
- $vgpr0 = COPY %1
-...
-
----
-name: test_frint_s64
-body: |
- bb.0:
- liveins: $vgpr0_vgpr1
-
- ; SI-LABEL: name: test_frint_s64
- ; SI: liveins: $vgpr0_vgpr1
- ; SI-NEXT: {{ $}}
- ; SI-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
- ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -9223372036854775808
- ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4841369599423283200
- ; SI-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY]], [[C]]
- ; SI-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[C1]], [[AND]]
- ; SI-NEXT: [[FADD:%[0-9]+]]:_(s64) = G_FADD [[COPY]], [[OR]]
- ; SI-NEXT: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[OR]]
- ; SI-NEXT: [[FADD1:%[0-9]+]]:_(s64) = G_FADD [[FADD]], [[FNEG]]
- ; SI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_FCONSTANT double 0x432FFFFFFFFFFFFF
- ; SI-NEXT: [[FABS:%[0-9]+]]:_(s64) = G_FABS [[COPY]]
- ; SI-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(ogt), [[FABS]](s64), [[C2]]
- ; SI-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[FCMP]](s1), [[COPY]], [[FADD1]]
- ; SI-NEXT: $vgpr0_vgpr1 = COPY [[SELECT]](s64)
- ; CI-LABEL: name: test_frint_s64
- ; CI: liveins: $vgpr0_vgpr1
- ; CI-NEXT: {{ $}}
- ; CI-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
- ; CI-NEXT: [[FRINT:%[0-9]+]]:_(s64) = G_FRINT [[COPY]]
- ; CI-NEXT: $vgpr0_vgpr1 = COPY [[FRINT]](s64)
- %0:_(s64) = COPY $vgpr0_vgpr1
- %1:_(s64) = G_FRINT %0
- $vgpr0_vgpr1 = COPY %1
-...
-
----
-name: test_frint_v2s16
-body: |
- bb.0:
- liveins: $vgpr0
-
- ; SI-LABEL: name: test_frint_v2s16
- ; SI: liveins: $vgpr0
- ; SI-NEXT: {{ $}}
- ; SI-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
- ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>)
- ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
- ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
- ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
- ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16)
- ; SI-NEXT: [[FRINT:%[0-9]+]]:_(s32) = G_FRINT [[FPEXT]]
- ; SI-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FRINT]](s32)
- ; SI-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16)
- ; SI-NEXT: [[FRINT1:%[0-9]+]]:_(s32) = G_FRINT [[FPEXT1]]
- ; SI-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FRINT1]](s32)
- ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16)
- ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16)
- ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32)
- ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]]
- ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
- ; SI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
- ; CI-LABEL: name: test_frint_v2s16
- ; CI: liveins: $vgpr0
- ; CI-NEXT: {{ $}}
- ; CI-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
- ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>)
- ; CI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
- ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; CI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
- ; CI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
- ; CI-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16)
- ; CI-NEXT: [[FRINT:%[0-9]+]]:_(s32) = G_FRINT [[FPEXT]]
- ; CI-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FRINT]](s32)
- ; CI-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16)
- ; CI-NEXT: [[FRINT1:%[0-9]+]]:_(s32) = G_FRINT [[FPEXT1]]
- ; CI-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FRINT1]](s32)
- ; CI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16)
- ; CI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16)
- ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32)
- ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]]
- ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
- ; CI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
- %0:_(<2 x s16>) = COPY $vgpr0
- %1:_(<2 x s16>) = G_FRINT %0
- $vgpr0 = COPY %1
-...
-
----
-name: test_frint_v2s32
-body: |
- bb.0:
- liveins: $vgpr0_vgpr1
-
- ; SI-LABEL: name: test_frint_v2s32
- ; SI: liveins: $vgpr0_vgpr1
- ; SI-NEXT: {{ $}}
- ; SI-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
- ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
- ; SI-NEXT: [[FRINT:%[0-9]+]]:_(s32) = G_FRINT [[UV]]
- ; SI-NEXT: [[FRINT1:%[0-9]+]]:_(s32) = G_FRINT [[UV1]]
- ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FRINT]](s32), [[FRINT1]](s32)
- ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
- ; CI-LABEL: name: test_frint_v2s32
- ; CI: liveins: $vgpr0_vgpr1
- ; CI-NEXT: {{ $}}
- ; CI-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
- ; CI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
- ; CI-NEXT: [[FRINT:%[0-9]+]]:_(s32) = G_FRINT [[UV]]
- ; CI-NEXT: [[FRINT1:%[0-9]+]]:_(s32) = G_FRINT [[UV1]]
- ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FRINT]](s32), [[FRINT1]](s32)
- ; CI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
- %0:_(<2 x s32>) = COPY $vgpr0_vgpr1
- %1:_(<2 x s32>) = G_FRINT %0
- $vgpr0_vgpr1 = COPY %1
-...
-
----
-name: test_frint_v2s64
-body: |
- bb.0:
- liveins: $vgpr0_vgpr1_vgpr2_vgpr3
-
- ; SI-LABEL: name: test_frint_v2s64
- ; SI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3
- ; SI-NEXT: {{ $}}
- ; SI-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
- ; SI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>)
- ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -9223372036854775808
- ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4841369599423283200
- ; SI-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[UV]], [[C]]
- ; SI-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[C1]], [[AND]]
- ; SI-NEXT: [[FADD:%[0-9]+]]:_(s64) = G_FADD [[UV]], [[OR]]
- ; SI-NEXT: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[OR]]
- ; SI-NEXT: [[FADD1:%[0-9]+]]:_(s64) = G_FADD [[FADD]], [[FNEG]]
- ; SI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_FCONSTANT double 0x432FFFFFFFFFFFFF
- ; SI-NEXT: [[FABS:%[0-9]+]]:_(s64) = G_FABS [[UV]]
- ; SI-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(ogt), [[FABS]](s64), [[C2]]
- ; SI-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[FCMP]](s1), [[UV]], [[FADD1]]
- ; SI-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[UV1]], [[C]]
- ; SI-NEXT: [[OR1:%[0-9]+]]:_(s64) = G_OR [[C1]], [[AND1]]
- ; SI-NEXT: [[FADD2:%[0-9]+]]:_(s64) = G_FADD [[UV1]], [[OR1]]
- ; SI-NEXT: [[FNEG1:%[0-9]+]]:_(s64) = G_FNEG [[OR1]]
- ; SI-NEXT: [[FADD3:%[0-9]+]]:_(s64) = G_FADD [[FADD2]], [[FNEG1]]
- ; SI-NEXT: [[FABS1:%[0-9]+]]:_(s64) = G_FABS [[UV1]]
- ; SI-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(ogt), [[FABS1]](s64), [[C2]]
- ; SI-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[FCMP1]](s1), [[UV1]], [[FADD3]]
- ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SELECT]](s64), [[SELECT1]](s64)
- ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
- ; CI-LABEL: name: test_frint_v2s64
- ; CI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3
- ; CI-NEXT: {{ $}}
- ; CI-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
- ; CI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>)
- ; CI-NEXT: [[FRINT:%[0-9]+]]:_(s64) = G_FRINT [[UV]]
- ; CI-NEXT: [[FRINT1:%[0-9]+]]:_(s64) = G_FRINT [[UV1]]
- ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[FRINT]](s64), [[FRINT1]](s64)
- ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
- %0:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
- %1:_(<2 x s64>) = G_FRINT %0
- $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
-...
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-frint.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-frint.mir
deleted file mode 100644
index 9f4383bf30c2a33..000000000000000
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-frint.mir
+++ /dev/null
@@ -1,36 +0,0 @@
-# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=amdgpu-regbankselect %s -verify-machineinstrs -o - -regbankselect-fast | FileCheck %s
-# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=amdgpu-regbankselect %s -verify-machineinstrs -o - -regbankselect-greedy | FileCheck %s
-
----
-name: frint_s
-legalized: true
-
-body: |
- bb.0:
- liveins: $sgpr0
- ; CHECK-LABEL: name: frint_s
- ; CHECK: liveins: $sgpr0
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
- ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
- ; CHECK-NEXT: [[FRINT:%[0-9]+]]:vgpr(s32) = G_FRINT [[COPY1]]
- %0:_(s32) = COPY $sgpr0
- %1:_(s32) = G_FRINT %0
-...
-
----
-name: frint_v
-legalized: true
-
-body: |
- bb.0:
- liveins: $vgpr0
- ; CHECK-LABEL: name: frint_v
- ; CHECK: liveins: $vgpr0
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
- ; CHECK-NEXT: [[FRINT:%[0-9]+]]:vgpr(s32) = G_FRINT [[COPY]]
- %0:_(s32) = COPY $vgpr0
- %1:_(s32) = G_FRINT %0
-...
More information about the cfe-commits
mailing list