[llvm] dcb2da1 - [AMDGPU] Add a new intrinsic to control fp_trunc rounding mode
Julien Pages via llvm-commits
llvm-commits at lists.llvm.org
Fri Feb 11 09:09:08 PST 2022
Author: Julien Pages
Date: 2022-02-11T12:08:23-05:00
New Revision: dcb2da13f16ec96291e5927dcd8cb4a8741988c7
URL: https://github.com/llvm/llvm-project/commit/dcb2da13f16ec96291e5927dcd8cb4a8741988c7
DIFF: https://github.com/llvm/llvm-project/commit/dcb2da13f16ec96291e5927dcd8cb4a8741988c7.diff
LOG: [AMDGPU] Add a new intrinsic to control fp_trunc rounding mode
Add a new llvm.fptrunc.round intrinsic to precisely control
the rounding mode when converting from f32 to f16.
Differential Revision: https://reviews.llvm.org/D110579
Added:
llvm/test/CodeGen/AMDGPU/fail.llvm.fptrunc.round.ll
llvm/test/CodeGen/AMDGPU/llvm.fptrunc.round.ll
llvm/test/Verifier/llvm.fptrunc.round.ll
Modified:
llvm/docs/LangRef.rst
llvm/include/llvm/CodeGen/ISDOpcodes.h
llvm/include/llvm/IR/Intrinsics.td
llvm/include/llvm/Support/TargetOpcodes.def
llvm/include/llvm/Target/GenericOpcodes.td
llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
llvm/lib/IR/Verifier.cpp
llvm/lib/Target/AMDGPU/AMDGPUGISel.td
llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
llvm/lib/Target/AMDGPU/SIISelLowering.cpp
llvm/lib/Target/AMDGPU/SIInstrInfo.td
llvm/lib/Target/AMDGPU/SIInstructions.td
llvm/lib/Target/AMDGPU/SIModeRegister.cpp
llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir
Removed:
################################################################################
diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst
index 9b819dcd94196..6b44b7e7355c8 100644
--- a/llvm/docs/LangRef.rst
+++ b/llvm/docs/LangRef.rst
@@ -24049,3 +24049,43 @@ Semantics:
The '``llvm.preserve.struct.access.index``' intrinsic produces the same result
as a getelementptr with base ``base`` and access operands ``{0, gep_index}``.
+
+'``llvm.fptrunc.round``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+ declare <ty2>
+ @llvm.fptrunc.round(<type> <value>, metadata <rounding mode>)
+
+Overview:
+"""""""""
+
+The '``llvm.fptrunc.round``' intrinsic truncates
+:ref:`floating-point <t_floating>` ``value`` to type ``ty2``
+with a specified rounding mode.
+
+Arguments:
+""""""""""
+
+The '``llvm.fptrunc.round``' intrinsic takes a :ref:`floating-point
+<t_floating>` value as its first argument. This value must be larger in
+size than the result type of the cast.
+
+The second argument specifies the rounding mode as described in the constrained
+intrinsics section.
+For this intrinsic, the "round.dynamic" mode is not supported.
+
+Semantics:
+""""""""""
+
+The '``llvm.fptrunc.round``' intrinsic casts a ``value`` from a larger
+:ref:`floating-point <t_floating>` type to a smaller :ref:`floating-point
+<t_floating>` type.
+This intrinsic is assumed to execute in the default :ref:`floating-point
+environment <floatenv>` *except* for the rounding mode.
+This intrinsic is not supported on all targets. Some targets may not support
+all rounding modes.
diff --git a/llvm/include/llvm/CodeGen/ISDOpcodes.h b/llvm/include/llvm/CodeGen/ISDOpcodes.h
index 9fedb531db0c4..5168b20162a97 100644
--- a/llvm/include/llvm/CodeGen/ISDOpcodes.h
+++ b/llvm/include/llvm/CodeGen/ISDOpcodes.h
@@ -462,6 +462,9 @@ enum NodeType {
STRICT_FSETCC,
STRICT_FSETCCS,
+ // FPTRUNC_ROUND - This corresponds to the fptrunc_round intrinsic.
+ FPTRUNC_ROUND,
+
/// FMA - Perform a * b + c with no intermediate rounding step.
FMA,
diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td
index e7d38276f35e5..7991bc0d039cd 100644
--- a/llvm/include/llvm/IR/Intrinsics.td
+++ b/llvm/include/llvm/IR/Intrinsics.td
@@ -909,6 +909,12 @@ let IntrProperties = [IntrInaccessibleMemOnly, IntrWillReturn] in {
}
// FIXME: Consider maybe adding intrinsics for sitofp, uitofp.
+
+// Truncate a floating point number with a specific rounding mode
+def int_fptrunc_round : DefaultAttrsIntrinsic<[ llvm_anyfloat_ty ],
+ [ llvm_anyfloat_ty, llvm_metadata_ty ],
+ [ IntrNoMem, IntrWillReturn ]>;
+
//===------------------------- Expect Intrinsics --------------------------===//
//
def int_expect : DefaultAttrsIntrinsic<[llvm_anyint_ty],
diff --git a/llvm/include/llvm/Support/TargetOpcodes.def b/llvm/include/llvm/Support/TargetOpcodes.def
index 428cbb44705d8..ca8876f51fae9 100644
--- a/llvm/include/llvm/Support/TargetOpcodes.def
+++ b/llvm/include/llvm/Support/TargetOpcodes.def
@@ -322,6 +322,9 @@ HANDLE_TARGET_OPCODE(G_BITCAST)
/// Generic freeze.
HANDLE_TARGET_OPCODE(G_FREEZE)
+/// INTRINSIC fptrunc_round intrinsic.
+HANDLE_TARGET_OPCODE(G_INTRINSIC_FPTRUNC_ROUND)
+
/// INTRINSIC trunc intrinsic.
HANDLE_TARGET_OPCODE(G_INTRINSIC_TRUNC)
diff --git a/llvm/include/llvm/Target/GenericOpcodes.td b/llvm/include/llvm/Target/GenericOpcodes.td
index 2af20ab6a53f5..8ec12a9bc5d09 100644
--- a/llvm/include/llvm/Target/GenericOpcodes.td
+++ b/llvm/include/llvm/Target/GenericOpcodes.td
@@ -965,6 +965,12 @@ def G_FNEARBYINT : GenericInstruction {
//------------------------------------------------------------------------------
// Opcodes for LLVM Intrinsics
//------------------------------------------------------------------------------
+def G_INTRINSIC_FPTRUNC_ROUND : GenericInstruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type1:$src1, i32imm:$round_mode);
+ let hasSideEffects = false;
+}
+
def G_INTRINSIC_TRUNC : GenericInstruction {
let OutOperandList = (outs type0:$dst);
let InOperandList = (ins type0:$src1);
diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
index 6d415c9c7f90c..5ac0803bc61f4 100644
--- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
@@ -2252,6 +2252,23 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
Info.OrigRet = {Register(), Type::getVoidTy(CI.getContext()), 0};
return CLI->lowerCall(MIRBuilder, Info);
}
+ case Intrinsic::fptrunc_round: {
+ unsigned Flags = MachineInstr::copyFlagsFromInstruction(CI);
+
+ // Convert the metadata argument to a constant integer
+ Metadata *MD = cast<MetadataAsValue>(CI.getArgOperand(1))->getMetadata();
+ Optional<RoundingMode> RoundMode =
+ convertStrToRoundingMode(cast<MDString>(MD)->getString());
+
+ // Add the Rounding mode as an integer
+ MIRBuilder
+ .buildInstr(TargetOpcode::G_INTRINSIC_FPTRUNC_ROUND,
+ {getOrCreateVReg(CI)},
+ {getOrCreateVReg(*CI.getArgOperand(0))}, Flags)
+ .addImm((int)RoundMode.getValue());
+
+ return true;
+ }
#define INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC) \
case Intrinsic::INTRINSIC:
#include "llvm/IR/ConstrainedOps.def"
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 01230a36e744a..78da827c96f74 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -6343,6 +6343,29 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
#include "llvm/IR/VPIntrinsics.def"
visitVectorPredicationIntrinsic(cast<VPIntrinsic>(I));
return;
+ case Intrinsic::fptrunc_round: {
+    // Get the last argument, the metadata, and convert it to an integer in
+    // the call.
+ Metadata *MD = cast<MetadataAsValue>(I.getArgOperand(1))->getMetadata();
+ Optional<RoundingMode> RoundMode =
+ convertStrToRoundingMode(cast<MDString>(MD)->getString());
+
+ EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
+
+ // Propagate fast-math-flags from IR to node(s).
+ SDNodeFlags Flags;
+ Flags.copyFMF(*cast<FPMathOperator>(&I));
+ SelectionDAG::FlagInserter FlagsInserter(DAG, Flags);
+
+ SDValue Result;
+ Result = DAG.getNode(
+ ISD::FPTRUNC_ROUND, sdl, VT, getValue(I.getArgOperand(0)),
+ DAG.getTargetConstant((int)RoundMode.getValue(), sdl,
+ TLI.getPointerTy(DAG.getDataLayout())));
+ setValue(&I, Result);
+
+ return;
+ }
case Intrinsic::fmuladd: {
EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
if (TM.Options.AllowFPOpFusion != FPOpFusion::Strict &&
diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp
index 4626150371897..e613589068371 100644
--- a/llvm/lib/IR/Verifier.cpp
+++ b/llvm/lib/IR/Verifier.cpp
@@ -4772,6 +4772,27 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) {
"an array");
break;
}
+ case Intrinsic::fptrunc_round: {
+ // Check the rounding mode
+ Metadata *MD = nullptr;
+ auto *MAV = dyn_cast<MetadataAsValue>(Call.getOperand(1));
+ if (MAV)
+ MD = MAV->getMetadata();
+
+ Assert(MD != nullptr, "missing rounding mode argument", Call);
+
+ Assert(isa<MDString>(MD),
+ ("invalid value for llvm.fptrunc.round metadata operand"
+ " (the operand should be a string)"),
+ MD);
+
+ Optional<RoundingMode> RoundMode =
+ convertStrToRoundingMode(cast<MDString>(MD)->getString());
+ Assert(RoundMode.hasValue() &&
+ RoundMode.getValue() != RoundingMode::Dynamic,
+ "unsupported rounding mode argument", Call);
+ break;
+ }
#define INSTRUCTION(NAME, NARGS, ROUND_MODE, INTRINSIC) \
case Intrinsic::INTRINSIC:
#include "llvm/IR/ConstrainedOps.def"
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
index 7fd94a977be77..786fc54c466cb 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
@@ -222,6 +222,9 @@ def : GINodeEquiv<G_AMDGPU_BUFFER_ATOMIC_FMAX, SIbuffer_atomic_fmax>;
def : GINodeEquiv<G_AMDGPU_BUFFER_ATOMIC_CMPSWAP, SIbuffer_atomic_cmpswap>;
def : GINodeEquiv<G_AMDGPU_S_BUFFER_LOAD, SIsbuffer_load>;
+def : GINodeEquiv<G_FPTRUNC_ROUND_UPWARD, SIfptrunc_round_upward>;
+def : GINodeEquiv<G_FPTRUNC_ROUND_DOWNWARD, SIfptrunc_round_downward>;
+
class GISelSop2Pat <
SDPatternOperator node,
Instruction inst,
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index 23d970f6d1bff..0fbdb0d33b74d 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -4486,6 +4486,8 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(CONST_DATA_PTR)
NODE_NAME_CASE(PC_ADD_REL_OFFSET)
NODE_NAME_CASE(LDS)
+ NODE_NAME_CASE(FPTRUNC_ROUND_UPWARD)
+ NODE_NAME_CASE(FPTRUNC_ROUND_DOWNWARD)
NODE_NAME_CASE(DUMMY_CHAIN)
case AMDGPUISD::FIRST_MEM_OPCODE_NUMBER: break;
NODE_NAME_CASE(LOAD_D16_HI)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
index b41506157b685..cfd91426270fe 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
@@ -483,6 +483,9 @@ enum NodeType : unsigned {
CONST_DATA_PTR,
PC_ADD_REL_OFFSET,
LDS,
+ FPTRUNC_ROUND_UPWARD,
+ FPTRUNC_ROUND_DOWNWARD,
+
DUMMY_CHAIN,
FIRST_MEM_OPCODE_NUMBER = ISD::FIRST_TARGET_MEMORY_OPCODE,
LOAD_D16_HI,
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index ef2b72252ea72..a1a69030df8d4 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -839,6 +839,11 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
.scalarize(0)
.lower();
+ getActionDefinitionsBuilder(G_INTRINSIC_FPTRUNC_ROUND)
+ .customFor({S16, S32})
+ .scalarize(0)
+ .lower();
+
// Lower roundeven into G_FRINT
getActionDefinitionsBuilder({G_INTRINSIC_ROUND, G_INTRINSIC_ROUNDEVEN})
.scalarize(0)
@@ -1759,6 +1764,8 @@ bool AMDGPULegalizerInfo::legalizeCustom(LegalizerHelper &Helper,
case TargetOpcode::G_CTLZ:
case TargetOpcode::G_CTTZ:
return legalizeCTLZ_CTTZ(MI, MRI, B);
+ case TargetOpcode::G_INTRINSIC_FPTRUNC_ROUND:
+ return legalizeFPTruncRound(MI, B);
default:
return false;
}
@@ -4963,6 +4970,27 @@ static bool replaceWithConstant(MachineIRBuilder &B, MachineInstr &MI, int64_t C
return true;
}
+bool AMDGPULegalizerInfo::legalizeFPTruncRound(MachineInstr &MI,
+ MachineIRBuilder &B) const {
+ unsigned Opc;
+ int RoundMode = MI.getOperand(2).getImm();
+
+ if (RoundMode == (int)RoundingMode::TowardPositive)
+ Opc = AMDGPU::G_FPTRUNC_ROUND_UPWARD;
+ else if (RoundMode == (int)RoundingMode::TowardNegative)
+ Opc = AMDGPU::G_FPTRUNC_ROUND_DOWNWARD;
+ else
+ return false;
+
+ B.buildInstr(Opc)
+ .addDef(MI.getOperand(0).getReg())
+ .addUse(MI.getOperand(1).getReg());
+
+ MI.eraseFromParent();
+
+ return true;
+}
+
bool AMDGPULegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
MachineInstr &MI) const {
MachineIRBuilder &B = Helper.MIRBuilder;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
index 964a41d3d7406..291e95dcfb89b 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
@@ -169,6 +169,8 @@ class AMDGPULegalizerInfo final : public LegalizerInfo {
bool legalizeBVHIntrinsic(MachineInstr &MI, MachineIRBuilder &B) const;
+ bool legalizeFPTruncRound(MachineInstr &MI, MachineIRBuilder &B) const;
+
bool legalizeImageIntrinsic(
MachineInstr &MI, MachineIRBuilder &B,
GISelChangeObserver &Observer,
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
index 56693805cc360..f2b5beaa40790 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
@@ -4570,6 +4570,9 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
OpdsMapping[0] = AMDGPU::getValueMapping(Bank, 1);
break;
}
+ case AMDGPU::G_FPTRUNC_ROUND_UPWARD:
+ case AMDGPU::G_FPTRUNC_ROUND_DOWNWARD:
+ return getDefaultMappingVOP(MI);
}
return getInstructionMapping(/*ID*/1, /*Cost*/1,
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index dc89a3f2554be..ea73fea467c8c 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -17,6 +17,7 @@
#include "AMDGPUTargetMachine.h"
#include "SIMachineFunctionInfo.h"
#include "SIRegisterInfo.h"
+#include "llvm/ADT/FloatingPointMode.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/LegacyDivergenceAnalysis.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
@@ -602,6 +603,7 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
setOperationAction(ISD::SINT_TO_FP, MVT::f16, Promote);
setOperationAction(ISD::UINT_TO_FP, MVT::f16, Promote);
setOperationAction(ISD::FROUND, MVT::f16, Custom);
+ setOperationAction(ISD::FPTRUNC_ROUND, MVT::f16, Custom);
// F16 - VOP2 Actions.
setOperationAction(ISD::BR_CC, MVT::f16, Expand);
@@ -4740,6 +4742,24 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
return lowerBUILD_VECTOR(Op, DAG);
case ISD::FP_ROUND:
return lowerFP_ROUND(Op, DAG);
+ case ISD::FPTRUNC_ROUND: {
+ unsigned Opc;
+ SDLoc DL(Op);
+
+ if (Op.getOperand(0)->getValueType(0) != MVT::f32)
+ return SDValue();
+
+ // Get the rounding mode from the last operand
+ int RoundMode = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
+ if (RoundMode == (int)RoundingMode::TowardPositive)
+ Opc = AMDGPUISD::FPTRUNC_ROUND_UPWARD;
+ else if (RoundMode == (int)RoundingMode::TowardNegative)
+ Opc = AMDGPUISD::FPTRUNC_ROUND_DOWNWARD;
+ else
+ return SDValue();
+
+ return DAG.getNode(Opc, DL, Op.getNode()->getVTList(), Op->getOperand(0));
+ }
case ISD::TRAP:
return lowerTRAP(Op, DAG);
case ISD::DEBUGTRAP:
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
index e738d92446d14..73544048e79cc 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
@@ -255,6 +255,14 @@ def SIdenorm_mode : SDNode<"AMDGPUISD::DENORM_MODE",
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]
>;
+def SIfptrunc_round_upward : SDNode<"AMDGPUISD::FPTRUNC_ROUND_UPWARD",
+ SDTFPRoundOp
+>;
+
+def SIfptrunc_round_downward : SDNode<"AMDGPUISD::FPTRUNC_ROUND_DOWNWARD",
+ SDTFPRoundOp
+>;
+
//===----------------------------------------------------------------------===//
// ValueType helpers
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index 952eecb07459b..873b4ff3516e1 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -186,6 +186,22 @@ def EXIT_STRICT_WQM : SPseudoInstSI <(outs SReg_1:$sdst), (ins SReg_1:$src0)> {
let mayStore = 0;
}
+// Pseudo instructions used for @llvm.fptrunc.round upward
+// and @llvm.fptrunc.round downward.
+// These intrinsics will be legalized to G_FPTRUNC_ROUND_UPWARD
+// and G_FPTRUNC_ROUND_DOWNWARD before being lowered to
+// FPTRUNC_UPWARD_PSEUDO and FPTRUNC_DOWNWARD_PSEUDO.
+// The final codegen is done in the ModeRegister pass.
+let Uses = [MODE, EXEC] in {
+def FPTRUNC_UPWARD_PSEUDO : VPseudoInstSI <(outs VGPR_32:$vdst),
+ (ins VGPR_32:$src0),
+ [(set f16:$vdst, (SIfptrunc_round_upward f32:$src0))]>;
+
+def FPTRUNC_DOWNWARD_PSEUDO : VPseudoInstSI <(outs VGPR_32:$vdst),
+ (ins VGPR_32:$src0),
+ [(set f16:$vdst, (SIfptrunc_round_downward f32:$src0))]>;
+} // End Uses = [MODE, EXEC]
+
// Invert the exec mask and overwrite the inactive lanes of dst with inactive,
// restoring it after we're done.
let Defs = [SCC] in {
@@ -3183,3 +3199,15 @@ def G_SI_CALL : AMDGPUGenericInstruction {
// TODO: Should really base this on the call target
let isConvergent = 1;
}
+
+def G_FPTRUNC_ROUND_UPWARD : AMDGPUGenericInstruction {
+ let OutOperandList = (outs type0:$vdst);
+ let InOperandList = (ins type1:$src0);
+ let hasSideEffects = 0;
+}
+
+def G_FPTRUNC_ROUND_DOWNWARD : AMDGPUGenericInstruction {
+ let OutOperandList = (outs type0:$vdst);
+ let InOperandList = (ins type1:$src0);
+ let hasSideEffects = 0;
+}
diff --git a/llvm/lib/Target/AMDGPU/SIModeRegister.cpp b/llvm/lib/Target/AMDGPU/SIModeRegister.cpp
index 24a8879b5684c..1f572eedb4134 100644
--- a/llvm/lib/Target/AMDGPU/SIModeRegister.cpp
+++ b/llvm/lib/Target/AMDGPU/SIModeRegister.cpp
@@ -162,7 +162,9 @@ FunctionPass *llvm::createSIModeRegisterPass() { return new SIModeRegister(); }
// double precision setting.
Status SIModeRegister::getInstructionMode(MachineInstr &MI,
const SIInstrInfo *TII) {
- if (TII->usesFPDPRounding(MI)) {
+ if (TII->usesFPDPRounding(MI) ||
+ MI.getOpcode() == AMDGPU::FPTRUNC_UPWARD_PSEUDO ||
+ MI.getOpcode() == AMDGPU::FPTRUNC_DOWNWARD_PSEUDO) {
switch (MI.getOpcode()) {
case AMDGPU::V_INTERP_P1LL_F16:
case AMDGPU::V_INTERP_P1LV_F16:
@@ -170,6 +172,18 @@ Status SIModeRegister::getInstructionMode(MachineInstr &MI,
// f16 interpolation instructions need double precision round to zero
return Status(FP_ROUND_MODE_DP(3),
FP_ROUND_MODE_DP(FP_ROUND_ROUND_TO_ZERO));
+ case AMDGPU::FPTRUNC_UPWARD_PSEUDO: {
+ // Replacing the pseudo by a real instruction
+ MI.setDesc(TII->get(AMDGPU::V_CVT_F16_F32_e32));
+ return Status(FP_ROUND_MODE_DP(3),
+ FP_ROUND_MODE_DP(FP_ROUND_ROUND_TO_INF));
+ }
+ case AMDGPU::FPTRUNC_DOWNWARD_PSEUDO: {
+ // Replacing the pseudo by a real instruction
+ MI.setDesc(TII->get(AMDGPU::V_CVT_F16_F32_e32));
+ return Status(FP_ROUND_MODE_DP(3),
+ FP_ROUND_MODE_DP(FP_ROUND_ROUND_TO_NEGINF));
+ }
default:
return DefaultStatus;
}
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir
index 117766814b339..2127229d6a08d 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir
@@ -131,6 +131,10 @@
# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected
# DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected
#
+# DEBUG-NEXT: G_INTRINSIC_FPTRUNC_ROUND (opcode {{[0-9]+}}): 2 type indices, 0 imm indices
+# DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined
+# DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined
+#
# DEBUG-NEXT: G_INTRINSIC_TRUNC (opcode {{[0-9]+}}): 1 type index, 0 imm indices
# DEBUG-NEXT: .. opcode {{[0-9]+}} is aliased to {{[0-9]+}}
# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected
diff --git a/llvm/test/CodeGen/AMDGPU/fail.llvm.fptrunc.round.ll b/llvm/test/CodeGen/AMDGPU/fail.llvm.fptrunc.round.ll
new file mode 100644
index 0000000000000..7bfe11fcd30ce
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/fail.llvm.fptrunc.round.ll
@@ -0,0 +1,11 @@
+; RUN: not --crash llc -march=amdgcn -mcpu=gfx1030 -verify-machineinstrs -o /dev/null %s 2>&1 | FileCheck %s --ignore-case --check-prefix=FAIL
+; RUN: not --crash llc -global-isel -march=amdgcn -mcpu=gfx1030 -verify-machineinstrs -o /dev/null %s 2>&1 | FileCheck %s --ignore-case --check-prefix=FAIL
+
+define amdgpu_gs void @test_fptrunc_round_legalization(double %a, i32 %data0, <4 x i32> %data1, half addrspace(1)* %out) {
+; FAIL: LLVM ERROR: Cannot select
+ %res = call half @llvm.fptrunc.round.f64(double %a, metadata !"round.upward")
+ store half %res, half addrspace(1)* %out, align 4
+ ret void
+}
+
+declare half @llvm.fptrunc.round.f64(double, metadata)
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.fptrunc.round.ll b/llvm/test/CodeGen/AMDGPU/llvm.fptrunc.round.ll
new file mode 100644
index 0000000000000..b4787f3eefe5e
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/llvm.fptrunc.round.ll
@@ -0,0 +1,52 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -march=amdgcn -mcpu=gfx1030 -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1030 -verify-machineinstrs < %s | FileCheck %s
+
+define amdgpu_gs void @test_fptrunc_round_upward(float %a, i32 %data0, <4 x i32> %data1, half addrspace(1)* %out) {
+; CHECK-LABEL: test_fptrunc_round_upward:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 1), 1
+; CHECK-NEXT: v_cvt_f16_f32_e32 v0, v0
+; CHECK-NEXT: global_store_short v[6:7], v0, off
+; CHECK-NEXT: s_endpgm
+ %res = call half @llvm.fptrunc.round(float %a, metadata !"round.upward")
+ store half %res, half addrspace(1)* %out, align 4
+ ret void
+}
+
+define amdgpu_gs void @test_fptrunc_round_downward(float %a, i32 %data0, <4 x i32> %data1, half addrspace(1)* %out) {
+; CHECK-LABEL: test_fptrunc_round_downward:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 3, 1), 1
+; CHECK-NEXT: v_cvt_f16_f32_e32 v0, v0
+; CHECK-NEXT: global_store_short v[6:7], v0, off
+; CHECK-NEXT: s_endpgm
+ %res = call half @llvm.fptrunc.round(float %a, metadata !"round.downward")
+ store half %res, half addrspace(1)* %out, align 4
+ ret void
+}
+
+define amdgpu_gs void @test_fptrunc_round_upward_multiple_calls(float %a, float %b, i32 %data0, <4 x i32> %data1, half addrspace(1)* %out) {
+; CHECK-LABEL: test_fptrunc_round_upward_multiple_calls:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 1), 1
+; CHECK-NEXT: v_cvt_f16_f32_e32 v0, v0
+; CHECK-NEXT: v_cvt_f16_f32_e32 v2, v1
+; CHECK-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 2
+; CHECK-NEXT: v_cvt_f16_f32_e32 v1, v1
+; CHECK-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 3, 1), 0
+; CHECK-NEXT: v_add_f16_e32 v0, v0, v2
+; CHECK-NEXT: v_add_f16_e32 v0, v1, v0
+; CHECK-NEXT: global_store_short v[7:8], v0, off
+; CHECK-NEXT: s_endpgm
+ %res1 = call half @llvm.fptrunc.round(float %a, metadata !"round.upward")
+ %res2 = call half @llvm.fptrunc.round(float %b, metadata !"round.upward")
+ %res3 = call half @llvm.fptrunc.round(float %b, metadata !"round.downward")
+ %res4 = fadd half %res1, %res2
+ %res5 = fadd half %res3, %res4
+ store half %res5, half addrspace(1)* %out, align 4
+ ret void
+}
+
+declare half @llvm.fptrunc.round(float, metadata)
diff --git a/llvm/test/Verifier/llvm.fptrunc.round.ll b/llvm/test/Verifier/llvm.fptrunc.round.ll
new file mode 100644
index 0000000000000..17ca81a77d692
--- /dev/null
+++ b/llvm/test/Verifier/llvm.fptrunc.round.ll
@@ -0,0 +1,13 @@
+; RUN: not opt -verify < %s 2>&1 | FileCheck %s
+
+declare half @llvm.fptrunc.round(float, metadata)
+
+define void @test_fptrunc_round_dynamic(float %a) {
+; CHECK: unsupported rounding mode argument
+ %res = call half @llvm.fptrunc.round(float %a, metadata !"round.dynamic")
+; CHECK: unsupported rounding mode argument
+ %res1 = call half @llvm.fptrunc.round(float %a, metadata !"round.test")
+; CHECK: invalid value for llvm.fptrunc.round metadata operand (the operand should be a string)
+ %res2 = call half @llvm.fptrunc.round(float %a, metadata i32 5)
+ ret void
+}
More information about the llvm-commits
mailing list