[llvm] r289096 - AMDGPU: Make f16 ConstantFP legal
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Thu Dec 8 12:14:47 PST 2016
Author: arsenm
Date: Thu Dec 8 14:14:46 2016
New Revision: 289096
URL: http://llvm.org/viewvc/llvm-project?rev=289096&view=rev
Log:
AMDGPU: Make f16 ConstantFP legal
Not having this legal led to combine failures, resulting
in dumb things like bitcasts of constants not being folded
away.
The only reason I'm leaving the v_mov_b32 hack that f32
already uses is to avoid madak formation test regressions.
PeepholeOptimizer has an ordering issue where it attempts to fold the
immediate into the sgpr->vgpr copy instead of into the actual use.
Running it twice avoids that problem.
Modified:
llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp
llvm/trunk/lib/Target/AMDGPU/SIISelLowering.h
llvm/trunk/lib/Target/AMDGPU/SIInstructions.td
llvm/trunk/test/CodeGen/AMDGPU/br_cc.f16.ll
Modified: llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp?rev=289096&r1=289095&r2=289096&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp Thu Dec 8 14:14:46 2016
@@ -277,7 +277,7 @@ SITargetLowering::SITargetLowering(const
setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);
// F16 - Constant Actions.
- setOperationAction(ISD::ConstantFP, MVT::f16, Custom);
+ setOperationAction(ISD::ConstantFP, MVT::f16, Legal);
// F16 - Load/Store Actions.
setOperationAction(ISD::LOAD, MVT::f16, Promote);
@@ -1848,9 +1848,6 @@ SDValue SITargetLowering::LowerOperation
case ISD::INTRINSIC_VOID: return LowerINTRINSIC_VOID(Op, DAG);
case ISD::ADDRSPACECAST: return lowerADDRSPACECAST(Op, DAG);
case ISD::TRAP: return lowerTRAP(Op, DAG);
-
- case ISD::ConstantFP:
- return lowerConstantFP(Op, DAG);
case ISD::FP_ROUND:
return lowerFP_ROUND(Op, DAG);
}
@@ -2055,15 +2052,6 @@ SDValue SITargetLowering::getFPExtOrFPTr
DAG.getNode(ISD::FTRUNC, DL, VT, Op);
}
-SDValue SITargetLowering::lowerConstantFP(SDValue Op, SelectionDAG &DAG) const {
- if (ConstantFPSDNode *FP = dyn_cast<ConstantFPSDNode>(Op)) {
- return DAG.getConstant(FP->getValueAPF().bitcastToAPInt().getZExtValue(),
- SDLoc(Op), MVT::i32);
- }
-
- return SDValue();
-}
-
SDValue SITargetLowering::lowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const {
assert(Op.getValueType() == MVT::f16 &&
"Do not know how to custom lower FP_ROUND for non-f16 type");
Modified: llvm/trunk/lib/Target/AMDGPU/SIISelLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIISelLowering.h?rev=289096&r1=289095&r2=289096&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIISelLowering.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIISelLowering.h Thu Dec 8 14:14:46 2016
@@ -53,9 +53,6 @@ class SITargetLowering final : public AM
const SDLoc &DL,
EVT VT) const;
- /// \brief Custom lowering for ISD::ConstantFP.
- SDValue lowerConstantFP(SDValue Op, SelectionDAG &DAG) const;
-
/// \brief Custom lowering for ISD::FP_ROUND for MVT::f16.
SDValue lowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;
Modified: llvm/trunk/lib/Target/AMDGPU/SIInstructions.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIInstructions.td?rev=289096&r1=289095&r2=289096&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIInstructions.td (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIInstructions.td Thu Dec 8 14:14:46 2016
@@ -706,12 +706,25 @@ def : Pat <
(S_MOV_B32 imm:$imm)
>;
+// FIXME: Workaround for ordering issue with peephole optimizer where
+// a register class copy interferes with immediate folding. Should
+// use s_mov_b32, which can be shrunk to s_movk_i32
+def : Pat <
+ (VGPRImm<(f16 fpimm)>:$imm),
+ (V_MOV_B32_e32 (f16 (bitcast_fpimm_to_i32 $imm)))
+>;
+
def : Pat <
(f32 fpimm:$imm),
(S_MOV_B32 (f32 (bitcast_fpimm_to_i32 $imm)))
>;
def : Pat <
+ (f16 fpimm:$imm),
+ (S_MOV_B32 (i32 (bitcast_fpimm_to_i32 $imm)))
+>;
+
+def : Pat <
(i32 frameindex:$fi),
(V_MOV_B32_e32 (i32 (frameindex_to_targetframeindex $fi)))
>;
Modified: llvm/trunk/test/CodeGen/AMDGPU/br_cc.f16.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/br_cc.f16.ll?rev=289096&r1=289095&r2=289096&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/br_cc.f16.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/br_cc.f16.ll Thu Dec 8 14:14:46 2016
@@ -47,8 +47,9 @@ two:
; SI: v_cvt_f32_f16_e32 v[[B_F32:[0-9]+]], v[[B_F16]]
; SI: v_cmp_ngt_f32_e32 vcc, v[[B_F32]], v[[A_F32]]
-; VI: v_cmp_nle_f16_e32 vcc, v[[A_F16]], v[[B_F16]]
; SI: s_cbranch_vccz
+
+; VI: v_cmp_nlt_f16_e32 vcc, v[[A_F16]], v[[B_F16]]
; VI: s_cbranch_vccnz
; VI: one{{$}}
@@ -85,7 +86,7 @@ two:
; SI: v_cvt_f32_f16_e32 v[[A_F32:[0-9]+]], v[[A_F16]]
; SI: v_cmp_nlt_f32_e32 vcc, v[[A_F32]], v[[B_F32]]
-; VI: v_cmp_nge_f16_e32 vcc, v[[B_F16]], v[[A_F16]]
+; VI: v_cmp_ngt_f16_e32 vcc, v[[B_F16]], v[[A_F16]]
; GCN: s_cbranch_vccnz
; GCN: one{{$}}
More information about the llvm-commits
mailing list