[llvm] 60023e3 - AMDGPU: Use default operand for VOP3P clamp
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Fri Feb 21 09:14:27 PST 2020
Author: Matt Arsenault
Date: 2020-02-21T12:14:18-05:00
New Revision: 60023e347116e5004295e8c7f2f09cc1855d4d84
URL: https://github.com/llvm/llvm-project/commit/60023e347116e5004295e8c7f2f09cc1855d4d84
DIFF: https://github.com/llvm/llvm-project/commit/60023e347116e5004295e8c7f2f09cc1855d4d84.diff
LOG: AMDGPU: Use default operand for VOP3P clamp
We don't use this, and matching from the def doesn't make much sense.
There are multiple tablegen bugs with default operand
handling. undef_tied_input should work to handle the vdst_in
correctly, but this breaks the operand register class constraint which
it should be able to infer.
Added:
Modified:
llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
llvm/lib/Target/AMDGPU/SIInstrInfo.td
llvm/lib/Target/AMDGPU/VOP3Instructions.td
llvm/lib/Target/AMDGPU/VOP3PInstructions.td
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index 16a41be23eed..3c38dbe059fe 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -263,8 +263,6 @@ class AMDGPUDAGToDAGISel : public SelectionDAGISel {
SDValue &Clamp, SDValue &Omod) const;
bool SelectVOP3PMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
- bool SelectVOP3PMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
- SDValue &Clamp) const;
bool SelectVOP3OpSel(SDValue In, SDValue &Src, SDValue &SrcMods) const;
bool SelectVOP3OpSel0(SDValue In, SDValue &Src, SDValue &SrcMods,
@@ -2590,17 +2588,6 @@ bool AMDGPUDAGToDAGISel::SelectVOP3PMods(SDValue In, SDValue &Src,
return true;
}
-bool AMDGPUDAGToDAGISel::SelectVOP3PMods0(SDValue In, SDValue &Src,
- SDValue &SrcMods,
- SDValue &Clamp) const {
- SDLoc SL(In);
-
- // FIXME: Handle clamp and op_sel
- Clamp = CurDAG->getTargetConstant(0, SL, MVT::i32);
-
- return SelectVOP3PMods(In, Src, SrcMods);
-}
-
bool AMDGPUDAGToDAGISel::SelectVOP3OpSel(SDValue In, SDValue &Src,
SDValue &SrcMods) const {
Src = In;
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
index 559ede882fc5..d25cf12be661 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
@@ -1302,7 +1302,6 @@ def VOP3Mods_nnan : ComplexPattern<fAny, 2, "SelectVOP3Mods_NNaN">;
def VOP3OMods : ComplexPattern<untyped, 3, "SelectVOP3OMods">;
def VOP3PMods : ComplexPattern<untyped, 2, "SelectVOP3PMods">;
-def VOP3PMods0 : ComplexPattern<untyped, 3, "SelectVOP3PMods0">;
def VOP3OpSel : ComplexPattern<untyped, 2, "SelectVOP3OpSel">;
def VOP3OpSel0 : ComplexPattern<untyped, 3, "SelectVOP3OpSel0">;
@@ -1704,7 +1703,7 @@ class getInsVOP3P <RegisterOperand Src0RC, RegisterOperand Src1RC,
!if (HasClamp,
(ins Src0Mod:$src0_modifiers, Src0RC:$src0,
Src1Mod:$src1_modifiers, Src1RC:$src1,
- clampmod:$clamp,
+ clampmod0:$clamp,
op_sel:$op_sel, op_sel_hi:$op_sel_hi,
neg_lo:$neg_lo, neg_hi:$neg_hi),
(ins Src0Mod:$src0_modifiers, Src0RC:$src0,
@@ -1716,7 +1715,7 @@ class getInsVOP3P <RegisterOperand Src0RC, RegisterOperand Src1RC,
(ins Src0Mod:$src0_modifiers, Src0RC:$src0,
Src1Mod:$src1_modifiers, Src1RC:$src1,
Src2Mod:$src2_modifiers, Src2RC:$src2,
- clampmod:$clamp,
+ clampmod0:$clamp,
op_sel:$op_sel, op_sel_hi:$op_sel_hi,
neg_lo:$neg_lo, neg_hi:$neg_hi),
(ins Src0Mod:$src0_modifiers, Src0RC:$src0,
diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
index 717b5bde3616..cf1c56aeb3ec 100644
--- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
@@ -34,18 +34,16 @@ class getVOP3ModPat<VOPProfile P, SDPatternOperator node> {
class getVOP3PModPat<VOPProfile P, SDPatternOperator node> {
list<dag> ret3 = [(set P.DstVT:$vdst,
- (DivergentFragOrOp<node, P>.ret (P.Src0VT !if(P.HasClamp, (VOP3PMods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp),
- (VOP3PMods P.Src0VT:$src0, i32:$src0_modifiers))),
- (P.Src1VT (VOP3PMods P.Src1VT:$src1, i32:$src1_modifiers)),
- (P.Src2VT (VOP3PMods P.Src2VT:$src2, i32:$src2_modifiers))))];
+ (DivergentFragOrOp<node, P>.ret (P.Src0VT (VOP3PMods P.Src0VT:$src0, i32:$src0_modifiers)),
+ (P.Src1VT (VOP3PMods P.Src1VT:$src1, i32:$src1_modifiers)),
+ (P.Src2VT (VOP3PMods P.Src2VT:$src2, i32:$src2_modifiers))))];
list<dag> ret2 = [(set P.DstVT:$vdst,
- (DivergentFragOrOp<node, P>.ret !if(P.HasClamp, (P.Src0VT (VOP3PMods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp)),
- (P.Src0VT (VOP3PMods P.Src0VT:$src0, i32:$src0_modifiers))),
- (P.Src1VT (VOP3PMods P.Src1VT:$src1, i32:$src1_modifiers))))];
+ (DivergentFragOrOp<node, P>.ret (P.Src0VT (VOP3PMods P.Src0VT:$src0, i32:$src0_modifiers)),
+ (P.Src1VT (VOP3PMods P.Src1VT:$src1, i32:$src1_modifiers))))];
list<dag> ret1 = [(set P.DstVT:$vdst,
- (DivergentFragOrOp<node, P>.ret (P.Src0VT (VOP3PMods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp))))];
+ (DivergentFragOrOp<node, P>.ret (P.Src0VT (VOP3PMods P.Src0VT:$src0, i32:$src0_modifiers))))];
list<dag> ret = !if(!eq(P.NumSrcArgs, 3), ret3,
!if(!eq(P.NumSrcArgs, 2), ret2,
diff --git a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
index 9f949115c4f3..4fd381c10de8 100644
--- a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
@@ -29,9 +29,14 @@ class VOP3_VOP3PInst<string OpName, VOPProfile P, bit UseTiedOutput = 0,
!con(
(ins FP16InputMods:$src0_modifiers, VCSrc_f16:$src0,
FP16InputMods:$src1_modifiers, VCSrc_f16:$src1,
- FP16InputMods:$src2_modifiers, VCSrc_f16:$src2,
- clampmod:$clamp),
- !if(UseTiedOutput, (ins VGPR_32:$vdst_in), (ins))),
+ FP16InputMods:$src2_modifiers, VCSrc_f16:$src2),
+ // FIXME: clampmod0 misbehaves with the non-default vdst_in
+ // following it. For now workaround this by requiring clamp
+ // in tied patterns. This should use undef_tied_input, but it
+ // seems underdeveloped and doesns't apply the right register
+ // class constraints.
+ !if(UseTiedOutput, (ins clampmod:$clamp, VGPR_32:$vdst_in),
+ (ins clampmod0:$clamp))),
(ins op_sel:$op_sel, op_sel_hi:$op_sel_hi));
let Constraints = !if(UseTiedOutput, "$vdst = $vdst_in", "");
@@ -75,8 +80,8 @@ def V_PK_LSHRREV_B16 : VOP3PInst<"v_pk_lshrrev_b16", VOP3_Profile<VOP_V2I16_V2I1
// The constant will be emitted as a mov, and folded later.
// TODO: We could directly encode the immediate now
def : GCNPat<
- (add (v2i16 (VOP3PMods0 v2i16:$src0, i32:$src0_modifiers, i1:$clamp)), NegSubInlineConstV216:$src1),
- (V_PK_SUB_U16 $src0_modifiers, $src0, SRCMODS.OP_SEL_1, NegSubInlineConstV216:$src1, $clamp)
+ (add (v2i16 (VOP3PMods v2i16:$src0, i32:$src0_modifiers)), NegSubInlineConstV216:$src1),
+ (V_PK_SUB_U16 $src0_modifiers, $src0, SRCMODS.OP_SEL_1, NegSubInlineConstV216:$src1)
>;
multiclass MadFmaMixPats<SDPatternOperator fma_like,
@@ -284,7 +289,7 @@ multiclass DotPats<SDPatternOperator dot_op,
VOP3PInst dot_inst> {
let SubtargetPredicate = dot_inst.SubtargetPredicate in
def : GCNPat <
- (dot_op (dot_inst.Pfl.Src0VT (VOP3PMods0 dot_inst.Pfl.Src0VT:$src0, i32:$src0_modifiers)),
+ (dot_op (dot_inst.Pfl.Src0VT (VOP3PMods dot_inst.Pfl.Src0VT:$src0, i32:$src0_modifiers)),
(dot_inst.Pfl.Src1VT (VOP3PMods dot_inst.Pfl.Src1VT:$src1, i32:$src1_modifiers)),
(dot_inst.Pfl.Src2VT (VOP3PMods dot_inst.Pfl.Src2VT:$src2, i32:$src2_modifiers)), i1:$clamp),
(dot_inst $src0_modifiers, $src0, $src1_modifiers, $src1, $src2_modifiers, $src2, (as_i1timm $clamp))>;
More information about the llvm-commits
mailing list