[llvm] d2a9739 - AMDGPU/GlobalISel: Eliminate SelectVOP3Mods_f32
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Mon Jan 27 14:54:01 PST 2020
Author: Matt Arsenault
Date: 2020-01-27T17:53:54-05:00
New Revision: d2a9739274f687971d0283a18c4ed58af81394f4
URL: https://github.com/llvm/llvm-project/commit/d2a9739274f687971d0283a18c4ed58af81394f4
DIFF: https://github.com/llvm/llvm-project/commit/d2a9739274f687971d0283a18c4ed58af81394f4.diff
LOG: AMDGPU/GlobalISel: Eliminate SelectVOP3Mods_f32
Trivial type predicates should be moved into the tablegen pattern
itself, and not checked inside complex patterns. This eliminates a
redundant complex pattern, and fixes select source modifiers for
GlobalISel.
I have further patches which fully handle select in tablegen and
remove all of the C++ selection, although it requires the ugliness to
support the entire range of legal register types.
Added:
Modified:
llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
llvm/lib/Target/AMDGPU/SIInstrInfo.td
llvm/lib/Target/AMDGPU/SIInstructions.td
llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-select.mir
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index eb4c4e0eba01..07f500b136e8 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -252,7 +252,6 @@ class AMDGPUDAGToDAGISel : public SelectionDAGISel {
bool SelectMOVRELOffset(SDValue Index, SDValue &Base, SDValue &Offset) const;
bool SelectVOP3Mods_NNaN(SDValue In, SDValue &Src, SDValue &SrcMods) const;
- bool SelectVOP3Mods_f32(SDValue In, SDValue &Src, SDValue &SrcMods) const;
bool SelectVOP3ModsImpl(SDValue In, SDValue &Src, unsigned &SrcMods) const;
bool SelectVOP3Mods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
bool SelectVOP3NoMods(SDValue In, SDValue &Src) const;
@@ -2430,15 +2429,6 @@ bool AMDGPUDAGToDAGISel::SelectVOP3Mods_NNaN(SDValue In, SDValue &Src,
return isNoNanSrc(Src);
}
-bool AMDGPUDAGToDAGISel::SelectVOP3Mods_f32(SDValue In, SDValue &Src,
- SDValue &SrcMods) const {
- if (In.getValueType() == MVT::f32)
- return SelectVOP3Mods(In, Src, SrcMods);
- Src = In;
- SrcMods = CurDAG->getTargetConstant(0, SDLoc(In), MVT::i32);;
- return true;
-}
-
bool AMDGPUDAGToDAGISel::SelectVOP3NoMods(SDValue In, SDValue &Src) const {
if (In.getOpcode() == ISD::FABS || In.getOpcode() == ISD::FNEG)
return false;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index 93c06745a76f..009815f5f92e 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -1120,6 +1120,9 @@ bool AMDGPUInstructionSelector::selectG_INTRINSIC_W_SIDE_EFFECTS(
}
bool AMDGPUInstructionSelector::selectG_SELECT(MachineInstr &I) const {
+ if (selectImpl(I, *CoverageInfo))
+ return true;
+
MachineBasicBlock *BB = I.getParent();
const DebugLoc &DL = I.getDebugLoc();
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
index e96cf16c6611..9ee5aad07f5c 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
@@ -1282,8 +1282,6 @@ def VOP3Mods : ComplexPattern<untyped, 2, "SelectVOP3Mods">;
def VOP3NoMods : ComplexPattern<untyped, 1, "SelectVOP3NoMods">;
// VOP3Mods, but the input source is known to never be NaN.
def VOP3Mods_nnan : ComplexPattern<fAny, 2, "SelectVOP3Mods_NNaN">;
-// VOP3Mods, but only allowed for f32 operands.
-def VOP3Mods_f32 : ComplexPattern<fAny, 2, "SelectVOP3Mods_f32">;
def VOP3OMods : ComplexPattern<untyped, 3, "SelectVOP3OMods">;
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index d6f69faa4f38..19b235f0bc78 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -888,18 +888,22 @@ def : FMADModsPat<V_MAD_F16, AMDGPUfmad_ftz, f16> {
let SubtargetPredicate = Has16BitInsts;
}
-multiclass SelectPat <ValueType vt> {
- def : GCNPat <
- (vt (select i1:$src0, (VOP3Mods_f32 vt:$src1, i32:$src1_mods),
- (VOP3Mods_f32 vt:$src2, i32:$src2_mods))),
- (V_CNDMASK_B32_e64 $src2_mods, $src2, $src1_mods, $src1, $src0)
- >;
-}
+class VOPSelectModsPat <ValueType vt> : GCNPat <
+ (vt (select i1:$src0, (VOP3Mods vt:$src1, i32:$src1_mods),
+ (VOP3Mods vt:$src2, i32:$src2_mods))),
+ (V_CNDMASK_B32_e64 FP32InputMods:$src2_mods, VSrc_b32:$src2,
+ FP32InputMods:$src1_mods, VSrc_b32:$src1, SSrc_i1:$src0)
+>;
+
+class VOPSelectPat <ValueType vt> : GCNPat <
+ (vt (select i1:$src0, vt:$src1, vt:$src2)),
+ (V_CNDMASK_B32_e64 0, VSrc_b32:$src2, 0, VSrc_b32:$src1, SSrc_i1:$src0)
+>;
-defm : SelectPat <i16>;
-defm : SelectPat <i32>;
-defm : SelectPat <f16>;
-defm : SelectPat <f32>;
+def : VOPSelectModsPat <i32>;
+def : VOPSelectModsPat <f32>;
+def : VOPSelectPat <f16>;
+def : VOPSelectPat <i16>;
let AddedComplexity = 1 in {
def : GCNPat <
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-select.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-select.mir
index c162d38567d0..cbf21d2cda6f 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-select.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-select.mir
@@ -344,3 +344,219 @@ body: |
S_ENDPGM 0, implicit %5
...
+
+# Fold source modifiers into VOP select
+---
+name: select_s32_vcc_fneg_lhs
+legalized: true
+regBankSelected: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
+
+ ; GCN-LABEL: name: select_s32_vcc_fneg_lhs
+ ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GCN: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; GCN: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
+ ; GCN: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[COPY]], [[COPY1]], implicit $exec
+ ; GCN: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY3]], 1, [[COPY2]], [[V_CMP_EQ_U32_e64_]], implicit $exec
+ ; GCN: S_ENDPGM 0, implicit [[V_CNDMASK_B32_e64_]]
+ %0:vgpr(s32) = COPY $vgpr0
+ %1:vgpr(s32) = COPY $vgpr1
+ %2:vgpr(s32) = COPY $vgpr2
+ %3:vgpr(s32) = COPY $vgpr3
+ %4:vgpr(s32) = G_FNEG %2
+ %5:vcc(s1) = G_ICMP intpred(eq), %0, %1
+ %6:vgpr(s32) = G_SELECT %5, %4, %3
+ S_ENDPGM 0, implicit %6
+
+...
+
+---
+name: select_s32_vcc_fneg_rhs
+legalized: true
+regBankSelected: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
+
+ ; GCN-LABEL: name: select_s32_vcc_fneg_rhs
+ ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GCN: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; GCN: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
+ ; GCN: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[COPY]], [[COPY1]], implicit $exec
+ ; GCN: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 1, [[COPY3]], 0, [[COPY2]], [[V_CMP_EQ_U32_e64_]], implicit $exec
+ ; GCN: S_ENDPGM 0, implicit [[V_CNDMASK_B32_e64_]]
+ %0:vgpr(s32) = COPY $vgpr0
+ %1:vgpr(s32) = COPY $vgpr1
+ %2:vgpr(s32) = COPY $vgpr2
+ %3:vgpr(s32) = COPY $vgpr3
+ %4:vgpr(s32) = G_FNEG %3
+ %5:vcc(s1) = G_ICMP intpred(eq), %0, %1
+ %6:vgpr(s32) = G_SELECT %5, %2, %4
+ S_ENDPGM 0, implicit %6
+
+...
+
+---
+name: select_s32_vcc_fneg_fabs_lhs
+legalized: true
+regBankSelected: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
+
+ ; GCN-LABEL: name: select_s32_vcc_fneg_fabs_lhs
+ ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GCN: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; GCN: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
+ ; GCN: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[COPY]], [[COPY1]], implicit $exec
+ ; GCN: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY2]], 3, [[COPY3]], [[V_CMP_EQ_U32_e64_]], implicit $exec
+ ; GCN: S_ENDPGM 0, implicit [[V_CNDMASK_B32_e64_]]
+ %0:vgpr(s32) = COPY $vgpr0
+ %1:vgpr(s32) = COPY $vgpr1
+ %2:vgpr(s32) = COPY $vgpr2
+ %3:vgpr(s32) = COPY $vgpr3
+ %4:vgpr(s32) = G_FABS %3
+ %5:vgpr(s32) = G_FNEG %4
+ %6:vcc(s1) = G_ICMP intpred(eq), %0, %1
+ %7:vgpr(s32) = G_SELECT %6, %5, %2
+ S_ENDPGM 0, implicit %7
+
+...
+
+# Make sure we don't try to fold source modifiers into non-32 bit value.
+---
+name: select_s16_vcc_fneg_lhs
+legalized: true
+regBankSelected: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
+
+ ; GCN-LABEL: name: select_s16_vcc_fneg_lhs
+ ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GCN: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; GCN: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
+ ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32768
+ ; GCN: [[V_XOR_B32_e32_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e32 [[S_MOV_B32_]], [[COPY]], implicit $exec
+ ; GCN: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[COPY2]], [[COPY3]], implicit $exec
+ ; GCN: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY1]], 0, [[V_XOR_B32_e32_]], [[V_CMP_EQ_U32_e64_]], implicit $exec
+ ; GCN: S_ENDPGM 0, implicit [[V_CNDMASK_B32_e64_]]
+ %0:vgpr(s32) = COPY $vgpr0
+ %1:vgpr(s32) = COPY $vgpr1
+ %2:vgpr(s32) = COPY $vgpr2
+ %3:vgpr(s32) = COPY $vgpr3
+ %4:vgpr(s16) = G_TRUNC %0
+ %5:vgpr(s16) = G_TRUNC %1
+ %6:vgpr(s16) = G_FNEG %4
+ %7:vcc(s1) = G_ICMP intpred(eq), %2, %3
+ %8:vgpr(s16) = G_SELECT %7, %6, %5
+ S_ENDPGM 0, implicit %8
+
+...
+
+
+# Make sure we don't try to fold source modifiers into a vector
+---
+name: select_v2s16_vcc_fneg_lhs
+legalized: true
+regBankSelected: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
+
+ ; GCN-LABEL: name: select_v2s16_vcc_fneg_lhs
+ ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GCN: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3
+ ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147516416
+ ; GCN: [[V_XOR_B32_e32_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e32 [[S_MOV_B32_]], [[COPY2]], implicit $exec
+ ; GCN: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[COPY]], [[COPY1]], implicit $exec
+ ; GCN: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY2]], 0, [[V_XOR_B32_e32_]], [[V_CMP_EQ_U32_e64_]], implicit $exec
+ ; GCN: S_ENDPGM 0, implicit [[V_CNDMASK_B32_e64_]]
+ %0:vgpr(s32) = COPY $vgpr0
+ %1:vgpr(s32) = COPY $vgpr1
+ %2:vgpr(<2 x s16>) = COPY $vgpr2
+ %3:vgpr(<2 x s16>) = COPY $vgpr3
+ %4:vgpr(<2 x s16>) = G_FNEG %3
+ %5:vcc(s1) = G_ICMP intpred(eq), %0, %1
+ %6:vgpr(<2 x s16>) = G_SELECT %5, %4, %3
+ S_ENDPGM 0, implicit %6
+
+...
+
+# Make sure we don't try to fold source modifiers into a scalar select
+
+---
+name: select_s32_scc_fneg_lhs
+legalized: true
+regBankSelected: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3
+
+ ; GCN-LABEL: name: select_s32_scc_fneg_lhs
+ ; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+ ; GCN: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
+ ; GCN: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GCN: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648
+ ; GCN: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[COPY2]], [[S_MOV_B32_]], implicit-def $scc
+ ; GCN: S_CMP_EQ_U32 [[COPY]], [[COPY1]], implicit-def $scc
+ ; GCN: [[COPY4:%[0-9]+]]:sreg_32 = COPY $scc
+ ; GCN: $scc = COPY [[COPY4]]
+ ; GCN: [[S_CSELECT_B32_:%[0-9]+]]:sreg_32 = S_CSELECT_B32 [[S_XOR_B32_]], [[COPY3]], implicit $scc
+ ; GCN: S_ENDPGM 0, implicit [[S_CSELECT_B32_]]
+ %0:sgpr(s32) = COPY $sgpr0
+ %1:sgpr(s32) = COPY $sgpr1
+ %2:sgpr(s32) = COPY $sgpr2
+ %3:sgpr(s32) = COPY $sgpr3
+ %4:sgpr(s32) = G_FNEG %2
+ %5:sgpr(s32) = G_ICMP intpred(eq), %0, %1
+ %6:sgpr(s32) = G_SELECT %5, %4, %3
+ S_ENDPGM 0, implicit %6
+
+...
+
+---
+name: select_s32_scc_fneg_rhs
+legalized: true
+regBankSelected: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3
+
+ ; GCN-LABEL: name: select_s32_scc_fneg_rhs
+ ; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+ ; GCN: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
+ ; GCN: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GCN: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648
+ ; GCN: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[COPY3]], [[S_MOV_B32_]], implicit-def $scc
+ ; GCN: S_CMP_EQ_U32 [[COPY]], [[COPY1]], implicit-def $scc
+ ; GCN: [[COPY4:%[0-9]+]]:sreg_32 = COPY $scc
+ ; GCN: $scc = COPY [[COPY4]]
+ ; GCN: [[S_CSELECT_B32_:%[0-9]+]]:sreg_32 = S_CSELECT_B32 [[COPY2]], [[S_XOR_B32_]], implicit $scc
+ ; GCN: S_ENDPGM 0, implicit [[S_CSELECT_B32_]]
+ %0:sgpr(s32) = COPY $sgpr0
+ %1:sgpr(s32) = COPY $sgpr1
+ %2:sgpr(s32) = COPY $sgpr2
+ %3:sgpr(s32) = COPY $sgpr3
+ %4:sgpr(s32) = G_FNEG %3
+ %5:sgpr(s32) = G_ICMP intpred(eq), %0, %1
+ %6:sgpr(s32) = G_SELECT %5, %2, %4
+ S_ENDPGM 0, implicit %6
+
+...
More information about the llvm-commits
mailing list