[llvm] d2a9739 - AMDGPU/GlobalISel: Eliminate SelectVOP3Mods_f32

Mon Jan 27 14:54:01 PST 2020

Author: Matt Arsenault
Date: 2020-01-27T17:53:54-05:00
New Revision: d2a9739274f687971d0283a18c4ed58af81394f4

URL: https://github.com/llvm/llvm-project/commit/d2a9739274f687971d0283a18c4ed58af81394f4
DIFF: https://github.com/llvm/llvm-project/commit/d2a9739274f687971d0283a18c4ed58af81394f4.diff

LOG: AMDGPU/GlobalISel: Eliminate SelectVOP3Mods_f32

Trivial type predicates should be moved into the tablegen pattern
itself, and not checked inside complex patterns. This eliminates a
redundant complex pattern, and fixes select source modifiers for
GlobalISel.

I have further patches which fully handle select in tablegen and
remove all of the C++ selection, although it requires the ugliness to
support the entire range of legal register types.

Added: 
    

Modified: 
    llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
    llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
    llvm/lib/Target/AMDGPU/SIInstrInfo.td
    llvm/lib/Target/AMDGPU/SIInstructions.td
    llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-select.mir

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index eb4c4e0eba01..07f500b136e8 100644

--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -252,7 +252,6 @@ class AMDGPUDAGToDAGISel : public SelectionDAGISel {
   bool SelectMOVRELOffset(SDValue Index, SDValue &Base, SDValue &Offset) const;
 
   bool SelectVOP3Mods_NNaN(SDValue In, SDValue &Src, SDValue &SrcMods) const;
-  bool SelectVOP3Mods_f32(SDValue In, SDValue &Src, SDValue &SrcMods) const;
   bool SelectVOP3ModsImpl(SDValue In, SDValue &Src, unsigned &SrcMods) const;
   bool SelectVOP3Mods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
   bool SelectVOP3NoMods(SDValue In, SDValue &Src) const;
@@ -2430,15 +2429,6 @@ bool AMDGPUDAGToDAGISel::SelectVOP3Mods_NNaN(SDValue In, SDValue &Src,
   return isNoNanSrc(Src);
 }
 
-bool AMDGPUDAGToDAGISel::SelectVOP3Mods_f32(SDValue In, SDValue &Src,
-                                            SDValue &SrcMods) const {
-  if (In.getValueType() == MVT::f32)
-    return SelectVOP3Mods(In, Src, SrcMods);
-  Src = In;
-  SrcMods = CurDAG->getTargetConstant(0, SDLoc(In), MVT::i32);;
-  return true;
-}
-
 bool AMDGPUDAGToDAGISel::SelectVOP3NoMods(SDValue In, SDValue &Src) const {
   if (In.getOpcode() == ISD::FABS || In.getOpcode() == ISD::FNEG)
     return false;

diff  --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index 93c06745a76f..009815f5f92e 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -1120,6 +1120,9 @@ bool AMDGPUInstructionSelector::selectG_INTRINSIC_W_SIDE_EFFECTS(
 }
 
 bool AMDGPUInstructionSelector::selectG_SELECT(MachineInstr &I) const {
+  if (selectImpl(I, *CoverageInfo))
+    return true;
+
   MachineBasicBlock *BB = I.getParent();
   const DebugLoc &DL = I.getDebugLoc();
 

diff  --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
index e96cf16c6611..9ee5aad07f5c 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
@@ -1282,8 +1282,6 @@ def VOP3Mods  : ComplexPattern<untyped, 2, "SelectVOP3Mods">;
 def VOP3NoMods : ComplexPattern<untyped, 1, "SelectVOP3NoMods">;
 // VOP3Mods, but the input source is known to never be NaN.
 def VOP3Mods_nnan : ComplexPattern<fAny, 2, "SelectVOP3Mods_NNaN">;
-// VOP3Mods, but only allowed for f32 operands.
-def VOP3Mods_f32 : ComplexPattern<fAny, 2, "SelectVOP3Mods_f32">;
 
 def VOP3OMods : ComplexPattern<untyped, 3, "SelectVOP3OMods">;
 

diff  --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index d6f69faa4f38..19b235f0bc78 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -888,18 +888,22 @@ def : FMADModsPat<V_MAD_F16, AMDGPUfmad_ftz, f16> {
   let SubtargetPredicate = Has16BitInsts;
 }
 
-multiclass SelectPat <ValueType vt> {
-  def : GCNPat <
-    (vt (select i1:$src0, (VOP3Mods_f32 vt:$src1, i32:$src1_mods),
-                          (VOP3Mods_f32 vt:$src2, i32:$src2_mods))),
-    (V_CNDMASK_B32_e64 $src2_mods, $src2, $src1_mods, $src1, $src0)
-  >;
-}
+class VOPSelectModsPat <ValueType vt> : GCNPat <
+  (vt (select i1:$src0, (VOP3Mods vt:$src1, i32:$src1_mods),
+                        (VOP3Mods vt:$src2, i32:$src2_mods))),
+  (V_CNDMASK_B32_e64 FP32InputMods:$src2_mods, VSrc_b32:$src2,
+                     FP32InputMods:$src1_mods, VSrc_b32:$src1, SSrc_i1:$src0)
+>;
+
+class VOPSelectPat <ValueType vt> : GCNPat <
+  (vt (select i1:$src0, vt:$src1, vt:$src2)),
+  (V_CNDMASK_B32_e64 0, VSrc_b32:$src2, 0, VSrc_b32:$src1, SSrc_i1:$src0)
+>;
 
-defm : SelectPat <i16>;
-defm : SelectPat <i32>;
-defm : SelectPat <f16>;
-defm : SelectPat <f32>;
+def : VOPSelectModsPat <i32>;
+def : VOPSelectModsPat <f32>;
+def : VOPSelectPat <f16>;
+def : VOPSelectPat <i16>;
 
 let AddedComplexity = 1 in {
 def : GCNPat <

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-select.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-select.mir
index c162d38567d0..cbf21d2cda6f 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-select.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-select.mir
@@ -344,3 +344,219 @@ body: |
     S_ENDPGM 0, implicit %5
 
 ...
+
+# Fold source modifiers into VOP select
+---
+name:            select_s32_vcc_fneg_lhs
+legalized:       true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins:  $vgpr0, $vgpr1, $vgpr2, $vgpr3
+
+    ; GCN-LABEL: name: select_s32_vcc_fneg_lhs
+    ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+    ; GCN: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+    ; GCN: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
+    ; GCN: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[COPY]], [[COPY1]], implicit $exec
+    ; GCN: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY3]], 1, [[COPY2]], [[V_CMP_EQ_U32_e64_]], implicit $exec
+    ; GCN: S_ENDPGM 0, implicit [[V_CNDMASK_B32_e64_]]
+    %0:vgpr(s32) = COPY $vgpr0
+    %1:vgpr(s32) = COPY $vgpr1
+    %2:vgpr(s32) = COPY $vgpr2
+    %3:vgpr(s32) = COPY $vgpr3
+    %4:vgpr(s32) = G_FNEG %2
+    %5:vcc(s1) = G_ICMP intpred(eq), %0, %1
+    %6:vgpr(s32) = G_SELECT %5, %4, %3
+    S_ENDPGM 0, implicit %6
+
+...
+
+---
+name:            select_s32_vcc_fneg_rhs
+legalized:       true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins:  $vgpr0, $vgpr1, $vgpr2, $vgpr3
+
+    ; GCN-LABEL: name: select_s32_vcc_fneg_rhs
+    ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+    ; GCN: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+    ; GCN: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
+    ; GCN: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[COPY]], [[COPY1]], implicit $exec
+    ; GCN: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 1, [[COPY3]], 0, [[COPY2]], [[V_CMP_EQ_U32_e64_]], implicit $exec
+    ; GCN: S_ENDPGM 0, implicit [[V_CNDMASK_B32_e64_]]
+    %0:vgpr(s32) = COPY $vgpr0
+    %1:vgpr(s32) = COPY $vgpr1
+    %2:vgpr(s32) = COPY $vgpr2
+    %3:vgpr(s32) = COPY $vgpr3
+    %4:vgpr(s32) = G_FNEG %3
+    %5:vcc(s1) = G_ICMP intpred(eq), %0, %1
+    %6:vgpr(s32) = G_SELECT %5, %2, %4
+    S_ENDPGM 0, implicit %6
+
+...
+
+---
+name:            select_s32_vcc_fneg_fabs_lhs
+legalized:       true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins:  $vgpr0, $vgpr1, $vgpr2, $vgpr3
+
+    ; GCN-LABEL: name: select_s32_vcc_fneg_fabs_lhs
+    ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+    ; GCN: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+    ; GCN: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
+    ; GCN: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[COPY]], [[COPY1]], implicit $exec
+    ; GCN: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY2]], 3, [[COPY3]], [[V_CMP_EQ_U32_e64_]], implicit $exec
+    ; GCN: S_ENDPGM 0, implicit [[V_CNDMASK_B32_e64_]]
+    %0:vgpr(s32) = COPY $vgpr0
+    %1:vgpr(s32) = COPY $vgpr1
+    %2:vgpr(s32) = COPY $vgpr2
+    %3:vgpr(s32) = COPY $vgpr3
+    %4:vgpr(s32) = G_FABS %3
+    %5:vgpr(s32) = G_FNEG %4
+    %6:vcc(s1) = G_ICMP intpred(eq), %0, %1
+    %7:vgpr(s32) = G_SELECT %6, %5, %2
+    S_ENDPGM 0, implicit %7
+
+...
+
+# Make sure we don't try to fold source modifiers into non-32 bit value.
+---
+name:            select_s16_vcc_fneg_lhs
+legalized:       true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins:  $vgpr0, $vgpr1, $vgpr2, $vgpr3
+
+    ; GCN-LABEL: name: select_s16_vcc_fneg_lhs
+    ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+    ; GCN: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+    ; GCN: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
+    ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32768
+    ; GCN: [[V_XOR_B32_e32_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e32 [[S_MOV_B32_]], [[COPY]], implicit $exec
+    ; GCN: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[COPY2]], [[COPY3]], implicit $exec
+    ; GCN: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY1]], 0, [[V_XOR_B32_e32_]], [[V_CMP_EQ_U32_e64_]], implicit $exec
+    ; GCN: S_ENDPGM 0, implicit [[V_CNDMASK_B32_e64_]]
+    %0:vgpr(s32) = COPY $vgpr0
+    %1:vgpr(s32) = COPY $vgpr1
+    %2:vgpr(s32) = COPY $vgpr2
+    %3:vgpr(s32) = COPY $vgpr3
+    %4:vgpr(s16) = G_TRUNC %0
+    %5:vgpr(s16) = G_TRUNC %1
+    %6:vgpr(s16) = G_FNEG %4
+    %7:vcc(s1) = G_ICMP intpred(eq), %2, %3
+    %8:vgpr(s16) = G_SELECT %7, %6, %5
+    S_ENDPGM 0, implicit %8
+
+...
+
+
+# Make sure we don't try to fold source modifiers into a vector
+---
+name:            select_v2s16_vcc_fneg_lhs
+legalized:       true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins:  $vgpr0, $vgpr1, $vgpr2, $vgpr3
+
+    ; GCN-LABEL: name: select_v2s16_vcc_fneg_lhs
+    ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+    ; GCN: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3
+    ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147516416
+    ; GCN: [[V_XOR_B32_e32_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e32 [[S_MOV_B32_]], [[COPY2]], implicit $exec
+    ; GCN: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[COPY]], [[COPY1]], implicit $exec
+    ; GCN: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY2]], 0, [[V_XOR_B32_e32_]], [[V_CMP_EQ_U32_e64_]], implicit $exec
+    ; GCN: S_ENDPGM 0, implicit [[V_CNDMASK_B32_e64_]]
+    %0:vgpr(s32) = COPY $vgpr0
+    %1:vgpr(s32) = COPY $vgpr1
+    %2:vgpr(<2 x s16>) = COPY $vgpr2
+    %3:vgpr(<2 x s16>) = COPY $vgpr3
+    %4:vgpr(<2 x s16>) = G_FNEG %3
+    %5:vcc(s1) = G_ICMP intpred(eq), %0, %1
+    %6:vgpr(<2 x s16>) = G_SELECT %5, %4, %3
+    S_ENDPGM 0, implicit %6
+
+...
+
+# Make sure we don't try to fold source modifiers into a scalar select
+
+---
+name:            select_s32_scc_fneg_lhs
+legalized:       true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins:  $sgpr0, $sgpr1, $sgpr2, $sgpr3
+
+    ; GCN-LABEL: name: select_s32_scc_fneg_lhs
+    ; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+    ; GCN: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
+    ; GCN: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2
+    ; GCN: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3
+    ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648
+    ; GCN: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[COPY2]], [[S_MOV_B32_]], implicit-def $scc
+    ; GCN: S_CMP_EQ_U32 [[COPY]], [[COPY1]], implicit-def $scc
+    ; GCN: [[COPY4:%[0-9]+]]:sreg_32 = COPY $scc
+    ; GCN: $scc = COPY [[COPY4]]
+    ; GCN: [[S_CSELECT_B32_:%[0-9]+]]:sreg_32 = S_CSELECT_B32 [[S_XOR_B32_]], [[COPY3]], implicit $scc
+    ; GCN: S_ENDPGM 0, implicit [[S_CSELECT_B32_]]
+    %0:sgpr(s32) = COPY $sgpr0
+    %1:sgpr(s32) = COPY $sgpr1
+    %2:sgpr(s32) = COPY $sgpr2
+    %3:sgpr(s32) = COPY $sgpr3
+    %4:sgpr(s32) = G_FNEG %2
+    %5:sgpr(s32) = G_ICMP intpred(eq), %0, %1
+    %6:sgpr(s32) = G_SELECT %5, %4, %3
+    S_ENDPGM 0, implicit %6
+
+...
+
+---
+name:            select_s32_scc_fneg_rhs
+legalized:       true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins:  $sgpr0, $sgpr1, $sgpr2, $sgpr3
+
+    ; GCN-LABEL: name: select_s32_scc_fneg_rhs
+    ; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+    ; GCN: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
+    ; GCN: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2
+    ; GCN: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3
+    ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648
+    ; GCN: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[COPY3]], [[S_MOV_B32_]], implicit-def $scc
+    ; GCN: S_CMP_EQ_U32 [[COPY]], [[COPY1]], implicit-def $scc
+    ; GCN: [[COPY4:%[0-9]+]]:sreg_32 = COPY $scc
+    ; GCN: $scc = COPY [[COPY4]]
+    ; GCN: [[S_CSELECT_B32_:%[0-9]+]]:sreg_32 = S_CSELECT_B32 [[COPY2]], [[S_XOR_B32_]], implicit $scc
+    ; GCN: S_ENDPGM 0, implicit [[S_CSELECT_B32_]]
+    %0:sgpr(s32) = COPY $sgpr0
+    %1:sgpr(s32) = COPY $sgpr1
+    %2:sgpr(s32) = COPY $sgpr2
+    %3:sgpr(s32) = COPY $sgpr3
+    %4:sgpr(s32) = G_FNEG %3
+    %5:sgpr(s32) = G_ICMP intpred(eq), %0, %1
+    %6:sgpr(s32) = G_SELECT %5, %2, %4
+    S_ENDPGM 0, implicit %6
+
+...