[llvm] e60d658 - AMDGPU/GlobalISel: Handle VOP3NoMods

Matt Arsenault via llvm-commits llvm-commits at lists.llvm.org
Mon Jan 27 09:16:01 PST 2020


Author: Matt Arsenault
Date: 2020-01-27T09:03:44-08:00
New Revision: e60d6582604bd4367377074fef0ac459c0e3df26

URL: https://github.com/llvm/llvm-project/commit/e60d6582604bd4367377074fef0ac459c0e3df26
DIFF: https://github.com/llvm/llvm-project/commit/e60d6582604bd4367377074fef0ac459c0e3df26.diff

LOG: AMDGPU/GlobalISel: Handle VOP3NoMods

Added: 
    

Modified: 
    llvm/lib/Target/AMDGPU/AMDGPUGISel.td
    llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
    llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
    llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fma.s32.mir
    llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmad.s32.mir

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
index a02555e6a7a5..6fa0ce1bcdee 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
@@ -31,6 +31,10 @@ def gi_vop3mods :
     GIComplexOperandMatcher<s32, "selectVOP3Mods">,
     GIComplexPatternEquiv<VOP3Mods>;
 
+def gi_vop3_no_mods :
+    GIComplexOperandMatcher<s32, "selectVOP3NoMods">,
+    GIComplexPatternEquiv<VOP3NoMods>;
+
 def gi_vop3mods_nnan :
     GIComplexOperandMatcher<s32, "selectVOP3Mods_nnan">,
     GIComplexPatternEquiv<VOP3Mods_nnan>;

diff  --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index 8ac944449d68..ae6d6489e92d 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -2218,6 +2218,18 @@ AMDGPUInstructionSelector::selectVOP3Mods(MachineOperand &Root) const {
   }};
 }
 
+InstructionSelector::ComplexRendererFns
+AMDGPUInstructionSelector::selectVOP3NoMods(MachineOperand &Root) const {
+  Register Reg = Root.getReg();
+  const MachineInstr *Def = getDefIgnoringCopies(Reg, *MRI);
+  if (Def && (Def->getOpcode() == AMDGPU::G_FNEG ||
+              Def->getOpcode() == AMDGPU::G_FABS))
+    return {};
+  return {{
+      [=](MachineInstrBuilder &MIB) { MIB.addReg(Reg); },
+  }};
+}
+
 InstructionSelector::ComplexRendererFns
 AMDGPUInstructionSelector::selectVOP3Mods_nnan(MachineOperand &Root) const {
   Register Src;

diff  --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
index f946469a6985..2a0460c53374 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
@@ -142,6 +142,9 @@ class AMDGPUInstructionSelector : public InstructionSelector {
   selectVOP3OMods(MachineOperand &Root) const;
   InstructionSelector::ComplexRendererFns
   selectVOP3Mods(MachineOperand &Root) const;
+
+  ComplexRendererFns selectVOP3NoMods(MachineOperand &Root) const;
+
   InstructionSelector::ComplexRendererFns
   selectVOP3Mods_nnan(MachineOperand &Root) const;
 

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fma.s32.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fma.s32.mir
index ec3f4449f60a..ffdef614b546 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fma.s32.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fma.s32.mir
@@ -23,15 +23,15 @@ body: |
     ; GFX9-DL: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
     ; GFX9-DL: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
     ; GFX9-DL: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
-    ; GFX9-DL: [[V_FMA_F32_:%[0-9]+]]:vgpr_32 = V_FMA_F32 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec
-    ; GFX9-DL: S_ENDPGM 0, implicit [[V_FMA_F32_]]
+    ; GFX9-DL: [[V_FMAC_F32_e64_:%[0-9]+]]:vgpr_32 = V_FMAC_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec
+    ; GFX9-DL: S_ENDPGM 0, implicit [[V_FMAC_F32_e64_]]
     ; GFX10-LABEL: name: fma_f32
     ; GFX10: $vcc_hi = IMPLICIT_DEF
     ; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
     ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
     ; GFX10: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
-    ; GFX10: [[V_FMA_F32_:%[0-9]+]]:vgpr_32 = V_FMA_F32 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec
-    ; GFX10: S_ENDPGM 0, implicit [[V_FMA_F32_]]
+    ; GFX10: [[V_FMAC_F32_e64_:%[0-9]+]]:vgpr_32 = V_FMAC_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec
+    ; GFX10: S_ENDPGM 0, implicit [[V_FMAC_F32_e64_]]
     %0:vgpr(s32) = COPY $vgpr0
     %1:vgpr(s32) = COPY $vgpr1
     %2:vgpr(s32) = COPY $vgpr2
@@ -60,15 +60,15 @@ body: |
     ; GFX9-DL: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
     ; GFX9-DL: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
     ; GFX9-DL: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
-    ; GFX9-DL: [[V_FMA_F32_:%[0-9]+]]:vgpr_32 = V_FMA_F32 1, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec
-    ; GFX9-DL: S_ENDPGM 0, implicit [[V_FMA_F32_]]
+    ; GFX9-DL: [[V_FMAC_F32_e64_:%[0-9]+]]:vgpr_32 = V_FMAC_F32_e64 1, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec
+    ; GFX9-DL: S_ENDPGM 0, implicit [[V_FMAC_F32_e64_]]
     ; GFX10-LABEL: name: fma_f32_fneg_src0
     ; GFX10: $vcc_hi = IMPLICIT_DEF
     ; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
     ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
     ; GFX10: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
-    ; GFX10: [[V_FMA_F32_:%[0-9]+]]:vgpr_32 = V_FMA_F32 1, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec
-    ; GFX10: S_ENDPGM 0, implicit [[V_FMA_F32_]]
+    ; GFX10: [[V_FMAC_F32_e64_:%[0-9]+]]:vgpr_32 = V_FMAC_F32_e64 1, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec
+    ; GFX10: S_ENDPGM 0, implicit [[V_FMAC_F32_e64_]]
     %0:vgpr(s32) = COPY $vgpr0
     %1:vgpr(s32) = COPY $vgpr1
     %2:vgpr(s32) = COPY $vgpr2
@@ -98,15 +98,15 @@ body: |
     ; GFX9-DL: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
     ; GFX9-DL: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
     ; GFX9-DL: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
-    ; GFX9-DL: [[V_FMA_F32_:%[0-9]+]]:vgpr_32 = V_FMA_F32 0, [[COPY]], 1, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec
-    ; GFX9-DL: S_ENDPGM 0, implicit [[V_FMA_F32_]]
+    ; GFX9-DL: [[V_FMAC_F32_e64_:%[0-9]+]]:vgpr_32 = V_FMAC_F32_e64 0, [[COPY]], 1, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec
+    ; GFX9-DL: S_ENDPGM 0, implicit [[V_FMAC_F32_e64_]]
     ; GFX10-LABEL: name: fma_f32_fneg_src1
     ; GFX10: $vcc_hi = IMPLICIT_DEF
     ; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
     ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
     ; GFX10: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
-    ; GFX10: [[V_FMA_F32_:%[0-9]+]]:vgpr_32 = V_FMA_F32 0, [[COPY]], 1, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec
-    ; GFX10: S_ENDPGM 0, implicit [[V_FMA_F32_]]
+    ; GFX10: [[V_FMAC_F32_e64_:%[0-9]+]]:vgpr_32 = V_FMAC_F32_e64 0, [[COPY]], 1, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec
+    ; GFX10: S_ENDPGM 0, implicit [[V_FMAC_F32_e64_]]
     %0:vgpr(s32) = COPY $vgpr0
     %1:vgpr(s32) = COPY $vgpr1
     %2:vgpr(s32) = COPY $vgpr2

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmad.s32.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmad.s32.mir
index fde4fba9c282..071e6072418c 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmad.s32.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmad.s32.mir
@@ -16,15 +16,15 @@ body: |
     ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
     ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
     ; GFX6: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
-    ; GFX6: [[V_MAD_F32_:%[0-9]+]]:vgpr_32 = V_MAD_F32 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec
-    ; GFX6: S_ENDPGM 0, implicit [[V_MAD_F32_]]
+    ; GFX6: [[V_MAC_F32_e64_:%[0-9]+]]:vgpr_32 = V_MAC_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec
+    ; GFX6: S_ENDPGM 0, implicit [[V_MAC_F32_e64_]]
     ; GFX10-LABEL: name: fmad_f32
     ; GFX10: $vcc_hi = IMPLICIT_DEF
     ; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
     ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
     ; GFX10: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
-    ; GFX10: [[V_MAD_F32_:%[0-9]+]]:vgpr_32 = V_MAD_F32 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec
-    ; GFX10: S_ENDPGM 0, implicit [[V_MAD_F32_]]
+    ; GFX10: [[V_MAC_F32_e64_:%[0-9]+]]:vgpr_32 = V_MAC_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec
+    ; GFX10: S_ENDPGM 0, implicit [[V_MAC_F32_e64_]]
     %0:vgpr(s32) = COPY $vgpr0
     %1:vgpr(s32) = COPY $vgpr1
     %2:vgpr(s32) = COPY $vgpr2


        


More information about the llvm-commits mailing list