[llvm] 1247865 - AMDGPU/GlobalISel: Select llvm.amdgcn.fmad.ftz

Matt Arsenault via llvm-commits llvm-commits at lists.llvm.org
Mon Dec 30 08:15:24 PST 2019


Author: Matt Arsenault
Date: 2019-12-30T11:12:35-05:00
New Revision: 1247865fe024e073c206b3803096df8477a60bab

URL: https://github.com/llvm/llvm-project/commit/1247865fe024e073c206b3803096df8477a60bab
DIFF: https://github.com/llvm/llvm-project/commit/1247865fe024e073c206b3803096df8477a60bab.diff

LOG: AMDGPU/GlobalISel: Select llvm.amdgcn.fmad.ftz

Added: 
    llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fmad.ftz.mir

Modified: 
    llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td
    llvm/lib/Target/AMDGPU/SIInstructions.td

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td
index 7b95a5c88671..13afcb4cdb92 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td
@@ -246,7 +246,7 @@ def AMDGPUdiv_fmas : SDNode<"AMDGPUISD::DIV_FMAS", AMDGPUFmasOp,
 // Denominator, src2 = Numerator).
 def AMDGPUdiv_fixup_impl : SDNode<"AMDGPUISD::DIV_FIXUP", SDTFPTernaryOp>;
 
-def AMDGPUfmad_ftz : SDNode<"AMDGPUISD::FMAD_FTZ", SDTFPTernaryOp>;
+def AMDGPUfmad_ftz_impl : SDNode<"AMDGPUISD::FMAD_FTZ", SDTFPTernaryOp>;
 
 // Look Up 2.0 / pi src0 with segment select src1[4:0]
 def AMDGPUtrig_preop : SDNode<"AMDGPUISD::TRIG_PREOP", AMDGPUTrigPreOp>;
@@ -461,3 +461,7 @@ def AMDGPUpk_i16_i32 : PatFrags<(ops node:$src0, node:$src1),
 def AMDGPUpk_u16_u32 : PatFrags<(ops node:$src0, node:$src1),
   [(int_amdgcn_cvt_pk_u16 node:$src0, node:$src1),
   (AMDGPUpk_u16_u32_impl node:$src0, node:$src1)]>;
+
+def AMDGPUfmad_ftz : PatFrags<(ops node:$src0, node:$src1, node:$src2),
+  [(int_amdgcn_fmad_ftz node:$src0, node:$src1, node:$src2),
+   (AMDGPUfmad_ftz_impl node:$src0, node:$src1, node:$src2)]>;

diff  --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index 013bd5fd40a8..79bbbc1eab60 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -782,13 +782,14 @@ defm : FMADPat <f32, V_MAC_F32_e64>;
 
 class FMADModsPat<Instruction inst, SDPatternOperator mad_opr, ValueType Ty>
   : GCNPat<
-  (Ty (mad_opr (VOP3Mods Ty:$src0, i32:$src0_mod),
-  (VOP3Mods Ty:$src1, i32:$src1_mod),
-  (VOP3Mods Ty:$src2, i32:$src2_mod))),
+  (Ty (mad_opr (Ty (VOP3Mods Ty:$src0, i32:$src0_mod)),
+               (Ty (VOP3Mods Ty:$src1, i32:$src1_mod)),
+               (Ty (VOP3Mods Ty:$src2, i32:$src2_mod)))),
   (inst $src0_mod, $src0, $src1_mod, $src1,
   $src2_mod, $src2, DSTCLAMP.NONE, DSTOMOD.NONE)
 >;
 
+// FIXME: This should select to V_MAC_F32
 def : FMADModsPat<V_MAD_F32, AMDGPUfmad_ftz, f32>;
 def : FMADModsPat<V_MAD_F16, AMDGPUfmad_ftz, f16> {
   let SubtargetPredicate = Has16BitInsts;

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fmad.ftz.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fmad.ftz.mir
new file mode 100644
index 000000000000..da601b86fdb8
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fmad.ftz.mir
@@ -0,0 +1,233 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -march=amdgcn -mcpu=tahiti -mattr=+fp32-denormals -run-pass=instruction-select -verify-machineinstrs %s -o -  | FileCheck -check-prefix=GCN %s
+# RUN: llc -march=amdgcn -mcpu=tahiti -mattr=-fp32-denormals -run-pass=instruction-select -verify-machineinstrs %s -o -  | FileCheck -check-prefix=GCN %s
+# RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=+fp32-denormals -run-pass=instruction-select -verify-machineinstrs %s -o -  | FileCheck -check-prefix=GCN %s
+# RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-fp32-denormals -run-pass=instruction-select -verify-machineinstrs %s -o -  | FileCheck -check-prefix=GCN %s
+
+---
+name: fmad_ftz_s32_vvvv
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1, $vgpr2
+
+    ; GCN-LABEL: name: fmad_ftz_s32_vvvv
+    ; GCN: liveins: $vgpr0, $vgpr1, $vgpr2
+    ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+    ; GCN: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+    ; GCN: [[V_MAD_F32_:%[0-9]+]]:vgpr_32 = V_MAD_F32 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec
+    ; GCN: S_ENDPGM 0, implicit [[V_MAD_F32_]]
+    %0:vgpr(s32) = COPY $vgpr0
+    %1:vgpr(s32) = COPY $vgpr1
+    %2:vgpr(s32) = COPY $vgpr2
+    %3:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmad.ftz), %0, %1, %2
+    S_ENDPGM 0, implicit %3
+...
+
+---
+name: fmad_ftz_s32_vsvv
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    liveins: $sgpr0, $vgpr0, $vgpr1
+
+    ; GCN-LABEL: name: fmad_ftz_s32_vsvv
+    ; GCN: liveins: $sgpr0, $vgpr0, $vgpr1
+    ; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+    ; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GCN: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+    ; GCN: [[V_MAD_F32_:%[0-9]+]]:vgpr_32 = V_MAD_F32 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec
+    ; GCN: S_ENDPGM 0, implicit [[V_MAD_F32_]]
+    %0:sgpr(s32) = COPY $sgpr0
+    %1:vgpr(s32) = COPY $vgpr0
+    %2:vgpr(s32) = COPY $vgpr1
+    %3:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmad.ftz), %0, %1, %2
+    S_ENDPGM 0, implicit %3
+...
+
+---
+name: fmad_ftz_s32_vvsv
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    liveins: $sgpr0, $vgpr0, $vgpr1
+
+    ; GCN-LABEL: name: fmad_ftz_s32_vvsv
+    ; GCN: liveins: $sgpr0, $vgpr0, $vgpr1
+    ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GCN: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0
+    ; GCN: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+    ; GCN: [[V_MAD_F32_:%[0-9]+]]:vgpr_32 = V_MAD_F32 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec
+    ; GCN: S_ENDPGM 0, implicit [[V_MAD_F32_]]
+    %0:vgpr(s32) = COPY $vgpr0
+    %1:sgpr(s32) = COPY $sgpr0
+    %2:vgpr(s32) = COPY $vgpr1
+    %3:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmad.ftz), %0, %1, %2
+    S_ENDPGM 0, implicit %3
+...
+
+---
+name: fmad_ftz_s32_vvvs
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    liveins: $sgpr0, $vgpr0, $vgpr1
+
+    ; GCN-LABEL: name: fmad_ftz_s32_vvvs
+    ; GCN: liveins: $sgpr0, $vgpr0, $vgpr1
+    ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+    ; GCN: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr0
+    ; GCN: [[V_MAD_F32_:%[0-9]+]]:vgpr_32 = V_MAD_F32 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec
+    ; GCN: S_ENDPGM 0, implicit [[V_MAD_F32_]]
+    %0:vgpr(s32) = COPY $vgpr0
+    %1:vgpr(s32) = COPY $vgpr1
+    %2:sgpr(s32) = COPY $sgpr0
+    %3:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmad.ftz), %0, %1, %2
+    S_ENDPGM 0, implicit %3
+...
+
+
+# Same SGPR used, so doesn't violate the constant bus restriction.
+---
+name: fmad_ftz_s32_vssv
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    liveins: $sgpr0, $vgpr0
+
+    ; GCN-LABEL: name: fmad_ftz_s32_vssv
+    ; GCN: liveins: $sgpr0, $vgpr0
+    ; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+    ; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GCN: [[V_MAD_F32_:%[0-9]+]]:vgpr_32 = V_MAD_F32 0, [[COPY]], 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $exec
+    ; GCN: S_ENDPGM 0, implicit [[V_MAD_F32_]]
+    %0:sgpr(s32) = COPY $sgpr0
+    %1:vgpr(s32) = COPY $vgpr0
+    %2:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmad.ftz), %0, %0, %1
+    S_ENDPGM 0, implicit %2
+...
+
+---
+name: fmad_ftz_s32_vsvs
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    liveins: $sgpr0, $vgpr0
+
+    ; GCN-LABEL: name: fmad_ftz_s32_vsvs
+    ; GCN: liveins: $sgpr0, $vgpr0
+    ; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+    ; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GCN: [[V_MAD_F32_:%[0-9]+]]:vgpr_32 = V_MAD_F32 0, [[COPY]], 0, [[COPY1]], 0, [[COPY]], 0, 0, implicit $exec
+    ; GCN: S_ENDPGM 0, implicit [[V_MAD_F32_]]
+    %0:sgpr(s32) = COPY $sgpr0
+    %1:vgpr(s32) = COPY $vgpr0
+    %2:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmad.ftz), %0, %1, %0
+    S_ENDPGM 0, implicit %2
+...
+
+---
+name: fmad_ftz_s32_vvss
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    liveins: $sgpr0, $vgpr0
+
+    ; GCN-LABEL: name: fmad_ftz_s32_vvss
+    ; GCN: liveins: $sgpr0, $vgpr0
+    ; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+    ; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GCN: [[V_MAD_F32_:%[0-9]+]]:vgpr_32 = V_MAD_F32 0, [[COPY1]], 0, [[COPY]], 0, [[COPY]], 0, 0, implicit $exec
+    ; GCN: S_ENDPGM 0, implicit [[V_MAD_F32_]]
+    %0:sgpr(s32) = COPY $sgpr0
+    %1:vgpr(s32) = COPY $vgpr0
+    %2:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmad.ftz), %1, %0, %0
+    S_ENDPGM 0, implicit %2
+...
+
+---
+name: fmad_ftz_s32_vsss
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    liveins: $sgpr0
+
+    ; GCN-LABEL: name: fmad_ftz_s32_vsss
+    ; GCN: liveins: $sgpr0
+    ; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+    ; GCN: [[V_MAD_F32_:%[0-9]+]]:vgpr_32 = V_MAD_F32 0, [[COPY]], 0, [[COPY]], 0, [[COPY]], 0, 0, implicit $exec
+    ; GCN: S_ENDPGM 0, implicit [[V_MAD_F32_]]
+    %0:sgpr(s32) = COPY $sgpr0
+    %1:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmad.ftz), %0, %0, %0
+    S_ENDPGM 0, implicit %1
+...
+
+
+# FIXME: RegBankSelect should probably have fixed this constant bus violation (two different SGPRs), but the selector should still fail to select it.
+# ---
+# name: fmad_ftz_s32_vssv_constant_bus_violation
+# legalized: true
+# regBankSelected: true
+# tracksRegLiveness: true
+
+# body: |
+#   bb.0:
+#     liveins: $sgpr0, $sgpr1, $vgpr0
+
+#     %0:sgpr(s32) = COPY $sgpr0
+#     %1:sgpr(s32) = COPY $sgpr1
+#     %2:vgpr(s32) = COPY $vgpr0
+#     %3:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmad.ftz), %0, %1, %2
+#     S_ENDPGM 0, implicit %3
+# ...
+
+---
+name: fmad_ftz_s32_vvv_fneg_v
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1, $vgpr2
+
+    ; GCN-LABEL: name: fmad_ftz_s32_vvv_fneg_v
+    ; GCN: liveins: $vgpr0, $vgpr1, $vgpr2
+    ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+    ; GCN: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+    ; GCN: [[V_MAD_F32_:%[0-9]+]]:vgpr_32 = V_MAD_F32 0, [[COPY]], 0, [[COPY1]], 1, [[COPY2]], 0, 0, implicit $exec
+    ; GCN: S_ENDPGM 0, implicit [[V_MAD_F32_]]
+    %0:vgpr(s32) = COPY $vgpr0
+    %1:vgpr(s32) = COPY $vgpr1
+    %2:vgpr(s32) = COPY $vgpr2
+    %3:vgpr(s32) = G_FNEG %2
+    %4:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmad.ftz), %0, %1, %3
+    S_ENDPGM 0, implicit %4
+...


        


More information about the llvm-commits mailing list