[llvm] 1247865 - AMDGPU/GlobalISel: Select llvm.amdgcn.fmad.ftz
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Mon Dec 30 08:15:24 PST 2019
Author: Matt Arsenault
Date: 2019-12-30T11:12:35-05:00
New Revision: 1247865fe024e073c206b3803096df8477a60bab
URL: https://github.com/llvm/llvm-project/commit/1247865fe024e073c206b3803096df8477a60bab
DIFF: https://github.com/llvm/llvm-project/commit/1247865fe024e073c206b3803096df8477a60bab.diff
LOG: AMDGPU/GlobalISel: Select llvm.amdgcn.fmad.ftz
Added:
llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fmad.ftz.mir
Modified:
llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td
llvm/lib/Target/AMDGPU/SIInstructions.td
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td
index 7b95a5c88671..13afcb4cdb92 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td
@@ -246,7 +246,7 @@ def AMDGPUdiv_fmas : SDNode<"AMDGPUISD::DIV_FMAS", AMDGPUFmasOp,
// Denominator, src2 = Numerator).
def AMDGPUdiv_fixup_impl : SDNode<"AMDGPUISD::DIV_FIXUP", SDTFPTernaryOp>;
-def AMDGPUfmad_ftz : SDNode<"AMDGPUISD::FMAD_FTZ", SDTFPTernaryOp>;
+def AMDGPUfmad_ftz_impl : SDNode<"AMDGPUISD::FMAD_FTZ", SDTFPTernaryOp>;
// Look Up 2.0 / pi src0 with segment select src1[4:0]
def AMDGPUtrig_preop : SDNode<"AMDGPUISD::TRIG_PREOP", AMDGPUTrigPreOp>;
@@ -461,3 +461,7 @@ def AMDGPUpk_i16_i32 : PatFrags<(ops node:$src0, node:$src1),
def AMDGPUpk_u16_u32 : PatFrags<(ops node:$src0, node:$src1),
[(int_amdgcn_cvt_pk_u16 node:$src0, node:$src1),
(AMDGPUpk_u16_u32_impl node:$src0, node:$src1)]>;
+
+def AMDGPUfmad_ftz : PatFrags<(ops node:$src0, node:$src1, node:$src2),
+ [(int_amdgcn_fmad_ftz node:$src0, node:$src1, node:$src2),
+ (AMDGPUfmad_ftz_impl node:$src0, node:$src1, node:$src2)]>;
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index 013bd5fd40a8..79bbbc1eab60 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -782,13 +782,14 @@ defm : FMADPat <f32, V_MAC_F32_e64>;
class FMADModsPat<Instruction inst, SDPatternOperator mad_opr, ValueType Ty>
: GCNPat<
- (Ty (mad_opr (VOP3Mods Ty:$src0, i32:$src0_mod),
- (VOP3Mods Ty:$src1, i32:$src1_mod),
- (VOP3Mods Ty:$src2, i32:$src2_mod))),
+ (Ty (mad_opr (Ty (VOP3Mods Ty:$src0, i32:$src0_mod)),
+ (Ty (VOP3Mods Ty:$src1, i32:$src1_mod)),
+ (Ty (VOP3Mods Ty:$src2, i32:$src2_mod)))),
(inst $src0_mod, $src0, $src1_mod, $src1,
$src2_mod, $src2, DSTCLAMP.NONE, DSTOMOD.NONE)
>;
+// FIXME: This should select to V_MAC_F32
def : FMADModsPat<V_MAD_F32, AMDGPUfmad_ftz, f32>;
def : FMADModsPat<V_MAD_F16, AMDGPUfmad_ftz, f16> {
let SubtargetPredicate = Has16BitInsts;
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fmad.ftz.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fmad.ftz.mir
new file mode 100644
index 000000000000..da601b86fdb8
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fmad.ftz.mir
@@ -0,0 +1,233 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -march=amdgcn -mcpu=tahiti -mattr=+fp32-denormals -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=GCN %s
+# RUN: llc -march=amdgcn -mcpu=tahiti -mattr=-fp32-denormals -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=GCN %s
+# RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=+fp32-denormals -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=GCN %s
+# RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-fp32-denormals -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=GCN %s
+
+---
+name: fmad_ftz_s32_vvvv
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1, $vgpr2
+
+ ; GCN-LABEL: name: fmad_ftz_s32_vvvv
+ ; GCN: liveins: $vgpr0, $vgpr1, $vgpr2
+ ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GCN: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; GCN: [[V_MAD_F32_:%[0-9]+]]:vgpr_32 = V_MAD_F32 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec
+ ; GCN: S_ENDPGM 0, implicit [[V_MAD_F32_]]
+ %0:vgpr(s32) = COPY $vgpr0
+ %1:vgpr(s32) = COPY $vgpr1
+ %2:vgpr(s32) = COPY $vgpr2
+ %3:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmad.ftz), %0, %1, %2
+ S_ENDPGM 0, implicit %3
+...
+
+---
+name: fmad_ftz_s32_vsvv
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0, $vgpr0, $vgpr1
+
+ ; GCN-LABEL: name: fmad_ftz_s32_vsvv
+ ; GCN: liveins: $sgpr0, $vgpr0, $vgpr1
+ ; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+ ; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GCN: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GCN: [[V_MAD_F32_:%[0-9]+]]:vgpr_32 = V_MAD_F32 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec
+ ; GCN: S_ENDPGM 0, implicit [[V_MAD_F32_]]
+ %0:sgpr(s32) = COPY $sgpr0
+ %1:vgpr(s32) = COPY $vgpr0
+ %2:vgpr(s32) = COPY $vgpr1
+ %3:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmad.ftz), %0, %1, %2
+ S_ENDPGM 0, implicit %3
+...
+
+---
+name: fmad_ftz_s32_vvsv
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0, $vgpr0, $vgpr1
+
+ ; GCN-LABEL: name: fmad_ftz_s32_vvsv
+ ; GCN: liveins: $sgpr0, $vgpr0, $vgpr1
+ ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GCN: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0
+ ; GCN: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GCN: [[V_MAD_F32_:%[0-9]+]]:vgpr_32 = V_MAD_F32 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec
+ ; GCN: S_ENDPGM 0, implicit [[V_MAD_F32_]]
+ %0:vgpr(s32) = COPY $vgpr0
+ %1:sgpr(s32) = COPY $sgpr0
+ %2:vgpr(s32) = COPY $vgpr1
+ %3:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmad.ftz), %0, %1, %2
+ S_ENDPGM 0, implicit %3
+...
+
+---
+name: fmad_ftz_s32_vvvs
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0, $vgpr0, $vgpr1
+
+ ; GCN-LABEL: name: fmad_ftz_s32_vvvs
+ ; GCN: liveins: $sgpr0, $vgpr0, $vgpr1
+ ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GCN: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr0
+ ; GCN: [[V_MAD_F32_:%[0-9]+]]:vgpr_32 = V_MAD_F32 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec
+ ; GCN: S_ENDPGM 0, implicit [[V_MAD_F32_]]
+ %0:vgpr(s32) = COPY $vgpr0
+ %1:vgpr(s32) = COPY $vgpr1
+ %2:sgpr(s32) = COPY $sgpr0
+ %3:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmad.ftz), %0, %1, %2
+ S_ENDPGM 0, implicit %3
+...
+
+
+# The same SGPR is used for multiple operands, so this doesn't violate the constant bus restriction.
+---
+name: fmad_ftz_s32_vssv
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0, $vgpr0
+
+ ; GCN-LABEL: name: fmad_ftz_s32_vssv
+ ; GCN: liveins: $sgpr0, $vgpr0
+ ; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+ ; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GCN: [[V_MAD_F32_:%[0-9]+]]:vgpr_32 = V_MAD_F32 0, [[COPY]], 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $exec
+ ; GCN: S_ENDPGM 0, implicit [[V_MAD_F32_]]
+ %0:sgpr(s32) = COPY $sgpr0
+ %1:vgpr(s32) = COPY $vgpr0
+ %2:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmad.ftz), %0, %0, %1
+ S_ENDPGM 0, implicit %2
+...
+
+---
+name: fmad_ftz_s32_vsvs
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0, $vgpr0
+
+ ; GCN-LABEL: name: fmad_ftz_s32_vsvs
+ ; GCN: liveins: $sgpr0, $vgpr0
+ ; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+ ; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GCN: [[V_MAD_F32_:%[0-9]+]]:vgpr_32 = V_MAD_F32 0, [[COPY]], 0, [[COPY1]], 0, [[COPY]], 0, 0, implicit $exec
+ ; GCN: S_ENDPGM 0, implicit [[V_MAD_F32_]]
+ %0:sgpr(s32) = COPY $sgpr0
+ %1:vgpr(s32) = COPY $vgpr0
+ %2:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmad.ftz), %0, %1, %0
+ S_ENDPGM 0, implicit %2
+...
+
+---
+name: fmad_ftz_s32_vvss
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0, $vgpr0
+
+ ; GCN-LABEL: name: fmad_ftz_s32_vvss
+ ; GCN: liveins: $sgpr0, $vgpr0
+ ; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+ ; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GCN: [[V_MAD_F32_:%[0-9]+]]:vgpr_32 = V_MAD_F32 0, [[COPY1]], 0, [[COPY]], 0, [[COPY]], 0, 0, implicit $exec
+ ; GCN: S_ENDPGM 0, implicit [[V_MAD_F32_]]
+ %0:sgpr(s32) = COPY $sgpr0
+ %1:vgpr(s32) = COPY $vgpr0
+ %2:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmad.ftz), %1, %0, %0
+ S_ENDPGM 0, implicit %2
+...
+
+---
+name: fmad_ftz_s32_vsss
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0, $vgpr0
+
+ ; GCN-LABEL: name: fmad_ftz_s32_vsss
+ ; GCN: liveins: $sgpr0, $vgpr0
+ ; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+ ; GCN: [[V_MAD_F32_:%[0-9]+]]:vgpr_32 = V_MAD_F32 0, [[COPY]], 0, [[COPY]], 0, [[COPY]], 0, 0, implicit $exec
+ ; GCN: S_ENDPGM 0, implicit [[V_MAD_F32_]]
+ %0:sgpr(s32) = COPY $sgpr0
+ %1:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmad.ftz), %0, %0, %0
+ S_ENDPGM 0, implicit %1
+...
+
+
+# FIXME: RegBankSelect should probably have fixed this up (two different SGPR operands), but either way instruction selection should fail on it.
+# ---
+# name: fmad_ftz_s32_vssv_constant_bus_violation
+# legalized: true
+# regBankSelected: true
+# tracksRegLiveness: true
+
+# body: |
+# bb.0:
+# liveins: $sgpr0, $sgpr1, $vgpr0
+
+# %0:sgpr(s32) = COPY $sgpr0
+# %1:sgpr(s32) = COPY $sgpr1
+# %2:vgpr(s32) = COPY $vgpr0
+# %3:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmad.ftz), %0, %1, %2
+# S_ENDPGM 0, implicit %3
+# ...
+
+---
+name: fmad_ftz_s32_vvv_fneg_v
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1, $vgpr2
+
+ ; GCN-LABEL: name: fmad_ftz_s32_vvv_fneg_v
+ ; GCN: liveins: $vgpr0, $vgpr1, $vgpr2
+ ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GCN: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; GCN: [[V_MAD_F32_:%[0-9]+]]:vgpr_32 = V_MAD_F32 0, [[COPY]], 0, [[COPY1]], 1, [[COPY2]], 0, 0, implicit $exec
+ ; GCN: S_ENDPGM 0, implicit [[V_MAD_F32_]]
+ %0:vgpr(s32) = COPY $vgpr0
+ %1:vgpr(s32) = COPY $vgpr1
+ %2:vgpr(s32) = COPY $vgpr2
+ %3:vgpr(s32) = G_FNEG %2
+ %4:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmad.ftz), %0, %1, %3
+ S_ENDPGM 0, implicit %4
+...
More information about the llvm-commits
mailing list