[llvm] 7238faa - [AMDGPU] Add patterns for mad/mac legacy f32 instructions
Jay Foad via llvm-commits
llvm-commits at lists.llvm.org
Thu Oct 8 07:24:10 PDT 2020
Author: Jay Foad
Date: 2020-10-08T15:20:06+01:00
New Revision: 7238faa4ae977523903192e287d442eb53c49ee5
URL: https://github.com/llvm/llvm-project/commit/7238faa4ae977523903192e287d442eb53c49ee5
DIFF: https://github.com/llvm/llvm-project/commit/7238faa4ae977523903192e287d442eb53c49ee5.diff
LOG: [AMDGPU] Add patterns for mad/mac legacy f32 instructions
Note that all subtargets up to GFX10.1 have v_mad_legacy_f32, but GFX8/9
lack v_mac_legacy_f32. GFX10.3 has no mad/mac f32 instructions at all.
Differential Revision: https://reviews.llvm.org/D88890
Added:
Modified:
llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
llvm/lib/Target/AMDGPU/SIInstructions.td
llvm/lib/Target/AMDGPU/VOP2Instructions.td
llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.fmul.legacy.ll
llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fmul.legacy.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index cdb686fe0043..6d2e29590abf 100644
--- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -6889,6 +6889,8 @@ void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
if (Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 ||
Opc == AMDGPU::V_MAC_F32_e64_gfx10 ||
Opc == AMDGPU::V_MAC_F32_e64_vi ||
+ Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx6_gfx7 ||
+ Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx10 ||
Opc == AMDGPU::V_MAC_F16_e64_vi ||
Opc == AMDGPU::V_FMAC_F32_e64_gfx10 ||
Opc == AMDGPU::V_FMAC_F32_e64_vi ||
diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
index 5955cc75c8ea..33c666f29a2d 100644
--- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
+++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
@@ -385,6 +385,8 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
if (Res && (MI.getOpcode() == AMDGPU::V_MAC_F32_e64_vi ||
MI.getOpcode() == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 ||
MI.getOpcode() == AMDGPU::V_MAC_F32_e64_gfx10 ||
+ MI.getOpcode() == AMDGPU::V_MAC_LEGACY_F32_e64_gfx6_gfx7 ||
+ MI.getOpcode() == AMDGPU::V_MAC_LEGACY_F32_e64_gfx10 ||
MI.getOpcode() == AMDGPU::V_MAC_F16_e64_vi ||
MI.getOpcode() == AMDGPU::V_FMAC_F32_e64_vi ||
MI.getOpcode() == AMDGPU::V_FMAC_F32_e64_gfx10 ||
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index 7cffe615f3b3..068d8dc2a0fe 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -866,7 +866,8 @@ def : GCNPat <
// VOP2 Patterns
//===----------------------------------------------------------------------===//
-// TODO: Check only no src2 mods?
+// NoMods pattern used for mac. If there are any source modifiers then it's
+// better to select mad instead of mac.
class FMADPat <ValueType vt, Instruction inst, SDPatternOperator node>
: GCNPat <(vt (node (vt (VOP3NoMods vt:$src0)),
(vt (VOP3NoMods vt:$src1)),
@@ -875,18 +876,29 @@ class FMADPat <ValueType vt, Instruction inst, SDPatternOperator node>
SRCMODS.NONE, $src2, DSTCLAMP.NONE, DSTOMOD.NONE)
>;
-
// Prefer mac form when there are no modifiers.
let AddedComplexity = 9 in {
+let OtherPredicates = [HasMadMacF32Insts] in {
def : FMADPat <f32, V_MAC_F32_e64, fmad>;
def : FMADPat <f32, V_MAC_F32_e64, AMDGPUfmad_ftz>;
+// Don't allow source modifiers. If there are any source modifiers then it's
+// better to select mad instead of mac.
+let SubtargetPredicate = isGFX6GFX7GFX10 in
+def : GCNPat <
+ (f32 (fadd (AMDGPUfmul_legacy (VOP3NoMods f32:$src0),
+ (VOP3NoMods f32:$src1)),
+ (VOP3NoMods f32:$src2))),
+ (V_MAC_LEGACY_F32_e64 SRCMODS.NONE, $src0, SRCMODS.NONE, $src1,
+ SRCMODS.NONE, $src2, DSTCLAMP.NONE, DSTOMOD.NONE)
+>;
+} // OtherPredicates = [HasMadMacF32Insts]
+
let SubtargetPredicate = Has16BitInsts in {
def : FMADPat <f16, V_MAC_F16_e64, fmad>;
def : FMADPat <f16, V_MAC_F16_e64, AMDGPUfmad_ftz>;
-}
-
-}
+} // SubtargetPredicate = Has16BitInsts
+} // AddedComplexity = 9
class FMADModsPat<ValueType Ty, Instruction inst, SDPatternOperator mad_opr>
: GCNPat<
@@ -897,11 +909,20 @@ class FMADModsPat<ValueType Ty, Instruction inst, SDPatternOperator mad_opr>
$src2_mod, $src2, DSTCLAMP.NONE, DSTOMOD.NONE)
>;
-let SubtargetPredicate = HasMadMacF32Insts in
+let OtherPredicates = [HasMadMacF32Insts] in {
def : FMADModsPat<f32, V_MAD_F32, AMDGPUfmad_ftz>;
-def : FMADModsPat<f16, V_MAD_F16, AMDGPUfmad_ftz> {
- let SubtargetPredicate = Has16BitInsts;
-}
+
+def : GCNPat <
+ (f32 (fadd (AMDGPUfmul_legacy (VOP3Mods f32:$src0, i32:$src0_mod),
+ (VOP3Mods f32:$src1, i32:$src1_mod)),
+ (VOP3Mods f32:$src2, i32:$src2_mod))),
+ (V_MAD_LEGACY_F32 $src0_mod, $src0, $src1_mod, $src1,
+ $src2_mod, $src2, DSTCLAMP.NONE, DSTOMOD.NONE)
+>;
+} // OtherPredicates = [HasMadMacF32Insts]
+
+let SubtargetPredicate = Has16BitInsts in
+def : FMADModsPat<f16, V_MAD_F16, AMDGPUfmad_ftz>;
class VOPSelectModsPat <ValueType vt> : GCNPat <
(vt (select i1:$src0, (VOP3Mods vt:$src1, i32:$src1_mods),
diff --git a/llvm/lib/Target/AMDGPU/VOP2Instructions.td b/llvm/lib/Target/AMDGPU/VOP2Instructions.td
index 4c263de673d6..09f65c5c944e 100644
--- a/llvm/lib/Target/AMDGPU/VOP2Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP2Instructions.td
@@ -499,11 +499,15 @@ let OtherPredicates = [HasMadMacF32Insts] in {
let Constraints = "$vdst = $src2", DisableEncoding="$src2",
isConvertibleToThreeAddress = 1 in {
defm V_MAC_F32 : VOP2Inst <"v_mac_f32", VOP_MAC_F32>;
-}
+
+let SubtargetPredicate = isGFX6GFX7GFX10 in
+defm V_MAC_LEGACY_F32 : VOP2Inst <"v_mac_legacy_f32", VOP_MAC_F32>;
+} // End Constraints = "$vdst = $src2", DisableEncoding="$src2",
+ // isConvertibleToThreeAddress = 1
def V_MADAK_F32 : VOP2_Pseudo <"v_madak_f32", VOP_MADAK_F32, []>;
} // End OtherPredicates = [HasMadMacF32Insts]
-}
+} // End mayRaiseFPException = 0
// No patterns so that the scalar instructions are always selected.
// The scalar versions will be replaced with vector when needed later.
@@ -557,10 +561,6 @@ defm V_MAX_LEGACY_F32 : VOP2Inst <"v_max_legacy_f32", VOP_F32_F32_F32, AMDGPUfma
} // End SubtargetPredicate = isGFX6GFX7
let isCommutable = 1 in {
-let SubtargetPredicate = isGFX6GFX7GFX10 in {
-let OtherPredicates = [HasMadMacF32Insts] in
-defm V_MAC_LEGACY_F32 : VOP2Inst <"v_mac_legacy_f32", VOP_F32_F32_F32>;
-} // End SubtargetPredicate = isGFX6GFX7GFX10
let SubtargetPredicate = isGFX6GFX7 in {
defm V_LSHR_B32 : VOP2Inst <"v_lshr_b32", VOP_PAT_GEN<VOP_I32_I32_I32>, srl>;
defm V_ASHR_I32 : VOP2Inst <"v_ashr_i32", VOP_PAT_GEN<VOP_I32_I32_I32>, sra>;
@@ -1322,7 +1322,6 @@ let SubtargetPredicate = isGFX6GFX7 in {
defm V_ADD_F32 : VOP2_Real_gfx6_gfx7_gfx10<0x003>;
defm V_SUB_F32 : VOP2_Real_gfx6_gfx7_gfx10<0x004>;
defm V_SUBREV_F32 : VOP2_Real_gfx6_gfx7_gfx10<0x005>;
-let OtherPredicates = [HasMadMacF32Insts] in
defm V_MAC_LEGACY_F32 : VOP2_Real_gfx6_gfx7_gfx10<0x006>;
defm V_MUL_LEGACY_F32 : VOP2_Real_gfx6_gfx7_gfx10<0x007>;
defm V_MUL_F32 : VOP2_Real_gfx6_gfx7_gfx10<0x008>;
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.fmul.legacy.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.fmul.legacy.ll
index 3ebe19a156f0..ebe3ffd06ced 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.fmul.legacy.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.fmul.legacy.ll
@@ -1,53 +1,196 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -global-isel -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
-; RUN: llc -global-isel -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
+; RUN: llc -global-isel -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX6 %s
+; RUN: llc -global-isel -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX8 %s
+; RUN: llc -global-isel -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9 %s
+; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX101 %s
+; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1030 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX103 %s
define float @v_mul_legacy_f32(float %a, float %b) {
-; GCN-LABEL: v_mul_legacy_f32:
-; GCN: ; %bb.0:
-; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT: v_mul_legacy_f32_e32 v0, v0, v1
-; GCN-NEXT: s_setpc_b64 s[30:31]
+; GFX6-LABEL: v_mul_legacy_f32:
+; GFX6: ; %bb.0:
+; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-NEXT: v_mul_legacy_f32_e32 v0, v0, v1
+; GFX6-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: v_mul_legacy_f32:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: v_mul_legacy_f32_e32 v0, v0, v1
+; GFX8-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: v_mul_legacy_f32:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_mul_legacy_f32_e32 v0, v0, v1
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX101-LABEL: v_mul_legacy_f32:
+; GFX101: ; %bb.0:
+; GFX101-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX101-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX101-NEXT: v_mul_legacy_f32_e32 v0, v0, v1
+; GFX101-NEXT: ; implicit-def: $vcc_hi
+; GFX101-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX103-LABEL: v_mul_legacy_f32:
+; GFX103: ; %bb.0:
+; GFX103-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX103-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX103-NEXT: v_mul_legacy_f32_e32 v0, v0, v1
+; GFX103-NEXT: ; implicit-def: $vcc_hi
+; GFX103-NEXT: s_setpc_b64 s[30:31]
%result = call float @llvm.amdgcn.fmul.legacy(float %a, float %b)
ret float %result
}
define float @v_mul_legacy_undef0_f32(float %a) {
-; GCN-LABEL: v_mul_legacy_undef0_f32:
-; GCN: ; %bb.0:
-; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT: v_mul_legacy_f32_e32 v0, s4, v0
-; GCN-NEXT: s_setpc_b64 s[30:31]
+; GFX6-LABEL: v_mul_legacy_undef0_f32:
+; GFX6: ; %bb.0:
+; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-NEXT: v_mul_legacy_f32_e32 v0, s4, v0
+; GFX6-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: v_mul_legacy_undef0_f32:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: v_mul_legacy_f32_e32 v0, s4, v0
+; GFX8-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: v_mul_legacy_undef0_f32:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_mul_legacy_f32_e32 v0, s4, v0
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX101-LABEL: v_mul_legacy_undef0_f32:
+; GFX101: ; %bb.0:
+; GFX101-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX101-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX101-NEXT: v_mul_legacy_f32_e32 v0, s4, v0
+; GFX101-NEXT: ; implicit-def: $vcc_hi
+; GFX101-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX103-LABEL: v_mul_legacy_undef0_f32:
+; GFX103: ; %bb.0:
+; GFX103-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX103-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX103-NEXT: v_mul_legacy_f32_e32 v0, s4, v0
+; GFX103-NEXT: ; implicit-def: $vcc_hi
+; GFX103-NEXT: s_setpc_b64 s[30:31]
%result = call float @llvm.amdgcn.fmul.legacy(float undef, float %a)
ret float %result
}
define float @v_mul_legacy_undef1_f32(float %a) {
-; GCN-LABEL: v_mul_legacy_undef1_f32:
-; GCN: ; %bb.0:
-; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT: v_mul_legacy_f32_e32 v0, s4, v0
-; GCN-NEXT: s_setpc_b64 s[30:31]
+; GFX6-LABEL: v_mul_legacy_undef1_f32:
+; GFX6: ; %bb.0:
+; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-NEXT: v_mul_legacy_f32_e32 v0, s4, v0
+; GFX6-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: v_mul_legacy_undef1_f32:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: v_mul_legacy_f32_e32 v0, s4, v0
+; GFX8-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: v_mul_legacy_undef1_f32:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_mul_legacy_f32_e32 v0, s4, v0
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX101-LABEL: v_mul_legacy_undef1_f32:
+; GFX101: ; %bb.0:
+; GFX101-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX101-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX101-NEXT: v_mul_legacy_f32_e32 v0, s4, v0
+; GFX101-NEXT: ; implicit-def: $vcc_hi
+; GFX101-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX103-LABEL: v_mul_legacy_undef1_f32:
+; GFX103: ; %bb.0:
+; GFX103-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX103-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX103-NEXT: v_mul_legacy_f32_e32 v0, s4, v0
+; GFX103-NEXT: ; implicit-def: $vcc_hi
+; GFX103-NEXT: s_setpc_b64 s[30:31]
%result = call float @llvm.amdgcn.fmul.legacy(float %a, float undef)
ret float %result
}
define float @v_mul_legacy_undef_f32() {
-; GCN-LABEL: v_mul_legacy_undef_f32:
-; GCN: ; %bb.0:
-; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT: v_mul_legacy_f32_e64 v0, s4, s4
-; GCN-NEXT: s_setpc_b64 s[30:31]
+; GFX6-LABEL: v_mul_legacy_undef_f32:
+; GFX6: ; %bb.0:
+; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-NEXT: v_mul_legacy_f32_e64 v0, s4, s4
+; GFX6-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: v_mul_legacy_undef_f32:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: v_mul_legacy_f32_e64 v0, s4, s4
+; GFX8-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: v_mul_legacy_undef_f32:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_mul_legacy_f32_e64 v0, s4, s4
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX101-LABEL: v_mul_legacy_undef_f32:
+; GFX101: ; %bb.0:
+; GFX101-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX101-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX101-NEXT: v_mul_legacy_f32_e64 v0, s4, s4
+; GFX101-NEXT: ; implicit-def: $vcc_hi
+; GFX101-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX103-LABEL: v_mul_legacy_undef_f32:
+; GFX103: ; %bb.0:
+; GFX103-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX103-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX103-NEXT: v_mul_legacy_f32_e64 v0, s4, s4
+; GFX103-NEXT: ; implicit-def: $vcc_hi
+; GFX103-NEXT: s_setpc_b64 s[30:31]
%result = call float @llvm.amdgcn.fmul.legacy(float undef, float undef)
ret float %result
}
define float @v_mul_legacy_fabs_f32(float %a, float %b) {
-; GCN-LABEL: v_mul_legacy_fabs_f32:
-; GCN: ; %bb.0:
-; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT: v_mul_legacy_f32_e64 v0, |v0|, |v1|
-; GCN-NEXT: s_setpc_b64 s[30:31]
+; GFX6-LABEL: v_mul_legacy_fabs_f32:
+; GFX6: ; %bb.0:
+; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-NEXT: v_mul_legacy_f32_e64 v0, |v0|, |v1|
+; GFX6-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: v_mul_legacy_fabs_f32:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: v_mul_legacy_f32_e64 v0, |v0|, |v1|
+; GFX8-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: v_mul_legacy_fabs_f32:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_mul_legacy_f32_e64 v0, |v0|, |v1|
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX101-LABEL: v_mul_legacy_fabs_f32:
+; GFX101: ; %bb.0:
+; GFX101-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX101-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX101-NEXT: v_mul_legacy_f32_e64 v0, |v0|, |v1|
+; GFX101-NEXT: ; implicit-def: $vcc_hi
+; GFX101-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX103-LABEL: v_mul_legacy_fabs_f32:
+; GFX103: ; %bb.0:
+; GFX103-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX103-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX103-NEXT: v_mul_legacy_f32_e64 v0, |v0|, |v1|
+; GFX103-NEXT: ; implicit-def: $vcc_hi
+; GFX103-NEXT: s_setpc_b64 s[30:31]
%a.fabs = call float @llvm.fabs.f32(float %a)
%b.fabs = call float @llvm.fabs.f32(float %b)
%result = call float @llvm.amdgcn.fmul.legacy(float %a.fabs, float %b.fabs)
@@ -55,76 +198,311 @@ define float @v_mul_legacy_fabs_f32(float %a, float %b) {
}
define float @v_mul_legacy_fneg_f32(float %a, float %b) {
-; GCN-LABEL: v_mul_legacy_fneg_f32:
-; GCN: ; %bb.0:
-; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT: v_mul_legacy_f32_e64 v0, -v0, -v1
-; GCN-NEXT: s_setpc_b64 s[30:31]
- %a.fabs = fneg float %a
- %b.fabs = fneg float %b
- %result = call float @llvm.amdgcn.fmul.legacy(float %a.fabs, float %b.fabs)
+; GFX6-LABEL: v_mul_legacy_fneg_f32:
+; GFX6: ; %bb.0:
+; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-NEXT: v_mul_legacy_f32_e64 v0, -v0, -v1
+; GFX6-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: v_mul_legacy_fneg_f32:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: v_mul_legacy_f32_e64 v0, -v0, -v1
+; GFX8-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: v_mul_legacy_fneg_f32:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_mul_legacy_f32_e64 v0, -v0, -v1
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX101-LABEL: v_mul_legacy_fneg_f32:
+; GFX101: ; %bb.0:
+; GFX101-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX101-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX101-NEXT: v_mul_legacy_f32_e64 v0, -v0, -v1
+; GFX101-NEXT: ; implicit-def: $vcc_hi
+; GFX101-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX103-LABEL: v_mul_legacy_fneg_f32:
+; GFX103: ; %bb.0:
+; GFX103-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX103-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX103-NEXT: v_mul_legacy_f32_e64 v0, -v0, -v1
+; GFX103-NEXT: ; implicit-def: $vcc_hi
+; GFX103-NEXT: s_setpc_b64 s[30:31]
+ %a.fneg = fneg float %a
+ %b.fneg = fneg float %b
+ %result = call float @llvm.amdgcn.fmul.legacy(float %a.fneg, float %b.fneg)
ret float %result
}
-; TODO: Should match mac_legacy/mad_legacy
define float @v_mad_legacy_f32(float %a, float %b, float %c) {
-; GCN-LABEL: v_mad_legacy_f32:
-; GCN: ; %bb.0:
-; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT: v_mul_legacy_f32_e32 v0, v0, v1
-; GCN-NEXT: v_add_f32_e32 v0, v0, v2
-; GCN-NEXT: s_setpc_b64 s[30:31]
+; GFX6-LABEL: v_mad_legacy_f32:
+; GFX6: ; %bb.0:
+; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-NEXT: v_mac_legacy_f32_e64 v2, v0, v1
+; GFX6-NEXT: v_mov_b32_e32 v0, v2
+; GFX6-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: v_mad_legacy_f32:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: v_mad_legacy_f32 v0, v0, v1, v2
+; GFX8-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: v_mad_legacy_f32:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_mad_legacy_f32 v0, v0, v1, v2
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX101-LABEL: v_mad_legacy_f32:
+; GFX101: ; %bb.0:
+; GFX101-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX101-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX101-NEXT: v_mac_legacy_f32_e64 v2, v0, v1
+; GFX101-NEXT: ; implicit-def: $vcc_hi
+; GFX101-NEXT: v_mov_b32_e32 v0, v2
+; GFX101-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX103-LABEL: v_mad_legacy_f32:
+; GFX103: ; %bb.0:
+; GFX103-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX103-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX103-NEXT: v_mul_legacy_f32_e32 v0, v0, v1
+; GFX103-NEXT: ; implicit-def: $vcc_hi
+; GFX103-NEXT: v_add_f32_e32 v0, v0, v2
+; GFX103-NEXT: s_setpc_b64 s[30:31]
%mul = call float @llvm.amdgcn.fmul.legacy(float %a, float %b)
%add = fadd float %mul, %c
ret float %add
}
+define float @v_mad_legacy_fneg_f32(float %a, float %b, float %c) {
+; GFX6-LABEL: v_mad_legacy_fneg_f32:
+; GFX6: ; %bb.0:
+; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-NEXT: v_mad_legacy_f32 v0, -v0, -v1, v2
+; GFX6-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: v_mad_legacy_fneg_f32:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: v_mad_legacy_f32 v0, -v0, -v1, v2
+; GFX8-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: v_mad_legacy_fneg_f32:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_mad_legacy_f32 v0, -v0, -v1, v2
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX101-LABEL: v_mad_legacy_fneg_f32:
+; GFX101: ; %bb.0:
+; GFX101-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX101-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX101-NEXT: v_mad_legacy_f32 v0, -v0, -v1, v2
+; GFX101-NEXT: ; implicit-def: $vcc_hi
+; GFX101-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX103-LABEL: v_mad_legacy_fneg_f32:
+; GFX103: ; %bb.0:
+; GFX103-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX103-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX103-NEXT: v_mul_legacy_f32_e64 v0, -v0, -v1
+; GFX103-NEXT: ; implicit-def: $vcc_hi
+; GFX103-NEXT: v_add_f32_e32 v0, v0, v2
+; GFX103-NEXT: s_setpc_b64 s[30:31]
+ %a.fneg = fneg float %a
+ %b.fneg = fneg float %b
+ %mul = call float @llvm.amdgcn.fmul.legacy(float %a.fneg, float %b.fneg)
+ %add = fadd float %mul, %c
+ ret float %add
+}
+
define amdgpu_ps float @s_mul_legacy_f32(float inreg %a, float inreg %b) {
-; GCN-LABEL: s_mul_legacy_f32:
-; GCN: ; %bb.0:
-; GCN-NEXT: v_mov_b32_e32 v0, s1
-; GCN-NEXT: v_mul_legacy_f32_e32 v0, s0, v0
-; GCN-NEXT: ; return to shader part epilog
+; GFX6-LABEL: s_mul_legacy_f32:
+; GFX6: ; %bb.0:
+; GFX6-NEXT: v_mov_b32_e32 v0, s1
+; GFX6-NEXT: v_mul_legacy_f32_e32 v0, s0, v0
+; GFX6-NEXT: ; return to shader part epilog
+;
+; GFX8-LABEL: s_mul_legacy_f32:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: v_mov_b32_e32 v0, s1
+; GFX8-NEXT: v_mul_legacy_f32_e32 v0, s0, v0
+; GFX8-NEXT: ; return to shader part epilog
+;
+; GFX9-LABEL: s_mul_legacy_f32:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: v_mov_b32_e32 v0, s1
+; GFX9-NEXT: v_mul_legacy_f32_e32 v0, s0, v0
+; GFX9-NEXT: ; return to shader part epilog
+;
+; GFX101-LABEL: s_mul_legacy_f32:
+; GFX101: ; %bb.0:
+; GFX101-NEXT: v_mul_legacy_f32_e64 v0, s0, s1
+; GFX101-NEXT: ; implicit-def: $vcc_hi
+; GFX101-NEXT: ; return to shader part epilog
+;
+; GFX103-LABEL: s_mul_legacy_f32:
+; GFX103: ; %bb.0:
+; GFX103-NEXT: v_mul_legacy_f32_e64 v0, s0, s1
+; GFX103-NEXT: ; implicit-def: $vcc_hi
+; GFX103-NEXT: ; return to shader part epilog
%result = call float @llvm.amdgcn.fmul.legacy(float %a, float %b)
ret float %result
}
define float @v_mul_legacy_f32_1.0(float %a) {
-; GCN-LABEL: v_mul_legacy_f32_1.0:
-; GCN: ; %bb.0:
-; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT: v_mul_legacy_f32_e32 v0, 1.0, v0
-; GCN-NEXT: s_setpc_b64 s[30:31]
+; GFX6-LABEL: v_mul_legacy_f32_1.0:
+; GFX6: ; %bb.0:
+; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-NEXT: v_mul_legacy_f32_e32 v0, 1.0, v0
+; GFX6-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: v_mul_legacy_f32_1.0:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: v_mul_legacy_f32_e32 v0, 1.0, v0
+; GFX8-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: v_mul_legacy_f32_1.0:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_mul_legacy_f32_e32 v0, 1.0, v0
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX101-LABEL: v_mul_legacy_f32_1.0:
+; GFX101: ; %bb.0:
+; GFX101-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX101-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX101-NEXT: v_mul_legacy_f32_e32 v0, 1.0, v0
+; GFX101-NEXT: ; implicit-def: $vcc_hi
+; GFX101-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX103-LABEL: v_mul_legacy_f32_1.0:
+; GFX103: ; %bb.0:
+; GFX103-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX103-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX103-NEXT: v_mul_legacy_f32_e32 v0, 1.0, v0
+; GFX103-NEXT: ; implicit-def: $vcc_hi
+; GFX103-NEXT: s_setpc_b64 s[30:31]
%result = call float @llvm.amdgcn.fmul.legacy(float %a, float 1.0)
ret float %result
}
define float @v_mul_legacy_f32_1.0_swap(float %b) {
-; GCN-LABEL: v_mul_legacy_f32_1.0_swap:
-; GCN: ; %bb.0:
-; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT: v_mul_legacy_f32_e32 v0, 1.0, v0
-; GCN-NEXT: s_setpc_b64 s[30:31]
+; GFX6-LABEL: v_mul_legacy_f32_1.0_swap:
+; GFX6: ; %bb.0:
+; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-NEXT: v_mul_legacy_f32_e32 v0, 1.0, v0
+; GFX6-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: v_mul_legacy_f32_1.0_swap:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: v_mul_legacy_f32_e32 v0, 1.0, v0
+; GFX8-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: v_mul_legacy_f32_1.0_swap:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_mul_legacy_f32_e32 v0, 1.0, v0
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX101-LABEL: v_mul_legacy_f32_1.0_swap:
+; GFX101: ; %bb.0:
+; GFX101-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX101-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX101-NEXT: v_mul_legacy_f32_e32 v0, 1.0, v0
+; GFX101-NEXT: ; implicit-def: $vcc_hi
+; GFX101-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX103-LABEL: v_mul_legacy_f32_1.0_swap:
+; GFX103: ; %bb.0:
+; GFX103-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX103-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX103-NEXT: v_mul_legacy_f32_e32 v0, 1.0, v0
+; GFX103-NEXT: ; implicit-def: $vcc_hi
+; GFX103-NEXT: s_setpc_b64 s[30:31]
%result = call float @llvm.amdgcn.fmul.legacy(float 1.0, float %b)
ret float %result
}
define float @v_mul_legacy_f32_2.0(float %a) {
-; GCN-LABEL: v_mul_legacy_f32_2.0:
-; GCN: ; %bb.0:
-; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT: v_mul_legacy_f32_e32 v0, 2.0, v0
-; GCN-NEXT: s_setpc_b64 s[30:31]
+; GFX6-LABEL: v_mul_legacy_f32_2.0:
+; GFX6: ; %bb.0:
+; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-NEXT: v_mul_legacy_f32_e32 v0, 2.0, v0
+; GFX6-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: v_mul_legacy_f32_2.0:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: v_mul_legacy_f32_e32 v0, 2.0, v0
+; GFX8-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: v_mul_legacy_f32_2.0:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_mul_legacy_f32_e32 v0, 2.0, v0
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX101-LABEL: v_mul_legacy_f32_2.0:
+; GFX101: ; %bb.0:
+; GFX101-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX101-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX101-NEXT: v_mul_legacy_f32_e32 v0, 2.0, v0
+; GFX101-NEXT: ; implicit-def: $vcc_hi
+; GFX101-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX103-LABEL: v_mul_legacy_f32_2.0:
+; GFX103: ; %bb.0:
+; GFX103-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX103-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX103-NEXT: v_mul_legacy_f32_e32 v0, 2.0, v0
+; GFX103-NEXT: ; implicit-def: $vcc_hi
+; GFX103-NEXT: s_setpc_b64 s[30:31]
%result = call float @llvm.amdgcn.fmul.legacy(float %a, float 2.0)
ret float %result
}
define float @v_mul_legacy_f32_2.0_swap(float %b) {
-; GCN-LABEL: v_mul_legacy_f32_2.0_swap:
-; GCN: ; %bb.0:
-; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT: v_mul_legacy_f32_e32 v0, 2.0, v0
-; GCN-NEXT: s_setpc_b64 s[30:31]
+; GFX6-LABEL: v_mul_legacy_f32_2.0_swap:
+; GFX6: ; %bb.0:
+; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-NEXT: v_mul_legacy_f32_e32 v0, 2.0, v0
+; GFX6-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: v_mul_legacy_f32_2.0_swap:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: v_mul_legacy_f32_e32 v0, 2.0, v0
+; GFX8-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: v_mul_legacy_f32_2.0_swap:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_mul_legacy_f32_e32 v0, 2.0, v0
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX101-LABEL: v_mul_legacy_f32_2.0_swap:
+; GFX101: ; %bb.0:
+; GFX101-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX101-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX101-NEXT: v_mul_legacy_f32_e32 v0, 2.0, v0
+; GFX101-NEXT: ; implicit-def: $vcc_hi
+; GFX101-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX103-LABEL: v_mul_legacy_f32_2.0_swap:
+; GFX103: ; %bb.0:
+; GFX103-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX103-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX103-NEXT: v_mul_legacy_f32_e32 v0, 2.0, v0
+; GFX103-NEXT: ; implicit-def: $vcc_hi
+; GFX103-NEXT: s_setpc_b64 s[30:31]
%result = call float @llvm.amdgcn.fmul.legacy(float 2.0, float %b)
ret float %result
}
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fmul.legacy.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fmul.legacy.ll
index be8462d09064..a91745b636d9 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fmul.legacy.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fmul.legacy.ll
@@ -1,9 +1,11 @@
-; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
-; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
-
+; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,MADMACF32,GFX6 %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,MADMACF32,GFX8 %s
+; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,MADMACF32,GFX9 %s
+; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,MADMACF32,GFX101 %s
+; RUN: llc -march=amdgcn -mcpu=gfx1030 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,NOMADMACF32,GFX103 %s
; GCN-LABEL: {{^}}test_mul_legacy_f32:
-; GCN: v_mul_legacy_f32_e32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}
+; GCN: v_mul_legacy_f32_e{{(32|64)}} v{{[0-9]+}}, s{{[0-9]+}}, {{[sv][0-9]+}}
define amdgpu_kernel void @test_mul_legacy_f32(float addrspace(1)* %out, float %a, float %b) #0 {
%result = call float @llvm.amdgcn.fmul.legacy(float %a, float %b)
store float %result, float addrspace(1)* %out, align 4
@@ -11,7 +13,7 @@ define amdgpu_kernel void @test_mul_legacy_f32(float addrspace(1)* %out, float %
}
; GCN-LABEL: {{^}}test_mul_legacy_undef0_f32:
-; GCN: v_mul_legacy_f32_e32
+; GCN: v_mul_legacy_f32_e{{(32|64)}} v{{[0-9]+}}, s{{[0-9]+}}, {{[sv][0-9]+}}
define amdgpu_kernel void @test_mul_legacy_undef0_f32(float addrspace(1)* %out, float %a) #0 {
%result = call float @llvm.amdgcn.fmul.legacy(float undef, float %a)
store float %result, float addrspace(1)* %out, align 4
@@ -19,7 +21,7 @@ define amdgpu_kernel void @test_mul_legacy_undef0_f32(float addrspace(1)* %out,
}
; GCN-LABEL: {{^}}test_mul_legacy_undef1_f32:
-; GCN: v_mul_legacy_f32_e32
+; GCN: v_mul_legacy_f32_e{{(32|64)}} v{{[0-9]+}}, s{{[0-9]+}}, {{[sv][0-9]+}}
define amdgpu_kernel void @test_mul_legacy_undef1_f32(float addrspace(1)* %out, float %a) #0 {
%result = call float @llvm.amdgcn.fmul.legacy(float %a, float undef)
store float %result, float addrspace(1)* %out, align 4
@@ -27,7 +29,7 @@ define amdgpu_kernel void @test_mul_legacy_undef1_f32(float addrspace(1)* %out,
}
; GCN-LABEL: {{^}}test_mul_legacy_fabs_f32:
-; GCN: v_mul_legacy_f32_e64 v{{[0-9]+}}, |s{{[0-9]+}}|, |v{{[0-9]+}}|
+; GCN: v_mul_legacy_f32_e{{(32|64)}} v{{[0-9]+}}, |s{{[0-9]+}}|, |{{[sv][0-9]+}}|
define amdgpu_kernel void @test_mul_legacy_fabs_f32(float addrspace(1)* %out, float %a, float %b) #0 {
%a.fabs = call float @llvm.fabs.f32(float %a)
%b.fabs = call float @llvm.fabs.f32(float %b)
@@ -36,10 +38,13 @@ define amdgpu_kernel void @test_mul_legacy_fabs_f32(float addrspace(1)* %out, fl
ret void
}
-; TODO: Should match mac_legacy/mad_legacy
; GCN-LABEL: {{^}}test_mad_legacy_f32:
-; GCN: v_mul_legacy_f32_e32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}
-; GCN: v_add_f32_e32
+; GFX6: v_mac_legacy_f32_e64 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}
+; GFX8: v_mad_legacy_f32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}
+; GFX9: v_mad_legacy_f32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}
+; GFX101: v_mac_legacy_f32_e64 v{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}
+; GFX103: v_mul_legacy_f32_e64 v{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}
+; GFX103: v_add_f32_e32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}
define amdgpu_kernel void @test_mad_legacy_f32(float addrspace(1)* %out, float %a, float %b, float %c) #0 {
%mul = call float @llvm.amdgcn.fmul.legacy(float %a, float %b)
%add = fadd float %mul, %c
@@ -47,6 +52,19 @@ define amdgpu_kernel void @test_mad_legacy_f32(float addrspace(1)* %out, float %
ret void
}
+; GCN-LABEL: {{^}}test_mad_legacy_fneg_f32:
+; MADMACF32: v_mad_legacy_f32 v{{[0-9]+}}, -s{{[0-9]+}}, -{{[sv][0-9]+}}, v{{[0-9]+}}
+; NOMADMACF32: v_mul_legacy_f32_e64 v{{[0-9]+}}, -s{{[0-9]+}}, -s{{[0-9]+}}
+; NOMADMACF32: v_add_f32_e32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}
+define amdgpu_kernel void @test_mad_legacy_fneg_f32(float addrspace(1)* %out, float %a, float %b, float %c) #0 {
+ %a.fneg = fneg float %a
+ %b.fneg = fneg float %b
+ %mul = call float @llvm.amdgcn.fmul.legacy(float %a.fneg, float %b.fneg)
+ %add = fadd float %mul, %c
+ store float %add, float addrspace(1)* %out, align 4
+ ret void
+}
+
declare float @llvm.fabs.f32(float) #1
declare float @llvm.amdgcn.fmul.legacy(float, float) #1
More information about the llvm-commits mailing list