[llvm] 7238faa - [AMDGPU] Add patterns for mad/mac legacy f32 instructions

Jay Foad via llvm-commits llvm-commits at lists.llvm.org
Thu Oct 8 07:24:10 PDT 2020


Author: Jay Foad
Date: 2020-10-08T15:20:06+01:00
New Revision: 7238faa4ae977523903192e287d442eb53c49ee5

URL: https://github.com/llvm/llvm-project/commit/7238faa4ae977523903192e287d442eb53c49ee5
DIFF: https://github.com/llvm/llvm-project/commit/7238faa4ae977523903192e287d442eb53c49ee5.diff

LOG: [AMDGPU] Add patterns for mad/mac legacy f32 instructions

Note that all subtargets up to and including GFX10.1 have v_mad_legacy_f32,
but v_mac_legacy_f32 exists only on GFX6/7 and GFX10.1; GFX8/9 lack it.
GFX10.3 has no mad/mac f32 instructions at all.

Differential Revision: https://reviews.llvm.org/D88890

Added: 
    

Modified: 
    llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
    llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
    llvm/lib/Target/AMDGPU/SIInstructions.td
    llvm/lib/Target/AMDGPU/VOP2Instructions.td
    llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.fmul.legacy.ll
    llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fmul.legacy.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index cdb686fe0043..6d2e29590abf 100644
--- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -6889,6 +6889,8 @@ void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
   if (Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 ||
       Opc == AMDGPU::V_MAC_F32_e64_gfx10 ||
       Opc == AMDGPU::V_MAC_F32_e64_vi ||
+      Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx6_gfx7 ||
+      Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx10 ||
       Opc == AMDGPU::V_MAC_F16_e64_vi ||
       Opc == AMDGPU::V_FMAC_F32_e64_gfx10 ||
       Opc == AMDGPU::V_FMAC_F32_e64_vi ||

diff  --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
index 5955cc75c8ea..33c666f29a2d 100644
--- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
+++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
@@ -385,6 +385,8 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
   if (Res && (MI.getOpcode() == AMDGPU::V_MAC_F32_e64_vi ||
               MI.getOpcode() == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 ||
               MI.getOpcode() == AMDGPU::V_MAC_F32_e64_gfx10 ||
+              MI.getOpcode() == AMDGPU::V_MAC_LEGACY_F32_e64_gfx6_gfx7 ||
+              MI.getOpcode() == AMDGPU::V_MAC_LEGACY_F32_e64_gfx10 ||
               MI.getOpcode() == AMDGPU::V_MAC_F16_e64_vi ||
               MI.getOpcode() == AMDGPU::V_FMAC_F32_e64_vi ||
               MI.getOpcode() == AMDGPU::V_FMAC_F32_e64_gfx10 ||

diff  --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index 7cffe615f3b3..068d8dc2a0fe 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -866,7 +866,8 @@ def : GCNPat <
 // VOP2 Patterns
 //===----------------------------------------------------------------------===//
 
-// TODO: Check only no src2 mods?
+// NoMods pattern used for mac. If there are any source modifiers then it's
+// better to select mad instead of mac.
 class FMADPat <ValueType vt, Instruction inst, SDPatternOperator node>
   : GCNPat <(vt (node (vt (VOP3NoMods vt:$src0)),
                       (vt (VOP3NoMods vt:$src1)),
@@ -875,18 +876,29 @@ class FMADPat <ValueType vt, Instruction inst, SDPatternOperator node>
           SRCMODS.NONE, $src2, DSTCLAMP.NONE, DSTOMOD.NONE)
 >;
 
-
 // Prefer mac form when there are no modifiers.
 let AddedComplexity = 9 in {
+let OtherPredicates = [HasMadMacF32Insts] in {
 def : FMADPat <f32, V_MAC_F32_e64, fmad>;
 def : FMADPat <f32, V_MAC_F32_e64, AMDGPUfmad_ftz>;
 
+// Don't allow source modifiers. If there are any source modifiers then it's
+// better to select mad instead of mac.
+let SubtargetPredicate = isGFX6GFX7GFX10 in
+def : GCNPat <
+      (f32 (fadd (AMDGPUfmul_legacy (VOP3NoMods f32:$src0),
+                                    (VOP3NoMods f32:$src1)),
+                 (VOP3NoMods f32:$src2))),
+      (V_MAC_LEGACY_F32_e64 SRCMODS.NONE, $src0, SRCMODS.NONE, $src1,
+                            SRCMODS.NONE, $src2, DSTCLAMP.NONE, DSTOMOD.NONE)
+>;
+} // OtherPredicates = [HasMadMacF32Insts]
+
 let SubtargetPredicate = Has16BitInsts in {
 def : FMADPat <f16, V_MAC_F16_e64, fmad>;
 def : FMADPat <f16, V_MAC_F16_e64, AMDGPUfmad_ftz>;
-}
-
-}
+} // SubtargetPredicate = Has16BitInsts
+} // AddedComplexity = 9
 
 class FMADModsPat<ValueType Ty, Instruction inst, SDPatternOperator mad_opr>
   : GCNPat<
@@ -897,11 +909,20 @@ class FMADModsPat<ValueType Ty, Instruction inst, SDPatternOperator mad_opr>
   $src2_mod, $src2, DSTCLAMP.NONE, DSTOMOD.NONE)
 >;
 
-let SubtargetPredicate = HasMadMacF32Insts in
+let OtherPredicates = [HasMadMacF32Insts] in {
 def : FMADModsPat<f32, V_MAD_F32, AMDGPUfmad_ftz>;
-def : FMADModsPat<f16, V_MAD_F16, AMDGPUfmad_ftz> {
-  let SubtargetPredicate = Has16BitInsts;
-}
+
+def : GCNPat <
+      (f32 (fadd (AMDGPUfmul_legacy (VOP3Mods f32:$src0, i32:$src0_mod),
+                                    (VOP3Mods f32:$src1, i32:$src1_mod)),
+                 (VOP3Mods f32:$src2, i32:$src2_mod))),
+      (V_MAD_LEGACY_F32 $src0_mod, $src0, $src1_mod, $src1,
+                        $src2_mod, $src2, DSTCLAMP.NONE, DSTOMOD.NONE)
+>;
+} // OtherPredicates = [HasMadMacF32Insts]
+
+let SubtargetPredicate = Has16BitInsts in
+def : FMADModsPat<f16, V_MAD_F16, AMDGPUfmad_ftz>;
 
 class VOPSelectModsPat <ValueType vt> : GCNPat <
   (vt (select i1:$src0, (VOP3Mods vt:$src1, i32:$src1_mods),

diff  --git a/llvm/lib/Target/AMDGPU/VOP2Instructions.td b/llvm/lib/Target/AMDGPU/VOP2Instructions.td
index 4c263de673d6..09f65c5c944e 100644
--- a/llvm/lib/Target/AMDGPU/VOP2Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP2Instructions.td
@@ -499,11 +499,15 @@ let OtherPredicates = [HasMadMacF32Insts] in {
 let Constraints = "$vdst = $src2", DisableEncoding="$src2",
     isConvertibleToThreeAddress = 1 in {
 defm V_MAC_F32 : VOP2Inst <"v_mac_f32", VOP_MAC_F32>;
-}
+
+let SubtargetPredicate = isGFX6GFX7GFX10 in
+defm V_MAC_LEGACY_F32 : VOP2Inst <"v_mac_legacy_f32", VOP_MAC_F32>;
+} // End Constraints = "$vdst = $src2", DisableEncoding="$src2",
+  //     isConvertibleToThreeAddress = 1
 
 def V_MADAK_F32 : VOP2_Pseudo <"v_madak_f32", VOP_MADAK_F32, []>;
 } // End OtherPredicates = [HasMadMacF32Insts]
-}
+} // End mayRaiseFPException = 0
 
 // No patterns so that the scalar instructions are always selected.
 // The scalar versions will be replaced with vector when needed later.
@@ -557,10 +561,6 @@ defm V_MAX_LEGACY_F32 : VOP2Inst <"v_max_legacy_f32", VOP_F32_F32_F32, AMDGPUfma
 } // End SubtargetPredicate = isGFX6GFX7
 
 let isCommutable = 1 in {
-let SubtargetPredicate = isGFX6GFX7GFX10 in {
-let OtherPredicates = [HasMadMacF32Insts] in
-defm V_MAC_LEGACY_F32 : VOP2Inst <"v_mac_legacy_f32", VOP_F32_F32_F32>;
-} // End SubtargetPredicate = isGFX6GFX7GFX10
 let SubtargetPredicate = isGFX6GFX7 in {
 defm V_LSHR_B32 : VOP2Inst <"v_lshr_b32", VOP_PAT_GEN<VOP_I32_I32_I32>, srl>;
 defm V_ASHR_I32 : VOP2Inst <"v_ashr_i32", VOP_PAT_GEN<VOP_I32_I32_I32>, sra>;
@@ -1322,7 +1322,6 @@ let SubtargetPredicate = isGFX6GFX7 in {
 defm V_ADD_F32            : VOP2_Real_gfx6_gfx7_gfx10<0x003>;
 defm V_SUB_F32            : VOP2_Real_gfx6_gfx7_gfx10<0x004>;
 defm V_SUBREV_F32         : VOP2_Real_gfx6_gfx7_gfx10<0x005>;
-let OtherPredicates = [HasMadMacF32Insts] in
 defm V_MAC_LEGACY_F32     : VOP2_Real_gfx6_gfx7_gfx10<0x006>;
 defm V_MUL_LEGACY_F32     : VOP2_Real_gfx6_gfx7_gfx10<0x007>;
 defm V_MUL_F32            : VOP2_Real_gfx6_gfx7_gfx10<0x008>;

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.fmul.legacy.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.fmul.legacy.ll
index 3ebe19a156f0..ebe3ffd06ced 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.fmul.legacy.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.fmul.legacy.ll
@@ -1,53 +1,196 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -global-isel -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
-; RUN: llc -global-isel -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
+; RUN: llc -global-isel -march=amdgcn -mcpu=tahiti  -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX6 %s
+; RUN: llc -global-isel -march=amdgcn -mcpu=tonga   -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX8 %s
+; RUN: llc -global-isel -march=amdgcn -mcpu=gfx900  -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9 %s
+; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX101 %s
+; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1030 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX103 %s
 
 define float @v_mul_legacy_f32(float %a, float %b) {
-; GCN-LABEL: v_mul_legacy_f32:
-; GCN:       ; %bb.0:
-; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT:    v_mul_legacy_f32_e32 v0, v0, v1
-; GCN-NEXT:    s_setpc_b64 s[30:31]
+; GFX6-LABEL: v_mul_legacy_f32:
+; GFX6:       ; %bb.0:
+; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-NEXT:    v_mul_legacy_f32_e32 v0, v0, v1
+; GFX6-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: v_mul_legacy_f32:
+; GFX8:       ; %bb.0:
+; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT:    v_mul_legacy_f32_e32 v0, v0, v1
+; GFX8-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: v_mul_legacy_f32:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    v_mul_legacy_f32_e32 v0, v0, v1
+; GFX9-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX101-LABEL: v_mul_legacy_f32:
+; GFX101:       ; %bb.0:
+; GFX101-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX101-NEXT:    s_waitcnt_vscnt null, 0x0
+; GFX101-NEXT:    v_mul_legacy_f32_e32 v0, v0, v1
+; GFX101-NEXT:    ; implicit-def: $vcc_hi
+; GFX101-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX103-LABEL: v_mul_legacy_f32:
+; GFX103:       ; %bb.0:
+; GFX103-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX103-NEXT:    s_waitcnt_vscnt null, 0x0
+; GFX103-NEXT:    v_mul_legacy_f32_e32 v0, v0, v1
+; GFX103-NEXT:    ; implicit-def: $vcc_hi
+; GFX103-NEXT:    s_setpc_b64 s[30:31]
   %result = call float @llvm.amdgcn.fmul.legacy(float %a, float %b)
   ret float %result
 }
 
 define float @v_mul_legacy_undef0_f32(float %a) {
-; GCN-LABEL: v_mul_legacy_undef0_f32:
-; GCN:       ; %bb.0:
-; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT:    v_mul_legacy_f32_e32 v0, s4, v0
-; GCN-NEXT:    s_setpc_b64 s[30:31]
+; GFX6-LABEL: v_mul_legacy_undef0_f32:
+; GFX6:       ; %bb.0:
+; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-NEXT:    v_mul_legacy_f32_e32 v0, s4, v0
+; GFX6-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: v_mul_legacy_undef0_f32:
+; GFX8:       ; %bb.0:
+; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT:    v_mul_legacy_f32_e32 v0, s4, v0
+; GFX8-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: v_mul_legacy_undef0_f32:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    v_mul_legacy_f32_e32 v0, s4, v0
+; GFX9-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX101-LABEL: v_mul_legacy_undef0_f32:
+; GFX101:       ; %bb.0:
+; GFX101-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX101-NEXT:    s_waitcnt_vscnt null, 0x0
+; GFX101-NEXT:    v_mul_legacy_f32_e32 v0, s4, v0
+; GFX101-NEXT:    ; implicit-def: $vcc_hi
+; GFX101-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX103-LABEL: v_mul_legacy_undef0_f32:
+; GFX103:       ; %bb.0:
+; GFX103-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX103-NEXT:    s_waitcnt_vscnt null, 0x0
+; GFX103-NEXT:    v_mul_legacy_f32_e32 v0, s4, v0
+; GFX103-NEXT:    ; implicit-def: $vcc_hi
+; GFX103-NEXT:    s_setpc_b64 s[30:31]
   %result = call float @llvm.amdgcn.fmul.legacy(float undef, float %a)
   ret float %result
 }
 
 define float @v_mul_legacy_undef1_f32(float %a) {
-; GCN-LABEL: v_mul_legacy_undef1_f32:
-; GCN:       ; %bb.0:
-; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT:    v_mul_legacy_f32_e32 v0, s4, v0
-; GCN-NEXT:    s_setpc_b64 s[30:31]
+; GFX6-LABEL: v_mul_legacy_undef1_f32:
+; GFX6:       ; %bb.0:
+; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-NEXT:    v_mul_legacy_f32_e32 v0, s4, v0
+; GFX6-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: v_mul_legacy_undef1_f32:
+; GFX8:       ; %bb.0:
+; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT:    v_mul_legacy_f32_e32 v0, s4, v0
+; GFX8-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: v_mul_legacy_undef1_f32:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    v_mul_legacy_f32_e32 v0, s4, v0
+; GFX9-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX101-LABEL: v_mul_legacy_undef1_f32:
+; GFX101:       ; %bb.0:
+; GFX101-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX101-NEXT:    s_waitcnt_vscnt null, 0x0
+; GFX101-NEXT:    v_mul_legacy_f32_e32 v0, s4, v0
+; GFX101-NEXT:    ; implicit-def: $vcc_hi
+; GFX101-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX103-LABEL: v_mul_legacy_undef1_f32:
+; GFX103:       ; %bb.0:
+; GFX103-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX103-NEXT:    s_waitcnt_vscnt null, 0x0
+; GFX103-NEXT:    v_mul_legacy_f32_e32 v0, s4, v0
+; GFX103-NEXT:    ; implicit-def: $vcc_hi
+; GFX103-NEXT:    s_setpc_b64 s[30:31]
   %result = call float @llvm.amdgcn.fmul.legacy(float %a, float undef)
   ret float %result
 }
 
 define float @v_mul_legacy_undef_f32() {
-; GCN-LABEL: v_mul_legacy_undef_f32:
-; GCN:       ; %bb.0:
-; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT:    v_mul_legacy_f32_e64 v0, s4, s4
-; GCN-NEXT:    s_setpc_b64 s[30:31]
+; GFX6-LABEL: v_mul_legacy_undef_f32:
+; GFX6:       ; %bb.0:
+; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-NEXT:    v_mul_legacy_f32_e64 v0, s4, s4
+; GFX6-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: v_mul_legacy_undef_f32:
+; GFX8:       ; %bb.0:
+; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT:    v_mul_legacy_f32_e64 v0, s4, s4
+; GFX8-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: v_mul_legacy_undef_f32:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    v_mul_legacy_f32_e64 v0, s4, s4
+; GFX9-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX101-LABEL: v_mul_legacy_undef_f32:
+; GFX101:       ; %bb.0:
+; GFX101-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX101-NEXT:    s_waitcnt_vscnt null, 0x0
+; GFX101-NEXT:    v_mul_legacy_f32_e64 v0, s4, s4
+; GFX101-NEXT:    ; implicit-def: $vcc_hi
+; GFX101-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX103-LABEL: v_mul_legacy_undef_f32:
+; GFX103:       ; %bb.0:
+; GFX103-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX103-NEXT:    s_waitcnt_vscnt null, 0x0
+; GFX103-NEXT:    v_mul_legacy_f32_e64 v0, s4, s4
+; GFX103-NEXT:    ; implicit-def: $vcc_hi
+; GFX103-NEXT:    s_setpc_b64 s[30:31]
   %result = call float @llvm.amdgcn.fmul.legacy(float undef, float undef)
   ret float %result
 }
 
 define float @v_mul_legacy_fabs_f32(float %a, float %b) {
-; GCN-LABEL: v_mul_legacy_fabs_f32:
-; GCN:       ; %bb.0:
-; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT:    v_mul_legacy_f32_e64 v0, |v0|, |v1|
-; GCN-NEXT:    s_setpc_b64 s[30:31]
+; GFX6-LABEL: v_mul_legacy_fabs_f32:
+; GFX6:       ; %bb.0:
+; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-NEXT:    v_mul_legacy_f32_e64 v0, |v0|, |v1|
+; GFX6-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: v_mul_legacy_fabs_f32:
+; GFX8:       ; %bb.0:
+; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT:    v_mul_legacy_f32_e64 v0, |v0|, |v1|
+; GFX8-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: v_mul_legacy_fabs_f32:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    v_mul_legacy_f32_e64 v0, |v0|, |v1|
+; GFX9-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX101-LABEL: v_mul_legacy_fabs_f32:
+; GFX101:       ; %bb.0:
+; GFX101-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX101-NEXT:    s_waitcnt_vscnt null, 0x0
+; GFX101-NEXT:    v_mul_legacy_f32_e64 v0, |v0|, |v1|
+; GFX101-NEXT:    ; implicit-def: $vcc_hi
+; GFX101-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX103-LABEL: v_mul_legacy_fabs_f32:
+; GFX103:       ; %bb.0:
+; GFX103-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX103-NEXT:    s_waitcnt_vscnt null, 0x0
+; GFX103-NEXT:    v_mul_legacy_f32_e64 v0, |v0|, |v1|
+; GFX103-NEXT:    ; implicit-def: $vcc_hi
+; GFX103-NEXT:    s_setpc_b64 s[30:31]
   %a.fabs = call float @llvm.fabs.f32(float %a)
   %b.fabs = call float @llvm.fabs.f32(float %b)
   %result = call float @llvm.amdgcn.fmul.legacy(float %a.fabs, float %b.fabs)
@@ -55,76 +198,311 @@ define float @v_mul_legacy_fabs_f32(float %a, float %b) {
 }
 
 define float @v_mul_legacy_fneg_f32(float %a, float %b) {
-; GCN-LABEL: v_mul_legacy_fneg_f32:
-; GCN:       ; %bb.0:
-; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT:    v_mul_legacy_f32_e64 v0, -v0, -v1
-; GCN-NEXT:    s_setpc_b64 s[30:31]
-  %a.fabs = fneg float %a
-  %b.fabs = fneg float %b
-  %result = call float @llvm.amdgcn.fmul.legacy(float %a.fabs, float %b.fabs)
+; GFX6-LABEL: v_mul_legacy_fneg_f32:
+; GFX6:       ; %bb.0:
+; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-NEXT:    v_mul_legacy_f32_e64 v0, -v0, -v1
+; GFX6-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: v_mul_legacy_fneg_f32:
+; GFX8:       ; %bb.0:
+; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT:    v_mul_legacy_f32_e64 v0, -v0, -v1
+; GFX8-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: v_mul_legacy_fneg_f32:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    v_mul_legacy_f32_e64 v0, -v0, -v1
+; GFX9-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX101-LABEL: v_mul_legacy_fneg_f32:
+; GFX101:       ; %bb.0:
+; GFX101-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX101-NEXT:    s_waitcnt_vscnt null, 0x0
+; GFX101-NEXT:    v_mul_legacy_f32_e64 v0, -v0, -v1
+; GFX101-NEXT:    ; implicit-def: $vcc_hi
+; GFX101-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX103-LABEL: v_mul_legacy_fneg_f32:
+; GFX103:       ; %bb.0:
+; GFX103-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX103-NEXT:    s_waitcnt_vscnt null, 0x0
+; GFX103-NEXT:    v_mul_legacy_f32_e64 v0, -v0, -v1
+; GFX103-NEXT:    ; implicit-def: $vcc_hi
+; GFX103-NEXT:    s_setpc_b64 s[30:31]
+  %a.fneg = fneg float %a
+  %b.fneg = fneg float %b
+  %result = call float @llvm.amdgcn.fmul.legacy(float %a.fneg, float %b.fneg)
   ret float %result
 }
 
-; TODO: Should match mac_legacy/mad_legacy
 define float @v_mad_legacy_f32(float %a, float %b, float %c) {
-; GCN-LABEL: v_mad_legacy_f32:
-; GCN:       ; %bb.0:
-; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT:    v_mul_legacy_f32_e32 v0, v0, v1
-; GCN-NEXT:    v_add_f32_e32 v0, v0, v2
-; GCN-NEXT:    s_setpc_b64 s[30:31]
+; GFX6-LABEL: v_mad_legacy_f32:
+; GFX6:       ; %bb.0:
+; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-NEXT:    v_mac_legacy_f32_e64 v2, v0, v1
+; GFX6-NEXT:    v_mov_b32_e32 v0, v2
+; GFX6-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: v_mad_legacy_f32:
+; GFX8:       ; %bb.0:
+; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT:    v_mad_legacy_f32 v0, v0, v1, v2
+; GFX8-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: v_mad_legacy_f32:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    v_mad_legacy_f32 v0, v0, v1, v2
+; GFX9-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX101-LABEL: v_mad_legacy_f32:
+; GFX101:       ; %bb.0:
+; GFX101-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX101-NEXT:    s_waitcnt_vscnt null, 0x0
+; GFX101-NEXT:    v_mac_legacy_f32_e64 v2, v0, v1
+; GFX101-NEXT:    ; implicit-def: $vcc_hi
+; GFX101-NEXT:    v_mov_b32_e32 v0, v2
+; GFX101-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX103-LABEL: v_mad_legacy_f32:
+; GFX103:       ; %bb.0:
+; GFX103-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX103-NEXT:    s_waitcnt_vscnt null, 0x0
+; GFX103-NEXT:    v_mul_legacy_f32_e32 v0, v0, v1
+; GFX103-NEXT:    ; implicit-def: $vcc_hi
+; GFX103-NEXT:    v_add_f32_e32 v0, v0, v2
+; GFX103-NEXT:    s_setpc_b64 s[30:31]
   %mul = call float @llvm.amdgcn.fmul.legacy(float %a, float %b)
   %add = fadd float %mul, %c
   ret float %add
 }
 
+define float @v_mad_legacy_fneg_f32(float %a, float %b, float %c) {
+; GFX6-LABEL: v_mad_legacy_fneg_f32:
+; GFX6:       ; %bb.0:
+; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-NEXT:    v_mad_legacy_f32 v0, -v0, -v1, v2
+; GFX6-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: v_mad_legacy_fneg_f32:
+; GFX8:       ; %bb.0:
+; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT:    v_mad_legacy_f32 v0, -v0, -v1, v2
+; GFX8-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: v_mad_legacy_fneg_f32:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    v_mad_legacy_f32 v0, -v0, -v1, v2
+; GFX9-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX101-LABEL: v_mad_legacy_fneg_f32:
+; GFX101:       ; %bb.0:
+; GFX101-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX101-NEXT:    s_waitcnt_vscnt null, 0x0
+; GFX101-NEXT:    v_mad_legacy_f32 v0, -v0, -v1, v2
+; GFX101-NEXT:    ; implicit-def: $vcc_hi
+; GFX101-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX103-LABEL: v_mad_legacy_fneg_f32:
+; GFX103:       ; %bb.0:
+; GFX103-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX103-NEXT:    s_waitcnt_vscnt null, 0x0
+; GFX103-NEXT:    v_mul_legacy_f32_e64 v0, -v0, -v1
+; GFX103-NEXT:    ; implicit-def: $vcc_hi
+; GFX103-NEXT:    v_add_f32_e32 v0, v0, v2
+; GFX103-NEXT:    s_setpc_b64 s[30:31]
+  %a.fneg = fneg float %a
+  %b.fneg = fneg float %b
+  %mul = call float @llvm.amdgcn.fmul.legacy(float %a.fneg, float %b.fneg)
+  %add = fadd float %mul, %c
+  ret float %add
+}
+
 define amdgpu_ps float @s_mul_legacy_f32(float inreg %a, float inreg %b) {
-; GCN-LABEL: s_mul_legacy_f32:
-; GCN:       ; %bb.0:
-; GCN-NEXT:    v_mov_b32_e32 v0, s1
-; GCN-NEXT:    v_mul_legacy_f32_e32 v0, s0, v0
-; GCN-NEXT:    ; return to shader part epilog
+; GFX6-LABEL: s_mul_legacy_f32:
+; GFX6:       ; %bb.0:
+; GFX6-NEXT:    v_mov_b32_e32 v0, s1
+; GFX6-NEXT:    v_mul_legacy_f32_e32 v0, s0, v0
+; GFX6-NEXT:    ; return to shader part epilog
+;
+; GFX8-LABEL: s_mul_legacy_f32:
+; GFX8:       ; %bb.0:
+; GFX8-NEXT:    v_mov_b32_e32 v0, s1
+; GFX8-NEXT:    v_mul_legacy_f32_e32 v0, s0, v0
+; GFX8-NEXT:    ; return to shader part epilog
+;
+; GFX9-LABEL: s_mul_legacy_f32:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    v_mov_b32_e32 v0, s1
+; GFX9-NEXT:    v_mul_legacy_f32_e32 v0, s0, v0
+; GFX9-NEXT:    ; return to shader part epilog
+;
+; GFX101-LABEL: s_mul_legacy_f32:
+; GFX101:       ; %bb.0:
+; GFX101-NEXT:    v_mul_legacy_f32_e64 v0, s0, s1
+; GFX101-NEXT:    ; implicit-def: $vcc_hi
+; GFX101-NEXT:    ; return to shader part epilog
+;
+; GFX103-LABEL: s_mul_legacy_f32:
+; GFX103:       ; %bb.0:
+; GFX103-NEXT:    v_mul_legacy_f32_e64 v0, s0, s1
+; GFX103-NEXT:    ; implicit-def: $vcc_hi
+; GFX103-NEXT:    ; return to shader part epilog
   %result = call float @llvm.amdgcn.fmul.legacy(float %a, float %b)
   ret float %result
 }
 
 define float @v_mul_legacy_f32_1.0(float %a) {
-; GCN-LABEL: v_mul_legacy_f32_1.0:
-; GCN:       ; %bb.0:
-; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT:    v_mul_legacy_f32_e32 v0, 1.0, v0
-; GCN-NEXT:    s_setpc_b64 s[30:31]
+; GFX6-LABEL: v_mul_legacy_f32_1.0:
+; GFX6:       ; %bb.0:
+; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-NEXT:    v_mul_legacy_f32_e32 v0, 1.0, v0
+; GFX6-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: v_mul_legacy_f32_1.0:
+; GFX8:       ; %bb.0:
+; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT:    v_mul_legacy_f32_e32 v0, 1.0, v0
+; GFX8-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: v_mul_legacy_f32_1.0:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    v_mul_legacy_f32_e32 v0, 1.0, v0
+; GFX9-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX101-LABEL: v_mul_legacy_f32_1.0:
+; GFX101:       ; %bb.0:
+; GFX101-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX101-NEXT:    s_waitcnt_vscnt null, 0x0
+; GFX101-NEXT:    v_mul_legacy_f32_e32 v0, 1.0, v0
+; GFX101-NEXT:    ; implicit-def: $vcc_hi
+; GFX101-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX103-LABEL: v_mul_legacy_f32_1.0:
+; GFX103:       ; %bb.0:
+; GFX103-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX103-NEXT:    s_waitcnt_vscnt null, 0x0
+; GFX103-NEXT:    v_mul_legacy_f32_e32 v0, 1.0, v0
+; GFX103-NEXT:    ; implicit-def: $vcc_hi
+; GFX103-NEXT:    s_setpc_b64 s[30:31]
   %result = call float @llvm.amdgcn.fmul.legacy(float %a, float 1.0)
   ret float %result
 }
 
 define float @v_mul_legacy_f32_1.0_swap(float %b) {
-; GCN-LABEL: v_mul_legacy_f32_1.0_swap:
-; GCN:       ; %bb.0:
-; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT:    v_mul_legacy_f32_e32 v0, 1.0, v0
-; GCN-NEXT:    s_setpc_b64 s[30:31]
+; GFX6-LABEL: v_mul_legacy_f32_1.0_swap:
+; GFX6:       ; %bb.0:
+; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-NEXT:    v_mul_legacy_f32_e32 v0, 1.0, v0
+; GFX6-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: v_mul_legacy_f32_1.0_swap:
+; GFX8:       ; %bb.0:
+; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT:    v_mul_legacy_f32_e32 v0, 1.0, v0
+; GFX8-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: v_mul_legacy_f32_1.0_swap:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    v_mul_legacy_f32_e32 v0, 1.0, v0
+; GFX9-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX101-LABEL: v_mul_legacy_f32_1.0_swap:
+; GFX101:       ; %bb.0:
+; GFX101-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX101-NEXT:    s_waitcnt_vscnt null, 0x0
+; GFX101-NEXT:    v_mul_legacy_f32_e32 v0, 1.0, v0
+; GFX101-NEXT:    ; implicit-def: $vcc_hi
+; GFX101-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX103-LABEL: v_mul_legacy_f32_1.0_swap:
+; GFX103:       ; %bb.0:
+; GFX103-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX103-NEXT:    s_waitcnt_vscnt null, 0x0
+; GFX103-NEXT:    v_mul_legacy_f32_e32 v0, 1.0, v0
+; GFX103-NEXT:    ; implicit-def: $vcc_hi
+; GFX103-NEXT:    s_setpc_b64 s[30:31]
   %result = call float @llvm.amdgcn.fmul.legacy(float 1.0, float %b)
   ret float %result
 }
 
 define float @v_mul_legacy_f32_2.0(float %a) {
-; GCN-LABEL: v_mul_legacy_f32_2.0:
-; GCN:       ; %bb.0:
-; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT:    v_mul_legacy_f32_e32 v0, 2.0, v0
-; GCN-NEXT:    s_setpc_b64 s[30:31]
+; GFX6-LABEL: v_mul_legacy_f32_2.0:
+; GFX6:       ; %bb.0:
+; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-NEXT:    v_mul_legacy_f32_e32 v0, 2.0, v0
+; GFX6-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: v_mul_legacy_f32_2.0:
+; GFX8:       ; %bb.0:
+; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT:    v_mul_legacy_f32_e32 v0, 2.0, v0
+; GFX8-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: v_mul_legacy_f32_2.0:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    v_mul_legacy_f32_e32 v0, 2.0, v0
+; GFX9-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX101-LABEL: v_mul_legacy_f32_2.0:
+; GFX101:       ; %bb.0:
+; GFX101-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX101-NEXT:    s_waitcnt_vscnt null, 0x0
+; GFX101-NEXT:    v_mul_legacy_f32_e32 v0, 2.0, v0
+; GFX101-NEXT:    ; implicit-def: $vcc_hi
+; GFX101-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX103-LABEL: v_mul_legacy_f32_2.0:
+; GFX103:       ; %bb.0:
+; GFX103-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX103-NEXT:    s_waitcnt_vscnt null, 0x0
+; GFX103-NEXT:    v_mul_legacy_f32_e32 v0, 2.0, v0
+; GFX103-NEXT:    ; implicit-def: $vcc_hi
+; GFX103-NEXT:    s_setpc_b64 s[30:31]
   %result = call float @llvm.amdgcn.fmul.legacy(float %a, float 2.0)
   ret float %result
 }
 
 define float @v_mul_legacy_f32_2.0_swap(float %b) {
-; GCN-LABEL: v_mul_legacy_f32_2.0_swap:
-; GCN:       ; %bb.0:
-; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT:    v_mul_legacy_f32_e32 v0, 2.0, v0
-; GCN-NEXT:    s_setpc_b64 s[30:31]
+; GFX6-LABEL: v_mul_legacy_f32_2.0_swap:
+; GFX6:       ; %bb.0:
+; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-NEXT:    v_mul_legacy_f32_e32 v0, 2.0, v0
+; GFX6-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: v_mul_legacy_f32_2.0_swap:
+; GFX8:       ; %bb.0:
+; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT:    v_mul_legacy_f32_e32 v0, 2.0, v0
+; GFX8-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: v_mul_legacy_f32_2.0_swap:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    v_mul_legacy_f32_e32 v0, 2.0, v0
+; GFX9-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX101-LABEL: v_mul_legacy_f32_2.0_swap:
+; GFX101:       ; %bb.0:
+; GFX101-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX101-NEXT:    s_waitcnt_vscnt null, 0x0
+; GFX101-NEXT:    v_mul_legacy_f32_e32 v0, 2.0, v0
+; GFX101-NEXT:    ; implicit-def: $vcc_hi
+; GFX101-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX103-LABEL: v_mul_legacy_f32_2.0_swap:
+; GFX103:       ; %bb.0:
+; GFX103-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX103-NEXT:    s_waitcnt_vscnt null, 0x0
+; GFX103-NEXT:    v_mul_legacy_f32_e32 v0, 2.0, v0
+; GFX103-NEXT:    ; implicit-def: $vcc_hi
+; GFX103-NEXT:    s_setpc_b64 s[30:31]
   %result = call float @llvm.amdgcn.fmul.legacy(float 2.0, float %b)
   ret float %result
 }

diff  --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fmul.legacy.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fmul.legacy.ll
index be8462d09064..a91745b636d9 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fmul.legacy.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fmul.legacy.ll
@@ -1,9 +1,11 @@
-; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
-; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
-
+; RUN: llc -march=amdgcn -mcpu=tahiti  -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,MADMACF32,GFX6 %s
+; RUN: llc -march=amdgcn -mcpu=tonga   -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,MADMACF32,GFX8 %s
+; RUN: llc -march=amdgcn -mcpu=gfx900  -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,MADMACF32,GFX9 %s
+; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,MADMACF32,GFX101 %s
+; RUN: llc -march=amdgcn -mcpu=gfx1030 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,NOMADMACF32,GFX103 %s
 
 ; GCN-LABEL: {{^}}test_mul_legacy_f32:
-; GCN: v_mul_legacy_f32_e32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}
+; GCN: v_mul_legacy_f32_e{{(32|64)}} v{{[0-9]+}}, s{{[0-9]+}}, {{[sv][0-9]+}}
 define amdgpu_kernel void @test_mul_legacy_f32(float addrspace(1)* %out, float %a, float %b) #0 {
   %result = call float @llvm.amdgcn.fmul.legacy(float %a, float %b)
   store float %result, float addrspace(1)* %out, align 4
@@ -11,7 +13,7 @@ define amdgpu_kernel void @test_mul_legacy_f32(float addrspace(1)* %out, float %
 }
 
 ; GCN-LABEL: {{^}}test_mul_legacy_undef0_f32:
-; GCN: v_mul_legacy_f32_e32
+; GCN: v_mul_legacy_f32_e{{(32|64)}} v{{[0-9]+}}, s{{[0-9]+}}, {{[sv][0-9]+}}
 define amdgpu_kernel void @test_mul_legacy_undef0_f32(float addrspace(1)* %out, float %a) #0 {
   %result = call float @llvm.amdgcn.fmul.legacy(float undef, float %a)
   store float %result, float addrspace(1)* %out, align 4
@@ -19,7 +21,7 @@ define amdgpu_kernel void @test_mul_legacy_undef0_f32(float addrspace(1)* %out,
 }
 
 ; GCN-LABEL: {{^}}test_mul_legacy_undef1_f32:
-; GCN: v_mul_legacy_f32_e32
+; GCN: v_mul_legacy_f32_e{{(32|64)}} v{{[0-9]+}}, s{{[0-9]+}}, {{[sv][0-9]+}}
 define amdgpu_kernel void @test_mul_legacy_undef1_f32(float addrspace(1)* %out, float %a) #0 {
   %result = call float @llvm.amdgcn.fmul.legacy(float %a, float undef)
   store float %result, float addrspace(1)* %out, align 4
@@ -27,7 +29,7 @@ define amdgpu_kernel void @test_mul_legacy_undef1_f32(float addrspace(1)* %out,
 }
 
 ; GCN-LABEL: {{^}}test_mul_legacy_fabs_f32:
-; GCN: v_mul_legacy_f32_e64 v{{[0-9]+}}, |s{{[0-9]+}}|, |v{{[0-9]+}}|
+; GCN: v_mul_legacy_f32_e{{(32|64)}} v{{[0-9]+}}, |s{{[0-9]+}}|, |{{[sv][0-9]+}}|
 define amdgpu_kernel void @test_mul_legacy_fabs_f32(float addrspace(1)* %out, float %a, float %b) #0 {
   %a.fabs = call float @llvm.fabs.f32(float %a)
   %b.fabs = call float @llvm.fabs.f32(float %b)
@@ -36,10 +38,13 @@ define amdgpu_kernel void @test_mul_legacy_fabs_f32(float addrspace(1)* %out, fl
   ret void
 }
 
-; TODO: Should match mac_legacy/mad_legacy
 ; GCN-LABEL: {{^}}test_mad_legacy_f32:
-; GCN: v_mul_legacy_f32_e32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}
-; GCN: v_add_f32_e32
+; GFX6: v_mac_legacy_f32_e64 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}
+; GFX8: v_mad_legacy_f32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}
+; GFX9: v_mad_legacy_f32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}
+; GFX101: v_mac_legacy_f32_e64 v{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}
+; GFX103: v_mul_legacy_f32_e64 v{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}
+; GFX103: v_add_f32_e32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}
 define amdgpu_kernel void @test_mad_legacy_f32(float addrspace(1)* %out, float %a, float %b, float %c) #0 {
   %mul = call float @llvm.amdgcn.fmul.legacy(float %a, float %b)
   %add = fadd float %mul, %c
@@ -47,6 +52,19 @@ define amdgpu_kernel void @test_mad_legacy_f32(float addrspace(1)* %out, float %
   ret void
 }
 
+; GCN-LABEL: {{^}}test_mad_legacy_fneg_f32:
+; MADMACF32: v_mad_legacy_f32 v{{[0-9]+}}, -s{{[0-9]+}}, -{{[sv][0-9]+}}, v{{[0-9]+}}
+; NOMADMACF32: v_mul_legacy_f32_e64 v{{[0-9]+}}, -s{{[0-9]+}}, -s{{[0-9]+}}
+; NOMADMACF32: v_add_f32_e32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}
+define amdgpu_kernel void @test_mad_legacy_fneg_f32(float addrspace(1)* %out, float %a, float %b, float %c) #0 {
+  %a.fneg = fneg float %a
+  %b.fneg = fneg float %b
+  %mul = call float @llvm.amdgcn.fmul.legacy(float %a.fneg, float %b.fneg)
+  %add = fadd float %mul, %c
+  store float %add, float addrspace(1)* %out, align 4
+  ret void
+}
+
 declare float @llvm.fabs.f32(float) #1
 declare float @llvm.amdgcn.fmul.legacy(float, float) #1
 


        


More information about the llvm-commits mailing list