[llvm] 8bed52c - [AMDGPU] Make more use of madmk/fmamk instructions

Jay Foad via llvm-commits llvm-commits at lists.llvm.org
Wed Mar 2 02:33:04 PST 2022


Author: Jay Foad
Date: 2022-03-02T10:22:10Z
New Revision: 8bed52c9eb8b871aaee3e082b71e60c5dbf2fc46

URL: https://github.com/llvm/llvm-project/commit/8bed52c9eb8b871aaee3e082b71e60c5dbf2fc46
DIFF: https://github.com/llvm/llvm-project/commit/8bed52c9eb8b871aaee3e082b71e60c5dbf2fc46.diff

LOG: [AMDGPU] Make more use of madmk/fmamk instructions

In convertToThreeAddress, handle VOP2 mac/fmac instructions with a
literal src0 operand, since these are prime candidates for
converting to madmk/fmamk.

Previously this conversion would only happen if src0 (or src1) was a
register defined by a move-immediate instruction, but in many cases
these operands have already been folded because SIFoldOperands runs
before TwoAddressInstructionPass.

Differential Revision: https://reviews.llvm.org/D120736

Added: 
    

Modified: 
    llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
    llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll
    llvm/test/CodeGen/AMDGPU/twoaddr-fma.mir
    llvm/test/CodeGen/AMDGPU/twoaddr-mad.mir

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 05cef2e6616ce..96168fa2f4748 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -3255,6 +3255,7 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI,
                   Opc == AMDGPU::V_MAC_LEGACY_F32_e64 ||
                   Opc == AMDGPU::V_FMAC_LEGACY_F32_e32 ||
                   Opc == AMDGPU::V_FMAC_LEGACY_F32_e64;
+  bool Src0Literal = false;
 
   switch (Opc) {
   default:
@@ -3281,7 +3282,7 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI,
       return nullptr;
 
     if (Src0->isImm() && !isInlineConstant(MI, Src0Idx, *Src0))
-      return nullptr;
+      Src0Literal = true;
 
     break;
   }
@@ -3319,7 +3320,7 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI,
     };
 
     int64_t Imm;
-    if (getFoldableImm(Src2, Imm, &DefMI)) {
+    if (!Src0Literal && getFoldableImm(Src2, Imm, &DefMI)) {
       unsigned NewOpc =
           IsFMA ? (IsF16 ? AMDGPU::V_FMAAK_F16 : AMDGPU::V_FMAAK_F32)
                 : (IsF16 ? AMDGPU::V_MADAK_F16 : AMDGPU::V_MADAK_F32);
@@ -3339,7 +3340,7 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI,
     unsigned NewOpc = IsFMA
                           ? (IsF16 ? AMDGPU::V_FMAMK_F16 : AMDGPU::V_FMAMK_F32)
                           : (IsF16 ? AMDGPU::V_MADMK_F16 : AMDGPU::V_MADMK_F32);
-    if (getFoldableImm(Src1, Imm, &DefMI)) {
+    if (!Src0Literal && getFoldableImm(Src1, Imm, &DefMI)) {
       if (pseudoToMCOpcode(NewOpc) != -1) {
         MIB = BuildMI(MBB, MI, MI.getDebugLoc(), get(NewOpc))
                   .add(*Dst)
@@ -3353,7 +3354,11 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI,
         return MIB;
       }
     }
-    if (getFoldableImm(Src0, Imm, &DefMI)) {
+    if (Src0Literal || getFoldableImm(Src0, Imm, &DefMI)) {
+      if (Src0Literal) {
+        Imm = Src0->getImm();
+        DefMI = nullptr;
+      }
       if (pseudoToMCOpcode(NewOpc) != -1 &&
           isOperandLegal(
               MI, AMDGPU::getNamedOperandIdx(NewOpc, AMDGPU::OpName::src0),
@@ -3366,12 +3371,19 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI,
         updateLiveVariables(LV, MI, *MIB);
         if (LIS)
           LIS->ReplaceMachineInstrInMaps(MI, *MIB);
-        killDef();
+        if (DefMI)
+          killDef();
         return MIB;
       }
     }
   }
 
+  // VOP2 mac/fmac with a literal operand cannot be converted to VOP3 mad/fma
+  // because VOP3 does not allow a literal operand.
+  // TODO: Remove this restriction for GFX10.
+  if (Src0Literal)
+    return nullptr;
+
   unsigned NewOpc = IsFMA ? IsF16 ? AMDGPU::V_FMA_F16_gfx9_e64
                                   : IsF64 ? AMDGPU::V_FMA_F64_e64
                                           : IsLegacy

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll
index e6163221c6991..ae9aeb99b258d 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll
@@ -1102,66 +1102,65 @@ define <2 x i64> @v_urem_v2i64_oddk_denom(<2 x i64> %num) {
 ; GISEL:       ; %bb.0:
 ; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GISEL-NEXT:    s_mov_b32 s8, 0x12d8fb
-; GISEL-NEXT:    v_cvt_f32_u32_e32 v4, s8
+; GISEL-NEXT:    v_cvt_f32_ubyte0_e32 v4, 0
+; GISEL-NEXT:    v_cvt_f32_u32_e32 v5, s8
 ; GISEL-NEXT:    s_sub_u32 s6, 0, s8
 ; GISEL-NEXT:    s_cselect_b32 s4, 1, 0
-; GISEL-NEXT:    v_cvt_f32_ubyte0_e32 v5, 0
-; GISEL-NEXT:    v_mov_b32_e32 v6, v4
+; GISEL-NEXT:    v_madmk_f32 v6, v4, 0x4f800000, v5
 ; GISEL-NEXT:    s_and_b32 s4, s4, 1
-; GISEL-NEXT:    v_mac_f32_e32 v4, 0x4f800000, v5
-; GISEL-NEXT:    v_mac_f32_e32 v6, 0x4f800000, v5
-; GISEL-NEXT:    v_rcp_iflag_f32_e32 v4, v4
-; GISEL-NEXT:    v_rcp_iflag_f32_e32 v5, v6
+; GISEL-NEXT:    v_mac_f32_e32 v5, 0x4f800000, v4
+; GISEL-NEXT:    v_rcp_iflag_f32_e32 v4, v6
+; GISEL-NEXT:    v_rcp_iflag_f32_e32 v5, v5
+; GISEL-NEXT:    v_mul_f32_e32 v6, 0x5f7ffffc, v4
 ; GISEL-NEXT:    s_cmp_lg_u32 s4, 0
 ; GISEL-NEXT:    s_subb_u32 s7, 0, 0
 ; GISEL-NEXT:    s_bfe_i32 s4, -1, 0x10000
 ; GISEL-NEXT:    s_bfe_i32 s5, -1, 0x10000
-; GISEL-NEXT:    v_mul_f32_e32 v6, 0x5f7ffffc, v4
 ; GISEL-NEXT:    v_mul_f32_e32 v7, 0x5f7ffffc, v5
+; GISEL-NEXT:    v_mul_f32_e32 v8, 0x2f800000, v6
 ; GISEL-NEXT:    v_mov_b32_e32 v5, s4
 ; GISEL-NEXT:    v_mov_b32_e32 v4, s5
-; GISEL-NEXT:    v_mul_f32_e32 v8, 0x2f800000, v6
+; GISEL-NEXT:    v_mul_f32_e32 v9, 0x2f800000, v7
 ; GISEL-NEXT:    s_sub_u32 s9, 0, s8
 ; GISEL-NEXT:    s_cselect_b32 s4, 1, 0
-; GISEL-NEXT:    v_mul_f32_e32 v9, 0x2f800000, v7
 ; GISEL-NEXT:    v_trunc_f32_e32 v8, v8
-; GISEL-NEXT:    s_and_b32 s4, s4, 1
 ; GISEL-NEXT:    v_trunc_f32_e32 v9, v9
+; GISEL-NEXT:    s_and_b32 s4, s4, 1
 ; GISEL-NEXT:    v_mac_f32_e32 v6, 0xcf800000, v8
 ; GISEL-NEXT:    v_cvt_u32_f32_e32 v8, v8
 ; GISEL-NEXT:    v_mac_f32_e32 v7, 0xcf800000, v9
 ; GISEL-NEXT:    v_cvt_u32_f32_e32 v9, v9
 ; GISEL-NEXT:    v_cvt_u32_f32_e32 v6, v6
+; GISEL-NEXT:    v_mul_lo_u32 v10, s6, v8
+; GISEL-NEXT:    v_cvt_u32_f32_e32 v7, v7
 ; GISEL-NEXT:    s_cmp_lg_u32 s4, 0
 ; GISEL-NEXT:    s_subb_u32 s10, 0, 0
-; GISEL-NEXT:    v_mul_lo_u32 v10, s9, v8
+; GISEL-NEXT:    v_mul_lo_u32 v11, s9, v9
 ; GISEL-NEXT:    s_bfe_i32 s4, -1, 0x10000
 ; GISEL-NEXT:    s_bfe_i32 s11, -1, 0x10000
-; GISEL-NEXT:    v_cvt_u32_f32_e32 v7, v7
-; GISEL-NEXT:    v_mul_lo_u32 v11, s6, v9
-; GISEL-NEXT:    v_mul_lo_u32 v12, s9, v6
-; GISEL-NEXT:    v_mul_lo_u32 v13, s10, v6
-; GISEL-NEXT:    v_mul_hi_u32 v14, s9, v6
-; GISEL-NEXT:    v_mov_b32_e32 v15, s4
-; GISEL-NEXT:    v_mul_lo_u32 v16, s6, v7
-; GISEL-NEXT:    v_mul_lo_u32 v17, s7, v7
-; GISEL-NEXT:    v_mul_hi_u32 v18, s6, v7
+; GISEL-NEXT:    v_mul_lo_u32 v12, s6, v6
+; GISEL-NEXT:    v_mul_lo_u32 v13, s7, v6
+; GISEL-NEXT:    v_mul_hi_u32 v14, s6, v6
+; GISEL-NEXT:    v_mul_lo_u32 v15, s9, v7
+; GISEL-NEXT:    v_mul_lo_u32 v16, s10, v7
+; GISEL-NEXT:    v_mul_hi_u32 v17, s9, v7
+; GISEL-NEXT:    v_mov_b32_e32 v18, s4
 ; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v13, v10
 ; GISEL-NEXT:    v_mul_lo_u32 v13, v8, v12
 ; GISEL-NEXT:    v_mul_hi_u32 v19, v6, v12
 ; GISEL-NEXT:    v_mul_hi_u32 v12, v8, v12
-; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v17, v11
-; GISEL-NEXT:    v_mul_lo_u32 v17, v9, v16
+; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v16, v11
+; GISEL-NEXT:    v_mul_lo_u32 v16, v9, v15
 ; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v10, v14
-; GISEL-NEXT:    v_mul_hi_u32 v14, v7, v16
-; GISEL-NEXT:    v_mul_hi_u32 v16, v9, v16
-; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v11, v18
-; GISEL-NEXT:    v_mul_lo_u32 v18, v7, v11
-; GISEL-NEXT:    v_add_i32_e32 v17, vcc, v17, v18
-; GISEL-NEXT:    v_cndmask_b32_e64 v18, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v14, vcc, v17, v14
+; GISEL-NEXT:    v_mul_hi_u32 v14, v7, v15
+; GISEL-NEXT:    v_mul_hi_u32 v15, v9, v15
+; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v11, v17
+; GISEL-NEXT:    v_mul_lo_u32 v17, v7, v11
+; GISEL-NEXT:    v_add_i32_e32 v16, vcc, v16, v17
+; GISEL-NEXT:    v_cndmask_b32_e64 v17, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v14, vcc, v16, v14
 ; GISEL-NEXT:    v_mul_lo_u32 v14, v6, v10
-; GISEL-NEXT:    v_mul_lo_u32 v17, v8, v10
+; GISEL-NEXT:    v_mul_lo_u32 v16, v8, v10
 ; GISEL-NEXT:    v_add_i32_e64 v13, s[4:5], v13, v14
 ; GISEL-NEXT:    v_cndmask_b32_e64 v14, 0, 1, s[4:5]
 ; GISEL-NEXT:    v_add_i32_e64 v13, s[4:5], v13, v19
@@ -1170,124 +1169,124 @@ define <2 x i64> @v_urem_v2i64_oddk_denom(<2 x i64> %num) {
 ; GISEL-NEXT:    v_cndmask_b32_e64 v19, 0, 1, s[4:5]
 ; GISEL-NEXT:    v_add_i32_e64 v14, s[4:5], v14, v19
 ; GISEL-NEXT:    v_mul_lo_u32 v19, v9, v11
-; GISEL-NEXT:    v_add_i32_e64 v12, s[4:5], v17, v12
-; GISEL-NEXT:    v_cndmask_b32_e64 v17, 0, 1, s[4:5]
+; GISEL-NEXT:    v_add_i32_e64 v12, s[4:5], v16, v12
+; GISEL-NEXT:    v_cndmask_b32_e64 v16, 0, 1, s[4:5]
 ; GISEL-NEXT:    v_add_i32_e64 v12, s[4:5], v12, v13
 ; GISEL-NEXT:    v_cndmask_b32_e64 v13, 0, 1, s[4:5]
-; GISEL-NEXT:    v_add_i32_e64 v13, s[4:5], v17, v13
-; GISEL-NEXT:    v_cndmask_b32_e64 v17, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v17, vcc, v18, v17
-; GISEL-NEXT:    v_mul_hi_u32 v18, v7, v11
+; GISEL-NEXT:    v_add_i32_e64 v13, s[4:5], v16, v13
+; GISEL-NEXT:    v_cndmask_b32_e64 v16, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v16, vcc, v17, v16
+; GISEL-NEXT:    v_mul_hi_u32 v17, v7, v11
 ; GISEL-NEXT:    v_mul_hi_u32 v11, v9, v11
-; GISEL-NEXT:    v_add_i32_e32 v16, vcc, v19, v16
+; GISEL-NEXT:    v_add_i32_e32 v15, vcc, v19, v15
 ; GISEL-NEXT:    v_cndmask_b32_e64 v19, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v16, vcc, v16, v18
-; GISEL-NEXT:    v_cndmask_b32_e64 v18, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v18, vcc, v19, v18
+; GISEL-NEXT:    v_add_i32_e32 v15, vcc, v15, v17
+; GISEL-NEXT:    v_cndmask_b32_e64 v17, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v17, vcc, v19, v17
 ; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v12, v14
 ; GISEL-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v16, vcc, v16, v17
-; GISEL-NEXT:    v_cndmask_b32_e64 v17, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v15, vcc, v15, v16
+; GISEL-NEXT:    v_cndmask_b32_e64 v16, 0, 1, vcc
 ; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v13, v14
-; GISEL-NEXT:    v_add_i32_e32 v14, vcc, v18, v17
+; GISEL-NEXT:    v_add_i32_e32 v14, vcc, v17, v16
 ; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v10, v13
 ; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v11, v14
 ; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v6, v12
 ; GISEL-NEXT:    v_addc_u32_e32 v8, vcc, v8, v10, vcc
-; GISEL-NEXT:    v_mul_lo_u32 v10, s9, v6
-; GISEL-NEXT:    v_mul_lo_u32 v12, s10, v6
-; GISEL-NEXT:    v_mul_hi_u32 v13, s9, v6
-; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v7, v16
+; GISEL-NEXT:    v_mul_lo_u32 v10, s6, v6
+; GISEL-NEXT:    v_mul_lo_u32 v12, s7, v6
+; GISEL-NEXT:    v_mul_hi_u32 v13, s6, v6
+; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v7, v15
 ; GISEL-NEXT:    v_addc_u32_e32 v9, vcc, v9, v11, vcc
-; GISEL-NEXT:    v_mul_lo_u32 v11, s6, v7
-; GISEL-NEXT:    v_mul_lo_u32 v14, s7, v7
-; GISEL-NEXT:    v_mul_hi_u32 v16, s6, v7
-; GISEL-NEXT:    v_mul_lo_u32 v17, s9, v8
-; GISEL-NEXT:    v_mul_lo_u32 v18, v8, v10
+; GISEL-NEXT:    v_mul_lo_u32 v11, s9, v7
+; GISEL-NEXT:    v_mul_lo_u32 v14, s10, v7
+; GISEL-NEXT:    v_mul_hi_u32 v15, s9, v7
+; GISEL-NEXT:    v_mul_lo_u32 v16, s6, v8
+; GISEL-NEXT:    v_mul_lo_u32 v17, v8, v10
 ; GISEL-NEXT:    v_mul_hi_u32 v19, v6, v10
 ; GISEL-NEXT:    v_mul_hi_u32 v10, v8, v10
-; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v12, v17
-; GISEL-NEXT:    v_mul_lo_u32 v17, s6, v9
-; GISEL-NEXT:    v_add_i32_e32 v14, vcc, v14, v17
-; GISEL-NEXT:    v_mul_lo_u32 v17, v9, v11
+; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v12, v16
+; GISEL-NEXT:    v_mul_lo_u32 v16, s9, v9
+; GISEL-NEXT:    v_add_i32_e32 v14, vcc, v14, v16
+; GISEL-NEXT:    v_mul_lo_u32 v16, v9, v11
 ; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v12, v13
 ; GISEL-NEXT:    v_mul_hi_u32 v13, v7, v11
 ; GISEL-NEXT:    v_mul_hi_u32 v11, v9, v11
-; GISEL-NEXT:    v_add_i32_e32 v14, vcc, v14, v16
-; GISEL-NEXT:    v_mul_lo_u32 v16, v7, v14
-; GISEL-NEXT:    v_add_i32_e32 v16, vcc, v17, v16
-; GISEL-NEXT:    v_cndmask_b32_e64 v17, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v16, v13
+; GISEL-NEXT:    v_add_i32_e32 v14, vcc, v14, v15
+; GISEL-NEXT:    v_mul_lo_u32 v15, v7, v14
+; GISEL-NEXT:    v_add_i32_e32 v15, vcc, v16, v15
+; GISEL-NEXT:    v_cndmask_b32_e64 v16, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v15, v13
 ; GISEL-NEXT:    v_mul_lo_u32 v13, v6, v12
-; GISEL-NEXT:    v_mul_lo_u32 v16, v8, v12
-; GISEL-NEXT:    v_add_i32_e64 v13, s[4:5], v18, v13
-; GISEL-NEXT:    v_cndmask_b32_e64 v18, 0, 1, s[4:5]
+; GISEL-NEXT:    v_mul_lo_u32 v15, v8, v12
+; GISEL-NEXT:    v_add_i32_e64 v13, s[4:5], v17, v13
+; GISEL-NEXT:    v_cndmask_b32_e64 v17, 0, 1, s[4:5]
 ; GISEL-NEXT:    v_add_i32_e64 v13, s[4:5], v13, v19
 ; GISEL-NEXT:    v_mul_hi_u32 v13, v6, v12
 ; GISEL-NEXT:    v_cndmask_b32_e64 v19, 0, 1, s[4:5]
-; GISEL-NEXT:    v_add_i32_e64 v18, s[4:5], v18, v19
+; GISEL-NEXT:    v_add_i32_e64 v17, s[4:5], v17, v19
 ; GISEL-NEXT:    v_mul_lo_u32 v19, v9, v14
-; GISEL-NEXT:    v_add_i32_e64 v10, s[4:5], v16, v10
-; GISEL-NEXT:    v_cndmask_b32_e64 v16, 0, 1, s[4:5]
+; GISEL-NEXT:    v_add_i32_e64 v10, s[4:5], v15, v10
+; GISEL-NEXT:    v_cndmask_b32_e64 v15, 0, 1, s[4:5]
 ; GISEL-NEXT:    v_add_i32_e64 v10, s[4:5], v10, v13
 ; GISEL-NEXT:    v_cndmask_b32_e64 v13, 0, 1, s[4:5]
-; GISEL-NEXT:    v_add_i32_e64 v13, s[4:5], v16, v13
-; GISEL-NEXT:    v_cndmask_b32_e64 v16, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v16, vcc, v17, v16
-; GISEL-NEXT:    v_mul_hi_u32 v17, v7, v14
+; GISEL-NEXT:    v_add_i32_e64 v13, s[4:5], v15, v13
+; GISEL-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v15, vcc, v16, v15
+; GISEL-NEXT:    v_mul_hi_u32 v16, v7, v14
 ; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v19, v11
 ; GISEL-NEXT:    v_cndmask_b32_e64 v19, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v11, v17
-; GISEL-NEXT:    v_cndmask_b32_e64 v17, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v17, vcc, v19, v17
+; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v11, v16
+; GISEL-NEXT:    v_cndmask_b32_e64 v16, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v16, vcc, v19, v16
 ; GISEL-NEXT:    v_mov_b32_e32 v19, s11
 ; GISEL-NEXT:    v_mul_hi_u32 v12, v8, v12
 ; GISEL-NEXT:    v_mul_hi_u32 v14, v9, v14
-; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v10, v18
-; GISEL-NEXT:    v_cndmask_b32_e64 v18, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v11, v16
-; GISEL-NEXT:    v_cndmask_b32_e64 v16, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v13, v18
-; GISEL-NEXT:    v_add_i32_e32 v16, vcc, v17, v16
+; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v10, v17
+; GISEL-NEXT:    v_cndmask_b32_e64 v17, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v11, v15
+; GISEL-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v13, v17
+; GISEL-NEXT:    v_add_i32_e32 v15, vcc, v16, v15
 ; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v12, v13
-; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v14, v16
+; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v14, v15
 ; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v6, v10
 ; GISEL-NEXT:    v_addc_u32_e32 v8, vcc, v8, v12, vcc
-; GISEL-NEXT:    v_mul_lo_u32 v10, v3, v6
-; GISEL-NEXT:    v_mul_hi_u32 v12, v2, v6
-; GISEL-NEXT:    v_mul_hi_u32 v6, v3, v6
+; GISEL-NEXT:    v_mul_lo_u32 v10, v1, v6
+; GISEL-NEXT:    v_mul_hi_u32 v12, v0, v6
+; GISEL-NEXT:    v_mul_hi_u32 v6, v1, v6
 ; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v7, v11
 ; GISEL-NEXT:    v_addc_u32_e32 v9, vcc, v9, v13, vcc
-; GISEL-NEXT:    v_mul_lo_u32 v11, v1, v7
-; GISEL-NEXT:    v_mul_hi_u32 v13, v0, v7
-; GISEL-NEXT:    v_mul_hi_u32 v7, v1, v7
-; GISEL-NEXT:    v_mul_lo_u32 v14, v2, v8
-; GISEL-NEXT:    v_mul_lo_u32 v16, v3, v8
-; GISEL-NEXT:    v_mul_hi_u32 v17, v2, v8
-; GISEL-NEXT:    v_mul_hi_u32 v8, v3, v8
-; GISEL-NEXT:    v_mul_lo_u32 v18, v0, v9
-; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v11, v18
-; GISEL-NEXT:    v_cndmask_b32_e64 v18, 0, 1, vcc
+; GISEL-NEXT:    v_mul_lo_u32 v11, v3, v7
+; GISEL-NEXT:    v_mul_hi_u32 v13, v2, v7
+; GISEL-NEXT:    v_mul_hi_u32 v7, v3, v7
+; GISEL-NEXT:    v_mul_lo_u32 v14, v0, v8
+; GISEL-NEXT:    v_mul_lo_u32 v15, v1, v8
+; GISEL-NEXT:    v_mul_hi_u32 v16, v0, v8
+; GISEL-NEXT:    v_mul_hi_u32 v8, v1, v8
+; GISEL-NEXT:    v_mul_lo_u32 v17, v2, v9
+; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v11, v17
+; GISEL-NEXT:    v_cndmask_b32_e64 v17, 0, 1, vcc
 ; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v11, v13
-; GISEL-NEXT:    v_mul_lo_u32 v11, v1, v9
-; GISEL-NEXT:    v_mul_hi_u32 v13, v0, v9
-; GISEL-NEXT:    v_mul_hi_u32 v9, v1, v9
+; GISEL-NEXT:    v_mul_lo_u32 v11, v3, v9
+; GISEL-NEXT:    v_mul_hi_u32 v13, v2, v9
+; GISEL-NEXT:    v_mul_hi_u32 v9, v3, v9
 ; GISEL-NEXT:    v_add_i32_e64 v10, s[4:5], v10, v14
 ; GISEL-NEXT:    v_cndmask_b32_e64 v14, 0, 1, s[4:5]
-; GISEL-NEXT:    v_add_i32_e64 v6, s[4:5], v16, v6
-; GISEL-NEXT:    v_cndmask_b32_e64 v16, 0, 1, s[4:5]
+; GISEL-NEXT:    v_add_i32_e64 v6, s[4:5], v15, v6
+; GISEL-NEXT:    v_cndmask_b32_e64 v15, 0, 1, s[4:5]
 ; GISEL-NEXT:    v_add_i32_e64 v7, s[4:5], v11, v7
 ; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, 1, s[4:5]
 ; GISEL-NEXT:    v_add_i32_e64 v10, s[4:5], v10, v12
 ; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, 1, s[4:5]
-; GISEL-NEXT:    v_add_i32_e64 v6, s[4:5], v6, v17
+; GISEL-NEXT:    v_add_i32_e64 v6, s[4:5], v6, v16
 ; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, s[4:5]
-; GISEL-NEXT:    v_cndmask_b32_e64 v17, 0, 1, vcc
+; GISEL-NEXT:    v_cndmask_b32_e64 v16, 0, 1, vcc
 ; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v7, v13
 ; GISEL-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
 ; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v14, v10
-; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v16, v12
-; GISEL-NEXT:    v_add_i32_e32 v14, vcc, v18, v17
+; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v15, v12
+; GISEL-NEXT:    v_add_i32_e32 v14, vcc, v17, v16
 ; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v11, v13
 ; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v6, v10
 ; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
@@ -1299,60 +1298,60 @@ define <2 x i64> @v_urem_v2i64_oddk_denom(<2 x i64> %num) {
 ; GISEL-NEXT:    v_mul_hi_u32 v6, s8, v6
 ; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v11, v13
 ; GISEL-NEXT:    v_mul_lo_u32 v13, s8, v7
-; GISEL-NEXT:    v_mul_lo_u32 v16, 0, v7
+; GISEL-NEXT:    v_mul_lo_u32 v15, 0, v7
 ; GISEL-NEXT:    v_mul_hi_u32 v7, s8, v7
 ; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v10
 ; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v9, v11
 ; GISEL-NEXT:    v_mul_lo_u32 v8, s8, v8
 ; GISEL-NEXT:    v_mul_lo_u32 v9, s8, v9
 ; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v14, v8
-; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v16, v9
+; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v15, v9
 ; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v8, v6
 ; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v9, v7
-; GISEL-NEXT:    v_sub_i32_e32 v2, vcc, v2, v12
-; GISEL-NEXT:    v_subb_u32_e64 v8, s[4:5], v3, v6, vcc
-; GISEL-NEXT:    v_sub_i32_e64 v3, s[4:5], v3, v6
-; GISEL-NEXT:    v_cmp_le_u32_e64 s[4:5], s8, v2
+; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v12
+; GISEL-NEXT:    v_subb_u32_e64 v8, s[4:5], v1, v6, vcc
+; GISEL-NEXT:    v_sub_i32_e64 v1, s[4:5], v1, v6
+; GISEL-NEXT:    v_cmp_le_u32_e64 s[4:5], s8, v0
 ; GISEL-NEXT:    v_cndmask_b32_e64 v6, 0, -1, s[4:5]
-; GISEL-NEXT:    v_sub_i32_e64 v0, s[4:5], v0, v13
-; GISEL-NEXT:    v_subb_u32_e64 v9, s[6:7], v1, v7, s[4:5]
-; GISEL-NEXT:    v_sub_i32_e64 v1, s[6:7], v1, v7
-; GISEL-NEXT:    v_cmp_le_u32_e64 s[6:7], s8, v0
+; GISEL-NEXT:    v_sub_i32_e64 v2, s[4:5], v2, v13
+; GISEL-NEXT:    v_subb_u32_e64 v9, s[6:7], v3, v7, s[4:5]
+; GISEL-NEXT:    v_sub_i32_e64 v3, s[6:7], v3, v7
+; GISEL-NEXT:    v_cmp_le_u32_e64 s[6:7], s8, v2
 ; GISEL-NEXT:    v_cndmask_b32_e64 v7, 0, -1, s[6:7]
 ; GISEL-NEXT:    v_cmp_eq_u32_e64 s[6:7], 0, v8
-; GISEL-NEXT:    v_cndmask_b32_e64 v6, v15, v6, s[6:7]
-; GISEL-NEXT:    v_subbrev_u32_e32 v3, vcc, 0, v3, vcc
+; GISEL-NEXT:    v_cndmask_b32_e64 v5, v5, v6, s[6:7]
+; GISEL-NEXT:    v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
 ; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v9
-; GISEL-NEXT:    v_cndmask_b32_e32 v5, v5, v7, vcc
-; GISEL-NEXT:    v_subbrev_u32_e64 v1, vcc, 0, v1, s[4:5]
-; GISEL-NEXT:    v_subrev_i32_e32 v7, vcc, s8, v2
-; GISEL-NEXT:    v_subbrev_u32_e32 v3, vcc, 0, v3, vcc
+; GISEL-NEXT:    v_cndmask_b32_e32 v6, v18, v7, vcc
+; GISEL-NEXT:    v_subbrev_u32_e64 v3, vcc, 0, v3, s[4:5]
+; GISEL-NEXT:    v_subrev_i32_e32 v7, vcc, s8, v0
+; GISEL-NEXT:    v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
 ; GISEL-NEXT:    v_cmp_le_u32_e32 vcc, s8, v7
 ; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, -1, vcc
-; GISEL-NEXT:    v_subrev_i32_e32 v11, vcc, s8, v0
-; GISEL-NEXT:    v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
+; GISEL-NEXT:    v_subrev_i32_e32 v11, vcc, s8, v2
+; GISEL-NEXT:    v_subbrev_u32_e32 v3, vcc, 0, v3, vcc
 ; GISEL-NEXT:    v_cmp_le_u32_e32 vcc, s8, v11
 ; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, -1, vcc
-; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v3
-; GISEL-NEXT:    v_cndmask_b32_e32 v10, v19, v10, vcc
-; GISEL-NEXT:    v_subrev_i32_e32 v13, vcc, s8, v7
-; GISEL-NEXT:    v_subbrev_u32_e32 v14, vcc, 0, v3, vcc
 ; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v1
-; GISEL-NEXT:    v_cndmask_b32_e32 v4, v4, v12, vcc
-; GISEL-NEXT:    v_subrev_i32_e32 v12, vcc, s8, v11
-; GISEL-NEXT:    v_subbrev_u32_e32 v15, vcc, 0, v1, vcc
-; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v10
-; GISEL-NEXT:    v_cndmask_b32_e32 v7, v7, v13, vcc
-; GISEL-NEXT:    v_cmp_ne_u32_e64 s[4:5], 0, v4
-; GISEL-NEXT:    v_cndmask_b32_e64 v4, v11, v12, s[4:5]
-; GISEL-NEXT:    v_cndmask_b32_e32 v3, v3, v14, vcc
-; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v6
-; GISEL-NEXT:    v_cndmask_b32_e32 v2, v2, v7, vcc
-; GISEL-NEXT:    v_cndmask_b32_e64 v1, v1, v15, s[4:5]
-; GISEL-NEXT:    v_cmp_ne_u32_e64 s[4:5], 0, v5
-; GISEL-NEXT:    v_cndmask_b32_e64 v0, v0, v4, s[4:5]
-; GISEL-NEXT:    v_cndmask_b32_e64 v1, v9, v1, s[4:5]
-; GISEL-NEXT:    v_cndmask_b32_e32 v3, v8, v3, vcc
+; GISEL-NEXT:    v_cndmask_b32_e32 v4, v4, v10, vcc
+; GISEL-NEXT:    v_subrev_i32_e32 v10, vcc, s8, v7
+; GISEL-NEXT:    v_subbrev_u32_e32 v13, vcc, 0, v1, vcc
+; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v3
+; GISEL-NEXT:    v_cndmask_b32_e32 v12, v19, v12, vcc
+; GISEL-NEXT:    v_subrev_i32_e32 v14, vcc, s8, v11
+; GISEL-NEXT:    v_subbrev_u32_e32 v15, vcc, 0, v3, vcc
+; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v4
+; GISEL-NEXT:    v_cndmask_b32_e32 v4, v7, v10, vcc
+; GISEL-NEXT:    v_cmp_ne_u32_e64 s[4:5], 0, v12
+; GISEL-NEXT:    v_cndmask_b32_e64 v7, v11, v14, s[4:5]
+; GISEL-NEXT:    v_cndmask_b32_e32 v1, v1, v13, vcc
+; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v5
+; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
+; GISEL-NEXT:    v_cndmask_b32_e64 v3, v3, v15, s[4:5]
+; GISEL-NEXT:    v_cmp_ne_u32_e64 s[4:5], 0, v6
+; GISEL-NEXT:    v_cndmask_b32_e64 v2, v2, v7, s[4:5]
+; GISEL-NEXT:    v_cndmask_b32_e32 v1, v8, v1, vcc
+; GISEL-NEXT:    v_cndmask_b32_e64 v3, v9, v3, s[4:5]
 ; GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; CGP-LABEL: v_urem_v2i64_oddk_denom:

diff  --git a/llvm/test/CodeGen/AMDGPU/twoaddr-fma.mir b/llvm/test/CodeGen/AMDGPU/twoaddr-fma.mir
index efe21edee135b..0e8867e1ed95f 100644
--- a/llvm/test/CodeGen/AMDGPU/twoaddr-fma.mir
+++ b/llvm/test/CodeGen/AMDGPU/twoaddr-fma.mir
@@ -164,7 +164,7 @@ body:             |
 ...
 
 # GCN-LABEL: name: test_fmaak_otherimm_src0_f32
-# GCN: %2:vgpr_32 = V_FMAC_F32_e32 1120403456, %0, %2, implicit $mode, implicit $exec
+# GCN: %2:vgpr_32 = V_FMAMK_F32 %0, 1120403456, %1, implicit $mode, implicit $exec
 
 ---
 name:            test_fmaak_otherimm_src0_f32

diff  --git a/llvm/test/CodeGen/AMDGPU/twoaddr-mad.mir b/llvm/test/CodeGen/AMDGPU/twoaddr-mad.mir
index cb7d4032a2445..4e916ae93ada7 100644
--- a/llvm/test/CodeGen/AMDGPU/twoaddr-mad.mir
+++ b/llvm/test/CodeGen/AMDGPU/twoaddr-mad.mir
@@ -152,7 +152,7 @@ body:             |
 # Non-inline immediate uses constant bus already.
 
 # GCN-LABEL: name: test_madak_otherimm_src0_f32
-# GCN: %1:vgpr_32 = V_MAC_F32_e32 1120403456, %0, %1, implicit $mode, implicit $exec
+# GCN: %1:vgpr_32 = V_MADMK_F32 %0, 1120403456, %2:vgpr_32, implicit $mode, implicit $exec
 
 ---
 name:            test_madak_otherimm_src0_f32


        


More information about the llvm-commits mailing list