[llvm] 8bed52c - [AMDGPU] Make more use of madmk/fmamk instructions
Jay Foad via llvm-commits
llvm-commits at lists.llvm.org
Wed Mar 2 02:33:04 PST 2022
Author: Jay Foad
Date: 2022-03-02T10:22:10Z
New Revision: 8bed52c9eb8b871aaee3e082b71e60c5dbf2fc46
URL: https://github.com/llvm/llvm-project/commit/8bed52c9eb8b871aaee3e082b71e60c5dbf2fc46
DIFF: https://github.com/llvm/llvm-project/commit/8bed52c9eb8b871aaee3e082b71e60c5dbf2fc46.diff
LOG: [AMDGPU] Make more use of madmk/fmamk instructions
In convertToThreeAddress, handle VOP2 mac/fmac instructions with a
literal src0 operand, since these are prime candidates for
converting to madmk/fmamk.
Previously, the conversion to madmk/fmamk would only happen if src0
(or src1) was a register defined by a move-immediate instruction, but
in many cases these operands have already been folded because
SIFoldOperands runs before TwoAddressInstructionPass.
Differential Revision: https://reviews.llvm.org/D120736
Added:
Modified:
llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll
llvm/test/CodeGen/AMDGPU/twoaddr-fma.mir
llvm/test/CodeGen/AMDGPU/twoaddr-mad.mir
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 05cef2e6616ce..96168fa2f4748 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -3255,6 +3255,7 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI,
Opc == AMDGPU::V_MAC_LEGACY_F32_e64 ||
Opc == AMDGPU::V_FMAC_LEGACY_F32_e32 ||
Opc == AMDGPU::V_FMAC_LEGACY_F32_e64;
+ bool Src0Literal = false;
switch (Opc) {
default:
@@ -3281,7 +3282,7 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI,
return nullptr;
if (Src0->isImm() && !isInlineConstant(MI, Src0Idx, *Src0))
- return nullptr;
+ Src0Literal = true;
break;
}
@@ -3319,7 +3320,7 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI,
};
int64_t Imm;
- if (getFoldableImm(Src2, Imm, &DefMI)) {
+ if (!Src0Literal && getFoldableImm(Src2, Imm, &DefMI)) {
unsigned NewOpc =
IsFMA ? (IsF16 ? AMDGPU::V_FMAAK_F16 : AMDGPU::V_FMAAK_F32)
: (IsF16 ? AMDGPU::V_MADAK_F16 : AMDGPU::V_MADAK_F32);
@@ -3339,7 +3340,7 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI,
unsigned NewOpc = IsFMA
? (IsF16 ? AMDGPU::V_FMAMK_F16 : AMDGPU::V_FMAMK_F32)
: (IsF16 ? AMDGPU::V_MADMK_F16 : AMDGPU::V_MADMK_F32);
- if (getFoldableImm(Src1, Imm, &DefMI)) {
+ if (!Src0Literal && getFoldableImm(Src1, Imm, &DefMI)) {
if (pseudoToMCOpcode(NewOpc) != -1) {
MIB = BuildMI(MBB, MI, MI.getDebugLoc(), get(NewOpc))
.add(*Dst)
@@ -3353,7 +3354,11 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI,
return MIB;
}
}
- if (getFoldableImm(Src0, Imm, &DefMI)) {
+ if (Src0Literal || getFoldableImm(Src0, Imm, &DefMI)) {
+ if (Src0Literal) {
+ Imm = Src0->getImm();
+ DefMI = nullptr;
+ }
if (pseudoToMCOpcode(NewOpc) != -1 &&
isOperandLegal(
MI, AMDGPU::getNamedOperandIdx(NewOpc, AMDGPU::OpName::src0),
@@ -3366,12 +3371,19 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI,
updateLiveVariables(LV, MI, *MIB);
if (LIS)
LIS->ReplaceMachineInstrInMaps(MI, *MIB);
- killDef();
+ if (DefMI)
+ killDef();
return MIB;
}
}
}
+ // VOP2 mac/fmac with a literal operand cannot be converted to VOP3 mad/fma
+ // because VOP3 does not allow a literal operand.
+ // TODO: Remove this restriction for GFX10.
+ if (Src0Literal)
+ return nullptr;
+
unsigned NewOpc = IsFMA ? IsF16 ? AMDGPU::V_FMA_F16_gfx9_e64
: IsF64 ? AMDGPU::V_FMA_F64_e64
: IsLegacy
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll
index e6163221c6991..ae9aeb99b258d 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll
@@ -1102,66 +1102,65 @@ define <2 x i64> @v_urem_v2i64_oddk_denom(<2 x i64> %num) {
; GISEL: ; %bb.0:
; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT: s_mov_b32 s8, 0x12d8fb
-; GISEL-NEXT: v_cvt_f32_u32_e32 v4, s8
+; GISEL-NEXT: v_cvt_f32_ubyte0_e32 v4, 0
+; GISEL-NEXT: v_cvt_f32_u32_e32 v5, s8
; GISEL-NEXT: s_sub_u32 s6, 0, s8
; GISEL-NEXT: s_cselect_b32 s4, 1, 0
-; GISEL-NEXT: v_cvt_f32_ubyte0_e32 v5, 0
-; GISEL-NEXT: v_mov_b32_e32 v6, v4
+; GISEL-NEXT: v_madmk_f32 v6, v4, 0x4f800000, v5
; GISEL-NEXT: s_and_b32 s4, s4, 1
-; GISEL-NEXT: v_mac_f32_e32 v4, 0x4f800000, v5
-; GISEL-NEXT: v_mac_f32_e32 v6, 0x4f800000, v5
-; GISEL-NEXT: v_rcp_iflag_f32_e32 v4, v4
-; GISEL-NEXT: v_rcp_iflag_f32_e32 v5, v6
+; GISEL-NEXT: v_mac_f32_e32 v5, 0x4f800000, v4
+; GISEL-NEXT: v_rcp_iflag_f32_e32 v4, v6
+; GISEL-NEXT: v_rcp_iflag_f32_e32 v5, v5
+; GISEL-NEXT: v_mul_f32_e32 v6, 0x5f7ffffc, v4
; GISEL-NEXT: s_cmp_lg_u32 s4, 0
; GISEL-NEXT: s_subb_u32 s7, 0, 0
; GISEL-NEXT: s_bfe_i32 s4, -1, 0x10000
; GISEL-NEXT: s_bfe_i32 s5, -1, 0x10000
-; GISEL-NEXT: v_mul_f32_e32 v6, 0x5f7ffffc, v4
; GISEL-NEXT: v_mul_f32_e32 v7, 0x5f7ffffc, v5
+; GISEL-NEXT: v_mul_f32_e32 v8, 0x2f800000, v6
; GISEL-NEXT: v_mov_b32_e32 v5, s4
; GISEL-NEXT: v_mov_b32_e32 v4, s5
-; GISEL-NEXT: v_mul_f32_e32 v8, 0x2f800000, v6
+; GISEL-NEXT: v_mul_f32_e32 v9, 0x2f800000, v7
; GISEL-NEXT: s_sub_u32 s9, 0, s8
; GISEL-NEXT: s_cselect_b32 s4, 1, 0
-; GISEL-NEXT: v_mul_f32_e32 v9, 0x2f800000, v7
; GISEL-NEXT: v_trunc_f32_e32 v8, v8
-; GISEL-NEXT: s_and_b32 s4, s4, 1
; GISEL-NEXT: v_trunc_f32_e32 v9, v9
+; GISEL-NEXT: s_and_b32 s4, s4, 1
; GISEL-NEXT: v_mac_f32_e32 v6, 0xcf800000, v8
; GISEL-NEXT: v_cvt_u32_f32_e32 v8, v8
; GISEL-NEXT: v_mac_f32_e32 v7, 0xcf800000, v9
; GISEL-NEXT: v_cvt_u32_f32_e32 v9, v9
; GISEL-NEXT: v_cvt_u32_f32_e32 v6, v6
+; GISEL-NEXT: v_mul_lo_u32 v10, s6, v8
+; GISEL-NEXT: v_cvt_u32_f32_e32 v7, v7
; GISEL-NEXT: s_cmp_lg_u32 s4, 0
; GISEL-NEXT: s_subb_u32 s10, 0, 0
-; GISEL-NEXT: v_mul_lo_u32 v10, s9, v8
+; GISEL-NEXT: v_mul_lo_u32 v11, s9, v9
; GISEL-NEXT: s_bfe_i32 s4, -1, 0x10000
; GISEL-NEXT: s_bfe_i32 s11, -1, 0x10000
-; GISEL-NEXT: v_cvt_u32_f32_e32 v7, v7
-; GISEL-NEXT: v_mul_lo_u32 v11, s6, v9
-; GISEL-NEXT: v_mul_lo_u32 v12, s9, v6
-; GISEL-NEXT: v_mul_lo_u32 v13, s10, v6
-; GISEL-NEXT: v_mul_hi_u32 v14, s9, v6
-; GISEL-NEXT: v_mov_b32_e32 v15, s4
-; GISEL-NEXT: v_mul_lo_u32 v16, s6, v7
-; GISEL-NEXT: v_mul_lo_u32 v17, s7, v7
-; GISEL-NEXT: v_mul_hi_u32 v18, s6, v7
+; GISEL-NEXT: v_mul_lo_u32 v12, s6, v6
+; GISEL-NEXT: v_mul_lo_u32 v13, s7, v6
+; GISEL-NEXT: v_mul_hi_u32 v14, s6, v6
+; GISEL-NEXT: v_mul_lo_u32 v15, s9, v7
+; GISEL-NEXT: v_mul_lo_u32 v16, s10, v7
+; GISEL-NEXT: v_mul_hi_u32 v17, s9, v7
+; GISEL-NEXT: v_mov_b32_e32 v18, s4
; GISEL-NEXT: v_add_i32_e32 v10, vcc, v13, v10
; GISEL-NEXT: v_mul_lo_u32 v13, v8, v12
; GISEL-NEXT: v_mul_hi_u32 v19, v6, v12
; GISEL-NEXT: v_mul_hi_u32 v12, v8, v12
-; GISEL-NEXT: v_add_i32_e32 v11, vcc, v17, v11
-; GISEL-NEXT: v_mul_lo_u32 v17, v9, v16
+; GISEL-NEXT: v_add_i32_e32 v11, vcc, v16, v11
+; GISEL-NEXT: v_mul_lo_u32 v16, v9, v15
; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v14
-; GISEL-NEXT: v_mul_hi_u32 v14, v7, v16
-; GISEL-NEXT: v_mul_hi_u32 v16, v9, v16
-; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v18
-; GISEL-NEXT: v_mul_lo_u32 v18, v7, v11
-; GISEL-NEXT: v_add_i32_e32 v17, vcc, v17, v18
-; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v14, vcc, v17, v14
+; GISEL-NEXT: v_mul_hi_u32 v14, v7, v15
+; GISEL-NEXT: v_mul_hi_u32 v15, v9, v15
+; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v17
+; GISEL-NEXT: v_mul_lo_u32 v17, v7, v11
+; GISEL-NEXT: v_add_i32_e32 v16, vcc, v16, v17
+; GISEL-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v14, vcc, v16, v14
; GISEL-NEXT: v_mul_lo_u32 v14, v6, v10
-; GISEL-NEXT: v_mul_lo_u32 v17, v8, v10
+; GISEL-NEXT: v_mul_lo_u32 v16, v8, v10
; GISEL-NEXT: v_add_i32_e64 v13, s[4:5], v13, v14
; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, s[4:5]
; GISEL-NEXT: v_add_i32_e64 v13, s[4:5], v13, v19
@@ -1170,124 +1169,124 @@ define <2 x i64> @v_urem_v2i64_oddk_denom(<2 x i64> %num) {
; GISEL-NEXT: v_cndmask_b32_e64 v19, 0, 1, s[4:5]
; GISEL-NEXT: v_add_i32_e64 v14, s[4:5], v14, v19
; GISEL-NEXT: v_mul_lo_u32 v19, v9, v11
-; GISEL-NEXT: v_add_i32_e64 v12, s[4:5], v17, v12
-; GISEL-NEXT: v_cndmask_b32_e64 v17, 0, 1, s[4:5]
+; GISEL-NEXT: v_add_i32_e64 v12, s[4:5], v16, v12
+; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, s[4:5]
; GISEL-NEXT: v_add_i32_e64 v12, s[4:5], v12, v13
; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, s[4:5]
-; GISEL-NEXT: v_add_i32_e64 v13, s[4:5], v17, v13
-; GISEL-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v17, vcc, v18, v17
-; GISEL-NEXT: v_mul_hi_u32 v18, v7, v11
+; GISEL-NEXT: v_add_i32_e64 v13, s[4:5], v16, v13
+; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v16, vcc, v17, v16
+; GISEL-NEXT: v_mul_hi_u32 v17, v7, v11
; GISEL-NEXT: v_mul_hi_u32 v11, v9, v11
-; GISEL-NEXT: v_add_i32_e32 v16, vcc, v19, v16
+; GISEL-NEXT: v_add_i32_e32 v15, vcc, v19, v15
; GISEL-NEXT: v_cndmask_b32_e64 v19, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v16, vcc, v16, v18
-; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v18, vcc, v19, v18
+; GISEL-NEXT: v_add_i32_e32 v15, vcc, v15, v17
+; GISEL-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v17, vcc, v19, v17
; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v14
; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v16, vcc, v16, v17
-; GISEL-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v15, vcc, v15, v16
+; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc
; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v14
-; GISEL-NEXT: v_add_i32_e32 v14, vcc, v18, v17
+; GISEL-NEXT: v_add_i32_e32 v14, vcc, v17, v16
; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v13
; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v14
; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v12
; GISEL-NEXT: v_addc_u32_e32 v8, vcc, v8, v10, vcc
-; GISEL-NEXT: v_mul_lo_u32 v10, s9, v6
-; GISEL-NEXT: v_mul_lo_u32 v12, s10, v6
-; GISEL-NEXT: v_mul_hi_u32 v13, s9, v6
-; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v16
+; GISEL-NEXT: v_mul_lo_u32 v10, s6, v6
+; GISEL-NEXT: v_mul_lo_u32 v12, s7, v6
+; GISEL-NEXT: v_mul_hi_u32 v13, s6, v6
+; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v15
; GISEL-NEXT: v_addc_u32_e32 v9, vcc, v9, v11, vcc
-; GISEL-NEXT: v_mul_lo_u32 v11, s6, v7
-; GISEL-NEXT: v_mul_lo_u32 v14, s7, v7
-; GISEL-NEXT: v_mul_hi_u32 v16, s6, v7
-; GISEL-NEXT: v_mul_lo_u32 v17, s9, v8
-; GISEL-NEXT: v_mul_lo_u32 v18, v8, v10
+; GISEL-NEXT: v_mul_lo_u32 v11, s9, v7
+; GISEL-NEXT: v_mul_lo_u32 v14, s10, v7
+; GISEL-NEXT: v_mul_hi_u32 v15, s9, v7
+; GISEL-NEXT: v_mul_lo_u32 v16, s6, v8
+; GISEL-NEXT: v_mul_lo_u32 v17, v8, v10
; GISEL-NEXT: v_mul_hi_u32 v19, v6, v10
; GISEL-NEXT: v_mul_hi_u32 v10, v8, v10
-; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v17
-; GISEL-NEXT: v_mul_lo_u32 v17, s6, v9
-; GISEL-NEXT: v_add_i32_e32 v14, vcc, v14, v17
-; GISEL-NEXT: v_mul_lo_u32 v17, v9, v11
+; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v16
+; GISEL-NEXT: v_mul_lo_u32 v16, s9, v9
+; GISEL-NEXT: v_add_i32_e32 v14, vcc, v14, v16
+; GISEL-NEXT: v_mul_lo_u32 v16, v9, v11
; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v13
; GISEL-NEXT: v_mul_hi_u32 v13, v7, v11
; GISEL-NEXT: v_mul_hi_u32 v11, v9, v11
-; GISEL-NEXT: v_add_i32_e32 v14, vcc, v14, v16
-; GISEL-NEXT: v_mul_lo_u32 v16, v7, v14
-; GISEL-NEXT: v_add_i32_e32 v16, vcc, v17, v16
-; GISEL-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v13, vcc, v16, v13
+; GISEL-NEXT: v_add_i32_e32 v14, vcc, v14, v15
+; GISEL-NEXT: v_mul_lo_u32 v15, v7, v14
+; GISEL-NEXT: v_add_i32_e32 v15, vcc, v16, v15
+; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v13, vcc, v15, v13
; GISEL-NEXT: v_mul_lo_u32 v13, v6, v12
-; GISEL-NEXT: v_mul_lo_u32 v16, v8, v12
-; GISEL-NEXT: v_add_i32_e64 v13, s[4:5], v18, v13
-; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, s[4:5]
+; GISEL-NEXT: v_mul_lo_u32 v15, v8, v12
+; GISEL-NEXT: v_add_i32_e64 v13, s[4:5], v17, v13
+; GISEL-NEXT: v_cndmask_b32_e64 v17, 0, 1, s[4:5]
; GISEL-NEXT: v_add_i32_e64 v13, s[4:5], v13, v19
; GISEL-NEXT: v_mul_hi_u32 v13, v6, v12
; GISEL-NEXT: v_cndmask_b32_e64 v19, 0, 1, s[4:5]
-; GISEL-NEXT: v_add_i32_e64 v18, s[4:5], v18, v19
+; GISEL-NEXT: v_add_i32_e64 v17, s[4:5], v17, v19
; GISEL-NEXT: v_mul_lo_u32 v19, v9, v14
-; GISEL-NEXT: v_add_i32_e64 v10, s[4:5], v16, v10
-; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, s[4:5]
+; GISEL-NEXT: v_add_i32_e64 v10, s[4:5], v15, v10
+; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, s[4:5]
; GISEL-NEXT: v_add_i32_e64 v10, s[4:5], v10, v13
; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, s[4:5]
-; GISEL-NEXT: v_add_i32_e64 v13, s[4:5], v16, v13
-; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v16, vcc, v17, v16
-; GISEL-NEXT: v_mul_hi_u32 v17, v7, v14
+; GISEL-NEXT: v_add_i32_e64 v13, s[4:5], v15, v13
+; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v15, vcc, v16, v15
+; GISEL-NEXT: v_mul_hi_u32 v16, v7, v14
; GISEL-NEXT: v_add_i32_e32 v11, vcc, v19, v11
; GISEL-NEXT: v_cndmask_b32_e64 v19, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v17
-; GISEL-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v17, vcc, v19, v17
+; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v16
+; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v16, vcc, v19, v16
; GISEL-NEXT: v_mov_b32_e32 v19, s11
; GISEL-NEXT: v_mul_hi_u32 v12, v8, v12
; GISEL-NEXT: v_mul_hi_u32 v14, v9, v14
-; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v18
-; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v16
-; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v18
-; GISEL-NEXT: v_add_i32_e32 v16, vcc, v17, v16
+; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v17
+; GISEL-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v15
+; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v17
+; GISEL-NEXT: v_add_i32_e32 v15, vcc, v16, v15
; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v13
-; GISEL-NEXT: v_add_i32_e32 v13, vcc, v14, v16
+; GISEL-NEXT: v_add_i32_e32 v13, vcc, v14, v15
; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v10
; GISEL-NEXT: v_addc_u32_e32 v8, vcc, v8, v12, vcc
-; GISEL-NEXT: v_mul_lo_u32 v10, v3, v6
-; GISEL-NEXT: v_mul_hi_u32 v12, v2, v6
-; GISEL-NEXT: v_mul_hi_u32 v6, v3, v6
+; GISEL-NEXT: v_mul_lo_u32 v10, v1, v6
+; GISEL-NEXT: v_mul_hi_u32 v12, v0, v6
+; GISEL-NEXT: v_mul_hi_u32 v6, v1, v6
; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v11
; GISEL-NEXT: v_addc_u32_e32 v9, vcc, v9, v13, vcc
-; GISEL-NEXT: v_mul_lo_u32 v11, v1, v7
-; GISEL-NEXT: v_mul_hi_u32 v13, v0, v7
-; GISEL-NEXT: v_mul_hi_u32 v7, v1, v7
-; GISEL-NEXT: v_mul_lo_u32 v14, v2, v8
-; GISEL-NEXT: v_mul_lo_u32 v16, v3, v8
-; GISEL-NEXT: v_mul_hi_u32 v17, v2, v8
-; GISEL-NEXT: v_mul_hi_u32 v8, v3, v8
-; GISEL-NEXT: v_mul_lo_u32 v18, v0, v9
-; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v18
-; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc
+; GISEL-NEXT: v_mul_lo_u32 v11, v3, v7
+; GISEL-NEXT: v_mul_hi_u32 v13, v2, v7
+; GISEL-NEXT: v_mul_hi_u32 v7, v3, v7
+; GISEL-NEXT: v_mul_lo_u32 v14, v0, v8
+; GISEL-NEXT: v_mul_lo_u32 v15, v1, v8
+; GISEL-NEXT: v_mul_hi_u32 v16, v0, v8
+; GISEL-NEXT: v_mul_hi_u32 v8, v1, v8
+; GISEL-NEXT: v_mul_lo_u32 v17, v2, v9
+; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v17
+; GISEL-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc
; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v13
-; GISEL-NEXT: v_mul_lo_u32 v11, v1, v9
-; GISEL-NEXT: v_mul_hi_u32 v13, v0, v9
-; GISEL-NEXT: v_mul_hi_u32 v9, v1, v9
+; GISEL-NEXT: v_mul_lo_u32 v11, v3, v9
+; GISEL-NEXT: v_mul_hi_u32 v13, v2, v9
+; GISEL-NEXT: v_mul_hi_u32 v9, v3, v9
; GISEL-NEXT: v_add_i32_e64 v10, s[4:5], v10, v14
; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, s[4:5]
-; GISEL-NEXT: v_add_i32_e64 v6, s[4:5], v16, v6
-; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, s[4:5]
+; GISEL-NEXT: v_add_i32_e64 v6, s[4:5], v15, v6
+; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, s[4:5]
; GISEL-NEXT: v_add_i32_e64 v7, s[4:5], v11, v7
; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, s[4:5]
; GISEL-NEXT: v_add_i32_e64 v10, s[4:5], v10, v12
; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, s[4:5]
-; GISEL-NEXT: v_add_i32_e64 v6, s[4:5], v6, v17
+; GISEL-NEXT: v_add_i32_e64 v6, s[4:5], v6, v16
; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, s[4:5]
-; GISEL-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc
+; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc
; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v13
; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc
; GISEL-NEXT: v_add_i32_e32 v10, vcc, v14, v10
-; GISEL-NEXT: v_add_i32_e32 v12, vcc, v16, v12
-; GISEL-NEXT: v_add_i32_e32 v14, vcc, v18, v17
+; GISEL-NEXT: v_add_i32_e32 v12, vcc, v15, v12
+; GISEL-NEXT: v_add_i32_e32 v14, vcc, v17, v16
; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v13
; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v10
; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
@@ -1299,60 +1298,60 @@ define <2 x i64> @v_urem_v2i64_oddk_denom(<2 x i64> %num) {
; GISEL-NEXT: v_mul_hi_u32 v6, s8, v6
; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v13
; GISEL-NEXT: v_mul_lo_u32 v13, s8, v7
-; GISEL-NEXT: v_mul_lo_u32 v16, 0, v7
+; GISEL-NEXT: v_mul_lo_u32 v15, 0, v7
; GISEL-NEXT: v_mul_hi_u32 v7, s8, v7
; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v10
; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v11
; GISEL-NEXT: v_mul_lo_u32 v8, s8, v8
; GISEL-NEXT: v_mul_lo_u32 v9, s8, v9
; GISEL-NEXT: v_add_i32_e32 v8, vcc, v14, v8
-; GISEL-NEXT: v_add_i32_e32 v9, vcc, v16, v9
+; GISEL-NEXT: v_add_i32_e32 v9, vcc, v15, v9
; GISEL-NEXT: v_add_i32_e32 v6, vcc, v8, v6
; GISEL-NEXT: v_add_i32_e32 v7, vcc, v9, v7
-; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v2, v12
-; GISEL-NEXT: v_subb_u32_e64 v8, s[4:5], v3, v6, vcc
-; GISEL-NEXT: v_sub_i32_e64 v3, s[4:5], v3, v6
-; GISEL-NEXT: v_cmp_le_u32_e64 s[4:5], s8, v2
+; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v12
+; GISEL-NEXT: v_subb_u32_e64 v8, s[4:5], v1, v6, vcc
+; GISEL-NEXT: v_sub_i32_e64 v1, s[4:5], v1, v6
+; GISEL-NEXT: v_cmp_le_u32_e64 s[4:5], s8, v0
; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, -1, s[4:5]
-; GISEL-NEXT: v_sub_i32_e64 v0, s[4:5], v0, v13
-; GISEL-NEXT: v_subb_u32_e64 v9, s[6:7], v1, v7, s[4:5]
-; GISEL-NEXT: v_sub_i32_e64 v1, s[6:7], v1, v7
-; GISEL-NEXT: v_cmp_le_u32_e64 s[6:7], s8, v0
+; GISEL-NEXT: v_sub_i32_e64 v2, s[4:5], v2, v13
+; GISEL-NEXT: v_subb_u32_e64 v9, s[6:7], v3, v7, s[4:5]
+; GISEL-NEXT: v_sub_i32_e64 v3, s[6:7], v3, v7
+; GISEL-NEXT: v_cmp_le_u32_e64 s[6:7], s8, v2
; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, -1, s[6:7]
; GISEL-NEXT: v_cmp_eq_u32_e64 s[6:7], 0, v8
-; GISEL-NEXT: v_cndmask_b32_e64 v6, v15, v6, s[6:7]
-; GISEL-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc
+; GISEL-NEXT: v_cndmask_b32_e64 v5, v5, v6, s[6:7]
+; GISEL-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v9
-; GISEL-NEXT: v_cndmask_b32_e32 v5, v5, v7, vcc
-; GISEL-NEXT: v_subbrev_u32_e64 v1, vcc, 0, v1, s[4:5]
-; GISEL-NEXT: v_subrev_i32_e32 v7, vcc, s8, v2
-; GISEL-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc
+; GISEL-NEXT: v_cndmask_b32_e32 v6, v18, v7, vcc
+; GISEL-NEXT: v_subbrev_u32_e64 v3, vcc, 0, v3, s[4:5]
+; GISEL-NEXT: v_subrev_i32_e32 v7, vcc, s8, v0
+; GISEL-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
; GISEL-NEXT: v_cmp_le_u32_e32 vcc, s8, v7
; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, -1, vcc
-; GISEL-NEXT: v_subrev_i32_e32 v11, vcc, s8, v0
-; GISEL-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
+; GISEL-NEXT: v_subrev_i32_e32 v11, vcc, s8, v2
+; GISEL-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc
; GISEL-NEXT: v_cmp_le_u32_e32 vcc, s8, v11
; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, -1, vcc
-; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3
-; GISEL-NEXT: v_cndmask_b32_e32 v10, v19, v10, vcc
-; GISEL-NEXT: v_subrev_i32_e32 v13, vcc, s8, v7
-; GISEL-NEXT: v_subbrev_u32_e32 v14, vcc, 0, v3, vcc
; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
-; GISEL-NEXT: v_cndmask_b32_e32 v4, v4, v12, vcc
-; GISEL-NEXT: v_subrev_i32_e32 v12, vcc, s8, v11
-; GISEL-NEXT: v_subbrev_u32_e32 v15, vcc, 0, v1, vcc
-; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v10
-; GISEL-NEXT: v_cndmask_b32_e32 v7, v7, v13, vcc
-; GISEL-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v4
-; GISEL-NEXT: v_cndmask_b32_e64 v4, v11, v12, s[4:5]
-; GISEL-NEXT: v_cndmask_b32_e32 v3, v3, v14, vcc
-; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v6
-; GISEL-NEXT: v_cndmask_b32_e32 v2, v2, v7, vcc
-; GISEL-NEXT: v_cndmask_b32_e64 v1, v1, v15, s[4:5]
-; GISEL-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v5
-; GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v4, s[4:5]
-; GISEL-NEXT: v_cndmask_b32_e64 v1, v9, v1, s[4:5]
-; GISEL-NEXT: v_cndmask_b32_e32 v3, v8, v3, vcc
+; GISEL-NEXT: v_cndmask_b32_e32 v4, v4, v10, vcc
+; GISEL-NEXT: v_subrev_i32_e32 v10, vcc, s8, v7
+; GISEL-NEXT: v_subbrev_u32_e32 v13, vcc, 0, v1, vcc
+; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3
+; GISEL-NEXT: v_cndmask_b32_e32 v12, v19, v12, vcc
+; GISEL-NEXT: v_subrev_i32_e32 v14, vcc, s8, v11
+; GISEL-NEXT: v_subbrev_u32_e32 v15, vcc, 0, v3, vcc
+; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v4
+; GISEL-NEXT: v_cndmask_b32_e32 v4, v7, v10, vcc
+; GISEL-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v12
+; GISEL-NEXT: v_cndmask_b32_e64 v7, v11, v14, s[4:5]
+; GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v13, vcc
+; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v5
+; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
+; GISEL-NEXT: v_cndmask_b32_e64 v3, v3, v15, s[4:5]
+; GISEL-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v6
+; GISEL-NEXT: v_cndmask_b32_e64 v2, v2, v7, s[4:5]
+; GISEL-NEXT: v_cndmask_b32_e32 v1, v8, v1, vcc
+; GISEL-NEXT: v_cndmask_b32_e64 v3, v9, v3, s[4:5]
; GISEL-NEXT: s_setpc_b64 s[30:31]
;
; CGP-LABEL: v_urem_v2i64_oddk_denom:
diff --git a/llvm/test/CodeGen/AMDGPU/twoaddr-fma.mir b/llvm/test/CodeGen/AMDGPU/twoaddr-fma.mir
index efe21edee135b..0e8867e1ed95f 100644
--- a/llvm/test/CodeGen/AMDGPU/twoaddr-fma.mir
+++ b/llvm/test/CodeGen/AMDGPU/twoaddr-fma.mir
@@ -164,7 +164,7 @@ body: |
...
# GCN-LABEL: name: test_fmaak_otherimm_src0_f32
-# GCN: %2:vgpr_32 = V_FMAC_F32_e32 1120403456, %0, %2, implicit $mode, implicit $exec
+# GCN: %2:vgpr_32 = V_FMAMK_F32 %0, 1120403456, %1, implicit $mode, implicit $exec
---
name: test_fmaak_otherimm_src0_f32
diff --git a/llvm/test/CodeGen/AMDGPU/twoaddr-mad.mir b/llvm/test/CodeGen/AMDGPU/twoaddr-mad.mir
index cb7d4032a2445..4e916ae93ada7 100644
--- a/llvm/test/CodeGen/AMDGPU/twoaddr-mad.mir
+++ b/llvm/test/CodeGen/AMDGPU/twoaddr-mad.mir
@@ -152,7 +152,7 @@ body: |
# Non-inline immediate uses constant bus already.
# GCN-LABEL: name: test_madak_otherimm_src0_f32
-# GCN: %1:vgpr_32 = V_MAC_F32_e32 1120403456, %0, %1, implicit $mode, implicit $exec
+# GCN: %1:vgpr_32 = V_MADMK_F32 %0, 1120403456, %2:vgpr_32, implicit $mode, implicit $exec
---
name: test_madak_otherimm_src0_f32
More information about the llvm-commits
mailing list