[llvm] 40222dd - [X86] Fix the vnni machine combine issue.
via llvm-commits
llvm-commits at lists.llvm.org
Fri Apr 28 22:51:29 PDT 2023
Author: Luo, Yuanke
Date: 2023-04-29T13:51:08+08:00
New Revision: 40222ddcf8f54fe523b2d14ab7005ebf412330f1
URL: https://github.com/llvm/llvm-project/commit/40222ddcf8f54fe523b2d14ab7005ebf412330f1
DIFF: https://github.com/llvm/llvm-project/commit/40222ddcf8f54fe523b2d14ab7005ebf412330f1.diff
LOG: [X86] Fix the vnni machine combine issue.
The previous patch (D148980) didn't set InstrIdxForVirtReg correctly
in genAlternativeDpCodeSequence(). This caused the VNNI lit tests to fail
when LLVM_ENABLE_EXPENSIVE_CHECKS is on.
Added:
Modified:
llvm/lib/CodeGen/MachineCombiner.cpp
llvm/lib/Target/X86/X86InstrInfo.cpp
llvm/test/CodeGen/X86/avx512vnni-combine.ll
llvm/test/CodeGen/X86/avxvnni-combine.ll
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/MachineCombiner.cpp b/llvm/lib/CodeGen/MachineCombiner.cpp
index 5c58d3b446925..4ae95bfa7266d 100644
--- a/llvm/lib/CodeGen/MachineCombiner.cpp
+++ b/llvm/lib/CodeGen/MachineCombiner.cpp
@@ -91,8 +91,7 @@ class MachineCombiner : public MachineFunctionPass {
private:
bool combineInstructions(MachineBasicBlock *);
- MachineInstr *getOperandDef(const MachineOperand &MO,
- SmallVectorImpl<MachineInstr *> &InsInstrs);
+ MachineInstr *getOperandDef(const MachineOperand &MO);
bool isTransientMI(const MachineInstr *MI);
unsigned getDepth(SmallVectorImpl<MachineInstr *> &InsInstrs,
DenseMap<unsigned, unsigned> &InstrIdxForVirtReg,
@@ -151,28 +150,11 @@ void MachineCombiner::getAnalysisUsage(AnalysisUsage &AU) const {
}
MachineInstr *
-MachineCombiner::getOperandDef(const MachineOperand &MO,
- SmallVectorImpl<MachineInstr *> &InsInstrs) {
+MachineCombiner::getOperandDef(const MachineOperand &MO) {
MachineInstr *DefInstr = nullptr;
// We need a virtual register definition.
if (MO.isReg() && MO.getReg().isVirtual())
DefInstr = MRI->getUniqueVRegDef(MO.getReg());
- // Since the new instructions are not inserted into the machine function,
- // the def-use information is not added in MRI. So it is possible that
- // the register is defined in new instructions.
- if (!DefInstr) {
- for (auto *MI : InsInstrs) {
- for (const MachineOperand &DefMO : MI->operands()) {
- if (!(DefMO.isReg() && DefMO.getReg().isVirtual()))
- continue;
- if (!DefMO.isDef())
- continue;
- if (DefMO.getReg() != MO.getReg())
- continue;
- DefInstr = MI;
- }
- }
- }
// PHI's have no depth etc.
if (DefInstr && DefInstr->isPHI())
DefInstr = nullptr;
@@ -257,7 +239,7 @@ MachineCombiner::getDepth(SmallVectorImpl<MachineInstr *> &InsInstrs,
LatencyOp = TSchedModel.computeOperandLatency(DefInstr, DefIdx,
InstrPtr, UseIdx);
} else {
- MachineInstr *DefInstr = getOperandDef(MO, InsInstrs);
+ MachineInstr *DefInstr = getOperandDef(MO);
if (DefInstr && (TII->getMachineCombinerTraceStrategy() !=
MachineTraceStrategy::TS_Local ||
DefInstr->getParent() == &MBB)) {
diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp
index aa823e6e7d4c9..560115ac69ba4 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -9855,6 +9855,7 @@ genAlternativeDpCodeSequence(MachineInstr &Root, const TargetInstrInfo &TII,
Madd->untieRegOperand(1);
Madd->removeOperand(1);
Madd->getOperand(0).setReg(NewReg);
+ InstrIdxForVirtReg.insert(std::make_pair(NewReg, 0));
// Create vpaddd.
Register DstReg = Root.getOperand(0).getReg();
bool IsKill = Root.getOperand(1).isKill();
@@ -9862,7 +9863,6 @@ genAlternativeDpCodeSequence(MachineInstr &Root, const TargetInstrInfo &TII,
BuildMI(*MF, MIMetadata(Root), TII.get(AddOpc), DstReg)
.addReg(Root.getOperand(1).getReg(), getKillRegState(IsKill))
.addReg(Madd->getOperand(0).getReg(), getKillRegState(true));
- InstrIdxForVirtReg.insert(std::make_pair(DstReg, 0));
InsInstrs.push_back(Madd);
InsInstrs.push_back(Add);
DelInstrs.push_back(&Root);
diff --git a/llvm/test/CodeGen/X86/avx512vnni-combine.ll b/llvm/test/CodeGen/X86/avx512vnni-combine.ll
index 6a1b25d7f8725..7a0527be05419 100644
--- a/llvm/test/CodeGen/X86/avx512vnni-combine.ll
+++ b/llvm/test/CodeGen/X86/avx512vnni-combine.ll
@@ -181,12 +181,12 @@ define void @bar_512(i32 %0, ptr %1, <8 x i64> %2, ptr %3) {
; CHECK-NEXT: xorl %ecx, %ecx
; CHECK-NEXT: .p2align 4, 0x90
; CHECK-NEXT: .LBB2_7: # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: vmovdqa64 -64(%rsi,%r8), %zmm1
-; CHECK-NEXT: vmovdqa64 (%rsi,%r8), %zmm2
-; CHECK-NEXT: vpdpwssd -64(%rdx,%r8), %zmm0, %zmm1
-; CHECK-NEXT: vmovdqa64 %zmm1, -64(%rsi,%r8)
-; CHECK-NEXT: vpmaddwd (%rdx,%r8), %zmm0, %zmm1
-; CHECK-NEXT: vpaddd %zmm1, %zmm2, %zmm1
+; CHECK-NEXT: vmovdqa64 (%rsi,%r8), %zmm1
+; CHECK-NEXT: vpmaddwd -64(%rdx,%r8), %zmm0, %zmm2
+; CHECK-NEXT: vpaddd -64(%rsi,%r8), %zmm2, %zmm2
+; CHECK-NEXT: vmovdqa64 %zmm2, -64(%rsi,%r8)
+; CHECK-NEXT: vpmaddwd (%rdx,%r8), %zmm0, %zmm2
+; CHECK-NEXT: vpaddd %zmm2, %zmm1, %zmm1
; CHECK-NEXT: vmovdqa64 %zmm1, (%rsi,%r8)
; CHECK-NEXT: addq $2, %rcx
; CHECK-NEXT: subq $-128, %r8
diff --git a/llvm/test/CodeGen/X86/avxvnni-combine.ll b/llvm/test/CodeGen/X86/avxvnni-combine.ll
index 3eabd3ff8b6da..edd27bb63c707 100644
--- a/llvm/test/CodeGen/X86/avxvnni-combine.ll
+++ b/llvm/test/CodeGen/X86/avxvnni-combine.ll
@@ -278,7 +278,8 @@ define void @bar_128(i32 %0, ptr %1, <2 x i64> %2, ptr %3) {
; ADL-NEXT: vpmaddwd -16(%rdx,%r8), %xmm0, %xmm2
; ADL-NEXT: vpaddd -16(%rsi,%r8), %xmm2, %xmm2
; ADL-NEXT: vmovdqa %xmm2, -16(%rsi,%r8)
-; ADL-NEXT: {vex} vpdpwssd (%rdx,%r8), %xmm0, %xmm1
+; ADL-NEXT: vpmaddwd (%rdx,%r8), %xmm0, %xmm2
+; ADL-NEXT: vpaddd %xmm2, %xmm1, %xmm1
; ADL-NEXT: vmovdqa %xmm1, (%rsi,%r8)
; ADL-NEXT: addq $2, %rcx
; ADL-NEXT: addq $32, %r8
@@ -313,12 +314,12 @@ define void @bar_128(i32 %0, ptr %1, <2 x i64> %2, ptr %3) {
; SPR-NEXT: xorl %ecx, %ecx
; SPR-NEXT: .p2align 4, 0x90
; SPR-NEXT: .LBB2_7: # =>This Inner Loop Header: Depth=1
-; SPR-NEXT: vmovdqa -16(%rsi,%r8), %xmm1
-; SPR-NEXT: vmovdqa (%rsi,%r8), %xmm2
-; SPR-NEXT: {vex} vpdpwssd -16(%rdx,%r8), %xmm0, %xmm1
-; SPR-NEXT: vmovdqa %xmm1, -16(%rsi,%r8)
-; SPR-NEXT: vpmaddwd (%rdx,%r8), %xmm0, %xmm1
-; SPR-NEXT: vpaddd %xmm1, %xmm2, %xmm1
+; SPR-NEXT: vmovdqa (%rsi,%r8), %xmm1
+; SPR-NEXT: vpmaddwd -16(%rdx,%r8), %xmm0, %xmm2
+; SPR-NEXT: vpaddd -16(%rsi,%r8), %xmm2, %xmm2
+; SPR-NEXT: vmovdqa %xmm2, -16(%rsi,%r8)
+; SPR-NEXT: vpmaddwd (%rdx,%r8), %xmm0, %xmm2
+; SPR-NEXT: vpaddd %xmm2, %xmm1, %xmm1
; SPR-NEXT: vmovdqa %xmm1, (%rsi,%r8)
; SPR-NEXT: addq $2, %rcx
; SPR-NEXT: addq $32, %r8
@@ -353,12 +354,12 @@ define void @bar_128(i32 %0, ptr %1, <2 x i64> %2, ptr %3) {
; AVX512-NEXT: xorl %ecx, %ecx
; AVX512-NEXT: .p2align 4, 0x90
; AVX512-NEXT: .LBB2_7: # =>This Inner Loop Header: Depth=1
-; AVX512-NEXT: vmovdqa -16(%rsi,%r8), %xmm1
-; AVX512-NEXT: vmovdqa (%rsi,%r8), %xmm2
-; AVX512-NEXT: vpdpwssd -16(%rdx,%r8), %xmm0, %xmm1
-; AVX512-NEXT: vmovdqa %xmm1, -16(%rsi,%r8)
-; AVX512-NEXT: vpmaddwd (%rdx,%r8), %xmm0, %xmm1
-; AVX512-NEXT: vpaddd %xmm1, %xmm2, %xmm1
+; AVX512-NEXT: vmovdqa (%rsi,%r8), %xmm1
+; AVX512-NEXT: vpmaddwd -16(%rdx,%r8), %xmm0, %xmm2
+; AVX512-NEXT: vpaddd -16(%rsi,%r8), %xmm2, %xmm2
+; AVX512-NEXT: vmovdqa %xmm2, -16(%rsi,%r8)
+; AVX512-NEXT: vpmaddwd (%rdx,%r8), %xmm0, %xmm2
+; AVX512-NEXT: vpaddd %xmm2, %xmm1, %xmm1
; AVX512-NEXT: vmovdqa %xmm1, (%rsi,%r8)
; AVX512-NEXT: addq $2, %rcx
; AVX512-NEXT: addq $32, %r8
@@ -718,7 +719,8 @@ define void @bar_256(i32 %0, ptr %1, <4 x i64> %2, ptr %3) {
; ADL-NEXT: vpmaddwd -32(%rdx,%r8), %ymm0, %ymm2
; ADL-NEXT: vpaddd -32(%rsi,%r8), %ymm2, %ymm2
; ADL-NEXT: vmovdqa %ymm2, -32(%rsi,%r8)
-; ADL-NEXT: {vex} vpdpwssd (%rdx,%r8), %ymm0, %ymm1
+; ADL-NEXT: vpmaddwd (%rdx,%r8), %ymm0, %ymm2
+; ADL-NEXT: vpaddd %ymm2, %ymm1, %ymm1
; ADL-NEXT: vmovdqa %ymm1, (%rsi,%r8)
; ADL-NEXT: addq $2, %rcx
; ADL-NEXT: addq $64, %r8
@@ -754,12 +756,12 @@ define void @bar_256(i32 %0, ptr %1, <4 x i64> %2, ptr %3) {
; SPR-NEXT: xorl %ecx, %ecx
; SPR-NEXT: .p2align 4, 0x90
; SPR-NEXT: .LBB5_7: # =>This Inner Loop Header: Depth=1
-; SPR-NEXT: vmovdqa -32(%rsi,%r8), %ymm1
-; SPR-NEXT: vmovdqa (%rsi,%r8), %ymm2
-; SPR-NEXT: {vex} vpdpwssd -32(%rdx,%r8), %ymm0, %ymm1
-; SPR-NEXT: vmovdqa %ymm1, -32(%rsi,%r8)
-; SPR-NEXT: vpmaddwd (%rdx,%r8), %ymm0, %ymm1
-; SPR-NEXT: vpaddd %ymm1, %ymm2, %ymm1
+; SPR-NEXT: vmovdqa (%rsi,%r8), %ymm1
+; SPR-NEXT: vpmaddwd -32(%rdx,%r8), %ymm0, %ymm2
+; SPR-NEXT: vpaddd -32(%rsi,%r8), %ymm2, %ymm2
+; SPR-NEXT: vmovdqa %ymm2, -32(%rsi,%r8)
+; SPR-NEXT: vpmaddwd (%rdx,%r8), %ymm0, %ymm2
+; SPR-NEXT: vpaddd %ymm2, %ymm1, %ymm1
; SPR-NEXT: vmovdqa %ymm1, (%rsi,%r8)
; SPR-NEXT: addq $2, %rcx
; SPR-NEXT: addq $64, %r8
@@ -795,12 +797,12 @@ define void @bar_256(i32 %0, ptr %1, <4 x i64> %2, ptr %3) {
; AVX512-NEXT: xorl %ecx, %ecx
; AVX512-NEXT: .p2align 4, 0x90
; AVX512-NEXT: .LBB5_7: # =>This Inner Loop Header: Depth=1
-; AVX512-NEXT: vmovdqa -32(%rsi,%r8), %ymm1
-; AVX512-NEXT: vmovdqa (%rsi,%r8), %ymm2
-; AVX512-NEXT: vpdpwssd -32(%rdx,%r8), %ymm0, %ymm1
-; AVX512-NEXT: vmovdqa %ymm1, -32(%rsi,%r8)
-; AVX512-NEXT: vpmaddwd (%rdx,%r8), %ymm0, %ymm1
-; AVX512-NEXT: vpaddd %ymm1, %ymm2, %ymm1
+; AVX512-NEXT: vmovdqa (%rsi,%r8), %ymm1
+; AVX512-NEXT: vpmaddwd -32(%rdx,%r8), %ymm0, %ymm2
+; AVX512-NEXT: vpaddd -32(%rsi,%r8), %ymm2, %ymm2
+; AVX512-NEXT: vmovdqa %ymm2, -32(%rsi,%r8)
+; AVX512-NEXT: vpmaddwd (%rdx,%r8), %ymm0, %ymm2
+; AVX512-NEXT: vpaddd %ymm2, %ymm1, %ymm1
; AVX512-NEXT: vmovdqa %ymm1, (%rsi,%r8)
; AVX512-NEXT: addq $2, %rcx
; AVX512-NEXT: addq $64, %r8
More information about the llvm-commits
mailing list