[llvm] 1e46dcb - [TwoAddressInstructionPass] Put all new instructions into DistanceMap

Thu Oct 28 11:12:11 PDT 2021

Author: Guozhi Wei
Date: 2021-10-28T11:11:59-07:00
New Revision: 1e46dcb77b51846c7ea97cba7fd95bc1f0911a09

URL: https://github.com/llvm/llvm-project/commit/1e46dcb77b51846c7ea97cba7fd95bc1f0911a09
DIFF: https://github.com/llvm/llvm-project/commit/1e46dcb77b51846c7ea97cba7fd95bc1f0911a09.diff

LOG: [TwoAddressInstructionPass] Put all new instructions into DistanceMap

In function convertInstTo3Addr, after converting a two address instruction into
three address instruction, only the last new instruction is inserted into
DistanceMap. This is wrong, DistanceMap should track all instructions from the
beginning of current MBB to the working instruction. When a two address
instruction is converted to three address instruction, multiple instructions may
be generated (usually an extra COPY is generated), all of them should be
inserted into DistanceMap.

Similarly when unfolding memory operand in function tryInstructionTransform
DistanceMap is not maintained correctly.

Differential Revision: https://reviews.llvm.org/D111857

Added: 
    llvm/test/CodeGen/X86/distancemap.mir

Modified: 
    llvm/lib/CodeGen/TwoAddressInstructionPass.cpp
    llvm/test/CodeGen/X86/addcarry.ll
    llvm/test/CodeGen/X86/sadd_sat_plus.ll
    llvm/test/CodeGen/X86/sadd_sat_vec.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp b/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp
index 461d25e39568..fdd2bc6c9f8b 100644

--- a/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp
+++ b/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp
@@ -134,7 +134,7 @@ class TwoAddressInstructionPass : public MachineFunctionPass {
 
   bool convertInstTo3Addr(MachineBasicBlock::iterator &mi,
                           MachineBasicBlock::iterator &nmi, Register RegA,
-                          Register RegB, unsigned Dist);
+                          Register RegB, unsigned &Dist);
 
   bool isDefTooClose(Register Reg, unsigned Dist, MachineInstr *MI);
 
@@ -146,7 +146,7 @@ class TwoAddressInstructionPass : public MachineFunctionPass {
   bool tryInstructionTransform(MachineBasicBlock::iterator &mi,
                                MachineBasicBlock::iterator &nmi,
                                unsigned SrcIdx, unsigned DstIdx,
-                               unsigned Dist, bool shouldOnlyCommute);
+                               unsigned &Dist, bool shouldOnlyCommute);
 
   bool tryInstructionCommute(MachineInstr *MI,
                              unsigned DstOpIdx,
@@ -674,7 +674,7 @@ bool TwoAddressInstructionPass::isProfitableToConv3Addr(Register RegA,
 /// Return true if this transformation was successful.
 bool TwoAddressInstructionPass::convertInstTo3Addr(
     MachineBasicBlock::iterator &mi, MachineBasicBlock::iterator &nmi,
-    Register RegA, Register RegB, unsigned Dist) {
+    Register RegA, Register RegB, unsigned &Dist) {
   MachineInstrSpan MIS(mi, MBB);
   MachineInstr *NewMI = TII->convertToThreeAddress(*mi, LV);
   if (!NewMI)
@@ -723,7 +723,9 @@ bool TwoAddressInstructionPass::convertInstTo3Addr(
   if (LIS && !SingleInst)
     LIS->repairIntervalsInRange(MBB, MIS.begin(), MIS.end(), OrigRegs);
 
-  DistanceMap.insert(std::make_pair(NewMI, Dist));
+  for (MachineInstr &MI : MIS)
+    DistanceMap.insert(std::make_pair(&MI, Dist++));
+  Dist--;
   mi = NewMI;
   nmi = std::next(mi);
 
@@ -1226,7 +1228,7 @@ bool TwoAddressInstructionPass::
 tryInstructionTransform(MachineBasicBlock::iterator &mi,
                         MachineBasicBlock::iterator &nmi,
                         unsigned SrcIdx, unsigned DstIdx,
-                        unsigned Dist, bool shouldOnlyCommute) {
+                        unsigned &Dist, bool shouldOnlyCommute) {
   if (OptLevel == CodeGenOpt::None)
     return false;
 
@@ -1334,6 +1336,8 @@ tryInstructionTransform(MachineBasicBlock::iterator &mi,
         // look "normal" to the transformation logic.
         MBB->insert(mi, NewMIs[0]);
         MBB->insert(mi, NewMIs[1]);
+        DistanceMap.insert(std::make_pair(NewMIs[0], Dist++));
+        DistanceMap.insert(std::make_pair(NewMIs[1], Dist));
 
         LLVM_DEBUG(dbgs() << "2addr:    NEW LOAD: " << *NewMIs[0]
                           << "2addr:    NEW INST: " << *NewMIs[1]);
@@ -1389,6 +1393,7 @@ tryInstructionTransform(MachineBasicBlock::iterator &mi,
           }
 
           MI.eraseFromParent();
+          DistanceMap.erase(&MI);
 
           // Update LiveIntervals.
           if (LIS) {
@@ -1405,6 +1410,9 @@ tryInstructionTransform(MachineBasicBlock::iterator &mi,
           LLVM_DEBUG(dbgs() << "2addr: ABANDONING UNFOLD\n");
           NewMIs[0]->eraseFromParent();
           NewMIs[1]->eraseFromParent();
+          DistanceMap.erase(NewMIs[0]);
+          DistanceMap.erase(NewMIs[1]);
+          Dist--;
         }
       }
     }

diff  --git a/llvm/test/CodeGen/X86/addcarry.ll b/llvm/test/CodeGen/X86/addcarry.ll
index 0db0d4ebd43c..8b6a7f628b91 100644
--- a/llvm/test/CodeGen/X86/addcarry.ll
+++ b/llvm/test/CodeGen/X86/addcarry.ll
@@ -179,7 +179,7 @@ define i8 @e(i32* nocapture %a, i32 %b) nounwind {
 ; CHECK-NEXT:    leal (%rsi,%rcx), %edx
 ; CHECK-NEXT:    addl %esi, %edx
 ; CHECK-NEXT:    setb %al
-; CHECK-NEXT:    addl %esi, %ecx
+; CHECK-NEXT:    addl %ecx, %esi
 ; CHECK-NEXT:    movl %edx, (%rdi)
 ; CHECK-NEXT:    adcb $0, %al
 ; CHECK-NEXT:    retq

diff  --git a/llvm/test/CodeGen/X86/distancemap.mir b/llvm/test/CodeGen/X86/distancemap.mir
new file mode 100644
index 000000000000..b389a0c6cae7
--- /dev/null
+++ b/llvm/test/CodeGen/X86/distancemap.mir
@@ -0,0 +1,95 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc %s -o - -mtriple=x86_64-unknown-linux -run-pass=twoaddressinstruction -verify-machineinstrs | FileCheck %s
+
+# In TwoAddressInstructionPass, new instructions should be added to DistanceMap.
+# In this case, function convertInstTo3Addr is called on the first ADD
+# instruction, extra COPY instructions are generated. If they are not added to
+# DistanceMap, function noUseAfterLastDef computes a wrong value for the later
+# ADD instruction, and a 
diff erent commute decision is made.
+
+--- |
+  ; ModuleID = 't.ll'
+  source_filename = "t.ll"
+  target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+  target triple = "x86_64-unknown-linux"
+
+  declare i16 @llvm.sadd.sat.i16(i16, i16)
+
+  define signext i16 @func16(i16 signext %x, i16 signext %y, i16 signext %z) nounwind {
+    %a = mul i16 %y, %z
+    %tmp = call i16 @llvm.sadd.sat.i16(i16 %x, i16 %a)
+    ret i16 %tmp
+  }
+
+...
+---
+name:            func16
+alignment:       16
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: gr32 }
+  - { id: 1, class: gr32 }
+  - { id: 2, class: gr32 }
+  - { id: 3, class: gr16 }
+  - { id: 4, class: gr32 }
+  - { id: 5, class: gr16 }
+  - { id: 6, class: gr16 }
+  - { id: 7, class: gr32 }
+  - { id: 8, class: gr32 }
+  - { id: 9, class: gr32 }
+  - { id: 10, class: gr16 }
+  - { id: 11, class: gr32 }
+  - { id: 13, class: gr32 }
+  - { id: 14, class: gr16 }
+frameInfo:
+  maxAlignment:    1
+body:             |
+  bb.0 :
+    liveins: $edi, $esi, $edx
+    ; CHECK-LABEL: name: func16
+    ; CHECK: liveins: $edi, $esi, $edx
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:gr32 = COPY killed $edx
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gr32 = COPY killed $esi
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gr32 = COPY killed $edi
+    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gr16 = COPY killed [[COPY2]].sub_16bit
+    ; CHECK-NEXT: [[COPY4:%[0-9]+]]:gr32 = COPY [[COPY1]]
+    ; CHECK-NEXT: [[IMUL32rr:%[0-9]+]]:gr32 = IMUL32rr [[IMUL32rr]], killed [[COPY]], implicit-def dead $eflags
+    ; CHECK-NEXT: [[COPY4:%[0-9]+]]:gr16 = COPY killed [[IMUL32rr]].sub_16bit
+    ; CHECK-NEXT: [[DEF:%[0-9]+]]:gr64_nosp = IMPLICIT_DEF
+    ; CHECK-NEXT: [[DEF]].sub_16bit:gr64_nosp = COPY [[COPY3]]
+    ; CHECK-NEXT: [[DEF1:%[0-9]+]]:gr64_nosp = IMPLICIT_DEF
+    ; CHECK-NEXT: [[DEF1]].sub_16bit:gr64_nosp = COPY [[COPY4]]
+    ; CHECK-NEXT: [[LEA64_32r:%[0-9]+]]:gr32 = LEA64_32r killed [[DEF]], 1, killed [[DEF1]], 0, $noreg
+    ; CHECK-NEXT: [[COPY5:%[0-9]+]]:gr16 = COPY killed [[LEA64_32r]].sub_16bit
+    ; CHECK-NEXT: [[MOVSX32rr16_:%[0-9]+]]:gr32 = MOVSX32rr16 killed [[COPY5]]
+    ; CHECK-NEXT: [[COPY6:%[0-9]+]]:gr32 = COPY [[MOVSX32rr16_]]
+    ; CHECK-NEXT: [[SAR32ri:%[0-9]+]]:gr32 = SAR32ri [[SAR32ri]], 15, implicit-def dead $eflags
+    ; CHECK-NEXT: [[COPY6:%[0-9]+]]:gr32 = COPY [[SAR32ri]]
+    ; CHECK-NEXT: [[XOR32ri:%[0-9]+]]:gr32 = XOR32ri [[XOR32ri]], -32768, implicit-def dead $eflags
+    ; CHECK-NEXT: [[COPY6:%[0-9]+]]:gr16 = COPY [[COPY3]]
+    ; CHECK-NEXT: [[ADD16rr:%[0-9]+]]:gr16 = ADD16rr [[ADD16rr]], killed [[COPY4]], implicit-def $eflags
+    ; CHECK-NEXT: undef %11.sub_16bit:gr32 = COPY killed [[ADD16rr]]
+    ; CHECK-NEXT: [[COPY6:%[0-9]+]]:gr32 = COPY [[XOR32ri]]
+    ; CHECK-NEXT: [[CMOV32rr:%[0-9]+]]:gr32 = CMOV32rr [[CMOV32rr]], killed %11, 1, implicit killed $eflags
+    ; CHECK-NEXT: [[COPY6:%[0-9]+]]:gr16 = COPY killed [[CMOV32rr]].sub_16bit
+    ; CHECK-NEXT: $ax = COPY killed [[COPY6]]
+    ; CHECK-NEXT: RET 0, killed $ax
+    %2:gr32 = COPY killed $edx
+    %1:gr32 = COPY killed $esi
+    %0:gr32 = COPY killed $edi
+    %3:gr16 = COPY killed %0.sub_16bit:gr32
+    %4:gr32 = IMUL32rr killed %1:gr32, killed %2:gr32, implicit-def dead $eflags
+    %5:gr16 = COPY killed %4.sub_16bit:gr32
+    %6:gr16 = ADD16rr %3:gr16, %5:gr16, implicit-def dead $eflags
+    %7:gr32 = MOVSX32rr16 killed %6:gr16
+    %8:gr32 = SAR32ri killed %7:gr32, 15, implicit-def dead $eflags
+    %9:gr32 = XOR32ri killed %8:gr32, -32768, implicit-def dead $eflags
+    %10:gr16 = ADD16rr killed %3:gr16, killed %5:gr16, implicit-def $eflags
+    %11:gr32 = INSERT_SUBREG undef %12:gr32, killed %10:gr16, %subreg.sub_16bit
+    %13:gr32 = CMOV32rr killed %11:gr32, killed %9:gr32, 0, implicit killed $eflags
+    %14:gr16 = COPY killed %13.sub_16bit:gr32
+    $ax = COPY killed %14:gr16
+    RET 0, killed $ax
+
+...

diff  --git a/llvm/test/CodeGen/X86/sadd_sat_plus.ll b/llvm/test/CodeGen/X86/sadd_sat_plus.ll
index 6deab30470b4..06799bd8862f 100644
--- a/llvm/test/CodeGen/X86/sadd_sat_plus.ll
+++ b/llvm/test/CodeGen/X86/sadd_sat_plus.ll
@@ -95,8 +95,8 @@ define signext i16 @func16(i16 signext %x, i16 signext %y, i16 signext %z) nounw
 ; X64-NEXT:    cwtl
 ; X64-NEXT:    sarl $15, %eax
 ; X64-NEXT:    xorl $-32768, %eax # imm = 0x8000
-; X64-NEXT:    addw %di, %si
-; X64-NEXT:    cmovnol %esi, %eax
+; X64-NEXT:    addw %si, %di
+; X64-NEXT:    cmovnol %edi, %eax
 ; X64-NEXT:    # kill: def $ax killed $ax killed $eax
 ; X64-NEXT:    retq
   %a = mul i16 %y, %z
@@ -131,8 +131,8 @@ define signext i8 @func8(i8 signext %x, i8 signext %y, i8 signext %z) nounwind {
 ; X64-NEXT:    leal (%rdi,%rax), %ecx
 ; X64-NEXT:    sarb $7, %cl
 ; X64-NEXT:    xorb $-128, %cl
-; X64-NEXT:    addb %dil, %al
-; X64-NEXT:    movzbl %al, %edx
+; X64-NEXT:    addb %al, %dil
+; X64-NEXT:    movzbl %dil, %edx
 ; X64-NEXT:    movzbl %cl, %eax
 ; X64-NEXT:    cmovnol %edx, %eax
 ; X64-NEXT:    # kill: def $al killed $al killed $eax

diff  --git a/llvm/test/CodeGen/X86/sadd_sat_vec.ll b/llvm/test/CodeGen/X86/sadd_sat_vec.ll
index 334c7fba0e55..e83209a870e2 100644
--- a/llvm/test/CodeGen/X86/sadd_sat_vec.ll
+++ b/llvm/test/CodeGen/X86/sadd_sat_vec.ll
@@ -434,8 +434,8 @@ define void @v1i8(<1 x i8>* %px, <1 x i8>* %py, <1 x i8>* %pz) nounwind {
 ; SSE-NEXT:    leal (%rax,%rcx), %esi
 ; SSE-NEXT:    sarb $7, %sil
 ; SSE-NEXT:    xorb $-128, %sil
-; SSE-NEXT:    addb %al, %cl
-; SSE-NEXT:    movzbl %cl, %eax
+; SSE-NEXT:    addb %cl, %al
+; SSE-NEXT:    movzbl %al, %eax
 ; SSE-NEXT:    movzbl %sil, %ecx
 ; SSE-NEXT:    cmovnol %eax, %ecx
 ; SSE-NEXT:    movb %cl, (%rdx)
@@ -448,8 +448,8 @@ define void @v1i8(<1 x i8>* %px, <1 x i8>* %py, <1 x i8>* %pz) nounwind {
 ; AVX-NEXT:    leal (%rax,%rcx), %esi
 ; AVX-NEXT:    sarb $7, %sil
 ; AVX-NEXT:    xorb $-128, %sil
-; AVX-NEXT:    addb %al, %cl
-; AVX-NEXT:    movzbl %cl, %eax
+; AVX-NEXT:    addb %cl, %al
+; AVX-NEXT:    movzbl %al, %eax
 ; AVX-NEXT:    movzbl %sil, %ecx
 ; AVX-NEXT:    cmovnol %eax, %ecx
 ; AVX-NEXT:    movb %cl, (%rdx)
@@ -470,9 +470,9 @@ define void @v1i16(<1 x i16>* %px, <1 x i16>* %py, <1 x i16>* %pz) nounwind {
 ; SSE-NEXT:    movswl %si, %esi
 ; SSE-NEXT:    sarl $15, %esi
 ; SSE-NEXT:    xorl $-32768, %esi # imm = 0x8000
-; SSE-NEXT:    addw %ax, %cx
-; SSE-NEXT:    cmovol %esi, %ecx
-; SSE-NEXT:    movw %cx, (%rdx)
+; SSE-NEXT:    addw %cx, %ax
+; SSE-NEXT:    cmovol %esi, %eax
+; SSE-NEXT:    movw %ax, (%rdx)
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: v1i16:
@@ -483,9 +483,9 @@ define void @v1i16(<1 x i16>* %px, <1 x i16>* %py, <1 x i16>* %pz) nounwind {
 ; AVX-NEXT:    movswl %si, %esi
 ; AVX-NEXT:    sarl $15, %esi
 ; AVX-NEXT:    xorl $-32768, %esi # imm = 0x8000
-; AVX-NEXT:    addw %ax, %cx
-; AVX-NEXT:    cmovol %esi, %ecx
-; AVX-NEXT:    movw %cx, (%rdx)
+; AVX-NEXT:    addw %cx, %ax
+; AVX-NEXT:    cmovol %esi, %eax
+; AVX-NEXT:    movw %ax, (%rdx)
 ; AVX-NEXT:    retq
   %x = load <1 x i16>, <1 x i16>* %px
   %y = load <1 x i16>, <1 x i16>* %py