[llvm] r373877 - [X86] Support LEA64_32r in processInstrForSlow3OpLEA and use INC/DEC when possible.

Craig Topper via llvm-commits llvm-commits at lists.llvm.org
Sun Oct 6 23:27:55 PDT 2019


Author: ctopper
Date: Sun Oct  6 23:27:55 2019
New Revision: 373877

URL: http://llvm.org/viewvc/llvm-project?rev=373877&view=rev
Log:
[X86] Support LEA64_32r in processInstrForSlow3OpLEA and use INC/DEC when possible.

Move the erasing and iterator updating inside to match the
other slow LEA function.

I've adapted code from optTwoAddrLEA and basically rebuilt the
implementation here. We do lose the kill flags now just like
optTwoAddrLEA. This runs late enough in the pipeline that it
shouldn't really be a problem.

Modified:
    llvm/trunk/lib/Target/X86/X86FixupLEAs.cpp
    llvm/trunk/test/CodeGen/X86/leaFixup32.mir
    llvm/trunk/test/CodeGen/X86/leaFixup64.mir
    llvm/trunk/test/CodeGen/X86/select-1-or-neg1.ll

Modified: llvm/trunk/lib/Target/X86/X86FixupLEAs.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86FixupLEAs.cpp?rev=373877&r1=373876&r2=373877&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86FixupLEAs.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86FixupLEAs.cpp Sun Oct  6 23:27:55 2019
@@ -67,8 +67,8 @@ class FixupLEAPass : public MachineFunct
   /// - LEA that uses RIP relative addressing mode
   /// - LEA that uses 16-bit addressing mode "
   /// This function currently handles the first 2 cases only.
-  MachineInstr *processInstrForSlow3OpLEA(MachineInstr &MI,
-                                          MachineBasicBlock &MBB);
+  void processInstrForSlow3OpLEA(MachineBasicBlock::iterator &I,
+                                 MachineBasicBlock &MBB, bool OptIncDec);
 
   /// Look for LEAs that are really two address LEAs that we might be able to
   /// turn into regular ADD instructions.
@@ -216,14 +216,10 @@ bool FixupLEAPass::runOnMachineFunction(
       if (optTwoAddrLEA(I, MBB, OptIncDec, UseLEAForSP))
         continue;
 
-      if (IsSlowLEA) {
+      if (IsSlowLEA)
         processInstructionForSlowLEA(I, MBB);
-      } else if (IsSlow3OpsLEA) {
-        if (auto *NewMI = processInstrForSlow3OpLEA(*I, MBB)) {
-          MBB.erase(I);
-          I = NewMI;
-        }
-      }
+      else if (IsSlow3OpsLEA)
+        processInstrForSlow3OpLEA(I, MBB, OptIncDec);
     }
 
     // Second pass for creating LEAs. This may reverse some of the
@@ -301,18 +297,14 @@ static inline bool isInefficientLEAReg(u
          Reg == X86::R13D || Reg == X86::R13;
 }
 
-static inline bool isRegOperand(const MachineOperand &Op) {
-  return Op.isReg() && Op.getReg() != X86::NoRegister;
-}
-
 /// Returns true if this LEA uses base an index registers, and the base register
 /// is known to be inefficient for the subtarget.
 // TODO: use a variant scheduling class to model the latency profile
 // of LEA instructions, and implement this logic as a scheduling predicate.
 static inline bool hasInefficientLEABaseReg(const MachineOperand &Base,
                                             const MachineOperand &Index) {
-  return Base.isReg() && isInefficientLEAReg(Base.getReg()) &&
-         isRegOperand(Index);
+  return Base.isReg() && isInefficientLEAReg(Base.getReg()) && Index.isReg() &&
+         Index.getReg() != X86::NoRegister;
 }
 
 static inline bool hasLEAOffset(const MachineOperand &Offset) {
@@ -534,112 +526,150 @@ void FixupLEAPass::processInstructionFor
   }
 }
 
-MachineInstr *
-FixupLEAPass::processInstrForSlow3OpLEA(MachineInstr &MI,
-                                        MachineBasicBlock &MBB) {
+void FixupLEAPass::processInstrForSlow3OpLEA(MachineBasicBlock::iterator &I,
+                                             MachineBasicBlock &MBB,
+                                             bool OptIncDec) {
+  MachineInstr &MI = *I;
   const unsigned LEAOpcode = MI.getOpcode();
 
-  const MachineOperand &Dst =     MI.getOperand(0);
+  const MachineOperand &Dest =    MI.getOperand(0);
   const MachineOperand &Base =    MI.getOperand(1 + X86::AddrBaseReg);
   const MachineOperand &Scale =   MI.getOperand(1 + X86::AddrScaleAmt);
   const MachineOperand &Index =   MI.getOperand(1 + X86::AddrIndexReg);
   const MachineOperand &Offset =  MI.getOperand(1 + X86::AddrDisp);
   const MachineOperand &Segment = MI.getOperand(1 + X86::AddrSegmentReg);
 
-  if (!(TII->isThreeOperandsLEA(MI) ||
-        hasInefficientLEABaseReg(Base, Index)) ||
+  if (!(TII->isThreeOperandsLEA(MI) || hasInefficientLEABaseReg(Base, Index)) ||
       !TII->isSafeToClobberEFLAGS(MBB, MI) ||
       Segment.getReg() != X86::NoRegister)
-    return nullptr;
+    return;
+
+  Register DestReg = Dest.getReg();
+  Register BaseReg = Base.getReg();
+  Register IndexReg = Index.getReg();
+
+  if (MI.getOpcode() == X86::LEA64_32r) {
+    if (BaseReg != 0)
+      BaseReg = TRI->getSubReg(BaseReg, X86::sub_32bit);
+    if (IndexReg != 0)
+      IndexReg = TRI->getSubReg(IndexReg, X86::sub_32bit);
+  }
 
-  Register DstR = Dst.getReg();
-  Register BaseR = Base.getReg();
-  Register IndexR = Index.getReg();
-  Register SSDstR =
-      (LEAOpcode == X86::LEA64_32r) ? Register(getX86SubSuperRegister(DstR, 64))
-                                    : DstR;
   bool IsScale1 = Scale.getImm() == 1;
-  bool IsInefficientBase = isInefficientLEAReg(BaseR);
-  bool IsInefficientIndex = isInefficientLEAReg(IndexR);
+  bool IsInefficientBase = isInefficientLEAReg(BaseReg);
+  bool IsInefficientIndex = isInefficientLEAReg(IndexReg);
 
   // Skip these cases since it takes more than 2 instructions
   // to replace the LEA instruction.
-  if (IsInefficientBase && SSDstR == BaseR && !IsScale1)
-    return nullptr;
-  if (LEAOpcode == X86::LEA64_32r && IsInefficientBase &&
-      (IsInefficientIndex || !IsScale1))
-    return nullptr;
-
-  const DebugLoc DL = MI.getDebugLoc();
-  const MCInstrDesc &ADDrr = TII->get(getADDrrFromLEA(LEAOpcode));
-  const MCInstrDesc &ADDri = TII->get(getADDriFromLEA(LEAOpcode, Offset));
+  if (IsInefficientBase && DestReg == BaseReg && !IsScale1)
+    return;
 
   LLVM_DEBUG(dbgs() << "FixLEA: Candidate to replace:"; MI.dump(););
   LLVM_DEBUG(dbgs() << "FixLEA: Replaced by: ";);
 
+  MachineInstr *NewMI = nullptr;
+
   // First try to replace LEA with one or two (for the 3-op LEA case)
   // add instructions:
   // 1.lea (%base,%index,1), %base => add %index,%base
   // 2.lea (%base,%index,1), %index => add %base,%index
-  if (IsScale1 && (DstR == BaseR || DstR == IndexR)) {
-    const MachineOperand &Src = DstR == BaseR ? Index : Base;
-    MachineInstr *NewMI =
-        BuildMI(MBB, MI, DL, ADDrr, DstR).addReg(DstR).add(Src);
+  if (IsScale1 && (DestReg == BaseReg || DestReg == IndexReg)) {
+    unsigned NewOpc = getADDrrFromLEA(MI.getOpcode());
+    if (DestReg != BaseReg)
+      std::swap(BaseReg, IndexReg);
+
+    if (MI.getOpcode() == X86::LEA64_32r) {
+      // TODO: Do we need the super register implicit use?
+      NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpc), DestReg)
+                  .addReg(BaseReg)
+                  .addReg(IndexReg)
+                  .addReg(Base.getReg(), RegState::Implicit)
+                  .addReg(Index.getReg(), RegState::Implicit);
+    } else {
+      NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpc), DestReg)
+                  .addReg(BaseReg)
+                  .addReg(IndexReg);
+    }
+  } else if (!IsInefficientBase || (!IsInefficientIndex && IsScale1)) {
+    // If the base is inefficient try switching the index and base operands,
+    // otherwise just break the 3-Ops LEA inst into 2-Ops LEA + ADD instruction:
+    // lea offset(%base,%index,scale),%dst =>
+    // lea (%base,%index,scale); add offset,%dst
+    NewMI = BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(LEAOpcode))
+                .add(Dest)
+                .add(IsInefficientBase ? Index : Base)
+                .add(Scale)
+                .add(IsInefficientBase ? Base : Index)
+                .addImm(0)
+                .add(Segment);
     LLVM_DEBUG(NewMI->dump(););
-    // Create ADD instruction for the Offset in case of 3-Ops LEA.
-    if (hasLEAOffset(Offset)) {
-      NewMI = BuildMI(MBB, MI, DL, ADDri, DstR).addReg(DstR).add(Offset);
-      LLVM_DEBUG(NewMI->dump(););
-    }
-    return NewMI;
   }
-  // If the base is inefficient try switching the index and base operands,
-  // otherwise just break the 3-Ops LEA inst into 2-Ops LEA + ADD instruction:
-  // lea offset(%base,%index,scale),%dst =>
-  // lea (%base,%index,scale); add offset,%dst
-  if (!IsInefficientBase || (!IsInefficientIndex && IsScale1)) {
-    MachineInstr *NewMI = BuildMI(MBB, MI, DL, TII->get(LEAOpcode))
-                              .add(Dst)
-                              .add(IsInefficientBase ? Index : Base)
-                              .add(Scale)
-                              .add(IsInefficientBase ? Base : Index)
-                              .addImm(0)
-                              .add(Segment);
-    LLVM_DEBUG(NewMI->dump(););
+
+  // If either replacement succeeded above, add the offset if needed, then
+  // replace the instruction.
+  if (NewMI) {
     // Create ADD instruction for the Offset in case of 3-Ops LEA.
     if (hasLEAOffset(Offset)) {
-      NewMI = BuildMI(MBB, MI, DL, ADDri, DstR).addReg(DstR).add(Offset);
-      LLVM_DEBUG(NewMI->dump(););
+      if (OptIncDec && Offset.isImm() &&
+          (Offset.getImm() == 1 || Offset.getImm() == -1)) {
+        unsigned NewOpc =
+            getINCDECFromLEA(MI.getOpcode(), Offset.getImm() == 1);
+        NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpc), DestReg)
+                    .addReg(DestReg);
+        LLVM_DEBUG(NewMI->dump(););
+      } else {
+        unsigned NewOpc = getADDriFromLEA(MI.getOpcode(), Offset);
+        NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpc), DestReg)
+                    .addReg(DestReg)
+                    .add(Offset);
+        LLVM_DEBUG(NewMI->dump(););
+      }
     }
-    return NewMI;
+
+    MBB.erase(I);
+    I = NewMI;
+    return;
   }
+
   // Handle the rest of the cases with inefficient base register:
-  assert(SSDstR != BaseR && "SSDstR == BaseR should be handled already!");
+  assert(DestReg != BaseReg && "DestReg == BaseReg should be handled already!");
   assert(IsInefficientBase && "efficient base should be handled already!");
 
+  // FIXME: Handle LEA64_32r.
+  if (LEAOpcode == X86::LEA64_32r)
+    return;
+
   // lea (%base,%index,1), %dst => mov %base,%dst; add %index,%dst
   if (IsScale1 && !hasLEAOffset(Offset)) {
-    bool BIK = Base.isKill() && BaseR != IndexR;
-    TII->copyPhysReg(MBB, MI, DL, DstR, BaseR, BIK);
+    bool BIK = Base.isKill() && BaseReg != IndexReg;
+    TII->copyPhysReg(MBB, MI, MI.getDebugLoc(), DestReg, BaseReg, BIK);
     LLVM_DEBUG(MI.getPrevNode()->dump(););
 
-    MachineInstr *NewMI =
-        BuildMI(MBB, MI, DL, ADDrr, DstR).addReg(DstR).add(Index);
+    unsigned NewOpc = getADDrrFromLEA(MI.getOpcode());
+    NewMI = BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(NewOpc), DestReg)
+                .addReg(DestReg)
+                .add(Index);
     LLVM_DEBUG(NewMI->dump(););
-    return NewMI;
+    return;
   }
+
   // lea offset(%base,%index,scale), %dst =>
   // lea offset( ,%index,scale), %dst; add %base,%dst
-  MachineInstr *NewMI = BuildMI(MBB, MI, DL, TII->get(LEAOpcode))
-                            .add(Dst)
-                            .addReg(0)
-                            .add(Scale)
-                            .add(Index)
-                            .add(Offset)
-                            .add(Segment);
+  NewMI = BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(LEAOpcode))
+              .add(Dest)
+              .addReg(0)
+              .add(Scale)
+              .add(Index)
+              .add(Offset)
+              .add(Segment);
   LLVM_DEBUG(NewMI->dump(););
 
-  NewMI = BuildMI(MBB, MI, DL, ADDrr, DstR).addReg(DstR).add(Base);
+  unsigned NewOpc = getADDrrFromLEA(MI.getOpcode());
+  NewMI = BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(NewOpc), DestReg)
+              .addReg(DestReg)
+              .add(Base);
   LLVM_DEBUG(NewMI->dump(););
-  return NewMI;
+
+  MBB.erase(I);
+  I = NewMI;
 }

Modified: llvm/trunk/test/CodeGen/X86/leaFixup32.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/leaFixup32.mir?rev=373877&r1=373876&r2=373877&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/leaFixup32.mir (original)
+++ llvm/trunk/test/CodeGen/X86/leaFixup32.mir Sun Oct  6 23:27:55 2019
@@ -104,7 +104,7 @@ frameInfo:
 body:             |
   bb.0 (%ir-block.0):
     liveins: $eax, $ebp
-    ; CHECK: $eax = ADD32rr $eax, killed $ebp
+    ; CHECK: $eax = ADD32rr $eax, $ebp
     ; CHECK: $eax = ADD32ri8 $eax, -5
  
     $eax = LEA32r killed $eax, 1, killed $ebp, -5, $noreg
@@ -139,7 +139,7 @@ frameInfo:
 body:             |
   bb.0 (%ir-block.0):
     liveins: $eax, $ebp
-    ; CHECK: $ebp = ADD32rr $ebp, killed $eax
+    ; CHECK: $ebp = ADD32rr $ebp, $eax
     ; CHECK: $ebp = ADD32ri8 $ebp, -5
  
     $ebp = LEA32r killed $ebp, 1, killed $eax, -5, $noreg
@@ -315,7 +315,7 @@ frameInfo:
 body:             |
   bb.0 (%ir-block.0):
     liveins: $eax, $ebp
-    ; CHECK: $eax = ADD32rr $eax, killed $ebp
+    ; CHECK: $eax = ADD32rr $eax, $ebp
     ; CHECK: $eax = ADD32ri $eax, 129
  
     $eax = LEA32r killed $eax, 1, killed $ebp, 129, $noreg

Modified: llvm/trunk/test/CodeGen/X86/leaFixup64.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/leaFixup64.mir?rev=373877&r1=373876&r2=373877&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/leaFixup64.mir (original)
+++ llvm/trunk/test/CodeGen/X86/leaFixup64.mir Sun Oct  6 23:27:55 2019
@@ -177,8 +177,8 @@ frameInfo:
 body:             |
   bb.0 (%ir-block.0):
     liveins: $rax, $rbp
-    ; CHECK: $eax = LEA64_32r killed $rax, 1, killed $rbp, 0
-    ; CHECK: $eax = ADD32ri8 $eax, -5
+    ; CHECK: $eax = ADD32rr $eax, $ebp, implicit-def $eflags, implicit $rax, implicit $rbp
+    ; CHECK: $eax = ADD32ri8 $eax, -5, implicit-def $eflags
  
     $eax = LEA64_32r killed $rax, 1, killed $rbp, -5, $noreg
     RETQ $eax
@@ -212,8 +212,8 @@ frameInfo:
 body:             |
   bb.0 (%ir-block.0):
     liveins: $rax, $rbp
-    ; CHECK: $ebp = LEA64_32r killed $rax, 1,  killed $rbp, 0
-    ; CHECK: $ebp = ADD32ri8 $ebp, -5
+    ; CHECK: $ebp = ADD32rr $ebp, $eax, implicit-def $eflags, implicit $rbp, implicit $rax
+    ; CHECK: $ebp = ADD32ri8 $ebp, -5, implicit-def $eflags
  
     $ebp = LEA64_32r killed $rbp, 1, killed $rax, -5, $noreg
     RETQ $ebp
@@ -281,7 +281,7 @@ frameInfo:
 body:             |
   bb.0 (%ir-block.0):
     liveins: $rax, $rbp
-    ; CHECK: $rax = ADD64rr $rax, killed $rbp
+    ; CHECK: $rax = ADD64rr $rax, $rbp
     ; CHECK: $rax = ADD64ri8 $rax, -5
  
     $rax = LEA64r killed $rax, 1, killed $rbp, -5, $noreg
@@ -316,7 +316,7 @@ frameInfo:
 body:             |
   bb.0 (%ir-block.0):
     liveins: $rax, $rbp
-    ; CHECK: $rbp = ADD64rr $rbp, killed $rax
+    ; CHECK: $rbp = ADD64rr $rbp, $rax
     ; CHECK: $rbp = ADD64ri8 $rbp, -5
  
     $rbp = LEA64r killed $rbp, 1, killed $rax, -5, $noreg
@@ -635,8 +635,8 @@ frameInfo:
 body:             |
   bb.0 (%ir-block.0):
     liveins: $rax, $rbp
-    ; CHECK: $eax = LEA64_32r killed $rax, 1, killed $rbp, 0
-    ; CHECK: $eax = ADD32ri $eax, 129
+    ; CHECK: $eax = ADD32rr $eax, $ebp, implicit-def $eflags
+    ; CHECK: $eax = ADD32ri $eax, 129, implicit-def $eflags
  
     $eax = LEA64_32r killed $rax, 1, killed $rbp, 129, $noreg
     RETQ $eax
@@ -772,8 +772,8 @@ frameInfo:
 body:             |
   bb.0 (%ir-block.0):
     liveins: $rax, $rbp
-    ; CHECK: $rax = ADD64rr $rax, killed $rbp
-    ; CHECK: $rax = ADD64ri32 $rax, 129
+    ; CHECK: $rax = ADD64rr $rax, $rbp, implicit-def $eflags
+    ; CHECK: $rax = ADD64ri32 $rax, 129, implicit-def $eflags
  
     $rax = LEA64r killed $rax, 1, killed $rbp, 129, $noreg
     RETQ $eax

Modified: llvm/trunk/test/CodeGen/X86/select-1-or-neg1.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/select-1-or-neg1.ll?rev=373877&r1=373876&r2=373877&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/select-1-or-neg1.ll (original)
+++ llvm/trunk/test/CodeGen/X86/select-1-or-neg1.ll Sun Oct  6 23:27:55 2019
@@ -19,8 +19,8 @@ define i32 @PR28968(i32 %x) {
 ; SLOWLEA3-NEXT:    xorl %eax, %eax
 ; SLOWLEA3-NEXT:    cmpl $1, %edi
 ; SLOWLEA3-NEXT:    sete %al
-; SLOWLEA3-NEXT:    leal (%rax,%rax), %eax
-; SLOWLEA3-NEXT:    addl $-1, %eax
+; SLOWLEA3-NEXT:    addl %eax, %eax
+; SLOWLEA3-NEXT:    decl %eax
 ; SLOWLEA3-NEXT:    retq
   %cmp = icmp eq i32 %x, 1
   %sel = select i1 %cmp, i32 1, i32 -1




More information about the llvm-commits mailing list