[llvm] b200c51 - Reland [MachineCopyPropagation] Extend MCP to do trivial copy backward propagation.

Kai Luo via llvm-commits llvm-commits at lists.llvm.org
Wed Dec 4 22:36:19 PST 2019


Author: Kai Luo
Date: 2019-12-05T14:32:11+08:00
New Revision: b200c5180e8d6f9ac4e08512a04739ab02cebdb8

URL: https://github.com/llvm/llvm-project/commit/b200c5180e8d6f9ac4e08512a04739ab02cebdb8
DIFF: https://github.com/llvm/llvm-project/commit/b200c5180e8d6f9ac4e08512a04739ab02cebdb8.diff

LOG: Reland [MachineCopyPropagation] Extend MCP to do trivial copy backward propagation.

Fix assertion error
```
bool llvm::MachineOperand::isRenamable() const: Assertion `Register::isPhysicalRegister(getReg()) && "isRenamable should only be checked on physical registers"' failed.
```
by checking if the register is 0 before invoking `isRenamable`.

Added: 
    

Modified: 
    llvm/lib/CodeGen/MachineCopyPropagation.cpp
    llvm/test/CodeGen/PowerPC/machine-backward-cp.mir
    llvm/test/CodeGen/PowerPC/redundant-copy-after-tail-dup.ll
    llvm/test/CodeGen/RISCV/alu64.ll
    llvm/test/CodeGen/RISCV/shifts.ll
    llvm/test/CodeGen/X86/fp128-i128.ll
    llvm/test/CodeGen/X86/fshr.ll
    llvm/test/CodeGen/X86/i128-mul.ll
    llvm/test/CodeGen/X86/mul-i512.ll
    llvm/test/CodeGen/X86/shift-i128.ll
    llvm/test/CodeGen/X86/umulo-128-legalisation-lowering.ll

Removed: 
    


################################################################################
diff --git a/llvm/lib/CodeGen/MachineCopyPropagation.cpp b/llvm/lib/CodeGen/MachineCopyPropagation.cpp
index 34ece614185c..6db388c2564a 100644
--- a/llvm/lib/CodeGen/MachineCopyPropagation.cpp
+++ b/llvm/lib/CodeGen/MachineCopyPropagation.cpp
@@ -37,6 +37,15 @@
 //    ... // No clobber of %R0
 //    %R1 = COPY %R0 <<< Removed
 //
+// or
+//
+//    $R0 = OP ...
+//    ... // No read/clobber of $R0 and $R1
+//    $R1 = COPY $R0 // $R0 is killed
+// Replace $R0 with $R1 and remove the COPY
+//    $R1 = OP ...
+//    ...
+//
 //===----------------------------------------------------------------------===//
 
 #include "llvm/ADT/DenseMap.h"
@@ -98,6 +107,28 @@ class CopyTracker {
     }
   }
 
+  /// Remove register from copy maps.
+  void invalidateRegister(unsigned Reg, const TargetRegisterInfo &TRI) {
+    // Since Reg might be a subreg of some registers, only invalidating Reg is
+    // not enough. We have to find the COPY that defines Reg or the registers
+    // defined by Reg, and invalidate all of them.
+    DenseSet<unsigned> RegsToInvalidate{Reg};
+    for (MCRegUnitIterator RUI(Reg, &TRI); RUI.isValid(); ++RUI) {
+      auto I = Copies.find(*RUI);
+      if (I != Copies.end()) {
+        if (MachineInstr *MI = I->second.MI) {
+          RegsToInvalidate.insert(MI->getOperand(0).getReg());
+          RegsToInvalidate.insert(MI->getOperand(1).getReg());
+        }
+        RegsToInvalidate.insert(I->second.DefRegs.begin(),
+                                I->second.DefRegs.end());
+      }
+    }
+    for (unsigned InvalidReg : RegsToInvalidate)
+      for (MCRegUnitIterator RUI(InvalidReg, &TRI); RUI.isValid(); ++RUI)
+        Copies.erase(*RUI);
+  }
+
   /// Clobber a single register, removing it from the tracker's copy maps.
   void clobberRegister(unsigned Reg, const TargetRegisterInfo &TRI) {
     for (MCRegUnitIterator RUI(Reg, &TRI); RUI.isValid(); ++RUI) {
@@ -151,6 +182,38 @@ class CopyTracker {
     return CI->second.MI;
   }
 
+  MachineInstr *findCopyDefViaUnit(unsigned RegUnit,
+                                    const TargetRegisterInfo &TRI) {
+    auto CI = Copies.find(RegUnit);
+    if (CI == Copies.end())
+      return nullptr;
+    if (CI->second.DefRegs.size() != 1)
+      return nullptr;
+    MCRegUnitIterator RUI(CI->second.DefRegs[0], &TRI);
+    return findCopyForUnit(*RUI, TRI, true);
+  }
+
+  MachineInstr *findAvailBackwardCopy(MachineInstr &I, unsigned Reg,
+                                      const TargetRegisterInfo &TRI) {
+    MCRegUnitIterator RUI(Reg, &TRI);
+    MachineInstr *AvailCopy = findCopyDefViaUnit(*RUI, TRI);
+    if (!AvailCopy ||
+        !TRI.isSubRegisterEq(AvailCopy->getOperand(1).getReg(), Reg))
+      return nullptr;
+
+    Register AvailSrc = AvailCopy->getOperand(1).getReg();
+    Register AvailDef = AvailCopy->getOperand(0).getReg();
+    for (const MachineInstr &MI :
+         make_range(AvailCopy->getReverseIterator(), I.getReverseIterator()))
+      for (const MachineOperand &MO : MI.operands())
+        if (MO.isRegMask())
+          // FIXME: Shall we simultaneously invalidate AvailSrc or AvailDef?
+          if (MO.clobbersPhysReg(AvailSrc) || MO.clobbersPhysReg(AvailDef))
+            return nullptr;
+
+    return AvailCopy;
+  }
+
   MachineInstr *findAvailCopy(MachineInstr &DestCopy, unsigned Reg,
                               const TargetRegisterInfo &TRI) {
     // We check the first RegUnit here, since we'll only be interested in the
@@ -211,11 +274,16 @@ class MachineCopyPropagation : public MachineFunctionPass {
   void ClobberRegister(unsigned Reg);
   void ReadRegister(unsigned Reg, MachineInstr &Reader,
                     DebugType DT);
-  void CopyPropagateBlock(MachineBasicBlock &MBB);
+  void ForwardCopyPropagateBlock(MachineBasicBlock &MBB);
+  void BackwardCopyPropagateBlock(MachineBasicBlock &MBB);
   bool eraseIfRedundant(MachineInstr &Copy, unsigned Src, unsigned Def);
   void forwardUses(MachineInstr &MI);
+  void propagateDefs(MachineInstr &MI);
   bool isForwardableRegClassCopy(const MachineInstr &Copy,
                                  const MachineInstr &UseI, unsigned UseIdx);
+  bool isBackwardPropagatableRegClassCopy(const MachineInstr &Copy,
+                                          const MachineInstr &UseI,
+                                          unsigned UseIdx);
   bool hasImplicitOverlap(const MachineInstr &MI, const MachineOperand &Use);
 
   /// Candidates for deletion.
@@ -313,6 +381,19 @@ bool MachineCopyPropagation::eraseIfRedundant(MachineInstr &Copy, unsigned Src,
   return true;
 }
 
+bool MachineCopyPropagation::isBackwardPropagatableRegClassCopy(
+    const MachineInstr &Copy, const MachineInstr &UseI, unsigned UseIdx) {
+  Register Def = Copy.getOperand(0).getReg();
+
+  if (const TargetRegisterClass *URC =
+          UseI.getRegClassConstraint(UseIdx, TII, TRI))
+    return URC->contains(Def);
+
+  // We don't process further if UseI is a COPY, since forward copy propagation
+  // should handle that.
+  return false;
+}
+
 /// Decide whether we should forward the source of \param Copy to its use in
 /// \param UseI based on the physical register class constraints of the opcode
 /// and avoiding introducing more cross-class COPYs.
@@ -468,8 +549,9 @@ void MachineCopyPropagation::forwardUses(MachineInstr &MI) {
   }
 }
 
-void MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) {
-  LLVM_DEBUG(dbgs() << "MCP: CopyPropagateBlock " << MBB.getName() << "\n");
+void MachineCopyPropagation::ForwardCopyPropagateBlock(MachineBasicBlock &MBB) {
+  LLVM_DEBUG(dbgs() << "MCP: ForwardCopyPropagateBlock " << MBB.getName()
+                    << "\n");
 
   for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); I != E; ) {
     MachineInstr *MI = &*I;
@@ -647,6 +729,134 @@ void MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) {
   Tracker.clear();
 }
 
+static bool isBackwardPropagatableCopy(MachineInstr &MI,
+                                       const MachineRegisterInfo &MRI) {
+  assert(MI.isCopy() && "MI is expected to be a COPY");
+  Register Def = MI.getOperand(0).getReg();
+  Register Src = MI.getOperand(1).getReg();
+
+  if (!Def || !Src)
+    return false;
+
+  if (MRI.isReserved(Def) || MRI.isReserved(Src))
+    return false;
+
+  return MI.getOperand(1).isRenamable() && MI.getOperand(1).isKill();
+}
+
+void MachineCopyPropagation::propagateDefs(MachineInstr &MI) {
+  if (!Tracker.hasAnyCopies())
+    return;
+
+  for (unsigned OpIdx = 0, OpEnd = MI.getNumOperands(); OpIdx != OpEnd;
+       ++OpIdx) {
+    MachineOperand &MODef = MI.getOperand(OpIdx);
+
+    if (!MODef.isReg() || MODef.isUse())
+      continue;
+
+    // Ignore non-trivial cases.
+    if (MODef.isTied() || MODef.isUndef() || MODef.isImplicit())
+      continue;
+
+    if (!MODef.getReg())
+      continue;
+
+    // We only handle if the register comes from a vreg.
+    if (!MODef.isRenamable())
+      continue;
+
+    MachineInstr *Copy =
+        Tracker.findAvailBackwardCopy(MI, MODef.getReg(), *TRI);
+    if (!Copy)
+      continue;
+
+    Register Def = Copy->getOperand(0).getReg();
+    Register Src = Copy->getOperand(1).getReg();
+
+    if (MODef.getReg() != Src)
+      continue;
+
+    if (!isBackwardPropagatableRegClassCopy(*Copy, MI, OpIdx))
+      continue;
+
+    if (hasImplicitOverlap(MI, MODef))
+      continue;
+
+    LLVM_DEBUG(dbgs() << "MCP: Replacing " << printReg(MODef.getReg(), TRI)
+                      << "\n     with " << printReg(Def, TRI) << "\n     in "
+                      << MI << "     from " << *Copy);
+
+    MODef.setReg(Def);
+    MODef.setIsRenamable(Copy->getOperand(0).isRenamable());
+
+    LLVM_DEBUG(dbgs() << "MCP: After replacement: " << MI << "\n");
+    MaybeDeadCopies.insert(Copy);
+    Changed = true;
+  }
+}
+
+void MachineCopyPropagation::BackwardCopyPropagateBlock(
+    MachineBasicBlock &MBB) {
+  LLVM_DEBUG(dbgs() << "MCP: BackwardCopyPropagateBlock " << MBB.getName()
+                    << "\n");
+
+  for (MachineBasicBlock::reverse_iterator I = MBB.rbegin(), E = MBB.rend();
+       I != E;) {
+    MachineInstr *MI = &*I;
+    ++I;
+
+    // Ignore non-trivial COPYs.
+    if (MI->isCopy() && MI->getNumOperands() == 2 &&
+        !TRI->regsOverlap(MI->getOperand(0).getReg(),
+                          MI->getOperand(1).getReg())) {
+
+      Register Def = MI->getOperand(0).getReg();
+      Register Src = MI->getOperand(1).getReg();
+
+      // Unlike forward cp, we don't invoke propagateDefs here,
+      // just let forward cp do COPY-to-COPY propagation.
+      if (isBackwardPropagatableCopy(*MI, *MRI)) {
+        Tracker.invalidateRegister(Src, *TRI);
+        Tracker.invalidateRegister(Def, *TRI);
+        Tracker.trackCopy(MI, *TRI);
+        continue;
+      }
+    }
+
+    // Invalidate any earlyclobber regs first.
+    for (const MachineOperand &MO : MI->operands())
+      if (MO.isReg() && MO.isEarlyClobber()) {
+        Register Reg = MO.getReg();
+        if (!Reg)
+          continue;
+        Tracker.invalidateRegister(Reg, *TRI);
+      }
+
+    propagateDefs(*MI);
+    for (const MachineOperand &MO : MI->operands()) {
+      if (!MO.isReg())
+        continue;
+
+      if (!MO.getReg())
+        continue;
+
+      if (MO.isDef())
+        Tracker.invalidateRegister(MO.getReg(), *TRI);
+
+      if (MO.readsReg())
+        Tracker.invalidateRegister(MO.getReg(), *TRI);
+    }
+  }
+
+  for (auto *Copy : MaybeDeadCopies)
+    Copy->eraseFromParent();
+
+  MaybeDeadCopies.clear();
+  CopyDbgUsers.clear();
+  Tracker.clear();
+}
+
 bool MachineCopyPropagation::runOnMachineFunction(MachineFunction &MF) {
   if (skipFunction(MF.getFunction()))
     return false;
@@ -657,8 +867,10 @@ bool MachineCopyPropagation::runOnMachineFunction(MachineFunction &MF) {
   TII = MF.getSubtarget().getInstrInfo();
   MRI = &MF.getRegInfo();
 
-  for (MachineBasicBlock &MBB : MF)
-    CopyPropagateBlock(MBB);
+  for (MachineBasicBlock &MBB : MF) {
+    BackwardCopyPropagateBlock(MBB);
+    ForwardCopyPropagateBlock(MBB);
+  }
 
   return Changed;
 }

diff --git a/llvm/test/CodeGen/PowerPC/machine-backward-cp.mir b/llvm/test/CodeGen/PowerPC/machine-backward-cp.mir
index bb2c29fc906d..0374d55c0cb4 100644
--- a/llvm/test/CodeGen/PowerPC/machine-backward-cp.mir
+++ b/llvm/test/CodeGen/PowerPC/machine-backward-cp.mir
@@ -11,8 +11,7 @@ tracksRegLiveness: true
 body: |
   bb.0.entry:
     ; CHECK-LABEL: name: test0
-    ; CHECK: renamable $x4 = LI8 1024
-    ; CHECK: $x3 = COPY killed renamable $x4
+    ; CHECK: $x3 = LI8 1024
     ; CHECK: BLR8 implicit $lr8, implicit undef $rm, implicit $x3
     renamable $x4 = LI8 1024
     $x3 = COPY renamable killed $x4
@@ -28,8 +27,7 @@ tracksRegLiveness: true
 body: |
   ; CHECK-LABEL: name: test1
   ; CHECK: bb.0.entry:
-  ; CHECK:   renamable $x5 = LI8 42
-  ; CHECK:   renamable $x4 = COPY killed renamable $x5
+  ; CHECK:   renamable $x4 = LI8 42
   ; CHECK:   B %bb.1
   ; CHECK: bb.1:
   ; CHECK:   liveins: $x4
@@ -139,8 +137,8 @@ body: |
 
     ; CHECK-LABEL: name: iterative_deletion
     ; CHECK: liveins: $x5
-    ; CHECK: renamable $x6 = ADDI8 killed renamable $x5, 1
-    ; CHECK: $x3 = COPY $x6
+    ; CHECK: renamable $x4 = ADDI8 killed renamable $x5, 1
+    ; CHECK: $x3 = COPY $x4
     ; CHECK: BLR8 implicit $lr8, implicit undef $rm, implicit $x3
     renamable $x6 = ADDI8 renamable killed $x5, 1
     renamable $x4 = COPY renamable killed $x6
@@ -160,8 +158,8 @@ body: |
     ; CHECK-LABEL: name: Enter
     ; CHECK: liveins: $x4, $x7
     ; CHECK: renamable $x5 = COPY killed renamable $x7
-    ; CHECK: renamable $x6 = ADDI8 killed renamable $x4, 1
-    ; CHECK: $x3 = ADD8 killed renamable $x5, $x6
+    ; CHECK: renamable $x7 = ADDI8 killed renamable $x4, 1
+    ; CHECK: $x3 = ADD8 killed renamable $x5, killed renamable $x7
     ; CHECK: BLR8 implicit $lr8, implicit undef $rm, implicit $x3
     renamable $x5 = COPY killed renamable $x7
     renamable $x6 = ADDI8 killed renamable $x4, 1
@@ -181,10 +179,9 @@ body: |
     ; CHECK-LABEL: name: foo
     ; CHECK: liveins: $x4, $x7
     ; CHECK: renamable $x5 = COPY killed renamable $x7
-    ; CHECK: renamable $x6 = ADDI8 renamable $x4, 1
-    ; CHECK: renamable $x7 = COPY killed renamable $x6
-    ; CHECK: renamable $x8 = ADDI8 killed $x4, 2
-    ; CHECK: $x3 = ADD8 killed renamable $x5, $x8
+    ; CHECK: renamable $x7 = ADDI8 renamable $x4, 1
+    ; CHECK: renamable $x6 = ADDI8 killed $x4, 2
+    ; CHECK: $x3 = ADD8 killed renamable $x5, killed renamable $x6
     ; CHECK: $x3 = ADD8 $x3, killed renamable $x7
     ; CHECK: BLR8 implicit $lr8, implicit undef $rm, implicit $x3
     renamable $x5 = COPY killed renamable $x7
@@ -208,10 +205,10 @@ body: |
     ; CHECK-LABEL: name: bar
     ; CHECK: liveins: $x4, $x7
     ; CHECK: renamable $x5 = COPY killed renamable $x7
-    ; CHECK: renamable $x6 = ADDI8 renamable $x4, 1
-    ; CHECK: renamable $x8 = COPY $x6
-    ; CHECK: renamable $x6 = ADDI8 renamable $x5, 2
-    ; CHECK: $x3 = ADD8 killed renamable $x5, $x6
+    ; CHECK: renamable $x7 = ADDI8 renamable $x4, 1
+    ; CHECK: renamable $x8 = COPY killed renamable $x7
+    ; CHECK: renamable $x7 = ADDI8 renamable $x5, 2
+    ; CHECK: $x3 = ADD8 killed renamable $x5, killed renamable $x7
     ; CHECK: $x3 = ADD8 $x3, killed renamable $x8
     ; CHECK: BLR8 implicit $lr8, implicit undef $rm, implicit $x3
     renamable $x5 = COPY killed renamable $x7
@@ -236,10 +233,9 @@ body: |
     ; CHECK-LABEL: name: bogus
     ; CHECK: liveins: $x7
     ; CHECK: renamable $x5 = COPY renamable $x7
-    ; CHECK: renamable $x6 = ADDI8 $x7, 1
-    ; CHECK: renamable $x7 = COPY $x6
+    ; CHECK: renamable $x4 = ADDI8 $x7, 1
     ; CHECK: renamable $x6 = ADDI8 renamable $x5, 2
-    ; CHECK: $x3 = ADD8 $x7, killed renamable $x5
+    ; CHECK: $x3 = ADD8 killed renamable $x4, killed renamable $x5
     ; CHECK: $x3 = ADD8 $x3, killed renamable $x6
     ; CHECK: BLR8 implicit $lr8, implicit undef $rm, implicit $x3
     renamable $x5 = COPY killed renamable $x7
@@ -263,10 +259,10 @@ body: |
     liveins: $x7
     ; CHECK-LABEL: name: foobar
     ; CHECK: liveins: $x7
-    ; CHECK: renamable $x6 = ADDI8 $x7, 1
-    ; CHECK: renamable $x8 = COPY $x6
-    ; CHECK: renamable $x6 = ADDI8 $x7, 2
-    ; CHECK: $x3 = ADD8 $x6, $x7
+    ; CHECK: renamable $x4 = ADDI8 $x7, 1
+    ; CHECK: renamable $x8 = COPY killed renamable $x4
+    ; CHECK: renamable $x4 = ADDI8 $x7, 2
+    ; CHECK: $x3 = ADD8 killed renamable $x4, $x7
     ; CHECK: $x3 = ADD8 $x3, killed renamable $x8
     ; CHECK: BLR8 implicit $lr8, implicit undef $rm, implicit $x3
     renamable $x5 = COPY killed renamable $x7
@@ -280,3 +276,22 @@ body: |
     BLR8 implicit $lr8, implicit undef $rm, implicit $x3
 
 ...
+
+---
+name: cross_call
+alignment: 4
+tracksRegLiveness: true
+body: |
+  bb.0.entry:
+    liveins: $x2, $x3, $x20
+    ; CHECK-LABEL: name: cross_call
+    ; CHECK: liveins: $x2, $x3, $x20
+    ; CHECK: renamable $x20 = LI8 1024
+    ; CHECK: BL8_NOP @foo, csr_svr464_altivec, implicit-def $lr8, implicit $rm, implicit $x3, implicit-def $x3, implicit $x2
+    ; CHECK: $x3 = COPY killed renamable $x20
+    ; CHECK: BLR8 implicit $lr8, implicit undef $rm, implicit $x3
+    renamable $x20 = LI8 1024
+    BL8_NOP @foo, csr_svr464_altivec, implicit-def $lr8, implicit $rm, implicit $x3, implicit-def $x3, implicit $x2
+    $x3 = COPY renamable killed $x20
+    BLR8 implicit $lr8, implicit undef $rm, implicit $x3
+...

diff --git a/llvm/test/CodeGen/PowerPC/redundant-copy-after-tail-dup.ll b/llvm/test/CodeGen/PowerPC/redundant-copy-after-tail-dup.ll
index 6aaf169dabee..dd41abd093d6 100644
--- a/llvm/test/CodeGen/PowerPC/redundant-copy-after-tail-dup.ll
+++ b/llvm/test/CodeGen/PowerPC/redundant-copy-after-tail-dup.ll
@@ -26,8 +26,7 @@ define dso_local i1 @t(%class.A* %this, i32 %color, i32 %vertex) local_unnamed_a
 ; CHECK-P9-NEXT:    cmplwi r3, 2
 ; CHECK-P9-NEXT:    bge- cr0, .LBB0_6
 ; CHECK-P9-NEXT:  # %bb.3: # %land.lhs.true.1
-; CHECK-P9-NEXT:    li r5, 0
-; CHECK-P9-NEXT:    mr r3, r5
+; CHECK-P9-NEXT:    li r3, 0
 ; CHECK-P9-NEXT:    blr
 ; CHECK-P9-NEXT:  .LBB0_4: # %lor.lhs.false
 ; CHECK-P9-NEXT:    cmplwi cr0, r4, 0

diff --git a/llvm/test/CodeGen/RISCV/alu64.ll b/llvm/test/CodeGen/RISCV/alu64.ll
index dd336065ef6c..1160b4055674 100644
--- a/llvm/test/CodeGen/RISCV/alu64.ll
+++ b/llvm/test/CodeGen/RISCV/alu64.ll
@@ -224,8 +224,7 @@ define i64 @sll(i64 %a, i64 %b) nounwind {
 ; RV32I-NEXT:    srli a4, a0, 1
 ; RV32I-NEXT:    srl a3, a4, a3
 ; RV32I-NEXT:    or a1, a1, a3
-; RV32I-NEXT:    sll a2, a0, a2
-; RV32I-NEXT:    mv a0, a2
+; RV32I-NEXT:    sll a0, a0, a2
 ; RV32I-NEXT:    ret
   %1 = shl i64 %a, %b
   ret i64 %1
@@ -311,8 +310,7 @@ define i64 @srl(i64 %a, i64 %b) nounwind {
 ; RV32I-NEXT:    slli a4, a1, 1
 ; RV32I-NEXT:    sll a3, a4, a3
 ; RV32I-NEXT:    or a0, a0, a3
-; RV32I-NEXT:    srl a2, a1, a2
-; RV32I-NEXT:    mv a1, a2
+; RV32I-NEXT:    srl a1, a1, a2
 ; RV32I-NEXT:    ret
   %1 = lshr i64 %a, %b
   ret i64 %1

diff --git a/llvm/test/CodeGen/RISCV/shifts.ll b/llvm/test/CodeGen/RISCV/shifts.ll
index 54c49f3f3ef6..6e1575d9dc5e 100644
--- a/llvm/test/CodeGen/RISCV/shifts.ll
+++ b/llvm/test/CodeGen/RISCV/shifts.ll
@@ -23,8 +23,7 @@ define i64 @lshr64(i64 %a, i64 %b) nounwind {
 ; RV32I-NEXT:    slli a4, a1, 1
 ; RV32I-NEXT:    sll a3, a4, a3
 ; RV32I-NEXT:    or a0, a0, a3
-; RV32I-NEXT:    srl a2, a1, a2
-; RV32I-NEXT:    mv a1, a2
+; RV32I-NEXT:    srl a1, a1, a2
 ; RV32I-NEXT:    ret
 ;
 ; RV64I-LABEL: lshr64:
@@ -114,8 +113,7 @@ define i64 @shl64(i64 %a, i64 %b) nounwind {
 ; RV32I-NEXT:    srli a4, a0, 1
 ; RV32I-NEXT:    srl a3, a4, a3
 ; RV32I-NEXT:    or a1, a1, a3
-; RV32I-NEXT:    sll a2, a0, a2
-; RV32I-NEXT:    mv a0, a2
+; RV32I-NEXT:    sll a0, a0, a2
 ; RV32I-NEXT:    ret
 ;
 ; RV64I-LABEL: shl64:
@@ -191,8 +189,7 @@ define i128 @lshr128(i128 %a, i128 %b) nounwind {
 ; RV64I-NEXT:    slli a4, a1, 1
 ; RV64I-NEXT:    sll a3, a4, a3
 ; RV64I-NEXT:    or a0, a0, a3
-; RV64I-NEXT:    srl a2, a1, a2
-; RV64I-NEXT:    mv a1, a2
+; RV64I-NEXT:    srl a1, a1, a2
 ; RV64I-NEXT:    ret
   %1 = lshr i128 %a, %b
   ret i128 %1
@@ -298,8 +295,7 @@ define i128 @shl128(i128 %a, i128 %b) nounwind {
 ; RV64I-NEXT:    srli a4, a0, 1
 ; RV64I-NEXT:    srl a3, a4, a3
 ; RV64I-NEXT:    or a1, a1, a3
-; RV64I-NEXT:    sll a2, a0, a2
-; RV64I-NEXT:    mv a0, a2
+; RV64I-NEXT:    sll a0, a0, a2
 ; RV64I-NEXT:    ret
   %1 = shl i128 %a, %b
   ret i128 %1

diff --git a/llvm/test/CodeGen/X86/fp128-i128.ll b/llvm/test/CodeGen/X86/fp128-i128.ll
index c47b92f04e49..d99ce45f050c 100644
--- a/llvm/test/CodeGen/X86/fp128-i128.ll
+++ b/llvm/test/CodeGen/X86/fp128-i128.ll
@@ -496,9 +496,8 @@ define void @TestCopySign({ fp128, fp128 }* noalias nocapture sret %agg.result,
 ; AVX-NEXT:    testl %ebp, %ebp
 ; AVX-NEXT:    jle .LBB10_1
 ; AVX-NEXT:  # %bb.2: # %if.then
-; AVX-NEXT:    vandps {{.*}}(%rip), %xmm0, %xmm1
+; AVX-NEXT:    vandps {{.*}}(%rip), %xmm0, %xmm2
 ; AVX-NEXT:    vmovaps (%rsp), %xmm0 # 16-byte Reload
-; AVX-NEXT:    vmovaps %xmm1, %xmm2
 ; AVX-NEXT:    jmp .LBB10_3
 ; AVX-NEXT:  .LBB10_1:
 ; AVX-NEXT:    vmovaps (%rsp), %xmm2 # 16-byte Reload

diff --git a/llvm/test/CodeGen/X86/fshr.ll b/llvm/test/CodeGen/X86/fshr.ll
index a655c5804e1b..009f2420575f 100644
--- a/llvm/test/CodeGen/X86/fshr.ll
+++ b/llvm/test/CodeGen/X86/fshr.ll
@@ -279,8 +279,7 @@ define i64 @var_shift_i64(i64 %x, i64 %y, i64 %z) nounwind {
 ; X86-SLOW-NEXT:    orl %edi, %edx
 ; X86-SLOW-NEXT:    movl %edx, (%esp) # 4-byte Spill
 ; X86-SLOW-NEXT:  .LBB4_2:
-; X86-SLOW-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-SLOW-NEXT:    movl %ecx, %edx
+; X86-SLOW-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; X86-SLOW-NEXT:    movl %ebx, %ecx
 ; X86-SLOW-NEXT:    shrl %cl, %edx
 ; X86-SLOW-NEXT:    movb %bl, %ah

diff --git a/llvm/test/CodeGen/X86/i128-mul.ll b/llvm/test/CodeGen/X86/i128-mul.ll
index 9f9636361a50..e40f10a67dd1 100644
--- a/llvm/test/CodeGen/X86/i128-mul.ll
+++ b/llvm/test/CodeGen/X86/i128-mul.ll
@@ -88,9 +88,8 @@ define i64 @mul1(i64 %n, i64* nocapture %z, i64* nocapture %x, i64 %y) nounwind
 ; X86-NEXT:    movl 4(%eax,%ebp,8), %ecx
 ; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X86-NEXT:    movl %esi, %eax
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT:    movl %edx, %edi
-; X86-NEXT:    mull %edx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT:    mull %edi
 ; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X86-NEXT:    movl %ecx, %eax

diff --git a/llvm/test/CodeGen/X86/mul-i512.ll b/llvm/test/CodeGen/X86/mul-i512.ll
index 40f6b09288e0..a5050467ac1a 100644
--- a/llvm/test/CodeGen/X86/mul-i512.ll
+++ b/llvm/test/CodeGen/X86/mul-i512.ll
@@ -153,9 +153,8 @@ define void @test_512(i512* %a, i512* %b, i512* %out) nounwind {
 ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X32-NEXT:    adcl $0, %edx
 ; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; X32-NEXT:    movl %esi, %ecx
-; X32-NEXT:    movl 8(%esi), %ebx
+; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT:    movl 8(%ecx), %ebx
 ; X32-NEXT:    movl %ebx, %eax
 ; X32-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload

diff --git a/llvm/test/CodeGen/X86/shift-i128.ll b/llvm/test/CodeGen/X86/shift-i128.ll
index 9f9d75cb36ca..6b29bd2207af 100644
--- a/llvm/test/CodeGen/X86/shift-i128.ll
+++ b/llvm/test/CodeGen/X86/shift-i128.ll
@@ -296,8 +296,7 @@ define void @test_shl_i128(i128 %x, i128 %a, i128* nocapture %r) nounwind {
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebp
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
 ; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    movl %ecx, %ebx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebx
 ; X86-NEXT:    movl %eax, %ecx
 ; X86-NEXT:    shll %cl, %ebx
 ; X86-NEXT:    movl %ebp, %esi
@@ -534,8 +533,7 @@ define void @test_lshr_v2i128(<2 x i128> %x, <2 x i128> %a, <2 x i128>* nocaptur
 ; X86-NEXT:  .LBB6_9: # %entry
 ; X86-NEXT:    movl %edi, %esi
 ; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    movl %ecx, %ebp
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebp
 ; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
 ; X86-NEXT:    shrl %cl, %ebp
 ; X86-NEXT:    testb $32, %cl
@@ -795,9 +793,8 @@ define void @test_ashr_v2i128(<2 x i128> %x, <2 x i128> %a, <2 x i128>* nocaptur
 ; X86-NEXT:  # %bb.4: # %entry
 ; X86-NEXT:    movl %edi, %ebx
 ; X86-NEXT:  .LBB7_5: # %entry
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    movl %ecx, %ebp
-; X86-NEXT:    movl %ecx, %edi
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebp
+; X86-NEXT:    movl %ebp, %edi
 ; X86-NEXT:    movl %edx, %ecx
 ; X86-NEXT:    sarl %cl, %edi
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
@@ -835,8 +832,7 @@ define void @test_ashr_v2i128(<2 x i128> %x, <2 x i128> %a, <2 x i128>* nocaptur
 ; X86-NEXT:    movl %esi, %edi
 ; X86-NEXT:  .LBB7_9: # %entry
 ; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    movl %ecx, %esi
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
 ; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
 ; X86-NEXT:    sarl %cl, %esi
 ; X86-NEXT:    testb $32, %cl
@@ -850,8 +846,7 @@ define void @test_ashr_v2i128(<2 x i128> %x, <2 x i128> %a, <2 x i128>* nocaptur
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
 ; X86-NEXT:    movb $64, %cl
 ; X86-NEXT:    subb %dl, %cl
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebx
-; X86-NEXT:    movl %ebx, %ebp
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebp
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebx
 ; X86-NEXT:    shldl %cl, %ebx, %ebp
 ; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
@@ -1059,12 +1054,11 @@ define void @test_shl_v2i128(<2 x i128> %x, <2 x i128> %a, <2 x i128>* nocapture
 ; X86-NEXT:    pushl %esi
 ; X86-NEXT:    subl $72, %esp
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebp
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebx
 ; X86-NEXT:    movl %ebx, %ecx
 ; X86-NEXT:    shll %cl, %ebp
-; X86-NEXT:    movl %eax, %esi
 ; X86-NEXT:    shll %cl, %esi
 ; X86-NEXT:    movl %edx, %eax
 ; X86-NEXT:    subl $64, %eax
@@ -1130,9 +1124,7 @@ define void @test_shl_v2i128(<2 x i128> %x, <2 x i128> %a, <2 x i128>* nocapture
 ; X86-NEXT:    movl %ecx, %ebp
 ; X86-NEXT:    movl %edx, %ecx
 ; X86-NEXT:    shll %cl, %ebp
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    movl %ecx, %esi
-; X86-NEXT:    movl %edx, %ecx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
 ; X86-NEXT:    shll %cl, %esi
 ; X86-NEXT:    testb $32, %dl
 ; X86-NEXT:    movl $0, %edi
@@ -1210,8 +1202,7 @@ define void @test_shl_v2i128(<2 x i128> %x, <2 x i128> %a, <2 x i128>* nocapture
 ; X86-NEXT:    movl %edi, %ecx
 ; X86-NEXT:  .LBB8_23: # %entry
 ; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    movl %ecx, %edi
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
 ; X86-NEXT:    movl %eax, %ecx
 ; X86-NEXT:    shll %cl, %edi
 ; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill

diff --git a/llvm/test/CodeGen/X86/umulo-128-legalisation-lowering.ll b/llvm/test/CodeGen/X86/umulo-128-legalisation-lowering.ll
index 4f26db8869d9..7ce16bbc3d42 100644
--- a/llvm/test/CodeGen/X86/umulo-128-legalisation-lowering.ll
+++ b/llvm/test/CodeGen/X86/umulo-128-legalisation-lowering.ll
@@ -98,8 +98,8 @@ define { i128, i8 } @muloti_test(i128 %l, i128 %r) unnamed_addr #0 {
 ; X86-NEXT:    addl %esi, %ecx
 ; X86-NEXT:    adcl $0, %ebp
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT:    mull %esi
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    mull %edx
 ; X86-NEXT:    movl %edx, %esi
 ; X86-NEXT:    addl %ecx, %eax
 ; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill


        


More information about the llvm-commits mailing list