[llvm-commits] [llvm] r106481 - in /llvm/trunk: lib/Target/ARM/ARMLoadStoreOptimizer.cpp test/CodeGen/ARM/2010-06-21-LdStMultipleBug.ll

Evan Cheng evan.cheng at apple.com
Mon Jun 21 14:21:14 PDT 2010


Author: evancheng
Date: Mon Jun 21 16:21:14 2010
New Revision: 106481

URL: http://llvm.org/viewvc/llvm-project?rev=106481&view=rev
Log:
Fix PR7421: bug in kill transferring logic. It was ignoring loads / stores which have already been processed.

Added:
    llvm/trunk/test/CodeGen/ARM/2010-06-21-LdStMultipleBug.ll
Modified:
    llvm/trunk/lib/Target/ARM/ARMLoadStoreOptimizer.cpp

Modified: llvm/trunk/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMLoadStoreOptimizer.cpp?rev=106481&r1=106480&r2=106481&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMLoadStoreOptimizer.cpp (original)
+++ llvm/trunk/lib/Target/ARM/ARMLoadStoreOptimizer.cpp Mon Jun 21 16:21:14 2010
@@ -74,11 +74,14 @@
   private:
     struct MemOpQueueEntry {
       int Offset;
+      unsigned Reg;
+      bool isKill;
       unsigned Position;
       MachineBasicBlock::iterator MBBI;
       bool Merged;
-      MemOpQueueEntry(int o, int p, MachineBasicBlock::iterator i)
-        : Offset(o), Position(p), MBBI(i), Merged(false) {}
+      MemOpQueueEntry(int o, unsigned r, bool k, unsigned p, 
+                      MachineBasicBlock::iterator i)
+        : Offset(o), Reg(r), isKill(k), Position(p), MBBI(i), Merged(false) {}
     };
     typedef SmallVector<MemOpQueueEntry,8> MemOpQueue;
     typedef MemOpQueue::iterator MemOpQueueIter;
@@ -264,39 +267,53 @@
 
 // MergeOpsUpdate - call MergeOps and update MemOps and merges accordingly on
 // success.
-void ARMLoadStoreOpt::
-MergeOpsUpdate(MachineBasicBlock &MBB,
-               MemOpQueue &memOps,
-               unsigned memOpsBegin,
-               unsigned memOpsEnd,
-               unsigned insertAfter,
-               int Offset,
-               unsigned Base,
-               bool BaseKill,
-               int Opcode,
-               ARMCC::CondCodes Pred,
-               unsigned PredReg,
-               unsigned Scratch,
-               DebugLoc dl,
-               SmallVector<MachineBasicBlock::iterator, 4> &Merges) {
+void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB,
+                                     MemOpQueue &memOps,
+                                     unsigned memOpsBegin, unsigned memOpsEnd,
+                                     unsigned insertAfter, int Offset,
+                                     unsigned Base, bool BaseKill,
+                                     int Opcode,
+                                     ARMCC::CondCodes Pred, unsigned PredReg,
+                                     unsigned Scratch,
+                                     DebugLoc dl,
+                          SmallVector<MachineBasicBlock::iterator, 4> &Merges) {
   // First calculate which of the registers should be killed by the merged
   // instruction.
-  SmallVector<std::pair<unsigned, bool>, 8> Regs;
   const unsigned insertPos = memOps[insertAfter].Position;
+
+  SmallSet<unsigned, 4> UnavailRegs;
+  SmallSet<unsigned, 4> KilledRegs;
+  DenseMap<unsigned, unsigned> Killer;
+  for (unsigned i = 0; i < memOpsBegin; ++i) {
+    if (memOps[i].Position < insertPos && memOps[i].isKill) {
+      unsigned Reg = memOps[i].Reg;
+      if (memOps[i].Merged)
+        UnavailRegs.insert(Reg);
+      else {
+        KilledRegs.insert(Reg);
+        Killer[Reg] = i;
+      }
+    }
+  }
+  for (unsigned i = memOpsEnd, e = memOps.size(); i != e; ++i) {
+    if (memOps[i].Position < insertPos && memOps[i].isKill) {
+      unsigned Reg = memOps[i].Reg;
+      KilledRegs.insert(Reg);
+      Killer[Reg] = i;
+    }
+  }
+
+  SmallVector<std::pair<unsigned, bool>, 8> Regs;
   for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
-    const MachineOperand &MO = memOps[i].MBBI->getOperand(0);
-    unsigned Reg = MO.getReg();
-    bool isKill = MO.isKill();
+    unsigned Reg = memOps[i].Reg;
+    if (UnavailRegs.count(Reg))
+      // Register is killed before and it's not easy / possible to update the
+      // kill marker on already merged instructions. Abort.
+      return;
 
     // If we are inserting the merged operation after an unmerged operation that
     // uses the same register, make sure to transfer any kill flag.
-    for (unsigned j = memOpsEnd, e = memOps.size(); !isKill && j != e; ++j)
-      if (memOps[j].Position<insertPos) {
-        const MachineOperand &MOJ = memOps[j].MBBI->getOperand(0);
-        if (MOJ.getReg() == Reg && MOJ.isKill())
-          isKill = true;
-      }
-
+    bool isKill = memOps[i].isKill || KilledRegs.count(Reg);
     Regs.push_back(std::make_pair(Reg, isKill));
   }
 
@@ -311,13 +328,13 @@
   Merges.push_back(prior(Loc));
   for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
     // Remove kill flags from any unmerged memops that come before insertPos.
-    if (Regs[i-memOpsBegin].second)
-      for (unsigned j = memOpsEnd, e = memOps.size(); j != e; ++j)
-        if (memOps[j].Position<insertPos) {
-          MachineOperand &MOJ = memOps[j].MBBI->getOperand(0);
-          if (MOJ.getReg() == Regs[i-memOpsBegin].first && MOJ.isKill())
-            MOJ.setIsKill(false);
-        }
+    if (Regs[i-memOpsBegin].second) {
+      unsigned Reg = Regs[i-memOpsBegin].first;
+      if (KilledRegs.count(Reg)) {
+        unsigned j = Killer[Reg];
+        memOps[j].MBBI->getOperand(0).setIsKill(false);
+      }
+    }
     MBB.erase(memOps[i].MBBI);
     memOps[i].Merged = true;
   }
@@ -910,6 +927,7 @@
     if ((EvenRegNum & 1) == 0 && (EvenRegNum + 1) == OddRegNum)
       return false;
 
+    MachineBasicBlock::iterator NewBBI = MBBI;
     bool isT2 = Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8;
     bool isLd = Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8;
     bool EvenDeadKill = isLd ?
@@ -954,6 +972,7 @@
                   getKillRegState(OddDeadKill)  | getUndefRegState(OddUndef));
         ++NumSTRD2STM;
       }
+      NewBBI = llvm::prior(MBBI);
     } else {
       // Split into two instructions.
       assert((!isT2 || !OffReg) &&
@@ -974,6 +993,7 @@
                       OddReg, OddDeadKill, false,
                       BaseReg, false, BaseUndef, OffReg, false, OffUndef,
                       Pred, PredReg, TII, isT2);
+        NewBBI = llvm::prior(MBBI);
         InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
                       EvenReg, EvenDeadKill, false,
                       BaseReg, BaseKill, BaseUndef, OffReg, OffKill, OffUndef,
@@ -990,6 +1010,7 @@
                       EvenReg, EvenDeadKill, EvenUndef,
                       BaseReg, false, BaseUndef, OffReg, false, OffUndef,
                       Pred, PredReg, TII, isT2);
+        NewBBI = llvm::prior(MBBI);
         InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc,
                       OddReg, OddDeadKill, OddUndef,
                       BaseReg, BaseKill, BaseUndef, OffReg, OffKill, OffUndef,
@@ -1001,8 +1022,9 @@
         ++NumSTRD2STR;
     }
 
-    MBBI = prior(MBBI);
     MBB.erase(MI);
+    MBBI = NewBBI;
+    return true;
   }
   return false;
 }
@@ -1035,6 +1057,9 @@
     if (isMemOp) {
       int Opcode = MBBI->getOpcode();
       unsigned Size = getLSMultipleTransferSize(MBBI);
+      const MachineOperand &MO = MBBI->getOperand(0);
+      unsigned Reg = MO.getReg();
+      bool isKill = MO.isDef() ? false : MO.isKill();
       unsigned Base = MBBI->getOperand(1).getReg();
       unsigned PredReg = 0;
       ARMCC::CondCodes Pred = llvm::getInstrPredicate(MBBI, PredReg);
@@ -1056,7 +1081,7 @@
         CurrSize = Size;
         CurrPred = Pred;
         CurrPredReg = PredReg;
-        MemOps.push_back(MemOpQueueEntry(Offset, Position, MBBI));
+        MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill, Position, MBBI));
         NumMemOps++;
         Advance = true;
       } else {
@@ -1069,14 +1094,16 @@
           // No need to match PredReg.
           // Continue adding to the queue.
           if (Offset > MemOps.back().Offset) {
-            MemOps.push_back(MemOpQueueEntry(Offset, Position, MBBI));
+            MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill,
+                                             Position, MBBI));
             NumMemOps++;
             Advance = true;
           } else {
             for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end();
                  I != E; ++I) {
               if (Offset < I->Offset) {
-                MemOps.insert(I, MemOpQueueEntry(Offset, Position, MBBI));
+                MemOps.insert(I, MemOpQueueEntry(Offset, Reg, isKill,
+                                                 Position, MBBI));
                 NumMemOps++;
                 Advance = true;
                 break;

Added: llvm/trunk/test/CodeGen/ARM/2010-06-21-LdStMultipleBug.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/2010-06-21-LdStMultipleBug.ll?rev=106481&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/2010-06-21-LdStMultipleBug.ll (added)
+++ llvm/trunk/test/CodeGen/ARM/2010-06-21-LdStMultipleBug.ll Mon Jun 21 16:21:14 2010
@@ -0,0 +1,148 @@
+; RUN: llc < %s -mtriple=armv7-apple-darwin -O3 -mcpu=arm1136jf-s
+; PR7421
+
+%struct.CONTENTBOX = type { i32, i32, i32, i32, i32 }
+%struct.FILE = type { i8* }
+%struct.tilebox = type { %struct.tilebox*, double, double, double, double, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 }
+%struct.UNCOMBOX = type { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 }
+%struct.cellbox = type { i8*, i32, i32, i32, [9 x i32], i32, i32, i32, i32, i32, i32, i32, double, double, double, double, double, i32, i32, %struct.CONTENTBOX*, %struct.UNCOMBOX*, [8 x %struct.tilebox*] }
+%struct.termbox = type { %struct.termbox*, i32, i32, i32, i32, i32 }
+
+ at .str2708 = external constant [14 x i8], align 4  ; <[14 x i8]*> [#uses=1]
+
+define void @TW_oldinput(%struct.FILE* nocapture %fp) nounwind {
+entry:
+  %xcenter = alloca i32, align 4                  ; <i32*> [#uses=2]
+  %0 = call i32 (%struct.FILE*, i8*, ...)* @fscanf(%struct.FILE* %fp, i8* getelementptr inbounds ([14 x i8]* @.str2708, i32 0, i32 0), i32* undef, i32* undef, i32* %xcenter, i32* null) nounwind ; <i32> [#uses=1]
+  %1 = icmp eq i32 %0, 4                          ; <i1> [#uses=1]
+  br i1 %1, label %bb, label %return
+
+bb:                                               ; preds = %bb445, %entry
+  %2 = load %struct.cellbox** undef, align 4      ; <%struct.cellbox*> [#uses=2]
+  %3 = getelementptr inbounds %struct.cellbox* %2, i32 0, i32 3 ; <i32*> [#uses=1]
+  store i32 undef, i32* %3, align 4
+  %4 = load i32* undef, align 4                   ; <i32> [#uses=3]
+  %5 = icmp eq i32 undef, 1                       ; <i1> [#uses=1]
+  br i1 %5, label %bb10, label %bb445
+
+bb10:                                             ; preds = %bb
+  br i1 undef, label %bb11, label %bb445
+
+bb11:                                             ; preds = %bb10
+  %6 = load %struct.tilebox** undef, align 4      ; <%struct.tilebox*> [#uses=3]
+  %7 = load %struct.termbox** null, align 4       ; <%struct.termbox*> [#uses=1]
+  %8 = getelementptr inbounds %struct.tilebox* %6, i32 0, i32 13 ; <i32*> [#uses=1]
+  %9 = load i32* %8, align 4                      ; <i32> [#uses=3]
+  %10 = getelementptr inbounds %struct.tilebox* %6, i32 0, i32 15 ; <i32*> [#uses=1]
+  %11 = load i32* %10, align 4                    ; <i32> [#uses=1]
+  br i1 false, label %bb12, label %bb13
+
+bb12:                                             ; preds = %bb11
+  unreachable
+
+bb13:                                             ; preds = %bb11
+  %iftmp.40.0.neg = sdiv i32 0, -2                ; <i32> [#uses=2]
+  %12 = sub nsw i32 0, %9                         ; <i32> [#uses=1]
+  %13 = sitofp i32 %12 to double                  ; <double> [#uses=1]
+  %14 = fdiv double %13, 0.000000e+00             ; <double> [#uses=1]
+  %15 = fptosi double %14 to i32                  ; <i32> [#uses=1]
+  %iftmp.41.0.in = add i32 0, %15                 ; <i32> [#uses=1]
+  %iftmp.41.0.neg = sdiv i32 %iftmp.41.0.in, -2   ; <i32> [#uses=3]
+  br i1 undef, label %bb43.loopexit, label %bb21
+
+bb21:                                             ; preds = %bb13
+  %16 = fptosi double undef to i32                ; <i32> [#uses=1]
+  %17 = fsub double undef, 0.000000e+00           ; <double> [#uses=1]
+  %not.460 = fcmp oge double %17, 5.000000e-01    ; <i1> [#uses=1]
+  %18 = zext i1 %not.460 to i32                   ; <i32> [#uses=1]
+  %iftmp.42.0 = add i32 %16, %iftmp.41.0.neg      ; <i32> [#uses=1]
+  %19 = add i32 %iftmp.42.0, %18                  ; <i32> [#uses=1]
+  store i32 %19, i32* undef, align 4
+  %20 = sub nsw i32 0, %9                         ; <i32> [#uses=1]
+  %21 = sitofp i32 %20 to double                  ; <double> [#uses=1]
+  %22 = fdiv double %21, 0.000000e+00             ; <double> [#uses=2]
+  %23 = fptosi double %22 to i32                  ; <i32> [#uses=1]
+  %24 = fsub double %22, undef                    ; <double> [#uses=1]
+  %not.461 = fcmp oge double %24, 5.000000e-01    ; <i1> [#uses=1]
+  %25 = zext i1 %not.461 to i32                   ; <i32> [#uses=1]
+  %iftmp.43.0 = add i32 %23, %iftmp.41.0.neg      ; <i32> [#uses=1]
+  %26 = add i32 %iftmp.43.0, %25                  ; <i32> [#uses=1]
+  %27 = getelementptr inbounds %struct.tilebox* %6, i32 0, i32 10 ; <i32*> [#uses=1]
+  store i32 %26, i32* %27, align 4
+  %28 = fptosi double undef to i32                ; <i32> [#uses=1]
+  %iftmp.45.0 = add i32 %28, %iftmp.40.0.neg      ; <i32> [#uses=1]
+  %29 = add i32 %iftmp.45.0, 0                    ; <i32> [#uses=1]
+  store i32 %29, i32* undef, align 4
+  br label %bb43.loopexit
+
+bb36:                                             ; preds = %bb43.loopexit, %bb36
+  %termptr.0478 = phi %struct.termbox* [ %42, %bb36 ], [ %7, %bb43.loopexit ] ; <%struct.termbox*> [#uses=1]
+  %30 = load i32* undef, align 4                  ; <i32> [#uses=1]
+  %31 = sub nsw i32 %30, %9                       ; <i32> [#uses=1]
+  %32 = sitofp i32 %31 to double                  ; <double> [#uses=1]
+  %33 = fdiv double %32, 0.000000e+00             ; <double> [#uses=1]
+  %34 = fptosi double %33 to i32                  ; <i32> [#uses=1]
+  %iftmp.46.0 = add i32 %34, %iftmp.41.0.neg      ; <i32> [#uses=1]
+  %35 = add i32 %iftmp.46.0, 0                    ; <i32> [#uses=1]
+  store i32 %35, i32* undef, align 4
+  %36 = sub nsw i32 0, %11                        ; <i32> [#uses=1]
+  %37 = sitofp i32 %36 to double                  ; <double> [#uses=1]
+  %38 = fmul double %37, 0.000000e+00             ; <double> [#uses=1]
+  %39 = fptosi double %38 to i32                  ; <i32> [#uses=1]
+  %iftmp.47.0 = add i32 %39, %iftmp.40.0.neg      ; <i32> [#uses=1]
+  %40 = add i32 %iftmp.47.0, 0                    ; <i32> [#uses=1]
+  store i32 %40, i32* undef, align 4
+  %41 = getelementptr inbounds %struct.termbox* %termptr.0478, i32 0, i32 0 ; <%struct.termbox**> [#uses=1]
+  %42 = load %struct.termbox** %41, align 4       ; <%struct.termbox*> [#uses=2]
+  %43 = icmp eq %struct.termbox* %42, null        ; <i1> [#uses=1]
+  br i1 %43, label %bb52.loopexit, label %bb36
+
+bb43.loopexit:                                    ; preds = %bb21, %bb13
+  br i1 undef, label %bb52.loopexit, label %bb36
+
+bb52.loopexit:                                    ; preds = %bb43.loopexit, %bb36
+  %44 = icmp eq i32 %4, 0                         ; <i1> [#uses=1]
+  br i1 %44, label %bb.nph485, label %bb54
+
+bb54:                                             ; preds = %bb52.loopexit
+  switch i32 %4, label %bb62 [
+    i32 2, label %bb56
+    i32 3, label %bb57
+  ]
+
+bb56:                                             ; preds = %bb54
+  br label %bb62
+
+bb57:                                             ; preds = %bb54
+  br label %bb62
+
+bb62:                                             ; preds = %bb57, %bb56, %bb54
+  unreachable
+
+bb.nph485:                                        ; preds = %bb52.loopexit
+  br label %bb248
+
+bb248:                                            ; preds = %bb322, %bb.nph485
+  %45 = icmp eq i32 undef, %4                     ; <i1> [#uses=1]
+  br i1 %45, label %bb322, label %bb249
+
+bb249:                                            ; preds = %bb248
+  %46 = getelementptr inbounds %struct.cellbox* %2, i32 0, i32 21, i32 undef ; <%struct.tilebox**> [#uses=1]
+  %47 = load %struct.tilebox** %46, align 4       ; <%struct.tilebox*> [#uses=1]
+  %48 = getelementptr inbounds %struct.tilebox* %47, i32 0, i32 11 ; <i32*> [#uses=1]
+  store i32 undef, i32* %48, align 4
+  unreachable
+
+bb322:                                            ; preds = %bb248
+  br i1 undef, label %bb248, label %bb445
+
+bb445:                                            ; preds = %bb322, %bb10, %bb
+  %49 = call i32 (%struct.FILE*, i8*, ...)* @fscanf(%struct.FILE* %fp, i8* getelementptr inbounds ([14 x i8]* @.str2708, i32 0, i32 0), i32* undef, i32* undef, i32* %xcenter, i32* null) nounwind ; <i32> [#uses=1]
+  %50 = icmp eq i32 %49, 4                        ; <i1> [#uses=1]
+  br i1 %50, label %bb, label %return
+
+return:                                           ; preds = %bb445, %entry
+  ret void
+}
+
+declare i32 @fscanf(%struct.FILE* nocapture, i8* nocapture, ...) nounwind





More information about the llvm-commits mailing list