[llvm-commits] [llvm] r67068 - in /llvm/trunk: lib/CodeGen/Spiller.cpp lib/CodeGen/Spiller.h test/CodeGen/X86/2009-03-16-SpillerBug.ll

Evan Cheng evan.cheng at apple.com
Mon Mar 16 18:23:09 PDT 2009


Author: evancheng
Date: Mon Mar 16 20:23:09 2009
New Revision: 67068

URL: http://llvm.org/viewvc/llvm-project?rev=67068&view=rev
Log:
The spiller may unfold load / mod / store instructions as an optimization when the value that would be loaded is already available in a register. Before reusing that register it must check whether it is legal to clobber it. Also, a register can hold the values of multiple spill slots, so all of them must be checked, not just the one being unfolded.
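
The crux of the fix is checking clobber legality per register rather than per spill slot. Below is a rough standalone sketch of the idea (a simplified model with made-up names and a made-up register number; the real AvailableSpills in Spiller.h packs the CanClobber bit into the mapped value, as the "& 1" test in the patch shows):

#include <cassert>
#include <map>

// Simplified model of the spiller's availability tracking: which stack slot
// values currently live in which physical register, and whether each value
// may be overwritten.
struct AvailabilityModel {
  std::multimap<unsigned, int> PhysRegToSlots; // physreg -> slots available in it
  std::map<int, bool> SlotCanClobber;          // slot -> CanClobber flag

  // Old behaviour: consult only the slot being unfolded.
  bool canClobberSlot(int Slot) const {
    return SlotCanClobber.at(Slot);
  }

  // New behaviour: the register may be overwritten only if every slot value
  // it currently holds is marked clobberable.
  bool canClobberPhysReg(unsigned PhysReg) const {
    auto Range = PhysRegToSlots.equal_range(PhysReg);
    for (auto I = Range.first; I != Range.second; ++I)
      if (!SlotCanClobber.at(I->second))
        return false;
    return true;
  }
};

int main() {
  AvailabilityModel M;
  const unsigned Reg = 17;              // made-up physreg number
  M.PhysRegToSlots.insert({Reg, 4});    // slot 4 available in Reg, clobberable
  M.PhysRegToSlots.insert({Reg, 7});    // slot 7 also available in Reg, NOT clobberable
  M.SlotCanClobber[4] = true;
  M.SlotCanClobber[7] = false;

  assert(M.canClobberSlot(4));          // per-slot check looks safe...
  assert(!M.canClobberPhysReg(Reg));    // ...but overwriting Reg would lose slot 7's value
  return 0;
}

In this situation the pre-patch spiller would have unfolded the store and reused the register, destroying the other slot's value; the new per-register query rejects the reuse.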

Added:
    llvm/trunk/test/CodeGen/X86/2009-03-16-SpillerBug.ll
Modified:
    llvm/trunk/lib/CodeGen/Spiller.cpp
    llvm/trunk/lib/CodeGen/Spiller.h

Modified: llvm/trunk/lib/CodeGen/Spiller.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/Spiller.cpp?rev=67068&r1=67067&r2=67068&view=diff

==============================================================================
--- llvm/trunk/lib/CodeGen/Spiller.cpp (original)
+++ llvm/trunk/lib/CodeGen/Spiller.cpp Mon Mar 16 20:23:09 2009
@@ -28,6 +28,7 @@
 STATISTIC(NumLoads   , "Number of loads added");
 STATISTIC(NumReused  , "Number of values reused");
 STATISTIC(NumDCE     , "Number of copies elided");
+STATISTIC(NumSUnfold , "Number of stores unfolded");
 
 namespace {
   enum SpillerName { simple, local };
@@ -1172,8 +1173,8 @@
           // Okay, we have a two address operand.  We can reuse this physreg as
           // long as we are allowed to clobber the value and there isn't an
           // earlier def that has already clobbered the physreg.
-          CanReuse = Spills.canClobberPhysReg(ReuseSlot) &&
-            !ReusedOperands.isClobbered(PhysReg);
+          CanReuse = !ReusedOperands.isClobbered(PhysReg) &&
+            Spills.canClobberPhysReg(PhysReg);
         }
         
         if (CanReuse) {
@@ -1408,6 +1409,7 @@
               DOUT << "Removing now-noop copy: " << MI;
               // Unset last kill since it's being reused.
               InvalidateKill(InReg, RegKills, KillOps);
+              Spills.disallowClobberPhysReg(InReg);
             }
 
             InvalidateKills(MI, RegKills, KillOps);
@@ -1446,6 +1448,8 @@
           // the value and there isn't an earlier def that has already clobbered
           // the physreg.
           if (PhysReg &&
+              !ReusedOperands.isClobbered(PhysReg) &&
+              Spills.canClobberPhysReg(PhysReg) &&
               !TII->isStoreToStackSlot(&MI, SS)) { // Not profitable!
             MachineOperand *KillOpnd =
               DeadStore->findRegisterUseOperand(PhysReg, true);
@@ -1453,7 +1457,7 @@
             // super-register is needed below.
             if (KillOpnd && !KillOpnd->getSubReg() &&
                 TII->unfoldMemoryOperand(MF, &MI, PhysReg, false, true,NewMIs)){
-             MBB.insert(MII, NewMIs[0]);
+              MBB.insert(MII, NewMIs[0]);
               NewStore = NewMIs[1];
               MBB.insert(MII, NewStore);
               VRM.addSpillSlotUse(SS, NewStore);
@@ -1465,6 +1469,7 @@
               --NextMII;  // backtrack to the unfolded instruction.
               BackTracked = true;
               isDead = true;
+              ++NumSUnfold;
             }
           }
         }
@@ -1519,7 +1524,7 @@
             // If the stack slot value was previously available in some other
             // register, change it now.  Otherwise, make the register
             // available in PhysReg.
-            Spills.addAvailable(StackSlot, SrcReg, false/*!clobber*/);
+            Spills.addAvailable(StackSlot, SrcReg, MI.killsRegister(SrcReg));
           }
         }
       }

Modified: llvm/trunk/lib/CodeGen/Spiller.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/Spiller.h?rev=67068&r1=67067&r2=67068&view=diff

==============================================================================
--- llvm/trunk/lib/CodeGen/Spiller.h (original)
+++ llvm/trunk/lib/CodeGen/Spiller.h Mon Mar 16 20:23:09 2009
@@ -127,15 +127,31 @@
       DOUT << " in physreg " << TRI->getName(Reg) << "\n";
     }
 
-    /// canClobberPhysReg - Return true if the spiller is allowed to change the 
-    /// value of the specified stackslot register if it desires.  The specified
-    /// stack slot must be available in a physreg for this query to make sense.
-    bool canClobberPhysReg(int SlotOrReMat) const {
+    /// canClobberPhysRegForSS - Return true if the spiller is allowed to change
+    /// the value of the specified stackslot register if it desires. The
+    /// specified stack slot must be available in a physreg for this query to
+    /// make sense.
+    bool canClobberPhysRegForSS(int SlotOrReMat) const {
       assert(SpillSlotsOrReMatsAvailable.count(SlotOrReMat) &&
              "Value not available!");
       return SpillSlotsOrReMatsAvailable.find(SlotOrReMat)->second & 1;
     }
 
+    /// canClobberPhysReg - Return true if the spiller is allowed to clobber the
+    /// physical register where values for some stack slot(s) might be
+    /// available.
+    bool canClobberPhysReg(unsigned PhysReg) const {
+      std::multimap<unsigned, int>::const_iterator I =
+        PhysRegsAvailable.lower_bound(PhysReg);
+      while (I != PhysRegsAvailable.end() && I->first == PhysReg) {
+        int SlotOrReMat = I->second;
+        I++;
+        if (!canClobberPhysRegForSS(SlotOrReMat))
+          return false;
+      }
+      return true;
+    }
+
     /// disallowClobberPhysReg - Unset the CanClobber bit of the specified
     /// stackslot register. The register is still available but is no longer
     /// allowed to be modifed.
@@ -305,4 +321,4 @@
   };
 }
 
-#endif
\ No newline at end of file
+#endif
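
With the rename, canClobberPhysRegForSS keeps answering the per-slot question while the new canClobberPhysReg answers the per-register one by folding the per-slot answers together. The "& 1" test above reads the CanClobber flag out of the availability map's mapped value; here is a minimal sketch of that packing, assuming the remaining bits hold the physreg (names below are illustrative, not LLVM's):

#include <cassert>

// Assumed layout of the value stored per slot in the availability map:
// bit 0 = CanClobber, remaining bits = the physical register holding the value.
unsigned encodeAvailable(unsigned PhysReg, bool CanClobber) {
  return (PhysReg << 1) | (CanClobber ? 1u : 0u);
}
unsigned availablePhysReg(unsigned Packed) { return Packed >> 1; }
bool canClobberBit(unsigned Packed) { return (Packed & 1) != 0; }

int main() {
  unsigned Packed = encodeAvailable(/*PhysReg=*/17, /*CanClobber=*/true);
  assert(availablePhysReg(Packed) == 17 && canClobberBit(Packed));
  Packed &= ~1u;                 // disallowClobberPhysReg-style update: clear the flag
  assert(!canClobberBit(Packed));
  return 0;
}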

Added: llvm/trunk/test/CodeGen/X86/2009-03-16-SpillerBug.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/2009-03-16-SpillerBug.ll?rev=67068&view=auto

==============================================================================
--- llvm/trunk/test/CodeGen/X86/2009-03-16-SpillerBug.ll (added)
+++ llvm/trunk/test/CodeGen/X86/2009-03-16-SpillerBug.ll Mon Mar 16 20:23:09 2009
@@ -0,0 +1,167 @@
+; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin -stats |& grep spiller | not grep {stores unfolded}
+; rdar://6682365
+
+; Do not clobber a register if another spill slot is available in it and it's marked "do not clobber".
+
+	%struct.CAST_KEY = type { [32 x i32], i32 }
+@CAST_S_table0 = constant [2 x i32] [i32 821772500, i32 -1616838901], align 32		; <[2 x i32]*> [#uses=0]
+@CAST_S_table4 = constant [2 x i32] [i32 2127105028, i32 745436345], align 32		; <[2 x i32]*> [#uses=6]
+@CAST_S_table5 = constant [2 x i32] [i32 -151351395, i32 749497569], align 32		; <[2 x i32]*> [#uses=5]
+@CAST_S_table6 = constant [2 x i32] [i32 -2048901095, i32 858518887], align 32		; <[2 x i32]*> [#uses=4]
+@CAST_S_table7 = constant [2 x i32] [i32 -501862387, i32 -1143078916], align 32		; <[2 x i32]*> [#uses=5]
+@CAST_S_table1 = constant [2 x i32] [i32 522195092, i32 -284448933], align 32		; <[2 x i32]*> [#uses=0]
+@CAST_S_table2 = constant [2 x i32] [i32 -1913667008, i32 637164959], align 32		; <[2 x i32]*> [#uses=0]
+@CAST_S_table3 = constant [2 x i32] [i32 -1649212384, i32 532081118], align 32		; <[2 x i32]*> [#uses=0]
+
+define void @CAST_set_key(%struct.CAST_KEY* nocapture %key, i32 %len, i8* nocapture %data) nounwind ssp {
+bb1.thread:
+	%0 = getelementptr [16 x i32]* null, i32 0, i32 5		; <i32*> [#uses=1]
+	%1 = getelementptr [16 x i32]* null, i32 0, i32 8		; <i32*> [#uses=1]
+	%2 = load i32* null, align 4		; <i32> [#uses=1]
+	%3 = shl i32 %2, 24		; <i32> [#uses=1]
+	%4 = load i32* null, align 4		; <i32> [#uses=1]
+	%5 = shl i32 %4, 16		; <i32> [#uses=1]
+	%6 = load i32* null, align 4		; <i32> [#uses=1]
+	%7 = or i32 %5, %3		; <i32> [#uses=1]
+	%8 = or i32 %7, %6		; <i32> [#uses=1]
+	%9 = or i32 %8, 0		; <i32> [#uses=1]
+	%10 = load i32* null, align 4		; <i32> [#uses=1]
+	%11 = shl i32 %10, 24		; <i32> [#uses=1]
+	%12 = load i32* %0, align 4		; <i32> [#uses=1]
+	%13 = shl i32 %12, 16		; <i32> [#uses=1]
+	%14 = load i32* null, align 4		; <i32> [#uses=1]
+	%15 = or i32 %13, %11		; <i32> [#uses=1]
+	%16 = or i32 %15, %14		; <i32> [#uses=1]
+	%17 = or i32 %16, 0		; <i32> [#uses=1]
+	br label %bb11
+
+bb11:		; preds = %bb11, %bb1.thread
+	%18 = phi i32 [ %110, %bb11 ], [ 0, %bb1.thread ]		; <i32> [#uses=1]
+	%19 = phi i32 [ %112, %bb11 ], [ 0, %bb1.thread ]		; <i32> [#uses=0]
+	%20 = phi i32 [ 0, %bb11 ], [ 0, %bb1.thread ]		; <i32> [#uses=0]
+	%21 = phi i32 [ %113, %bb11 ], [ 0, %bb1.thread ]		; <i32> [#uses=1]
+	%X.0.0 = phi i32 [ %9, %bb1.thread ], [ %92, %bb11 ]		; <i32> [#uses=0]
+	%X.1.0 = phi i32 [ %17, %bb1.thread ], [ 0, %bb11 ]		; <i32> [#uses=0]
+	%22 = getelementptr [2 x i32]* @CAST_S_table6, i32 0, i32 %21		; <i32*> [#uses=0]
+	%23 = getelementptr [2 x i32]* @CAST_S_table5, i32 0, i32 %18		; <i32*> [#uses=0]
+	%24 = load i32* null, align 4		; <i32> [#uses=1]
+	%25 = xor i32 0, %24		; <i32> [#uses=1]
+	%26 = xor i32 %25, 0		; <i32> [#uses=1]
+	%27 = xor i32 %26, 0		; <i32> [#uses=4]
+	%28 = and i32 %27, 255		; <i32> [#uses=2]
+	%29 = lshr i32 %27, 8		; <i32> [#uses=1]
+	%30 = and i32 %29, 255		; <i32> [#uses=2]
+	%31 = lshr i32 %27, 16		; <i32> [#uses=1]
+	%32 = and i32 %31, 255		; <i32> [#uses=1]
+	%33 = getelementptr [2 x i32]* @CAST_S_table4, i32 0, i32 %28		; <i32*> [#uses=1]
+	%34 = load i32* %33, align 4		; <i32> [#uses=2]
+	%35 = getelementptr [2 x i32]* @CAST_S_table5, i32 0, i32 %30		; <i32*> [#uses=1]
+	%36 = load i32* %35, align 4		; <i32> [#uses=2]
+	%37 = xor i32 %34, 0		; <i32> [#uses=1]
+	%38 = xor i32 %37, %36		; <i32> [#uses=1]
+	%39 = xor i32 %38, 0		; <i32> [#uses=1]
+	%40 = xor i32 %39, 0		; <i32> [#uses=1]
+	%41 = xor i32 %40, 0		; <i32> [#uses=3]
+	%42 = lshr i32 %41, 8		; <i32> [#uses=1]
+	%43 = and i32 %42, 255		; <i32> [#uses=2]
+	%44 = lshr i32 %41, 16		; <i32> [#uses=1]
+	%45 = and i32 %44, 255		; <i32> [#uses=1]
+	%46 = getelementptr [2 x i32]* @CAST_S_table4, i32 0, i32 %43		; <i32*> [#uses=1]
+	%47 = load i32* %46, align 4		; <i32> [#uses=1]
+	%48 = load i32* null, align 4		; <i32> [#uses=1]
+	%49 = xor i32 %47, 0		; <i32> [#uses=1]
+	%50 = xor i32 %49, %48		; <i32> [#uses=1]
+	%51 = xor i32 %50, 0		; <i32> [#uses=1]
+	%52 = xor i32 %51, 0		; <i32> [#uses=1]
+	%53 = xor i32 %52, 0		; <i32> [#uses=2]
+	%54 = and i32 %53, 255		; <i32> [#uses=1]
+	%55 = lshr i32 %53, 24		; <i32> [#uses=1]
+	%56 = getelementptr [2 x i32]* @CAST_S_table6, i32 0, i32 %55		; <i32*> [#uses=1]
+	%57 = load i32* %56, align 4		; <i32> [#uses=1]
+	%58 = xor i32 0, %57		; <i32> [#uses=1]
+	%59 = xor i32 %58, 0		; <i32> [#uses=1]
+	%60 = xor i32 %59, 0		; <i32> [#uses=1]
+	store i32 %60, i32* null, align 4
+	%61 = getelementptr [2 x i32]* @CAST_S_table4, i32 0, i32 0		; <i32*> [#uses=1]
+	%62 = load i32* %61, align 4		; <i32> [#uses=1]
+	%63 = getelementptr [2 x i32]* @CAST_S_table7, i32 0, i32 %54		; <i32*> [#uses=1]
+	%64 = load i32* %63, align 4		; <i32> [#uses=1]
+	%65 = xor i32 0, %64		; <i32> [#uses=1]
+	%66 = xor i32 %65, 0		; <i32> [#uses=1]
+	store i32 %66, i32* null, align 4
+	%67 = getelementptr [2 x i32]* @CAST_S_table7, i32 0, i32 %45		; <i32*> [#uses=1]
+	%68 = load i32* %67, align 4		; <i32> [#uses=1]
+	%69 = xor i32 %36, %34		; <i32> [#uses=1]
+	%70 = xor i32 %69, 0		; <i32> [#uses=1]
+	%71 = xor i32 %70, %68		; <i32> [#uses=1]
+	%72 = xor i32 %71, 0		; <i32> [#uses=1]
+	store i32 %72, i32* null, align 4
+	%73 = getelementptr [2 x i32]* @CAST_S_table4, i32 0, i32 %32		; <i32*> [#uses=1]
+	%74 = load i32* %73, align 4		; <i32> [#uses=2]
+	%75 = load i32* null, align 4		; <i32> [#uses=1]
+	%76 = getelementptr [2 x i32]* @CAST_S_table6, i32 0, i32 %43		; <i32*> [#uses=1]
+	%77 = load i32* %76, align 4		; <i32> [#uses=1]
+	%78 = getelementptr [2 x i32]* @CAST_S_table7, i32 0, i32 0		; <i32*> [#uses=1]
+	%79 = load i32* %78, align 4		; <i32> [#uses=1]
+	%80 = getelementptr [2 x i32]* @CAST_S_table7, i32 0, i32 %30		; <i32*> [#uses=1]
+	%81 = load i32* %80, align 4		; <i32> [#uses=2]
+	%82 = xor i32 %75, %74		; <i32> [#uses=1]
+	%83 = xor i32 %82, %77		; <i32> [#uses=1]
+	%84 = xor i32 %83, %79		; <i32> [#uses=1]
+	%85 = xor i32 %84, %81		; <i32> [#uses=1]
+	store i32 %85, i32* null, align 4
+	%86 = getelementptr [2 x i32]* @CAST_S_table5, i32 0, i32 %28		; <i32*> [#uses=1]
+	%87 = load i32* %86, align 4		; <i32> [#uses=1]
+	%88 = xor i32 %74, %41		; <i32> [#uses=1]
+	%89 = xor i32 %88, %87		; <i32> [#uses=1]
+	%90 = xor i32 %89, 0		; <i32> [#uses=1]
+	%91 = xor i32 %90, %81		; <i32> [#uses=1]
+	%92 = xor i32 %91, 0		; <i32> [#uses=3]
+	%93 = lshr i32 %92, 16		; <i32> [#uses=1]
+	%94 = and i32 %93, 255		; <i32> [#uses=1]
+	store i32 %94, i32* null, align 4
+	%95 = lshr i32 %92, 24		; <i32> [#uses=2]
+	%96 = getelementptr [2 x i32]* @CAST_S_table4, i32 0, i32 %95		; <i32*> [#uses=1]
+	%97 = load i32* %96, align 4		; <i32> [#uses=1]
+	%98 = getelementptr [2 x i32]* @CAST_S_table5, i32 0, i32 0		; <i32*> [#uses=1]
+	%99 = load i32* %98, align 4		; <i32> [#uses=1]
+	%100 = load i32* null, align 4		; <i32> [#uses=0]
+	%101 = xor i32 %97, 0		; <i32> [#uses=1]
+	%102 = xor i32 %101, %99		; <i32> [#uses=1]
+	%103 = xor i32 %102, 0		; <i32> [#uses=1]
+	%104 = xor i32 %103, 0		; <i32> [#uses=0]
+	store i32 0, i32* null, align 4
+	%105 = xor i32 0, %27		; <i32> [#uses=1]
+	%106 = xor i32 %105, 0		; <i32> [#uses=1]
+	%107 = xor i32 %106, 0		; <i32> [#uses=1]
+	%108 = xor i32 %107, 0		; <i32> [#uses=1]
+	%109 = xor i32 %108, %62		; <i32> [#uses=3]
+	%110 = and i32 %109, 255		; <i32> [#uses=1]
+	%111 = lshr i32 %109, 16		; <i32> [#uses=1]
+	%112 = and i32 %111, 255		; <i32> [#uses=1]
+	%113 = lshr i32 %109, 24		; <i32> [#uses=3]
+	store i32 %113, i32* %1, align 4
+	%114 = load i32* null, align 4		; <i32> [#uses=1]
+	%115 = xor i32 0, %114		; <i32> [#uses=1]
+	%116 = xor i32 %115, 0		; <i32> [#uses=1]
+	%117 = xor i32 %116, 0		; <i32> [#uses=1]
+	%K.0.sum42 = or i32 0, 12		; <i32> [#uses=1]
+	%118 = getelementptr [32 x i32]* null, i32 0, i32 %K.0.sum42		; <i32*> [#uses=1]
+	store i32 %117, i32* %118, align 4
+	%119 = getelementptr [2 x i32]* @CAST_S_table5, i32 0, i32 0		; <i32*> [#uses=0]
+	store i32 0, i32* null, align 4
+	%120 = getelementptr [2 x i32]* @CAST_S_table6, i32 0, i32 %113		; <i32*> [#uses=1]
+	%121 = load i32* %120, align 4		; <i32> [#uses=1]
+	%122 = xor i32 0, %121		; <i32> [#uses=1]
+	store i32 %122, i32* null, align 4
+	%123 = getelementptr [2 x i32]* @CAST_S_table4, i32 0, i32 0		; <i32*> [#uses=1]
+	%124 = load i32* %123, align 4		; <i32> [#uses=1]
+	%125 = getelementptr [2 x i32]* @CAST_S_table7, i32 0, i32 %95		; <i32*> [#uses=1]
+	%126 = load i32* %125, align 4		; <i32> [#uses=1]
+	%127 = xor i32 0, %124		; <i32> [#uses=1]
+	%128 = xor i32 %127, 0		; <i32> [#uses=1]
+	%129 = xor i32 %128, %126		; <i32> [#uses=1]
+	%130 = xor i32 %129, 0		; <i32> [#uses=1]
+	store i32 %130, i32* null, align 4
+	br label %bb11
+}
