[llvm-commits] [llvm] r47476 - in /llvm/trunk: lib/Target/X86/X86InstrInfo.cpp test/CodeGen/X86/pic-load-remat.ll

Evan Cheng evan.cheng at apple.com
Fri Feb 22 01:25:47 PST 2008


Author: evancheng
Date: Fri Feb 22 03:25:47 2008
New Revision: 47476

URL: http://llvm.org/viewvc/llvm-project?rev=47476&view=rev
Log:
Allow re-materialization of pic load (controlled by -remat-pic-load for now).

Added:
    llvm/trunk/test/CodeGen/X86/pic-load-remat.ll
Modified:
    llvm/trunk/lib/Target/X86/X86InstrInfo.cpp

Modified: llvm/trunk/lib/Target/X86/X86InstrInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrInfo.cpp?rev=47476&r1=47475&r2=47476&view=diff

==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrInfo.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86InstrInfo.cpp Fri Feb 22 03:25:47 2008
@@ -37,6 +37,10 @@
                     cl::desc("Print instructions that the allocator wants to"
                              " fuse, but the X86 backend currently can't"),
                     cl::Hidden);
+  cl::opt<bool>
+  ReMatPICLoad("remat-pic-load",
+               cl::desc("Allow rematerializing pic load"),
+               cl::init(false), cl::Hidden);
 }
 
 X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
@@ -735,10 +739,26 @@
     // Loads from constant pools are trivially rematerializable.
     if (MI->getOperand(1).isReg() && MI->getOperand(2).isImm() &&
         MI->getOperand(3).isReg() && MI->getOperand(4).isCPI() &&
-        MI->getOperand(1).getReg() == 0 &&
         MI->getOperand(2).getImm() == 1 &&
-        MI->getOperand(3).getReg() == 0)
-      return true;
+        MI->getOperand(3).getReg() == 0) {
+      unsigned BaseReg = MI->getOperand(1).getReg();
+      if (BaseReg == 0)
+        return true;
+      if (!ReMatPICLoad)
+        return false;
+      // Allow re-materialization of PIC load.
+      MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
+      bool isPICBase = false;
+      for (MachineRegisterInfo::def_iterator I = MRI.def_begin(BaseReg),
+             E = MRI.def_end(); I != E; ++I) {
+        MachineInstr *DefMI = I.getOperand().getParent();
+        if (DefMI->getOpcode() != X86::MOVPC32r)
+          return false;
+        assert(!isPICBase && "More than one PIC base?");
+        isPICBase = true;
+      }
+      return isPICBase;
+    }
       
     // If this is a load from a fixed argument slot, we know the value is
     // invariant across the whole function, because we don't redefine argument

Added: llvm/trunk/test/CodeGen/X86/pic-load-remat.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/pic-load-remat.ll?rev=47476&view=auto

==============================================================================
--- llvm/trunk/test/CodeGen/X86/pic-load-remat.ll (added)
+++ llvm/trunk/test/CodeGen/X86/pic-load-remat.ll Fri Feb 22 03:25:47 2008
@@ -0,0 +1,45 @@
+; RUN: llvm-as < %s | llc -mtriple=i686-apple-darwin -mattr=+sse2 -relocation-model=pic -remat-pic-load | grep padd | grep pb
+
+define void @f() nounwind  {
+entry:
+	%tmp4403 = tail call <8 x i16> @llvm.x86.sse2.psubs.w( <8 x i16> zeroinitializer, <8 x i16> zeroinitializer ) nounwind readnone 		; <<8 x i16>> [#uses=2]
+	%tmp4443 = tail call <8 x i16> @llvm.x86.sse2.padds.w( <8 x i16> zeroinitializer, <8 x i16> zeroinitializer ) nounwind readnone 		; <<8 x i16>> [#uses=1]
+	%tmp4609 = tail call <8 x i16> @llvm.x86.sse2.psll.w( <8 x i16> zeroinitializer, <8 x i16> bitcast (<4 x i32> < i32 14, i32 undef, i32 undef, i32 undef > to <8 x i16>) )		; <<8 x i16>> [#uses=1]
+	%tmp4651 = add <8 x i16> %tmp4609, < i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1 >		; <<8 x i16>> [#uses=1]
+	%tmp4658 = tail call <8 x i16> @llvm.x86.sse2.psll.w( <8 x i16> %tmp4651, <8 x i16> bitcast (<4 x i32> < i32 1, i32 undef, i32 undef, i32 undef > to <8 x i16>) )		; <<8 x i16>> [#uses=1]
+	%tmp4669 = tail call <8 x i16> @llvm.x86.sse2.pmaxs.w( <8 x i16> < i16 -23170, i16 -23170, i16 -23170, i16 -23170, i16 -23170, i16 -23170, i16 -23170, i16 -23170 >, <8 x i16> %tmp4443 ) nounwind readnone 		; <<8 x i16>> [#uses=2]
+	%tmp4679 = tail call <8 x i16> @llvm.x86.sse2.padds.w( <8 x i16> %tmp4669, <8 x i16> %tmp4669 ) nounwind readnone 		; <<8 x i16>> [#uses=1]
+	%tmp4689 = add <8 x i16> %tmp4679, %tmp4658		; <<8 x i16>> [#uses=1]
+	%tmp4700 = tail call <8 x i16> @llvm.x86.sse2.padds.w( <8 x i16> %tmp4689, <8 x i16> zeroinitializer ) nounwind readnone 		; <<8 x i16>> [#uses=1]
+	%tmp4708 = bitcast <8 x i16> %tmp4700 to <2 x i64>		; <<2 x i64>> [#uses=1]
+	%tmp4772 = add <8 x i16> zeroinitializer, < i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1 >		; <<8 x i16>> [#uses=1]
+	%tmp4779 = tail call <8 x i16> @llvm.x86.sse2.psll.w( <8 x i16> %tmp4772, <8 x i16> bitcast (<4 x i32> < i32 1, i32 undef, i32 undef, i32 undef > to <8 x i16>) )		; <<8 x i16>> [#uses=1]
+	%tmp4800 = tail call <8 x i16> @llvm.x86.sse2.padds.w( <8 x i16> zeroinitializer, <8 x i16> zeroinitializer ) nounwind readnone 		; <<8 x i16>> [#uses=1]
+	%tmp4810 = add <8 x i16> %tmp4800, %tmp4779		; <<8 x i16>> [#uses=1]
+	%tmp4821 = tail call <8 x i16> @llvm.x86.sse2.padds.w( <8 x i16> %tmp4810, <8 x i16> zeroinitializer ) nounwind readnone 		; <<8 x i16>> [#uses=1]
+	%tmp4829 = bitcast <8 x i16> %tmp4821 to <2 x i64>		; <<2 x i64>> [#uses=1]
+	%tmp4900 = tail call <8 x i16> @llvm.x86.sse2.psll.w( <8 x i16> zeroinitializer, <8 x i16> bitcast (<4 x i32> < i32 1, i32 undef, i32 undef, i32 undef > to <8 x i16>) )		; <<8 x i16>> [#uses=1]
+	%tmp4911 = tail call <8 x i16> @llvm.x86.sse2.pmaxs.w( <8 x i16> < i16 -23170, i16 -23170, i16 -23170, i16 -23170, i16 -23170, i16 -23170, i16 -23170, i16 -23170 >, <8 x i16> zeroinitializer ) nounwind readnone 		; <<8 x i16>> [#uses=2]
+	%tmp4921 = tail call <8 x i16> @llvm.x86.sse2.padds.w( <8 x i16> %tmp4911, <8 x i16> %tmp4911 ) nounwind readnone 		; <<8 x i16>> [#uses=1]
+	%tmp4931 = add <8 x i16> %tmp4921, %tmp4900		; <<8 x i16>> [#uses=1]
+	%tmp4942 = tail call <8 x i16> @llvm.x86.sse2.padds.w( <8 x i16> %tmp4931, <8 x i16> zeroinitializer ) nounwind readnone 		; <<8 x i16>> [#uses=1]
+	%tmp4950 = bitcast <8 x i16> %tmp4942 to <2 x i64>		; <<2 x i64>> [#uses=1]
+	%tmp4957 = tail call <8 x i16> @llvm.x86.sse2.padds.w( <8 x i16> %tmp4403, <8 x i16> zeroinitializer ) nounwind readnone 		; <<8 x i16>> [#uses=1]
+	%tmp4958 = bitcast <8 x i16> %tmp4957 to <2 x i64>		; <<2 x i64>> [#uses=1]
+	%tmp4967 = tail call <8 x i16> @llvm.x86.sse2.psubs.w( <8 x i16> %tmp4403, <8 x i16> zeroinitializer ) nounwind readnone 		; <<8 x i16>> [#uses=1]
+	%tmp4968 = bitcast <8 x i16> %tmp4967 to <2 x i64>		; <<2 x i64>> [#uses=1]
+	store <2 x i64> %tmp4829, <2 x i64>* null, align 16
+	store <2 x i64> %tmp4958, <2 x i64>* null, align 16
+	store <2 x i64> %tmp4968, <2 x i64>* null, align 16
+	store <2 x i64> %tmp4950, <2 x i64>* null, align 16
+	store <2 x i64> %tmp4708, <2 x i64>* null, align 16
+	unreachable
+}
+
+declare <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16>, <8 x i16>) nounwind readnone 
+
+declare <8 x i16> @llvm.x86.sse2.pmaxs.w(<8 x i16>, <8 x i16>) nounwind readnone 
+
+declare <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16>, <8 x i16>) nounwind readnone 
+
+declare <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16>, <8 x i16>) nounwind readnone 





More information about the llvm-commits mailing list