[llvm-commits] [llvm] r106493 - in /llvm/trunk: lib/CodeGen/TwoAddressInstructionPass.cpp test/CodeGen/X86/2007-01-08-InstrSched.ll test/CodeGen/X86/lsr-reuse.ll test/CodeGen/X86/pic.ll test/CodeGen/X86/stack-align.ll test/CodeGen/X86/tailcallstack64.ll

Dan Gohman <gohman at apple.com>
Mon Jun 21 15:17:21 PDT 2010


Author: djg
Date: Mon Jun 21 17:17:20 2010
New Revision: 106493

URL: http://llvm.org/viewvc/llvm-project?rev=106493&view=rev
Log:
Teach two-address lowering how to unfold a load to open up commuting
opportunities. For example, this lets it emit this:

   movq (%rax), %rcx
   addq %rdx, %rcx

instead of this:

   movq %rdx, %rcx
   addq (%rax), %rcx

in the case where %rdx has subsequent uses. It's the same number
of instructions, and usually the same encoding size on x86, but
it appears to be faster, and in general it may allow better
scheduling of the load.
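
For illustration, a minimal IR pattern that hits this case looks
roughly like the following (a hypothetical example, not taken from the
commit's tests; the function name and the %rax/%rdx register
assignments are only illustrative):

   define i64 @f(i64* %p, i64 %x) nounwind {
   entry:
     %a = load i64* %p      ; load the target can fold into the add
     %s = add i64 %x, %a    ; two-address add with a foldable load
     %t = mul i64 %s, %x    ; %x is used again, so it is not killed at the add
     ret i64 %t
   }

With the load left folded, the add's tied operand has to be a fresh
copy of %x; unfolding the load lets the loaded value, which dies at
the add, serve as the tied operand instead.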

Modified:
    llvm/trunk/lib/CodeGen/TwoAddressInstructionPass.cpp
    llvm/trunk/test/CodeGen/X86/2007-01-08-InstrSched.ll
    llvm/trunk/test/CodeGen/X86/lsr-reuse.ll
    llvm/trunk/test/CodeGen/X86/pic.ll
    llvm/trunk/test/CodeGen/X86/stack-align.ll
    llvm/trunk/test/CodeGen/X86/tailcallstack64.ll

Modified: llvm/trunk/lib/CodeGen/TwoAddressInstructionPass.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/TwoAddressInstructionPass.cpp?rev=106493&r1=106492&r2=106493&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/TwoAddressInstructionPass.cpp (original)
+++ llvm/trunk/lib/CodeGen/TwoAddressInstructionPass.cpp Mon Jun 21 17:17:20 2010
@@ -898,6 +898,90 @@
       }
     }
   }
+
+  // If this is an instruction with a load folded into it, try unfolding
+  // the load, e.g. avoid this:
+  //   movq %rdx, %rcx
+  //   addq (%rax), %rcx
+  // in favor of this:
+  //   movq (%rax), %rcx
+  //   addq %rdx, %rcx
+  // because it's preferable to schedule a load rather than a register copy.
+  if (TID.mayLoad() && !regBKilled) {
+    // Determine if a load can be unfolded.
+    unsigned LoadRegIndex;
+    unsigned NewOpc =
+      TII->getOpcodeAfterMemoryUnfold(mi->getOpcode(),
+                                      /*UnfoldLoad=*/true,
+                                      /*UnfoldStore=*/false,
+                                      &LoadRegIndex);
+    if (NewOpc != 0) {
+      const TargetInstrDesc &UnfoldTID = TII->get(NewOpc);
+      if (UnfoldTID.getNumDefs() == 1) {
+        MachineFunction &MF = *mbbi->getParent();
+
+        // Unfold the load.
+        DEBUG(dbgs() << "2addr:   UNFOLDING: " << *mi);
+        const TargetRegisterClass *RC =
+          UnfoldTID.OpInfo[LoadRegIndex].getRegClass(TRI);
+        unsigned Reg = MRI->createVirtualRegister(RC);
+        SmallVector<MachineInstr *, 2> NewMIs;
+        bool Success =
+          TII->unfoldMemoryOperand(MF, mi, Reg,
+                                   /*UnfoldLoad=*/true, /*UnfoldStore=*/false,
+                                   NewMIs);
+        (void)Success;
+        assert(Success &&
+               "unfoldMemoryOperand failed when getOpcodeAfterMemoryUnfold "
+               "succeeded!");
+        assert(NewMIs.size() == 2 &&
+               "Unfolded a load into multiple instructions!");
+        // The load was previously folded, so this is the only use.
+        NewMIs[1]->addRegisterKilled(Reg, TRI);
+
+        // Tentatively insert the instructions into the block so that they
+        // look "normal" to the transformation logic.
+        mbbi->insert(mi, NewMIs[0]);
+        mbbi->insert(mi, NewMIs[1]);
+
+        DEBUG(dbgs() << "2addr:    NEW LOAD: " << *NewMIs[0]
+                     << "2addr:    NEW INST: " << *NewMIs[1]);
+
+        // Transform the instruction, now that it no longer has a load.
+        unsigned NewDstIdx = NewMIs[1]->findRegisterDefOperandIdx(regA);
+        unsigned NewSrcIdx = NewMIs[1]->findRegisterUseOperandIdx(regB);
+        MachineBasicBlock::iterator NewMI = NewMIs[1];
+        bool TransformSuccess =
+          TryInstructionTransform(NewMI, mi, mbbi,
+                                  NewSrcIdx, NewDstIdx, Dist);
+        if (TransformSuccess ||
+            NewMIs[1]->getOperand(NewSrcIdx).isKill()) {
+          // Success, or at least we made an improvement. Keep the unfolded
+          // instructions and discard the original.
+          if (LV) {
+            for (unsigned i = 0, e = mi->getNumOperands(); i != e; ++i) {
+              MachineOperand &MO = mi->getOperand(i);
+              if (MO.isReg() && MO.isUse() && MO.isKill())
+                LV->replaceKillInstruction(Reg, mi, NewMIs[0]);
+            }
+            LV->addVirtualRegisterKilled(Reg, NewMIs[1]);
+          }
+          mi->eraseFromParent();
+          mi = NewMIs[1];
+          if (TransformSuccess)
+            return true;
+        } else {
+          // Transforming didn't eliminate the tie and didn't lead to an
+          // improvement. Clean up the unfolded instructions and keep the
+          // original.
+          DEBUG(dbgs() << "2addr: ABANDONING UNFOLD\n");
+          NewMIs[0]->eraseFromParent();
+          NewMIs[1]->eraseFromParent();
+        }
+      }
+    }
+  }
+
   return false;
 }
 

Modified: llvm/trunk/test/CodeGen/X86/2007-01-08-InstrSched.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/2007-01-08-InstrSched.ll?rev=106493&r1=106492&r2=106493&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/2007-01-08-InstrSched.ll (original)
+++ llvm/trunk/test/CodeGen/X86/2007-01-08-InstrSched.ll Mon Jun 21 17:17:20 2010
@@ -11,12 +11,12 @@
     %tmp14 = fadd float %tmp12, %tmp7
     ret float %tmp14
 
-; CHECK: mulss	LCPI0_0(%rip)
-; CHECK: mulss	LCPI0_1(%rip)
+; CHECK: mulss
+; CHECK: mulss
 ; CHECK: addss
-; CHECK: mulss	LCPI0_2(%rip)
+; CHECK: mulss
 ; CHECK: addss
-; CHECK: mulss	LCPI0_3(%rip)
+; CHECK: mulss
 ; CHECK: addss
 ; CHECK: ret
 }

Modified: llvm/trunk/test/CodeGen/X86/lsr-reuse.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/lsr-reuse.ll?rev=106493&r1=106492&r2=106493&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/lsr-reuse.ll (original)
+++ llvm/trunk/test/CodeGen/X86/lsr-reuse.ll Mon Jun 21 17:17:20 2010
@@ -465,14 +465,14 @@
 ; And the one at %bb68, where we want to be sure to use superhero mode:
 
 ; CHECK:      BB10_10:
-; CHECK-NEXT:   movaps  %xmm{{.*}}, %xmm{{.*}}
-; CHECK-NEXT:   mulps   48(%r{{[^,]*}}), %xmm{{.*}}
-; CHECK-NEXT:   movaps  %xmm{{.*}}, %xmm{{.*}}
-; CHECK-NEXT:   mulps   32(%r{{[^,]*}}), %xmm{{.*}}
-; CHECK-NEXT:   movaps  %xmm{{.*}}, %xmm{{.*}}
-; CHECK-NEXT:   mulps   16(%r{{[^,]*}}), %xmm{{.*}}
-; CHECK-NEXT:   movaps  %xmm{{.*}}, %xmm{{.*}}
-; CHECK-NEXT:   mulps   (%r{{[^,]*}}), %xmm{{.*}}
+; CHECK-NEXT:   movaps  48(%r{{[^,]*}}), %xmm{{.*}}
+; CHECK-NEXT:   mulps   %xmm{{.*}}, %xmm{{.*}}
+; CHECK-NEXT:   movaps  32(%r{{[^,]*}}), %xmm{{.*}}
+; CHECK-NEXT:   mulps   %xmm{{.*}}, %xmm{{.*}}
+; CHECK-NEXT:   movaps  16(%r{{[^,]*}}), %xmm{{.*}}
+; CHECK-NEXT:   mulps   %xmm{{.*}}, %xmm{{.*}}
+; CHECK-NEXT:   movaps  (%r{{[^,]*}}), %xmm{{.*}}
+; CHECK-NEXT:   mulps   %xmm{{.*}}, %xmm{{.*}}
 ; CHECK-NEXT:   movaps  %xmm{{.*}}, (%r{{[^,]*}})
 ; CHECK-NEXT:   movaps  %xmm{{.*}}, 16(%r{{[^,]*}})
 ; CHECK-NEXT:   movaps  %xmm{{.*}}, 32(%r{{[^,]*}})

Modified: llvm/trunk/test/CodeGen/X86/pic.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/pic.ll?rev=106493&r1=106492&r2=106493&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/pic.ll (original)
+++ llvm/trunk/test/CodeGen/X86/pic.ll Mon Jun 21 17:17:20 2010
@@ -189,7 +189,7 @@
 ; LINUX:   call	.L7$pb
 ; LINUX: .L7$pb:
 ; LINUX:   addl	$_GLOBAL_OFFSET_TABLE_+(.L{{.*}}-.L7$pb),
-; LINUX:   addl	.LJTI7_0 at GOTOFF(
+; LINUX:   .LJTI7_0 at GOTOFF(
 ; LINUX:   jmpl	*
 
 ; LINUX: .LJTI7_0:

Modified: llvm/trunk/test/CodeGen/X86/stack-align.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/stack-align.ll?rev=106493&r1=106492&r2=106493&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/stack-align.ll (original)
+++ llvm/trunk/test/CodeGen/X86/stack-align.ll Mon Jun 21 17:17:20 2010
@@ -9,14 +9,15 @@
 
 define void @test({ double, double }* byval  %z, double* %P) {
 entry:
+	%tmp3 = load double* @G, align 16		; <double> [#uses=1]
+	%tmp4 = tail call double @fabs( double %tmp3 )		; <double> [#uses=1]
+        volatile store double %tmp4, double* %P
 	%tmp = getelementptr { double, double }* %z, i32 0, i32 0		; <double*> [#uses=1]
-	%tmp1 = load double* %tmp, align 8		; <double> [#uses=1]
+	%tmp1 = volatile load double* %tmp, align 8		; <double> [#uses=1]
 	%tmp2 = tail call double @fabs( double %tmp1 )		; <double> [#uses=1]
     ; CHECK: andpd{{.*}}4(%esp), %xmm
-	%tmp3 = load double* @G, align 16		; <double> [#uses=1]
-	%tmp4 = tail call double @fabs( double %tmp3 )		; <double> [#uses=1]
 	%tmp6 = fadd double %tmp4, %tmp2		; <double> [#uses=1]
-	store double %tmp6, double* %P, align 8
+	volatile store double %tmp6, double* %P, align 8
 	ret void
 }
 

Modified: llvm/trunk/test/CodeGen/X86/tailcallstack64.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/tailcallstack64.ll?rev=106493&r1=106492&r2=106493&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/tailcallstack64.ll (original)
+++ llvm/trunk/test/CodeGen/X86/tailcallstack64.ll Mon Jun 21 17:17:20 2010
@@ -2,9 +2,11 @@
 
 ; Check that lowered arguments on the stack do not overwrite each other.
 ; Add %in1 %p1 to a different temporary register (%eax).
-; CHECK: movl  %edi, %eax
+; CHECK: movl  32(%rsp), %eax
 ; Move param %in1 to temp register (%r10d).
 ; CHECK: movl  40(%rsp), %r10d
+; Add %in1 %p1 to a different temporary register (%eax).
+; CHECK: addl %edi, %eax
 ; Move param %in2 to stack.
 ; CHECK: movl  %r10d, 32(%rsp)
 ; Move result of addition to stack.




