[llvm-commits] [llvm] r105991 - in /llvm/trunk: lib/CodeGen/TwoAddressInstructionPass.cpp test/CodeGen/ARM/2009-11-01-NeonMoves.ll

Bob Wilson bob.wilson at apple.com
Mon Jun 14 22:56:32 PDT 2010


Author: bwilson
Date: Tue Jun 15 00:56:31 2010
New Revision: 105991

URL: http://llvm.org/viewvc/llvm-project?rev=105991&view=rev
Log:
Generalize the pre-coalescing of extract_subregs feeding reg_sequences,
replacing the overly conservative checks that I had introduced recently to
deal with correctness issues.  This makes a pretty noticeable difference
in our testcases where reg_sequences are used.  I've updated one test to
check that we no longer emit the unnecessary subreg moves.

Modified:
    llvm/trunk/lib/CodeGen/TwoAddressInstructionPass.cpp
    llvm/trunk/test/CodeGen/ARM/2009-11-01-NeonMoves.ll

Modified: llvm/trunk/lib/CodeGen/TwoAddressInstructionPass.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/TwoAddressInstructionPass.cpp?rev=105991&r1=105990&r2=105991&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/TwoAddressInstructionPass.cpp (original)
+++ llvm/trunk/lib/CodeGen/TwoAddressInstructionPass.cpp Tue Jun 15 00:56:31 2010
@@ -33,6 +33,7 @@
 #include "llvm/CodeGen/LiveVariables.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Target/TargetRegisterInfo.h"
@@ -1183,11 +1184,8 @@
            UI = MRI->use_nodbg_begin(SrcReg),
            UE = MRI->use_nodbg_end(); UI != UE; ++UI) {
       MachineInstr *UseMI = &*UI;
-      // FIXME: For now require that the destination subregs match the subregs
-      // being extracted.
       if (!UseMI->isExtractSubreg() ||
           UseMI->getOperand(0).getReg() != DstReg ||
-          UseMI->getOperand(0).getSubReg() != UseMI->getOperand(2).getImm() ||
           UseMI->getOperand(1).getSubReg() != 0) {
         CanCoalesce = false;
         break;
@@ -1198,40 +1196,92 @@
     if (!CanCoalesce || SubIndices.size() < 2)
       continue;
 
-    // FIXME: For now require that the src and dst registers are in the
-    // same regclass.
-    if (MRI->getRegClass(SrcReg) != MRI->getRegClass(DstReg))
+    std::sort(SubIndices.begin(), SubIndices.end());
+    unsigned NewSrcSubIdx = 0;
+    if (!TRI->canCombineSubRegIndices(MRI->getRegClass(SrcReg), SubIndices,
+                                      NewSrcSubIdx))
       continue;
 
+    // Now that we know that all the uses are extract_subregs and that those
+    // subregs can somehow be combined, scan all the extract_subregs again to
+    // make sure the subregs are in the right order and can be composed.
+    // Also keep track of the destination subregisters so we can make sure
+    // that those can be combined.
+    SubIndices.clear();
+    MachineInstr *SomeMI = 0;
+    CanCoalesce = true;
+    for (MachineRegisterInfo::use_nodbg_iterator
+           UI = MRI->use_nodbg_begin(SrcReg),
+           UE = MRI->use_nodbg_end(); UI != UE; ++UI) {
+      MachineInstr *UseMI = &*UI;
+      assert(UseMI->isExtractSubreg());
+      unsigned DstSubIdx = UseMI->getOperand(0).getSubReg();
+      unsigned SrcSubIdx = UseMI->getOperand(2).getImm();
+      assert(DstSubIdx != 0 && "missing subreg from RegSequence elimination");
+      if (TRI->composeSubRegIndices(NewSrcSubIdx, DstSubIdx) != SrcSubIdx) {
+        CanCoalesce = false;
+        break;
+      }
+      SubIndices.push_back(DstSubIdx);
+      // Keep track of one of the uses.
+      SomeMI = UseMI;
+    }
+    if (!CanCoalesce)
+      continue;
+
+    // Check that the destination subregisters can also be combined.
     std::sort(SubIndices.begin(), SubIndices.end());
-    unsigned NewSubIdx = 0;
-    if (TRI->canCombineSubRegIndices(MRI->getRegClass(SrcReg), SubIndices,
-                                     NewSubIdx)) {
-      bool Proceed = true;
-      if (NewSubIdx)
-        for (MachineRegisterInfo::reg_nodbg_iterator
-               RI = MRI->reg_nodbg_begin(SrcReg), RE = MRI->reg_nodbg_end();
-             RI != RE; ) {
-          MachineOperand &MO = RI.getOperand();
-          ++RI;
-          // FIXME: If the sub-registers do not combine to the whole
-          // super-register, i.e. NewSubIdx != 0, and any of the use has a
-          // sub-register index, then abort the coalescing attempt.
-          if (MO.getSubReg()) {
-            Proceed = false;
-            break;
-          }
-        }
-      if (Proceed)
-        for (MachineRegisterInfo::reg_iterator RI = MRI->reg_begin(SrcReg),
-               RE = MRI->reg_end(); RI != RE; ) {
-          MachineOperand &MO = RI.getOperand();
-          ++RI;
-          MO.setReg(DstReg);
-          if (NewSubIdx)
-            MO.setSubReg(NewSubIdx);
-        }
+    unsigned NewDstSubIdx = 0;
+    if (!TRI->canCombineSubRegIndices(MRI->getRegClass(DstReg), SubIndices,
+                                      NewDstSubIdx))
+      continue;
+
+    // If neither source nor destination can be combined to the full register,
+    // just give up.  This could be improved if it ever matters.
+    if (NewSrcSubIdx != 0 && NewDstSubIdx != 0)
+      continue;
+
+    // Insert a copy or an extract to replace the original extracts.
+    MachineBasicBlock::iterator InsertLoc = SomeMI;
+    if (NewSrcSubIdx) {
+      // Insert an extract subreg.
+      BuildMI(*SomeMI->getParent(), InsertLoc, SomeMI->getDebugLoc(),
+              TII->get(TargetOpcode::EXTRACT_SUBREG), DstReg)
+        .addReg(SrcReg).addImm(NewSrcSubIdx);
+    } else if (NewDstSubIdx) {
+      // Do a subreg insertion.
+      BuildMI(*SomeMI->getParent(), InsertLoc, SomeMI->getDebugLoc(),
+              TII->get(TargetOpcode::INSERT_SUBREG), DstReg)
+        .addReg(DstReg).addReg(SrcReg).addImm(NewDstSubIdx);
+    } else {
+      // Insert a copy.
+      bool Emitted =
+        TII->copyRegToReg(*SomeMI->getParent(), InsertLoc, DstReg, SrcReg,
+                          MRI->getRegClass(DstReg), MRI->getRegClass(SrcReg),
+                          SomeMI->getDebugLoc());
+      (void)Emitted;
+    }
+    MachineBasicBlock::iterator CopyMI = prior(InsertLoc);
+
+    // Remove all the old extract instructions.
+    for (MachineRegisterInfo::use_nodbg_iterator
+           UI = MRI->use_nodbg_begin(SrcReg),
+           UE = MRI->use_nodbg_end(); UI != UE; ) {
+      MachineInstr *UseMI = &*UI;
+      ++UI;
+      if (UseMI == CopyMI)
+        continue;
+      assert(UseMI->isExtractSubreg());
+      // Move any kills to the new copy or extract instruction.
+      if (UseMI->getOperand(1).isKill()) {
+        MachineOperand *KillMO = CopyMI->findRegisterUseOperand(SrcReg);
+        KillMO->setIsKill();
+        if (LV)
+          // Update live variables
+          LV->replaceKillInstruction(SrcReg, UseMI, &*CopyMI);
       }
+      UseMI->eraseFromParent();
+    }
   }
 }
 

Modified: llvm/trunk/test/CodeGen/ARM/2009-11-01-NeonMoves.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/2009-11-01-NeonMoves.ll?rev=105991&r1=105990&r2=105991&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/2009-11-01-NeonMoves.ll (original)
+++ llvm/trunk/test/CodeGen/ARM/2009-11-01-NeonMoves.ll Tue Jun 15 00:56:31 2010
@@ -11,11 +11,11 @@
   %0 = getelementptr inbounds %foo* %quat_addr, i32 0, i32 0 ; <<4 x float>*> [#uses=1]
   store <4 x float> %quat.0, <4 x float>* %0
   %1 = call arm_aapcs_vfpcc  <4 x float> @quux(%foo* %quat_addr) nounwind ; <<4 x float>> [#uses=3]
-;CHECK: vmov.f32
-;CHECK: vmov.f32
   %2 = fmul <4 x float> %1, %1                    ; <<4 x float>> [#uses=2]
   %3 = shufflevector <4 x float> %2, <4 x float> undef, <2 x i32> <i32 0, i32 1> ; <<2 x float>> [#uses=1]
   %4 = shufflevector <4 x float> %2, <4 x float> undef, <2 x i32> <i32 2, i32 3> ; <<2 x float>> [#uses=1]
+;CHECK-NOT: vmov
+;CHECK: vpadd
   %5 = call <2 x float> @llvm.arm.neon.vpadd.v2f32(<2 x float> %3, <2 x float> %4) nounwind ; <<2 x float>> [#uses=2]
   %6 = call <2 x float> @llvm.arm.neon.vpadd.v2f32(<2 x float> %5, <2 x float> %5) nounwind ; <<2 x float>> [#uses=2]
   %7 = shufflevector <2 x float> %6, <2 x float> %6, <4 x i32> <i32 0, i32 1, i32 2, i32 3> ; <<4 x float>> [#uses=2]





More information about the llvm-commits mailing list