[llvm] r365370 - RegUsageInfoCollector: Don't iterate all regs for every reg class

Matt Arsenault via llvm-commits llvm-commits at lists.llvm.org
Mon Jul 8 11:48:42 PDT 2019


Author: arsenm
Date: Mon Jul  8 11:48:42 2019
New Revision: 365370

URL: http://llvm.org/viewvc/llvm-project?rev=365370&view=rev
Log:
RegUsageInfoCollector: Don't iterate all regs for every reg class

This is extremely slow on AMDGPU, which has a lot of physical registers
and a lot of register classes.

determineCalleeSaves, via MachineRegisterInfo::isPhysRegUsed, already
added all of the super registers to the saved set.

Added:
    llvm/trunk/test/CodeGen/AMDGPU/ipra-regmask.ll
Modified:
    llvm/trunk/lib/CodeGen/RegUsageInfoCollector.cpp

Modified: llvm/trunk/lib/CodeGen/RegUsageInfoCollector.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/RegUsageInfoCollector.cpp?rev=365370&r1=365369&r2=365370&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/RegUsageInfoCollector.cpp (original)
+++ llvm/trunk/lib/CodeGen/RegUsageInfoCollector.cpp Mon Jul  8 11:48:42 2019
@@ -189,42 +189,17 @@ computeCalleeSavedRegs(BitVector &SavedR
   // Target will return the set of registers that it saves/restores as needed.
   SavedRegs.clear();
   TFI.determineCalleeSaves(MF, SavedRegs);
+  if (SavedRegs.none())
+    return;
 
   // Insert subregs.
   const MCPhysReg *CSRegs = TRI.getCalleeSavedRegs(&MF);
   for (unsigned i = 0; CSRegs[i]; ++i) {
-    unsigned Reg = CSRegs[i];
-    if (SavedRegs.test(Reg))
-      for (MCSubRegIterator SR(Reg, &TRI, false); SR.isValid(); ++SR)
+    MCPhysReg Reg = CSRegs[i];
+    if (SavedRegs.test(Reg)) {
+      // Save subregisters
+      for (MCSubRegIterator SR(Reg, &TRI); SR.isValid(); ++SR)
         SavedRegs.set(*SR);
-  }
-
-  // Insert any register fully saved via subregisters.
-  // FIXME: Rewrite to use regunits.
-  for (const TargetRegisterClass *RC : TRI.regclasses()) {
-    if (!RC->CoveredBySubRegs)
-       continue;
-
-    for (unsigned PReg = 1, PRegE = TRI.getNumRegs(); PReg < PRegE; ++PReg) {
-      if (SavedRegs.test(PReg))
-        continue;
-
-      // Check if PReg is fully covered by its subregs.
-      if (!RC->contains(PReg))
-        continue;
-
-      // Add PReg to SavedRegs if all subregs are saved.
-      bool AllSubRegsSaved = true;
-      bool HasAtLeastOneSubreg = false;
-      for (MCSubRegIterator SR(PReg, &TRI, false); SR.isValid(); ++SR) {
-        HasAtLeastOneSubreg = true;
-        if (!SavedRegs.test(*SR)) {
-          AllSubRegsSaved = false;
-          break;
-        }
-      }
-      if (AllSubRegsSaved && HasAtLeastOneSubreg)
-        SavedRegs.set(PReg);
     }
   }
 }

Added: llvm/trunk/test/CodeGen/AMDGPU/ipra-regmask.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/ipra-regmask.ll?rev=365370&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/ipra-regmask.ll (added)
+++ llvm/trunk/test/CodeGen/AMDGPU/ipra-regmask.ll Mon Jul  8 11:48:42 2019
@@ -0,0 +1,46 @@
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -enable-ipra -print-regusage -o /dev/null 2>&1 < %s | FileCheck %s
+; Make sure the expected regmask is generated for sub/superregisters.
+
+; CHECK-DAG: csr Clobbered Registers: $vgpr0 $vgpr0_vgpr1_vgpr2_vgpr3 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 $vgpr0_vgpr1 $vgpr0_vgpr1_vgpr2 {{$}}
+define void @csr() #0 {
+  call void asm sideeffect "", "~{v0},~{v36},~{v37}"() #0
+  ret void
+}
+
+; CHECK-DAG: subregs_for_super Clobbered Registers: $vgpr0 $vgpr1 $vgpr0_vgpr1_vgpr2_vgpr3 $vgpr1_vgpr2_vgpr3_vgpr4 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16 $vgpr0_vgpr1 $vgpr1_vgpr2 $vgpr0_vgpr1_vgpr2 $vgpr1_vgpr2_vgpr3 {{$}}
+define void @subregs_for_super() #0 {
+  call void asm sideeffect "", "~{v0},~{v1}"() #0
+  ret void
+}
+
+; CHECK-DAG: clobbered_reg_with_sub Clobbered Registers: $vgpr0 $vgpr1 $vgpr0_vgpr1_vgpr2_vgpr3 $vgpr1_vgpr2_vgpr3_vgpr4 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16 $vgpr0_vgpr1 $vgpr1_vgpr2 $vgpr0_vgpr1_vgpr2 $vgpr1_vgpr2_vgpr3 {{$}}
+define void @clobbered_reg_with_sub() #0 {
+  call void asm sideeffect "", "~{v[0:1]}"() #0
+  ret void
+}
+
+; CHECK-DAG: nothing Clobbered Registers: {{$}}
+define void @nothing() #0 {
+  ret void
+}
+
+; CHECK-DAG: special_regs Clobbered Registers: $scc $m0 {{$}}
+define void @special_regs() #0 {
+  call void asm sideeffect "", "~{m0},~{scc}"() #0
+  ret void
+}
+
+; CHECK-DAG: vcc Clobbered Registers: $vcc $vcc_hi $vcc_lo {{$}}
+define void @vcc() #0 {
+  call void asm sideeffect "", "~{vcc}"() #0
+  ret void
+}
+
+@llvm.used = appending global [6 x i8*] [i8* bitcast (void ()* @csr to i8*),
+                                         i8* bitcast (void ()* @subregs_for_super to i8*),
+                                         i8* bitcast (void ()* @clobbered_reg_with_sub to i8*),
+                                         i8* bitcast (void ()* @nothing to i8*),
+                                         i8* bitcast (void ()* @special_regs to i8*),
+                                         i8* bitcast (void ()* @vcc to i8*)]
+
+attributes #0 = { nounwind }




More information about the llvm-commits mailing list