[PATCH] D112065: [SystemZ] Improvement of emitMemMemWrapper()

Jonas Paulsson via Phabricator via llvm-commits llvm-commits at lists.llvm.org
Tue Oct 19 04:35:28 PDT 2021


jonpa created this revision.
jonpa added a reviewer: uweigand.
Herald added a subscriber: hiraditya.
jonpa requested review of this revision.
Herald added a project: LLVM.
Herald added a subscriber: llvm-commits.

It was discovered that an extra register COPY remained when a (var-len) memory operation was expanded with a loop and there was another use of the involved address register(s) afterwards.

A simple fix for this is to COPY the address registers before the loop and use that new vreg instead. This handles the test cases and also seems clearly beneficial on SPEC:

  Spill|Reload   :    613173     613123    -50
  Copies         :   1018500    1016022  -2478

Doing this COPY in all cases doesn't seem to make a difference, even though it is only useful in the case of a register loop with other uses of the register.


https://reviews.llvm.org/D112065

Files:
  llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
  llvm/test/CodeGen/SystemZ/memset-05.ll


Index: llvm/test/CodeGen/SystemZ/memset-05.ll
===================================================================
--- llvm/test/CodeGen/SystemZ/memset-05.ll
+++ llvm/test/CodeGen/SystemZ/memset-05.ll
@@ -48,43 +48,39 @@
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    llgfr %r1, %r3
 ; CHECK-NEXT:    aghi %r1, -1
-; CHECK-NEXT:    cgije %r1, -1, .LBB2_5
+; CHECK-NEXT:    cgije %r1, -1, .LBB2_4
 ; CHECK-NEXT:  # %bb.1:
 ; CHECK-NEXT:    srlg %r0, %r1, 8
 ; CHECK-NEXT:    lgr %r3, %r2
-; CHECK-NEXT:    cgije %r0, 0, .LBB2_4
-; CHECK-NEXT:  # %bb.2:
-; CHECK-NEXT:    lgr %r3, %r2
-; CHECK-NEXT:  .LBB2_3: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    cgije %r0, 0, .LBB2_3
+; CHECK-NEXT:  .LBB2_2: # =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT:    xc 0(256,%r3), 0(%r3)
 ; CHECK-NEXT:    la %r3, 256(%r3)
-; CHECK-NEXT:    brctg %r0, .LBB2_3
-; CHECK-NEXT:  .LBB2_4:
+; CHECK-NEXT:    brctg %r0, .LBB2_2
+; CHECK-NEXT:  .LBB2_3:
 ; CHECK-NEXT:    exrl %r1, .Ltmp1
-; CHECK-NEXT:  .LBB2_5:
-; CHECK-NEXT:    cgije %r1, -1, .LBB2_10
-; CHECK-NEXT:  # %bb.6:
+; CHECK-NEXT:  .LBB2_4:
+; CHECK-NEXT:    cgije %r1, -1, .LBB2_8
+; CHECK-NEXT:  # %bb.5:
 ; CHECK-NEXT:    srlg %r0, %r1, 8
 ; CHECK-NEXT:    lgr %r3, %r2
-; CHECK-NEXT:    cgije %r0, 0, .LBB2_9
-; CHECK-NEXT:  # %bb.7:
-; CHECK-NEXT:    lgr %r3, %r2
-; CHECK-NEXT:  .LBB2_8: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    cgije %r0, 0, .LBB2_7
+; CHECK-NEXT:  .LBB2_6: # =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT:    xc 0(256,%r3), 0(%r3)
 ; CHECK-NEXT:    la %r3, 256(%r3)
-; CHECK-NEXT:    brctg %r0, .LBB2_8
-; CHECK-NEXT:  .LBB2_9:
+; CHECK-NEXT:    brctg %r0, .LBB2_6
+; CHECK-NEXT:  .LBB2_7:
 ; CHECK-NEXT:    exrl %r1, .Ltmp1
-; CHECK-NEXT:  .LBB2_10:
+; CHECK-NEXT:  .LBB2_8:
 ; CHECK-NEXT:    cgibe %r1, -1, 0(%r14)
-; CHECK-NEXT:  .LBB2_11:
+; CHECK-NEXT:  .LBB2_9:
 ; CHECK-NEXT:    srlg %r0, %r1, 8
-; CHECK-NEXT:    cgije %r0, 0, .LBB2_13
-; CHECK-NEXT:  .LBB2_12: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    cgije %r0, 0, .LBB2_11
+; CHECK-NEXT:  .LBB2_10: # =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT:    xc 0(256,%r2), 0(%r2)
 ; CHECK-NEXT:    la %r2, 256(%r2)
-; CHECK-NEXT:    brctg %r0, .LBB2_12
-; CHECK-NEXT:  .LBB2_13:
+; CHECK-NEXT:    brctg %r0, .LBB2_10
+; CHECK-NEXT:  .LBB2_11:
 ; CHECK-NEXT:    exrl %r1, .Ltmp0
 ; CHECK-NEXT:    br %r14
   tail call void @llvm.memset.p0i8.i32(i8* %Addr, i8 0, i32 %Len, i1 false)
Index: llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
===================================================================
--- llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -7100,13 +7100,19 @@
 // Force base value Base into a register before MI.  Return the register.
 static Register forceReg(MachineInstr &MI, MachineOperand &Base,
                          const SystemZInstrInfo *TII) {
-  if (Base.isReg())
-    return Base.getReg();
-
   MachineBasicBlock *MBB = MI.getParent();
   MachineFunction &MF = *MBB->getParent();
   MachineRegisterInfo &MRI = MF.getRegInfo();
 
+  if (Base.isReg()) {
+    // Copy Base into a new virtual register to help register coalescing in
+    // cases with multiple uses.
+    Register Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
+    BuildMI(*MBB, MI, MI.getDebugLoc(), TII->get(SystemZ::COPY), Reg)
+      .add(Base);
+    return Reg;
+  }
+
   Register Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
   BuildMI(*MBB, MI, MI.getDebugLoc(), TII->get(SystemZ::LA), Reg)
       .add(Base)


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D112065.380641.patch
Type: text/x-patch
Size: 3559 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20211019/eed7def2/attachment.bin>


More information about the llvm-commits mailing list