[llvm] 9965370 - Revert "[AArch64LoadStoreOptimizer] Generate more STPs by renaming registers earlier"

Martin Storsjö via llvm-commits llvm-commits at lists.llvm.org
Thu Jun 10 00:30:50 PDT 2021


Author: Martin Storsjö
Date: 2021-06-10T10:30:13+03:00
New Revision: 99653702fdca0e9a47f36456468c3a444ba50b3a

URL: https://github.com/llvm/llvm-project/commit/99653702fdca0e9a47f36456468c3a444ba50b3a
DIFF: https://github.com/llvm/llvm-project/commit/99653702fdca0e9a47f36456468c3a444ba50b3a.diff

LOG: Revert "[AArch64LoadStoreOptimizer] Generate more STPs by renaming registers earlier"

This reverts commit d96ea46629803641038ebe46d8cd512f8cf7e20f, as it
caused various misoptimizations, see https://reviews.llvm.org/D103597
for discussion on the issues.

Added: 
    

Modified: 
    llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
    llvm/test/CodeGen/AArch64/GlobalISel/byval-call.ll
    llvm/test/CodeGen/AArch64/consthoist-gep.ll
    llvm/test/CodeGen/AArch64/ldst-opt.ll
    llvm/test/CodeGen/AArch64/stp-opt-with-renaming.mir

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp b/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
index d4c8064048129..bf042c83294ad 100644
--- a/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
+++ b/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
@@ -1515,32 +1515,6 @@ static Optional<MCPhysReg> tryToFindRegisterToRename(
   return None;
 }
 
-// Returns a boolean that represents whether there exists a register
-// from FirstMI to the beginning of the block that can be renamed. If
-// one exists, we update Flags with its value.
-static bool updateFlagsWithRenameReg(
-    Optional<bool> MaybeCanRename, LdStPairFlags &Flags, MachineInstr &FirstMI,
-    MachineInstr &MI, LiveRegUnits &DefinedInBB, LiveRegUnits &UsedInBetween,
-    SmallPtrSetImpl<const TargetRegisterClass *> &RequiredClasses,
-    const TargetRegisterInfo *TRI) {
-  if (DebugCounter::shouldExecute(RegRenamingCounter)) {
-    if (!MaybeCanRename)
-      MaybeCanRename = {
-          canRenameUpToDef(FirstMI, UsedInBetween, RequiredClasses, TRI)};
-
-    if (*MaybeCanRename) {
-      Optional<MCPhysReg> MaybeRenameReg = tryToFindRegisterToRename(
-          FirstMI, MI, DefinedInBB, UsedInBetween, RequiredClasses, TRI);
-      if (MaybeRenameReg) {
-        Flags.setRenameReg(*MaybeRenameReg);
-        Flags.setMergeForward(true);
-        return true;
-      }
-    }
-  }
-  return false;
-}
-
 /// Scan the instructions looking for a load/store that can be combined with the
 /// current instruction into a wider equivalent or a load/store pair.
 MachineBasicBlock::iterator
@@ -1692,27 +1666,6 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
             continue;
           }
         }
-        // If the load/store pattern has been optimized and reordered
-        // into the following:
-        //    ldr	q0, [x1, #16]
-        //    str	q0, [x0, #16]
-        //    ldr	q0, [x1]
-        //    str	q0, [x0]
-        // and the destination register of the load/store instruction is
-        // the same register as or a sub/super register of the other
-        // load/store, it will not generate an LDP/STP, so we attempt to
-        // rename the register so that it can be recognised as a pair.
-        // TODO: This is currently supported for STPs, LDPs are not
-        // being generated yet.
-        if (TRI->isSuperOrSubRegisterEq(Reg, getLdStRegOp(MI).getReg())) {
-          bool flagsHaveRenameReg = updateFlagsWithRenameReg(
-              MaybeCanRename, Flags, FirstMI, MI, DefinedInBB, UsedInBetween,
-              RequiredClasses, TRI);
-          if (flagsHaveRenameReg) {
-            MBBIWithRenameReg = MBBI;
-            continue;
-          }
-        }
         // If the destination register of one load is the same register or a
         // sub/super register of the other load, bail and keep looking. A
         // load-pair instruction with both destination registers the same is
@@ -1761,11 +1714,22 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
             Flags.clearRenameReg();
             return MBBI;
           }
-          bool flagsHaveRenameReg = updateFlagsWithRenameReg(
-              MaybeCanRename, Flags, FirstMI, MI, DefinedInBB, UsedInBetween,
-              RequiredClasses, TRI);
-          if (flagsHaveRenameReg) {
-            MBBIWithRenameReg = MBBI;
+
+          if (DebugCounter::shouldExecute(RegRenamingCounter)) {
+            if (!MaybeCanRename)
+              MaybeCanRename = {canRenameUpToDef(FirstMI, UsedInBetween,
+                                                 RequiredClasses, TRI)};
+
+            if (*MaybeCanRename) {
+              Optional<MCPhysReg> MaybeRenameReg = tryToFindRegisterToRename(
+                  FirstMI, MI, DefinedInBB, UsedInBetween, RequiredClasses,
+                  TRI);
+              if (MaybeRenameReg) {
+                Flags.setRenameReg(*MaybeRenameReg);
+                Flags.setMergeForward(true);
+                MBBIWithRenameReg = MBBI;
+              }
+            }
           }
         }
         // Unable to combine these instructions due to interference in between.

diff  --git a/llvm/test/CodeGen/AArch64/GlobalISel/byval-call.ll b/llvm/test/CodeGen/AArch64/GlobalISel/byval-call.ll
index 560cc168ce104..778c823552a1f 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/byval-call.ll
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/byval-call.ll
@@ -33,30 +33,38 @@ define void @call_byval_a64i32([64 x i32]* %incoming) {
 ; CHECK-NEXT:    .cfi_offset w28, -16
 ; CHECK-NEXT:    .cfi_offset w30, -24
 ; CHECK-NEXT:    .cfi_offset w29, -32
-; CHECK-NEXT:    ldr q1, [x0]
+; CHECK-NEXT:    ldr q0, [x0]
+; CHECK-NEXT:    str q0, [sp]
 ; CHECK-NEXT:    ldr q0, [x0, #16]
-; CHECK-NEXT:    stp q1, q0, [sp]
-; CHECK-NEXT:    ldr q1, [x0, #32]
+; CHECK-NEXT:    str q0, [sp, #16]
+; CHECK-NEXT:    ldr q0, [x0, #32]
+; CHECK-NEXT:    str q0, [sp, #32]
 ; CHECK-NEXT:    ldr q0, [x0, #48]
-; CHECK-NEXT:    stp q1, q0, [sp, #32]
-; CHECK-NEXT:    ldr q1, [x0, #64]
+; CHECK-NEXT:    str q0, [sp, #48]
+; CHECK-NEXT:    ldr q0, [x0, #64]
+; CHECK-NEXT:    str q0, [sp, #64]
 ; CHECK-NEXT:    ldr q0, [x0, #80]
-; CHECK-NEXT:    stp q1, q0, [sp, #64]
-; CHECK-NEXT:    ldr q1, [x0, #96]
+; CHECK-NEXT:    str q0, [sp, #80]
+; CHECK-NEXT:    ldr q0, [x0, #96]
+; CHECK-NEXT:    str q0, [sp, #96]
 ; CHECK-NEXT:    ldr q0, [x0, #112]
-; CHECK-NEXT:    stp q1, q0, [sp, #96]
-; CHECK-NEXT:    ldr q1, [x0, #128]
+; CHECK-NEXT:    str q0, [sp, #112]
+; CHECK-NEXT:    ldr q0, [x0, #128]
+; CHECK-NEXT:    str q0, [sp, #128]
 ; CHECK-NEXT:    ldr q0, [x0, #144]
-; CHECK-NEXT:    stp q1, q0, [sp, #128]
-; CHECK-NEXT:    ldr q1, [x0, #160]
+; CHECK-NEXT:    str q0, [sp, #144]
+; CHECK-NEXT:    ldr q0, [x0, #160]
+; CHECK-NEXT:    str q0, [sp, #160]
 ; CHECK-NEXT:    ldr q0, [x0, #176]
-; CHECK-NEXT:    stp q1, q0, [sp, #160]
-; CHECK-NEXT:    ldr q1, [x0, #192]
+; CHECK-NEXT:    str q0, [sp, #176]
+; CHECK-NEXT:    ldr q0, [x0, #192]
+; CHECK-NEXT:    str q0, [sp, #192]
 ; CHECK-NEXT:    ldr q0, [x0, #208]
-; CHECK-NEXT:    stp q1, q0, [sp, #192]
-; CHECK-NEXT:    ldr q1, [x0, #224]
+; CHECK-NEXT:    str q0, [sp, #208]
+; CHECK-NEXT:    ldr q0, [x0, #224]
+; CHECK-NEXT:    str q0, [sp, #224]
 ; CHECK-NEXT:    ldr q0, [x0, #240]
-; CHECK-NEXT:    stp q1, q0, [sp, #224]
+; CHECK-NEXT:    str q0, [sp, #240]
 ; CHECK-NEXT:    bl byval_a64i32
 ; CHECK-NEXT:    ldr x28, [sp, #272] // 8-byte Folded Reload
 ; CHECK-NEXT:    ldp x29, x30, [sp, #256] // 16-byte Folded Reload

diff  --git a/llvm/test/CodeGen/AArch64/consthoist-gep.ll b/llvm/test/CodeGen/AArch64/consthoist-gep.ll
index ecd2e48dafafa..507f949b6ad7c 100644
--- a/llvm/test/CodeGen/AArch64/consthoist-gep.ll
+++ b/llvm/test/CodeGen/AArch64/consthoist-gep.ll
@@ -3,8 +3,6 @@
 ; CHECK-NOT: adrp    x10, global+332
 ; CHECK-NOT: add     x10, x10, :lo12:global+332
 ; CHECK: adrp    x10, global+528
-; CHECK-NEXT:  and w12, w8, #0xffffff
-; CHECK-NEXT:  ldr w8, [x11]
 ; CHECK-NEXT: add     x10, x10, :lo12:global+528
 
 %struct.blam = type { %struct.bar, %struct.bar.0, %struct.wobble, %struct.wombat, i8, i16, %struct.snork.2, %struct.foo, %struct.snork.3, %struct.wobble.4, %struct.quux, [9 x i16], %struct.spam, %struct.zot }

diff  --git a/llvm/test/CodeGen/AArch64/ldst-opt.ll b/llvm/test/CodeGen/AArch64/ldst-opt.ll
index 38f8a4b94a2f0..fe55806e56f48 100644
--- a/llvm/test/CodeGen/AArch64/ldst-opt.ll
+++ b/llvm/test/CodeGen/AArch64/ldst-opt.ll
@@ -1117,7 +1117,7 @@ define void @store-pair-post-indexed-double() nounwind {
 define void @post-indexed-sub-word(i32* %a, i32* %b, i64 %count) nounwind {
 ; CHECK-LABEL: post-indexed-sub-word
 ; CHECK: ldr w{{[0-9]+}}, [x{{[0-9]+}}], #-8
-; CHECK: stp w{{[0-9]+}}, w{{[0-9]+}}, [x0, #-4]
+; CHECK: str w{{[0-9]+}}, [x{{[0-9]+}}], #-8
   br label %for.body
 for.body:
   %phi1 = phi i32* [ %gep4, %for.body ], [ %b, %0 ]
@@ -1141,7 +1141,7 @@ end:
 define void @post-indexed-sub-doubleword(i64* %a, i64* %b, i64 %count) nounwind {
 ; CHECK-LABEL: post-indexed-sub-doubleword
 ; CHECK: ldr x{{[0-9]+}}, [x{{[0-9]+}}], #-16
-; CHECK: stp x{{[0-9]+}}, x{{[0-9]+}}, [x0, #-8]
+; CHECK: str x{{[0-9]+}}, [x{{[0-9]+}}], #-16
   br label %for.body
 for.body:
   %phi1 = phi i64* [ %gep4, %for.body ], [ %b, %0 ]
@@ -1165,7 +1165,7 @@ end:
 define void @post-indexed-sub-quadword(<2 x i64>* %a, <2 x i64>* %b, i64 %count) nounwind {
 ; CHECK-LABEL: post-indexed-sub-quadword
 ; CHECK: ldr q{{[0-9]+}}, [x{{[0-9]+}}], #-32
-; CHECK: stp q{{[0-9]+}}, q{{[0-9]+}}, [x0, #-16]
+; CHECK: str q{{[0-9]+}}, [x{{[0-9]+}}], #-32
   br label %for.body
 for.body:
   %phi1 = phi <2 x i64>* [ %gep4, %for.body ], [ %b, %0 ]
@@ -1189,7 +1189,7 @@ end:
 define void @post-indexed-sub-float(float* %a, float* %b, i64 %count) nounwind {
 ; CHECK-LABEL: post-indexed-sub-float
 ; CHECK: ldr s{{[0-9]+}}, [x{{[0-9]+}}], #-8
-; CHECK: stp s{{[0-9]+}}, s{{[0-9]+}}, [x0, #-4]
+; CHECK: str s{{[0-9]+}}, [x{{[0-9]+}}], #-8
   br label %for.body
 for.body:
   %phi1 = phi float* [ %gep4, %for.body ], [ %b, %0 ]
@@ -1213,7 +1213,7 @@ end:
 define void @post-indexed-sub-double(double* %a, double* %b, i64 %count) nounwind {
 ; CHECK-LABEL: post-indexed-sub-double
 ; CHECK: ldr d{{[0-9]+}}, [x{{[0-9]+}}], #-16
-; CHECK: stp d{{[0-9]+}}, d{{[0-9]+}}, [x0, #-8]
+; CHECK: str d{{[0-9]+}}, [x{{[0-9]+}}], #-16
   br label %for.body
 for.body:
   %phi1 = phi double* [ %gep4, %for.body ], [ %b, %0 ]
@@ -1237,7 +1237,7 @@ end:
 define void @post-indexed-sub-doubleword-offset-min(i64* %a, i64* %b, i64 %count) nounwind {
 ; CHECK-LABEL: post-indexed-sub-doubleword-offset-min
 ; CHECK: ldr x{{[0-9]+}}, [x{{[0-9]+}}], #-256
-; CHECK: stp x{{[0-9]+}}, x{{[0-9]+}}, [x0], #-256
+; CHECK: str x{{[0-9]+}}, [x{{[0-9]+}}], #-256
   br label %for.body
 for.body:
   %phi1 = phi i64* [ %gep4, %for.body ], [ %b, %0 ]
@@ -1262,7 +1262,8 @@ define void @post-indexed-doubleword-offset-out-of-range(i64* %a, i64* %b, i64 %
 ; CHECK-LABEL: post-indexed-doubleword-offset-out-of-range
 ; CHECK: ldr x{{[0-9]+}}, [x{{[0-9]+}}]
 ; CHECK: add x{{[0-9]+}}, x{{[0-9]+}}, #256
-; CHECK: stp x{{[0-9]+}}, x{{[0-9]+}}, [x0], #256
+; CHECK: str x{{[0-9]+}}, [x{{[0-9]+}}]
+; CHECK: add x{{[0-9]+}}, x{{[0-9]+}}, #256
 
   br label %for.body
 for.body:

diff  --git a/llvm/test/CodeGen/AArch64/stp-opt-with-renaming.mir b/llvm/test/CodeGen/AArch64/stp-opt-with-renaming.mir
index af40bc4be7ceb..21d22dc585f62 100644
--- a/llvm/test/CodeGen/AArch64/stp-opt-with-renaming.mir
+++ b/llvm/test/CodeGen/AArch64/stp-opt-with-renaming.mir
@@ -544,183 +544,3 @@ body:             |
     RET undef $lr
 
 ...
-
-# During ISel, the order of load/store pairs can be optimized and changed
-# which prevents STPs from being generated. These tests check that an STP 
-# instruction is generated after register renaming.
-... 
----
-#
-# CHECK-LABEL: name: memcpy32
-# CHECK: liveins: $x0, $x1
-# CHECK: $q1 = LDRQui renamable $x1, 1 :: (load 16)
-# CHECK: renamable $q0 = LDRQui killed renamable $x1, 0 :: (load 16, align 32)
-# CHECK: STPQi killed renamable $q0, killed $q1, killed renamable $x0, 0 :: (store 16, align 32)
-# CHECK: RET undef $lr
-#
-name:            memcpy32
-alignment:       4
-exposesReturnsTwice: false
-legalized:       false
-regBankSelected: false
-selected:        false
-failedISel:      false
-tracksRegLiveness: true
-hasWinCFI:       false
-registers:       []
-liveins:
-  - { reg: '$x0', virtual-reg: '' }
-  - { reg: '$x1', virtual-reg: '' }
-frameInfo:
-  isFrameAddressTaken: false
-  isReturnAddressTaken: false
-  hasStackMap:     false
-  hasPatchPoint:   false
-  stackSize:       0
-  offsetAdjustment: 0
-  maxAlignment:    1
-  adjustsStack:    false
-  hasCalls:        false
-  stackProtector:  ''
-  maxCallFrameSize: 0
-  cvBytesOfCalleeSavedRegisters: 0
-  hasOpaqueSPAdjustment: false
-  hasVAStart:      false
-  hasMustTailInVarArgFunc: false
-  hasTailCall:     false
-  localFrameSize:  0
-  savePoint:       ''
-  restorePoint:    ''
-fixedStack:      []
-stack:           []
-callSites:       []
-debugValueSubstitutions: []
-constants:       []
-machineFunctionInfo:
-  hasRedZone:      false
-body:             |
-  bb.0.entry:
-    liveins: $x0, $x1
-    renamable $q0 = LDRQui renamable $x1, 1 :: (load 16)
-    STRQui killed renamable $q0, renamable $x0, 1 :: (store 16, basealign 32)
-    renamable $q0 = LDRQui killed renamable $x1, 0 :: (load 16, align 32)
-    STRQui killed renamable $q0, killed renamable $x0, 0 :: (store 16, align 32)
-    RET undef $lr
-
-...
----
-#
-# CHECK-LABEL: name: memcpy64
-# CHECK: liveins: $x0, $x1
-# CHECK: $q1 = LDRQui renamable $x1, 1 :: (load 16)
-# CHECK: renamable $q0 = LDRQui killed renamable $x1, 0 :: (load 16, align 64)
-# CHECK: STPQi killed renamable $q0, killed $q1, killed renamable $x0, 0 :: (store 16, align 64)
-# CHECK: RET undef $lr
-#
-name:            memcpy64
-alignment:       4
-exposesReturnsTwice: false
-legalized:       false
-regBankSelected: false
-selected:        false
-failedISel:      false
-tracksRegLiveness: true
-hasWinCFI:       false
-registers:       []
-liveins:
-  - { reg: '$x0', virtual-reg: '' }
-  - { reg: '$x1', virtual-reg: '' }
-frameInfo:
-  isFrameAddressTaken: false
-  isReturnAddressTaken: false
-  hasStackMap:     false
-  hasPatchPoint:   false
-  stackSize:       0
-  offsetAdjustment: 0
-  maxAlignment:    1
-  adjustsStack:    false
-  hasCalls:        false
-  stackProtector:  ''
-  maxCallFrameSize: 0
-  cvBytesOfCalleeSavedRegisters: 0
-  hasOpaqueSPAdjustment: false
-  hasVAStart:      false
-  hasMustTailInVarArgFunc: false
-  hasTailCall:     false
-  localFrameSize:  0
-  savePoint:       ''
-  restorePoint:    ''
-fixedStack:      []
-stack:           []
-callSites:       []
-debugValueSubstitutions: []
-constants:       []
-machineFunctionInfo:
-  hasRedZone:      false
-body:             |
-  bb.0.entry:
-    liveins: $x0, $x1
-    renamable $q0 = LDRQui renamable $x1, 1 :: (load 16)
-    STRQui killed renamable $q0, renamable $x0, 1 :: (store 16, basealign 64)
-    renamable $q0 = LDRQui killed renamable $x1, 0 :: (load 16, align 64)
-    STRQui killed renamable $q0, killed renamable $x0, 0 :: (store 16, align 64)
-    RET undef $lr
-
-...
----
-#
-# CHECK-LABEL: name: memcpy128
-# CHECK: liveins: $x0, $x1
-# CHECK: $q1 = LDRQui renamable $x1, 1 :: (load 16)
-# CHECK: renamable $q0 = LDRQui killed renamable $x1, 0 :: (load 16, align 128)
-# CHECK: STPQi killed renamable $q0, killed $q1, killed renamable $x0, 0 :: (store 16, align 128)
-# CHECK: RET undef $lr
-#
-name:            memcpy128
-alignment:       4
-exposesReturnsTwice: false
-legalized:       false
-regBankSelected: false
-selected:        false
-failedISel:      false
-tracksRegLiveness: true
-hasWinCFI:       false
-registers:       []
-liveins:
-  - { reg: '$x0', virtual-reg: '' }
-  - { reg: '$x1', virtual-reg: '' }
-frameInfo:
-  isFrameAddressTaken: false
-  isReturnAddressTaken: false
-  hasStackMap:     false
-  hasPatchPoint:   false
-  stackSize:       0
-  offsetAdjustment: 0
-  maxAlignment:    1
-  adjustsStack:    false
-  hasCalls:        false
-  stackProtector:  ''
-  maxCallFrameSize: 0
-  cvBytesOfCalleeSavedRegisters: 0
-  hasOpaqueSPAdjustment: false
-  hasVAStart:      false
-  hasMustTailInVarArgFunc: false
-  hasTailCall:     false
-  localFrameSize:  0
-  savePoint:       ''
-  restorePoint:    ''
-fixedStack:      []
-stack:           []
-callSites:       []
-debugValueSubstitutions: []
-constants:       []
-machineFunctionInfo:
-  hasRedZone:      false
-body:             |
-  bb.0.entry:
-    liveins: $x0, $x1
-    renamable $q0 = LDRQui renamable $x1, 1 :: (load 16)
-    STRQui killed renamable $q0, renamable $x0, 1 :: (store 16, basealign 128)
-    renamable $q0 = LDRQui killed renamable $x1, 0 :: (load 16, align 128)
-    STRQui killed renamable $q0, killed renamable $x0, 0 :: (store 16, align 128)
-    RET undef $lr


        


More information about the llvm-commits mailing list