[llvm] ddb46ab - [LSR] Don't consider users of constant outside loop

Nikita Popov via llvm-commits llvm-commits at lists.llvm.org
Thu Jul 13 03:22:45 PDT 2023


Author: Nikita Popov
Date: 2023-07-13T12:22:38+02:00
New Revision: ddb46abd3c8b33d7213dd96ec70e1f92336f467e

URL: https://github.com/llvm/llvm-project/commit/ddb46abd3c8b33d7213dd96ec70e1f92336f467e
DIFF: https://github.com/llvm/llvm-project/commit/ddb46abd3c8b33d7213dd96ec70e1f92336f467e.diff

LOG: [LSR] Don't consider users of constant outside loop

In CollectLoopInvariantFixupsAndFormulae(), LSR looks at users of
loop-invariant values that are outside the loop. E.g. if we have an
addrec based on %base, and %base is also used outside the loop, then
we have to keep %base in a register anyway, which may make it more
profitable to use %base + %idx style addressing.

This reasoning doesn't hold up when the base is a constant, because
the constant can be rematerialized. The lsr-memcpy.ll test regressed
when opaque pointers were enabled, because inttoptr (i64 6442450944 to ptr)
now also has a use outside the loop (previously it didn't due to a
pointer type difference), and LSR counting that extra use leads it to
pick worse addressing modes in the loop. However, the use outside the
loop actually gets rematerialized, so the alleged register saving does
not occur.

The same reasoning also applies to other types of constants, such
as global variable references.

Differential Revision: https://reviews.llvm.org/D155073

Added: 
    

Modified: 
    llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
    llvm/test/Transforms/LoopStrengthReduce/AArch64/lsr-memcpy.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index 905f84efcae58e..be7784faeea228 100644
--- a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -3507,8 +3507,8 @@ LSRInstance::CollectLoopInvariantFixupsAndFormulae() {
       if (const Instruction *Inst = dyn_cast<Instruction>(V)) {
         // Look for instructions defined outside the loop.
         if (L->contains(Inst)) continue;
-      } else if (isa<UndefValue>(V))
-        // Undef doesn't have a live range, so it doesn't matter.
+      } else if (isa<Constant>(V))
+        // Constants can be re-materialized.
         continue;
       for (const Use &U : V->uses()) {
         const Instruction *UserInst = dyn_cast<Instruction>(U.getUser());

diff  --git a/llvm/test/Transforms/LoopStrengthReduce/AArch64/lsr-memcpy.ll b/llvm/test/Transforms/LoopStrengthReduce/AArch64/lsr-memcpy.ll
index 8092d598f85fbe..2521f3e84475a2 100644
--- a/llvm/test/Transforms/LoopStrengthReduce/AArch64/lsr-memcpy.ll
+++ b/llvm/test/Transforms/LoopStrengthReduce/AArch64/lsr-memcpy.ll
@@ -7,21 +7,18 @@
 ; <rdar://problem/12702735> [ARM64][coalescer] need better register
 ; coalescing for simple unit tests.
 
-; FIXME: This regressed after enabling opaque pointers.
 define i32 @test_inttoptr() nounwind {
 ; CHECK-LABEL: test_inttoptr:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT:    mov x8, #0 // =0x0
-; CHECK-NEXT:    mov w9, #1288 // =0x508
-; CHECK-NEXT:    mov x10, #4294967296 // =0x100000000
-; CHECK-NEXT:    mov x11, #6442450944 // =0x180000000
+; CHECK-NEXT:    mov w8, #1288 // =0x508
+; CHECK-NEXT:    mov x9, #4294967296 // =0x100000000
+; CHECK-NEXT:    mov x10, #6442450944 // =0x180000000
 ; CHECK-NEXT:  .LBB0_1: // %while.body
 ; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    ldr x12, [x8, x10]
-; CHECK-NEXT:    str x12, [x8, x11]
-; CHECK-NEXT:    add x8, x8, #8
-; CHECK-NEXT:    subs x9, x9, #8
+; CHECK-NEXT:    ldr x11, [x9], #8
+; CHECK-NEXT:    str x11, [x10], #8
+; CHECK-NEXT:    subs x8, x8, #8
 ; CHECK-NEXT:    b.pl .LBB0_1
 ; CHECK-NEXT:  // %bb.2: // %while.end
 ; CHECK-NEXT:    mov x8, #6442450944 // =0x180000000
@@ -55,18 +52,16 @@ while.end:                                        ; preds = %while.body
 define ptr @test_globals() nounwind {
 ; CHECK-LABEL: test_globals:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    mov x8, #0 // =0x0
-; CHECK-NEXT:    mov w9, #1288 // =0x508
-; CHECK-NEXT:    adrp x10, g2
-; CHECK-NEXT:    add x10, x10, :lo12:g2
-; CHECK-NEXT:    adrp x11, g1
-; CHECK-NEXT:    add x11, x11, :lo12:g1
+; CHECK-NEXT:    mov w8, #1288 // =0x508
+; CHECK-NEXT:    adrp x9, g2
+; CHECK-NEXT:    add x9, x9, :lo12:g2
+; CHECK-NEXT:    adrp x10, g1
+; CHECK-NEXT:    add x10, x10, :lo12:g1
 ; CHECK-NEXT:  .LBB1_1: // %while.body
 ; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    ldr x12, [x10, x8]
-; CHECK-NEXT:    str x12, [x11, x8]
-; CHECK-NEXT:    add x8, x8, #8
-; CHECK-NEXT:    subs x9, x9, #8
+; CHECK-NEXT:    ldr x11, [x9], #8
+; CHECK-NEXT:    str x11, [x10], #8
+; CHECK-NEXT:    subs x8, x8, #8
 ; CHECK-NEXT:    b.pl .LBB1_1
 ; CHECK-NEXT:  // %bb.2: // %while.end
 ; CHECK-NEXT:    adrp x0, g1


        


More information about the llvm-commits mailing list