[llvm] 9d5af55 - [X86][CodeGenPrepare] Try to reuse IV's incremented value instead of adding the offset, part 2

Max Kazantsev via llvm-commits llvm-commits at lists.llvm.org
Thu Mar 4 01:49:10 PST 2021


Author: Max Kazantsev
Date: 2021-03-04T16:47:43+07:00
New Revision: 9d5af555891d8187bfb4a5f573e059081af99571

URL: https://github.com/llvm/llvm-project/commit/9d5af555891d8187bfb4a5f573e059081af99571
DIFF: https://github.com/llvm/llvm-project/commit/9d5af555891d8187bfb4a5f573e059081af99571.diff

LOG: [X86][CodeGenPrepare] Try to reuse IV's incremented value instead of adding the offset, part 2

This patch enables the transform for the case where the offset is not
completely eliminated. Presumably we still reduce the overlap of the live
ranges of the IV and its increment, which should never hurt, but since
there are doubts that this is always true, it goes in as a separate change.
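
For illustration, here is a minimal before/after sketch of the sunk address
computation, with names and constants modeled on the usub_inc_iv.ll test
below (this is not code from the patch itself; %p8 stands for the bitcast
of the base pointer):

    ; Before: the address is built from the IV Phi.
    %sunkaddr  = mul i64 %iv, 4
    %sunkaddr1 = getelementptr i8, i8* %p8, i64 %sunkaddr
    %sunkaddr2 = getelementptr i8, i8* %sunkaddr1, i64 -28

    ; After: reuse the decrement %math = %iv - 1 and rebalance the
    ; constant: -28 - (-1 * 4) = -24, so the address is unchanged.
    %sunkaddr  = mul i64 %math, 4
    %sunkaddr1 = getelementptr i8, i8* %p8, i64 %sunkaddr
    %sunkaddr2 = getelementptr i8, i8* %sunkaddr1, i64 -24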

Differential Revision: https://reviews.llvm.org/D96399
Reviewed By: reames

Added: 
    

Modified: 
    llvm/lib/CodeGen/CodeGenPrepare.cpp
    llvm/test/CodeGen/X86/2020_12_02_decrementing_loop.ll
    llvm/test/CodeGen/X86/overflowing-iv.ll
    llvm/test/CodeGen/X86/usub_inc_iv.ll

Removed: 
    


################################################################################
diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp
index ecf14b1653cb..5d6c8ec0569c 100644
--- a/llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -3884,13 +3884,15 @@ bool AddressingModeMatcher::matchScaledValue(Value *ScaleReg, int64_t Scale,
   // In this case, we may reuse the IV increment instead of the IV Phi to
   // achieve the following advantages:
   // 1. If the IV step matches the offset, we will not need the offset at all;
+  // 2. Even if they do not match, we will reduce the overlap of the live
+  //    ranges of the IV and the IV increment, which can potentially lead to
+  //    better register allocation.
   if (AddrMode.BaseOffs) {
     if (auto IVStep = GetConstantStep(ScaleReg)) {
       Instruction *IVInc = IVStep->first;
       APInt Step = IVStep->second;
       APInt Offset = Step * AddrMode.Scale;
-      if (Offset.isSignedIntN(64) && TestAddrMode.BaseOffs == Offset &&
-          DT.dominates(IVInc, MemoryInst)) {
+      if (Offset.isSignedIntN(64) && DT.dominates(IVInc, MemoryInst)) {
         TestAddrMode.InBounds = false;
         TestAddrMode.ScaledReg = IVInc;
         TestAddrMode.BaseOffs -= Offset.getLimitedValue();
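
With the equality check on TestAddrMode.BaseOffs removed, the rebalancing
now fires even when the step does not cancel the offset entirely. Worked
through with the values from the usub_inc_iv.ll test below: Scale = 4 and
Step = -1 give Offset = Step * Scale = -4, so switching ScaledReg from the
Phi to its decrement turns BaseOffs = -28 into -28 - (-4) = -24.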

diff --git a/llvm/test/CodeGen/X86/2020_12_02_decrementing_loop.ll b/llvm/test/CodeGen/X86/2020_12_02_decrementing_loop.ll
index 900e12d71d6d..c004523f19de 100644
--- a/llvm/test/CodeGen/X86/2020_12_02_decrementing_loop.ll
+++ b/llvm/test/CodeGen/X86/2020_12_02_decrementing_loop.ll
@@ -44,16 +44,14 @@ failure:                                          ; preds = %backedge
 define i32 @test_01a(i32* %p, i64 %len, i32 %x) {
 ; CHECK-LABEL: test_01a:
 ; CHECK:       ## %bb.0: ## %entry
-; CHECK-NEXT:    movq %rsi, %rax
 ; CHECK-NEXT:    .p2align 4, 0x90
 ; CHECK-NEXT:  LBB1_1: ## %loop
 ; CHECK-NEXT:    ## =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    subq $1, %rax
+; CHECK-NEXT:    subq $1, %rsi
 ; CHECK-NEXT:    jb LBB1_4
 ; CHECK-NEXT:  ## %bb.2: ## %backedge
 ; CHECK-NEXT:    ## in Loop: Header=BB1_1 Depth=1
-; CHECK-NEXT:    cmpl %edx, -28(%rdi,%rsi,4)
-; CHECK-NEXT:    movq %rax, %rsi
+; CHECK-NEXT:    cmpl %edx, -24(%rdi,%rsi,4)
 ; CHECK-NEXT:    jne LBB1_1
 ; CHECK-NEXT:  ## %bb.3: ## %failure
 ; CHECK-NEXT:    ud2
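
In test_01a the payoff is visible directly: both register copies through
%rax disappear because the compare now addresses memory off the
already-decremented %rsi, and the displacement is rebalanced from -28 to
-24 to compensate (-28 - (-1 * 4) = -24), so the effective address is
unchanged.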

diff --git a/llvm/test/CodeGen/X86/overflowing-iv.ll b/llvm/test/CodeGen/X86/overflowing-iv.ll
index 1d5f3c2034b3..35234470c854 100644
--- a/llvm/test/CodeGen/X86/overflowing-iv.ll
+++ b/llvm/test/CodeGen/X86/overflowing-iv.ll
@@ -12,10 +12,10 @@ define i32 @test_01(i32* %p, i64 %len, i32 %x) {
 ; CHECK-NEXT:    [[COND_1:%.*]] = icmp eq i64 [[IV]], [[LEN:%.*]]
 ; CHECK-NEXT:    br i1 [[COND_1]], label [[EXIT:%.*]], label [[BACKEDGE]]
 ; CHECK:       backedge:
-; CHECK-NEXT:    [[SUNKADDR:%.*]] = mul i64 [[IV]], 4
+; CHECK-NEXT:    [[SUNKADDR:%.*]] = mul i64 [[IV_NEXT]], 4
 ; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i32* [[P:%.*]] to i8*
 ; CHECK-NEXT:    [[SUNKADDR1:%.*]] = getelementptr i8, i8* [[TMP0]], i64 [[SUNKADDR]]
-; CHECK-NEXT:    [[SUNKADDR2:%.*]] = getelementptr i8, i8* [[SUNKADDR1]], i64 -4
+; CHECK-NEXT:    [[SUNKADDR2:%.*]] = getelementptr i8, i8* [[SUNKADDR1]], i64 -8
 ; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8* [[SUNKADDR2]] to i32*
 ; CHECK-NEXT:    [[LOADED:%.*]] = load atomic i32, i32* [[TMP1]] unordered, align 4
 ; CHECK-NEXT:    [[COND_2:%.*]] = icmp eq i32 [[LOADED]], [[X:%.*]]
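
Here the step goes the other way: [[IV_NEXT]] is [[IV]] + 1, so
Offset = 1 * 4 = 4 and the trailing GEP constant moves from -4 to
-4 - 4 = -8. Both forms compute the same address, 4 * [[IV]] - 4 past [[P]].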

diff --git a/llvm/test/CodeGen/X86/usub_inc_iv.ll b/llvm/test/CodeGen/X86/usub_inc_iv.ll
index 7744319f4124..a3097e9314d1 100644
--- a/llvm/test/CodeGen/X86/usub_inc_iv.ll
+++ b/llvm/test/CodeGen/X86/usub_inc_iv.ll
@@ -59,10 +59,10 @@ define i32 @test_01a(i32* %p, i64 %len, i32 %x) {
 ; CHECK-NEXT:    [[OV:%.*]] = extractvalue { i64, i1 } [[TMP0]], 1
 ; CHECK-NEXT:    br i1 [[OV]], label [[EXIT:%.*]], label [[BACKEDGE]]
 ; CHECK:       backedge:
-; CHECK-NEXT:    [[SUNKADDR:%.*]] = mul i64 [[IV]], 4
+; CHECK-NEXT:    [[SUNKADDR:%.*]] = mul i64 [[MATH]], 4
 ; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[P:%.*]] to i8*
 ; CHECK-NEXT:    [[SUNKADDR1:%.*]] = getelementptr i8, i8* [[TMP1]], i64 [[SUNKADDR]]
-; CHECK-NEXT:    [[SUNKADDR2:%.*]] = getelementptr i8, i8* [[SUNKADDR1]], i64 -28
+; CHECK-NEXT:    [[SUNKADDR2:%.*]] = getelementptr i8, i8* [[SUNKADDR1]], i64 -24
 ; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i8* [[SUNKADDR2]] to i32*
 ; CHECK-NEXT:    [[LOADED:%.*]] = load atomic i32, i32* [[TMP2]] unordered, align 4
 ; CHECK-NEXT:    [[COND_2:%.*]] = icmp eq i32 [[LOADED]], [[X:%.*]]
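
Same arithmetic as the worked example above: [[MATH]] is the
usub.with.overflow result [[IV]] - 1, so Offset = -1 * 4 = -4 and the
constant goes from -28 to -28 - (-4) = -24.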

More information about the llvm-commits mailing list