[llvm] 757409d - [MergeICmps] Ignore clobbering instructions before the loads

Nikita Popov via llvm-commits llvm-commits at lists.llvm.org
Fri Aug 27 14:32:06 PDT 2021


Author: Nikita Popov
Date: 2021-08-27T23:31:35+02:00
New Revision: 757409da7a69f3b0ab23687642dc5edf2061bc51

URL: https://github.com/llvm/llvm-project/commit/757409da7a69f3b0ab23687642dc5edf2061bc51
DIFF: https://github.com/llvm/llvm-project/commit/757409da7a69f3b0ab23687642dc5edf2061bc51.diff

LOG: [MergeICmps] Ignore clobbering instructions before the loads

This is another followup to D106591. Even if there is an
instruction that clobbers one of the loads, this doesn't matter if
it happens before the loads. Those instructions aren't affected by
the transform at all.

The gep-references-bb.ll is modified to preserve the spirit of the
test, as the store to @g no longer impacts the transform.

Differential Revision: https://reviews.llvm.org/D108782

Added: 
    

Modified: 
    llvm/lib/Transforms/Scalar/MergeICmps.cpp
    llvm/test/Transforms/MergeICmps/X86/gep-references-bb.ll
    llvm/test/Transforms/MergeICmps/X86/split-block-does-work.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/Scalar/MergeICmps.cpp b/llvm/lib/Transforms/Scalar/MergeICmps.cpp
index a90edfd45438..f7083f8404f8 100644
--- a/llvm/lib/Transforms/Scalar/MergeICmps.cpp
+++ b/llvm/lib/Transforms/Scalar/MergeICmps.cpp
@@ -239,11 +239,13 @@ bool BCECmpBlock::canSinkBCECmpInst(const Instruction *Inst,
   // If this instruction may clobber the loads and is in middle of the BCE cmp
   // block instructions, then bail for now.
   if (Inst->mayWriteToMemory()) {
-    // Disallow instructions that might modify the BCE operands
-    MemoryLocation LLoc = MemoryLocation::get(Cmp.Lhs.LoadI);
-    MemoryLocation RLoc = MemoryLocation::get(Cmp.Rhs.LoadI);
-    if (isModSet(AA.getModRefInfo(Inst, LLoc)) ||
-        isModSet(AA.getModRefInfo(Inst, RLoc)))
+    auto MayClobber = [&](LoadInst *LI) {
+      // If a potentially clobbering instruction comes before the load,
+      // we can still safely sink the load.
+      return !Inst->comesBefore(LI) &&
+             isModSet(AA.getModRefInfo(Inst, MemoryLocation::get(LI)));
+    };
+    if (MayClobber(Cmp.Lhs.LoadI) || MayClobber(Cmp.Rhs.LoadI))
       return false;
   }
   // Make sure this instruction does not use any of the BCE cmp block

diff  --git a/llvm/test/Transforms/MergeICmps/X86/gep-references-bb.ll b/llvm/test/Transforms/MergeICmps/X86/gep-references-bb.ll
index 501df86133d3..357081757619 100644
--- a/llvm/test/Transforms/MergeICmps/X86/gep-references-bb.ll
+++ b/llvm/test/Transforms/MergeICmps/X86/gep-references-bb.ll
@@ -11,12 +11,12 @@ target triple = "x86_64"
 define i1 @bug(%Triple* nonnull dereferenceable(16) %lhs, %Triple* nonnull dereferenceable(16) %rhs) nofree nosync {
 ; CHECK-LABEL: @bug(
 ; CHECK-NEXT:  bb0:
-; CHECK-NEXT:    store i32 1, i32* @g, align 4
 ; CHECK-NEXT:    [[GEP:%.*]] = getelementptr [[TRIPLE:%.*]], %Triple* [[RHS:%.*]], i64 0, i32 0
 ; CHECK-NEXT:    [[L0_ADDR:%.*]] = getelementptr inbounds [[TRIPLE]], %Triple* [[LHS:%.*]], i64 0, i32 0
 ; CHECK-NEXT:    [[L0:%.*]] = load i32, i32* [[L0_ADDR]], align 4
 ; CHECK-NEXT:    [[R0_ADDR:%.*]] = getelementptr inbounds [[TRIPLE]], %Triple* [[RHS]], i64 0, i32 0
 ; CHECK-NEXT:    [[R0:%.*]] = load i32, i32* [[R0_ADDR]], align 4
+; CHECK-NEXT:    store i32 1, i32* @g, align 4
 ; CHECK-NEXT:    [[CMP0:%.*]] = icmp eq i32 [[L0]], [[R0]]
 ; CHECK-NEXT:    br i1 [[CMP0]], label %"bb1+bb2", label [[FINAL:%.*]]
 ; CHECK:       "bb1+bb2":
@@ -32,12 +32,12 @@ define i1 @bug(%Triple* nonnull dereferenceable(16) %lhs, %Triple* nonnull deref
 ; CHECK-NEXT:    ret i1 [[RET]]
 ;
 bb0:
-  store i32 1, i32* @g
   %gep = getelementptr %Triple, %Triple* %rhs, i64 0, i32 0
   %l0_addr = getelementptr inbounds %Triple, %Triple* %lhs, i64 0, i32 0
   %l0 = load i32, i32* %l0_addr, align 4
   %r0_addr = getelementptr inbounds %Triple, %Triple* %rhs, i64 0, i32 0
   %r0 = load i32, i32* %r0_addr, align 4
+  store i32 1, i32* @g
   %cmp0 = icmp eq i32 %l0, %r0
   br i1 %cmp0, label %bb1, label %final
 

diff  --git a/llvm/test/Transforms/MergeICmps/X86/split-block-does-work.ll b/llvm/test/Transforms/MergeICmps/X86/split-block-does-work.ll
index 2fa8d9f709cc..2bbf08b8e2fd 100644
--- a/llvm/test/Transforms/MergeICmps/X86/split-block-does-work.ll
+++ b/llvm/test/Transforms/MergeICmps/X86/split-block-does-work.ll
@@ -141,28 +141,20 @@ opeq1.exit:
   ret i1 %8
 }
 
-; TODO: The call happen before the loads, so it cannot clobber them.
+; The call happens before the loads, so it cannot clobber them.
 define zeroext i1 @opeq1_call_before_loads(
 ; X86-LABEL: @opeq1_call_before_loads(
-; X86-NEXT:  entry:
+; X86-NEXT:  "entry+land.rhs.i+land.rhs.i.2+land.rhs.i.3":
 ; X86-NEXT:    call void (...) @foo()
-; X86-NEXT:    [[FIRST_I:%.*]] = getelementptr inbounds [[S:%.*]], %S* [[A:%.*]], i64 0, i32 0
-; X86-NEXT:    [[TMP0:%.*]] = load i32, i32* [[FIRST_I]], align 4
-; X86-NEXT:    [[FIRST1_I:%.*]] = getelementptr inbounds [[S]], %S* [[B:%.*]], i64 0, i32 0
-; X86-NEXT:    [[TMP1:%.*]] = load i32, i32* [[FIRST1_I]], align 4
-; X86-NEXT:    [[CMP_I:%.*]] = icmp eq i32 [[TMP0]], [[TMP1]]
-; X86-NEXT:    br i1 [[CMP_I]], label %"land.rhs.i+land.rhs.i.2+land.rhs.i.3", label [[OPEQ1_EXIT:%.*]]
-; X86:       "land.rhs.i+land.rhs.i.2+land.rhs.i.3":
-; X86-NEXT:    [[TMP2:%.*]] = getelementptr inbounds [[S]], %S* [[A]], i64 0, i32 1
-; X86-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [[S]], %S* [[B]], i64 0, i32 1
-; X86-NEXT:    [[CSTR:%.*]] = bitcast i32* [[TMP2]] to i8*
-; X86-NEXT:    [[CSTR1:%.*]] = bitcast i32* [[TMP3]] to i8*
-; X86-NEXT:    [[MEMCMP:%.*]] = call i32 @memcmp(i8* [[CSTR]], i8* [[CSTR1]], i64 12)
-; X86-NEXT:    [[TMP4:%.*]] = icmp eq i32 [[MEMCMP]], 0
-; X86-NEXT:    br label [[OPEQ1_EXIT]]
+; X86-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[S:%.*]], %S* [[A:%.*]], i64 0, i32 0
+; X86-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[S]], %S* [[B:%.*]], i64 0, i32 0
+; X86-NEXT:    [[CSTR:%.*]] = bitcast i32* [[TMP0]] to i8*
+; X86-NEXT:    [[CSTR1:%.*]] = bitcast i32* [[TMP1]] to i8*
+; X86-NEXT:    [[MEMCMP:%.*]] = call i32 @memcmp(i8* [[CSTR]], i8* [[CSTR1]], i64 16)
+; X86-NEXT:    [[TMP2:%.*]] = icmp eq i32 [[MEMCMP]], 0
+; X86-NEXT:    br label [[OPEQ1_EXIT:%.*]]
 ; X86:       opeq1.exit:
-; X86-NEXT:    [[TMP5:%.*]] = phi i1 [ false, [[ENTRY:%.*]] ], [ [[TMP4]], %"land.rhs.i+land.rhs.i.2+land.rhs.i.3" ]
-; X86-NEXT:    ret i1 [[TMP5]]
+; X86-NEXT:    ret i1 [[TMP2]]
 ;
 ; Make sure this call is moved to the beginning of the entry block.
   %S* nocapture readonly dereferenceable(16) %a,


        


More information about the llvm-commits mailing list