[llvm] [GVN] Drop Clobber dependency if store may overwrite only the same value (PR #68322)

Sergey Kachkov via llvm-commits llvm-commits at lists.llvm.org
Thu Oct 5 08:27:09 PDT 2023


https://github.com/skachkov-sc created https://github.com/llvm/llvm-project/pull/68322

Motivation: https://godbolt.org/z/sfx6fPc3n

In this example the second load is redundant and can be optimized. The problem is a clobbering store between it and the first load, but here the store can be safely skipped: a one-byte store with alignment of 1 can only be MustAlias or NoAlias with the load's location (no partial overlap is possible), and in the MustAlias case it rewrites exactly the same value.

Note: this case can be optimized by GCC and there were previous attempts to fix this issue, e.g. https://reviews.llvm.org/D155843


>From bca61c872522bd82905aa85dd20bf61dd64a9ea0 Mon Sep 17 00:00:00 2001
From: Sergey Kachkov <sergey.kachkov at syntacore.com>
Date: Thu, 5 Oct 2023 15:56:54 +0300
Subject: [PATCH 1/2] [GVN][NFC] Add pre-commit test

---
 .../Transforms/GVN/rle-clobbering-store.ll    | 94 +++++++++++++++++++
 1 file changed, 94 insertions(+)
 create mode 100644 llvm/test/Transforms/GVN/rle-clobbering-store.ll

diff --git a/llvm/test/Transforms/GVN/rle-clobbering-store.ll b/llvm/test/Transforms/GVN/rle-clobbering-store.ll
new file mode 100644
index 000000000000000..13b9e15186bd02c
--- /dev/null
+++ b/llvm/test/Transforms/GVN/rle-clobbering-store.ll
@@ -0,0 +1,94 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3
+; RUN: opt < %s -passes=gvn -S | FileCheck %s
+
+define void @test_i8(ptr %a, ptr %b, ptr %c) {
+; CHECK-LABEL: define void @test_i8(
+; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]], ptr [[C:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i8, ptr [[A]], align 1
+; CHECK-NEXT:    store i8 [[TMP0]], ptr [[B]], align 1
+; CHECK-NEXT:    [[TMP1:%.*]] = load i8, ptr [[A]], align 1
+; CHECK-NEXT:    store i8 [[TMP1]], ptr [[C]], align 1
+; CHECK-NEXT:    ret void
+;
+entry:
+  %0 = load i8, ptr %a, align 1
+  store i8 %0, ptr %b, align 1
+  %1 = load i8, ptr %a, align 1
+  store i8 %1, ptr %c, align 1
+  ret void
+}
+
+define void @test_i32(ptr %a, ptr %b, ptr %c) {
+; CHECK-LABEL: define void @test_i32(
+; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]], ptr [[C:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
+; CHECK-NEXT:    store i32 [[TMP0]], ptr [[B]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[A]], align 4
+; CHECK-NEXT:    store i32 [[TMP1]], ptr [[C]], align 4
+; CHECK-NEXT:    ret void
+;
+entry:
+  %0 = load i32, ptr %a, align 4
+  store i32 %0, ptr %b, align 4
+  %1 = load i32, ptr %a, align 4
+  store i32 %1, ptr %c, align 4
+  ret void
+}
+
+define void @test_float(ptr %a, ptr %b, ptr %c) {
+; CHECK-LABEL: define void @test_float(
+; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]], ptr [[C:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[A]], align 4
+; CHECK-NEXT:    store float [[TMP0]], ptr [[B]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = load float, ptr [[A]], align 4
+; CHECK-NEXT:    store float [[TMP1]], ptr [[C]], align 4
+; CHECK-NEXT:    ret void
+;
+entry:
+  %0 = load float, ptr %a, align 4
+  store float %0, ptr %b, align 4
+  %1 = load float, ptr %a, align 4
+  store float %1, ptr %c, align 4
+  ret void
+}
+
+define void @test_unaligned(ptr %a, ptr %b, ptr %c) {
+; CHECK-LABEL: define void @test_unaligned(
+; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]], ptr [[C:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
+; CHECK-NEXT:    store i32 [[TMP0]], ptr [[B]], align 2
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[A]], align 4
+; CHECK-NEXT:    store i32 [[TMP1]], ptr [[C]], align 2
+; CHECK-NEXT:    ret void
+;
+entry:
+  %0 = load i32, ptr %a, align 4
+  store i32 %0, ptr %b, align 2
+  %1 = load i32, ptr %a, align 4
+  store i32 %1, ptr %c, align 2
+  ret void
+}
+
+define void @test_modify_between(ptr %a, ptr %b, ptr %c) {
+; CHECK-LABEL: define void @test_modify_between(
+; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]], ptr [[C:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i8, ptr [[A]], align 1
+; CHECK-NEXT:    store i8 42, ptr [[C]], align 1
+; CHECK-NEXT:    store i8 [[TMP0]], ptr [[B]], align 1
+; CHECK-NEXT:    [[TMP1:%.*]] = load i8, ptr [[A]], align 1
+; CHECK-NEXT:    store i8 [[TMP1]], ptr [[C]], align 1
+; CHECK-NEXT:    ret void
+;
+entry:
+  %0 = load i8, ptr %a, align 1
+  store i8 42, ptr %c, align 1
+  store i8 %0, ptr %b, align 1
+  %1 = load i8, ptr %a, align 1
+  store i8 %1, ptr %c, align 1
+  ret void
+}

>From 07a2e8973297db3731c8cc7b28697a740e01e858 Mon Sep 17 00:00:00 2001
From: Sergey Kachkov <sergey.kachkov at syntacore.com>
Date: Thu, 5 Oct 2023 16:26:28 +0300
Subject: [PATCH 2/2] [GVN] Drop Clobber dependency if store may overwrite only
 the same value

In some cases a clobbering store can be safely skipped if it can only
MustAlias or NoAlias with the memory location (no partial overlap is
possible) and it writes the same value that the location already
contains. This patch supports the simple case where the value at the
memory location was loaded in the same basic block before the store and
there are no modifications between them.
---
 .../lib/Analysis/MemoryDependenceAnalysis.cpp | 44 ++++++++++++++++---
 .../Transforms/GVN/rle-clobbering-store.ll    |  9 ++--
 2 files changed, 42 insertions(+), 11 deletions(-)

diff --git a/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp b/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp
index 071ecdba8a54ace..eb503e4a781c94e 100644
--- a/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp
+++ b/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp
@@ -360,11 +360,42 @@ MemoryDependenceResults::getInvariantGroupPointerDependency(LoadInst *LI,
   return MemDepResult::getNonLocal();
 }
 
+// canSkipClobberingStore - check if SI that may alias with MemLoc can be safely
+// skipped. This is possible in case if SI can only must alias or no alias with
+// MemLoc (no partial overlapping possible) and it writes the same value that
+// MemLoc contains now (it was loaded before this store and was not modified in
+// between).
+static bool canSkipClobberingStore(const StoreInst *SI,
+                                   const MemoryLocation &MemLoc,
+                                   Align MemLocAlign, BatchAAResults &BatchAA) {
+  if (!MemLoc.Size.hasValue())
+    return false;
+  if (MemoryLocation::get(SI).Size != MemLoc.Size)
+    return false;
+  if (MemLocAlign.value() < MemLoc.Size.getValue())
+    return false;
+  if (SI->getAlign() < MemLocAlign)
+    return false;
+
+  auto *LI = dyn_cast<LoadInst>(SI->getValueOperand());
+  if (!LI || LI->getParent() != SI->getParent())
+    return false;
+  if (BatchAA.alias(MemoryLocation::get(LI), MemLoc) != AliasResult::MustAlias)
+    return false;
+  for (const auto *I = LI->getNextNode(); I != SI; I = I->getNextNode())
+    if (isModSet(BatchAA.getModRefInfo(I, MemLoc)))
+      return false;
+
+  return true;
+}
+
 MemDepResult MemoryDependenceResults::getSimplePointerDependencyFrom(
     const MemoryLocation &MemLoc, bool isLoad, BasicBlock::iterator ScanIt,
     BasicBlock *BB, Instruction *QueryInst, unsigned *Limit,
     BatchAAResults &BatchAA) {
   bool isInvariantLoad = false;
+  Align MemLocAlign =
+      MemLoc.Ptr->getPointerAlignment(BB->getModule()->getDataLayout());
 
   unsigned DefaultLimit = getDefaultBlockScanLimit();
   if (!Limit)
@@ -402,11 +433,12 @@ MemDepResult MemoryDependenceResults::getSimplePointerDependencyFrom(
   // do want to respect mustalias results since defs are useful for value
   // forwarding, but any mayalias write can be assumed to be noalias.
   // Arguably, this logic should be pushed inside AliasAnalysis itself.
-  if (isLoad && QueryInst) {
-    LoadInst *LI = dyn_cast<LoadInst>(QueryInst);
-    if (LI && LI->hasMetadata(LLVMContext::MD_invariant_load))
-      isInvariantLoad = true;
-  }
+  if (isLoad && QueryInst)
+    if (LoadInst *LI = dyn_cast<LoadInst>(QueryInst)) {
+      if (LI->hasMetadata(LLVMContext::MD_invariant_load))
+        isInvariantLoad = true;
+      MemLocAlign = LI->getAlign();
+    }
 
   // True for volatile instruction.
   // For Load/Store return true if atomic ordering is stronger than AO,
@@ -577,6 +609,8 @@ MemDepResult MemoryDependenceResults::getSimplePointerDependencyFrom(
         return MemDepResult::getDef(Inst);
       if (isInvariantLoad)
         continue;
+      if (canSkipClobberingStore(SI, MemLoc, MemLocAlign, BatchAA))
+        continue;
       return MemDepResult::getClobber(Inst);
     }
 
diff --git a/llvm/test/Transforms/GVN/rle-clobbering-store.ll b/llvm/test/Transforms/GVN/rle-clobbering-store.ll
index 13b9e15186bd02c..d6b8949752af4ac 100644
--- a/llvm/test/Transforms/GVN/rle-clobbering-store.ll
+++ b/llvm/test/Transforms/GVN/rle-clobbering-store.ll
@@ -7,8 +7,7 @@ define void @test_i8(ptr %a, ptr %b, ptr %c) {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i8, ptr [[A]], align 1
 ; CHECK-NEXT:    store i8 [[TMP0]], ptr [[B]], align 1
-; CHECK-NEXT:    [[TMP1:%.*]] = load i8, ptr [[A]], align 1
-; CHECK-NEXT:    store i8 [[TMP1]], ptr [[C]], align 1
+; CHECK-NEXT:    store i8 [[TMP0]], ptr [[C]], align 1
 ; CHECK-NEXT:    ret void
 ;
 entry:
@@ -25,8 +24,7 @@ define void @test_i32(ptr %a, ptr %b, ptr %c) {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
 ; CHECK-NEXT:    store i32 [[TMP0]], ptr [[B]], align 4
-; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[A]], align 4
-; CHECK-NEXT:    store i32 [[TMP1]], ptr [[C]], align 4
+; CHECK-NEXT:    store i32 [[TMP0]], ptr [[C]], align 4
 ; CHECK-NEXT:    ret void
 ;
 entry:
@@ -43,8 +41,7 @@ define void @test_float(ptr %a, ptr %b, ptr %c) {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[A]], align 4
 ; CHECK-NEXT:    store float [[TMP0]], ptr [[B]], align 4
-; CHECK-NEXT:    [[TMP1:%.*]] = load float, ptr [[A]], align 4
-; CHECK-NEXT:    store float [[TMP1]], ptr [[C]], align 4
+; CHECK-NEXT:    store float [[TMP0]], ptr [[C]], align 4
 ; CHECK-NEXT:    ret void
 ;
 entry:



More information about the llvm-commits mailing list