[llvm-branch-commits] [llvm] 7adf83b - [InstCombine] Fix worklist management in DSE (PR44552)

Hans Wennborg via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Wed Jan 22 15:44:40 PST 2020


Author: Nikita Popov
Date: 2020-01-23T00:41:57+01:00
New Revision: 7adf83beece381d153f8b4a67fa341d13f9c4ba2

URL: https://github.com/llvm/llvm-project/commit/7adf83beece381d153f8b4a67fa341d13f9c4ba2
DIFF: https://github.com/llvm/llvm-project/commit/7adf83beece381d153f8b4a67fa341d13f9c4ba2.diff

LOG: [InstCombine] Fix worklist management in DSE (PR44552)

Fixes https://bugs.llvm.org/show_bug.cgi?id=44552. We need to make
sure that the store is reprocessed, because performing DSE may
expose more DSE opportunities.

There is a slight caveat here though: We need to make sure that we
add the store back to the worklist first, because that means it will
be processed after the operands of the removed store have been
processed. This is a general bug in InstCombine worklist management
that I hope to address at some point, but for now it means we need
to do this manually rather than just returning the instruction as
changed.

Differential Revision: https://reviews.llvm.org/D72807

(cherry picked from commit 522c030aa9b1dd1881feb5a0d0fa2639b4a5feb7)

Added: 
    llvm/test/Transforms/InstCombine/pr44552.ll

Modified: 
    llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp

Removed: 
    


################################################################################
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
index ebf9d24eecc4..c288a7d8d403 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
@@ -1439,9 +1439,12 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) {
       if (PrevSI->isUnordered() && equivalentAddressValues(PrevSI->getOperand(1),
                                                         SI.getOperand(1))) {
         ++NumDeadStore;
-        ++BBI;
+        // Manually add back the original store to the worklist now, so it will
+        // be processed after the operands of the removed store, as this may
+        // expose additional DSE opportunities.
+        Worklist.Add(&SI);
         eraseInstFromFunction(*PrevSI);
-        continue;
+        return nullptr;
       }
       break;
     }

diff --git a/llvm/test/Transforms/InstCombine/pr44552.ll b/llvm/test/Transforms/InstCombine/pr44552.ll
new file mode 100644
index 000000000000..adefe829df2f
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/pr44552.ll
@@ -0,0 +1,59 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -instcombine -instcombine-infinite-loop-threshold=2 < %s | FileCheck %s
+
+; This used to require 10 instcombine iterations to fully optimize.
+; The number of iterations grew linearly with the number of DSEd stores,
+; resulting in overall quadratic runtime.
+
+%struct.S3 = type { i64 }
+
+@csmith_sink_ = dso_local global i64 0, align 1
+@g_302_7 = internal constant i32 0, align 1
+@g_313_0 = internal global i16 0, align 1
+@g_313_1 = internal global i32 0, align 1
+@g_313_2 = internal global i32 0, align 1
+@g_313_3 = internal global i32 0, align 1
+@g_313_4 = internal global i16 0, align 1
+@g_313_5 = internal global i16 0, align 1
+@g_313_6 = internal global i16 0, align 1
+@g_316 = internal global %struct.S3 zeroinitializer, align 1
+@g_316_1_0 = internal global i16 0, align 1
+
+define i16 @main() {
+; CHECK-LABEL: @main(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    store i64 0, i64* @csmith_sink_, align 8
+; CHECK-NEXT:    ret i16 0
+;
+entry:
+  store i64 0, i64* @csmith_sink_, align 1
+  %0 = load i16, i16* @g_313_0, align 1
+  %conv2 = sext i16 %0 to i64
+  store i64 %conv2, i64* @csmith_sink_, align 1
+  %1 = load i32, i32* @g_313_1, align 1
+  %conv3 = zext i32 %1 to i64
+  store i64 %conv3, i64* @csmith_sink_, align 1
+  %2 = load i32, i32* @g_313_2, align 1
+  %conv4 = sext i32 %2 to i64
+  store i64 %conv4, i64* @csmith_sink_, align 1
+  %3 = load i32, i32* @g_313_3, align 1
+  %conv5 = zext i32 %3 to i64
+  store i64 %conv5, i64* @csmith_sink_, align 1
+  %4 = load i16, i16* @g_313_4, align 1
+  %conv6 = sext i16 %4 to i64
+  store i64 %conv6, i64* @csmith_sink_, align 1
+  %5 = load i16, i16* @g_313_5, align 1
+  %conv7 = sext i16 %5 to i64
+  store i64 %conv7, i64* @csmith_sink_, align 1
+  %6 = load i16, i16* @g_313_6, align 1
+  %conv8 = sext i16 %6 to i64
+  store i64 %conv8, i64* @csmith_sink_, align 1
+  %7 = load i64, i64* getelementptr inbounds (%struct.S3, %struct.S3* @g_316, i32 0, i32 0), align 1
+  store i64 %7, i64* @csmith_sink_, align 1
+  %8 = load i16, i16* @g_316_1_0, align 1
+  %conv9 = sext i16 %8 to i64
+  store i64 %conv9, i64* @csmith_sink_, align 1
+  store i64 0, i64* @csmith_sink_, align 1
+  ret i16 0
+}
+

More information about the llvm-branch-commits mailing list