[PATCH] D72944: [InstCombine] Fix worklist management when simplifying demanded bits (PR44541)

Fri Jan 17 12:09:16 PST 2020

nikic created this revision.
nikic added reviewers: spatel, lebedev.ri.
Herald added subscribers: llvm-commits, hiraditya.
Herald added a project: LLVM.
nikic marked an inline comment as done.
nikic added inline comments.


================
Comment at: llvm/test/Transforms/InstCombine/logical-select.ll:553
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp slt <4 x i8> [[COND:%.*]], zeroinitializer
+; CHECK-NEXT:    [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x i8> [[TVAL:%.*]], <4 x i8> [[FVAL:%.*]]
 ; CHECK-NEXT:    ret <4 x i8> [[TMP2]]
----------------
These changes are caused by https://bugs.llvm.org/show_bug.cgi?id=44521. It's one of two common spurious differences that arise when worklist order changes are made.


When simplifying demanded bits, we currently only report the instruction on which SimplifyDemandedBits was called as changed. However, this is a recursive call, and the actually modified instruction will usually be further up the chain. Additionally, all the intermediate instructions should also be revisited, as additional combines may be possible after the demanded bits simplification. We fix this by explicitly adding them to the worklist.

I originally wrote this patch to address the excessive number of instcombine iterations in an existing test (drops from 5 to 3, which is still not optimal), but found that this also addresses https://bugs.llvm.org/show_bug.cgi?id=44541, though I'm not sure whether this is really a "fix". What happens there is that demanded bits simplifies

  %zero = call i16 @passthru(i16 0)
  %sub = sub nuw nsw i16 %arg, %zero
  %cmp = icmp slt i16 %sub, 0
  %ret = select i1 %cmp, i16 0, i16 %sub

to

  %zero = call i16 @passthru(i16 0)
  %sub = sub nuw nsw i16 %arg, 0
  %cmp = icmp slt i16 %sub, 0
  %ret = select i1 %cmp, i16 0, i16 %sub

without adding the `sub` to the worklist (which this patch fixes).  Then `%cmp = icmp slt i16 %sub, 0` sensibly becomes `%cmp = icmp slt i16 %arg, 0`. The select is then recognized as a minmax SPF and canonicalized to

  %zero = call i16 @passthru(i16 0)
  %sub = sub nuw nsw i16 %arg, 0
  %cmp = icmp sgt i16 0, %sub
  %ret = select i1 %cmp, i16 0, i16 %sub

At which point we enter an infinite combine loop. Adding the `%sub` to the worklist is sufficient to get it simplified and avoid the issue. I don't know if that's considered enough, or whether there is also a bug in SPF matching or canonicalization here, in the sense that it needs to special-case this type of pattern, on the off-chance that it could appear through another pathway.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D72944

Files:
  llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
  llvm/test/Transforms/InstCombine/logical-select.ll
  llvm/test/Transforms/InstCombine/pr44541.ll
  llvm/test/Transforms/InstCombine/select-imm-canon.ll
  llvm/test/Transforms/InstCombine/vec_sext.ll


Index: llvm/test/Transforms/InstCombine/vec_sext.ll
===================================================================

--- llvm/test/Transforms/InstCombine/vec_sext.ll
+++ llvm/test/Transforms/InstCombine/vec_sext.ll
@@ -4,8 +4,8 @@
 define <4 x i32> @vec_select(<4 x i32> %a, <4 x i32> %b) {
 ; CHECK-LABEL: @vec_select(
 ; CHECK-NEXT:    [[SUB:%.*]] = sub nsw <4 x i32> zeroinitializer, [[A:%.*]]
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp sgt <4 x i32> [[B:%.*]], <i32 -1, i32 -1, i32 -1, i32 -1>
-; CHECK-NEXT:    [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[A]], <4 x i32> [[SUB]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp slt <4 x i32> [[B:%.*]], zeroinitializer
+; CHECK-NEXT:    [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[SUB]], <4 x i32> [[A]]
 ; CHECK-NEXT:    ret <4 x i32> [[TMP2]]
 ;
   %cmp = icmp slt <4 x i32> %b, zeroinitializer
@@ -23,8 +23,8 @@
 define <4 x i32> @vec_select_alternate_sign_bit_test(<4 x i32> %a, <4 x i32> %b) {
 ; CHECK-LABEL: @vec_select_alternate_sign_bit_test(
 ; CHECK-NEXT:    [[SUB:%.*]] = sub nsw <4 x i32> zeroinitializer, [[A:%.*]]
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp sgt <4 x i32> [[B:%.*]], <i32 -1, i32 -1, i32 -1, i32 -1>
-; CHECK-NEXT:    [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[SUB]], <4 x i32> [[A]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp slt <4 x i32> [[B:%.*]], zeroinitializer
+; CHECK-NEXT:    [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[A]], <4 x i32> [[SUB]]
 ; CHECK-NEXT:    ret <4 x i32> [[TMP2]]
 ;
   %cmp = icmp sgt <4 x i32> %b, <i32 -1, i32 -1, i32 -1, i32 -1>
Index: llvm/test/Transforms/InstCombine/select-imm-canon.ll
===================================================================
--- llvm/test/Transforms/InstCombine/select-imm-canon.ll
+++ llvm/test/Transforms/InstCombine/select-imm-canon.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -instcombine -S | FileCheck %s
+; RUN: opt < %s -instcombine -instcombine-infinite-loop-threshold=3 -S | FileCheck %s
 
 define i8 @single(i32 %A) {
 ; CHECK-LABEL: @single(
Index: llvm/test/Transforms/InstCombine/pr44541.ll
===================================================================
--- /dev/null
+++ llvm/test/Transforms/InstCombine/pr44541.ll
@@ -0,0 +1,25 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -instcombine -expensive-combines=0 -instcombine-infinite-loop-threshold=2 < %s | FileCheck %s
+
+; This test used to cause an infinite combine loop.
+
+define i16 @passthru(i16 returned %x) {
+; CHECK-LABEL: @passthru(
+; CHECK-NEXT:    ret i16 [[X:%.*]]
+;
+  ret i16 %x
+}
+
+define i16 @test(i16 %arg) {
+; CHECK-LABEL: @test(
+; CHECK-NEXT:    [[ZERO:%.*]] = call i16 @passthru(i16 0)
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp sgt i16 [[ARG:%.*]], 0
+; CHECK-NEXT:    [[RET:%.*]] = select i1 [[TMP1]], i16 [[ARG]], i16 0
+; CHECK-NEXT:    ret i16 [[RET]]
+;
+  %zero = call i16 @passthru(i16 0)
+  %sub = sub nuw nsw i16 %arg, %zero
+  %cmp = icmp slt i16 %sub, 0
+  %ret = select i1 %cmp, i16 0, i16 %sub
+  ret i16 %ret
+}
Index: llvm/test/Transforms/InstCombine/logical-select.ll
===================================================================
--- llvm/test/Transforms/InstCombine/logical-select.ll
+++ llvm/test/Transforms/InstCombine/logical-select.ll
@@ -549,8 +549,8 @@
 
 define <4 x i8> @allSignBits_vec(<4 x i8> %cond, <4 x i8> %tval, <4 x i8> %fval) {
 ; CHECK-LABEL: @allSignBits_vec(
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp sgt <4 x i8> [[COND:%.*]], <i8 -1, i8 -1, i8 -1, i8 -1>
-; CHECK-NEXT:    [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x i8> [[FVAL:%.*]], <4 x i8> [[TVAL:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp slt <4 x i8> [[COND:%.*]], zeroinitializer
+; CHECK-NEXT:    [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x i8> [[TVAL:%.*]], <4 x i8> [[FVAL:%.*]]
 ; CHECK-NEXT:    ret <4 x i8> [[TMP2]]
 ;
   %bitmask = ashr <4 x i8> %cond, <i8 7, i8 7, i8 7, i8 7>
Index: llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
===================================================================
--- llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
+++ llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
@@ -88,6 +88,9 @@
                                           Depth, I);
   if (!NewVal) return false;
   U = NewVal;
+  // Add the simplified instruction back to the worklist.
+  if (auto *UseInst = dyn_cast<Instruction>(U.get()))
+    Worklist.Add(UseInst);
   return true;
 }
 


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D72944.238842.patch
Type: text/x-patch
Size: 4515 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20200117/1f99d438/attachment.bin>