[llvm] 39c1653 - [JumpThreading] Conditionally freeze its condition when unfolding select

Juneyoung Lee via llvm-commits llvm-commits at lists.llvm.org
Wed Sep 9 23:50:21 PDT 2020


Author: Juneyoung Lee
Date: 2020-09-10T15:49:40+09:00
New Revision: 39c1653b3dbb7d1c439a3e8cf31d1aa159a4afc5

URL: https://github.com/llvm/llvm-project/commit/39c1653b3dbb7d1c439a3e8cf31d1aa159a4afc5
DIFF: https://github.com/llvm/llvm-project/commit/39c1653b3dbb7d1c439a3e8cf31d1aa159a4afc5.diff

LOG: [JumpThreading] Conditionally freeze its condition when unfolding select

This patch fixes pr45956 (https://bugs.llvm.org/show_bug.cgi?id=45956 ).
To minimize its impact to the quality of generated code, I suggest enabling
this only for LTO as a start (it has two JumpThreading passes registered).
This patch contains a flag that makes JumpThreading enable it.

Reviewed By: efriedma

Differential Revision: https://reviews.llvm.org/D84940

Added: 
    llvm/test/Transforms/JumpThreading/select-unfold-freeze.ll

Modified: 
    llvm/include/llvm/Transforms/Scalar.h
    llvm/include/llvm/Transforms/Scalar/JumpThreading.h
    llvm/lib/Transforms/Scalar/JumpThreading.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/Transforms/Scalar.h b/llvm/include/llvm/Transforms/Scalar.h
index 242ffa0ede09..5ab8a0584ad0 100644
--- a/llvm/include/llvm/Transforms/Scalar.h
+++ b/llvm/include/llvm/Transforms/Scalar.h
@@ -240,10 +240,12 @@ FunctionPass *createReassociatePass();
 //===----------------------------------------------------------------------===//
 //
 // JumpThreading - Thread control through mult-pred/multi-succ blocks where some
-// preds always go to some succ. Thresholds other than minus one override the
-// internal BB duplication default threshold.
+// preds always go to some succ. If FreezeSelectCond is true, unfold the
+// condition of a select that unfolds to branch. Thresholds other than minus one
+// override the internal BB duplication default threshold.
 //
-FunctionPass *createJumpThreadingPass(int Threshold = -1);
+FunctionPass *createJumpThreadingPass(bool FreezeSelectCond = false,
+                                      int Threshold = -1);
 
 //===----------------------------------------------------------------------===//
 //

diff  --git a/llvm/include/llvm/Transforms/Scalar/JumpThreading.h b/llvm/include/llvm/Transforms/Scalar/JumpThreading.h
index 327bf6d00c47..b5b907471cd7 100644
--- a/llvm/include/llvm/Transforms/Scalar/JumpThreading.h
+++ b/llvm/include/llvm/Transforms/Scalar/JumpThreading.h
@@ -91,9 +91,10 @@ class JumpThreadingPass : public PassInfoMixin<JumpThreadingPass> {
 
   unsigned BBDupThreshold;
   unsigned DefaultBBDupThreshold;
+  bool InsertFreezeWhenUnfoldingSelect;
 
 public:
-  JumpThreadingPass(int T = -1);
+  JumpThreadingPass(bool InsertFreezeWhenUnfoldingSelect = false, int T = -1);
 
   // Glue for old PM.
   bool runImpl(Function &F, TargetLibraryInfo *TLI_, LazyValueInfo *LVI_,

diff  --git a/llvm/lib/Transforms/Scalar/JumpThreading.cpp b/llvm/lib/Transforms/Scalar/JumpThreading.cpp
index 311ca11de84e..354afc710f31 100644
--- a/llvm/lib/Transforms/Scalar/JumpThreading.cpp
+++ b/llvm/lib/Transforms/Scalar/JumpThreading.cpp
@@ -104,6 +104,11 @@ static cl::opt<bool> PrintLVIAfterJumpThreading(
     cl::desc("Print the LazyValueInfo cache after JumpThreading"), cl::init(false),
     cl::Hidden);
 
+static cl::opt<bool> JumpThreadingFreezeSelectCond(
+    "jump-threading-freeze-select-cond",
+    cl::desc("Freeze the condition when unfolding select"), cl::init(false),
+    cl::Hidden);
+
 static cl::opt<bool> ThreadAcrossLoopHeaders(
     "jump-threading-across-loop-headers",
     cl::desc("Allow JumpThreading to thread across loop headers, for testing"),
@@ -133,7 +138,8 @@ namespace {
   public:
     static char ID; // Pass identification
 
-    JumpThreading(int T = -1) : FunctionPass(ID), Impl(T) {
+    JumpThreading(bool InsertFreezeWhenUnfoldingSelect = false, int T = -1)
+        : FunctionPass(ID), Impl(InsertFreezeWhenUnfoldingSelect, T) {
       initializeJumpThreadingPass(*PassRegistry::getPassRegistry());
     }
 
@@ -166,11 +172,12 @@ INITIALIZE_PASS_END(JumpThreading, "jump-threading",
                 "Jump Threading", false, false)
 
 // Public interface to the Jump Threading pass
-FunctionPass *llvm::createJumpThreadingPass(int Threshold) {
-  return new JumpThreading(Threshold);
+FunctionPass *llvm::createJumpThreadingPass(bool InsertFr, int Threshold) {
+  return new JumpThreading(InsertFr, Threshold);
 }
 
-JumpThreadingPass::JumpThreadingPass(int T) {
+JumpThreadingPass::JumpThreadingPass(bool InsertFr, int T) {
+  InsertFreezeWhenUnfoldingSelect = JumpThreadingFreezeSelectCond | InsertFr;
   DefaultBBDupThreshold = (T == -1) ? BBDuplicateThreshold : unsigned(T);
 }
 
@@ -2798,13 +2805,8 @@ bool JumpThreadingPass::TryToUnfoldSelect(CmpInst *CondCmp, BasicBlock *BB) {
 /// select is not jump-threaded, it will be folded again in the later
 /// optimizations.
 bool JumpThreadingPass::TryToUnfoldSelectInCurrBB(BasicBlock *BB) {
-  // This transform can introduce a UB (a conditional branch that depends on a
-  // poison value) that was not present in the original program. See
-  // @TryToUnfoldSelectInCurrBB test in test/Transforms/JumpThreading/select.ll.
+  // This transform would reduce the quality of msan diagnostics.
   // Disable this transform under MemorySanitizer.
-  // FIXME: either delete it or replace with a valid transform. This issue is
-  // not limited to MemorySanitizer (but has only been observed as an MSan false
-  // positive in practice so far).
   if (BB->getParent()->hasFnAttribute(Attribute::SanitizeMemory))
     return false;
 
@@ -2852,8 +2854,11 @@ bool JumpThreadingPass::TryToUnfoldSelectInCurrBB(BasicBlock *BB) {
     if (!SI)
       continue;
     // Expand the select.
-    Instruction *Term =
-        SplitBlockAndInsertIfThen(SI->getCondition(), SI, false);
+    Value *Cond = SI->getCondition();
+    if (InsertFreezeWhenUnfoldingSelect &&
+        !isGuaranteedNotToBeUndefOrPoison(Cond, SI, &DTU->getDomTree()))
+      Cond = new FreezeInst(Cond, "cond.fr", SI);
+    Instruction *Term = SplitBlockAndInsertIfThen(Cond, SI, false);
     BasicBlock *SplitBB = SI->getParent();
     BasicBlock *NewBB = Term->getParent();
     PHINode *NewPN = PHINode::Create(SI->getType(), 2, "", SI);

diff  --git a/llvm/test/Transforms/JumpThreading/select-unfold-freeze.ll b/llvm/test/Transforms/JumpThreading/select-unfold-freeze.ll
new file mode 100644
index 000000000000..12288fc27262
--- /dev/null
+++ b/llvm/test/Transforms/JumpThreading/select-unfold-freeze.ll
@@ -0,0 +1,248 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -jump-threading-freeze-select-cond -jump-threading < %s | FileCheck %s
+
+declare void @foo()
+declare void @bar()
+declare void @baz()
+declare void @quux()
+
+
+define void @test_switch_cmp(i1 %cond, i32 %val, i8 %value) nounwind {
+; CHECK-LABEL: @test_switch_cmp(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 [[COND:%.*]], label [[L0:%.*]], label [[L0_THREAD:%.*]]
+; CHECK:       L0:
+; CHECK-NEXT:    [[VAL_PHI:%.*]] = phi i32 [ [[VAL:%.*]], [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[VAL_PHI]], 0
+; CHECK-NEXT:    [[COND_FR:%.*]] = freeze i1 [[CMP]]
+; CHECK-NEXT:    br i1 [[COND_FR]], label [[L1:%.*]], label [[TMP0:%.*]]
+; CHECK:       0:
+; CHECK-NEXT:    [[TMP1:%.*]] = phi i8 [ [[VALUE:%.*]], [[L0]] ]
+; CHECK-NEXT:    switch i8 [[TMP1]], label [[L3:%.*]] [
+; CHECK-NEXT:    i8 1, label [[L1]]
+; CHECK-NEXT:    i8 2, label [[L2:%.*]]
+; CHECK-NEXT:    ]
+; CHECK:       L1:
+; CHECK-NEXT:    call void @foo()
+; CHECK-NEXT:    ret void
+; CHECK:       L2:
+; CHECK-NEXT:    call void @bar()
+; CHECK-NEXT:    ret void
+; CHECK:       L3:
+; CHECK-NEXT:    call void @baz()
+; CHECK-NEXT:    ret void
+; CHECK:       L0.thread:
+; CHECK-NEXT:    call void @quux()
+; CHECK-NEXT:    br label [[L1]]
+;
+entry:
+  br i1 %cond, label %L0, label %L4
+L0:
+  %val.phi = phi i32 [%val, %entry], [-1, %L4]
+  %cmp = icmp slt i32 %val.phi, 0
+  %expr = select i1 %cmp, i8 1, i8 %value
+  switch i8 %expr, label %L3 [i8 1, label %L1 i8 2, label %L2]
+
+L1:
+  call void @foo()
+  ret void
+L2:
+  call void @bar()
+  ret void
+L3:
+  call void @baz()
+  ret void
+L4:
+  call void @quux()
+  br label %L0
+}
+
+define i32 @unfold3(i32 %u, i32 %v, i32 %w, i32 %x, i32 %y, i32 %z, i32 %j) nounwind {
+; CHECK-LABEL: @unfold3(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[ADD3:%.*]] = add nsw i32 [[J:%.*]], 2
+; CHECK-NEXT:    [[CMP_I:%.*]] = icmp slt i32 [[U:%.*]], [[V:%.*]]
+; CHECK-NEXT:    br i1 [[CMP_I]], label [[DOTEXIT_THREAD4:%.*]], label [[COND_FALSE_I:%.*]]
+; CHECK:       cond.false.i:
+; CHECK-NEXT:    [[CMP4_I:%.*]] = icmp sgt i32 [[U]], [[V]]
+; CHECK-NEXT:    br i1 [[CMP4_I]], label [[DOTEXIT_THREAD:%.*]], label [[COND_FALSE_6_I:%.*]]
+; CHECK:       cond.false.6.i:
+; CHECK-NEXT:    [[CMP8_I:%.*]] = icmp slt i32 [[W:%.*]], [[X:%.*]]
+; CHECK-NEXT:    br i1 [[CMP8_I]], label [[DOTEXIT_THREAD4]], label [[COND_FALSE_10_I:%.*]]
+; CHECK:       cond.false.10.i:
+; CHECK-NEXT:    [[CMP13_I:%.*]] = icmp sgt i32 [[W]], [[X]]
+; CHECK-NEXT:    br i1 [[CMP13_I]], label [[DOTEXIT_THREAD]], label [[DOTEXIT:%.*]]
+; CHECK:       .exit:
+; CHECK-NEXT:    [[PHITMP:%.*]] = icmp sge i32 [[Y:%.*]], [[Z:%.*]]
+; CHECK-NEXT:    [[COND_FR:%.*]] = freeze i1 [[PHITMP]]
+; CHECK-NEXT:    br i1 [[COND_FR]], label [[DOTEXIT_THREAD]], label [[DOTEXIT_THREAD4]]
+; CHECK:       .exit.thread:
+; CHECK-NEXT:    br label [[DOTEXIT_THREAD4]]
+; CHECK:       .exit.thread4:
+; CHECK-NEXT:    [[TMP0:%.*]] = phi i32 [ [[J]], [[DOTEXIT_THREAD]] ], [ [[ADD3]], [[DOTEXIT]] ], [ [[ADD3]], [[ENTRY:%.*]] ], [ [[ADD3]], [[COND_FALSE_6_I]] ]
+; CHECK-NEXT:    ret i32 [[TMP0]]
+;
+entry:
+  %add3 = add nsw i32 %j, 2
+  %cmp.i = icmp slt i32 %u, %v
+  br i1 %cmp.i, label %.exit, label %cond.false.i
+
+cond.false.i:                                     ; preds = %entry
+  %cmp4.i = icmp sgt i32 %u, %v
+  br i1 %cmp4.i, label %.exit, label %cond.false.6.i
+
+cond.false.6.i:                                   ; preds = %cond.false.i
+  %cmp8.i = icmp slt i32 %w, %x
+  br i1 %cmp8.i, label %.exit, label %cond.false.10.i
+
+cond.false.10.i:                                  ; preds = %cond.false.6.i
+  %cmp13.i = icmp sgt i32 %w, %x
+  br i1 %cmp13.i, label %.exit, label %cond.false.15.i
+
+cond.false.15.i:                                  ; preds = %cond.false.10.i
+  %phitmp = icmp sge i32 %y, %z
+  br label %.exit
+
+.exit:                                  ; preds = %entry, %cond.false.i, %cond.false.6.i, %cond.false.10.i, %cond.false.15.i
+  %cond23.i = phi i1 [ false, %entry ], [ true, %cond.false.i ], [ false, %cond.false.6.i ], [ %phitmp, %cond.false.15.i ], [ true, %cond.false.10.i ]
+  %j.add3 = select i1 %cond23.i, i32 %j, i32 %add3
+  ret i32 %j.add3
+}
+
+define i32 @unfold4(i32 %u, i32 %v, i32 %w, i32 %x, i32 %y, i32 %z, i32 %j) nounwind {
+; CHECK-LABEL: @unfold4(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[ADD3:%.*]] = add nsw i32 [[J:%.*]], 2
+; CHECK-NEXT:    [[CMP_I:%.*]] = icmp slt i32 [[U:%.*]], [[V:%.*]]
+; CHECK-NEXT:    br i1 [[CMP_I]], label [[DOTEXIT_THREAD:%.*]], label [[COND_FALSE_I:%.*]]
+; CHECK:       cond.false.i:
+; CHECK-NEXT:    [[CMP4_I:%.*]] = icmp sgt i32 [[U]], [[V]]
+; CHECK-NEXT:    br i1 [[CMP4_I]], label [[DOTEXIT_THREAD5:%.*]], label [[COND_FALSE_6_I:%.*]]
+; CHECK:       cond.false.6.i:
+; CHECK-NEXT:    [[CMP8_I:%.*]] = icmp slt i32 [[W:%.*]], [[X:%.*]]
+; CHECK-NEXT:    br i1 [[CMP8_I]], label [[DOTEXIT_THREAD]], label [[COND_FALSE_10_I:%.*]]
+; CHECK:       cond.false.10.i:
+; CHECK-NEXT:    [[CMP13_I:%.*]] = icmp sgt i32 [[W]], [[X]]
+; CHECK-NEXT:    br i1 [[CMP13_I]], label [[DOTEXIT_THREAD5]], label [[DOTEXIT:%.*]]
+; CHECK:       .exit:
+; CHECK-NEXT:    [[CMP19_I:%.*]] = icmp sge i32 [[Y:%.*]], [[Z:%.*]]
+; CHECK-NEXT:    [[CONV:%.*]] = zext i1 [[CMP19_I]] to i32
+; CHECK-NEXT:    [[LNOT_I18:%.*]] = icmp eq i32 [[CONV]], 1
+; CHECK-NEXT:    [[COND_FR:%.*]] = freeze i1 [[LNOT_I18]]
+; CHECK-NEXT:    br i1 [[COND_FR]], label [[DOTEXIT_THREAD]], label [[DOTEXIT_THREAD5]]
+; CHECK:       .exit.thread:
+; CHECK-NEXT:    br label [[DOTEXIT_THREAD5]]
+; CHECK:       .exit.thread5:
+; CHECK-NEXT:    [[TMP0:%.*]] = phi i32 [ [[J]], [[DOTEXIT_THREAD]] ], [ [[ADD3]], [[DOTEXIT]] ], [ [[ADD3]], [[COND_FALSE_I]] ], [ [[ADD3]], [[COND_FALSE_10_I]] ]
+; CHECK-NEXT:    ret i32 [[TMP0]]
+;
+entry:
+  %add3 = add nsw i32 %j, 2
+  %cmp.i = icmp slt i32 %u, %v
+  br i1 %cmp.i, label %.exit, label %cond.false.i
+
+cond.false.i:                                     ; preds = %entry
+  %cmp4.i = icmp sgt i32 %u, %v
+  br i1 %cmp4.i, label %.exit, label %cond.false.6.i
+
+cond.false.6.i:                                   ; preds = %cond.false.i
+  %cmp8.i = icmp slt i32 %w, %x
+  br i1 %cmp8.i, label %.exit, label %cond.false.10.i
+
+cond.false.10.i:                                  ; preds = %cond.false.6.i
+  %cmp13.i = icmp sgt i32 %w, %x
+  br i1 %cmp13.i, label %.exit, label %cond.false.15.i
+
+cond.false.15.i:                                  ; preds = %cond.false.10.i
+  %cmp19.i = icmp sge i32 %y, %z
+  %conv = zext i1 %cmp19.i to i32
+  br label %.exit
+
+.exit:                                  ; preds = %entry, %cond.false.i, %cond.false.6.i, %cond.false.10.i, %cond.false.15.i
+  %cond23.i = phi i32 [ 1, %entry ], [ 0, %cond.false.i ], [ 1, %cond.false.6.i ], [ %conv, %cond.false.15.i ], [ 0, %cond.false.10.i ]
+  %lnot.i18 = icmp eq i32 %cond23.i, 1
+  %j.add3 = select i1 %lnot.i18, i32 %j, i32 %add3
+  ret i32 %j.add3
+}
+
+; TODO: cond23_i should be constant-folded.
+define i32 @unfold5(i32 %u, i32 %v, i32 %w, i32 %x, i32 %y, i32 %z, i32 %j) nounwind {
+; CHECK-LABEL: @unfold5(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[ADD3:%.*]] = add nsw i32 [[J:%.*]], 2
+; CHECK-NEXT:    [[CMP_I:%.*]] = icmp slt i32 [[U:%.*]], [[V:%.*]]
+; CHECK-NEXT:    br i1 [[CMP_I]], label [[DOTEXIT_THREAD:%.*]], label [[COND_FALSE_I:%.*]]
+; CHECK:       cond.false.i:
+; CHECK-NEXT:    [[CMP4_I:%.*]] = icmp sgt i32 [[U]], [[V]]
+; CHECK-NEXT:    br i1 [[CMP4_I]], label [[DOTEXIT_THREAD]], label [[COND_FALSE_6_I:%.*]]
+; CHECK:       cond.false.6.i:
+; CHECK-NEXT:    [[CMP8_I:%.*]] = icmp slt i32 [[W:%.*]], [[X:%.*]]
+; CHECK-NEXT:    br i1 [[CMP8_I]], label [[DOTEXIT_THREAD]], label [[COND_FALSE_10_I:%.*]]
+; CHECK:       cond.false.10.i:
+; CHECK-NEXT:    [[CMP13_I:%.*]] = icmp sgt i32 [[W]], [[X]]
+; CHECK-NEXT:    br i1 [[CMP13_I]], label [[TMP0:%.*]], label [[COND_FALSE_15_I:%.*]]
+; CHECK:       cond.false.15.i:
+; CHECK-NEXT:    [[CMP19_I:%.*]] = icmp sge i32 [[Y:%.*]], [[Z:%.*]]
+; CHECK-NEXT:    [[CONV:%.*]] = zext i1 [[CMP19_I]] to i32
+; CHECK-NEXT:    br label [[DOTEXIT_THREAD]]
+; CHECK:       0:
+; CHECK-NEXT:    [[COND23_I:%.*]] = phi i32 [ 7, [[COND_FALSE_10_I]] ]
+; CHECK-NEXT:    [[LNOT_I18:%.*]] = icmp sgt i32 [[COND23_I]], 5
+; CHECK-NEXT:    br label [[DOTEXIT_THREAD]]
+; CHECK:       .exit.thread:
+; CHECK-NEXT:    [[TMP1:%.*]] = phi i32 [ [[J]], [[TMP0]] ], [ [[CONV]], [[COND_FALSE_15_I]] ], [ 1, [[COND_FALSE_6_I]] ], [ 3, [[COND_FALSE_I]] ], [ 2, [[ENTRY:%.*]] ]
+; CHECK-NEXT:    ret i32 [[TMP1]]
+;
+entry:
+  %add3 = add nsw i32 %j, 2
+  %cmp.i = icmp slt i32 %u, %v
+  br i1 %cmp.i, label %.exit, label %cond.false.i
+
+cond.false.i:                                     ; preds = %entry
+  %cmp4.i = icmp sgt i32 %u, %v
+  br i1 %cmp4.i, label %.exit, label %cond.false.6.i
+
+cond.false.6.i:                                   ; preds = %cond.false.i
+  %cmp8.i = icmp slt i32 %w, %x
+  br i1 %cmp8.i, label %.exit, label %cond.false.10.i
+
+cond.false.10.i:                                  ; preds = %cond.false.6.i
+  %cmp13.i = icmp sgt i32 %w, %x
+  br i1 %cmp13.i, label %.exit, label %cond.false.15.i
+
+cond.false.15.i:                                  ; preds = %cond.false.10.i
+  %cmp19.i = icmp sge i32 %y, %z
+  %conv = zext i1 %cmp19.i to i32
+  br label %.exit
+
+.exit:                                  ; preds = %entry, %cond.false.i, %cond.false.6.i, %cond.false.10.i, %cond.false.15.i
+  %cond23.i = phi i32 [ 2, %entry ], [ 3, %cond.false.i ], [ 1, %cond.false.6.i ], [ %conv, %cond.false.15.i ], [ 7, %cond.false.10.i ]
+  %lnot.i18 = icmp sgt i32 %cond23.i, 5
+  %j.add3 = select i1 %lnot.i18, i32 %j, i32 %cond23.i
+  ret i32 %j.add3
+}
+
+define i32 @TryToUnfoldSelectInCurrBB(i1 %b, i1 %ui, i32 %s, i1 %x) {
+; CHECK-LABEL: @TryToUnfoldSelectInCurrBB(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 [[B:%.*]], label [[IF_END_THREAD:%.*]], label [[IF_END:%.*]]
+; CHECK:       if.end:
+; CHECK-NEXT:    [[COND_FR:%.*]] = freeze i1 [[X:%.*]]
+; CHECK-NEXT:    br i1 [[COND_FR]], label [[TMP0:%.*]], label [[IF_END_THREAD]]
+; CHECK:       0:
+; CHECK-NEXT:    br label [[IF_END_THREAD]]
+; CHECK:       if.end.thread:
+; CHECK-NEXT:    [[TMP1:%.*]] = phi i32 [ [[S:%.*]], [[TMP0]] ], [ 42, [[IF_END]] ], [ 42, [[ENTRY:%.*]] ]
+; CHECK-NEXT:    ret i32 [[TMP1]]
+;
+entry:
+  br i1 %b, label %if.end, label %if.else
+
+if.else:
+  br label %if.end
+
+if.end:
+  %v = phi i1 [ %x, %if.else ], [ false, %entry ]
+  %v1 = select i1 %v, i32 %s, i32 42
+  ret i32 %v1
+}


        


More information about the llvm-commits mailing list