[llvm-branch-commits] [llvm] [Passes] Disable code sinking in InstCombine early on. (PR #72567)

Thu Nov 16 12:49:38 PST 2023

llvmbot wrote:




@llvm/pr-subscribers-llvm-transforms

Author: Florian Hahn (fhahn)

<details>
<summary>Changes</summary>

Sinking instructions very early in the pipeline destroys
dereferenceability information, that could be used by other passes, e.g.
this can prevent if-conversion by SimplifyCFG.


---
Full diff: https://github.com/llvm/llvm-project/pull/72567.diff


8 Files Affected:

- (modified) llvm/include/llvm/Transforms/InstCombine/InstCombine.h (+6) 
- (modified) llvm/lib/Passes/PassBuilder.cpp (+2) 
- (modified) llvm/lib/Passes/PassBuilderPipelines.cpp (+2-1) 
- (modified) llvm/lib/Passes/PassRegistry.def (+2-1) 
- (modified) llvm/lib/Transforms/InstCombine/InstCombineInternal.h (+7-2) 
- (modified) llvm/lib/Transforms/InstCombine/InstructionCombining.cpp (+6-4) 
- (modified) llvm/test/Transforms/InstCombine/no_sink_instruction.ll (+1) 
- (modified) llvm/test/Transforms/PhaseOrdering/AArch64/sinking-vs-if-conversion.ll (+37-32) 


``````````diff

diff --git a/llvm/include/llvm/Transforms/InstCombine/InstCombine.h b/llvm/include/llvm/Transforms/InstCombine/InstCombine.h
index f38ec2debb18136..14d1c127984c874 100644
--- a/llvm/include/llvm/Transforms/InstCombine/InstCombine.h
+++ b/llvm/include/llvm/Transforms/InstCombine/InstCombine.h
@@ -32,6 +32,7 @@ struct InstCombineOptions {
   // Verify that a fix point has been reached after MaxIterations.
   bool VerifyFixpoint = false;
   unsigned MaxIterations = InstCombineDefaultMaxIterations;
+  bool EnableCodeSinking = true;
 
   InstCombineOptions() = default;
 
@@ -49,6 +50,11 @@ struct InstCombineOptions {
     MaxIterations = Value;
     return *this;
   }
+
+  InstCombineOptions &setEnableCodeSinking(bool Value) {
+    EnableCodeSinking = Value;
+    return *this;
+  }
 };
 
 class InstCombinePass : public PassInfoMixin<InstCombinePass> {
diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp
index dd9d799f9d55dcc..1e79ff660ea3ea2 100644
--- a/llvm/lib/Passes/PassBuilder.cpp
+++ b/llvm/lib/Passes/PassBuilder.cpp
@@ -872,6 +872,8 @@ Expected<InstCombineOptions> parseInstCombineOptions(StringRef Params) {
                     ParamName).str(),
             inconvertibleErrorCode());
       Result.setMaxIterations((unsigned)MaxIterations.getZExtValue());
+    } else if (ParamName == "code-sinking") {
+      Result.setEnableCodeSinking(Enable);
     } else {
       return make_error<StringError>(
           formatv("invalid InstCombine pass parameter '{0}' ", ParamName).str(),
diff --git a/llvm/lib/Passes/PassBuilderPipelines.cpp b/llvm/lib/Passes/PassBuilderPipelines.cpp
index f3d280316e04077..8946480340d29a9 100644
--- a/llvm/lib/Passes/PassBuilderPipelines.cpp
+++ b/llvm/lib/Passes/PassBuilderPipelines.cpp
@@ -1101,7 +1101,8 @@ PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level,
   FunctionPassManager GlobalCleanupPM;
   // FIXME: Should this instead by a run of SROA?
   GlobalCleanupPM.addPass(PromotePass());
-  GlobalCleanupPM.addPass(InstCombinePass());
+  GlobalCleanupPM.addPass(
+      InstCombinePass(InstCombineOptions().setEnableCodeSinking(false)));
   invokePeepholeEPCallbacks(GlobalCleanupPM, Level);
   GlobalCleanupPM.addPass(
       SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def
index 2067fc473b522db..50dda63578a0add 100644
--- a/llvm/lib/Passes/PassRegistry.def
+++ b/llvm/lib/Passes/PassRegistry.def
@@ -526,7 +526,8 @@ FUNCTION_PASS_WITH_PARAMS("instcombine",
                           parseInstCombineOptions,
                           "no-use-loop-info;use-loop-info;"
                           "no-verify-fixpoint;verify-fixpoint;"
-                          "max-iterations=N"
+                          "max-iterations=N;"
+                          "no-code-sinking;code-sinking"
                           )
 FUNCTION_PASS_WITH_PARAMS("mldst-motion",
                           "MergedLoadStoreMotionPass",
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
index 68a8fb676d8d909..83364f14ef7db6c 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
+++ b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
@@ -53,6 +53,7 @@ class DataLayout;
 class DominatorTree;
 class GEPOperator;
 class GlobalVariable;
+struct InstCombineOptions;
 class LoopInfo;
 class OptimizationRemarkEmitter;
 class ProfileSummaryInfo;
@@ -68,9 +69,11 @@ class LLVM_LIBRARY_VISIBILITY InstCombinerImpl final
                    TargetLibraryInfo &TLI, TargetTransformInfo &TTI,
                    DominatorTree &DT, OptimizationRemarkEmitter &ORE,
                    BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI,
-                   const DataLayout &DL, LoopInfo *LI)
+                   const DataLayout &DL, LoopInfo *LI,
+                   const InstCombineOptions &Opts)
       : InstCombiner(Worklist, Builder, MinimizeSize, AA, AC, TLI, TTI, DT, ORE,
-                     BFI, PSI, DL, LI) {}
+                     BFI, PSI, DL, LI),
+        Opts(Opts) {}
 
   virtual ~InstCombinerImpl() = default;
 
@@ -434,6 +437,8 @@ class LLVM_LIBRARY_VISIBILITY InstCombinerImpl final
 
   Instruction *hoistFNegAboveFMulFDiv(Value *FNegOp, Instruction &FMFSource);
 
+  const InstCombineOptions &Opts;
+
 public:
   /// Create and insert the idiom we use to indicate a block is unreachable
   /// without having to rewrite the CFG from within InstCombine.
diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index 5859f58a9f462b0..1eab37f3ca0d57a 100644
--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -131,8 +131,10 @@ DEBUG_COUNTER(VisitCounter, "instcombine-visit",
               "Controls which instructions are visited");
 
 static cl::opt<bool>
-EnableCodeSinking("instcombine-code-sinking", cl::desc("Enable code sinking"),
-                                              cl::init(true));
+    EnableCodeSinking("instcombine-code-sinking",
+                      cl::desc("Enable code sinking, unless code sinking is "
+                               "disabled via a pass option."),
+                      cl::init(true));
 
 static cl::opt<unsigned> MaxSinkNumUsers(
     "instcombine-max-sink-users", cl::init(32),
@@ -4017,7 +4019,7 @@ bool InstCombinerImpl::run() {
     // Return the UserBlock if successful.
     auto getOptionalSinkBlockForInst =
         [this](Instruction *I) -> std::optional<BasicBlock *> {
-      if (!EnableCodeSinking)
+      if (!Opts.EnableCodeSinking || !EnableCodeSinking)
         return std::nullopt;
 
       BasicBlock *BB = I->getParent();
@@ -4405,7 +4407,7 @@ static bool combineInstructionsOverFunction(
                       << F.getName() << "\n");
 
     InstCombinerImpl IC(Worklist, Builder, F.hasMinSize(), AA, AC, TLI, TTI, DT,
-                        ORE, BFI, PSI, DL, LI);
+                        ORE, BFI, PSI, DL, LI, Opts);
     IC.MaxArraySizeForCombine = MaxArraySize;
     bool MadeChangeInThisIteration = IC.prepareWorklist(F, RPOT);
     MadeChangeInThisIteration |= IC.run();
diff --git a/llvm/test/Transforms/InstCombine/no_sink_instruction.ll b/llvm/test/Transforms/InstCombine/no_sink_instruction.ll
index 70c309912919d90..caace08b0dd9989 100644
--- a/llvm/test/Transforms/InstCombine/no_sink_instruction.ll
+++ b/llvm/test/Transforms/InstCombine/no_sink_instruction.ll
@@ -1,4 +1,5 @@
 ; RUN: opt -passes=instcombine -instcombine-code-sinking=0 -S < %s | FileCheck %s
+; RUN: opt -passes='instcombine<no-code-sinking>' -S < %s | FileCheck %s
 
 define i32 @test(i1 %C, i32 %A, i32 %B) {
 ; CHECK-LABEL: @test(
diff --git a/llvm/test/Transforms/PhaseOrdering/AArch64/sinking-vs-if-conversion.ll b/llvm/test/Transforms/PhaseOrdering/AArch64/sinking-vs-if-conversion.ll
index 06d7b36509a52f8..8b4be2778c97692 100644
--- a/llvm/test/Transforms/PhaseOrdering/AArch64/sinking-vs-if-conversion.ll
+++ b/llvm/test/Transforms/PhaseOrdering/AArch64/sinking-vs-if-conversion.ll
@@ -23,27 +23,23 @@ define void @test_find_min(ptr noundef nonnull align 8 dereferenceable(24) %this
 ; CHECK-NEXT:    [[WIDE_TRIP_COUNT:%.*]] = zext nneg i32 [[TMP0]] to i64
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK:       for.body:
-; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[FOR_BODY_LR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[COND_END7:%.*]] ]
-; CHECK-NEXT:    [[MIN_010:%.*]] = phi ptr [ [[TMP1]], [[FOR_BODY_LR_PH]] ], [ [[COND8:%.*]], [[COND_END7]] ]
+; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[FOR_BODY_LR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[MIN_010:%.*]] = phi ptr [ [[TMP1]], [[FOR_BODY_LR_PH]] ], [ [[COND8:%.*]], [[FOR_BODY]] ]
 ; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[TMP2]], i64 [[INDVARS_IV]]
 ; CHECK-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8
+; CHECK-NEXT:    [[KEY:%.*]] = getelementptr inbounds [[STRUCT_NODE:%.*]], ptr [[TMP3]], i64 0, i32 1
+; CHECK-NEXT:    [[TMP4:%.*]] = load i32, ptr [[KEY]], align 4
+; CHECK-NEXT:    [[KEY2:%.*]] = getelementptr inbounds [[STRUCT_NODE]], ptr [[MIN_010]], i64 0, i32 1
+; CHECK-NEXT:    [[TMP5:%.*]] = load i32, ptr [[KEY2]], align 4
 ; CHECK-NEXT:    [[CMP3:%.*]] = icmp eq ptr [[MIN_010]], null
-; CHECK-NEXT:    br i1 [[CMP3]], label [[COND_END7]], label [[COND_FALSE:%.*]]
-; CHECK:       cond.false:
-; CHECK-NEXT:    [[KEY2:%.*]] = getelementptr inbounds [[STRUCT_NODE:%.*]], ptr [[MIN_010]], i64 0, i32 1
-; CHECK-NEXT:    [[TMP4:%.*]] = load i32, ptr [[KEY2]], align 4
-; CHECK-NEXT:    [[KEY:%.*]] = getelementptr inbounds [[STRUCT_NODE]], ptr [[TMP3]], i64 0, i32 1
-; CHECK-NEXT:    [[TMP5:%.*]] = load i32, ptr [[KEY]], align 4
-; CHECK-NEXT:    [[CMP4:%.*]] = icmp slt i32 [[TMP5]], [[TMP4]]
-; CHECK-NEXT:    [[COND:%.*]] = select i1 [[CMP4]], ptr [[TMP3]], ptr [[MIN_010]]
-; CHECK-NEXT:    br label [[COND_END7]]
-; CHECK:       cond.end7:
-; CHECK-NEXT:    [[COND8]] = phi ptr [ [[COND]], [[COND_FALSE]] ], [ [[TMP3]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[CMP4:%.*]] = icmp slt i32 [[TMP4]], [[TMP5]]
+; CHECK-NEXT:    [[TMP6:%.*]] = select i1 [[CMP3]], i1 true, i1 [[CMP4]]
+; CHECK-NEXT:    [[COND8]] = select i1 [[TMP6]], ptr [[TMP3]], ptr [[MIN_010]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
 ; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]]
 ; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]]
 ; CHECK:       for.end:
-; CHECK-NEXT:    [[MIN_0_LCSSA:%.*]] = phi ptr [ [[TMP1]], [[ENTRY:%.*]] ], [ [[COND8]], [[COND_END7]] ]
+; CHECK-NEXT:    [[MIN_0_LCSSA:%.*]] = phi ptr [ [[TMP1]], [[ENTRY:%.*]] ], [ [[COND8]], [[FOR_BODY]] ]
 ; CHECK-NEXT:    store ptr [[MIN_0_LCSSA]], ptr [[THIS]], align 8
 ; CHECK-NEXT:    ret void
 ;
@@ -140,24 +136,28 @@ define void @cond_select_loop(ptr noalias nocapture noundef readonly %a, ptr noa
 ; CHECK-LABEL: define void @cond_select_loop(
 ; CHECK-SAME: ptr noalias nocapture noundef readonly [[A:%.*]], ptr noalias nocapture noundef readonly [[B:%.*]], ptr noalias nocapture noundef writeonly [[C:%.*]]) local_unnamed_addr #[[ATTR1:[0-9]+]] {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
-; CHECK:       for.body:
-; CHECK-NEXT:    [[I_07:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[COND_END:%.*]] ]
-; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[I_07]]
-; CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[ARRAYIDX1]], align 4
-; CHECK-NEXT:    [[CMP2:%.*]] = fcmp ogt float [[TMP0]], 0.000000e+00
-; CHECK-NEXT:    br i1 [[CMP2]], label [[COND_END]], label [[COND_FALSE:%.*]]
-; CHECK:       cond.false:
-; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[I_07]]
-; CHECK-NEXT:    [[TMP1:%.*]] = load float, ptr [[ARRAYIDX]], align 4
-; CHECK-NEXT:    br label [[COND_END]]
-; CHECK:       cond.end:
-; CHECK-NEXT:    [[COND:%.*]] = phi float [ [[TMP1]], [[COND_FALSE]] ], [ [[TMP0]], [[FOR_BODY]] ]
-; CHECK-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds float, ptr [[C]], i64 [[I_07]]
-; CHECK-NEXT:    store float [[COND]], ptr [[ARRAYIDX4]], align 4
-; CHECK-NEXT:    [[INC]] = add nuw nsw i64 [[I_07]], 1
-; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], 1000
-; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY]]
+; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
+; CHECK:       vector.body:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[INDEX]]
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP0]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds float, ptr [[TMP0]], i64 4
+; CHECK-NEXT:    [[WIDE_LOAD8:%.*]] = load <4 x float>, ptr [[TMP1]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]]
+; CHECK-NEXT:    [[WIDE_LOAD9:%.*]] = load <4 x float>, ptr [[TMP2]], align 4
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i64 4
+; CHECK-NEXT:    [[WIDE_LOAD10:%.*]] = load <4 x float>, ptr [[TMP3]], align 4
+; CHECK-NEXT:    [[TMP4:%.*]] = fcmp ogt <4 x float> [[WIDE_LOAD9]], zeroinitializer
+; CHECK-NEXT:    [[TMP5:%.*]] = fcmp ogt <4 x float> [[WIDE_LOAD10]], zeroinitializer
+; CHECK-NEXT:    [[TMP6:%.*]] = select <4 x i1> [[TMP4]], <4 x float> [[WIDE_LOAD9]], <4 x float> [[WIDE_LOAD]]
+; CHECK-NEXT:    [[TMP7:%.*]] = select <4 x i1> [[TMP5]], <4 x float> [[WIDE_LOAD10]], <4 x float> [[WIDE_LOAD8]]
+; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds float, ptr [[C]], i64 [[INDEX]]
+; CHECK-NEXT:    store <4 x float> [[TMP6]], ptr [[TMP8]], align 4
+; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds float, ptr [[TMP8]], i64 4
+; CHECK-NEXT:    store <4 x float> [[TMP7]], ptr [[TMP9]], align 4
+; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
+; CHECK-NEXT:    [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000
+; CHECK-NEXT:    br i1 [[TMP10]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
 ; CHECK:       for.end:
 ; CHECK-NEXT:    ret void
 ;
@@ -222,3 +222,8 @@ for.inc:                                          ; preds = %cond.end
 for.end:
   ret void
 }
+;.
+; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
+; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
+; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
+;.

``````````

</details>


https://github.com/llvm/llvm-project/pull/72567