[llvm-branch-commits] [llvm] 05b1a15 - [IROutliner] Adapting to hoisted bitcasts in CodeExtractor

Andrew Litteken via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Wed Jan 13 09:15:31 PST 2021


Author: Andrew Litteken
Date: 2021-01-13T11:10:37-06:00
New Revision: 05b1a15f703c3e01f4123424700bc37188af8af1

URL: https://github.com/llvm/llvm-project/commit/05b1a15f703c3e01f4123424700bc37188af8af1
DIFF: https://github.com/llvm/llvm-project/commit/05b1a15f703c3e01f4123424700bc37188af8af1.diff

LOG: [IROutliner] Adapting to hoisted bitcasts in CodeExtractor

In commit 700d2417d8281ea56dfd7ac72d1a1473d03d2d59 the CodeExtractor
was updated so that bitcasts that have lifetime markers beginning
outside of the region are deduplicated outside the region and are not
used as an output.  This caused a discrepancy in the IROutliner, where
in these cases there were arguments added to the aggregate function
that were not needed, causing assertion errors.

The IROutliner queries the CodeExtractor twice to determine the inputs
and outputs, before and after `findAllocas` is called, with the same
ValueSet for the outputs, causing the duplication. This has been fixed
by using a dummy ValueSet for the first call.

However, the additional bitcasts prevent us from using the same
similarity relationships that were previously defined by the
IR Similarity Analysis Pass. In these cases, we check whether the
initial version of the region being analyzed for outlining is still the
same as it was previously.  If it is not, i.e. because of the additional
bitcast instructions from the CodeExtractor, we discard the region.

Reviewers: yroux

Differential Revision: https://reviews.llvm.org/D94303

Added: 
    llvm/test/Transforms/IROutliner/outlining-bitcasts.ll

Modified: 
    llvm/lib/Transforms/IPO/IROutliner.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/IPO/IROutliner.cpp b/llvm/lib/Transforms/IPO/IROutliner.cpp
index 0e5e1dd0886e..f6fdd69d71c3 100644
--- a/llvm/lib/Transforms/IPO/IROutliner.cpp
+++ b/llvm/lib/Transforms/IPO/IROutliner.cpp
@@ -510,13 +510,16 @@ static void getCodeExtractorArguments(
   // outlined region. PremappedInputs are the arguments found by the
   // CodeExtractor, removing conditions such as sunken allocas, but that
   // may need to be remapped due to the extracted output values replacing
-  // the original values.
-  SetVector<Value *> OverallInputs, PremappedInputs, SinkCands, HoistCands;
+  // the original values. We use DummyOutputs for this first run of finding
+  // inputs and outputs since the outputs could change during findAllocas,
+  // the correct set of extracted outputs will be in the final Outputs ValueSet.
+  SetVector<Value *> OverallInputs, PremappedInputs, SinkCands, HoistCands,
+      DummyOutputs;
 
   // Use the code extractor to get the inputs and outputs, without sunken
   // allocas or removing llvm.assumes.
   CodeExtractor *CE = Region.CE;
-  CE->findInputsOutputs(OverallInputs, Outputs, SinkCands);
+  CE->findInputsOutputs(OverallInputs, DummyOutputs, SinkCands);
   assert(Region.StartBB && "Region must have a start BasicBlock!");
   Function *OrigF = Region.StartBB->getParent();
   CodeExtractorAnalysisCache CEAC(*OrigF);
@@ -1263,6 +1266,16 @@ void IROutliner::pruneIncompatibleRegions(
       continue;
 
     bool BadInst = any_of(IRSC, [this](IRInstructionData &ID) {
+      // We check if there is a discrepancy between the InstructionDataList
+      // and the actual next instruction in the module.  If there is, it means
+      // that an extra instruction was added, likely by the CodeExtractor.
+
+      // Since we do not have any similarity data about this particular
+      // instruction, we cannot confidently outline it, and must discard this
+      // candidate.
+      if (std::next(ID.getIterator())->Inst !=
+          ID.Inst->getNextNonDebugInstruction())
+        return true;
       return !this->InstructionClassifier.visit(ID.Inst);
     });
 

diff  --git a/llvm/test/Transforms/IROutliner/outlining-bitcasts.ll b/llvm/test/Transforms/IROutliner/outlining-bitcasts.ll
new file mode 100644
index 000000000000..68b7eb74ec0c
--- /dev/null
+++ b/llvm/test/Transforms/IROutliner/outlining-bitcasts.ll
@@ -0,0 +1,105 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s
+
+; This test ensures that an extra output is not added when there is a bitcast
+; that is relocated to outside of the extraction due to a starting lifetime
+; instruction outside of the extracted region.
+
+; Additionally, we check that the newly added bitcast instruction is excluded in
+; further extractions.
+
+declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture)
+declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture)
+
+define void @outline_bitcast_base() {
+; CHECK-LABEL: @outline_bitcast_base(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[A:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[B:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[C:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[D:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    call void @[[FUNCTION_0:.*]](i32* [[A]], i32* [[B]], i32* [[C]], i32* [[D]])
+; CHECK-NEXT:    ret void
+;
+entry:
+  %a = alloca i32, align 4
+  %b = alloca i32, align 4
+  %c = alloca i32, align 4
+  %d = alloca i32, align 4
+  store i32 2, i32* %a, align 4
+  store i32 3, i32* %b, align 4
+  store i32 4, i32* %c, align 4
+  %X = bitcast i32* %d to i8*
+  %al = load i32, i32* %a
+  %bl = load i32, i32* %b
+  %cl = load i32, i32* %c
+  ret void
+}
+
+define void @outline_bitcast_removed() {
+; CHECK-LABEL: @outline_bitcast_removed(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[A:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[B:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[C:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[D:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    call void @[[FUNCTION_0:.*]](i32* [[A]], i32* [[B]], i32* [[C]], i32* [[D]])
+; CHECK-NEXT:    [[LT_CAST1:%.*]] = bitcast i32* [[D]] to i8*
+; CHECK-NEXT:    call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST1]])
+; CHECK-NEXT:    [[AM:%.*]] = load i32, i32* [[B]], align 4
+; CHECK-NEXT:    [[BM:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    [[CM:%.*]] = load i32, i32* [[C]], align 4
+; CHECK-NEXT:    [[AS:%.*]] = add i32 [[AM]], [[BM]]
+; CHECK-NEXT:    [[BS:%.*]] = add i32 [[BM]], [[AM]]
+; CHECK-NEXT:    [[CS:%.*]] = add i32 [[BM]], [[CM]]
+; CHECK-NEXT:    [[LT_CAST:%.*]] = bitcast i32* [[D]] to i8*
+; CHECK-NEXT:    call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST]])
+; CHECK-NEXT:    ret void
+;
+entry:
+  %a = alloca i32, align 4
+  %b = alloca i32, align 4
+  %c = alloca i32, align 4
+  %d = alloca i32, align 4
+  store i32 2, i32* %a, align 4
+  store i32 3, i32* %b, align 4
+  store i32 4, i32* %c, align 4
+  %X = bitcast i32* %d to i8*
+  %al = load i32, i32* %a
+  %bl = load i32, i32* %b
+  %cl = load i32, i32* %c
+  call void @llvm.lifetime.start.p0i8(i64 -1, i8* %X)
+  %am = load i32, i32* %b
+  %bm = load i32, i32* %a
+  %cm = load i32, i32* %c
+  %as = add i32 %am, %bm
+  %bs = add i32 %bm, %am
+  %cs = add i32 %bm, %cm
+  call void @llvm.lifetime.end.p0i8(i64 -1, i8* %X)
+  ret void
+}
+
+define void @outline_bitcast_base2(i32 %a, i32 %b, i32 %c) {
+; CHECK-LABEL: @outline_bitcast_base2(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[AL:%.*]] = add i32 [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    [[BL:%.*]] = add i32 [[B]], [[A]]
+; CHECK-NEXT:    [[CL:%.*]] = add i32 [[B]], [[C:%.*]]
+; CHECK-NEXT:    ret void
+;
+entry:
+  %al = add i32 %a, %b
+  %bl = add i32 %b, %a
+  %cl = add i32 %b, %c
+  ret void
+}
+
+; CHECK: define internal void @[[FUNCTION_0]](i32* [[ARG0:%.*]], i32* [[ARG1:%.*]], i32* [[ARG2:%.*]], i32* [[ARG3:%.*]])
+; CHECK: entry_to_outline:
+; CHECK-NEXT:    store i32 2, i32* [[ARG0]], align 4
+; CHECK-NEXT:    store i32 3, i32* [[ARG1]], align 4
+; CHECK-NEXT:    store i32 4, i32* [[ARG2]], align 4
+; CHECK-NEXT:    [[X:%.*]] = bitcast i32* [[ARG3]] to i8*
+; CHECK-NEXT:    [[AL:%.*]] = load i32, i32* [[ARG0]], align 4
+; CHECK-NEXT:    [[BL:%.*]] = load i32, i32* [[ARG1]], align 4
+; CHECK-NEXT:    [[CL:%.*]] = load i32, i32* [[ARG2]], align 4


        


More information about the llvm-branch-commits mailing list