[llvm] c79ab10 - [IROutliner] Separate split PHI nodes from multiple exits by different outlinable regions.
Andrew Litteken via llvm-commits
llvm-commits at lists.llvm.org
Mon Mar 14 12:57:10 PDT 2022
Author: Andrew Litteken
Date: 2022-03-14T14:56:59-05:00
New Revision: c79ab1065e89872668b8d43c747ff3e5974b0d96
URL: https://github.com/llvm/llvm-project/commit/c79ab1065e89872668b8d43c747ff3e5974b0d96
DIFF: https://github.com/llvm/llvm-project/commit/c79ab1065e89872668b8d43c747ff3e5974b0d96.diff
LOG: [IROutliner] Separate split PHI nodes from multiple exits by different outlinable regions.
The IR Outliner is supposed to extract the outputs contained in an external phi node and place them into a phi node contained within the outlined function. However, when the output values of two outlined functions with two different output sets are contained within the same phi node, they are counted as the same exit path when first analyzed. In reality, these create two different phi nodes, creating an inconsistency, resulting in a mismatch in the expected number of output paths and a crash. This fixes that counting when analyzing the outputs by also analyzing the incoming blocks rather than just the incoming values.
Reviewer: paquette
Differential Revision: https://reviews.llvm.org/D121313
Added:
llvm/test/Transforms/IROutliner/exit-block-phi-node-value-attribution.ll
Modified:
llvm/lib/Transforms/IPO/IROutliner.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/IPO/IROutliner.cpp b/llvm/lib/Transforms/IPO/IROutliner.cpp
index b4f4e0109a1c8..9028573f3a2c9 100644
--- a/llvm/lib/Transforms/IPO/IROutliner.cpp
+++ b/llvm/lib/Transforms/IPO/IROutliner.cpp
@@ -1095,26 +1095,39 @@ static hash_code encodePHINodeData(PHINodeData &PND) {
///
/// \param Region - The region that \p PN is an output for.
/// \param PN - The PHINode we are analyzing.
+/// \param Blocks - The blocks for the region we are analyzing.
/// \param AggArgIdx - The argument \p PN will be stored into.
/// \returns An optional holding the assigned canonical number, or None if
/// there is some attribute of the PHINode blocking it from being used.
static Optional<unsigned> getGVNForPHINode(OutlinableRegion &Region,
- PHINode *PN, unsigned AggArgIdx) {
+ PHINode *PN,
+ DenseSet<BasicBlock *> &Blocks,
+ unsigned AggArgIdx) {
OutlinableGroup &Group = *Region.Parent;
IRSimilarityCandidate &Cand = *Region.Candidate;
BasicBlock *PHIBB = PN->getParent();
CanonList PHIGVNs;
- for (Value *Incoming : PN->incoming_values()) {
- // If we cannot find a GVN, this means that the input to the PHINode is
- // not included in the region we are trying to analyze, meaning, that if
- // it was outlined, we would be adding an extra input. We ignore this
- // case for now, and so ignore the region.
+ Value *Incoming;
+ BasicBlock *IncomingBlock;
+ for (unsigned Idx = 0, EIdx = PN->getNumIncomingValues(); Idx < EIdx; Idx++) {
+ Incoming = PN->getIncomingValue(Idx);
+ IncomingBlock = PN->getIncomingBlock(Idx);
+ // If we cannot find a GVN, and the incoming block is included in the region
+ // this means that the input to the PHINode is not included in the region we
+ // are trying to analyze, meaning, that if it was outlined, we would be
+ // adding an extra input. We ignore this case for now, and so ignore the
+ // region.
Optional<unsigned> OGVN = Cand.getGVN(Incoming);
- if (!OGVN.hasValue()) {
+ if (!OGVN.hasValue() && (Blocks.find(IncomingBlock) != Blocks.end())) {
Region.IgnoreRegion = true;
return None;
}
+ // If the incoming block isn't in the region, we don't have to worry about
+ // this incoming value.
+ if (Blocks.find(IncomingBlock) == Blocks.end())
+ continue;
+
// Collect the canonical numbers of the values in the PHINode.
unsigned GVN = OGVN.getValue();
OGVN = Cand.getCanonicalNum(GVN);
@@ -1259,9 +1272,9 @@ findExtractedOutputToOverallOutputMapping(OutlinableRegion &Region,
// If two PHINodes have the same canonical values, but
diff erent aggregate
// argument locations, then they will have distinct Canonical Values.
- GVN = getGVNForPHINode(Region, PN, AggArgIdx);
+ GVN = getGVNForPHINode(Region, PN, BlocksInRegion, AggArgIdx);
if (!GVN.hasValue())
- return;
+ return;
} else {
// If we do not have a PHINode we use the global value numbering for the
// output value, to find the canonical number to add to the set of stored
diff --git a/llvm/test/Transforms/IROutliner/exit-block-phi-node-value-attribution.ll b/llvm/test/Transforms/IROutliner/exit-block-phi-node-value-attribution.ll
new file mode 100644
index 0000000000000..54da00fe089e3
--- /dev/null
+++ b/llvm/test/Transforms/IROutliner/exit-block-phi-node-value-attribution.ll
@@ -0,0 +1,98 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --include-generated-funcs
+; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s
+
+; Make sure that each outlined region only analyzes on the incoming values in exit
+; block phi nodes that come from the outlined region. Without this, more incoming
+; values from the phi node would be considered if the incoming value is
+; included in the outlined region. This is particularly likely to happen for
+; constants.
+
+define void @f1() {
+bb1:
+ %0 = add i32 1, 2
+ %1 = add i32 3, 4
+ %2 = add i32 5, 6
+ %3 = add i32 7, 8
+ br label %bb5
+bb2:
+ %4 = mul i32 5, 4
+ br label %bb5
+placeholder:
+ %a = sub i32 5, 4
+ br label %bb5
+bb3:
+ %5 = add i32 1, 2
+ %6 = add i32 3, 4
+ %7 = add i32 5, 6
+ %8 = add i32 7, 8
+ br label %bb5
+bb4:
+ %9 = mul i32 5, 4
+ br label %bb5
+
+placeholder1:
+ %b = add i32 5, 4
+ ret void
+
+bb5:
+ %phinode = phi i32 [5, %placeholder], [5, %bb1], [5, %bb2], [4, %bb3], [4, %bb4]
+ ret void
+}
+; CHECK-LABEL: @f1(
+; CHECK-NEXT: bb1:
+; CHECK-NEXT: [[PHINODE_CE_LOC1:%.*]] = alloca i32, align 4
+; CHECK-NEXT: [[PHINODE_CE_LOC:%.*]] = alloca i32, align 4
+; CHECK-NEXT: [[LT_CAST:%.*]] = bitcast i32* [[PHINODE_CE_LOC]] to i8*
+; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST]])
+; CHECK-NEXT: call void @outlined_ir_func_0(i32* [[PHINODE_CE_LOC]], i32 0)
+; CHECK-NEXT: [[PHINODE_CE_RELOAD:%.*]] = load i32, i32* [[PHINODE_CE_LOC]], align 4
+; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST]])
+; CHECK-NEXT: br label [[BB5:%.*]]
+; CHECK: placeholder:
+; CHECK-NEXT: [[A:%.*]] = sub i32 5, 4
+; CHECK-NEXT: br label [[BB5]]
+; CHECK: bb3:
+; CHECK-NEXT: [[LT_CAST3:%.*]] = bitcast i32* [[PHINODE_CE_LOC1]] to i8*
+; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST3]])
+; CHECK-NEXT: call void @outlined_ir_func_0(i32* [[PHINODE_CE_LOC1]], i32 1)
+; CHECK-NEXT: [[PHINODE_CE_RELOAD2:%.*]] = load i32, i32* [[PHINODE_CE_LOC1]], align 4
+; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST3]])
+; CHECK-NEXT: br label [[BB5]]
+; CHECK: placeholder1:
+; CHECK-NEXT: [[B:%.*]] = add i32 5, 4
+; CHECK-NEXT: ret void
+; CHECK: bb5:
+; CHECK-NEXT: [[PHINODE:%.*]] = phi i32 [ 5, [[PLACEHOLDER:%.*]] ], [ [[PHINODE_CE_RELOAD]], [[BB1:%.*]] ], [ [[PHINODE_CE_RELOAD2]], [[BB3:%.*]] ]
+; CHECK-NEXT: ret void
+;
+;
+; CHECK-LABEL: define internal void @outlined_ir_func_0(
+; CHECK-NEXT: newFuncRoot:
+; CHECK-NEXT: br label [[BB1_TO_OUTLINE:%.*]]
+; CHECK: bb1_to_outline:
+; CHECK-NEXT: [[TMP2:%.*]] = add i32 1, 2
+; CHECK-NEXT: [[TMP3:%.*]] = add i32 3, 4
+; CHECK-NEXT: [[TMP4:%.*]] = add i32 5, 6
+; CHECK-NEXT: [[TMP5:%.*]] = add i32 7, 8
+; CHECK-NEXT: br label [[BB5_SPLIT:%.*]]
+; CHECK: bb2:
+; CHECK-NEXT: [[TMP6:%.*]] = mul i32 5, 4
+; CHECK-NEXT: br label [[BB5_SPLIT]]
+; CHECK: bb5.split:
+; CHECK-NEXT: [[TMP7:%.*]] = phi i32 [ 4, [[BB1_TO_OUTLINE]] ], [ 4, [[BB2:%.*]] ]
+; CHECK-NEXT: [[PHINODE_CE:%.*]] = phi i32 [ 5, [[BB1_TO_OUTLINE]] ], [ 5, [[BB2]] ]
+; CHECK-NEXT: br label [[BB5_EXITSTUB:%.*]]
+; CHECK: bb5.exitStub:
+; CHECK-NEXT: switch i32 [[TMP1:%.*]], label [[FINAL_BLOCK_0:%.*]] [
+; CHECK-NEXT: i32 0, label [[OUTPUT_BLOCK_0_0:%.*]]
+; CHECK-NEXT: i32 1, label [[OUTPUT_BLOCK_1_0:%.*]]
+; CHECK-NEXT: ]
+; CHECK: output_block_0_0:
+; CHECK-NEXT: store i32 [[PHINODE_CE]], i32* [[TMP0:%.*]], align 4
+; CHECK-NEXT: br label [[FINAL_BLOCK_0]]
+; CHECK: output_block_1_0:
+; CHECK-NEXT: store i32 [[TMP7]], i32* [[TMP0]], align 4
+; CHECK-NEXT: br label [[FINAL_BLOCK_0]]
+; CHECK: final_block_0:
+; CHECK-NEXT: ret void
+;
More information about the llvm-commits
mailing list