[llvm] 52a2d07 - [AMDGPU] Improve PHI-breaking heuristics in CGP
via llvm-commits
llvm-commits at lists.llvm.org
Mon May 15 00:16:30 PDT 2023
Author: pvanhout
Date: 2023-05-15T09:16:22+02:00
New Revision: 52a2d07bb3bb42594aab957b0da2e1e911abab59
URL: https://github.com/llvm/llvm-project/commit/52a2d07bb3bb42594aab957b0da2e1e911abab59
DIFF: https://github.com/llvm/llvm-project/commit/52a2d07bb3bb42594aab957b0da2e1e911abab59.diff
LOG: [AMDGPU] Improve PHI-breaking heuristics in CGP
D147786 made the transform more conservative by adding heuristics,
which was a good idea. However, the transform got a bit
too conservative at times.
This caused a surprise in some rocRAND benchmarks because D143731 greatly helped a few of them.
For instance, a few xorwow-uniform tests saw a +30% boost in performance after that pass, which was lost when D147786 landed.
This patch is an attempt at reaching a middleground that makes
the pass a bit more permissive. It continues in the same spirit as
D147786 but does the following changes:
- PHI users of a PHI node are now recursively checked. When loops are encountered, we consider the PHIs non-breakable. (Considering them breakable had very negative effect in one app I tested)
- `shufflevector` is now considered interesting, given that it satisfies a few trivial checks.
Reviewed By: arsenm, #amdgpu, jmmartinez
Differential Revision: https://reviews.llvm.org/D150266
Added:
Modified:
llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-break-large-phis-heuristics.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
index bdf6c425f8f7a..84e35404d4159 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
@@ -95,6 +95,10 @@ class AMDGPUCodeGenPrepare : public FunctionPass,
bool HasUnsafeFPMath = false;
bool HasFP32Denormals = false;
+ DenseMap<const PHINode *, bool> BreakPhiNodesCache;
+
+ bool canBreakPHINode(const PHINode &I);
+
/// Copies exact/nsw/nuw flags (if any) from binary operation \p I to
/// binary operation \p V.
///
@@ -1398,48 +1402,105 @@ bool AMDGPUCodeGenPrepare::visitSelectInst(SelectInst &I) {
return Changed;
}
+static bool areInSameBB(const Value *A, const Value *B) {
+ const auto *IA = dyn_cast<Instruction>(A);
+ const auto *IB = dyn_cast<Instruction>(B);
+ return IA && IB && IA->getParent() == IB->getParent();
+}
+
// Helper for breaking large PHIs that returns true when an extractelement on V
// is likely to be folded away by the DAG combiner.
-static bool isInterestingPHIIncomingValue(Value *V, FixedVectorType *FVT) {
- InsertElementInst *IE = dyn_cast<InsertElementInst>(V);
+static bool isInterestingPHIIncomingValue(const Value *V) {
+ const auto *FVT = dyn_cast<FixedVectorType>(V->getType());
+ if (!FVT)
+ return false;
- // Constants & InsertElements chains are interesting.
- if (!IE)
- return isa<Constant>(V);
+ const Value *CurVal = V;
- // Check if this is a simple chain of insertelement that fills the vector. If
- // that's the case, we can break up this PHI node profitably because the
- // extractelement we will insert will get folded out.
- BasicBlock *BB = IE->getParent();
+ // Check for insertelements, keeping track of the elements covered.
BitVector EltsCovered(FVT->getNumElements());
- InsertElementInst *Next = IE;
- while (Next && !EltsCovered.all()) {
- ConstantInt *Idx = dyn_cast<ConstantInt>(Next->getOperand(2));
+ while (const auto *IE = dyn_cast<InsertElementInst>(CurVal)) {
+ const auto *Idx = dyn_cast<ConstantInt>(IE->getOperand(2));
// Non constant index/out of bounds index -> folding is unlikely.
- // Note that this is more of a sanity check - canonical IR should
- // already have replaced those with poison.
+ // The latter is more of a sanity check because canonical IR should just
+ // have replaced those with poison.
if (!Idx || Idx->getSExtValue() >= FVT->getNumElements())
return false;
+ const auto *VecSrc = IE->getOperand(0);
+
+ // If the vector source is another instruction, it must be in the same basic
+ // block. Otherwise, the DAGCombiner won't see the whole thing and is
+ // unlikely to be able to do anything interesting here.
+ if (isa<Instruction>(VecSrc) && !areInSameBB(VecSrc, IE))
+ return false;
+
+ CurVal = VecSrc;
EltsCovered.set(Idx->getSExtValue());
- // If the insertelement chain ends with a constant, it's fine.
- if (isa<Constant>(Next->getOperand(0)))
+ // All elements covered.
+ if (EltsCovered.all())
return true;
+ }
- Next = dyn_cast<InsertElementInst>(Next->getOperand(0));
+ // We either didn't find a single insertelement, or the insertelement chain
+ // ended before all elements were covered. Check for other interesting values.
- // If the chain is spread across basic blocks, the DAG combiner
- // won't see it in its entirety and is unlikely to be able to fold
- // evevrything away.
- if (Next && Next->getParent() != BB)
- return false;
+ // Constants are always interesting because we can just constant fold the
+ // extractelements.
+ if (isa<Constant>(CurVal))
+ return true;
+
+ // shufflevector is likely to be profitable if either operand is a constant,
+ // or if either source is in the same block.
+ // This is because shufflevector is most often lowered as a series of
+ // insert/extract elements anyway.
+ if (const auto *SV = dyn_cast<ShuffleVectorInst>(CurVal)) {
+ return isa<Constant>(SV->getOperand(1)) ||
+ areInSameBB(SV, SV->getOperand(0)) ||
+ areInSameBB(SV, SV->getOperand(1));
+ }
+
+ return false;
+}
+
+bool AMDGPUCodeGenPrepare::canBreakPHINode(const PHINode &I) {
+ // Check in the cache, or add an entry for this node.
+ //
+ // We init with false because we consider all PHI nodes unbreakable until we
+ // reach a conclusion. Doing the opposite - assuming they're break-able until
+ // proven otherwise - can be harmful in some pathological cases so we're
+ // conservative for now.
+ const auto [It, DidInsert] = BreakPhiNodesCache.insert({&I, false});
+ if (!DidInsert)
+ return It->second;
+
+ // This function may recurse, so to guard against infinite looping, this PHI
+ // is conservatively considered unbreakable until we reach a conclusion.
+
+ // Don't break PHIs that have no interesting incoming values. That is, where
+ // there is no clear opportunity to fold the "extractelement" instructions we
+ // would add.
+ //
+ // Note: IC does not run after this pass, so we're only interested in the
+ // foldings that the DAG combiner can do.
+ if (none_of(I.incoming_values(),
+ [&](Value *V) { return isInterestingPHIIncomingValue(V); }))
+ return false;
+
+ // Now, check users for unbreakable PHI nodes. If we have an unbreakable PHI
+ // node as user, we don't want to break this PHI either because it's unlikely
+ // to be beneficial. We would just explode the vector and reassemble it
+ // directly, wasting instructions.
+ for (const Value *U : I.users()) {
+ if (const auto *PU = dyn_cast<PHINode>(U)) {
+ if (!canBreakPHINode(*PU))
+ return false;
+ }
}
- // All elements covered, all of the extract elements will likely be
- // combined.
- return EltsCovered.all();
+ return BreakPhiNodesCache[&I] = true;
}
bool AMDGPUCodeGenPrepare::visitPHINode(PHINode &I) {
@@ -1460,23 +1521,8 @@ bool AMDGPUCodeGenPrepare::visitPHINode(PHINode &I) {
if (!FVT || DL->getTypeSizeInBits(FVT) <= ScalarizeLargePHIsThreshold)
return false;
- // Try to avoid unprofitable cases:
- // - Don't break PHIs that have no interesting incoming values. That is, where
- // there is no clear opportunity to fold the "extractelement" instructions we
- // would add.
- // - Note: IC does not run after this pass, so we're only interested in the
- // folding that the DAG combiner can do.
- // - For simplicity, don't break PHIs that are used by other PHIs because it'd
- // require us to determine if the whole "chain" can be converted or not. e.g.
- // if we broke this PHI but not its user, we would actually make things worse.
- if (!ForceScalarizeLargePHIs) {
- if (none_of(
- I.incoming_values(),
- [&](Value *V) { return isInterestingPHIIncomingValue(V, FVT); }) ||
- any_of(I.users(), [&](User *U) { return isa<PHINode>(U); })) {
- return false;
- }
- }
+ if (!ForceScalarizeLargePHIs && !canBreakPHINode(I))
+ return false;
struct VectorSlice {
Type *Ty = nullptr;
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-break-large-phis-heuristics.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-break-large-phis-heuristics.ll
index 504c7b4c6430f..4ff8647f5eaa2 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-break-large-phis-heuristics.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-break-large-phis-heuristics.ll
@@ -138,6 +138,197 @@ finally:
ret void
}
+define amdgpu_kernel void @shufflevec_inc_with_cst_op(<5 x double> %in, ptr %out, i1 %cond) {
+; CHECK-LABEL: @shufflevec_inc_with_cst_op(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br i1 [[COND:%.*]], label [[THEN:%.*]], label [[ELSE:%.*]]
+; CHECK: then:
+; CHECK-NEXT: [[X:%.*]] = insertelement <5 x double> [[IN:%.*]], double 3.140000e+00, i64 3
+; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE0:%.*]] = extractelement <5 x double> [[X]], i64 0
+; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE1:%.*]] = extractelement <5 x double> [[X]], i64 1
+; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE2:%.*]] = extractelement <5 x double> [[X]], i64 2
+; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE3:%.*]] = extractelement <5 x double> [[X]], i64 3
+; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE4:%.*]] = extractelement <5 x double> [[X]], i64 4
+; CHECK-NEXT: br label [[FINALLY:%.*]]
+; CHECK: else:
+; CHECK-NEXT: [[SHUFFLED:%.*]] = shufflevector <5 x double> [[IN]], <5 x double> poison, <5 x i32> <i32 0, i32 3, i32 2, i32 1, i32 4>
+; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE01:%.*]] = extractelement <5 x double> [[SHUFFLED]], i64 0
+; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE12:%.*]] = extractelement <5 x double> [[SHUFFLED]], i64 1
+; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE23:%.*]] = extractelement <5 x double> [[SHUFFLED]], i64 2
+; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE34:%.*]] = extractelement <5 x double> [[SHUFFLED]], i64 3
+; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE45:%.*]] = extractelement <5 x double> [[SHUFFLED]], i64 4
+; CHECK-NEXT: br label [[FINALLY]]
+; CHECK: finally:
+; CHECK-NEXT: [[TMP0:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE0]], [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE01]], [[ELSE]] ]
+; CHECK-NEXT: [[TMP1:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE1]], [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE12]], [[ELSE]] ]
+; CHECK-NEXT: [[TMP2:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE2]], [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE23]], [[ELSE]] ]
+; CHECK-NEXT: [[TMP3:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE3]], [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE34]], [[ELSE]] ]
+; CHECK-NEXT: [[TMP4:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE4]], [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE45]], [[ELSE]] ]
+; CHECK-NEXT: [[LARGEPHI_INSERTSLICE0:%.*]] = insertelement <5 x double> poison, double [[TMP0]], i64 0
+; CHECK-NEXT: [[LARGEPHI_INSERTSLICE1:%.*]] = insertelement <5 x double> [[LARGEPHI_INSERTSLICE0]], double [[TMP1]], i64 1
+; CHECK-NEXT: [[LARGEPHI_INSERTSLICE2:%.*]] = insertelement <5 x double> [[LARGEPHI_INSERTSLICE1]], double [[TMP2]], i64 2
+; CHECK-NEXT: [[LARGEPHI_INSERTSLICE3:%.*]] = insertelement <5 x double> [[LARGEPHI_INSERTSLICE2]], double [[TMP3]], i64 3
+; CHECK-NEXT: [[LARGEPHI_INSERTSLICE4:%.*]] = insertelement <5 x double> [[LARGEPHI_INSERTSLICE3]], double [[TMP4]], i64 4
+; CHECK-NEXT: store <5 x double> [[LARGEPHI_INSERTSLICE4]], ptr [[OUT:%.*]], align 1
+; CHECK-NEXT: ret void
+;
+entry:
+ br i1 %cond, label %then, label %else
+
+then:
+ %x = insertelement <5 x double> %in, double 3.140000e+00, i64 3
+ br label %finally
+
+else:
+ %shuffled = shufflevector <5 x double> %in, <5 x double> poison, <5 x i32> <i32 0, i32 3, i32 2, i32 1, i32 4>
+ br label %finally
+
+finally:
+ %val = phi <5 x double> [ %x, %then ], [ %shuffled, %else ]
+ store <5 x double> %val, ptr %out, align 1
+ ret void
+}
+
+define amdgpu_kernel void @shufflevec_inc_with_local_lhs(<5 x double> %in, ptr %out, i1 %cond) {
+; CHECK-LABEL: @shufflevec_inc_with_local_lhs(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br i1 [[COND:%.*]], label [[THEN:%.*]], label [[ELSE:%.*]]
+; CHECK: then:
+; CHECK-NEXT: [[X:%.*]] = insertelement <5 x double> [[IN:%.*]], double 3.140000e+00, i64 3
+; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE0:%.*]] = extractelement <5 x double> [[X]], i64 0
+; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE1:%.*]] = extractelement <5 x double> [[X]], i64 1
+; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE2:%.*]] = extractelement <5 x double> [[X]], i64 2
+; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE3:%.*]] = extractelement <5 x double> [[X]], i64 3
+; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE4:%.*]] = extractelement <5 x double> [[X]], i64 4
+; CHECK-NEXT: br label [[FINALLY:%.*]]
+; CHECK: else:
+; CHECK-NEXT: [[LOCAL_SHUFFLE_SRC:%.*]] = insertelement <5 x double> [[IN]], double 3.250000e+00, i64 2
+; CHECK-NEXT: [[SHUFFLED:%.*]] = shufflevector <5 x double> [[LOCAL_SHUFFLE_SRC]], <5 x double> [[IN]], <5 x i32> <i32 7, i32 3, i32 2, i32 5, i32 4>
+; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE01:%.*]] = extractelement <5 x double> [[SHUFFLED]], i64 0
+; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE12:%.*]] = extractelement <5 x double> [[SHUFFLED]], i64 1
+; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE23:%.*]] = extractelement <5 x double> [[SHUFFLED]], i64 2
+; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE34:%.*]] = extractelement <5 x double> [[SHUFFLED]], i64 3
+; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE45:%.*]] = extractelement <5 x double> [[SHUFFLED]], i64 4
+; CHECK-NEXT: br label [[FINALLY]]
+; CHECK: finally:
+; CHECK-NEXT: [[TMP0:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE0]], [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE01]], [[ELSE]] ]
+; CHECK-NEXT: [[TMP1:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE1]], [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE12]], [[ELSE]] ]
+; CHECK-NEXT: [[TMP2:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE2]], [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE23]], [[ELSE]] ]
+; CHECK-NEXT: [[TMP3:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE3]], [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE34]], [[ELSE]] ]
+; CHECK-NEXT: [[TMP4:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE4]], [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE45]], [[ELSE]] ]
+; CHECK-NEXT: [[LARGEPHI_INSERTSLICE0:%.*]] = insertelement <5 x double> poison, double [[TMP0]], i64 0
+; CHECK-NEXT: [[LARGEPHI_INSERTSLICE1:%.*]] = insertelement <5 x double> [[LARGEPHI_INSERTSLICE0]], double [[TMP1]], i64 1
+; CHECK-NEXT: [[LARGEPHI_INSERTSLICE2:%.*]] = insertelement <5 x double> [[LARGEPHI_INSERTSLICE1]], double [[TMP2]], i64 2
+; CHECK-NEXT: [[LARGEPHI_INSERTSLICE3:%.*]] = insertelement <5 x double> [[LARGEPHI_INSERTSLICE2]], double [[TMP3]], i64 3
+; CHECK-NEXT: [[LARGEPHI_INSERTSLICE4:%.*]] = insertelement <5 x double> [[LARGEPHI_INSERTSLICE3]], double [[TMP4]], i64 4
+; CHECK-NEXT: store <5 x double> [[LARGEPHI_INSERTSLICE4]], ptr [[OUT:%.*]], align 1
+; CHECK-NEXT: ret void
+;
+entry:
+ br i1 %cond, label %then, label %else
+
+then:
+ %x = insertelement <5 x double> %in, double 3.140000e+00, i64 3
+ br label %finally
+
+else:
+ %local.shuffle.src = insertelement <5 x double> %in, double 3.250000e+00, i64 2
+ %shuffled = shufflevector <5 x double> %local.shuffle.src, <5 x double> %in, <5 x i32> <i32 7, i32 3, i32 2, i32 5, i32 4>
+ br label %finally
+
+finally:
+ %val = phi <5 x double> [ %x, %then ], [ %shuffled, %else ]
+ store <5 x double> %val, ptr %out, align 1
+ ret void
+}
+
+define amdgpu_kernel void @shufflevec_inc_with_local_rhs(<5 x double> %in, ptr %out, i1 %cond) {
+; CHECK-LABEL: @shufflevec_inc_with_local_rhs(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br i1 [[COND:%.*]], label [[THEN:%.*]], label [[ELSE:%.*]]
+; CHECK: then:
+; CHECK-NEXT: [[X:%.*]] = insertelement <5 x double> [[IN:%.*]], double 3.140000e+00, i64 3
+; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE0:%.*]] = extractelement <5 x double> [[X]], i64 0
+; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE1:%.*]] = extractelement <5 x double> [[X]], i64 1
+; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE2:%.*]] = extractelement <5 x double> [[X]], i64 2
+; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE3:%.*]] = extractelement <5 x double> [[X]], i64 3
+; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE4:%.*]] = extractelement <5 x double> [[X]], i64 4
+; CHECK-NEXT: br label [[FINALLY:%.*]]
+; CHECK: else:
+; CHECK-NEXT: [[LOCAL_SHUFFLE_SRC:%.*]] = insertelement <5 x double> [[IN]], double 3.250000e+00, i64 2
+; CHECK-NEXT: [[SHUFFLED:%.*]] = shufflevector <5 x double> [[IN]], <5 x double> [[LOCAL_SHUFFLE_SRC]], <5 x i32> <i32 7, i32 3, i32 2, i32 5, i32 4>
+; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE01:%.*]] = extractelement <5 x double> [[SHUFFLED]], i64 0
+; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE12:%.*]] = extractelement <5 x double> [[SHUFFLED]], i64 1
+; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE23:%.*]] = extractelement <5 x double> [[SHUFFLED]], i64 2
+; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE34:%.*]] = extractelement <5 x double> [[SHUFFLED]], i64 3
+; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE45:%.*]] = extractelement <5 x double> [[SHUFFLED]], i64 4
+; CHECK-NEXT: br label [[FINALLY]]
+; CHECK: finally:
+; CHECK-NEXT: [[TMP0:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE0]], [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE01]], [[ELSE]] ]
+; CHECK-NEXT: [[TMP1:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE1]], [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE12]], [[ELSE]] ]
+; CHECK-NEXT: [[TMP2:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE2]], [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE23]], [[ELSE]] ]
+; CHECK-NEXT: [[TMP3:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE3]], [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE34]], [[ELSE]] ]
+; CHECK-NEXT: [[TMP4:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE4]], [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE45]], [[ELSE]] ]
+; CHECK-NEXT: [[LARGEPHI_INSERTSLICE0:%.*]] = insertelement <5 x double> poison, double [[TMP0]], i64 0
+; CHECK-NEXT: [[LARGEPHI_INSERTSLICE1:%.*]] = insertelement <5 x double> [[LARGEPHI_INSERTSLICE0]], double [[TMP1]], i64 1
+; CHECK-NEXT: [[LARGEPHI_INSERTSLICE2:%.*]] = insertelement <5 x double> [[LARGEPHI_INSERTSLICE1]], double [[TMP2]], i64 2
+; CHECK-NEXT: [[LARGEPHI_INSERTSLICE3:%.*]] = insertelement <5 x double> [[LARGEPHI_INSERTSLICE2]], double [[TMP3]], i64 3
+; CHECK-NEXT: [[LARGEPHI_INSERTSLICE4:%.*]] = insertelement <5 x double> [[LARGEPHI_INSERTSLICE3]], double [[TMP4]], i64 4
+; CHECK-NEXT: store <5 x double> [[LARGEPHI_INSERTSLICE4]], ptr [[OUT:%.*]], align 1
+; CHECK-NEXT: ret void
+;
+entry:
+ br i1 %cond, label %then, label %else
+
+then:
+ %x = insertelement <5 x double> %in, double 3.140000e+00, i64 3
+ br label %finally
+
+else:
+ %local.shuffle.src = insertelement <5 x double> %in, double 3.250000e+00, i64 2
+ %shuffled = shufflevector <5 x double> %in, <5 x double> %local.shuffle.src, <5 x i32> <i32 7, i32 3, i32 2, i32 5, i32 4>
+ br label %finally
+
+finally:
+ %val = phi <5 x double> [ %x, %then ], [ %shuffled, %else ]
+ store <5 x double> %val, ptr %out, align 1
+ ret void
+}
+
+define amdgpu_kernel void @shufflevec_inc_with_nonlocal_ops(<5 x double> %in, ptr %out, i1 %cond) {
+; CHECK-LABEL: @shufflevec_inc_with_nonlocal_ops(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[SHUFFLE_SRC:%.*]] = insertelement <5 x double> [[IN:%.*]], double 3.250000e+00, i64 2
+; CHECK-NEXT: br i1 [[COND:%.*]], label [[THEN:%.*]], label [[ELSE:%.*]]
+; CHECK: then:
+; CHECK-NEXT: [[X:%.*]] = insertelement <5 x double> [[IN]], double 3.140000e+00, i64 3
+; CHECK-NEXT: br label [[FINALLY:%.*]]
+; CHECK: else:
+; CHECK-NEXT: [[SHUFFLED:%.*]] = shufflevector <5 x double> [[SHUFFLE_SRC]], <5 x double> [[IN]], <5 x i32> <i32 7, i32 3, i32 2, i32 5, i32 4>
+; CHECK-NEXT: br label [[FINALLY]]
+; CHECK: finally:
+; CHECK-NEXT: [[VAL:%.*]] = phi <5 x double> [ [[X]], [[THEN]] ], [ [[SHUFFLED]], [[ELSE]] ]
+; CHECK-NEXT: store <5 x double> [[VAL]], ptr [[OUT:%.*]], align 1
+; CHECK-NEXT: ret void
+;
+entry:
+ %shuffle.src = insertelement <5 x double> %in, double 3.250000e+00, i64 2
+ br i1 %cond, label %then, label %else
+
+then:
+ %x = insertelement <5 x double> %in, double 3.140000e+00, i64 3
+ br label %finally
+
+else:
+ %shuffled = shufflevector <5 x double> %shuffle.src, <5 x double> %in, <5 x i32> <i32 7, i32 3, i32 2, i32 5, i32 4>
+ br label %finally
+
+finally:
+ %val = phi <5 x double> [ %x, %then ], [ %shuffled, %else ]
+ store <5 x double> %val, ptr %out, align 1
+ ret void
+}
+
define amdgpu_kernel void @trivial_insertelt_chain(<5 x double> %in, ptr %out, i1 %cond, double %x, double %y, double %z) {
; CHECK-LABEL: @trivial_insertelt_chain(
; CHECK-NEXT: entry:
@@ -246,20 +437,39 @@ finally:
ret void
}
-define amdgpu_kernel void @nontrivial_insertelt_chain(<5 x double> %in, ptr %out, i1 %cond, double %x, i32 %idx) {
-; CHECK-LABEL: @nontrivial_insertelt_chain(
+define amdgpu_kernel void @insertelt_shufflevec(<5 x double> %in, ptr %out, i1 %cond, double %x, i32 %idx) {
+; CHECK-LABEL: @insertelt_shufflevec(
; CHECK-NEXT: entry:
; CHECK-NEXT: br i1 [[COND:%.*]], label [[THEN:%.*]], label [[ELSE:%.*]]
; CHECK: then:
; CHECK-NEXT: [[X_1:%.*]] = insertelement <5 x double> <double 3.140000e+00, double poison, double poison, double poison, double poison>, double [[X:%.*]], i32 [[IDX:%.*]]
; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <5 x double> [[X_1]], <5 x double> <double poison, double poison, double poison, double 6.140000e+00, double 9.900000e+00>, <5 x i32> <i32 0, i32 1, i32 poison, i32 8, i32 9>
; CHECK-NEXT: [[X_4:%.*]] = insertelement <5 x double> [[TMP0]], double [[X]], i64 2
+; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE0:%.*]] = extractelement <5 x double> [[X_4]], i64 0
+; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE1:%.*]] = extractelement <5 x double> [[X_4]], i64 1
+; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE2:%.*]] = extractelement <5 x double> [[X_4]], i64 2
+; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE3:%.*]] = extractelement <5 x double> [[X_4]], i64 3
+; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE4:%.*]] = extractelement <5 x double> [[X_4]], i64 4
; CHECK-NEXT: br label [[FINALLY:%.*]]
; CHECK: else:
+; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE01:%.*]] = extractelement <5 x double> [[IN:%.*]], i64 0
+; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE12:%.*]] = extractelement <5 x double> [[IN]], i64 1
+; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE23:%.*]] = extractelement <5 x double> [[IN]], i64 2
+; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE34:%.*]] = extractelement <5 x double> [[IN]], i64 3
+; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE45:%.*]] = extractelement <5 x double> [[IN]], i64 4
; CHECK-NEXT: br label [[FINALLY]]
; CHECK: finally:
-; CHECK-NEXT: [[VAL:%.*]] = phi <5 x double> [ [[X_4]], [[THEN]] ], [ [[IN:%.*]], [[ELSE]] ]
-; CHECK-NEXT: store <5 x double> [[VAL]], ptr [[OUT:%.*]], align 1
+; CHECK-NEXT: [[TMP1:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE0]], [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE01]], [[ELSE]] ]
+; CHECK-NEXT: [[TMP2:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE1]], [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE12]], [[ELSE]] ]
+; CHECK-NEXT: [[TMP3:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE2]], [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE23]], [[ELSE]] ]
+; CHECK-NEXT: [[TMP4:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE3]], [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE34]], [[ELSE]] ]
+; CHECK-NEXT: [[TMP5:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE4]], [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE45]], [[ELSE]] ]
+; CHECK-NEXT: [[LARGEPHI_INSERTSLICE0:%.*]] = insertelement <5 x double> poison, double [[TMP1]], i64 0
+; CHECK-NEXT: [[LARGEPHI_INSERTSLICE1:%.*]] = insertelement <5 x double> [[LARGEPHI_INSERTSLICE0]], double [[TMP2]], i64 1
+; CHECK-NEXT: [[LARGEPHI_INSERTSLICE2:%.*]] = insertelement <5 x double> [[LARGEPHI_INSERTSLICE1]], double [[TMP3]], i64 2
+; CHECK-NEXT: [[LARGEPHI_INSERTSLICE3:%.*]] = insertelement <5 x double> [[LARGEPHI_INSERTSLICE2]], double [[TMP4]], i64 3
+; CHECK-NEXT: [[LARGEPHI_INSERTSLICE4:%.*]] = insertelement <5 x double> [[LARGEPHI_INSERTSLICE3]], double [[TMP5]], i64 4
+; CHECK-NEXT: store <5 x double> [[LARGEPHI_INSERTSLICE4]], ptr [[OUT:%.*]], align 1
; CHECK-NEXT: ret void
;
entry:
@@ -354,38 +564,52 @@ finally:
ret void
}
-define amdgpu_kernel void @used_by_phi(<5 x double> %in, ptr %out, i1 %cond, i1 %cond2) {
-; CHECK-LABEL: @used_by_phi(
+define amdgpu_kernel void @used_by_breakable_phi(<5 x double> %in, ptr %out, i1 %cond, i1 %cond2) {
+; CHECK-LABEL: @used_by_breakable_phi(
; CHECK-NEXT: entry:
; CHECK-NEXT: br i1 [[COND:%.*]], label [[THEN:%.*]], label [[ELSE:%.*]]
; CHECK: then:
; CHECK-NEXT: [[X:%.*]] = insertelement <5 x double> [[IN:%.*]], double 3.140000e+00, i64 3
+; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE0:%.*]] = extractelement <5 x double> [[X]], i64 0
+; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE1:%.*]] = extractelement <5 x double> [[X]], i64 1
+; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE2:%.*]] = extractelement <5 x double> [[X]], i64 2
+; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE3:%.*]] = extractelement <5 x double> [[X]], i64 3
+; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE4:%.*]] = extractelement <5 x double> [[X]], i64 4
; CHECK-NEXT: br label [[FINALLY:%.*]]
; CHECK: else:
; CHECK-NEXT: br label [[FINALLY]]
; CHECK: finally:
-; CHECK-NEXT: [[VAL:%.*]] = phi <5 x double> [ [[X]], [[THEN]] ], [ zeroinitializer, [[ELSE]] ]
-; CHECK-NEXT: store <5 x double> [[VAL]], ptr [[OUT:%.*]], align 1
-; CHECK-NEXT: br i1 [[COND2:%.*]], label [[THEN1:%.*]], label [[END:%.*]]
-; CHECK: then1:
-; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE0:%.*]] = extractelement <5 x double> [[VAL]], i64 0
-; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE1:%.*]] = extractelement <5 x double> [[VAL]], i64 1
-; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE2:%.*]] = extractelement <5 x double> [[VAL]], i64 2
-; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE3:%.*]] = extractelement <5 x double> [[VAL]], i64 3
-; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE4:%.*]] = extractelement <5 x double> [[VAL]], i64 4
-; CHECK-NEXT: br label [[END]]
-; CHECK: end:
-; CHECK-NEXT: [[TMP0:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE0]], [[THEN1]] ], [ 0.000000e+00, [[FINALLY]] ]
-; CHECK-NEXT: [[TMP1:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE1]], [[THEN1]] ], [ 0.000000e+00, [[FINALLY]] ]
-; CHECK-NEXT: [[TMP2:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE2]], [[THEN1]] ], [ 0.000000e+00, [[FINALLY]] ]
-; CHECK-NEXT: [[TMP3:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE3]], [[THEN1]] ], [ 0.000000e+00, [[FINALLY]] ]
-; CHECK-NEXT: [[TMP4:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE4]], [[THEN1]] ], [ 0.000000e+00, [[FINALLY]] ]
+; CHECK-NEXT: [[TMP0:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE0]], [[THEN]] ], [ 0.000000e+00, [[ELSE]] ]
+; CHECK-NEXT: [[TMP1:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE1]], [[THEN]] ], [ 0.000000e+00, [[ELSE]] ]
+; CHECK-NEXT: [[TMP2:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE2]], [[THEN]] ], [ 0.000000e+00, [[ELSE]] ]
+; CHECK-NEXT: [[TMP3:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE3]], [[THEN]] ], [ 0.000000e+00, [[ELSE]] ]
+; CHECK-NEXT: [[TMP4:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE4]], [[THEN]] ], [ 0.000000e+00, [[ELSE]] ]
; CHECK-NEXT: [[LARGEPHI_INSERTSLICE0:%.*]] = insertelement <5 x double> poison, double [[TMP0]], i64 0
; CHECK-NEXT: [[LARGEPHI_INSERTSLICE1:%.*]] = insertelement <5 x double> [[LARGEPHI_INSERTSLICE0]], double [[TMP1]], i64 1
; CHECK-NEXT: [[LARGEPHI_INSERTSLICE2:%.*]] = insertelement <5 x double> [[LARGEPHI_INSERTSLICE1]], double [[TMP2]], i64 2
; CHECK-NEXT: [[LARGEPHI_INSERTSLICE3:%.*]] = insertelement <5 x double> [[LARGEPHI_INSERTSLICE2]], double [[TMP3]], i64 3
; CHECK-NEXT: [[LARGEPHI_INSERTSLICE4:%.*]] = insertelement <5 x double> [[LARGEPHI_INSERTSLICE3]], double [[TMP4]], i64 4
-; CHECK-NEXT: store <5 x double> [[LARGEPHI_INSERTSLICE4]], ptr [[OUT]], align 1
+; CHECK-NEXT: store <5 x double> [[LARGEPHI_INSERTSLICE4]], ptr [[OUT:%.*]], align 1
+; CHECK-NEXT: br i1 [[COND2:%.*]], label [[THEN1:%.*]], label [[END:%.*]]
+; CHECK: then1:
+; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE01:%.*]] = extractelement <5 x double> [[LARGEPHI_INSERTSLICE4]], i64 0
+; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE12:%.*]] = extractelement <5 x double> [[LARGEPHI_INSERTSLICE4]], i64 1
+; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE23:%.*]] = extractelement <5 x double> [[LARGEPHI_INSERTSLICE4]], i64 2
+; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE34:%.*]] = extractelement <5 x double> [[LARGEPHI_INSERTSLICE4]], i64 3
+; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE45:%.*]] = extractelement <5 x double> [[LARGEPHI_INSERTSLICE4]], i64 4
+; CHECK-NEXT: br label [[END]]
+; CHECK: end:
+; CHECK-NEXT: [[TMP5:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE01]], [[THEN1]] ], [ 0.000000e+00, [[FINALLY]] ]
+; CHECK-NEXT: [[TMP6:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE12]], [[THEN1]] ], [ 0.000000e+00, [[FINALLY]] ]
+; CHECK-NEXT: [[TMP7:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE23]], [[THEN1]] ], [ 0.000000e+00, [[FINALLY]] ]
+; CHECK-NEXT: [[TMP8:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE34]], [[THEN1]] ], [ 0.000000e+00, [[FINALLY]] ]
+; CHECK-NEXT: [[TMP9:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE45]], [[THEN1]] ], [ 0.000000e+00, [[FINALLY]] ]
+; CHECK-NEXT: [[LARGEPHI_INSERTSLICE06:%.*]] = insertelement <5 x double> poison, double [[TMP5]], i64 0
+; CHECK-NEXT: [[LARGEPHI_INSERTSLICE17:%.*]] = insertelement <5 x double> [[LARGEPHI_INSERTSLICE06]], double [[TMP6]], i64 1
+; CHECK-NEXT: [[LARGEPHI_INSERTSLICE28:%.*]] = insertelement <5 x double> [[LARGEPHI_INSERTSLICE17]], double [[TMP7]], i64 2
+; CHECK-NEXT: [[LARGEPHI_INSERTSLICE39:%.*]] = insertelement <5 x double> [[LARGEPHI_INSERTSLICE28]], double [[TMP8]], i64 3
+; CHECK-NEXT: [[LARGEPHI_INSERTSLICE410:%.*]] = insertelement <5 x double> [[LARGEPHI_INSERTSLICE39]], double [[TMP9]], i64 4
+; CHECK-NEXT: store <5 x double> [[LARGEPHI_INSERTSLICE410]], ptr [[OUT]], align 1
; CHECK-NEXT: ret void
;
entry:
@@ -411,3 +635,75 @@ end:
store <5 x double> %endval, ptr %out, align 1
ret void
}
+
+define amdgpu_kernel void @used_by_unbreakable_phi(<5 x double> %in, ptr %out, i1 %cond, i1 %cond2) {
+; CHECK-LABEL: @used_by_unbreakable_phi(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br i1 [[COND:%.*]], label [[THEN:%.*]], label [[ELSE:%.*]]
+; CHECK: then:
+; CHECK-NEXT: [[X:%.*]] = insertelement <5 x double> [[IN:%.*]], double 3.140000e+00, i64 3
+; CHECK-NEXT: br label [[FINALLY:%.*]]
+; CHECK: else:
+; CHECK-NEXT: br label [[FINALLY]]
+; CHECK: finally:
+; CHECK-NEXT: [[VAL:%.*]] = phi <5 x double> [ [[X]], [[THEN]] ], [ zeroinitializer, [[ELSE]] ]
+; CHECK-NEXT: store <5 x double> [[VAL]], ptr [[OUT:%.*]], align 1
+; CHECK-NEXT: br i1 [[COND2:%.*]], label [[THEN1:%.*]], label [[END:%.*]]
+; CHECK: then1:
+; CHECK-NEXT: br label [[END]]
+; CHECK: end:
+; CHECK-NEXT: [[ENDVAL:%.*]] = phi <5 x double> [ [[VAL]], [[THEN1]] ], [ [[IN]], [[FINALLY]] ]
+; CHECK-NEXT: store <5 x double> [[ENDVAL]], ptr [[OUT]], align 1
+; CHECK-NEXT: ret void
+;
+entry:
+ br i1 %cond, label %then, label %else
+
+then:
+ %x = insertelement <5 x double> %in, double 3.140000e+00, i64 3
+ br label %finally
+
+else:
+ br label %finally
+
+finally:
+ %val = phi <5 x double> [ %x, %then ], [ zeroinitializer, %else ]
+ store <5 x double> %val, ptr %out, align 1
+ br i1 %cond2, label %then1, label %end
+
+then1:
+ br label %end
+
+end:
+ %endval = phi <5 x double> [ %val, %then1 ], [ %in, %finally ]
+ store <5 x double> %endval, ptr %out, align 1
+ ret void
+}
+
+; check for infinite recursion
+define amdgpu_kernel void @used_by_phi_self(<5 x double> %in, ptr %out, i8 %count) {
+; CHECK-LABEL: @used_by_phi_self(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: [[VAL:%.*]] = phi <5 x double> [ [[IN:%.*]], [[ENTRY:%.*]] ], [ [[VAL]], [[LOOP]] ]
+; CHECK-NEXT: store <5 x double> [[VAL]], ptr [[OUT:%.*]], align 1
+; CHECK-NEXT: [[COUNT_DEC:%.*]] = sub i8 [[COUNT:%.*]], 0
+; CHECK-NEXT: [[COND:%.*]] = icmp ne i8 [[COUNT]], 0
+; CHECK-NEXT: br i1 [[COND]], label [[LOOP]], label [[END:%.*]]
+; CHECK: end:
+; CHECK-NEXT: ret void
+;
+entry:
+ br label %loop
+
+loop:
+ %val = phi <5 x double> [ %in, %entry ], [ %val, %loop ]
+ store <5 x double> %val, ptr %out, align 1
+ %count.dec = sub i8 %count, 0
+ %cond = icmp ne i8 %count, 0
+ br i1 %cond, label %loop, label %end
+
+end:
+ ret void
+}
More information about the llvm-commits
mailing list