[llvm] 52a2d07 - [AMDGPU] Improve PHI-breaking heuristics in CGP

via llvm-commits llvm-commits at lists.llvm.org
Mon May 15 00:16:30 PDT 2023


Author: pvanhout
Date: 2023-05-15T09:16:22+02:00
New Revision: 52a2d07bb3bb42594aab957b0da2e1e911abab59

URL: https://github.com/llvm/llvm-project/commit/52a2d07bb3bb42594aab957b0da2e1e911abab59
DIFF: https://github.com/llvm/llvm-project/commit/52a2d07bb3bb42594aab957b0da2e1e911abab59.diff

LOG: [AMDGPU] Improve PHI-breaking heuristics in CGP

D147786 made the transform more conservative by adding heuristics,
which was a good idea. However, the transform got a bit
too conservative at times.

This came as a surprise in some rocRAND benchmarks, because D143731 had greatly helped a few of them.
For instance, a few xorwow-uniform tests saw a +30% performance boost after that patch, which was lost when D147786 landed.

This patch is an attempt at reaching a middle ground that makes
the pass a bit more permissive. It continues in the same spirit as
D147786 but makes the following changes:
- PHI users of a PHI node are now recursively checked. When a loop is encountered, we consider the PHIs non-breakable. (Considering them breakable had a very negative effect in one app I tested.)
- `shufflevector` is now considered interesting, provided that it satisfies a few trivial checks.

Reviewed By: arsenm, #amdgpu, jmmartinez

Differential Revision: https://reviews.llvm.org/D150266

Added: 
    

Modified: 
    llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
    llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-break-large-phis-heuristics.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
index bdf6c425f8f7a..84e35404d4159 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
@@ -95,6 +95,10 @@ class AMDGPUCodeGenPrepare : public FunctionPass,
   bool HasUnsafeFPMath = false;
   bool HasFP32Denormals = false;
 
+  DenseMap<const PHINode *, bool> BreakPhiNodesCache;
+
+  bool canBreakPHINode(const PHINode &I);
+
   /// Copies exact/nsw/nuw flags (if any) from binary operation \p I to
   /// binary operation \p V.
   ///
@@ -1398,48 +1402,105 @@ bool AMDGPUCodeGenPrepare::visitSelectInst(SelectInst &I) {
   return Changed;
 }
 
+static bool areInSameBB(const Value *A, const Value *B) {
+  const auto *IA = dyn_cast<Instruction>(A);
+  const auto *IB = dyn_cast<Instruction>(B);
+  return IA && IB && IA->getParent() == IB->getParent();
+}
+
 // Helper for breaking large PHIs that returns true when an extractelement on V
 // is likely to be folded away by the DAG combiner.
-static bool isInterestingPHIIncomingValue(Value *V, FixedVectorType *FVT) {
-  InsertElementInst *IE = dyn_cast<InsertElementInst>(V);
+static bool isInterestingPHIIncomingValue(const Value *V) {
+  const auto *FVT = dyn_cast<FixedVectorType>(V->getType());
+  if (!FVT)
+    return false;
 
-  // Constants & InsertElements chains are interesting.
-  if (!IE)
-    return isa<Constant>(V);
+  const Value *CurVal = V;
 
-  // Check if this is a simple chain of insertelement that fills the vector. If
-  // that's the case, we can break up this PHI node profitably because the
-  // extractelement we will insert will get folded out.
-  BasicBlock *BB = IE->getParent();
+  // Check for insertelements, keeping track of the elements covered.
   BitVector EltsCovered(FVT->getNumElements());
-  InsertElementInst *Next = IE;
-  while (Next && !EltsCovered.all()) {
-    ConstantInt *Idx = dyn_cast<ConstantInt>(Next->getOperand(2));
+  while (const auto *IE = dyn_cast<InsertElementInst>(CurVal)) {
+    const auto *Idx = dyn_cast<ConstantInt>(IE->getOperand(2));
 
     // Non constant index/out of bounds index -> folding is unlikely.
-    // Note that this is more of a sanity check - canonical IR should
-    // already have replaced those with poison.
+    // The latter is more of a sanity check because canonical IR should just
+    // have replaced those with poison.
     if (!Idx || Idx->getSExtValue() >= FVT->getNumElements())
       return false;
 
+    const auto *VecSrc = IE->getOperand(0);
+
+    // If the vector source is another instruction, it must be in the same basic
+    // block. Otherwise, the DAGCombiner won't see the whole thing and is
+    // unlikely to be able to do anything interesting here.
+    if (isa<Instruction>(VecSrc) && !areInSameBB(VecSrc, IE))
+      return false;
+
+    CurVal = VecSrc;
     EltsCovered.set(Idx->getSExtValue());
 
-    // If the insertelement chain ends with a constant, it's fine.
-    if (isa<Constant>(Next->getOperand(0)))
+    // All elements covered.
+    if (EltsCovered.all())
       return true;
+  }
 
-    Next = dyn_cast<InsertElementInst>(Next->getOperand(0));
+  // We either didn't find a single insertelement, or the insertelement chain
+  // ended before all elements were covered. Check for other interesting values.
 
-    // If the chain is spread across basic blocks, the DAG combiner
-    // won't see it in its entirety and is unlikely to be able to fold
-    // evevrything away.
-    if (Next && Next->getParent() != BB)
-      return false;
+  // Constants are always interesting because we can just constant fold the
+  // extractelements.
+  if (isa<Constant>(CurVal))
+    return true;
+
+  // shufflevector is likely to be profitable if either operand is a constant,
+  // or if either source is in the same block.
+  // This is because shufflevector is most often lowered as a series of
+  // insert/extract elements anyway.
+  if (const auto *SV = dyn_cast<ShuffleVectorInst>(CurVal)) {
+    return isa<Constant>(SV->getOperand(1)) ||
+           areInSameBB(SV, SV->getOperand(0)) ||
+           areInSameBB(SV, SV->getOperand(1));
+  }
+
+  return false;
+}
+
+bool AMDGPUCodeGenPrepare::canBreakPHINode(const PHINode &I) {
+  // Check in the cache, or add an entry for this node.
+  //
+  // We init with false because we consider all PHI nodes unbreakable until we
+  // reach a conclusion. Doing the opposite - assuming they're break-able until
+  // proven otherwise - can be harmful in some pathological cases so we're
+  // conservative for now.
+  const auto [It, DidInsert] = BreakPhiNodesCache.insert({&I, false});
+  if (!DidInsert)
+    return It->second;
+
+  // This function may recurse, so to guard against infinite looping, this PHI
+  // is conservatively considered unbreakable until we reach a conclusion.
+
+  // Don't break PHIs that have no interesting incoming values. That is, where
+  // there is no clear opportunity to fold the "extractelement" instructions we
+  // would add.
+  //
+  // Note: IC does not run after this pass, so we're only interested in the
+  // foldings that the DAG combiner can do.
+  if (none_of(I.incoming_values(),
+              [&](Value *V) { return isInterestingPHIIncomingValue(V); }))
+    return false;
+
+  // Now, check users for unbreakable PHI nodes. If we have an unbreakable PHI
+  // node as user, we don't want to break this PHI either because it's unlikely
+  // to be beneficial. We would just explode the vector and reassemble it
+  // directly, wasting instructions.
+  for (const Value *U : I.users()) {
+    if (const auto *PU = dyn_cast<PHINode>(U)) {
+      if (!canBreakPHINode(*PU))
+        return false;
+    }
   }
 
-  // All elements covered, all of the extract elements will likely be
-  // combined.
-  return EltsCovered.all();
+  return BreakPhiNodesCache[&I] = true;
 }
 
 bool AMDGPUCodeGenPrepare::visitPHINode(PHINode &I) {
@@ -1460,23 +1521,8 @@ bool AMDGPUCodeGenPrepare::visitPHINode(PHINode &I) {
   if (!FVT || DL->getTypeSizeInBits(FVT) <= ScalarizeLargePHIsThreshold)
     return false;
 
-  // Try to avoid unprofitable cases:
-  // - Don't break PHIs that have no interesting incoming values. That is, where
-  // there is no clear opportunity to fold the "extractelement" instructions we
-  // would add.
-  //   - Note: IC does not run after this pass, so we're only interested in the
-  //     folding that the DAG combiner can do.
-  // - For simplicity, don't break PHIs that are used by other PHIs because it'd
-  // require us to determine if the whole "chain" can be converted or not. e.g.
-  // if we broke this PHI but not its user, we would actually make things worse.
-  if (!ForceScalarizeLargePHIs) {
-    if (none_of(
-            I.incoming_values(),
-            [&](Value *V) { return isInterestingPHIIncomingValue(V, FVT); }) ||
-        any_of(I.users(), [&](User *U) { return isa<PHINode>(U); })) {
-      return false;
-    }
-  }
+  if (!ForceScalarizeLargePHIs && !canBreakPHINode(I))
+    return false;
 
   struct VectorSlice {
     Type *Ty = nullptr;

diff  --git a/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-break-large-phis-heuristics.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-break-large-phis-heuristics.ll
index 504c7b4c6430f..4ff8647f5eaa2 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-break-large-phis-heuristics.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-break-large-phis-heuristics.ll
@@ -138,6 +138,197 @@ finally:
   ret void
 }
 
+define amdgpu_kernel void @shufflevec_inc_with_cst_op(<5 x double> %in, ptr %out, i1 %cond) {
+; CHECK-LABEL: @shufflevec_inc_with_cst_op(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 [[COND:%.*]], label [[THEN:%.*]], label [[ELSE:%.*]]
+; CHECK:       then:
+; CHECK-NEXT:    [[X:%.*]] = insertelement <5 x double> [[IN:%.*]], double 3.140000e+00, i64 3
+; CHECK-NEXT:    [[LARGEPHI_EXTRACTSLICE0:%.*]] = extractelement <5 x double> [[X]], i64 0
+; CHECK-NEXT:    [[LARGEPHI_EXTRACTSLICE1:%.*]] = extractelement <5 x double> [[X]], i64 1
+; CHECK-NEXT:    [[LARGEPHI_EXTRACTSLICE2:%.*]] = extractelement <5 x double> [[X]], i64 2
+; CHECK-NEXT:    [[LARGEPHI_EXTRACTSLICE3:%.*]] = extractelement <5 x double> [[X]], i64 3
+; CHECK-NEXT:    [[LARGEPHI_EXTRACTSLICE4:%.*]] = extractelement <5 x double> [[X]], i64 4
+; CHECK-NEXT:    br label [[FINALLY:%.*]]
+; CHECK:       else:
+; CHECK-NEXT:    [[SHUFFLED:%.*]] = shufflevector <5 x double> [[IN]], <5 x double> poison, <5 x i32> <i32 0, i32 3, i32 2, i32 1, i32 4>
+; CHECK-NEXT:    [[LARGEPHI_EXTRACTSLICE01:%.*]] = extractelement <5 x double> [[SHUFFLED]], i64 0
+; CHECK-NEXT:    [[LARGEPHI_EXTRACTSLICE12:%.*]] = extractelement <5 x double> [[SHUFFLED]], i64 1
+; CHECK-NEXT:    [[LARGEPHI_EXTRACTSLICE23:%.*]] = extractelement <5 x double> [[SHUFFLED]], i64 2
+; CHECK-NEXT:    [[LARGEPHI_EXTRACTSLICE34:%.*]] = extractelement <5 x double> [[SHUFFLED]], i64 3
+; CHECK-NEXT:    [[LARGEPHI_EXTRACTSLICE45:%.*]] = extractelement <5 x double> [[SHUFFLED]], i64 4
+; CHECK-NEXT:    br label [[FINALLY]]
+; CHECK:       finally:
+; CHECK-NEXT:    [[TMP0:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE0]], [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE01]], [[ELSE]] ]
+; CHECK-NEXT:    [[TMP1:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE1]], [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE12]], [[ELSE]] ]
+; CHECK-NEXT:    [[TMP2:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE2]], [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE23]], [[ELSE]] ]
+; CHECK-NEXT:    [[TMP3:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE3]], [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE34]], [[ELSE]] ]
+; CHECK-NEXT:    [[TMP4:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE4]], [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE45]], [[ELSE]] ]
+; CHECK-NEXT:    [[LARGEPHI_INSERTSLICE0:%.*]] = insertelement <5 x double> poison, double [[TMP0]], i64 0
+; CHECK-NEXT:    [[LARGEPHI_INSERTSLICE1:%.*]] = insertelement <5 x double> [[LARGEPHI_INSERTSLICE0]], double [[TMP1]], i64 1
+; CHECK-NEXT:    [[LARGEPHI_INSERTSLICE2:%.*]] = insertelement <5 x double> [[LARGEPHI_INSERTSLICE1]], double [[TMP2]], i64 2
+; CHECK-NEXT:    [[LARGEPHI_INSERTSLICE3:%.*]] = insertelement <5 x double> [[LARGEPHI_INSERTSLICE2]], double [[TMP3]], i64 3
+; CHECK-NEXT:    [[LARGEPHI_INSERTSLICE4:%.*]] = insertelement <5 x double> [[LARGEPHI_INSERTSLICE3]], double [[TMP4]], i64 4
+; CHECK-NEXT:    store <5 x double> [[LARGEPHI_INSERTSLICE4]], ptr [[OUT:%.*]], align 1
+; CHECK-NEXT:    ret void
+;
+entry:
+  br i1 %cond, label %then, label %else
+
+then:
+  %x = insertelement <5 x double> %in, double 3.140000e+00, i64 3
+  br label %finally
+
+else:
+  %shuffled = shufflevector <5 x double> %in, <5 x double> poison, <5 x i32> <i32 0, i32 3, i32 2, i32 1, i32 4>
+  br label %finally
+
+finally:
+  %val = phi <5 x double> [ %x, %then ], [ %shuffled, %else ]
+  store <5 x double> %val, ptr %out, align 1
+  ret void
+}
+
+define amdgpu_kernel void @shufflevec_inc_with_local_lhs(<5 x double> %in, ptr %out, i1 %cond) {
+; CHECK-LABEL: @shufflevec_inc_with_local_lhs(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 [[COND:%.*]], label [[THEN:%.*]], label [[ELSE:%.*]]
+; CHECK:       then:
+; CHECK-NEXT:    [[X:%.*]] = insertelement <5 x double> [[IN:%.*]], double 3.140000e+00, i64 3
+; CHECK-NEXT:    [[LARGEPHI_EXTRACTSLICE0:%.*]] = extractelement <5 x double> [[X]], i64 0
+; CHECK-NEXT:    [[LARGEPHI_EXTRACTSLICE1:%.*]] = extractelement <5 x double> [[X]], i64 1
+; CHECK-NEXT:    [[LARGEPHI_EXTRACTSLICE2:%.*]] = extractelement <5 x double> [[X]], i64 2
+; CHECK-NEXT:    [[LARGEPHI_EXTRACTSLICE3:%.*]] = extractelement <5 x double> [[X]], i64 3
+; CHECK-NEXT:    [[LARGEPHI_EXTRACTSLICE4:%.*]] = extractelement <5 x double> [[X]], i64 4
+; CHECK-NEXT:    br label [[FINALLY:%.*]]
+; CHECK:       else:
+; CHECK-NEXT:    [[LOCAL_SHUFFLE_SRC:%.*]] = insertelement <5 x double> [[IN]], double 3.250000e+00, i64 2
+; CHECK-NEXT:    [[SHUFFLED:%.*]] = shufflevector <5 x double> [[LOCAL_SHUFFLE_SRC]], <5 x double> [[IN]], <5 x i32> <i32 7, i32 3, i32 2, i32 5, i32 4>
+; CHECK-NEXT:    [[LARGEPHI_EXTRACTSLICE01:%.*]] = extractelement <5 x double> [[SHUFFLED]], i64 0
+; CHECK-NEXT:    [[LARGEPHI_EXTRACTSLICE12:%.*]] = extractelement <5 x double> [[SHUFFLED]], i64 1
+; CHECK-NEXT:    [[LARGEPHI_EXTRACTSLICE23:%.*]] = extractelement <5 x double> [[SHUFFLED]], i64 2
+; CHECK-NEXT:    [[LARGEPHI_EXTRACTSLICE34:%.*]] = extractelement <5 x double> [[SHUFFLED]], i64 3
+; CHECK-NEXT:    [[LARGEPHI_EXTRACTSLICE45:%.*]] = extractelement <5 x double> [[SHUFFLED]], i64 4
+; CHECK-NEXT:    br label [[FINALLY]]
+; CHECK:       finally:
+; CHECK-NEXT:    [[TMP0:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE0]], [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE01]], [[ELSE]] ]
+; CHECK-NEXT:    [[TMP1:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE1]], [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE12]], [[ELSE]] ]
+; CHECK-NEXT:    [[TMP2:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE2]], [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE23]], [[ELSE]] ]
+; CHECK-NEXT:    [[TMP3:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE3]], [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE34]], [[ELSE]] ]
+; CHECK-NEXT:    [[TMP4:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE4]], [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE45]], [[ELSE]] ]
+; CHECK-NEXT:    [[LARGEPHI_INSERTSLICE0:%.*]] = insertelement <5 x double> poison, double [[TMP0]], i64 0
+; CHECK-NEXT:    [[LARGEPHI_INSERTSLICE1:%.*]] = insertelement <5 x double> [[LARGEPHI_INSERTSLICE0]], double [[TMP1]], i64 1
+; CHECK-NEXT:    [[LARGEPHI_INSERTSLICE2:%.*]] = insertelement <5 x double> [[LARGEPHI_INSERTSLICE1]], double [[TMP2]], i64 2
+; CHECK-NEXT:    [[LARGEPHI_INSERTSLICE3:%.*]] = insertelement <5 x double> [[LARGEPHI_INSERTSLICE2]], double [[TMP3]], i64 3
+; CHECK-NEXT:    [[LARGEPHI_INSERTSLICE4:%.*]] = insertelement <5 x double> [[LARGEPHI_INSERTSLICE3]], double [[TMP4]], i64 4
+; CHECK-NEXT:    store <5 x double> [[LARGEPHI_INSERTSLICE4]], ptr [[OUT:%.*]], align 1
+; CHECK-NEXT:    ret void
+;
+entry:
+  br i1 %cond, label %then, label %else
+
+then:
+  %x = insertelement <5 x double> %in, double 3.140000e+00, i64 3
+  br label %finally
+
+else:
+  %local.shuffle.src = insertelement <5 x double> %in, double 3.250000e+00, i64 2
+  %shuffled = shufflevector <5 x double> %local.shuffle.src, <5 x double> %in, <5 x i32> <i32 7, i32 3, i32 2, i32 5, i32 4>
+  br label %finally
+
+finally:
+  %val = phi <5 x double> [ %x, %then ], [ %shuffled, %else ]
+  store <5 x double> %val, ptr %out, align 1
+  ret void
+}
+
+define amdgpu_kernel void @shufflevec_inc_with_local_rhs(<5 x double> %in, ptr %out, i1 %cond) {
+; CHECK-LABEL: @shufflevec_inc_with_local_rhs(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 [[COND:%.*]], label [[THEN:%.*]], label [[ELSE:%.*]]
+; CHECK:       then:
+; CHECK-NEXT:    [[X:%.*]] = insertelement <5 x double> [[IN:%.*]], double 3.140000e+00, i64 3
+; CHECK-NEXT:    [[LARGEPHI_EXTRACTSLICE0:%.*]] = extractelement <5 x double> [[X]], i64 0
+; CHECK-NEXT:    [[LARGEPHI_EXTRACTSLICE1:%.*]] = extractelement <5 x double> [[X]], i64 1
+; CHECK-NEXT:    [[LARGEPHI_EXTRACTSLICE2:%.*]] = extractelement <5 x double> [[X]], i64 2
+; CHECK-NEXT:    [[LARGEPHI_EXTRACTSLICE3:%.*]] = extractelement <5 x double> [[X]], i64 3
+; CHECK-NEXT:    [[LARGEPHI_EXTRACTSLICE4:%.*]] = extractelement <5 x double> [[X]], i64 4
+; CHECK-NEXT:    br label [[FINALLY:%.*]]
+; CHECK:       else:
+; CHECK-NEXT:    [[LOCAL_SHUFFLE_SRC:%.*]] = insertelement <5 x double> [[IN]], double 3.250000e+00, i64 2
+; CHECK-NEXT:    [[SHUFFLED:%.*]] = shufflevector <5 x double> [[IN]], <5 x double> [[LOCAL_SHUFFLE_SRC]], <5 x i32> <i32 7, i32 3, i32 2, i32 5, i32 4>
+; CHECK-NEXT:    [[LARGEPHI_EXTRACTSLICE01:%.*]] = extractelement <5 x double> [[SHUFFLED]], i64 0
+; CHECK-NEXT:    [[LARGEPHI_EXTRACTSLICE12:%.*]] = extractelement <5 x double> [[SHUFFLED]], i64 1
+; CHECK-NEXT:    [[LARGEPHI_EXTRACTSLICE23:%.*]] = extractelement <5 x double> [[SHUFFLED]], i64 2
+; CHECK-NEXT:    [[LARGEPHI_EXTRACTSLICE34:%.*]] = extractelement <5 x double> [[SHUFFLED]], i64 3
+; CHECK-NEXT:    [[LARGEPHI_EXTRACTSLICE45:%.*]] = extractelement <5 x double> [[SHUFFLED]], i64 4
+; CHECK-NEXT:    br label [[FINALLY]]
+; CHECK:       finally:
+; CHECK-NEXT:    [[TMP0:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE0]], [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE01]], [[ELSE]] ]
+; CHECK-NEXT:    [[TMP1:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE1]], [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE12]], [[ELSE]] ]
+; CHECK-NEXT:    [[TMP2:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE2]], [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE23]], [[ELSE]] ]
+; CHECK-NEXT:    [[TMP3:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE3]], [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE34]], [[ELSE]] ]
+; CHECK-NEXT:    [[TMP4:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE4]], [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE45]], [[ELSE]] ]
+; CHECK-NEXT:    [[LARGEPHI_INSERTSLICE0:%.*]] = insertelement <5 x double> poison, double [[TMP0]], i64 0
+; CHECK-NEXT:    [[LARGEPHI_INSERTSLICE1:%.*]] = insertelement <5 x double> [[LARGEPHI_INSERTSLICE0]], double [[TMP1]], i64 1
+; CHECK-NEXT:    [[LARGEPHI_INSERTSLICE2:%.*]] = insertelement <5 x double> [[LARGEPHI_INSERTSLICE1]], double [[TMP2]], i64 2
+; CHECK-NEXT:    [[LARGEPHI_INSERTSLICE3:%.*]] = insertelement <5 x double> [[LARGEPHI_INSERTSLICE2]], double [[TMP3]], i64 3
+; CHECK-NEXT:    [[LARGEPHI_INSERTSLICE4:%.*]] = insertelement <5 x double> [[LARGEPHI_INSERTSLICE3]], double [[TMP4]], i64 4
+; CHECK-NEXT:    store <5 x double> [[LARGEPHI_INSERTSLICE4]], ptr [[OUT:%.*]], align 1
+; CHECK-NEXT:    ret void
+;
+entry:
+  br i1 %cond, label %then, label %else
+
+then:
+  %x = insertelement <5 x double> %in, double 3.140000e+00, i64 3
+  br label %finally
+
+else:
+  %local.shuffle.src = insertelement <5 x double> %in, double 3.250000e+00, i64 2
+  %shuffled = shufflevector <5 x double> %in, <5 x double> %local.shuffle.src, <5 x i32> <i32 7, i32 3, i32 2, i32 5, i32 4>
+  br label %finally
+
+finally:
+  %val = phi <5 x double> [ %x, %then ], [ %shuffled, %else ]
+  store <5 x double> %val, ptr %out, align 1
+  ret void
+}
+
+define amdgpu_kernel void @shufflevec_inc_with_nonlocal_ops(<5 x double> %in, ptr %out, i1 %cond) {
+; CHECK-LABEL: @shufflevec_inc_with_nonlocal_ops(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[SHUFFLE_SRC:%.*]] = insertelement <5 x double> [[IN:%.*]], double 3.250000e+00, i64 2
+; CHECK-NEXT:    br i1 [[COND:%.*]], label [[THEN:%.*]], label [[ELSE:%.*]]
+; CHECK:       then:
+; CHECK-NEXT:    [[X:%.*]] = insertelement <5 x double> [[IN]], double 3.140000e+00, i64 3
+; CHECK-NEXT:    br label [[FINALLY:%.*]]
+; CHECK:       else:
+; CHECK-NEXT:    [[SHUFFLED:%.*]] = shufflevector <5 x double> [[SHUFFLE_SRC]], <5 x double> [[IN]], <5 x i32> <i32 7, i32 3, i32 2, i32 5, i32 4>
+; CHECK-NEXT:    br label [[FINALLY]]
+; CHECK:       finally:
+; CHECK-NEXT:    [[VAL:%.*]] = phi <5 x double> [ [[X]], [[THEN]] ], [ [[SHUFFLED]], [[ELSE]] ]
+; CHECK-NEXT:    store <5 x double> [[VAL]], ptr [[OUT:%.*]], align 1
+; CHECK-NEXT:    ret void
+;
+entry:
+  %shuffle.src = insertelement <5 x double> %in, double 3.250000e+00, i64 2
+  br i1 %cond, label %then, label %else
+
+then:
+  %x = insertelement <5 x double> %in, double 3.140000e+00, i64 3
+  br label %finally
+
+else:
+  %shuffled = shufflevector <5 x double> %shuffle.src, <5 x double> %in, <5 x i32> <i32 7, i32 3, i32 2, i32 5, i32 4>
+  br label %finally
+
+finally:
+  %val = phi <5 x double> [ %x, %then ], [ %shuffled, %else ]
+  store <5 x double> %val, ptr %out, align 1
+  ret void
+}
+
 define amdgpu_kernel void @trivial_insertelt_chain(<5 x double> %in, ptr %out, i1 %cond, double %x, double %y, double %z) {
 ; CHECK-LABEL: @trivial_insertelt_chain(
 ; CHECK-NEXT:  entry:
@@ -246,20 +437,39 @@ finally:
   ret void
 }
 
-define amdgpu_kernel void @nontrivial_insertelt_chain(<5 x double> %in, ptr %out, i1 %cond, double %x, i32 %idx) {
-; CHECK-LABEL: @nontrivial_insertelt_chain(
+define amdgpu_kernel void @insertelt_shufflevec(<5 x double> %in, ptr %out, i1 %cond, double %x, i32 %idx) {
+; CHECK-LABEL: @insertelt_shufflevec(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br i1 [[COND:%.*]], label [[THEN:%.*]], label [[ELSE:%.*]]
 ; CHECK:       then:
 ; CHECK-NEXT:    [[X_1:%.*]] = insertelement <5 x double> <double 3.140000e+00, double poison, double poison, double poison, double poison>, double [[X:%.*]], i32 [[IDX:%.*]]
 ; CHECK-NEXT:    [[TMP0:%.*]] = shufflevector <5 x double> [[X_1]], <5 x double> <double poison, double poison, double poison, double 6.140000e+00, double 9.900000e+00>, <5 x i32> <i32 0, i32 1, i32 poison, i32 8, i32 9>
 ; CHECK-NEXT:    [[X_4:%.*]] = insertelement <5 x double> [[TMP0]], double [[X]], i64 2
+; CHECK-NEXT:    [[LARGEPHI_EXTRACTSLICE0:%.*]] = extractelement <5 x double> [[X_4]], i64 0
+; CHECK-NEXT:    [[LARGEPHI_EXTRACTSLICE1:%.*]] = extractelement <5 x double> [[X_4]], i64 1
+; CHECK-NEXT:    [[LARGEPHI_EXTRACTSLICE2:%.*]] = extractelement <5 x double> [[X_4]], i64 2
+; CHECK-NEXT:    [[LARGEPHI_EXTRACTSLICE3:%.*]] = extractelement <5 x double> [[X_4]], i64 3
+; CHECK-NEXT:    [[LARGEPHI_EXTRACTSLICE4:%.*]] = extractelement <5 x double> [[X_4]], i64 4
 ; CHECK-NEXT:    br label [[FINALLY:%.*]]
 ; CHECK:       else:
+; CHECK-NEXT:    [[LARGEPHI_EXTRACTSLICE01:%.*]] = extractelement <5 x double> [[IN:%.*]], i64 0
+; CHECK-NEXT:    [[LARGEPHI_EXTRACTSLICE12:%.*]] = extractelement <5 x double> [[IN]], i64 1
+; CHECK-NEXT:    [[LARGEPHI_EXTRACTSLICE23:%.*]] = extractelement <5 x double> [[IN]], i64 2
+; CHECK-NEXT:    [[LARGEPHI_EXTRACTSLICE34:%.*]] = extractelement <5 x double> [[IN]], i64 3
+; CHECK-NEXT:    [[LARGEPHI_EXTRACTSLICE45:%.*]] = extractelement <5 x double> [[IN]], i64 4
 ; CHECK-NEXT:    br label [[FINALLY]]
 ; CHECK:       finally:
-; CHECK-NEXT:    [[VAL:%.*]] = phi <5 x double> [ [[X_4]], [[THEN]] ], [ [[IN:%.*]], [[ELSE]] ]
-; CHECK-NEXT:    store <5 x double> [[VAL]], ptr [[OUT:%.*]], align 1
+; CHECK-NEXT:    [[TMP1:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE0]], [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE01]], [[ELSE]] ]
+; CHECK-NEXT:    [[TMP2:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE1]], [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE12]], [[ELSE]] ]
+; CHECK-NEXT:    [[TMP3:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE2]], [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE23]], [[ELSE]] ]
+; CHECK-NEXT:    [[TMP4:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE3]], [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE34]], [[ELSE]] ]
+; CHECK-NEXT:    [[TMP5:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE4]], [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE45]], [[ELSE]] ]
+; CHECK-NEXT:    [[LARGEPHI_INSERTSLICE0:%.*]] = insertelement <5 x double> poison, double [[TMP1]], i64 0
+; CHECK-NEXT:    [[LARGEPHI_INSERTSLICE1:%.*]] = insertelement <5 x double> [[LARGEPHI_INSERTSLICE0]], double [[TMP2]], i64 1
+; CHECK-NEXT:    [[LARGEPHI_INSERTSLICE2:%.*]] = insertelement <5 x double> [[LARGEPHI_INSERTSLICE1]], double [[TMP3]], i64 2
+; CHECK-NEXT:    [[LARGEPHI_INSERTSLICE3:%.*]] = insertelement <5 x double> [[LARGEPHI_INSERTSLICE2]], double [[TMP4]], i64 3
+; CHECK-NEXT:    [[LARGEPHI_INSERTSLICE4:%.*]] = insertelement <5 x double> [[LARGEPHI_INSERTSLICE3]], double [[TMP5]], i64 4
+; CHECK-NEXT:    store <5 x double> [[LARGEPHI_INSERTSLICE4]], ptr [[OUT:%.*]], align 1
 ; CHECK-NEXT:    ret void
 ;
 entry:
@@ -354,38 +564,52 @@ finally:
   ret void
 }
 
-define amdgpu_kernel void @used_by_phi(<5 x double> %in, ptr %out, i1 %cond, i1 %cond2) {
-; CHECK-LABEL: @used_by_phi(
+define amdgpu_kernel void @used_by_breakable_phi(<5 x double> %in, ptr %out, i1 %cond, i1 %cond2) {
+; CHECK-LABEL: @used_by_breakable_phi(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br i1 [[COND:%.*]], label [[THEN:%.*]], label [[ELSE:%.*]]
 ; CHECK:       then:
 ; CHECK-NEXT:    [[X:%.*]] = insertelement <5 x double> [[IN:%.*]], double 3.140000e+00, i64 3
+; CHECK-NEXT:    [[LARGEPHI_EXTRACTSLICE0:%.*]] = extractelement <5 x double> [[X]], i64 0
+; CHECK-NEXT:    [[LARGEPHI_EXTRACTSLICE1:%.*]] = extractelement <5 x double> [[X]], i64 1
+; CHECK-NEXT:    [[LARGEPHI_EXTRACTSLICE2:%.*]] = extractelement <5 x double> [[X]], i64 2
+; CHECK-NEXT:    [[LARGEPHI_EXTRACTSLICE3:%.*]] = extractelement <5 x double> [[X]], i64 3
+; CHECK-NEXT:    [[LARGEPHI_EXTRACTSLICE4:%.*]] = extractelement <5 x double> [[X]], i64 4
 ; CHECK-NEXT:    br label [[FINALLY:%.*]]
 ; CHECK:       else:
 ; CHECK-NEXT:    br label [[FINALLY]]
 ; CHECK:       finally:
-; CHECK-NEXT:    [[VAL:%.*]] = phi <5 x double> [ [[X]], [[THEN]] ], [ zeroinitializer, [[ELSE]] ]
-; CHECK-NEXT:    store <5 x double> [[VAL]], ptr [[OUT:%.*]], align 1
-; CHECK-NEXT:    br i1 [[COND2:%.*]], label [[THEN1:%.*]], label [[END:%.*]]
-; CHECK:       then1:
-; CHECK-NEXT:    [[LARGEPHI_EXTRACTSLICE0:%.*]] = extractelement <5 x double> [[VAL]], i64 0
-; CHECK-NEXT:    [[LARGEPHI_EXTRACTSLICE1:%.*]] = extractelement <5 x double> [[VAL]], i64 1
-; CHECK-NEXT:    [[LARGEPHI_EXTRACTSLICE2:%.*]] = extractelement <5 x double> [[VAL]], i64 2
-; CHECK-NEXT:    [[LARGEPHI_EXTRACTSLICE3:%.*]] = extractelement <5 x double> [[VAL]], i64 3
-; CHECK-NEXT:    [[LARGEPHI_EXTRACTSLICE4:%.*]] = extractelement <5 x double> [[VAL]], i64 4
-; CHECK-NEXT:    br label [[END]]
-; CHECK:       end:
-; CHECK-NEXT:    [[TMP0:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE0]], [[THEN1]] ], [ 0.000000e+00, [[FINALLY]] ]
-; CHECK-NEXT:    [[TMP1:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE1]], [[THEN1]] ], [ 0.000000e+00, [[FINALLY]] ]
-; CHECK-NEXT:    [[TMP2:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE2]], [[THEN1]] ], [ 0.000000e+00, [[FINALLY]] ]
-; CHECK-NEXT:    [[TMP3:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE3]], [[THEN1]] ], [ 0.000000e+00, [[FINALLY]] ]
-; CHECK-NEXT:    [[TMP4:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE4]], [[THEN1]] ], [ 0.000000e+00, [[FINALLY]] ]
+; CHECK-NEXT:    [[TMP0:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE0]], [[THEN]] ], [ 0.000000e+00, [[ELSE]] ]
+; CHECK-NEXT:    [[TMP1:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE1]], [[THEN]] ], [ 0.000000e+00, [[ELSE]] ]
+; CHECK-NEXT:    [[TMP2:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE2]], [[THEN]] ], [ 0.000000e+00, [[ELSE]] ]
+; CHECK-NEXT:    [[TMP3:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE3]], [[THEN]] ], [ 0.000000e+00, [[ELSE]] ]
+; CHECK-NEXT:    [[TMP4:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE4]], [[THEN]] ], [ 0.000000e+00, [[ELSE]] ]
 ; CHECK-NEXT:    [[LARGEPHI_INSERTSLICE0:%.*]] = insertelement <5 x double> poison, double [[TMP0]], i64 0
 ; CHECK-NEXT:    [[LARGEPHI_INSERTSLICE1:%.*]] = insertelement <5 x double> [[LARGEPHI_INSERTSLICE0]], double [[TMP1]], i64 1
 ; CHECK-NEXT:    [[LARGEPHI_INSERTSLICE2:%.*]] = insertelement <5 x double> [[LARGEPHI_INSERTSLICE1]], double [[TMP2]], i64 2
 ; CHECK-NEXT:    [[LARGEPHI_INSERTSLICE3:%.*]] = insertelement <5 x double> [[LARGEPHI_INSERTSLICE2]], double [[TMP3]], i64 3
 ; CHECK-NEXT:    [[LARGEPHI_INSERTSLICE4:%.*]] = insertelement <5 x double> [[LARGEPHI_INSERTSLICE3]], double [[TMP4]], i64 4
-; CHECK-NEXT:    store <5 x double> [[LARGEPHI_INSERTSLICE4]], ptr [[OUT]], align 1
+; CHECK-NEXT:    store <5 x double> [[LARGEPHI_INSERTSLICE4]], ptr [[OUT:%.*]], align 1
+; CHECK-NEXT:    br i1 [[COND2:%.*]], label [[THEN1:%.*]], label [[END:%.*]]
+; CHECK:       then1:
+; CHECK-NEXT:    [[LARGEPHI_EXTRACTSLICE01:%.*]] = extractelement <5 x double> [[LARGEPHI_INSERTSLICE4]], i64 0
+; CHECK-NEXT:    [[LARGEPHI_EXTRACTSLICE12:%.*]] = extractelement <5 x double> [[LARGEPHI_INSERTSLICE4]], i64 1
+; CHECK-NEXT:    [[LARGEPHI_EXTRACTSLICE23:%.*]] = extractelement <5 x double> [[LARGEPHI_INSERTSLICE4]], i64 2
+; CHECK-NEXT:    [[LARGEPHI_EXTRACTSLICE34:%.*]] = extractelement <5 x double> [[LARGEPHI_INSERTSLICE4]], i64 3
+; CHECK-NEXT:    [[LARGEPHI_EXTRACTSLICE45:%.*]] = extractelement <5 x double> [[LARGEPHI_INSERTSLICE4]], i64 4
+; CHECK-NEXT:    br label [[END]]
+; CHECK:       end:
+; CHECK-NEXT:    [[TMP5:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE01]], [[THEN1]] ], [ 0.000000e+00, [[FINALLY]] ]
+; CHECK-NEXT:    [[TMP6:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE12]], [[THEN1]] ], [ 0.000000e+00, [[FINALLY]] ]
+; CHECK-NEXT:    [[TMP7:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE23]], [[THEN1]] ], [ 0.000000e+00, [[FINALLY]] ]
+; CHECK-NEXT:    [[TMP8:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE34]], [[THEN1]] ], [ 0.000000e+00, [[FINALLY]] ]
+; CHECK-NEXT:    [[TMP9:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE45]], [[THEN1]] ], [ 0.000000e+00, [[FINALLY]] ]
+; CHECK-NEXT:    [[LARGEPHI_INSERTSLICE06:%.*]] = insertelement <5 x double> poison, double [[TMP5]], i64 0
+; CHECK-NEXT:    [[LARGEPHI_INSERTSLICE17:%.*]] = insertelement <5 x double> [[LARGEPHI_INSERTSLICE06]], double [[TMP6]], i64 1
+; CHECK-NEXT:    [[LARGEPHI_INSERTSLICE28:%.*]] = insertelement <5 x double> [[LARGEPHI_INSERTSLICE17]], double [[TMP7]], i64 2
+; CHECK-NEXT:    [[LARGEPHI_INSERTSLICE39:%.*]] = insertelement <5 x double> [[LARGEPHI_INSERTSLICE28]], double [[TMP8]], i64 3
+; CHECK-NEXT:    [[LARGEPHI_INSERTSLICE410:%.*]] = insertelement <5 x double> [[LARGEPHI_INSERTSLICE39]], double [[TMP9]], i64 4
+; CHECK-NEXT:    store <5 x double> [[LARGEPHI_INSERTSLICE410]], ptr [[OUT]], align 1
 ; CHECK-NEXT:    ret void
 ;
 entry:
@@ -411,3 +635,75 @@ end:
   store <5 x double> %endval, ptr %out, align 1
   ret void
 }
+
+define amdgpu_kernel void @used_by_unbreakable_phi(<5 x double> %in, ptr %out, i1 %cond, i1 %cond2) {
+; CHECK-LABEL: @used_by_unbreakable_phi(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 [[COND:%.*]], label [[THEN:%.*]], label [[ELSE:%.*]]
+; CHECK:       then:
+; CHECK-NEXT:    [[X:%.*]] = insertelement <5 x double> [[IN:%.*]], double 3.140000e+00, i64 3
+; CHECK-NEXT:    br label [[FINALLY:%.*]]
+; CHECK:       else:
+; CHECK-NEXT:    br label [[FINALLY]]
+; CHECK:       finally:
+; CHECK-NEXT:    [[VAL:%.*]] = phi <5 x double> [ [[X]], [[THEN]] ], [ zeroinitializer, [[ELSE]] ]
+; CHECK-NEXT:    store <5 x double> [[VAL]], ptr [[OUT:%.*]], align 1
+; CHECK-NEXT:    br i1 [[COND2:%.*]], label [[THEN1:%.*]], label [[END:%.*]]
+; CHECK:       then1:
+; CHECK-NEXT:    br label [[END]]
+; CHECK:       end:
+; CHECK-NEXT:    [[ENDVAL:%.*]] = phi <5 x double> [ [[VAL]], [[THEN1]] ], [ [[IN]], [[FINALLY]] ]
+; CHECK-NEXT:    store <5 x double> [[ENDVAL]], ptr [[OUT]], align 1
+; CHECK-NEXT:    ret void
+;
+entry:
+  br i1 %cond, label %then, label %else
+
+then:
+  %x = insertelement <5 x double> %in, double 3.140000e+00, i64 3
+  br label %finally
+
+else:
+  br label %finally
+
+finally:
+  %val = phi <5 x double> [ %x, %then ], [ zeroinitializer, %else ]
+  store <5 x double> %val, ptr %out, align 1
+  br i1 %cond2, label %then1, label %end
+
+then1:
+  br label %end
+
+end:
+  %endval = phi <5 x double> [ %val, %then1 ], [ %in, %finally ]
+  store <5 x double> %endval, ptr %out, align 1
+  ret void
+}
+
+; check for infinite recursion
+define amdgpu_kernel void @used_by_phi_self(<5 x double> %in, ptr %out, i8 %count) {
+; CHECK-LABEL: @used_by_phi_self(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[VAL:%.*]] = phi <5 x double> [ [[IN:%.*]], [[ENTRY:%.*]] ], [ [[VAL]], [[LOOP]] ]
+; CHECK-NEXT:    store <5 x double> [[VAL]], ptr [[OUT:%.*]], align 1
+; CHECK-NEXT:    [[COUNT_DEC:%.*]] = sub i8 [[COUNT:%.*]], 0
+; CHECK-NEXT:    [[COND:%.*]] = icmp ne i8 [[COUNT]], 0
+; CHECK-NEXT:    br i1 [[COND]], label [[LOOP]], label [[END:%.*]]
+; CHECK:       end:
+; CHECK-NEXT:    ret void
+;
+entry:
+  br label %loop
+
+loop:
+  %val = phi <5 x double> [ %in, %entry ], [ %val, %loop ]
+  store <5 x double> %val, ptr %out, align 1
+  %count.dec = sub i8 %count, 0
+  %cond = icmp ne i8 %count, 0
+  br i1 %cond, label %loop, label %end
+
+end:
+  ret void
+}


        


More information about the llvm-commits mailing list