[llvm] [LV] Keep duplicate recipes in VPExpressionRecipe (PR #156976)

Sam Tebbs via llvm-commits llvm-commits at lists.llvm.org
Tue Sep 30 09:32:29 PDT 2025


https://github.com/SamTebbs33 updated https://github.com/llvm/llvm-project/pull/156976

>From 44f216a28c0208c4c21be613cab10a62ad32a0b2 Mon Sep 17 00:00:00 2001
From: Sam Tebbs <samuel.tebbs at arm.com>
Date: Thu, 4 Sep 2025 23:21:21 +0100
Subject: [PATCH 1/5] [LV] Keep duplicate recipes in VPExpressionRecipe

The VPExpressionRecipe class uses a set to store its bundled recipes. If
repeated recipes are bundled then the duplicates will be lost, causing
the following recipes to not be at the expected place in the set.

When printing a reduce.add(mul(ext, ext)) bundle, if the extends are the
same then the 3rd element of the set will the the reduction, rather than
the expected mul, causing a cast error. With this change, the recipes
are at the expected index in the set.

Fixes #156464
---
 llvm/lib/Transforms/Vectorize/VPlan.h         |  8 +++-
 .../lib/Transforms/Vectorize/VPlanRecipes.cpp | 29 +++++++++---
 .../vplan-printing-reductions.ll              | 47 +++++++++++++++++++
 3 files changed, 76 insertions(+), 8 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index f79855f7e2c5f..e6f6067bc9df3 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -29,6 +29,7 @@
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/SmallBitVector.h"
 #include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Twine.h"
 #include "llvm/ADT/ilist.h"
@@ -3013,8 +3014,11 @@ class VPExpressionRecipe : public VPSingleDefRecipe {
                            {Ext0, Ext1, Mul, Red}) {}
 
   ~VPExpressionRecipe() override {
-    for (auto *R : reverse(ExpressionRecipes))
-      delete R;
+    SmallSet<VPSingleDefRecipe *, 4> ExpressionRecipesSeen;
+    for (auto *R : reverse(ExpressionRecipes)) {
+      if (ExpressionRecipesSeen.insert(R).second)
+        delete R;
+    }
     for (VPValue *T : LiveInPlaceholders)
       delete T;
   }
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 8e9c3db50319f..dd4c325d2c9f9 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -2761,9 +2761,8 @@ VPExpressionRecipe::VPExpressionRecipe(
     ExpressionTypes ExpressionType,
     ArrayRef<VPSingleDefRecipe *> ExpressionRecipes)
     : VPSingleDefRecipe(VPDef::VPExpressionSC, {}, {}),
-      ExpressionRecipes(SetVector<VPSingleDefRecipe *>(
-                            ExpressionRecipes.begin(), ExpressionRecipes.end())
-                            .takeVector()),
+      ExpressionRecipes(SmallVector<VPSingleDefRecipe *>(
+          ExpressionRecipes.begin(), ExpressionRecipes.end())),
       ExpressionType(ExpressionType) {
   assert(!ExpressionRecipes.empty() && "Nothing to combine?");
   assert(
@@ -2797,25 +2796,43 @@ VPExpressionRecipe::VPExpressionRecipe(
       R->removeFromParent();
   }
 
+  // Keep track of how many instances of each recipe occur in the recipe list
+  SmallMapVector<VPSingleDefRecipe *, unsigned, 4> ExpressionRecipeCounts;
+  for (auto *R : ExpressionRecipes) {
+    auto *F = ExpressionRecipeCounts.find(R);
+    if (F == ExpressionRecipeCounts.end())
+      ExpressionRecipeCounts.insert(std::make_pair(R, 1));
+    else
+      F->second++;
+  }
+
   // Internalize all external operands to the expression recipes. To do so,
   // create new temporary VPValues for all operands defined by a recipe outside
   // the expression. The original operands are added as operands of the
   // VPExpressionRecipe itself.
   for (auto *R : ExpressionRecipes) {
+    auto *F = ExpressionRecipeCounts.find(R);
+    F->second--;
     for (const auto &[Idx, Op] : enumerate(R->operands())) {
       auto *Def = Op->getDefiningRecipe();
       if (Def && ExpressionRecipesAsSetOfUsers.contains(Def))
         continue;
       addOperand(Op);
-      LiveInPlaceholders.push_back(new VPValue());
-      R->setOperand(Idx, LiveInPlaceholders.back());
+      auto *Tmp = new VPValue();
+      Tmp->setUnderlyingValue(Op->getUnderlyingValue());
+      LiveInPlaceholders.push_back(Tmp);
+      // Only modify this recipe's operands if it's the last time it occurs in
+      // the recipe list
+      if (F->second == 0)
+        R->setOperand(Idx, Tmp);
     }
   }
 }
 
 void VPExpressionRecipe::decompose() {
   for (auto *R : ExpressionRecipes)
-    R->insertBefore(this);
+    if (!R->getParent())
+      R->insertBefore(this);
 
   for (const auto &[Idx, Op] : enumerate(operands()))
     LiveInPlaceholders[Idx]->replaceAllUsesWith(Op);
diff --git a/llvm/test/Transforms/LoopVectorize/vplan-printing-reductions.ll b/llvm/test/Transforms/LoopVectorize/vplan-printing-reductions.ll
index 4e6ef0de6a9ed..db64eb32f0320 100644
--- a/llvm/test/Transforms/LoopVectorize/vplan-printing-reductions.ll
+++ b/llvm/test/Transforms/LoopVectorize/vplan-printing-reductions.ll
@@ -632,3 +632,50 @@ exit:
   %r.0.lcssa = phi i64 [ %rdx.next, %loop ]
   ret i64 %r.0.lcssa
 }
+
+define i64 @print_mulacc_duplicate_extends(ptr nocapture readonly %x, ptr nocapture readonly %y, i32 %n) {
+; CHECK-LABEL: 'print_mulacc_duplicate_extends'
+; CHECK:      VPlan 'Initial VPlan for VF={4},UF>=1' {
+; CHECK-NEXT: Live-in vp<[[VF:%.+]]> = VF
+; CHECK-NEXT: Live-in vp<[[VFxUF:%.+]]> = VF * UF
+; CHECK-NEXT: Live-in vp<[[VTC:%.+]]> = vector-trip-count
+; CHECK-NEXT: Live-in ir<%n> = original trip-count
+; CHECK-EMPTY:
+; CHECK:      vector.ph:
+; CHECK-NEXT:   EMIT vp<[[RDX_START:%.+]]> = reduction-start-vector ir<0>, ir<0>, ir<1>
+; CHECK-NEXT: Successor(s): vector loop
+; CHECK-EMPTY:
+; CHECK-NEXT: <x1> vector loop: {
+; CHECK-NEXT:   vector.body:
+; CHECK-NEXT:     EMIT vp<[[IV:%.+]]> = CANONICAL-INDUCTION ir<0>, vp<[[IV_NEXT:%.+]]>
+; CHECK-NEXT:     WIDEN-REDUCTION-PHI ir<[[RDX:%.+]]> = phi vp<[[RDX_START]]>, vp<[[RDX_NEXT:%.+]]>
+; CHECK-NEXT:     vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[IV]]>, ir<1>
+; CHECK-NEXT:     CLONE ir<[[ARRAYIDX0:%.+]]> = getelementptr inbounds ir<%x>, vp<[[STEPS]]>
+; CHECK-NEXT:     vp<[[ADDR0:%.+]]> = vector-pointer ir<[[ARRAYIDX0]]>
+; CHECK-NEXT:     WIDEN ir<[[LOAD0:%.+]]> = load vp<[[ADDR0]]>
+; CHECK-NEXT:     EXPRESSION vp<[[RDX_NEXT:%.+]]> = ir<[[RDX]]> + reduce.sub (mul nsw (ir<[[LOAD0]]> sext to i64), (ir<[[LOAD0]]> sext to i64))
+; CHECK-NEXT:     EMIT vp<[[IV_NEXT]]> = add nuw vp<[[IV]]>, vp<[[VFxUF]]>
+; CHECK-NEXT:     EMIT branch-on-count vp<[[IV_NEXT]]>, vp<[[VTC]]>
+; CHECK-NEXT:   No successors
+; CHECK-NEXT: }
+;
+entry:
+  br label %loop
+
+loop:
+  %iv = phi i32 [ %iv.next, %loop ], [ 0, %entry ]
+  %rdx = phi i64 [ %rdx.next, %loop ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds i16, ptr %x, i32 %iv
+  %load0 = load i16, ptr %arrayidx, align 4
+  %conv0 = sext i16 %load0 to i32
+  %mul = mul nsw i32 %conv0, %conv0
+  %conv = sext i32 %mul to i64
+  %rdx.next = sub nsw i64 %rdx, %conv
+  %iv.next = add nuw nsw i32 %iv, 1
+  %exitcond = icmp eq i32 %iv.next, %n
+  br i1 %exitcond, label %exit, label %loop
+
+exit:
+  %r.0.lcssa = phi i64 [ %rdx.next, %loop ]
+  ret i64 %r.0.lcssa
+}

>From 23005065ceb7551e80eeb4e2c24e6515234742e4 Mon Sep 17 00:00:00 2001
From: Sam Tebbs <samuel.tebbs at arm.com>
Date: Fri, 19 Sep 2025 13:25:37 +0100
Subject: [PATCH 2/5] remove underlying value set

---
 llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index dd4c325d2c9f9..4568d4f37a751 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -2819,7 +2819,6 @@ VPExpressionRecipe::VPExpressionRecipe(
         continue;
       addOperand(Op);
       auto *Tmp = new VPValue();
-      Tmp->setUnderlyingValue(Op->getUnderlyingValue());
       LiveInPlaceholders.push_back(Tmp);
       // Only modify this recipe's operands if it's the last time it occurs in
       // the recipe list

>From 211ed7686a069a79056663180cbece9288959074 Mon Sep 17 00:00:00 2001
From: Samuel Tebbs <samuel.tebbs at arm.com>
Date: Tue, 23 Sep 2025 21:55:33 +0100
Subject: [PATCH 3/5] Cache placeholder values

---
 llvm/lib/Transforms/Vectorize/VPlan.h         |  7 ++--
 .../lib/Transforms/Vectorize/VPlanRecipes.cpp | 34 +++++++++----------
 2 files changed, 21 insertions(+), 20 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index e6f6067bc9df3..5aad3a3b47492 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -3019,8 +3019,11 @@ class VPExpressionRecipe : public VPSingleDefRecipe {
       if (ExpressionRecipesSeen.insert(R).second)
         delete R;
     }
-    for (VPValue *T : LiveInPlaceholders)
-      delete T;
+    SmallSet<VPValue *, 4> PlaceholdersSeen;
+    for (VPValue *T : LiveInPlaceholders) {
+      if (PlaceholdersSeen.insert(T).second)
+        delete T;
+    }
   }
 
   VP_CLASSOF_IMPL(VPDef::VPExpressionSC)
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 4568d4f37a751..af593343020db 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -2796,34 +2796,32 @@ VPExpressionRecipe::VPExpressionRecipe(
       R->removeFromParent();
   }
 
-  // Keep track of how many instances of each recipe occur in the recipe list
-  SmallMapVector<VPSingleDefRecipe *, unsigned, 4> ExpressionRecipeCounts;
-  for (auto *R : ExpressionRecipes) {
-    auto *F = ExpressionRecipeCounts.find(R);
-    if (F == ExpressionRecipeCounts.end())
-      ExpressionRecipeCounts.insert(std::make_pair(R, 1));
-    else
-      F->second++;
-  }
-
   // Internalize all external operands to the expression recipes. To do so,
   // create new temporary VPValues for all operands defined by a recipe outside
   // the expression. The original operands are added as operands of the
   // VPExpressionRecipe itself.
+
+  // This map caches the temporary placeholders so duplicates aren't created in
+  // the case that recipes share operands.
+  SmallMapVector<VPValue *, VPValue *, 4> OperandPlaceholders;
+
   for (auto *R : ExpressionRecipes) {
-    auto *F = ExpressionRecipeCounts.find(R);
-    F->second--;
     for (const auto &[Idx, Op] : enumerate(R->operands())) {
       auto *Def = Op->getDefiningRecipe();
       if (Def && ExpressionRecipesAsSetOfUsers.contains(Def))
         continue;
       addOperand(Op);
-      auto *Tmp = new VPValue();
-      LiveInPlaceholders.push_back(Tmp);
-      // Only modify this recipe's operands if it's the last time it occurs in
-      // the recipe list
-      if (F->second == 0)
-        R->setOperand(Idx, Tmp);
+      if (OperandPlaceholders.find(Op) == OperandPlaceholders.end())
+        OperandPlaceholders[Op] = new VPValue();
+      LiveInPlaceholders.push_back(OperandPlaceholders[Op]);
+    }
+  }
+
+  for (auto *R : ExpressionRecipes) {
+    for (const auto &[Idx, Op] : enumerate(R->operands())) {
+      auto *Entry = OperandPlaceholders.find(Op);
+      if (Entry != OperandPlaceholders.end())
+        R->setOperand(Idx, Entry->second);
     }
   }
 }

>From 32a86719979229ecc723edba1256a29d491f6ef0 Mon Sep 17 00:00:00 2001
From: Sam Tebbs <samuel.tebbs at arm.com>
Date: Fri, 26 Sep 2025 10:57:24 +0100
Subject: [PATCH 4/5] Address review

---
 llvm/lib/Transforms/Vectorize/VPlan.h         | 10 ++++-----
 .../lib/Transforms/Vectorize/VPlanRecipes.cpp | 21 +++++++------------
 2 files changed, 12 insertions(+), 19 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 5aad3a3b47492..e65547b932a84 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -2969,7 +2969,8 @@ class LLVM_ABI_FOR_TEST VPBranchOnMaskRecipe : public VPRecipeBase {
 /// the expression is elevated to connect the non-expression recipe with the
 /// VPExpressionRecipe itself.
 class VPExpressionRecipe : public VPSingleDefRecipe {
-  /// Recipes included in this VPExpressionRecipe.
+  /// Recipes included in this VPExpressionRecipe. This could contain
+  /// duplicates.
   SmallVector<VPSingleDefRecipe *> ExpressionRecipes;
 
   /// Temporary VPValues used for external operands of the expression, i.e.
@@ -3019,11 +3020,8 @@ class VPExpressionRecipe : public VPSingleDefRecipe {
       if (ExpressionRecipesSeen.insert(R).second)
         delete R;
     }
-    SmallSet<VPValue *, 4> PlaceholdersSeen;
-    for (VPValue *T : LiveInPlaceholders) {
-      if (PlaceholdersSeen.insert(T).second)
-        delete T;
-    }
+    for (VPValue *T : LiveInPlaceholders)
+      delete T;
   }
 
   VP_CLASSOF_IMPL(VPDef::VPExpressionSC)
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index af593343020db..79c94bf05291a 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -2801,33 +2801,28 @@ VPExpressionRecipe::VPExpressionRecipe(
   // the expression. The original operands are added as operands of the
   // VPExpressionRecipe itself.
 
-  // This map caches the temporary placeholders so duplicates aren't created in
-  // the case that recipes share operands.
   SmallMapVector<VPValue *, VPValue *, 4> OperandPlaceholders;
-
   for (auto *R : ExpressionRecipes) {
     for (const auto &[Idx, Op] : enumerate(R->operands())) {
       auto *Def = Op->getDefiningRecipe();
       if (Def && ExpressionRecipesAsSetOfUsers.contains(Def))
         continue;
       addOperand(Op);
-      if (OperandPlaceholders.find(Op) == OperandPlaceholders.end())
-        OperandPlaceholders[Op] = new VPValue();
-      LiveInPlaceholders.push_back(OperandPlaceholders[Op]);
+      VPValue *Tmp = new VPValue();
+      OperandPlaceholders[Op] = Tmp;
+      LiveInPlaceholders.push_back(Tmp);
     }
   }
 
-  for (auto *R : ExpressionRecipes) {
-    for (const auto &[Idx, Op] : enumerate(R->operands())) {
-      auto *Entry = OperandPlaceholders.find(Op);
-      if (Entry != OperandPlaceholders.end())
-        R->setOperand(Idx, Entry->second);
-    }
-  }
+  for (auto *R : ExpressionRecipes)
+    for (auto &Entry : OperandPlaceholders)
+      R->replaceUsesOfWith(Entry.first, Entry.second);
 }
 
 void VPExpressionRecipe::decompose() {
   for (auto *R : ExpressionRecipes)
+    // Since the list could contain duplicates, make sure the recipe hasn't
+    // already been inserted.
     if (!R->getParent())
       R->insertBefore(this);
 

>From e3a2221300bf074166c32aeaf5310821f2976810 Mon Sep 17 00:00:00 2001
From: Samuel Tebbs <samuel.tebbs at arm.com>
Date: Tue, 30 Sep 2025 13:55:20 +0100
Subject: [PATCH 5/5] Use SmallPtrSet and remove placeholder map

---
 llvm/lib/Transforms/Vectorize/VPlan.h         |   2 +-
 .../lib/Transforms/Vectorize/VPlanRecipes.cpp |   9 +-
 .../LoopVectorize/reduction-inloop.ll         | 124 ++++++++++++++++++
 3 files changed, 128 insertions(+), 7 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index e65547b932a84..240fe009a3ca7 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -3015,7 +3015,7 @@ class VPExpressionRecipe : public VPSingleDefRecipe {
                            {Ext0, Ext1, Mul, Red}) {}
 
   ~VPExpressionRecipe() override {
-    SmallSet<VPSingleDefRecipe *, 4> ExpressionRecipesSeen;
+    SmallPtrSet<VPSingleDefRecipe *, 4> ExpressionRecipesSeen;
     for (auto *R : reverse(ExpressionRecipes)) {
       if (ExpressionRecipesSeen.insert(R).second)
         delete R;
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 79c94bf05291a..e9f762c1347d0 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -2801,22 +2801,19 @@ VPExpressionRecipe::VPExpressionRecipe(
   // the expression. The original operands are added as operands of the
   // VPExpressionRecipe itself.
 
-  SmallMapVector<VPValue *, VPValue *, 4> OperandPlaceholders;
   for (auto *R : ExpressionRecipes) {
     for (const auto &[Idx, Op] : enumerate(R->operands())) {
       auto *Def = Op->getDefiningRecipe();
       if (Def && ExpressionRecipesAsSetOfUsers.contains(Def))
         continue;
       addOperand(Op);
-      VPValue *Tmp = new VPValue();
-      OperandPlaceholders[Op] = Tmp;
-      LiveInPlaceholders.push_back(Tmp);
+      LiveInPlaceholders.push_back(new VPValue());
     }
   }
 
   for (auto *R : ExpressionRecipes)
-    for (auto &Entry : OperandPlaceholders)
-      R->replaceUsesOfWith(Entry.first, Entry.second);
+    for (auto const &[LiveIn, Tmp] : zip(operands(), LiveInPlaceholders))
+      R->replaceUsesOfWith(LiveIn, Tmp);
 }
 
 void VPExpressionRecipe::decompose() {
diff --git a/llvm/test/Transforms/LoopVectorize/reduction-inloop.ll b/llvm/test/Transforms/LoopVectorize/reduction-inloop.ll
index 57c8af86fa87f..33c9144e2f918 100644
--- a/llvm/test/Transforms/LoopVectorize/reduction-inloop.ll
+++ b/llvm/test/Transforms/LoopVectorize/reduction-inloop.ll
@@ -2788,6 +2788,130 @@ for.end:                                          ; preds = %for.body, %entry
   ret i32 %x.0.lcssa
 }
 
+; Test that bundling recipes that share an operand into an expression works.
+; In this case the two extends are the recipes that share an operand.
+define i64 @reduction_expression_same_operands(ptr nocapture readonly %x, ptr nocapture readonly %y, i32 %n) {
+; CHECK-LABEL: @reduction_expression_same_operands(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[N:%.*]], 4
+; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; CHECK:       vector.ph:
+; CHECK-NEXT:    [[N_VEC:%.*]] = and i32 [[N]], -4
+; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
+; CHECK:       vector.body:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[TMP6:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[TMP0:%.*]] = sext i32 [[INDEX]] to i64
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i16, ptr [[X:%.*]], i64 [[TMP0]]
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i16>, ptr [[TMP1]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = sext <4 x i16> [[WIDE_LOAD]] to <4 x i64>
+; CHECK-NEXT:    [[TMP3:%.*]] = sext <4 x i16> [[WIDE_LOAD]] to <4 x i64>
+; CHECK-NEXT:    [[TMP4:%.*]] = mul nsw <4 x i64> [[TMP2]], [[TMP3]]
+; CHECK-NEXT:    [[TMP5:%.*]] = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> [[TMP4]])
+; CHECK-NEXT:    [[TMP6]] = add i64 [[VEC_PHI]], [[TMP5]]
+; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
+; CHECK-NEXT:    [[TMP7:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT:    br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP33:![0-9]+]]
+; CHECK:       middle.block:
+; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i32 [[N]], [[N_VEC]]
+; CHECK-NEXT:    br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
+; CHECK:       scalar.ph:
+; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i64 [ [[TMP6]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
+; CHECK-NEXT:    [[RDX:%.*]] = phi i64 [ [[RDX_NEXT:%.*]], [[LOOP]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ]
+; CHECK-NEXT:    [[TMP8:%.*]] = sext i32 [[IV]] to i64
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[X]], i64 [[TMP8]]
+; CHECK-NEXT:    [[LOAD0:%.*]] = load i16, ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT:    [[CONV0:%.*]] = sext i16 [[LOAD0]] to i64
+; CHECK-NEXT:    [[CONV1:%.*]] = sext i16 [[LOAD0]] to i64
+; CHECK-NEXT:    [[MUL:%.*]] = mul nsw i64 [[CONV0]], [[CONV1]]
+; CHECK-NEXT:    [[RDX_NEXT]] = add nsw i64 [[RDX]], [[MUL]]
+; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1
+; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[IV_NEXT]], [[N]]
+; CHECK-NEXT:    br i1 [[EXITCOND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP34:![0-9]+]]
+; CHECK:       exit:
+; CHECK-NEXT:    [[R_0_LCSSA:%.*]] = phi i64 [ [[RDX_NEXT]], [[LOOP]] ], [ [[TMP6]], [[MIDDLE_BLOCK]] ]
+; CHECK-NEXT:    ret i64 [[R_0_LCSSA]]
+;
+; CHECK-INTERLEAVED-LABEL: @reduction_expression_same_operands(
+; CHECK-INTERLEAVED-NEXT:  entry:
+; CHECK-INTERLEAVED-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[N:%.*]], 8
+; CHECK-INTERLEAVED-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; CHECK-INTERLEAVED:       vector.ph:
+; CHECK-INTERLEAVED-NEXT:    [[N_VEC:%.*]] = and i32 [[N]], -8
+; CHECK-INTERLEAVED-NEXT:    br label [[VECTOR_BODY:%.*]]
+; CHECK-INTERLEAVED:       vector.body:
+; CHECK-INTERLEAVED-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-INTERLEAVED-NEXT:    [[VEC_PHI:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[TMP7:%.*]], [[VECTOR_BODY]] ]
+; CHECK-INTERLEAVED-NEXT:    [[VEC_PHI1:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[TMP12:%.*]], [[VECTOR_BODY]] ]
+; CHECK-INTERLEAVED-NEXT:    [[TMP0:%.*]] = sext i32 [[INDEX]] to i64
+; CHECK-INTERLEAVED-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i16, ptr [[X:%.*]], i64 [[TMP0]]
+; CHECK-INTERLEAVED-NEXT:    [[TMP2:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 8
+; CHECK-INTERLEAVED-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i16>, ptr [[TMP1]], align 4
+; CHECK-INTERLEAVED-NEXT:    [[WIDE_LOAD2:%.*]] = load <4 x i16>, ptr [[TMP2]], align 4
+; CHECK-INTERLEAVED-NEXT:    [[TMP3:%.*]] = sext <4 x i16> [[WIDE_LOAD]] to <4 x i64>
+; CHECK-INTERLEAVED-NEXT:    [[TMP4:%.*]] = sext <4 x i16> [[WIDE_LOAD]] to <4 x i64>
+; CHECK-INTERLEAVED-NEXT:    [[TMP5:%.*]] = mul nsw <4 x i64> [[TMP3]], [[TMP4]]
+; CHECK-INTERLEAVED-NEXT:    [[TMP6:%.*]] = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> [[TMP5]])
+; CHECK-INTERLEAVED-NEXT:    [[TMP7]] = add i64 [[VEC_PHI]], [[TMP6]]
+; CHECK-INTERLEAVED-NEXT:    [[TMP8:%.*]] = sext <4 x i16> [[WIDE_LOAD2]] to <4 x i64>
+; CHECK-INTERLEAVED-NEXT:    [[TMP9:%.*]] = sext <4 x i16> [[WIDE_LOAD2]] to <4 x i64>
+; CHECK-INTERLEAVED-NEXT:    [[TMP10:%.*]] = mul nsw <4 x i64> [[TMP8]], [[TMP9]]
+; CHECK-INTERLEAVED-NEXT:    [[TMP11:%.*]] = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> [[TMP10]])
+; CHECK-INTERLEAVED-NEXT:    [[TMP12]] = add i64 [[VEC_PHI1]], [[TMP11]]
+; CHECK-INTERLEAVED-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8
+; CHECK-INTERLEAVED-NEXT:    [[TMP13:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-INTERLEAVED-NEXT:    br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP33:![0-9]+]]
+; CHECK-INTERLEAVED:       middle.block:
+; CHECK-INTERLEAVED-NEXT:    [[BIN_RDX:%.*]] = add i64 [[TMP12]], [[TMP7]]
+; CHECK-INTERLEAVED-NEXT:    [[CMP_N:%.*]] = icmp eq i32 [[N]], [[N_VEC]]
+; CHECK-INTERLEAVED-NEXT:    br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
+; CHECK-INTERLEAVED:       scalar.ph:
+; CHECK-INTERLEAVED-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-INTERLEAVED-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i64 [ [[BIN_RDX]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
+; CHECK-INTERLEAVED-NEXT:    br label [[LOOP:%.*]]
+; CHECK-INTERLEAVED:       loop:
+; CHECK-INTERLEAVED-NEXT:    [[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
+; CHECK-INTERLEAVED-NEXT:    [[RDX:%.*]] = phi i64 [ [[RDX_NEXT:%.*]], [[LOOP]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ]
+; CHECK-INTERLEAVED-NEXT:    [[TMP14:%.*]] = sext i32 [[IV]] to i64
+; CHECK-INTERLEAVED-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[X]], i64 [[TMP14]]
+; CHECK-INTERLEAVED-NEXT:    [[LOAD0:%.*]] = load i16, ptr [[ARRAYIDX]], align 4
+; CHECK-INTERLEAVED-NEXT:    [[CONV0:%.*]] = sext i16 [[LOAD0]] to i64
+; CHECK-INTERLEAVED-NEXT:    [[CONV1:%.*]] = sext i16 [[LOAD0]] to i64
+; CHECK-INTERLEAVED-NEXT:    [[MUL:%.*]] = mul nsw i64 [[CONV0]], [[CONV1]]
+; CHECK-INTERLEAVED-NEXT:    [[RDX_NEXT]] = add nsw i64 [[RDX]], [[MUL]]
+; CHECK-INTERLEAVED-NEXT:    [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1
+; CHECK-INTERLEAVED-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[IV_NEXT]], [[N]]
+; CHECK-INTERLEAVED-NEXT:    br i1 [[EXITCOND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP34:![0-9]+]]
+; CHECK-INTERLEAVED:       exit:
+; CHECK-INTERLEAVED-NEXT:    [[R_0_LCSSA:%.*]] = phi i64 [ [[RDX_NEXT]], [[LOOP]] ], [ [[BIN_RDX]], [[MIDDLE_BLOCK]] ]
+; CHECK-INTERLEAVED-NEXT:    ret i64 [[R_0_LCSSA]]
+;
+entry:
+  br label %loop
+
+loop:
+  %iv = phi i32 [ %iv.next, %loop ], [ 0, %entry ]
+  %rdx = phi i64 [ %rdx.next, %loop ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds i16, ptr %x, i32 %iv
+  %load0 = load i16, ptr %arrayidx, align 4
+  %conv0 = sext i16 %load0 to i32
+  %conv1 = sext i16 %load0 to i32
+  %mul = mul nsw i32 %conv0, %conv1
+  %conv = sext i32 %mul to i64
+  %rdx.next = add nsw i64 %rdx, %conv
+  %iv.next = add nuw nsw i32 %iv, 1
+  %exitcond = icmp eq i32 %iv.next, %n
+  br i1 %exitcond, label %exit, label %loop
+
+exit:
+  %r.0.lcssa = phi i64 [ %rdx.next, %loop ]
+  ret i64 %r.0.lcssa
+}
+
 declare float @llvm.fmuladd.f32(float, float, float)
 
 !6 = distinct !{!6, !7, !8}



More information about the llvm-commits mailing list