[llvm] [SLP] Make getSameOpcode support interchangeable instructions. (PR #127450)

Han-Kuan Chen via llvm-commits llvm-commits at lists.llvm.org
Sun Feb 16 23:32:39 PST 2025


https://github.com/HanKuanChen updated https://github.com/llvm/llvm-project/pull/127450

>From 4c951fcd35caa94bad8d858909cc3ebb74460124 Mon Sep 17 00:00:00 2001
From: Han-Kuan Chen <hankuan.chen at sifive.com>
Date: Thu, 13 Feb 2025 23:58:17 -0800
Subject: [PATCH 1/8] [SLP] NFC. Update test.

---
 ...gathered-delayed-nodes-with-reused-user.ll | 26 +++++++++----------
 1 file changed, 13 insertions(+), 13 deletions(-)

diff --git a/llvm/test/Transforms/SLPVectorizer/X86/gathered-delayed-nodes-with-reused-user.ll b/llvm/test/Transforms/SLPVectorizer/X86/gathered-delayed-nodes-with-reused-user.ll
index fa33621de5ae7..f49bac26e846b 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/gathered-delayed-nodes-with-reused-user.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/gathered-delayed-nodes-with-reused-user.ll
@@ -9,23 +9,23 @@ define i64 @foo() {
 ; CHECK-NEXT:    br label [[BB3:%.*]]
 ; CHECK:       bb1:
 ; CHECK-NEXT:    [[PHI:%.*]] = phi i64 [ [[ADD:%.*]], [[BB3]] ]
-; CHECK-NEXT:    [[PHI2:%.*]] = phi i64 [ [[TMP9:%.*]], [[BB3]] ]
+; CHECK-NEXT:    [[PHI2:%.*]] = phi i64 [ [[OR:%.*]], [[BB3]] ]
 ; CHECK-NEXT:    ret i64 0
 ; CHECK:       bb3:
-; CHECK-NEXT:    [[PHI5:%.*]] = phi i64 [ 0, [[BB:%.*]] ], [ 0, [[BB3]] ]
-; CHECK-NEXT:    [[TMP1:%.*]] = phi <2 x i64> [ zeroinitializer, [[BB]] ], [ [[TMP7:%.*]], [[BB3]] ]
-; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <2 x i64> [[TMP1]], i32 0
-; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <2 x i64> [[TMP1]], i32 1
-; CHECK-NEXT:    [[ADD]] = add i64 [[TMP3]], [[TMP2]]
+; CHECK-NEXT:    [[PHI4:%.*]] = phi i64 [ 0, [[BB:%.*]] ], [ 0, [[BB3]] ]
+; CHECK-NEXT:    [[TMP0:%.*]] = phi <2 x i64> [ zeroinitializer, [[BB]] ], [ [[TMP3:%.*]], [[BB3]] ]
+; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <2 x i64> [[TMP0]], i32 0
+; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <2 x i64> [[TMP0]], i32 1
+; CHECK-NEXT:    [[ADD]] = add i64 [[TMP1]], [[TMP2]]
 ; CHECK-NEXT:    [[GETELEMENTPTR:%.*]] = getelementptr i64, ptr addrspace(1) null, i64 0
-; CHECK-NEXT:    [[TMP9]] = or i64 [[PHI5]], 0
-; CHECK-NEXT:    [[ICMP:%.*]] = icmp ult i64 [[TMP9]], 0
-; CHECK-NEXT:    [[TMP7]] = insertelement <2 x i64> <i64 poison, i64 0>, i64 [[ADD]], i32 0
+; CHECK-NEXT:    [[OR]] = or i64 [[PHI4]], 0
+; CHECK-NEXT:    [[ICMP:%.*]] = icmp ult i64 [[OR]], 0
+; CHECK-NEXT:    [[TMP3]] = insertelement <2 x i64> <i64 poison, i64 0>, i64 [[ADD]], i32 0
 ; CHECK-NEXT:    br i1 false, label [[BB3]], label [[BB1:%.*]]
 ;
 ; FORCED-LABEL: define i64 @foo() {
 ; FORCED-NEXT:  bb:
-; FORCED-NEXT:    [[TMP8:%.*]] = add i64 0, 0
+; FORCED-NEXT:    [[ADD7:%.*]] = add i64 0, 0
 ; FORCED-NEXT:    br label [[BB3:%.*]]
 ; FORCED:       bb1:
 ; FORCED-NEXT:    [[TMP0:%.*]] = phi <2 x i64> [ [[TMP5:%.*]], [[BB3]] ]
@@ -39,9 +39,9 @@ define i64 @foo() {
 ; FORCED-NEXT:    [[TMP5]] = shufflevector <2 x i64> [[TMP3]], <2 x i64> [[TMP4]], <2 x i32> <i32 0, i32 3>
 ; FORCED-NEXT:    [[TMP6:%.*]] = shufflevector <2 x i64> [[TMP1]], <2 x i64> <i64 poison, i64 0>, <2 x i32> <i32 0, i32 3>
 ; FORCED-NEXT:    [[TMP7]] = add <2 x i64> [[TMP6]], [[TMP2]]
-; FORCED-NEXT:    [[GETELEMENTPTR:%.*]] = getelementptr i64, ptr addrspace(1) null, i64 [[TMP8]]
-; FORCED-NEXT:    [[TMP9:%.*]] = extractelement <2 x i64> [[TMP5]], i32 1
-; FORCED-NEXT:    [[ICMP:%.*]] = icmp ult i64 [[TMP9]], 0
+; FORCED-NEXT:    [[GETELEMENTPTR:%.*]] = getelementptr i64, ptr addrspace(1) null, i64 [[ADD7]]
+; FORCED-NEXT:    [[TMP8:%.*]] = extractelement <2 x i64> [[TMP5]], i32 1
+; FORCED-NEXT:    [[ICMP:%.*]] = icmp ult i64 [[TMP8]], 0
 ; FORCED-NEXT:    br i1 false, label [[BB3]], label [[BB1:%.*]]
 ;
 bb:

>From 68684d3ddab8d6b8c2333632e9e688a2fee1c07c Mon Sep 17 00:00:00 2001
From: Han-Kuan Chen <hankuan.chen at sifive.com>
Date: Thu, 13 Feb 2025 07:36:53 -0800
Subject: [PATCH 2/8] [SLP] Make getSameOpcode support different instructions
 if they have same semantics.

---
 .../Transforms/Vectorize/SLPVectorizer.cpp    | 302 ++++++++++++++++--
 .../AArch64/gather-with-minbith-user.ll       |   4 +-
 .../SLPVectorizer/AArch64/vec3-base.ll        |   8 +-
 .../SLPVectorizer/RISCV/vec3-base.ll          |   8 +-
 .../SLPVectorizer/X86/barriercall.ll          |   4 +-
 .../X86/bottom-to-top-reorder.ll              |  11 +-
 .../buildvector-postpone-for-dependency.ll    |   8 +-
 .../X86/extract-scalar-from-undef.ll          |  12 +-
 .../SLPVectorizer/X86/extractcost.ll          |   4 +-
 .../Transforms/SLPVectorizer/X86/vec3-base.ll |  19 +-
 10 files changed, 306 insertions(+), 74 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index e946620406c2e..d9cac6309a95d 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -810,6 +810,205 @@ static std::optional<unsigned> getExtractIndex(Instruction *E) {
 
 namespace {
 
+/// Base class for representing instructions that can be interchanged with other
+/// equivalent forms. For example, multiplication by a power of 2 can be
+/// interchanged with a left shift.
+///
+/// Derived classes implement specific interchange patterns by overriding the
+/// virtual methods to define their interchange logic.
+///
+/// The class maintains a reference to the main instruction (MainOp) and provides
+/// methods to:
+/// - Check if another instruction is interchangeable (isSame)
+/// - Get the opcode for the interchangeable form (getInterchangeableInstructionOpcode)
+/// - Get the operands for the interchangeable form (getInterchangeableInstructionOps)
+class InterchangeableInstruction {
+protected:
+  Instruction *const MainOp;
+
+  /// Return non nullptr if the right operand of I is ConstantInt.
+  static ConstantInt *isBinOpWithConstantInt(Instruction *I) {
+    Constant *C;
+    if (!match(I, m_BinOp(m_Value(), m_Constant(C))))
+      return nullptr;
+    if (auto *CI = dyn_cast<ConstantInt>(C)) {
+      return CI;
+    } else if (auto *CDV = dyn_cast<ConstantDataVector>(C)) {
+      if (auto *CI = dyn_cast_if_present<ConstantInt>(CDV->getSplatValue()))
+        return CI;
+    }
+    return nullptr;
+  }
+
+public:
+  InterchangeableInstruction(Instruction *MainOp) : MainOp(MainOp) {}
+  virtual bool isSame(Instruction *I) {
+    return MainOp->getOpcode() == I->getOpcode();
+  }
+  virtual unsigned getInterchangeableInstructionOpcode() {
+    return MainOp->getOpcode();
+  }
+  virtual SmallVector<Value *>
+  getInterchangeableInstructionOps(Instruction *I) {
+    assert(MainOp->getOpcode() == I->getOpcode());
+    return SmallVector<Value *>(MainOp->operands());
+  }
+  virtual ~InterchangeableInstruction() = default;
+};
+
+class BinOpIsNoOp final : public InterchangeableInstruction {
+  constexpr static std::initializer_list<unsigned> SupportedOp = {
+      Instruction::Add, Instruction::Sub,  Instruction::Mul,
+      Instruction::Shl, Instruction::AShr, Instruction::And};
+  SmallVector<unsigned> CandidateOp = SupportedOp;
+
+public:
+  using InterchangeableInstruction::InterchangeableInstruction;
+  bool isSame(Instruction *I) override {
+    unsigned Opcode = I->getOpcode();
+    if (!is_contained(SupportedOp, Opcode))
+      return false;
+    ConstantInt *CI = isBinOpWithConstantInt(I);
+    if (CI) {
+      switch (Opcode) {
+      case Instruction::Mul:
+        if (CI->getValue().isOne())
+          return true;
+        break;
+      case Instruction::And:
+        if (CI->getValue().isAllOnes())
+          return true;
+        break;
+      default:
+        if (CI->getValue().isZero())
+          return true;
+      }
+    }
+    if (is_contained(CandidateOp, Opcode)) {
+      CandidateOp = {Opcode};
+      return true;
+    }
+    return false;
+  }
+  unsigned getInterchangeableInstructionOpcode() override {
+    assert(!CandidateOp.empty() && "Cannot find interchangeable instruction.");
+    if (is_contained(CandidateOp, MainOp->getOpcode()))
+      return MainOp->getOpcode();
+    return CandidateOp[0];
+  }
+  SmallVector<Value *>
+  getInterchangeableInstructionOps(Instruction *I) override {
+    assert(is_contained(SupportedOp, I->getOpcode()));
+    return {MainOp->getOperand(0),
+            ConstantInt::get(MainOp->getOperand(1)->getType(),
+                             I->getOpcode() == Instruction::Mul)};
+  }
+};
+
+class MulAndShlWithConstantInt final : public InterchangeableInstruction {
+  constexpr static std::initializer_list<unsigned> SupportedOp = {
+      Instruction::Mul, Instruction::Shl};
+  SmallVector<unsigned> CandidateOp = SupportedOp;
+
+public:
+  using InterchangeableInstruction::InterchangeableInstruction;
+  bool isSame(Instruction *I) override {
+    unsigned Opcode = I->getOpcode();
+    if (!is_contained(SupportedOp, Opcode))
+      return false;
+    ConstantInt *CI = isBinOpWithConstantInt(I);
+    if (CI && (Opcode != Instruction::Mul || CI->getValue().isPowerOf2()))
+      return true;
+    if (is_contained(CandidateOp, Opcode)) {
+      CandidateOp = {Opcode};
+      return true;
+    }
+    return false;
+  }
+  unsigned getInterchangeableInstructionOpcode() override {
+    assert(!CandidateOp.empty() && "Cannot find interchangeable instruction.");
+    if (is_contained(CandidateOp, MainOp->getOpcode()))
+      return MainOp->getOpcode();
+    return CandidateOp[0];
+  }
+  SmallVector<Value *>
+  getInterchangeableInstructionOps(Instruction *I) override {
+    assert(is_contained(SupportedOp, I->getOpcode()));
+    if (MainOp->getOpcode() == I->getOpcode())
+      return SmallVector<Value *>(MainOp->operands());
+    const APInt &Op1Int = isBinOpWithConstantInt(MainOp)->getValue();
+    return {MainOp->getOperand(0),
+            ConstantInt::get(MainOp->getOperand(1)->getType(),
+                             I->getOpcode() == Instruction::Mul
+                                 ? (1 << Op1Int.getZExtValue())
+                                 : Op1Int.logBase2())};
+  }
+};
+
+static SmallVector<std::unique_ptr<InterchangeableInstruction>>
+getInterchangeableInstruction(Instruction *MainOp) {
+  SmallVector<std::unique_ptr<InterchangeableInstruction>> Candidate;
+  Candidate.push_back(std::make_unique<InterchangeableInstruction>(MainOp));
+  if (MainOp->isBinaryOp()) {
+    Candidate.push_back(std::make_unique<BinOpIsNoOp>(MainOp));
+    Candidate.push_back(std::make_unique<MulAndShlWithConstantInt>(MainOp));
+  }
+  return Candidate;
+}
+
+static bool getInterchangeableInstruction(
+    SmallVector<std::unique_ptr<InterchangeableInstruction>> &Candidate,
+    Instruction *I) {
+  auto Iter = std::stable_partition(
+      Candidate.begin(), Candidate.end(),
+      [&](const std::unique_ptr<InterchangeableInstruction> &C) {
+        return C->isSame(I);
+      });
+  if (Iter == Candidate.begin())
+    return false;
+  Candidate.erase(Iter, Candidate.end());
+  return true;
+}
+
+static bool isConvertible(Instruction *I, Instruction *MainOp,
+                          Instruction *AltOp) {
+  if (!I->isBinaryOp())
+    return I->getOpcode() == MainOp->getOpcode() ||
+           I->getOpcode() == AltOp->getOpcode();
+  assert(MainOp && "MainOp cannot be nullptr.");
+  SmallVector<std::unique_ptr<InterchangeableInstruction>> Candidate(
+      getInterchangeableInstruction(I));
+  for (std::unique_ptr<InterchangeableInstruction> &C : Candidate)
+    if (C->isSame(I) && C->isSame(MainOp))
+      return true;
+  Candidate = getInterchangeableInstruction(I);
+  assert(AltOp && "AltOp cannot be nullptr.");
+  for (std::unique_ptr<InterchangeableInstruction> &C : Candidate)
+    if (C->isSame(I) && C->isSame(AltOp))
+      return true;
+  return false;
+}
+
+static std::pair<Instruction *, SmallVector<Value *>>
+convertTo(Instruction *I, Instruction *MainOp, Instruction *AltOp) {
+  assert(isConvertible(I, MainOp, AltOp) && "Cannot convert the instruction.");
+  if (!I->isBinaryOp())
+    return std::make_pair(I->getOpcode() == MainOp->getOpcode() ? MainOp
+                                                                : AltOp,
+                          SmallVector<Value *>(I->operands()));
+  SmallVector<std::unique_ptr<InterchangeableInstruction>> Candidate(
+      getInterchangeableInstruction(I));
+  for (std::unique_ptr<InterchangeableInstruction> &C : Candidate)
+    if (C->isSame(I) && C->isSame(MainOp))
+      return std::make_pair(MainOp,
+                            C->getInterchangeableInstructionOps(MainOp));
+  Candidate = getInterchangeableInstruction(I);
+  for (std::unique_ptr<InterchangeableInstruction> &C : Candidate)
+    if (C->isSame(I) && C->isSame(AltOp))
+      return std::make_pair(AltOp, C->getInterchangeableInstructionOps(AltOp));
+  llvm_unreachable("Cannot convert the instruction.");
+}
+
 /// Main data required for vectorization of instructions.
 class InstructionsState {
   /// The main/alternate instruction. MainOp is also VL0.
@@ -931,6 +1130,11 @@ static InstructionsState getSameOpcode(ArrayRef<Value *> VL,
   unsigned Opcode = MainOp->getOpcode();
   unsigned AltOpcode = Opcode;
 
+  SmallVector<std::unique_ptr<InterchangeableInstruction>>
+      InterchangeableInstructionCandidate(
+          getInterchangeableInstruction(MainOp));
+  SmallVector<std::unique_ptr<InterchangeableInstruction>>
+      AlternateInterchangeableInstructionCandidate;
   bool SwappedPredsCompatible = IsCmpOp && [&]() {
     SetVector<unsigned> UniquePreds, UniqueNonSwappedPreds;
     UniquePreds.insert(BasePred);
@@ -977,14 +1181,18 @@ static InstructionsState getSameOpcode(ArrayRef<Value *> VL,
       return InstructionsState::invalid();
     unsigned InstOpcode = I->getOpcode();
     if (IsBinOp && isa<BinaryOperator>(I)) {
-      if (InstOpcode == Opcode || InstOpcode == AltOpcode)
-        continue;
-      if (Opcode == AltOpcode && isValidForAlternation(InstOpcode) &&
-          isValidForAlternation(Opcode)) {
-        AltOpcode = InstOpcode;
-        AltOp = I;
+      if (getInterchangeableInstruction(InterchangeableInstructionCandidate, I))
         continue;
+      if (AlternateInterchangeableInstructionCandidate.empty()) {
+        if (!isValidForAlternation(Opcode) ||
+            !isValidForAlternation(InstOpcode))
+          return InstructionsState::invalid();
+        AlternateInterchangeableInstructionCandidate =
+            getInterchangeableInstruction(I);
       }
+      if (getInterchangeableInstruction(
+              AlternateInterchangeableInstructionCandidate, I))
+        continue;
     } else if (IsCastOp && isa<CastInst>(I)) {
       Value *Op0 = MainOp->getOperand(0);
       Type *Ty0 = Op0->getType();
@@ -1085,6 +1293,29 @@ static InstructionsState getSameOpcode(ArrayRef<Value *> VL,
     return InstructionsState::invalid();
   }
 
+  if (IsBinOp) {
+    auto FindOp =
+        [&](ArrayRef<std::unique_ptr<InterchangeableInstruction>> Candidate) {
+          for (const std::unique_ptr<InterchangeableInstruction> &I :
+               Candidate) {
+            unsigned InterchangeableInstructionOpcode =
+                I->getInterchangeableInstructionOpcode();
+            for (Value *V : VL) {
+              if (isa<PoisonValue>(V))
+                continue;
+              if (cast<Instruction>(V)->getOpcode() ==
+                  InterchangeableInstructionOpcode)
+                return cast<Instruction>(V);
+            }
+          }
+          llvm_unreachable(
+              "Cannot find the candidate instruction for InstructionsState.");
+        };
+    MainOp = FindOp(InterchangeableInstructionCandidate);
+    AltOp = AlternateInterchangeableInstructionCandidate.empty()
+                ? MainOp
+                : FindOp(AlternateInterchangeableInstructionCandidate);
+  }
   return InstructionsState(MainOp, AltOp);
 }
 
@@ -2447,29 +2678,28 @@ class BoUpSLP {
       ArgSize = isa<IntrinsicInst>(MainOp) ? IntrinsicNumOperands : NumOperands;
       OpsVec.resize(NumOperands);
       unsigned NumLanes = VL.size();
-      for (unsigned OpIdx = 0; OpIdx != NumOperands; ++OpIdx) {
+      for (unsigned OpIdx = 0; OpIdx != NumOperands; ++OpIdx)
         OpsVec[OpIdx].resize(NumLanes);
-        for (unsigned Lane = 0; Lane != NumLanes; ++Lane) {
-          assert((isa<Instruction>(VL[Lane]) || isa<PoisonValue>(VL[Lane])) &&
-                 "Expected instruction or poison value");
-          // Our tree has just 3 nodes: the root and two operands.
-          // It is therefore trivial to get the APO. We only need to check the
-          // opcode of VL[Lane] and whether the operand at OpIdx is the LHS or
-          // RHS operand. The LHS operand of both add and sub is never attached
-          // to an inversese operation in the linearized form, therefore its APO
-          // is false. The RHS is true only if VL[Lane] is an inverse operation.
-
-          // Since operand reordering is performed on groups of commutative
-          // operations or alternating sequences (e.g., +, -), we can safely
-          // tell the inverse operations by checking commutativity.
-          if (isa<PoisonValue>(VL[Lane])) {
-            if (auto *EI = dyn_cast<ExtractElementInst>(MainOp)) {
-              if (OpIdx == 0) {
+      for (unsigned Lane = 0; Lane != NumLanes; ++Lane) {
+        assert((isa<Instruction>(VL[Lane]) || isa<PoisonValue>(VL[Lane])) &&
+               "Expected instruction or poison value");
+        // Our tree has just 3 nodes: the root and two operands.
+        // It is therefore trivial to get the APO. We only need to check the
+        // opcode of VL[Lane] and whether the operand at OpIdx is the LHS or RHS
+        // operand. The LHS operand of both add and sub is never attached to an
+        // inversese operation in the linearized form, therefore its APO is
+        // false. The RHS is true only if VL[Lane] is an inverse operation.
+
+        // Since operand reordering is performed on groups of commutative
+        // operations or alternating sequences (e.g., +, -), we can safely tell
+        // the inverse operations by checking commutativity.
+        if (isa<PoisonValue>(VL[Lane])) {
+          for (unsigned OpIdx : seq<unsigned>(NumOperands)) {
+            if (OpIdx == 0) {
+              if (auto *EI = dyn_cast<ExtractElementInst>(MainOp)) {
                 OpsVec[OpIdx][Lane] = {EI->getVectorOperand(), true, false};
                 continue;
-              }
-            } else if (auto *EV = dyn_cast<ExtractValueInst>(MainOp)) {
-              if (OpIdx == 0) {
+              } else if (auto *EV = dyn_cast<ExtractValueInst>(MainOp)) {
                 OpsVec[OpIdx][Lane] = {EV->getAggregateOperand(), true, false};
                 continue;
               }
@@ -2477,12 +2707,15 @@ class BoUpSLP {
             OpsVec[OpIdx][Lane] = {
                 PoisonValue::get(MainOp->getOperand(OpIdx)->getType()), true,
                 false};
-            continue;
           }
-          bool IsInverseOperation = !isCommutative(cast<Instruction>(VL[Lane]));
+          continue;
+        }
+        auto [SelectedOp, Ops] =
+            convertTo(cast<Instruction>(VL[Lane]), MainOp, S.getAltOp());
+        bool IsInverseOperation = !isCommutative(SelectedOp);
+        for (unsigned OpIdx : seq<unsigned>(NumOperands)) {
           bool APO = (OpIdx == 0) ? false : IsInverseOperation;
-          OpsVec[OpIdx][Lane] = {cast<Instruction>(VL[Lane])->getOperand(OpIdx),
-                                 APO, false};
+          OpsVec[OpIdx][Lane] = {Ops[OpIdx], APO, false};
         }
       }
     }
@@ -8501,8 +8734,12 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
 
   BlockScheduling &BS = *BSRef;
 
+  SmallVector<Value *> MainOpIsTheFirst(UniqueValues);
+  auto MainOpIter = find(MainOpIsTheFirst, S.getMainOp());
+  std::rotate(MainOpIsTheFirst.begin(), MainOpIter, std::next(MainOpIter));
+
   std::optional<ScheduleData *> Bundle =
-      BS.tryScheduleBundle(UniqueValues, this, S);
+      BS.tryScheduleBundle(MainOpIsTheFirst, this, S);
 #ifdef EXPENSIVE_CHECKS
   // Make sure we didn't break any internal invariants
   BS.verify();
@@ -15889,7 +16126,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) {
       Value *V = Builder.CreateBinOp(
           static_cast<Instruction::BinaryOps>(E->getOpcode()), LHS,
           RHS);
-      propagateIRFlags(V, E->Scalars, VL0, It == MinBWs.end());
+      propagateIRFlags(V, E->Scalars, nullptr, It == MinBWs.end());
       if (auto *I = dyn_cast<Instruction>(V)) {
         V = ::propagateMetadata(I, E->Scalars);
         // Drop nuw flags for abs(sub(commutative), true).
@@ -17169,6 +17406,7 @@ BoUpSLP::BlockScheduling::buildBundle(ArrayRef<Value *> VL) {
 std::optional<BoUpSLP::ScheduleData *>
 BoUpSLP::BlockScheduling::tryScheduleBundle(ArrayRef<Value *> VL, BoUpSLP *SLP,
                                             const InstructionsState &S) {
+  assert(VL[0] == S.getMainOp() && "MainOp must be the first element of VL.");
   // No need to schedule PHIs, insertelement, extractelement and extractvalue
   // instructions.
   if (isa<PHINode>(S.getMainOp()) ||
diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/gather-with-minbith-user.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/gather-with-minbith-user.ll
index 3ebe920d17343..781954cbec2f7 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/gather-with-minbith-user.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/gather-with-minbith-user.ll
@@ -5,7 +5,9 @@ define void @h() {
 ; CHECK-LABEL: define void @h() {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr i8, ptr null, i64 16
-; CHECK-NEXT:    store <8 x i16> zeroinitializer, ptr [[ARRAYIDX2]], align 2
+; CHECK-NEXT:    [[ARRAYIDX18:%.*]] = getelementptr i8, ptr null, i64 24
+; CHECK-NEXT:    store <4 x i16> zeroinitializer, ptr [[ARRAYIDX2]], align 2
+; CHECK-NEXT:    store <4 x i16> zeroinitializer, ptr [[ARRAYIDX18]], align 2
 ; CHECK-NEXT:    ret void
 ;
 entry:
diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/vec3-base.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/vec3-base.ll
index feb4ad865f314..d527d38adbee3 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/vec3-base.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/vec3-base.ll
@@ -314,10 +314,10 @@ define void @store_try_reorder(ptr %dst) {
 ;
 ; POW2-ONLY-LABEL: @store_try_reorder(
 ; POW2-ONLY-NEXT:  entry:
-; POW2-ONLY-NEXT:    [[ADD:%.*]] = add i32 0, 0
-; POW2-ONLY-NEXT:    store i32 [[ADD]], ptr [[DST:%.*]], align 4
-; POW2-ONLY-NEXT:    [[ARRAYIDX_I1887:%.*]] = getelementptr i32, ptr [[DST]], i64 1
-; POW2-ONLY-NEXT:    store <2 x i32> zeroinitializer, ptr [[ARRAYIDX_I1887]], align 4
+; POW2-ONLY-NEXT:    store <2 x i32> zeroinitializer, ptr [[ARRAYIDX_I1887:%.*]], align 4
+; POW2-ONLY-NEXT:    [[ADD216:%.*]] = sub i32 0, 0
+; POW2-ONLY-NEXT:    [[ARRAYIDX_I1891:%.*]] = getelementptr i32, ptr [[ARRAYIDX_I1887]], i64 2
+; POW2-ONLY-NEXT:    store i32 [[ADD216]], ptr [[ARRAYIDX_I1891]], align 4
 ; POW2-ONLY-NEXT:    ret void
 ;
 entry:
diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/vec3-base.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/vec3-base.ll
index 7ab5e4d6cb787..481d586e6658a 100644
--- a/llvm/test/Transforms/SLPVectorizer/RISCV/vec3-base.ll
+++ b/llvm/test/Transforms/SLPVectorizer/RISCV/vec3-base.ll
@@ -324,10 +324,10 @@ define void @store_try_reorder(ptr %dst) {
 ;
 ; POW2-ONLY-LABEL: @store_try_reorder(
 ; POW2-ONLY-NEXT:  entry:
-; POW2-ONLY-NEXT:    [[ADD:%.*]] = add i32 0, 0
-; POW2-ONLY-NEXT:    store i32 [[ADD]], ptr [[DST:%.*]], align 4
-; POW2-ONLY-NEXT:    [[ARRAYIDX_I1887:%.*]] = getelementptr i32, ptr [[DST]], i64 1
-; POW2-ONLY-NEXT:    store <2 x i32> zeroinitializer, ptr [[ARRAYIDX_I1887]], align 4
+; POW2-ONLY-NEXT:    store <2 x i32> zeroinitializer, ptr [[ARRAYIDX_I1887:%.*]], align 4
+; POW2-ONLY-NEXT:    [[ADD216:%.*]] = sub i32 0, 0
+; POW2-ONLY-NEXT:    [[ARRAYIDX_I1891:%.*]] = getelementptr i32, ptr [[ARRAYIDX_I1887]], i64 2
+; POW2-ONLY-NEXT:    store i32 [[ADD216]], ptr [[ARRAYIDX_I1891]], align 4
 ; POW2-ONLY-NEXT:    ret void
 ;
 entry:
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/barriercall.ll b/llvm/test/Transforms/SLPVectorizer/X86/barriercall.ll
index f46a5d84a86cc..a39e602e2da71 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/barriercall.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/barriercall.ll
@@ -10,9 +10,7 @@ define i32 @foo(ptr nocapture %A, i32 %n) {
 ; CHECK-NEXT:    [[CALL:%.*]] = tail call i32 (...) @bar()
 ; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <4 x i32> poison, i32 [[N:%.*]], i32 0
 ; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> poison, <4 x i32> zeroinitializer
-; CHECK-NEXT:    [[TMP1:%.*]] = mul nsw <4 x i32> [[SHUFFLE]], <i32 5, i32 9, i32 3, i32 10>
-; CHECK-NEXT:    [[TMP2:%.*]] = shl <4 x i32> [[SHUFFLE]], <i32 5, i32 9, i32 3, i32 10>
-; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> <i32 0, i32 1, i32 6, i32 3>
+; CHECK-NEXT:    [[TMP3:%.*]] = mul <4 x i32> [[SHUFFLE]], <i32 5, i32 9, i32 8, i32 10>
 ; CHECK-NEXT:    [[TMP4:%.*]] = add nsw <4 x i32> [[TMP3]], splat (i32 9)
 ; CHECK-NEXT:    store <4 x i32> [[TMP4]], ptr [[A:%.*]], align 4
 ; CHECK-NEXT:    ret i32 undef
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/bottom-to-top-reorder.ll b/llvm/test/Transforms/SLPVectorizer/X86/bottom-to-top-reorder.ll
index 889f5a95c81d6..299677ca80b34 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/bottom-to-top-reorder.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/bottom-to-top-reorder.ll
@@ -10,15 +10,10 @@ define void @test(ptr %0, ptr %1, ptr %2) {
 ; CHECK-NEXT:    [[TMP11:%.*]] = sub <4 x i32> <i32 0, i32 0, i32 undef, i32 0>, [[TMP8]]
 ; CHECK-NEXT:    [[TMP12:%.*]] = sub <4 x i32> [[TMP11]], [[TMP10]]
 ; CHECK-NEXT:    [[TMP13:%.*]] = add <4 x i32> [[TMP12]], [[TMP6]]
-; CHECK-NEXT:    [[TMP14:%.*]] = add <4 x i32> [[TMP13]], <i32 0, i32 0, i32 1, i32 0>
-; CHECK-NEXT:    [[TMP15:%.*]] = sub <4 x i32> [[TMP13]], <i32 0, i32 0, i32 1, i32 0>
-; CHECK-NEXT:    [[TMP16:%.*]] = shufflevector <4 x i32> [[TMP14]], <4 x i32> [[TMP15]], <4 x i32> <i32 2, i32 0, i32 1, i32 7>
+; CHECK-NEXT:    [[TMP16:%.*]] = add <4 x i32> <i32 0, i32 0, i32 1, i32 0>, [[TMP13]]
 ; CHECK-NEXT:    [[TMP17:%.*]] = add <4 x i32> [[TMP16]], zeroinitializer
-; CHECK-NEXT:    [[TMP18:%.*]] = sub <4 x i32> [[TMP16]], zeroinitializer
-; CHECK-NEXT:    [[TMP19:%.*]] = shufflevector <4 x i32> [[TMP17]], <4 x i32> [[TMP18]], <4 x i32> <i32 0, i32 5, i32 6, i32 7>
-; CHECK-NEXT:    [[TMP20:%.*]] = add <4 x i32> [[TMP19]], zeroinitializer
-; CHECK-NEXT:    [[TMP21:%.*]] = sub <4 x i32> [[TMP19]], zeroinitializer
-; CHECK-NEXT:    [[TMP22:%.*]] = shufflevector <4 x i32> [[TMP20]], <4 x i32> [[TMP21]], <4 x i32> <i32 0, i32 5, i32 6, i32 3>
+; CHECK-NEXT:    [[TMP14:%.*]] = add <4 x i32> [[TMP17]], zeroinitializer
+; CHECK-NEXT:    [[TMP22:%.*]] = shufflevector <4 x i32> [[TMP14]], <4 x i32> poison, <4 x i32> <i32 2, i32 0, i32 1, i32 3>
 ; CHECK-NEXT:    store <4 x i32> [[TMP22]], ptr [[TMP2:%.*]], align 4
 ; CHECK-NEXT:    ret void
 ;
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/buildvector-postpone-for-dependency.ll b/llvm/test/Transforms/SLPVectorizer/X86/buildvector-postpone-for-dependency.ll
index 43c42c1ea2bfb..aa424b9031e77 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/buildvector-postpone-for-dependency.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/buildvector-postpone-for-dependency.ll
@@ -8,15 +8,13 @@ define void @test() {
 ; CHECK:       [[BB1:.*]]:
 ; CHECK-NEXT:    br label %[[BB2:.*]]
 ; CHECK:       [[BB2]]:
-; CHECK-NEXT:    [[TMP0:%.*]] = phi <4 x i32> [ poison, %[[BB1]] ], [ [[TMP5:%.*]], %[[BB6]] ]
+; CHECK-NEXT:    [[TMP0:%.*]] = phi <4 x i32> [ poison, %[[BB1]] ], [ [[TMP4:%.*]], %[[BB6]] ]
 ; CHECK-NEXT:    ret void
 ; CHECK:       [[BB6]]:
 ; CHECK-NEXT:    [[TMP1:%.*]] = phi <2 x i32> [ zeroinitializer, %[[BB]] ], [ [[TMP8:%.*]], %[[BB6]] ]
 ; CHECK-NEXT:    [[TMP6:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
-; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <4 x i32> <i32 0, i32 0, i32 poison, i32 poison>, <4 x i32> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 5, i32 4>
-; CHECK-NEXT:    [[TMP3:%.*]] = ashr <4 x i32> zeroinitializer, [[TMP2]]
-; CHECK-NEXT:    [[TMP4:%.*]] = mul <4 x i32> zeroinitializer, [[TMP2]]
-; CHECK-NEXT:    [[TMP5]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> [[TMP4]], <4 x i32> <i32 0, i32 5, i32 6, i32 7>
+; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <4 x i32> <i32 1, i32 0, i32 poison, i32 poison>, <4 x i32> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 5, i32 4>
+; CHECK-NEXT:    [[TMP4]] = mul <4 x i32> [[TMP3]], zeroinitializer
 ; CHECK-NEXT:    [[TMP7:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> <i32 0, i32 poison>, <2 x i32> <i32 2, i32 1>
 ; CHECK-NEXT:    [[TMP8]] = mul <2 x i32> zeroinitializer, [[TMP7]]
 ; CHECK-NEXT:    br i1 false, label %[[BB2]], label %[[BB6]]
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/extract-scalar-from-undef.ll b/llvm/test/Transforms/SLPVectorizer/X86/extract-scalar-from-undef.ll
index 1c62e57edfc46..56d2df11458d1 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/extract-scalar-from-undef.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/extract-scalar-from-undef.ll
@@ -6,15 +6,13 @@ define i64 @foo(i32 %tmp7) {
 ; CHECK-NEXT:  bb:
 ; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x i32> <i32 0, i32 0, i32 poison, i32 0>, i32 [[TMP5:%.*]], i32 2
 ; CHECK-NEXT:    [[TMP3:%.*]] = sub <4 x i32> [[TMP2]], zeroinitializer
-; CHECK-NEXT:    [[TMP24:%.*]] = sub i32 undef, 0
-; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 poison, i32 poison, i32 undef, i32 0>, i32 [[TMP24]], i32 4
-; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <8 x i32> [[TMP0]], i32 0, i32 5
-; CHECK-NEXT:    [[TMP11:%.*]] = insertelement <8 x i32> <i32 poison, i32 poison, i32 undef, i32 poison, i32 poison, i32 undef, i32 poison, i32 undef>, i32 [[TMP24]], i32 6
+; CHECK-NEXT:    [[TMP13:%.*]] = call <8 x i32> @llvm.vector.insert.v8i32.v2i32(<8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 poison, i32 poison, i32 undef, i32 0>, <2 x i32> <i32 undef, i32 0>, i64 4)
+; CHECK-NEXT:    [[TMP11:%.*]] = insertelement <8 x i32> <i32 poison, i32 poison, i32 undef, i32 poison, i32 poison, i32 undef, i32 poison, i32 undef>, i32 undef, i32 6
 ; CHECK-NEXT:    [[TMP12:%.*]] = call <8 x i32> @llvm.vector.insert.v8i32.v4i32(<8 x i32> poison, <4 x i32> [[TMP3]], i64 0)
 ; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <8 x i32> [[TMP12]], <8 x i32> [[TMP11]], <8 x i32> <i32 0, i32 1, i32 poison, i32 2, i32 3, i32 poison, i32 14, i32 poison>
-; CHECK-NEXT:    [[TMP5:%.*]] = add nsw <8 x i32> [[TMP1]], [[TMP4]]
-; CHECK-NEXT:    [[TMP6:%.*]] = sub nsw <8 x i32> [[TMP1]], [[TMP4]]
-; CHECK-NEXT:    [[TMP7:%.*]] = shufflevector <8 x i32> [[TMP5]], <8 x i32> [[TMP6]], <8 x i32> <i32 0, i32 9, i32 10, i32 11, i32 4, i32 5, i32 14, i32 15>
+; CHECK-NEXT:    [[TMP6:%.*]] = add nsw <8 x i32> [[TMP13]], [[TMP4]]
+; CHECK-NEXT:    [[TMP5:%.*]] = sub nsw <8 x i32> [[TMP13]], [[TMP4]]
+; CHECK-NEXT:    [[TMP7:%.*]] = shufflevector <8 x i32> [[TMP6]], <8 x i32> [[TMP5]], <8 x i32> <i32 0, i32 9, i32 10, i32 11, i32 4, i32 5, i32 14, i32 15>
 ; CHECK-NEXT:    [[TMP8:%.*]] = add <8 x i32> zeroinitializer, [[TMP7]]
 ; CHECK-NEXT:    [[TMP9:%.*]] = xor <8 x i32> [[TMP8]], zeroinitializer
 ; CHECK-NEXT:    [[TMP10:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP9]])
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/extractcost.ll b/llvm/test/Transforms/SLPVectorizer/X86/extractcost.ll
index 02c3173adc654..c6f5308cf54aa 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/extractcost.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/extractcost.ll
@@ -9,9 +9,7 @@ define i32 @foo(ptr nocapture %A, i32 %n, i32 %m) {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <4 x i32> poison, i32 [[N:%.*]], i32 0
 ; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> poison, <4 x i32> zeroinitializer
-; CHECK-NEXT:    [[TMP1:%.*]] = mul nsw <4 x i32> [[SHUFFLE]], <i32 5, i32 9, i32 3, i32 10>
-; CHECK-NEXT:    [[TMP2:%.*]] = shl <4 x i32> [[SHUFFLE]], <i32 5, i32 9, i32 3, i32 10>
-; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> <i32 0, i32 1, i32 6, i32 3>
+; CHECK-NEXT:    [[TMP3:%.*]] = mul <4 x i32> [[SHUFFLE]], <i32 5, i32 9, i32 8, i32 10>
 ; CHECK-NEXT:    [[TMP4:%.*]] = add nsw <4 x i32> [[TMP3]], splat (i32 9)
 ; CHECK-NEXT:    store <4 x i32> [[TMP4]], ptr [[A:%.*]], align 4
 ; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <4 x i32> [[TMP4]], i32 0
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/vec3-base.ll b/llvm/test/Transforms/SLPVectorizer/X86/vec3-base.ll
index 6e2a43ac5f9f1..15dd6756cd7db 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/vec3-base.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/vec3-base.ll
@@ -242,13 +242,18 @@ exit:
 }
 
 define void @store_try_reorder(ptr %dst) {
-; CHECK-LABEL: @store_try_reorder(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[ADD:%.*]] = add i32 0, 0
-; CHECK-NEXT:    store i32 [[ADD]], ptr [[DST:%.*]], align 4
-; CHECK-NEXT:    [[ARRAYIDX_I1887:%.*]] = getelementptr i32, ptr [[DST]], i64 1
-; CHECK-NEXT:    store <2 x i32> zeroinitializer, ptr [[ARRAYIDX_I1887]], align 4
-; CHECK-NEXT:    ret void
+; NON-POW2-LABEL: @store_try_reorder(
+; NON-POW2-NEXT:  entry:
+; NON-POW2-NEXT:    store <3 x i32> zeroinitializer, ptr [[DST:%.*]], align 4
+; NON-POW2-NEXT:    ret void
+;
+; POW2-ONLY-LABEL: @store_try_reorder(
+; POW2-ONLY-NEXT:  entry:
+; POW2-ONLY-NEXT:    store <2 x i32> zeroinitializer, ptr [[DST:%.*]], align 4
+; POW2-ONLY-NEXT:    [[ADD216:%.*]] = sub i32 0, 0
+; POW2-ONLY-NEXT:    [[ARRAYIDX_I1891:%.*]] = getelementptr i32, ptr [[DST]], i64 2
+; POW2-ONLY-NEXT:    store i32 [[ADD216]], ptr [[ARRAYIDX_I1891]], align 4
+; POW2-ONLY-NEXT:    ret void
 ;
 entry:
   %add = add i32 0, 0

>From b7c1a244d8a9a5e19b2dabed653cf1a971e1ea18 Mon Sep 17 00:00:00 2001
From: Han-Kuan Chen <hankuan.chen at sifive.com>
Date: Mon, 23 Dec 2024 02:18:24 -0800
Subject: [PATCH 3/8] [SLP] Pre-commit test.

---
 .../Transforms/SLPVectorizer/isOpcodeOrAlt.ll | 27 +++++++++++++++++++
 1 file changed, 27 insertions(+)
 create mode 100644 llvm/test/Transforms/SLPVectorizer/isOpcodeOrAlt.ll

diff --git a/llvm/test/Transforms/SLPVectorizer/isOpcodeOrAlt.ll b/llvm/test/Transforms/SLPVectorizer/isOpcodeOrAlt.ll
new file mode 100644
index 0000000000000..29585343b2f9a
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/isOpcodeOrAlt.ll
@@ -0,0 +1,27 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -passes=slp-vectorizer -S -slp-max-reg-size=1024 %s | FileCheck %s
+
+define void @test(ptr %a, ptr %b) {
+entry:
+  %gep0 = getelementptr inbounds i32, ptr %a, i64 0
+  %gep1 = getelementptr inbounds i32, ptr %a, i64 1
+  %gep2 = getelementptr inbounds i32, ptr %a, i64 2
+  %gep3 = getelementptr inbounds i32, ptr %a, i64 3
+  %0 = load i32, ptr %gep0, align 4
+  %1 = load i32, ptr %gep1, align 4
+  %2 = load i32, ptr %gep2, align 4
+  %3 = load i32, ptr %gep3, align 4
+  %op0 = shl i32 %0, 1
+  %op1 = add i32 %1, zeroinitializer
+  %op2 = mul i32 %2, 2
+  %op3 = shl i32 %3, zeroinitializer
+  %gep4 = getelementptr inbounds i32, ptr %b, i64 0
+  %gep5 = getelementptr inbounds i32, ptr %b, i64 1
+  %gep6 = getelementptr inbounds i32, ptr %b, i64 2
+  %gep7 = getelementptr inbounds i32, ptr %b, i64 3
+  store i32 %op0, ptr %gep4, align 4
+  store i32 %op1, ptr %gep5, align 4
+  store i32 %op2, ptr %gep6, align 4
+  store i32 %op3, ptr %gep7, align 4
+  ret void
+}

>From 5ad586cd7ecd5813c293f42c41337900aecdd9e7 Mon Sep 17 00:00:00 2001
From: Han-Kuan Chen <hankuan.chen at sifive.com>
Date: Thu, 13 Feb 2025 07:43:46 -0800
Subject: [PATCH 4/8] [SLP] Fix isOpcodeOrAlt cannot find interchangeable
 instruction.

---
 llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp     |  3 +--
 llvm/test/Transforms/SLPVectorizer/isOpcodeOrAlt.ll | 11 +++++++++++
 2 files changed, 12 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index d9cac6309a95d..847042908599a 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -1035,8 +1035,7 @@ class InstructionsState {
   bool isAltShuffle() const { return getMainOp() != getAltOp(); }
 
   bool isOpcodeOrAlt(Instruction *I) const {
-    unsigned CheckedOpcode = I->getOpcode();
-    return getOpcode() == CheckedOpcode || getAltOpcode() == CheckedOpcode;
+    return isConvertible(I, MainOp, AltOp);
   }
 
   /// Checks if the current state is valid, i.e. has non-null MainOp
diff --git a/llvm/test/Transforms/SLPVectorizer/isOpcodeOrAlt.ll b/llvm/test/Transforms/SLPVectorizer/isOpcodeOrAlt.ll
index 29585343b2f9a..623c9e816a59c 100644
--- a/llvm/test/Transforms/SLPVectorizer/isOpcodeOrAlt.ll
+++ b/llvm/test/Transforms/SLPVectorizer/isOpcodeOrAlt.ll
@@ -2,6 +2,17 @@
 ; RUN: opt -passes=slp-vectorizer -S -slp-max-reg-size=1024 %s | FileCheck %s
 
 define void @test(ptr %a, ptr %b) {
+; CHECK-LABEL: @test(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[GEP0:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 0
+; CHECK-NEXT:    [[GEP4:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 0
+; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x i32>, ptr [[GEP0]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = shl <4 x i32> [[TMP0]], <i32 1, i32 0, i32 1, i32 0>
+; CHECK-NEXT:    [[TMP2:%.*]] = mul <4 x i32> [[TMP0]], <i32 1, i32 0, i32 1, i32 0>
+; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> <i32 0, i32 1, i32 6, i32 3>
+; CHECK-NEXT:    store <4 x i32> [[TMP3]], ptr [[GEP4]], align 4
+; CHECK-NEXT:    ret void
+;
 entry:
   %gep0 = getelementptr inbounds i32, ptr %a, i64 0
   %gep1 = getelementptr inbounds i32, ptr %a, i64 1

>From fdf88a2441f80cf3c9b946be83fb093d7074ecfe Mon Sep 17 00:00:00 2001
From: Han-Kuan Chen <hankuan.chen at sifive.com>
Date: Thu, 13 Feb 2025 08:02:31 -0800
Subject: [PATCH 5/8] support Or

---
 .../Transforms/Vectorize/SLPVectorizer.cpp    |  4 +-
 ...reversed-strided-node-with-external-ptr.ll |  7 ++-
 .../SLPVectorizer/X86/bv-shuffle-mask.ll      |  4 +-
 ...gathered-delayed-nodes-with-reused-user.ll | 20 ++++-----
 .../X86/minbitwidth-drop-wrapping-flags.ll    |  4 +-
 .../X86/multi-extracts-bv-combined.ll         |  4 +-
 .../non-scheduled-inst-reused-as-last-inst.ll | 44 +++++++++----------
 .../alternate-opcode-sindle-bv.ll             | 26 ++++++++++-
 .../resized-alt-shuffle-after-minbw.ll        |  4 +-
 .../SLPVectorizer/shuffle-mask-resized.ll     |  4 +-
 10 files changed, 63 insertions(+), 58 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 847042908599a..c26ac4a904d45 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -858,8 +858,8 @@ class InterchangeableInstruction {
 
 class BinOpIsNoOp final : public InterchangeableInstruction {
   constexpr static std::initializer_list<unsigned> SupportedOp = {
-      Instruction::Add, Instruction::Sub,  Instruction::Mul,
-      Instruction::Shl, Instruction::AShr, Instruction::And};
+      Instruction::Add,  Instruction::Sub, Instruction::Mul, Instruction::Shl,
+      Instruction::AShr, Instruction::And, Instruction::Or};
   SmallVector<unsigned> CandidateOp = SupportedOp;
 
 public:
diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/reversed-strided-node-with-external-ptr.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/reversed-strided-node-with-external-ptr.ll
index fd3d4ab80b29c..ff897180cc9b7 100644
--- a/llvm/test/Transforms/SLPVectorizer/RISCV/reversed-strided-node-with-external-ptr.ll
+++ b/llvm/test/Transforms/SLPVectorizer/RISCV/reversed-strided-node-with-external-ptr.ll
@@ -7,13 +7,12 @@ define void @test(ptr %a, i64 %0) {
 ; CHECK-NEXT:  [[ENTRY:.*:]]
 ; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x ptr> poison, ptr [[A]], i32 0
 ; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <2 x ptr> [[TMP1]], <2 x ptr> poison, <2 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <2 x i64> <i64 poison, i64 0>, i64 [[TMP0]], i32 0
 ; CHECK-NEXT:    br label %[[BB:.*]]
 ; CHECK:       [[BB]]:
-; CHECK-NEXT:    [[TMP3:%.*]] = or disjoint i64 [[TMP0]], 1
-; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <2 x i64> poison, i64 [[TMP3]], i32 0
-; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <2 x i64> [[TMP4]], i64 0, i32 1
+; CHECK-NEXT:    [[TMP5:%.*]] = or disjoint <2 x i64> [[TMP3]], <i64 1, i64 0>
 ; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr double, <2 x ptr> [[TMP2]], <2 x i64> [[TMP5]]
-; CHECK-NEXT:    [[ARRAYIDX17_I28_1:%.*]] = getelementptr double, ptr [[A]], i64 [[TMP3]]
+; CHECK-NEXT:    [[ARRAYIDX17_I28_1:%.*]] = extractelement <2 x ptr> [[TMP6]], i32 0
 ; CHECK-NEXT:    [[TMP7:%.*]] = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> [[TMP6]], i32 8, <2 x i1> splat (i1 true), <2 x double> poison)
 ; CHECK-NEXT:    [[TMP8:%.*]] = load <2 x double>, ptr [[A]], align 8
 ; CHECK-NEXT:    [[TMP9:%.*]] = load <2 x double>, ptr [[A]], align 8
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/bv-shuffle-mask.ll b/llvm/test/Transforms/SLPVectorizer/X86/bv-shuffle-mask.ll
index 766916fe71f35..c4ddc5d63cc04 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/bv-shuffle-mask.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/bv-shuffle-mask.ll
@@ -7,14 +7,12 @@ define i16 @test(i16 %v1, i16 %v2) {
 ; CHECK-NEXT:  [[ENTRY:.*:]]
 ; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <4 x i16> <i16 0, i16 0, i16 0, i16 poison>, i16 [[V2]], i32 3
 ; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x i16> <i16 0, i16 0, i16 0, i16 poison>, i16 [[V1]], i32 3
-; CHECK-NEXT:    [[TMP2:%.*]] = or <4 x i16> [[TMP0]], [[TMP1]]
 ; CHECK-NEXT:    [[TMP3:%.*]] = and <4 x i16> [[TMP0]], [[TMP1]]
-; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <4 x i16> [[TMP2]], <4 x i16> [[TMP3]], <4 x i32> <i32 0, i32 1, i32 2, i32 7>
 ; CHECK-NEXT:    [[TMP5:%.*]] = shufflevector <4 x i16> [[TMP0]], <4 x i16> poison, <2 x i32> <i32 poison, i32 3>
 ; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <2 x i16> [[TMP5]], i16 [[V1]], i32 0
 ; CHECK-NEXT:    [[TMP7:%.*]] = shufflevector <2 x i16> [[TMP6]], <2 x i16> poison, <4 x i32> <i32 0, i32 0, i32 0, i32 1>
 ; CHECK-NEXT:    [[TMP8:%.*]] = or <4 x i16> [[TMP7]], zeroinitializer
-; CHECK-NEXT:    [[TMP9:%.*]] = and <4 x i16> [[TMP4]], zeroinitializer
+; CHECK-NEXT:    [[TMP9:%.*]] = and <4 x i16> [[TMP3]], zeroinitializer
 ; CHECK-NEXT:    [[TMP10:%.*]] = and <4 x i16> [[TMP9]], zeroinitializer
 ; CHECK-NEXT:    [[TMP11:%.*]] = icmp ne <4 x i16> [[TMP10]], zeroinitializer
 ; CHECK-NEXT:    [[TMP12:%.*]] = or <4 x i1> [[TMP11]], zeroinitializer
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/gathered-delayed-nodes-with-reused-user.ll b/llvm/test/Transforms/SLPVectorizer/X86/gathered-delayed-nodes-with-reused-user.ll
index f49bac26e846b..40faa0841bfe0 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/gathered-delayed-nodes-with-reused-user.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/gathered-delayed-nodes-with-reused-user.ll
@@ -6,21 +6,21 @@
 define i64 @foo() {
 ; CHECK-LABEL: define i64 @foo() {
 ; CHECK-NEXT:  bb:
+; CHECK-NEXT:    [[ADD7:%.*]] = add i64 0, 0
 ; CHECK-NEXT:    br label [[BB3:%.*]]
 ; CHECK:       bb1:
-; CHECK-NEXT:    [[PHI:%.*]] = phi i64 [ [[ADD:%.*]], [[BB3]] ]
-; CHECK-NEXT:    [[PHI2:%.*]] = phi i64 [ [[OR:%.*]], [[BB3]] ]
+; CHECK-NEXT:    [[TMP1:%.*]] = phi <2 x i64> [ [[TMP5:%.*]], [[BB3]] ]
 ; CHECK-NEXT:    ret i64 0
 ; CHECK:       bb3:
 ; CHECK-NEXT:    [[PHI4:%.*]] = phi i64 [ 0, [[BB:%.*]] ], [ 0, [[BB3]] ]
 ; CHECK-NEXT:    [[TMP0:%.*]] = phi <2 x i64> [ zeroinitializer, [[BB]] ], [ [[TMP3:%.*]], [[BB3]] ]
-; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <2 x i64> [[TMP0]], i32 0
-; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <2 x i64> [[TMP0]], i32 1
-; CHECK-NEXT:    [[ADD]] = add i64 [[TMP1]], [[TMP2]]
-; CHECK-NEXT:    [[GETELEMENTPTR:%.*]] = getelementptr i64, ptr addrspace(1) null, i64 0
-; CHECK-NEXT:    [[OR]] = or i64 [[PHI4]], 0
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x i64> <i64 poison, i64 0>, i64 [[PHI4]], i32 0
+; CHECK-NEXT:    [[TMP5]] = add <2 x i64> [[TMP0]], [[TMP2]]
+; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <2 x i64> [[TMP0]], <2 x i64> <i64 poison, i64 0>, <2 x i32> <i32 0, i32 3>
+; CHECK-NEXT:    [[TMP3]] = add <2 x i64> [[TMP4]], [[TMP2]]
+; CHECK-NEXT:    [[GETELEMENTPTR:%.*]] = getelementptr i64, ptr addrspace(1) null, i64 [[ADD7]]
+; CHECK-NEXT:    [[OR:%.*]] = extractelement <2 x i64> [[TMP5]], i32 1
 ; CHECK-NEXT:    [[ICMP:%.*]] = icmp ult i64 [[OR]], 0
-; CHECK-NEXT:    [[TMP3]] = insertelement <2 x i64> <i64 poison, i64 0>, i64 [[ADD]], i32 0
 ; CHECK-NEXT:    br i1 false, label [[BB3]], label [[BB1:%.*]]
 ;
 ; FORCED-LABEL: define i64 @foo() {
@@ -34,9 +34,7 @@ define i64 @foo() {
 ; FORCED-NEXT:    [[PHI5:%.*]] = phi i64 [ 0, [[BB:%.*]] ], [ 0, [[BB3]] ]
 ; FORCED-NEXT:    [[TMP1:%.*]] = phi <2 x i64> [ zeroinitializer, [[BB]] ], [ [[TMP7:%.*]], [[BB3]] ]
 ; FORCED-NEXT:    [[TMP2:%.*]] = insertelement <2 x i64> <i64 poison, i64 0>, i64 [[PHI5]], i32 0
-; FORCED-NEXT:    [[TMP3:%.*]] = add <2 x i64> [[TMP1]], [[TMP2]]
-; FORCED-NEXT:    [[TMP4:%.*]] = or <2 x i64> [[TMP1]], [[TMP2]]
-; FORCED-NEXT:    [[TMP5]] = shufflevector <2 x i64> [[TMP3]], <2 x i64> [[TMP4]], <2 x i32> <i32 0, i32 3>
+; FORCED-NEXT:    [[TMP5]] = add <2 x i64> [[TMP1]], [[TMP2]]
 ; FORCED-NEXT:    [[TMP6:%.*]] = shufflevector <2 x i64> [[TMP1]], <2 x i64> <i64 poison, i64 0>, <2 x i32> <i32 0, i32 3>
 ; FORCED-NEXT:    [[TMP7]] = add <2 x i64> [[TMP6]], [[TMP2]]
 ; FORCED-NEXT:    [[GETELEMENTPTR:%.*]] = getelementptr i64, ptr addrspace(1) null, i64 [[ADD7]]
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/minbitwidth-drop-wrapping-flags.ll b/llvm/test/Transforms/SLPVectorizer/X86/minbitwidth-drop-wrapping-flags.ll
index 2a5bfa7390770..0198b1c5cb846 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/minbitwidth-drop-wrapping-flags.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/minbitwidth-drop-wrapping-flags.ll
@@ -9,9 +9,7 @@ define i32 @test() {
 ; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <4 x i8> poison, i8 [[A_PROMOTED]], i32 0
 ; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i8> [[TMP0]], <4 x i8> poison, <4 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP2:%.*]] = add <4 x i8> [[TMP1]], zeroinitializer
-; CHECK-NEXT:    [[TMP3:%.*]] = or <4 x i8> [[TMP1]], zeroinitializer
-; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <4 x i8> [[TMP2]], <4 x i8> [[TMP3]], <4 x i32> <i32 0, i32 1, i32 6, i32 7>
-; CHECK-NEXT:    [[TMP5:%.*]] = zext <4 x i8> [[TMP4]] to <4 x i16>
+; CHECK-NEXT:    [[TMP5:%.*]] = zext <4 x i8> [[TMP2]] to <4 x i16>
 ; CHECK-NEXT:    [[TMP6:%.*]] = add <4 x i16> [[TMP5]], <i16 0, i16 -1, i16 0, i16 0>
 ; CHECK-NEXT:    [[TMP7:%.*]] = call i16 @llvm.vector.reduce.or.v4i16(<4 x i16> [[TMP6]])
 ; CHECK-NEXT:    [[TMP8:%.*]] = zext i16 [[TMP7]] to i32
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/multi-extracts-bv-combined.ll b/llvm/test/Transforms/SLPVectorizer/X86/multi-extracts-bv-combined.ll
index e6a166c27ac49..94f2c79faa8c9 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/multi-extracts-bv-combined.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/multi-extracts-bv-combined.ll
@@ -9,9 +9,7 @@ define i32 @foo() {
 ; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <4 x i32> <i32 0, i32 undef, i32 1, i32 0>, i32 [[D]], i32 1
 ; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 1, i32 2, i32 3, i32 1, i32 1, i32 1>
 ; CHECK-NEXT:    [[TMP2:%.*]] = or <8 x i32> zeroinitializer, [[TMP1]]
-; CHECK-NEXT:    [[TMP3:%.*]] = add <8 x i32> zeroinitializer, [[TMP1]]
-; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <8 x i32> [[TMP2]], <8 x i32> [[TMP3]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 5, i32 6, i32 7>
-; CHECK-NEXT:    store <8 x i32> [[TMP4]], ptr getelementptr inbounds ([64 x i32], ptr null, i64 0, i64 15), align 4
+; CHECK-NEXT:    store <8 x i32> [[TMP2]], ptr getelementptr inbounds ([64 x i32], ptr null, i64 0, i64 15), align 4
 ; CHECK-NEXT:    ret i32 0
 ;
 entry:
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/non-scheduled-inst-reused-as-last-inst.ll b/llvm/test/Transforms/SLPVectorizer/X86/non-scheduled-inst-reused-as-last-inst.ll
index 1163c8219dabe..034fe82862950 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/non-scheduled-inst-reused-as-last-inst.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/non-scheduled-inst-reused-as-last-inst.ll
@@ -4,6 +4,24 @@
 ; RUN: -slp-skip-early-profitability-check < %s | FileCheck %s --check-prefixes=FORCED
 
 define void @foo() {
+; CHECK-LABEL: define void @foo() {
+; CHECK-NEXT:  bb:
+; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <2 x i32> <i32 poison, i32 0>, i32 0, i32 0
+; CHECK-NEXT:    br label [[BB1:%.*]]
+; CHECK:       bb1:
+; CHECK-NEXT:    [[TMP1:%.*]] = phi <2 x i32> [ zeroinitializer, [[BB:%.*]] ], [ [[TMP6:%.*]], [[BB4:%.*]] ]
+; CHECK-NEXT:    [[TMP2:%.*]] = shl <2 x i32> [[TMP1]], [[TMP0]]
+; CHECK-NEXT:    [[TMP5:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> [[TMP1]], <2 x i32> <i32 0, i32 3>
+; CHECK-NEXT:    [[TMP6]] = or <2 x i32> [[TMP5]], zeroinitializer
+; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <2 x i32> [[TMP6]], i32 0
+; CHECK-NEXT:    [[CALL:%.*]] = call i64 null(i32 [[TMP7]])
+; CHECK-NEXT:    br label [[BB4]]
+; CHECK:       bb4:
+; CHECK-NEXT:    br i1 false, label [[BB5:%.*]], label [[BB1]]
+; CHECK:       bb5:
+; CHECK-NEXT:    [[TMP8:%.*]] = phi <2 x i32> [ [[TMP2]], [[BB4]] ]
+; CHECK-NEXT:    ret void
+;
 ; FORCED-LABEL: define void @foo() {
 ; FORCED-NEXT:  bb:
 ; FORCED-NEXT:    [[TMP0:%.*]] = insertelement <2 x i32> <i32 poison, i32 0>, i32 0, i32 0
@@ -11,9 +29,7 @@ define void @foo() {
 ; FORCED:       bb1:
 ; FORCED-NEXT:    [[TMP1:%.*]] = phi <2 x i32> [ zeroinitializer, [[BB:%.*]] ], [ [[TMP6:%.*]], [[BB4:%.*]] ]
 ; FORCED-NEXT:    [[TMP2:%.*]] = shl <2 x i32> [[TMP1]], [[TMP0]]
-; FORCED-NEXT:    [[TMP3:%.*]] = or <2 x i32> [[TMP1]], [[TMP0]]
-; FORCED-NEXT:    [[TMP4:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> [[TMP3]], <2 x i32> <i32 0, i32 3>
-; FORCED-NEXT:    [[TMP5:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> [[TMP1]], <2 x i32> <i32 0, i32 3>
+; FORCED-NEXT:    [[TMP5:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> [[TMP1]], <2 x i32> <i32 0, i32 3>
 ; FORCED-NEXT:    [[TMP6]] = or <2 x i32> [[TMP5]], zeroinitializer
 ; FORCED-NEXT:    [[TMP7:%.*]] = extractelement <2 x i32> [[TMP6]], i32 0
 ; FORCED-NEXT:    [[CALL:%.*]] = call i64 null(i32 [[TMP7]])
@@ -21,29 +37,9 @@ define void @foo() {
 ; FORCED:       bb4:
 ; FORCED-NEXT:    br i1 false, label [[BB5:%.*]], label [[BB1]]
 ; FORCED:       bb5:
-; FORCED-NEXT:    [[TMP8:%.*]] = phi <2 x i32> [ [[TMP4]], [[BB4]] ]
+; FORCED-NEXT:    [[TMP8:%.*]] = phi <2 x i32> [ [[TMP2]], [[BB4]] ]
 ; FORCED-NEXT:    ret void
 ;
-; CHECK-LABEL: define void @foo() {
-; CHECK-NEXT:  bb:
-; CHECK-NEXT:    br label [[BB1:%.*]]
-; CHECK:       bb1:
-; CHECK-NEXT:    [[TMP1:%.*]] = phi <2 x i32> [ zeroinitializer, [[BB:%.*]] ], [ [[TMP6:%.*]], [[BB4:%.*]] ]
-; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <2 x i32> [[TMP1]], i32 0
-; CHECK-NEXT:    [[SHL:%.*]] = shl i32 [[TMP2]], 0
-; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <2 x i32> [[TMP1]], i32 [[SHL]], i32 0
-; CHECK-NEXT:    [[TMP6]] = or <2 x i32> [[TMP5]], zeroinitializer
-; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <2 x i32> [[TMP6]], i32 0
-; CHECK-NEXT:    [[CALL:%.*]] = call i64 null(i32 [[TMP7]])
-; CHECK-NEXT:    br label [[BB4]]
-; CHECK:       bb4:
-; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <2 x i32> [[TMP6]], i32 1
-; CHECK-NEXT:    br i1 false, label [[BB5:%.*]], label [[BB1]]
-; CHECK:       bb5:
-; CHECK-NEXT:    [[PHI6:%.*]] = phi i32 [ [[SHL]], [[BB4]] ]
-; CHECK-NEXT:    [[PHI7:%.*]] = phi i32 [ [[TMP8]], [[BB4]] ]
-; CHECK-NEXT:    ret void
-;
 bb:
   br label %bb1
 
diff --git a/llvm/test/Transforms/SLPVectorizer/alternate-opcode-sindle-bv.ll b/llvm/test/Transforms/SLPVectorizer/alternate-opcode-sindle-bv.ll
index c250029519590..32139a5f54816 100644
--- a/llvm/test/Transforms/SLPVectorizer/alternate-opcode-sindle-bv.ll
+++ b/llvm/test/Transforms/SLPVectorizer/alternate-opcode-sindle-bv.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
-; RUN: %if x86-registered-target %{ opt -S --passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s %}
-; RUN: %if aarch64-registered-target %{ opt -S --passes=slp-vectorizer -mtriple=aarch64-unknown-linux-gnu < %s | FileCheck %s %}
+; RUN: %if x86-registered-target %{ opt -S --passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s --check-prefix=X86 %}
+; RUN: %if aarch64-registered-target %{ opt -S --passes=slp-vectorizer -mtriple=aarch64-unknown-linux-gnu < %s | FileCheck %s --check-prefix=AARCH64 %}
 
 define <2 x i32> @test(i32 %arg) {
 ; CHECK-LABEL: define <2 x i32> @test(
@@ -14,6 +14,28 @@ define <2 x i32> @test(i32 %arg) {
 ; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x i32> [[TMP0]], i32 [[MUL]], i32 1
 ; CHECK-NEXT:    ret <2 x i32> [[TMP1]]
 ;
+; X86-LABEL: define <2 x i32> @test(
+; X86-SAME: i32 [[ARG:%.*]]) {
+; X86-NEXT:  bb:
+; X86-NEXT:    [[OR:%.*]] = or i32 [[ARG]], 0
+; X86-NEXT:    [[MUL:%.*]] = mul i32 0, 1
+; X86-NEXT:    [[MUL1:%.*]] = mul i32 [[OR]], [[MUL]]
+; X86-NEXT:    [[CMP:%.*]] = icmp ugt i32 0, [[MUL1]]
+; X86-NEXT:    [[TMP0:%.*]] = insertelement <2 x i32> poison, i32 [[OR]], i32 0
+; X86-NEXT:    [[TMP1:%.*]] = insertelement <2 x i32> [[TMP0]], i32 [[MUL]], i32 1
+; X86-NEXT:    ret <2 x i32> [[TMP1]]
+;
+; AARCH64-LABEL: define <2 x i32> @test(
+; AARCH64-SAME: i32 [[ARG:%.*]]) {
+; AARCH64-NEXT:  bb:
+; AARCH64-NEXT:    [[TMP0:%.*]] = insertelement <2 x i32> <i32 poison, i32 0>, i32 [[ARG]], i32 0
+; AARCH64-NEXT:    [[TMP1:%.*]] = or <2 x i32> [[TMP0]], zeroinitializer
+; AARCH64-NEXT:    [[TMP2:%.*]] = extractelement <2 x i32> [[TMP1]], i32 0
+; AARCH64-NEXT:    [[TMP3:%.*]] = extractelement <2 x i32> [[TMP1]], i32 1
+; AARCH64-NEXT:    [[MUL1:%.*]] = mul i32 [[TMP2]], [[TMP3]]
+; AARCH64-NEXT:    [[CMP:%.*]] = icmp ugt i32 0, [[MUL1]]
+; AARCH64-NEXT:    ret <2 x i32> [[TMP1]]
+;
 bb:
   %or = or i32 %arg, 0
   %mul = mul i32 0, 1
diff --git a/llvm/test/Transforms/SLPVectorizer/resized-alt-shuffle-after-minbw.ll b/llvm/test/Transforms/SLPVectorizer/resized-alt-shuffle-after-minbw.ll
index 056b6222cae72..caca410f056c1 100644
--- a/llvm/test/Transforms/SLPVectorizer/resized-alt-shuffle-after-minbw.ll
+++ b/llvm/test/Transforms/SLPVectorizer/resized-alt-shuffle-after-minbw.ll
@@ -6,11 +6,9 @@ define void @func(i32 %0) {
 ; CHECK-SAME: i32 [[TMP0:%.*]]) {
 ; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x i32> <i32 0, i32 poison, i32 0, i32 0>, i32 [[TMP0]], i32 1
 ; CHECK-NEXT:    [[TMP3:%.*]] = shl <4 x i32> [[TMP2]], zeroinitializer
-; CHECK-NEXT:    [[TMP4:%.*]] = or <4 x i32> [[TMP2]], zeroinitializer
-; CHECK-NEXT:    [[TMP5:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> [[TMP4]], <4 x i32> <i32 0, i32 1, i32 6, i32 7>
 ; CHECK-NEXT:    [[TMP6:%.*]] = shl i32 [[TMP0]], 0
 ; CHECK-NEXT:    [[TMP7:%.*]] = icmp eq i32 [[TMP6]], 0
-; CHECK-NEXT:    [[TMP8:%.*]] = shufflevector <4 x i32> [[TMP5]], <4 x i32> poison, <32 x i32> <i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 3>
+; CHECK-NEXT:    [[TMP8:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> poison, <32 x i32> <i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 3>
 ; CHECK-NEXT:    [[TMP9:%.*]] = sext i32 [[TMP6]] to i64
 ; CHECK-NEXT:    [[TMP10:%.*]] = or i64 [[TMP9]], 0
 ; CHECK-NEXT:    [[TMP11:%.*]] = trunc i64 [[TMP9]] to i32
diff --git a/llvm/test/Transforms/SLPVectorizer/shuffle-mask-resized.ll b/llvm/test/Transforms/SLPVectorizer/shuffle-mask-resized.ll
index 732b50396a460..1e3255f2187af 100644
--- a/llvm/test/Transforms/SLPVectorizer/shuffle-mask-resized.ll
+++ b/llvm/test/Transforms/SLPVectorizer/shuffle-mask-resized.ll
@@ -12,9 +12,7 @@ define i32 @test() {
 ; CHECK-NEXT:    br i1 false, label [[BB4:%.*]], label [[BB3]]
 ; CHECK:       bb3:
 ; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <2 x i32> [[TMP0]], <2 x i32> <i32 0, i32 poison>, <2 x i32> <i32 2, i32 1>
-; CHECK-NEXT:    [[TMP3:%.*]] = add <2 x i32> zeroinitializer, [[TMP2]]
-; CHECK-NEXT:    [[TMP4:%.*]] = or <2 x i32> zeroinitializer, [[TMP2]]
-; CHECK-NEXT:    [[TMP5]] = shufflevector <2 x i32> [[TMP3]], <2 x i32> [[TMP4]], <2 x i32> <i32 0, i32 3>
+; CHECK-NEXT:    [[TMP5]] = or <2 x i32> zeroinitializer, [[TMP2]]
 ; CHECK-NEXT:    br label [[BB1]]
 ; CHECK:       bb4:
 ; CHECK-NEXT:    [[TMP6:%.*]] = phi <8 x i32> [ [[TMP1]], [[BB1]] ]

>From 4a08497fcc1ec161ee2bb0686364405faecdef37 Mon Sep 17 00:00:00 2001
From: Han-Kuan Chen <hankuan.chen at sifive.com>
Date: Fri, 14 Feb 2025 00:18:15 -0800
Subject: [PATCH 6/8] support Xor

---
 .../lib/Transforms/Vectorize/SLPVectorizer.cpp |  2 +-
 .../X86/non-power-2-num-elems-reused.ll        | 18 +++++++++++++-----
 .../X86/reduced-val-vectorized-in-transform.ll |  6 +++---
 .../SLPVectorizer/X86/shuffle-mask-emission.ll |  6 ++----
 4 files changed, 19 insertions(+), 13 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index c26ac4a904d45..eb1a6fb55c9d1 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -859,7 +859,7 @@ class InterchangeableInstruction {
 class BinOpIsNoOp final : public InterchangeableInstruction {
   constexpr static std::initializer_list<unsigned> SupportedOp = {
       Instruction::Add,  Instruction::Sub, Instruction::Mul, Instruction::Shl,
-      Instruction::AShr, Instruction::And, Instruction::Or};
+      Instruction::AShr, Instruction::And, Instruction::Or,  Instruction::Xor};
   SmallVector<unsigned> CandidateOp = SupportedOp;
 
 public:
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/non-power-2-num-elems-reused.ll b/llvm/test/Transforms/SLPVectorizer/X86/non-power-2-num-elems-reused.ll
index 4ad02d47fb385..fbccb6914006a 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/non-power-2-num-elems-reused.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/non-power-2-num-elems-reused.ll
@@ -5,11 +5,19 @@ define i64 @test() {
 ; CHECK-LABEL: define i64 @test() {
 ; CHECK-NEXT:  [[ENTRY:.*:]]
 ; CHECK-NEXT:    [[OR54_I_I_6:%.*]] = or i32 0, 0
-; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <16 x i32> poison, i32 [[OR54_I_I_6]], i32 8
-; CHECK-NEXT:    [[TMP1:%.*]] = call <16 x i32> @llvm.vector.insert.v16i32.v8i32(<16 x i32> [[TMP0]], <8 x i32> zeroinitializer, i64 0)
-; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <16 x i32> [[TMP1]], <16 x i32> poison, <16 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5, i32 6, i32 7, i32 7, i32 8>
-; CHECK-NEXT:    [[TMP3:%.*]] = zext <16 x i32> [[TMP2]] to <16 x i64>
-; CHECK-NEXT:    [[TMP4:%.*]] = call i64 @llvm.vector.reduce.or.v16i64(<16 x i64> [[TMP3]])
+; CHECK-NEXT:    [[CONV193_1_I_6:%.*]] = zext i32 [[OR54_I_I_6]] to i64
+; CHECK-NEXT:    [[CONV193_I_7:%.*]] = zext i32 0 to i64
+; CHECK-NEXT:    [[TMP0:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.v8i64(<8 x i64> zeroinitializer, i64 0)
+; CHECK-NEXT:    [[RDX_OP:%.*]] = or <4 x i64> [[TMP0]], zeroinitializer
+; CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i64> @llvm.vector.insert.v8i64.v4i64(<8 x i64> zeroinitializer, <4 x i64> [[RDX_OP]], i64 0)
+; CHECK-NEXT:    [[OP_RDX:%.*]] = call i64 @llvm.vector.reduce.or.v8i64(<8 x i64> [[TMP1]])
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x i64> poison, i64 [[OP_RDX]], i32 0
+; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <2 x i64> [[TMP2]], i64 [[CONV193_I_7]], i32 1
+; CHECK-NEXT:    [[TMP7:%.*]] = or <2 x i64> [[TMP3]], zeroinitializer
+; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <2 x i64> [[TMP7]], i32 0
+; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <2 x i64> [[TMP7]], i32 1
+; CHECK-NEXT:    [[OP_RDX3:%.*]] = or i64 [[TMP5]], [[TMP6]]
+; CHECK-NEXT:    [[TMP4:%.*]] = or i64 [[OP_RDX3]], [[CONV193_1_I_6]]
 ; CHECK-NEXT:    ret i64 [[TMP4]]
 ;
 entry:
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reduced-val-vectorized-in-transform.ll b/llvm/test/Transforms/SLPVectorizer/X86/reduced-val-vectorized-in-transform.ll
index 81f3bf99f3fd8..7fe6941d52da7 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/reduced-val-vectorized-in-transform.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/reduced-val-vectorized-in-transform.ll
@@ -9,16 +9,16 @@ define i32 @test(i1 %cond) {
 ; CHECK:       [[BB]]:
 ; CHECK-NEXT:    [[P1:%.*]] = phi i32 [ [[OR92:%.*]], %[[BB]] ], [ 0, %[[ENTRY]] ]
 ; CHECK-NEXT:    [[TMP0:%.*]] = phi <2 x i32> [ [[TMP8:%.*]], %[[BB]] ], [ zeroinitializer, %[[ENTRY]] ]
-; CHECK-NEXT:    [[TMP1:%.*]] = or i32 1, 0
 ; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <2 x i32> [[TMP0]], <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
 ; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> <i32 poison, i32 poison, i32 0, i32 0>, <4 x i32> <i32 poison, i32 1, i32 6, i32 7>
 ; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <4 x i32> [[TMP3]], i32 [[P1]], i32 0
 ; CHECK-NEXT:    [[TMP5:%.*]] = or <4 x i32> zeroinitializer, [[TMP4]]
 ; CHECK-NEXT:    [[OR92]] = or i32 1, 0
 ; CHECK-NEXT:    [[TMP6:%.*]] = call i32 @llvm.vector.reduce.xor.v4i32(<4 x i32> [[TMP5]])
+; CHECK-NEXT:    [[TMP9:%.*]] = insertelement <2 x i32> <i32 poison, i32 1>, i32 [[TMP6]], i32 0
+; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <2 x i32> <i32 poison, i32 0>, i32 [[OR92]], i32 0
+; CHECK-NEXT:    [[TMP8]] = xor <2 x i32> [[TMP9]], [[TMP7]]
 ; CHECK-NEXT:    [[OP_RDX:%.*]] = xor i32 [[TMP6]], [[OR92]]
-; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <2 x i32> poison, i32 [[OP_RDX]], i32 0
-; CHECK-NEXT:    [[TMP8]] = insertelement <2 x i32> [[TMP7]], i32 [[TMP1]], i32 1
 ; CHECK-NEXT:    br i1 [[COND]], label %[[EXIT:.*]], label %[[BB]]
 ; CHECK:       [[EXIT]]:
 ; CHECK-NEXT:    ret i32 [[OP_RDX]]
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/shuffle-mask-emission.ll b/llvm/test/Transforms/SLPVectorizer/X86/shuffle-mask-emission.ll
index fcc295de62adf..70beef71b2e34 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/shuffle-mask-emission.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/shuffle-mask-emission.ll
@@ -7,10 +7,8 @@ define i1 @test() {
 ; CHECK-NEXT:    [[H_PROMOTED118_I_FR:%.*]] = freeze i32 1
 ; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <4 x i32> <i32 0, i32 0, i32 poison, i32 0>, i32 [[H_PROMOTED118_I_FR]], i32 2
 ; CHECK-NEXT:    [[TMP1:%.*]] = xor <4 x i32> zeroinitializer, [[TMP0]]
-; CHECK-NEXT:    [[TMP2:%.*]] = add <4 x i32> zeroinitializer, [[TMP0]]
-; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> <i32 0, i32 1, i32 6, i32 7>
-; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> [[TMP3]], <4 x i32> <i32 2, i32 2, i32 7, i32 2>
-; CHECK-NEXT:    [[TMP5:%.*]] = add <4 x i32> [[TMP3]], [[TMP4]]
+; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> [[TMP1]], <4 x i32> <i32 2, i32 2, i32 7, i32 2>
+; CHECK-NEXT:    [[TMP5:%.*]] = add <4 x i32> [[TMP1]], [[TMP2]]
 ; CHECK-NEXT:    [[TMP6:%.*]] = and <4 x i32> [[TMP5]], <i32 0, i32 1, i32 1, i32 1>
 ; CHECK-NEXT:    [[TMP7:%.*]] = icmp eq <4 x i32> [[TMP6]], <i32 1, i32 0, i32 0, i32 0>
 ; CHECK-NEXT:    [[TMP8:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP7]])

>From 6788da47bdb58422b7cad4251e160c3828e4a2ae Mon Sep 17 00:00:00 2001
From: Han-Kuan Chen <hankuan.chen at sifive.com>
Date: Sun, 16 Feb 2025 23:18:55 -0800
Subject: [PATCH 7/8] fix undef deprecator issue

---
 .../X86/extract-scalar-from-undef.ll          | 20 +++++++++----------
 1 file changed, 9 insertions(+), 11 deletions(-)

diff --git a/llvm/test/Transforms/SLPVectorizer/X86/extract-scalar-from-undef.ll b/llvm/test/Transforms/SLPVectorizer/X86/extract-scalar-from-undef.ll
index 56d2df11458d1..1c0b3f41d523b 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/extract-scalar-from-undef.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/extract-scalar-from-undef.ll
@@ -4,12 +4,10 @@
 define i64 @foo(i32 %tmp7) {
 ; CHECK-LABEL: @foo(
 ; CHECK-NEXT:  bb:
-; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x i32> <i32 0, i32 0, i32 poison, i32 0>, i32 [[TMP5:%.*]], i32 2
-; CHECK-NEXT:    [[TMP3:%.*]] = sub <4 x i32> [[TMP2]], zeroinitializer
-; CHECK-NEXT:    [[TMP13:%.*]] = call <8 x i32> @llvm.vector.insert.v8i32.v2i32(<8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 poison, i32 poison, i32 undef, i32 0>, <2 x i32> <i32 undef, i32 0>, i64 4)
-; CHECK-NEXT:    [[TMP11:%.*]] = insertelement <8 x i32> <i32 poison, i32 poison, i32 undef, i32 poison, i32 poison, i32 undef, i32 poison, i32 undef>, i32 undef, i32 6
-; CHECK-NEXT:    [[TMP12:%.*]] = call <8 x i32> @llvm.vector.insert.v8i32.v4i32(<8 x i32> poison, <4 x i32> [[TMP3]], i64 0)
-; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <8 x i32> [[TMP12]], <8 x i32> [[TMP11]], <8 x i32> <i32 0, i32 1, i32 poison, i32 2, i32 3, i32 poison, i32 14, i32 poison>
+; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <8 x i32> <i32 0, i32 0, i32 poison, i32 poison, i32 0, i32 poison, i32 poison, i32 poison>, i32 [[TMP8:%.*]], i32 3
+; CHECK-NEXT:    [[TMP4:%.*]] = sub <8 x i32> [[TMP0]], <i32 0, i32 0, i32 poison, i32 0, i32 0, i32 poison, i32 0, i32 poison>
+; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 poison, i32 poison, i32 poison, i32 0>, <8 x i32> [[TMP4]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 14, i32 poison, i32 poison, i32 7>
+; CHECK-NEXT:    [[TMP13:%.*]] = insertelement <8 x i32> [[TMP2]], i32 0, i32 5
 ; CHECK-NEXT:    [[TMP6:%.*]] = add nsw <8 x i32> [[TMP13]], [[TMP4]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = sub nsw <8 x i32> [[TMP13]], [[TMP4]]
 ; CHECK-NEXT:    [[TMP7:%.*]] = shufflevector <8 x i32> [[TMP6]], <8 x i32> [[TMP5]], <8 x i32> <i32 0, i32 9, i32 10, i32 11, i32 4, i32 5, i32 14, i32 15>
@@ -27,7 +25,7 @@ bb:
   %tmp4 = xor i32 %tmp3, 0
   %tmp6 = sub i32 0, 0
   %tmp8 = sub i32 %tmp7, 0
-  %tmp9 = sub nsw i32 0, undef
+  %tmp9 = sub nsw i32 0, poison
   %tmp10 = add nsw i32 0, %tmp6
   %tmp11 = sub nsw i32 0, %tmp8
   %tmp12 = add i32 0, %tmp10
@@ -42,10 +40,10 @@ bb:
   %tmp21 = add i32 %tmp20, %tmp17
   %tmp22 = sub i32 0, 0
   %tmp23 = add i32 0, 0
-  %tmp24 = sub i32 undef, 0
-  %tmp25 = add nsw i32 %tmp23, undef
+  %tmp24 = sub i32 poison, 0
+  %tmp25 = add nsw i32 %tmp23, poison
   %tmp26 = add nsw i32 %tmp24, %tmp22
-  %tmp27 = sub nsw i32 undef, %tmp24
+  %tmp27 = sub nsw i32 poison, %tmp24
   %tmp28 = add i32 0, %tmp25
   %tmp29 = xor i32 %tmp28, 0
   %tmp30 = add i32 0, %tmp26
@@ -56,7 +54,7 @@ bb:
   %tmp35 = add i32 %tmp34, %tmp29
   %tmp36 = add i32 %tmp35, 0
   %tmp37 = add i32 %tmp36, %tmp33
-  %tmp38 = sub nsw i32 0, undef
+  %tmp38 = sub nsw i32 0, poison
   %tmp39 = add i32 0, %tmp38
   %tmp40 = xor i32 %tmp39, 0
   %tmp41 = add i32 0, %tmp37

>From deeb10d1c5813696c9d9ecc9fa23d9cb7c2e1533 Mon Sep 17 00:00:00 2001
From: Han-Kuan Chen <hankuan.chen at sifive.com>
Date: Sun, 16 Feb 2025 23:32:23 -0800
Subject: [PATCH 8/8] clang-format

---
 llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index eb1a6fb55c9d1..82261740ed4ac 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -817,11 +817,13 @@ namespace {
 /// Derived classes implement specific interchange patterns by overriding the
 /// virtual methods to define their interchange logic.
 ///
-/// The class maintains a reference to the main instruction (MainOp) and provides
-/// methods to:
+/// The class maintains a reference to the main instruction (MainOp) and
+/// provides methods to:
 /// - Check if another instruction is interchangeable (isSame)
-/// - Get the opcode for the interchangeable form (getInterchangeableInstructionOpcode)
-/// - Get the operands for the interchangeable form (getInterchangeableInstructionOps)
+/// - Get the opcode for the interchangeable form
+/// (getInterchangeableInstructionOpcode)
+/// - Get the operands for the interchangeable form
+/// (getInterchangeableInstructionOps)
 class InterchangeableInstruction {
 protected:
   Instruction *const MainOp;



More information about the llvm-commits mailing list