[llvm] [SLP] Make getSameOpcode support interchangeable instructions. (PR #127450)

Han-Kuan Chen via llvm-commits llvm-commits at lists.llvm.org
Thu Mar 13 04:24:44 PDT 2025


https://github.com/HanKuanChen updated https://github.com/llvm/llvm-project/pull/127450

>From 4c951fcd35caa94bad8d858909cc3ebb74460124 Mon Sep 17 00:00:00 2001
From: Han-Kuan Chen <hankuan.chen at sifive.com>
Date: Thu, 13 Feb 2025 23:58:17 -0800
Subject: [PATCH 01/38] [SLP] NFC. Update test.

---
 ...gathered-delayed-nodes-with-reused-user.ll | 26 +++++++++----------
 1 file changed, 13 insertions(+), 13 deletions(-)

diff --git a/llvm/test/Transforms/SLPVectorizer/X86/gathered-delayed-nodes-with-reused-user.ll b/llvm/test/Transforms/SLPVectorizer/X86/gathered-delayed-nodes-with-reused-user.ll
index fa33621de5ae7..f49bac26e846b 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/gathered-delayed-nodes-with-reused-user.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/gathered-delayed-nodes-with-reused-user.ll
@@ -9,23 +9,23 @@ define i64 @foo() {
 ; CHECK-NEXT:    br label [[BB3:%.*]]
 ; CHECK:       bb1:
 ; CHECK-NEXT:    [[PHI:%.*]] = phi i64 [ [[ADD:%.*]], [[BB3]] ]
-; CHECK-NEXT:    [[PHI2:%.*]] = phi i64 [ [[TMP9:%.*]], [[BB3]] ]
+; CHECK-NEXT:    [[PHI2:%.*]] = phi i64 [ [[OR:%.*]], [[BB3]] ]
 ; CHECK-NEXT:    ret i64 0
 ; CHECK:       bb3:
-; CHECK-NEXT:    [[PHI5:%.*]] = phi i64 [ 0, [[BB:%.*]] ], [ 0, [[BB3]] ]
-; CHECK-NEXT:    [[TMP1:%.*]] = phi <2 x i64> [ zeroinitializer, [[BB]] ], [ [[TMP7:%.*]], [[BB3]] ]
-; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <2 x i64> [[TMP1]], i32 0
-; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <2 x i64> [[TMP1]], i32 1
-; CHECK-NEXT:    [[ADD]] = add i64 [[TMP3]], [[TMP2]]
+; CHECK-NEXT:    [[PHI4:%.*]] = phi i64 [ 0, [[BB:%.*]] ], [ 0, [[BB3]] ]
+; CHECK-NEXT:    [[TMP0:%.*]] = phi <2 x i64> [ zeroinitializer, [[BB]] ], [ [[TMP3:%.*]], [[BB3]] ]
+; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <2 x i64> [[TMP0]], i32 0
+; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <2 x i64> [[TMP0]], i32 1
+; CHECK-NEXT:    [[ADD]] = add i64 [[TMP1]], [[TMP2]]
 ; CHECK-NEXT:    [[GETELEMENTPTR:%.*]] = getelementptr i64, ptr addrspace(1) null, i64 0
-; CHECK-NEXT:    [[TMP9]] = or i64 [[PHI5]], 0
-; CHECK-NEXT:    [[ICMP:%.*]] = icmp ult i64 [[TMP9]], 0
-; CHECK-NEXT:    [[TMP7]] = insertelement <2 x i64> <i64 poison, i64 0>, i64 [[ADD]], i32 0
+; CHECK-NEXT:    [[OR]] = or i64 [[PHI4]], 0
+; CHECK-NEXT:    [[ICMP:%.*]] = icmp ult i64 [[OR]], 0
+; CHECK-NEXT:    [[TMP3]] = insertelement <2 x i64> <i64 poison, i64 0>, i64 [[ADD]], i32 0
 ; CHECK-NEXT:    br i1 false, label [[BB3]], label [[BB1:%.*]]
 ;
 ; FORCED-LABEL: define i64 @foo() {
 ; FORCED-NEXT:  bb:
-; FORCED-NEXT:    [[TMP8:%.*]] = add i64 0, 0
+; FORCED-NEXT:    [[ADD7:%.*]] = add i64 0, 0
 ; FORCED-NEXT:    br label [[BB3:%.*]]
 ; FORCED:       bb1:
 ; FORCED-NEXT:    [[TMP0:%.*]] = phi <2 x i64> [ [[TMP5:%.*]], [[BB3]] ]
@@ -39,9 +39,9 @@ define i64 @foo() {
 ; FORCED-NEXT:    [[TMP5]] = shufflevector <2 x i64> [[TMP3]], <2 x i64> [[TMP4]], <2 x i32> <i32 0, i32 3>
 ; FORCED-NEXT:    [[TMP6:%.*]] = shufflevector <2 x i64> [[TMP1]], <2 x i64> <i64 poison, i64 0>, <2 x i32> <i32 0, i32 3>
 ; FORCED-NEXT:    [[TMP7]] = add <2 x i64> [[TMP6]], [[TMP2]]
-; FORCED-NEXT:    [[GETELEMENTPTR:%.*]] = getelementptr i64, ptr addrspace(1) null, i64 [[TMP8]]
-; FORCED-NEXT:    [[TMP9:%.*]] = extractelement <2 x i64> [[TMP5]], i32 1
-; FORCED-NEXT:    [[ICMP:%.*]] = icmp ult i64 [[TMP9]], 0
+; FORCED-NEXT:    [[GETELEMENTPTR:%.*]] = getelementptr i64, ptr addrspace(1) null, i64 [[ADD7]]
+; FORCED-NEXT:    [[TMP8:%.*]] = extractelement <2 x i64> [[TMP5]], i32 1
+; FORCED-NEXT:    [[ICMP:%.*]] = icmp ult i64 [[TMP8]], 0
 ; FORCED-NEXT:    br i1 false, label [[BB3]], label [[BB1:%.*]]
 ;
 bb:

>From 68684d3ddab8d6b8c2333632e9e688a2fee1c07c Mon Sep 17 00:00:00 2001
From: Han-Kuan Chen <hankuan.chen at sifive.com>
Date: Thu, 13 Feb 2025 07:36:53 -0800
Subject: [PATCH 02/38] [SLP] Make getSameOpcode support different instructions
 if they have same semantics.

---
 .../Transforms/Vectorize/SLPVectorizer.cpp    | 302 ++++++++++++++++--
 .../AArch64/gather-with-minbith-user.ll       |   4 +-
 .../SLPVectorizer/AArch64/vec3-base.ll        |   8 +-
 .../SLPVectorizer/RISCV/vec3-base.ll          |   8 +-
 .../SLPVectorizer/X86/barriercall.ll          |   4 +-
 .../X86/bottom-to-top-reorder.ll              |  11 +-
 .../buildvector-postpone-for-dependency.ll    |   8 +-
 .../X86/extract-scalar-from-undef.ll          |  12 +-
 .../SLPVectorizer/X86/extractcost.ll          |   4 +-
 .../Transforms/SLPVectorizer/X86/vec3-base.ll |  19 +-
 10 files changed, 306 insertions(+), 74 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index e946620406c2e..d9cac6309a95d 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -810,6 +810,205 @@ static std::optional<unsigned> getExtractIndex(Instruction *E) {
 
 namespace {
 
+/// Common base for "interchangeable instruction" patterns: groups of
+/// instructions that can be rewritten into one another, e.g. a multiplication
+/// by a power of 2 and the matching left shift.
+///
+/// The base class itself models the trivial pattern that only accepts
+/// instructions with the same opcode as MainOp. Derived classes override the
+/// virtual methods to describe richer patterns.
+///
+/// Interface:
+/// - isSame: whether an instruction fits the pattern.
+/// - getInterchangeableInstructionOpcode: the opcode the pattern resolves to.
+/// - getInterchangeableInstructionOps: the operands of the rewritten form.
+class InterchangeableInstruction {
+protected:
+  /// The instruction this pattern was created for.
+  Instruction *const MainOp;
+
+  /// If \p I is a binary operator whose right operand is a ConstantInt, or a
+  /// ConstantDataVector splat of a ConstantInt, return that ConstantInt.
+  /// Return nullptr otherwise.
+  static ConstantInt *isBinOpWithConstantInt(Instruction *I) {
+    Constant *RHS;
+    if (!match(I, m_BinOp(m_Value(), m_Constant(RHS))))
+      return nullptr;
+    if (auto *Scalar = dyn_cast<ConstantInt>(RHS))
+      return Scalar;
+    if (auto *Vec = dyn_cast<ConstantDataVector>(RHS))
+      return dyn_cast_if_present<ConstantInt>(Vec->getSplatValue());
+    return nullptr;
+  }
+
+public:
+  InterchangeableInstruction(Instruction *MainOp) : MainOp(MainOp) {}
+  virtual ~InterchangeableInstruction() = default;
+
+  /// Return true if \p I has the same opcode as MainOp.
+  virtual bool isSame(Instruction *I) {
+    return I->getOpcode() == MainOp->getOpcode();
+  }
+
+  /// Return the opcode this pattern resolves to; for the base pattern this is
+  /// always the opcode of MainOp.
+  virtual unsigned getInterchangeableInstructionOpcode() {
+    return MainOp->getOpcode();
+  }
+
+  /// Return the operands MainOp has when expressed with the opcode of \p I.
+  /// The base pattern requires both opcodes to match, so the operands of
+  /// MainOp are returned unchanged.
+  virtual SmallVector<Value *>
+  getInterchangeableInstructionOps(Instruction *I) {
+    assert(MainOp->getOpcode() == I->getOpcode());
+    SmallVector<Value *> Ops(MainOp->operands());
+    return Ops;
+  }
+};
+
+/// Pattern for binary operators whose constant right operand makes them a
+/// no-op: x + 0, x - 0, x * 1, x << 0, x >> 0 (ashr) and x & -1. Such an
+/// instruction can be rewritten with any other supported opcode by using that
+/// opcode's identity constant as the right operand.
+class BinOpIsNoOp final : public InterchangeableInstruction {
+  constexpr static std::initializer_list<unsigned> SupportedOp = {
+      Instruction::Add, Instruction::Sub,  Instruction::Mul,
+      Instruction::Shl, Instruction::AShr, Instruction::And};
+  /// Opcodes this pattern may still resolve to. isSame narrows it to a single
+  /// opcode once it accepts an instruction that is not a no-op.
+  SmallVector<unsigned> CandidateOp = SupportedOp;
+
+public:
+  using InterchangeableInstruction::InterchangeableInstruction;
+
+  /// Return true if \p I fits the pattern. No-op instructions always fit. An
+  /// instruction that is not a no-op only fits while its opcode is still a
+  /// candidate, and accepting it pins the pattern to that opcode, so this
+  /// method is stateful.
+  bool isSame(Instruction *I) override {
+    unsigned Opcode = I->getOpcode();
+    if (!is_contained(SupportedOp, Opcode))
+      return false;
+    ConstantInt *CI = isBinOpWithConstantInt(I);
+    if (CI) {
+      switch (Opcode) {
+      case Instruction::Mul:
+        if (CI->getValue().isOne())
+          return true;
+        break;
+      case Instruction::And:
+        if (CI->getValue().isAllOnes())
+          return true;
+        break;
+      default:
+        if (CI->getValue().isZero())
+          return true;
+      }
+    }
+    if (is_contained(CandidateOp, Opcode)) {
+      CandidateOp = {Opcode};
+      return true;
+    }
+    return false;
+  }
+
+  /// Return the opcode the pattern resolves to: the opcode of MainOp when it
+  /// is still a candidate, otherwise the first remaining candidate.
+  unsigned getInterchangeableInstructionOpcode() override {
+    assert(!CandidateOp.empty() && "Cannot find interchangeable instruction.");
+    if (is_contained(CandidateOp, MainOp->getOpcode()))
+      return MainOp->getOpcode();
+    return CandidateOp[0];
+  }
+
+  /// Return the operands of MainOp rewritten with the opcode of \p I: the
+  /// original left operand paired with the identity constant of the target
+  /// opcode.
+  SmallVector<Value *>
+  getInterchangeableInstructionOps(Instruction *I) override {
+    unsigned Opcode = I->getOpcode();
+    assert(is_contained(SupportedOp, Opcode));
+    // Nothing to rewrite when the opcode does not change.
+    if (MainOp->getOpcode() == Opcode)
+      return SmallVector<Value *>(MainOp->operands());
+    Type *Ty = MainOp->getOperand(1)->getType();
+    Constant *Identity;
+    switch (Opcode) {
+    case Instruction::Mul:
+      Identity = ConstantInt::get(Ty, 1);
+      break;
+    case Instruction::And:
+      // The identity of 'and' is all-ones; 'x & 0' would always be zero.
+      Identity = Constant::getAllOnesValue(Ty);
+      break;
+    default:
+      Identity = ConstantInt::get(Ty, 0);
+      break;
+    }
+    return {MainOp->getOperand(0), Identity};
+  }
+};
+
+/// Pattern for the equivalence between a multiplication by a power of 2 and a
+/// left shift by a constant: 'mul x, 2^C' <-> 'shl x, C'.
+class MulAndShlWithConstantInt final : public InterchangeableInstruction {
+  constexpr static std::initializer_list<unsigned> SupportedOp = {
+      Instruction::Mul, Instruction::Shl};
+  /// Opcodes this pattern may still resolve to. isSame narrows it to a single
+  /// opcode once it accepts an instruction that cannot be converted.
+  SmallVector<unsigned> CandidateOp = SupportedOp;
+
+public:
+  using InterchangeableInstruction::InterchangeableInstruction;
+
+  /// Return true if \p I fits the pattern. A shl by a constant and a mul by a
+  /// power-of-2 constant always fit. Any other mul/shl only fits while its
+  /// opcode is still a candidate, and accepting it pins the pattern to that
+  /// opcode, so this method is stateful.
+  bool isSame(Instruction *I) override {
+    unsigned Opcode = I->getOpcode();
+    if (!is_contained(SupportedOp, Opcode))
+      return false;
+    ConstantInt *CI = isBinOpWithConstantInt(I);
+    if (CI && (Opcode != Instruction::Mul || CI->getValue().isPowerOf2()))
+      return true;
+    if (is_contained(CandidateOp, Opcode)) {
+      CandidateOp = {Opcode};
+      return true;
+    }
+    return false;
+  }
+
+  /// Return the opcode the pattern resolves to: the opcode of MainOp when it
+  /// is still a candidate, otherwise the first remaining candidate.
+  unsigned getInterchangeableInstructionOpcode() override {
+    assert(!CandidateOp.empty() && "Cannot find interchangeable instruction.");
+    if (is_contained(CandidateOp, MainOp->getOpcode()))
+      return MainOp->getOpcode();
+    return CandidateOp[0];
+  }
+
+  /// Return the operands of MainOp rewritten with the opcode of \p I.
+  SmallVector<Value *>
+  getInterchangeableInstructionOps(Instruction *I) override {
+    assert(is_contained(SupportedOp, I->getOpcode()));
+    if (MainOp->getOpcode() == I->getOpcode())
+      return SmallVector<Value *>(MainOp->operands());
+    ConstantInt *MainCI = isBinOpWithConstantInt(MainOp);
+    assert(MainCI && "Expected a constant right operand to convert.");
+    const APInt &Op1Int = MainCI->getValue();
+    Type *Ty = MainOp->getOperand(1)->getType();
+    if (I->getOpcode() == Instruction::Mul) {
+      // shl x, C -> mul x, (1 << C). Shift in the operand's own bit width; a
+      // plain 'int' shift is undefined for C >= 32 and yields a wrong
+      // constant for wide types. An oversized C produces 0.
+      return {MainOp->getOperand(0),
+              ConstantInt::get(Ty,
+                               APInt(Op1Int.getBitWidth(), 1).shl(Op1Int))};
+    }
+    // mul x, 2^C -> shl x, C.
+    return {MainOp->getOperand(0), ConstantInt::get(Ty, Op1Int.logBase2())};
+  }
+};
+
+/// Build the list of interchange patterns to try for \p MainOp. The
+/// same-opcode base pattern always comes first; binary operators additionally
+/// get the BinOpIsNoOp and MulAndShlWithConstantInt patterns, in that order.
+static SmallVector<std::unique_ptr<InterchangeableInstruction>>
+getInterchangeableInstruction(Instruction *MainOp) {
+  SmallVector<std::unique_ptr<InterchangeableInstruction>> Patterns;
+  Patterns.push_back(std::make_unique<InterchangeableInstruction>(MainOp));
+  if (!MainOp->isBinaryOp())
+    return Patterns;
+  Patterns.push_back(std::make_unique<BinOpIsNoOp>(MainOp));
+  Patterns.push_back(std::make_unique<MulAndShlWithConstantInt>(MainOp));
+  return Patterns;
+}
+
+/// Filter \p Candidate down to the patterns that accept \p I, keeping their
+/// relative order. Return false, leaving the list's contents in place, when
+/// no pattern accepts \p I; otherwise drop the rejecting patterns and return
+/// true.
+static bool getInterchangeableInstruction(
+    SmallVector<std::unique_ptr<InterchangeableInstruction>> &Candidate,
+    Instruction *I) {
+  auto Accepts = [&](const std::unique_ptr<InterchangeableInstruction> &C) {
+    return C->isSame(I);
+  };
+  auto FirstRejected =
+      std::stable_partition(Candidate.begin(), Candidate.end(), Accepts);
+  const bool AnyAccepted = FirstRejected != Candidate.begin();
+  if (AnyAccepted)
+    Candidate.erase(FirstRejected, Candidate.end());
+  return AnyAccepted;
+}
+
+/// Return true if \p I can be expressed as \p MainOp or as \p AltOp. For
+/// instructions that are not binary operators this is a plain opcode
+/// comparison; for binary operators the interchange patterns built from \p I
+/// are consulted, trying \p MainOp before \p AltOp.
+static bool isConvertible(Instruction *I, Instruction *MainOp,
+                          Instruction *AltOp) {
+  // Check the pointers before their first use rather than after it.
+  assert(MainOp && "MainOp cannot be nullptr.");
+  if (!I->isBinaryOp()) {
+    if (I->getOpcode() == MainOp->getOpcode())
+      return true;
+    assert(AltOp && "AltOp cannot be nullptr.");
+    return I->getOpcode() == AltOp->getOpcode();
+  }
+  // isSame may narrow a pattern's internal state, so each target is tested
+  // against a freshly built candidate list.
+  auto MatchesTarget = [I](Instruction *Target) {
+    SmallVector<std::unique_ptr<InterchangeableInstruction>> Candidate(
+        getInterchangeableInstruction(I));
+    for (std::unique_ptr<InterchangeableInstruction> &C : Candidate)
+      if (C->isSame(I) && C->isSame(Target))
+        return true;
+    return false;
+  };
+  if (MatchesTarget(MainOp))
+    return true;
+  assert(AltOp && "AltOp cannot be nullptr.");
+  return MatchesTarget(AltOp);
+}
+
+/// Express \p I as either \p MainOp or \p AltOp. Return the instruction that
+/// was selected together with the operands \p I has in that form. \p MainOp
+/// is tried before \p AltOp. The caller must ensure the conversion is
+/// possible (see isConvertible).
+static std::pair<Instruction *, SmallVector<Value *>>
+convertTo(Instruction *I, Instruction *MainOp, Instruction *AltOp) {
+  assert(isConvertible(I, MainOp, AltOp) && "Cannot convert the instruction.");
+  if (!I->isBinaryOp()) {
+    Instruction *Selected =
+        I->getOpcode() == MainOp->getOpcode() ? MainOp : AltOp;
+    return std::make_pair(Selected, SmallVector<Value *>(I->operands()));
+  }
+  for (Instruction *Target : {MainOp, AltOp}) {
+    // Rebuild the patterns for every target: isSame may change their state.
+    SmallVector<std::unique_ptr<InterchangeableInstruction>> Patterns(
+        getInterchangeableInstruction(I));
+    for (std::unique_ptr<InterchangeableInstruction> &P : Patterns) {
+      if (!P->isSame(I) || !P->isSame(Target))
+        continue;
+      return std::make_pair(Target,
+                            P->getInterchangeableInstructionOps(Target));
+    }
+  }
+  llvm_unreachable("Cannot convert the instruction.");
+}
+
 /// Main data required for vectorization of instructions.
 class InstructionsState {
   /// The main/alternate instruction. MainOp is also VL0.
@@ -931,6 +1130,11 @@ static InstructionsState getSameOpcode(ArrayRef<Value *> VL,
   unsigned Opcode = MainOp->getOpcode();
   unsigned AltOpcode = Opcode;
 
+  SmallVector<std::unique_ptr<InterchangeableInstruction>>
+      InterchangeableInstructionCandidate(
+          getInterchangeableInstruction(MainOp));
+  SmallVector<std::unique_ptr<InterchangeableInstruction>>
+      AlternateInterchangeableInstructionCandidate;
   bool SwappedPredsCompatible = IsCmpOp && [&]() {
     SetVector<unsigned> UniquePreds, UniqueNonSwappedPreds;
     UniquePreds.insert(BasePred);
@@ -977,14 +1181,18 @@ static InstructionsState getSameOpcode(ArrayRef<Value *> VL,
       return InstructionsState::invalid();
     unsigned InstOpcode = I->getOpcode();
     if (IsBinOp && isa<BinaryOperator>(I)) {
-      if (InstOpcode == Opcode || InstOpcode == AltOpcode)
-        continue;
-      if (Opcode == AltOpcode && isValidForAlternation(InstOpcode) &&
-          isValidForAlternation(Opcode)) {
-        AltOpcode = InstOpcode;
-        AltOp = I;
+      if (getInterchangeableInstruction(InterchangeableInstructionCandidate, I))
         continue;
+      if (AlternateInterchangeableInstructionCandidate.empty()) {
+        if (!isValidForAlternation(Opcode) ||
+            !isValidForAlternation(InstOpcode))
+          return InstructionsState::invalid();
+        AlternateInterchangeableInstructionCandidate =
+            getInterchangeableInstruction(I);
       }
+      if (getInterchangeableInstruction(
+              AlternateInterchangeableInstructionCandidate, I))
+        continue;
     } else if (IsCastOp && isa<CastInst>(I)) {
       Value *Op0 = MainOp->getOperand(0);
       Type *Ty0 = Op0->getType();
@@ -1085,6 +1293,29 @@ static InstructionsState getSameOpcode(ArrayRef<Value *> VL,
     return InstructionsState::invalid();
   }
 
+  if (IsBinOp) {
+    auto FindOp =
+        [&](ArrayRef<std::unique_ptr<InterchangeableInstruction>> Candidate) {
+          for (const std::unique_ptr<InterchangeableInstruction> &I :
+               Candidate) {
+            unsigned InterchangeableInstructionOpcode =
+                I->getInterchangeableInstructionOpcode();
+            for (Value *V : VL) {
+              if (isa<PoisonValue>(V))
+                continue;
+              if (cast<Instruction>(V)->getOpcode() ==
+                  InterchangeableInstructionOpcode)
+                return cast<Instruction>(V);
+            }
+          }
+          llvm_unreachable(
+              "Cannot find the candidate instruction for InstructionsState.");
+        };
+    MainOp = FindOp(InterchangeableInstructionCandidate);
+    AltOp = AlternateInterchangeableInstructionCandidate.empty()
+                ? MainOp
+                : FindOp(AlternateInterchangeableInstructionCandidate);
+  }
   return InstructionsState(MainOp, AltOp);
 }
 
@@ -2447,29 +2678,28 @@ class BoUpSLP {
       ArgSize = isa<IntrinsicInst>(MainOp) ? IntrinsicNumOperands : NumOperands;
       OpsVec.resize(NumOperands);
       unsigned NumLanes = VL.size();
-      for (unsigned OpIdx = 0; OpIdx != NumOperands; ++OpIdx) {
+      for (unsigned OpIdx = 0; OpIdx != NumOperands; ++OpIdx)
         OpsVec[OpIdx].resize(NumLanes);
-        for (unsigned Lane = 0; Lane != NumLanes; ++Lane) {
-          assert((isa<Instruction>(VL[Lane]) || isa<PoisonValue>(VL[Lane])) &&
-                 "Expected instruction or poison value");
-          // Our tree has just 3 nodes: the root and two operands.
-          // It is therefore trivial to get the APO. We only need to check the
-          // opcode of VL[Lane] and whether the operand at OpIdx is the LHS or
-          // RHS operand. The LHS operand of both add and sub is never attached
-          // to an inversese operation in the linearized form, therefore its APO
-          // is false. The RHS is true only if VL[Lane] is an inverse operation.
-
-          // Since operand reordering is performed on groups of commutative
-          // operations or alternating sequences (e.g., +, -), we can safely
-          // tell the inverse operations by checking commutativity.
-          if (isa<PoisonValue>(VL[Lane])) {
-            if (auto *EI = dyn_cast<ExtractElementInst>(MainOp)) {
-              if (OpIdx == 0) {
+      for (unsigned Lane = 0; Lane != NumLanes; ++Lane) {
+        assert((isa<Instruction>(VL[Lane]) || isa<PoisonValue>(VL[Lane])) &&
+               "Expected instruction or poison value");
+        // Our tree has just 3 nodes: the root and two operands.
+        // It is therefore trivial to get the APO. We only need to check the
+        // opcode of VL[Lane] and whether the operand at OpIdx is the LHS or RHS
+        // operand. The LHS operand of both add and sub is never attached to an
+        // inverse operation in the linearized form, therefore its APO is
+        // false. The RHS is true only if VL[Lane] is an inverse operation.
+
+        // Since operand reordering is performed on groups of commutative
+        // operations or alternating sequences (e.g., +, -), we can safely tell
+        // the inverse operations by checking commutativity.
+        if (isa<PoisonValue>(VL[Lane])) {
+          for (unsigned OpIdx : seq<unsigned>(NumOperands)) {
+            if (OpIdx == 0) {
+              if (auto *EI = dyn_cast<ExtractElementInst>(MainOp)) {
                 OpsVec[OpIdx][Lane] = {EI->getVectorOperand(), true, false};
                 continue;
-              }
-            } else if (auto *EV = dyn_cast<ExtractValueInst>(MainOp)) {
-              if (OpIdx == 0) {
+              } else if (auto *EV = dyn_cast<ExtractValueInst>(MainOp)) {
                 OpsVec[OpIdx][Lane] = {EV->getAggregateOperand(), true, false};
                 continue;
               }
@@ -2477,12 +2707,15 @@ class BoUpSLP {
             OpsVec[OpIdx][Lane] = {
                 PoisonValue::get(MainOp->getOperand(OpIdx)->getType()), true,
                 false};
-            continue;
           }
-          bool IsInverseOperation = !isCommutative(cast<Instruction>(VL[Lane]));
+          continue;
+        }
+        auto [SelectedOp, Ops] =
+            convertTo(cast<Instruction>(VL[Lane]), MainOp, S.getAltOp());
+        bool IsInverseOperation = !isCommutative(SelectedOp);
+        for (unsigned OpIdx : seq<unsigned>(NumOperands)) {
           bool APO = (OpIdx == 0) ? false : IsInverseOperation;
-          OpsVec[OpIdx][Lane] = {cast<Instruction>(VL[Lane])->getOperand(OpIdx),
-                                 APO, false};
+          OpsVec[OpIdx][Lane] = {Ops[OpIdx], APO, false};
         }
       }
     }
@@ -8501,8 +8734,12 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
 
   BlockScheduling &BS = *BSRef;
 
+  SmallVector<Value *> MainOpIsTheFirst(UniqueValues);
+  auto MainOpIter = find(MainOpIsTheFirst, S.getMainOp());
+  std::rotate(MainOpIsTheFirst.begin(), MainOpIter, std::next(MainOpIter));
+
   std::optional<ScheduleData *> Bundle =
-      BS.tryScheduleBundle(UniqueValues, this, S);
+      BS.tryScheduleBundle(MainOpIsTheFirst, this, S);
 #ifdef EXPENSIVE_CHECKS
   // Make sure we didn't break any internal invariants
   BS.verify();
@@ -15889,7 +16126,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) {
       Value *V = Builder.CreateBinOp(
           static_cast<Instruction::BinaryOps>(E->getOpcode()), LHS,
           RHS);
-      propagateIRFlags(V, E->Scalars, VL0, It == MinBWs.end());
+      propagateIRFlags(V, E->Scalars, nullptr, It == MinBWs.end());
       if (auto *I = dyn_cast<Instruction>(V)) {
         V = ::propagateMetadata(I, E->Scalars);
         // Drop nuw flags for abs(sub(commutative), true).
@@ -17169,6 +17406,7 @@ BoUpSLP::BlockScheduling::buildBundle(ArrayRef<Value *> VL) {
 std::optional<BoUpSLP::ScheduleData *>
 BoUpSLP::BlockScheduling::tryScheduleBundle(ArrayRef<Value *> VL, BoUpSLP *SLP,
                                             const InstructionsState &S) {
+  assert(VL[0] == S.getMainOp() && "MainOp must be the first element of VL.");
   // No need to schedule PHIs, insertelement, extractelement and extractvalue
   // instructions.
   if (isa<PHINode>(S.getMainOp()) ||
diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/gather-with-minbith-user.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/gather-with-minbith-user.ll
index 3ebe920d17343..781954cbec2f7 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/gather-with-minbith-user.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/gather-with-minbith-user.ll
@@ -5,7 +5,9 @@ define void @h() {
 ; CHECK-LABEL: define void @h() {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr i8, ptr null, i64 16
-; CHECK-NEXT:    store <8 x i16> zeroinitializer, ptr [[ARRAYIDX2]], align 2
+; CHECK-NEXT:    [[ARRAYIDX18:%.*]] = getelementptr i8, ptr null, i64 24
+; CHECK-NEXT:    store <4 x i16> zeroinitializer, ptr [[ARRAYIDX2]], align 2
+; CHECK-NEXT:    store <4 x i16> zeroinitializer, ptr [[ARRAYIDX18]], align 2
 ; CHECK-NEXT:    ret void
 ;
 entry:
diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/vec3-base.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/vec3-base.ll
index feb4ad865f314..d527d38adbee3 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/vec3-base.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/vec3-base.ll
@@ -314,10 +314,10 @@ define void @store_try_reorder(ptr %dst) {
 ;
 ; POW2-ONLY-LABEL: @store_try_reorder(
 ; POW2-ONLY-NEXT:  entry:
-; POW2-ONLY-NEXT:    [[ADD:%.*]] = add i32 0, 0
-; POW2-ONLY-NEXT:    store i32 [[ADD]], ptr [[DST:%.*]], align 4
-; POW2-ONLY-NEXT:    [[ARRAYIDX_I1887:%.*]] = getelementptr i32, ptr [[DST]], i64 1
-; POW2-ONLY-NEXT:    store <2 x i32> zeroinitializer, ptr [[ARRAYIDX_I1887]], align 4
+; POW2-ONLY-NEXT:    store <2 x i32> zeroinitializer, ptr [[ARRAYIDX_I1887:%.*]], align 4
+; POW2-ONLY-NEXT:    [[ADD216:%.*]] = sub i32 0, 0
+; POW2-ONLY-NEXT:    [[ARRAYIDX_I1891:%.*]] = getelementptr i32, ptr [[ARRAYIDX_I1887]], i64 2
+; POW2-ONLY-NEXT:    store i32 [[ADD216]], ptr [[ARRAYIDX_I1891]], align 4
 ; POW2-ONLY-NEXT:    ret void
 ;
 entry:
diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/vec3-base.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/vec3-base.ll
index 7ab5e4d6cb787..481d586e6658a 100644
--- a/llvm/test/Transforms/SLPVectorizer/RISCV/vec3-base.ll
+++ b/llvm/test/Transforms/SLPVectorizer/RISCV/vec3-base.ll
@@ -324,10 +324,10 @@ define void @store_try_reorder(ptr %dst) {
 ;
 ; POW2-ONLY-LABEL: @store_try_reorder(
 ; POW2-ONLY-NEXT:  entry:
-; POW2-ONLY-NEXT:    [[ADD:%.*]] = add i32 0, 0
-; POW2-ONLY-NEXT:    store i32 [[ADD]], ptr [[DST:%.*]], align 4
-; POW2-ONLY-NEXT:    [[ARRAYIDX_I1887:%.*]] = getelementptr i32, ptr [[DST]], i64 1
-; POW2-ONLY-NEXT:    store <2 x i32> zeroinitializer, ptr [[ARRAYIDX_I1887]], align 4
+; POW2-ONLY-NEXT:    store <2 x i32> zeroinitializer, ptr [[ARRAYIDX_I1887:%.*]], align 4
+; POW2-ONLY-NEXT:    [[ADD216:%.*]] = sub i32 0, 0
+; POW2-ONLY-NEXT:    [[ARRAYIDX_I1891:%.*]] = getelementptr i32, ptr [[ARRAYIDX_I1887]], i64 2
+; POW2-ONLY-NEXT:    store i32 [[ADD216]], ptr [[ARRAYIDX_I1891]], align 4
 ; POW2-ONLY-NEXT:    ret void
 ;
 entry:
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/barriercall.ll b/llvm/test/Transforms/SLPVectorizer/X86/barriercall.ll
index f46a5d84a86cc..a39e602e2da71 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/barriercall.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/barriercall.ll
@@ -10,9 +10,7 @@ define i32 @foo(ptr nocapture %A, i32 %n) {
 ; CHECK-NEXT:    [[CALL:%.*]] = tail call i32 (...) @bar()
 ; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <4 x i32> poison, i32 [[N:%.*]], i32 0
 ; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> poison, <4 x i32> zeroinitializer
-; CHECK-NEXT:    [[TMP1:%.*]] = mul nsw <4 x i32> [[SHUFFLE]], <i32 5, i32 9, i32 3, i32 10>
-; CHECK-NEXT:    [[TMP2:%.*]] = shl <4 x i32> [[SHUFFLE]], <i32 5, i32 9, i32 3, i32 10>
-; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> <i32 0, i32 1, i32 6, i32 3>
+; CHECK-NEXT:    [[TMP3:%.*]] = mul <4 x i32> [[SHUFFLE]], <i32 5, i32 9, i32 8, i32 10>
 ; CHECK-NEXT:    [[TMP4:%.*]] = add nsw <4 x i32> [[TMP3]], splat (i32 9)
 ; CHECK-NEXT:    store <4 x i32> [[TMP4]], ptr [[A:%.*]], align 4
 ; CHECK-NEXT:    ret i32 undef
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/bottom-to-top-reorder.ll b/llvm/test/Transforms/SLPVectorizer/X86/bottom-to-top-reorder.ll
index 889f5a95c81d6..299677ca80b34 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/bottom-to-top-reorder.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/bottom-to-top-reorder.ll
@@ -10,15 +10,10 @@ define void @test(ptr %0, ptr %1, ptr %2) {
 ; CHECK-NEXT:    [[TMP11:%.*]] = sub <4 x i32> <i32 0, i32 0, i32 undef, i32 0>, [[TMP8]]
 ; CHECK-NEXT:    [[TMP12:%.*]] = sub <4 x i32> [[TMP11]], [[TMP10]]
 ; CHECK-NEXT:    [[TMP13:%.*]] = add <4 x i32> [[TMP12]], [[TMP6]]
-; CHECK-NEXT:    [[TMP14:%.*]] = add <4 x i32> [[TMP13]], <i32 0, i32 0, i32 1, i32 0>
-; CHECK-NEXT:    [[TMP15:%.*]] = sub <4 x i32> [[TMP13]], <i32 0, i32 0, i32 1, i32 0>
-; CHECK-NEXT:    [[TMP16:%.*]] = shufflevector <4 x i32> [[TMP14]], <4 x i32> [[TMP15]], <4 x i32> <i32 2, i32 0, i32 1, i32 7>
+; CHECK-NEXT:    [[TMP16:%.*]] = add <4 x i32> <i32 0, i32 0, i32 1, i32 0>, [[TMP13]]
 ; CHECK-NEXT:    [[TMP17:%.*]] = add <4 x i32> [[TMP16]], zeroinitializer
-; CHECK-NEXT:    [[TMP18:%.*]] = sub <4 x i32> [[TMP16]], zeroinitializer
-; CHECK-NEXT:    [[TMP19:%.*]] = shufflevector <4 x i32> [[TMP17]], <4 x i32> [[TMP18]], <4 x i32> <i32 0, i32 5, i32 6, i32 7>
-; CHECK-NEXT:    [[TMP20:%.*]] = add <4 x i32> [[TMP19]], zeroinitializer
-; CHECK-NEXT:    [[TMP21:%.*]] = sub <4 x i32> [[TMP19]], zeroinitializer
-; CHECK-NEXT:    [[TMP22:%.*]] = shufflevector <4 x i32> [[TMP20]], <4 x i32> [[TMP21]], <4 x i32> <i32 0, i32 5, i32 6, i32 3>
+; CHECK-NEXT:    [[TMP14:%.*]] = add <4 x i32> [[TMP17]], zeroinitializer
+; CHECK-NEXT:    [[TMP22:%.*]] = shufflevector <4 x i32> [[TMP14]], <4 x i32> poison, <4 x i32> <i32 2, i32 0, i32 1, i32 3>
 ; CHECK-NEXT:    store <4 x i32> [[TMP22]], ptr [[TMP2:%.*]], align 4
 ; CHECK-NEXT:    ret void
 ;
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/buildvector-postpone-for-dependency.ll b/llvm/test/Transforms/SLPVectorizer/X86/buildvector-postpone-for-dependency.ll
index 43c42c1ea2bfb..aa424b9031e77 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/buildvector-postpone-for-dependency.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/buildvector-postpone-for-dependency.ll
@@ -8,15 +8,13 @@ define void @test() {
 ; CHECK:       [[BB1:.*]]:
 ; CHECK-NEXT:    br label %[[BB2:.*]]
 ; CHECK:       [[BB2]]:
-; CHECK-NEXT:    [[TMP0:%.*]] = phi <4 x i32> [ poison, %[[BB1]] ], [ [[TMP5:%.*]], %[[BB6]] ]
+; CHECK-NEXT:    [[TMP0:%.*]] = phi <4 x i32> [ poison, %[[BB1]] ], [ [[TMP4:%.*]], %[[BB6]] ]
 ; CHECK-NEXT:    ret void
 ; CHECK:       [[BB6]]:
 ; CHECK-NEXT:    [[TMP1:%.*]] = phi <2 x i32> [ zeroinitializer, %[[BB]] ], [ [[TMP8:%.*]], %[[BB6]] ]
 ; CHECK-NEXT:    [[TMP6:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
-; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <4 x i32> <i32 0, i32 0, i32 poison, i32 poison>, <4 x i32> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 5, i32 4>
-; CHECK-NEXT:    [[TMP3:%.*]] = ashr <4 x i32> zeroinitializer, [[TMP2]]
-; CHECK-NEXT:    [[TMP4:%.*]] = mul <4 x i32> zeroinitializer, [[TMP2]]
-; CHECK-NEXT:    [[TMP5]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> [[TMP4]], <4 x i32> <i32 0, i32 5, i32 6, i32 7>
+; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <4 x i32> <i32 1, i32 0, i32 poison, i32 poison>, <4 x i32> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 5, i32 4>
+; CHECK-NEXT:    [[TMP4]] = mul <4 x i32> [[TMP3]], zeroinitializer
 ; CHECK-NEXT:    [[TMP7:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> <i32 0, i32 poison>, <2 x i32> <i32 2, i32 1>
 ; CHECK-NEXT:    [[TMP8]] = mul <2 x i32> zeroinitializer, [[TMP7]]
 ; CHECK-NEXT:    br i1 false, label %[[BB2]], label %[[BB6]]
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/extract-scalar-from-undef.ll b/llvm/test/Transforms/SLPVectorizer/X86/extract-scalar-from-undef.ll
index 1c62e57edfc46..56d2df11458d1 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/extract-scalar-from-undef.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/extract-scalar-from-undef.ll
@@ -6,15 +6,13 @@ define i64 @foo(i32 %tmp7) {
 ; CHECK-NEXT:  bb:
 ; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x i32> <i32 0, i32 0, i32 poison, i32 0>, i32 [[TMP5:%.*]], i32 2
 ; CHECK-NEXT:    [[TMP3:%.*]] = sub <4 x i32> [[TMP2]], zeroinitializer
-; CHECK-NEXT:    [[TMP24:%.*]] = sub i32 undef, 0
-; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 poison, i32 poison, i32 undef, i32 0>, i32 [[TMP24]], i32 4
-; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <8 x i32> [[TMP0]], i32 0, i32 5
-; CHECK-NEXT:    [[TMP11:%.*]] = insertelement <8 x i32> <i32 poison, i32 poison, i32 undef, i32 poison, i32 poison, i32 undef, i32 poison, i32 undef>, i32 [[TMP24]], i32 6
+; CHECK-NEXT:    [[TMP13:%.*]] = call <8 x i32> @llvm.vector.insert.v8i32.v2i32(<8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 poison, i32 poison, i32 undef, i32 0>, <2 x i32> <i32 undef, i32 0>, i64 4)
+; CHECK-NEXT:    [[TMP11:%.*]] = insertelement <8 x i32> <i32 poison, i32 poison, i32 undef, i32 poison, i32 poison, i32 undef, i32 poison, i32 undef>, i32 undef, i32 6
 ; CHECK-NEXT:    [[TMP12:%.*]] = call <8 x i32> @llvm.vector.insert.v8i32.v4i32(<8 x i32> poison, <4 x i32> [[TMP3]], i64 0)
 ; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <8 x i32> [[TMP12]], <8 x i32> [[TMP11]], <8 x i32> <i32 0, i32 1, i32 poison, i32 2, i32 3, i32 poison, i32 14, i32 poison>
-; CHECK-NEXT:    [[TMP5:%.*]] = add nsw <8 x i32> [[TMP1]], [[TMP4]]
-; CHECK-NEXT:    [[TMP6:%.*]] = sub nsw <8 x i32> [[TMP1]], [[TMP4]]
-; CHECK-NEXT:    [[TMP7:%.*]] = shufflevector <8 x i32> [[TMP5]], <8 x i32> [[TMP6]], <8 x i32> <i32 0, i32 9, i32 10, i32 11, i32 4, i32 5, i32 14, i32 15>
+; CHECK-NEXT:    [[TMP6:%.*]] = add nsw <8 x i32> [[TMP13]], [[TMP4]]
+; CHECK-NEXT:    [[TMP5:%.*]] = sub nsw <8 x i32> [[TMP13]], [[TMP4]]
+; CHECK-NEXT:    [[TMP7:%.*]] = shufflevector <8 x i32> [[TMP6]], <8 x i32> [[TMP5]], <8 x i32> <i32 0, i32 9, i32 10, i32 11, i32 4, i32 5, i32 14, i32 15>
 ; CHECK-NEXT:    [[TMP8:%.*]] = add <8 x i32> zeroinitializer, [[TMP7]]
 ; CHECK-NEXT:    [[TMP9:%.*]] = xor <8 x i32> [[TMP8]], zeroinitializer
 ; CHECK-NEXT:    [[TMP10:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP9]])
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/extractcost.ll b/llvm/test/Transforms/SLPVectorizer/X86/extractcost.ll
index 02c3173adc654..c6f5308cf54aa 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/extractcost.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/extractcost.ll
@@ -9,9 +9,7 @@ define i32 @foo(ptr nocapture %A, i32 %n, i32 %m) {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <4 x i32> poison, i32 [[N:%.*]], i32 0
 ; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> poison, <4 x i32> zeroinitializer
-; CHECK-NEXT:    [[TMP1:%.*]] = mul nsw <4 x i32> [[SHUFFLE]], <i32 5, i32 9, i32 3, i32 10>
-; CHECK-NEXT:    [[TMP2:%.*]] = shl <4 x i32> [[SHUFFLE]], <i32 5, i32 9, i32 3, i32 10>
-; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> <i32 0, i32 1, i32 6, i32 3>
+; CHECK-NEXT:    [[TMP3:%.*]] = mul <4 x i32> [[SHUFFLE]], <i32 5, i32 9, i32 8, i32 10>
 ; CHECK-NEXT:    [[TMP4:%.*]] = add nsw <4 x i32> [[TMP3]], splat (i32 9)
 ; CHECK-NEXT:    store <4 x i32> [[TMP4]], ptr [[A:%.*]], align 4
 ; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <4 x i32> [[TMP4]], i32 0
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/vec3-base.ll b/llvm/test/Transforms/SLPVectorizer/X86/vec3-base.ll
index 6e2a43ac5f9f1..15dd6756cd7db 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/vec3-base.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/vec3-base.ll
@@ -242,13 +242,18 @@ exit:
 }
 
 define void @store_try_reorder(ptr %dst) {
-; CHECK-LABEL: @store_try_reorder(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[ADD:%.*]] = add i32 0, 0
-; CHECK-NEXT:    store i32 [[ADD]], ptr [[DST:%.*]], align 4
-; CHECK-NEXT:    [[ARRAYIDX_I1887:%.*]] = getelementptr i32, ptr [[DST]], i64 1
-; CHECK-NEXT:    store <2 x i32> zeroinitializer, ptr [[ARRAYIDX_I1887]], align 4
-; CHECK-NEXT:    ret void
+; NON-POW2-LABEL: @store_try_reorder(
+; NON-POW2-NEXT:  entry:
+; NON-POW2-NEXT:    store <3 x i32> zeroinitializer, ptr [[DST:%.*]], align 4
+; NON-POW2-NEXT:    ret void
+;
+; POW2-ONLY-LABEL: @store_try_reorder(
+; POW2-ONLY-NEXT:  entry:
+; POW2-ONLY-NEXT:    store <2 x i32> zeroinitializer, ptr [[DST:%.*]], align 4
+; POW2-ONLY-NEXT:    [[ADD216:%.*]] = sub i32 0, 0
+; POW2-ONLY-NEXT:    [[ARRAYIDX_I1891:%.*]] = getelementptr i32, ptr [[DST]], i64 2
+; POW2-ONLY-NEXT:    store i32 [[ADD216]], ptr [[ARRAYIDX_I1891]], align 4
+; POW2-ONLY-NEXT:    ret void
 ;
 entry:
   %add = add i32 0, 0

>From b7c1a244d8a9a5e19b2dabed653cf1a971e1ea18 Mon Sep 17 00:00:00 2001
From: Han-Kuan Chen <hankuan.chen at sifive.com>
Date: Mon, 23 Dec 2024 02:18:24 -0800
Subject: [PATCH 03/38] [SLP] Pre-commit test.

---
 .../Transforms/SLPVectorizer/isOpcodeOrAlt.ll | 27 +++++++++++++++++++
 1 file changed, 27 insertions(+)
 create mode 100644 llvm/test/Transforms/SLPVectorizer/isOpcodeOrAlt.ll

diff --git a/llvm/test/Transforms/SLPVectorizer/isOpcodeOrAlt.ll b/llvm/test/Transforms/SLPVectorizer/isOpcodeOrAlt.ll
new file mode 100644
index 0000000000000..29585343b2f9a
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/isOpcodeOrAlt.ll
@@ -0,0 +1,27 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -passes=slp-vectorizer -S -slp-max-reg-size=1024 %s | FileCheck %s
+
+define void @test(ptr %a, ptr %b) {
+entry:
+  %gep0 = getelementptr inbounds i32, ptr %a, i64 0
+  %gep1 = getelementptr inbounds i32, ptr %a, i64 1
+  %gep2 = getelementptr inbounds i32, ptr %a, i64 2
+  %gep3 = getelementptr inbounds i32, ptr %a, i64 3
+  %0 = load i32, ptr %gep0, align 4
+  %1 = load i32, ptr %gep1, align 4
+  %2 = load i32, ptr %gep2, align 4
+  %3 = load i32, ptr %gep3, align 4
+  %op0 = shl i32 %0, 1
+  %op1 = add i32 %1, zeroinitializer
+  %op2 = mul i32 %2, 2
+  %op3 = shl i32 %3, zeroinitializer
+  %gep4 = getelementptr inbounds i32, ptr %b, i64 0
+  %gep5 = getelementptr inbounds i32, ptr %b, i64 1
+  %gep6 = getelementptr inbounds i32, ptr %b, i64 2
+  %gep7 = getelementptr inbounds i32, ptr %b, i64 3
+  store i32 %op0, ptr %gep4, align 4
+  store i32 %op1, ptr %gep5, align 4
+  store i32 %op2, ptr %gep6, align 4
+  store i32 %op3, ptr %gep7, align 4
+  ret void
+}

>From 5ad586cd7ecd5813c293f42c41337900aecdd9e7 Mon Sep 17 00:00:00 2001
From: Han-Kuan Chen <hankuan.chen at sifive.com>
Date: Thu, 13 Feb 2025 07:43:46 -0800
Subject: [PATCH 04/38] [SLP] Fix isOpcodeOrAlt failing to find an
 interchangeable instruction.

---
 llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp     |  3 +--
 llvm/test/Transforms/SLPVectorizer/isOpcodeOrAlt.ll | 11 +++++++++++
 2 files changed, 12 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index d9cac6309a95d..847042908599a 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -1035,8 +1035,7 @@ class InstructionsState {
   bool isAltShuffle() const { return getMainOp() != getAltOp(); }
 
   bool isOpcodeOrAlt(Instruction *I) const {
-    unsigned CheckedOpcode = I->getOpcode();
-    return getOpcode() == CheckedOpcode || getAltOpcode() == CheckedOpcode;
+    return isConvertible(I, MainOp, AltOp);
   }
 
   /// Checks if the current state is valid, i.e. has non-null MainOp
diff --git a/llvm/test/Transforms/SLPVectorizer/isOpcodeOrAlt.ll b/llvm/test/Transforms/SLPVectorizer/isOpcodeOrAlt.ll
index 29585343b2f9a..623c9e816a59c 100644
--- a/llvm/test/Transforms/SLPVectorizer/isOpcodeOrAlt.ll
+++ b/llvm/test/Transforms/SLPVectorizer/isOpcodeOrAlt.ll
@@ -2,6 +2,17 @@
 ; RUN: opt -passes=slp-vectorizer -S -slp-max-reg-size=1024 %s | FileCheck %s
 
 define void @test(ptr %a, ptr %b) {
+; CHECK-LABEL: @test(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[GEP0:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 0
+; CHECK-NEXT:    [[GEP4:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 0
+; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x i32>, ptr [[GEP0]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = shl <4 x i32> [[TMP0]], <i32 1, i32 0, i32 1, i32 0>
+; CHECK-NEXT:    [[TMP2:%.*]] = mul <4 x i32> [[TMP0]], <i32 1, i32 0, i32 1, i32 0>
+; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> <i32 0, i32 1, i32 6, i32 3>
+; CHECK-NEXT:    store <4 x i32> [[TMP3]], ptr [[GEP4]], align 4
+; CHECK-NEXT:    ret void
+;
 entry:
   %gep0 = getelementptr inbounds i32, ptr %a, i64 0
   %gep1 = getelementptr inbounds i32, ptr %a, i64 1

>From fdf88a2441f80cf3c9b946be83fb093d7074ecfe Mon Sep 17 00:00:00 2001
From: Han-Kuan Chen <hankuan.chen at sifive.com>
Date: Thu, 13 Feb 2025 08:02:31 -0800
Subject: [PATCH 05/38] support Or

---
 .../Transforms/Vectorize/SLPVectorizer.cpp    |  4 +-
 ...reversed-strided-node-with-external-ptr.ll |  7 ++-
 .../SLPVectorizer/X86/bv-shuffle-mask.ll      |  4 +-
 ...gathered-delayed-nodes-with-reused-user.ll | 20 ++++-----
 .../X86/minbitwidth-drop-wrapping-flags.ll    |  4 +-
 .../X86/multi-extracts-bv-combined.ll         |  4 +-
 .../non-scheduled-inst-reused-as-last-inst.ll | 44 +++++++++----------
 .../alternate-opcode-sindle-bv.ll             | 26 ++++++++++-
 .../resized-alt-shuffle-after-minbw.ll        |  4 +-
 .../SLPVectorizer/shuffle-mask-resized.ll     |  4 +-
 10 files changed, 63 insertions(+), 58 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 847042908599a..c26ac4a904d45 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -858,8 +858,8 @@ class InterchangeableInstruction {
 
 class BinOpIsNoOp final : public InterchangeableInstruction {
   constexpr static std::initializer_list<unsigned> SupportedOp = {
-      Instruction::Add, Instruction::Sub,  Instruction::Mul,
-      Instruction::Shl, Instruction::AShr, Instruction::And};
+      Instruction::Add,  Instruction::Sub, Instruction::Mul, Instruction::Shl,
+      Instruction::AShr, Instruction::And, Instruction::Or};
   SmallVector<unsigned> CandidateOp = SupportedOp;
 
 public:
diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/reversed-strided-node-with-external-ptr.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/reversed-strided-node-with-external-ptr.ll
index fd3d4ab80b29c..ff897180cc9b7 100644
--- a/llvm/test/Transforms/SLPVectorizer/RISCV/reversed-strided-node-with-external-ptr.ll
+++ b/llvm/test/Transforms/SLPVectorizer/RISCV/reversed-strided-node-with-external-ptr.ll
@@ -7,13 +7,12 @@ define void @test(ptr %a, i64 %0) {
 ; CHECK-NEXT:  [[ENTRY:.*:]]
 ; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x ptr> poison, ptr [[A]], i32 0
 ; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <2 x ptr> [[TMP1]], <2 x ptr> poison, <2 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <2 x i64> <i64 poison, i64 0>, i64 [[TMP0]], i32 0
 ; CHECK-NEXT:    br label %[[BB:.*]]
 ; CHECK:       [[BB]]:
-; CHECK-NEXT:    [[TMP3:%.*]] = or disjoint i64 [[TMP0]], 1
-; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <2 x i64> poison, i64 [[TMP3]], i32 0
-; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <2 x i64> [[TMP4]], i64 0, i32 1
+; CHECK-NEXT:    [[TMP5:%.*]] = or disjoint <2 x i64> [[TMP3]], <i64 1, i64 0>
 ; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr double, <2 x ptr> [[TMP2]], <2 x i64> [[TMP5]]
-; CHECK-NEXT:    [[ARRAYIDX17_I28_1:%.*]] = getelementptr double, ptr [[A]], i64 [[TMP3]]
+; CHECK-NEXT:    [[ARRAYIDX17_I28_1:%.*]] = extractelement <2 x ptr> [[TMP6]], i32 0
 ; CHECK-NEXT:    [[TMP7:%.*]] = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> [[TMP6]], i32 8, <2 x i1> splat (i1 true), <2 x double> poison)
 ; CHECK-NEXT:    [[TMP8:%.*]] = load <2 x double>, ptr [[A]], align 8
 ; CHECK-NEXT:    [[TMP9:%.*]] = load <2 x double>, ptr [[A]], align 8
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/bv-shuffle-mask.ll b/llvm/test/Transforms/SLPVectorizer/X86/bv-shuffle-mask.ll
index 766916fe71f35..c4ddc5d63cc04 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/bv-shuffle-mask.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/bv-shuffle-mask.ll
@@ -7,14 +7,12 @@ define i16 @test(i16 %v1, i16 %v2) {
 ; CHECK-NEXT:  [[ENTRY:.*:]]
 ; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <4 x i16> <i16 0, i16 0, i16 0, i16 poison>, i16 [[V2]], i32 3
 ; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x i16> <i16 0, i16 0, i16 0, i16 poison>, i16 [[V1]], i32 3
-; CHECK-NEXT:    [[TMP2:%.*]] = or <4 x i16> [[TMP0]], [[TMP1]]
 ; CHECK-NEXT:    [[TMP3:%.*]] = and <4 x i16> [[TMP0]], [[TMP1]]
-; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <4 x i16> [[TMP2]], <4 x i16> [[TMP3]], <4 x i32> <i32 0, i32 1, i32 2, i32 7>
 ; CHECK-NEXT:    [[TMP5:%.*]] = shufflevector <4 x i16> [[TMP0]], <4 x i16> poison, <2 x i32> <i32 poison, i32 3>
 ; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <2 x i16> [[TMP5]], i16 [[V1]], i32 0
 ; CHECK-NEXT:    [[TMP7:%.*]] = shufflevector <2 x i16> [[TMP6]], <2 x i16> poison, <4 x i32> <i32 0, i32 0, i32 0, i32 1>
 ; CHECK-NEXT:    [[TMP8:%.*]] = or <4 x i16> [[TMP7]], zeroinitializer
-; CHECK-NEXT:    [[TMP9:%.*]] = and <4 x i16> [[TMP4]], zeroinitializer
+; CHECK-NEXT:    [[TMP9:%.*]] = and <4 x i16> [[TMP3]], zeroinitializer
 ; CHECK-NEXT:    [[TMP10:%.*]] = and <4 x i16> [[TMP9]], zeroinitializer
 ; CHECK-NEXT:    [[TMP11:%.*]] = icmp ne <4 x i16> [[TMP10]], zeroinitializer
 ; CHECK-NEXT:    [[TMP12:%.*]] = or <4 x i1> [[TMP11]], zeroinitializer
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/gathered-delayed-nodes-with-reused-user.ll b/llvm/test/Transforms/SLPVectorizer/X86/gathered-delayed-nodes-with-reused-user.ll
index f49bac26e846b..40faa0841bfe0 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/gathered-delayed-nodes-with-reused-user.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/gathered-delayed-nodes-with-reused-user.ll
@@ -6,21 +6,21 @@
 define i64 @foo() {
 ; CHECK-LABEL: define i64 @foo() {
 ; CHECK-NEXT:  bb:
+; CHECK-NEXT:    [[ADD7:%.*]] = add i64 0, 0
 ; CHECK-NEXT:    br label [[BB3:%.*]]
 ; CHECK:       bb1:
-; CHECK-NEXT:    [[PHI:%.*]] = phi i64 [ [[ADD:%.*]], [[BB3]] ]
-; CHECK-NEXT:    [[PHI2:%.*]] = phi i64 [ [[OR:%.*]], [[BB3]] ]
+; CHECK-NEXT:    [[TMP1:%.*]] = phi <2 x i64> [ [[TMP5:%.*]], [[BB3]] ]
 ; CHECK-NEXT:    ret i64 0
 ; CHECK:       bb3:
 ; CHECK-NEXT:    [[PHI4:%.*]] = phi i64 [ 0, [[BB:%.*]] ], [ 0, [[BB3]] ]
 ; CHECK-NEXT:    [[TMP0:%.*]] = phi <2 x i64> [ zeroinitializer, [[BB]] ], [ [[TMP3:%.*]], [[BB3]] ]
-; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <2 x i64> [[TMP0]], i32 0
-; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <2 x i64> [[TMP0]], i32 1
-; CHECK-NEXT:    [[ADD]] = add i64 [[TMP1]], [[TMP2]]
-; CHECK-NEXT:    [[GETELEMENTPTR:%.*]] = getelementptr i64, ptr addrspace(1) null, i64 0
-; CHECK-NEXT:    [[OR]] = or i64 [[PHI4]], 0
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x i64> <i64 poison, i64 0>, i64 [[PHI4]], i32 0
+; CHECK-NEXT:    [[TMP5]] = add <2 x i64> [[TMP0]], [[TMP2]]
+; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <2 x i64> [[TMP0]], <2 x i64> <i64 poison, i64 0>, <2 x i32> <i32 0, i32 3>
+; CHECK-NEXT:    [[TMP3]] = add <2 x i64> [[TMP4]], [[TMP2]]
+; CHECK-NEXT:    [[GETELEMENTPTR:%.*]] = getelementptr i64, ptr addrspace(1) null, i64 [[ADD7]]
+; CHECK-NEXT:    [[OR:%.*]] = extractelement <2 x i64> [[TMP5]], i32 1
 ; CHECK-NEXT:    [[ICMP:%.*]] = icmp ult i64 [[OR]], 0
-; CHECK-NEXT:    [[TMP3]] = insertelement <2 x i64> <i64 poison, i64 0>, i64 [[ADD]], i32 0
 ; CHECK-NEXT:    br i1 false, label [[BB3]], label [[BB1:%.*]]
 ;
 ; FORCED-LABEL: define i64 @foo() {
@@ -34,9 +34,7 @@ define i64 @foo() {
 ; FORCED-NEXT:    [[PHI5:%.*]] = phi i64 [ 0, [[BB:%.*]] ], [ 0, [[BB3]] ]
 ; FORCED-NEXT:    [[TMP1:%.*]] = phi <2 x i64> [ zeroinitializer, [[BB]] ], [ [[TMP7:%.*]], [[BB3]] ]
 ; FORCED-NEXT:    [[TMP2:%.*]] = insertelement <2 x i64> <i64 poison, i64 0>, i64 [[PHI5]], i32 0
-; FORCED-NEXT:    [[TMP3:%.*]] = add <2 x i64> [[TMP1]], [[TMP2]]
-; FORCED-NEXT:    [[TMP4:%.*]] = or <2 x i64> [[TMP1]], [[TMP2]]
-; FORCED-NEXT:    [[TMP5]] = shufflevector <2 x i64> [[TMP3]], <2 x i64> [[TMP4]], <2 x i32> <i32 0, i32 3>
+; FORCED-NEXT:    [[TMP5]] = add <2 x i64> [[TMP1]], [[TMP2]]
 ; FORCED-NEXT:    [[TMP6:%.*]] = shufflevector <2 x i64> [[TMP1]], <2 x i64> <i64 poison, i64 0>, <2 x i32> <i32 0, i32 3>
 ; FORCED-NEXT:    [[TMP7]] = add <2 x i64> [[TMP6]], [[TMP2]]
 ; FORCED-NEXT:    [[GETELEMENTPTR:%.*]] = getelementptr i64, ptr addrspace(1) null, i64 [[ADD7]]
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/minbitwidth-drop-wrapping-flags.ll b/llvm/test/Transforms/SLPVectorizer/X86/minbitwidth-drop-wrapping-flags.ll
index 2a5bfa7390770..0198b1c5cb846 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/minbitwidth-drop-wrapping-flags.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/minbitwidth-drop-wrapping-flags.ll
@@ -9,9 +9,7 @@ define i32 @test() {
 ; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <4 x i8> poison, i8 [[A_PROMOTED]], i32 0
 ; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i8> [[TMP0]], <4 x i8> poison, <4 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP2:%.*]] = add <4 x i8> [[TMP1]], zeroinitializer
-; CHECK-NEXT:    [[TMP3:%.*]] = or <4 x i8> [[TMP1]], zeroinitializer
-; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <4 x i8> [[TMP2]], <4 x i8> [[TMP3]], <4 x i32> <i32 0, i32 1, i32 6, i32 7>
-; CHECK-NEXT:    [[TMP5:%.*]] = zext <4 x i8> [[TMP4]] to <4 x i16>
+; CHECK-NEXT:    [[TMP5:%.*]] = zext <4 x i8> [[TMP2]] to <4 x i16>
 ; CHECK-NEXT:    [[TMP6:%.*]] = add <4 x i16> [[TMP5]], <i16 0, i16 -1, i16 0, i16 0>
 ; CHECK-NEXT:    [[TMP7:%.*]] = call i16 @llvm.vector.reduce.or.v4i16(<4 x i16> [[TMP6]])
 ; CHECK-NEXT:    [[TMP8:%.*]] = zext i16 [[TMP7]] to i32
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/multi-extracts-bv-combined.ll b/llvm/test/Transforms/SLPVectorizer/X86/multi-extracts-bv-combined.ll
index e6a166c27ac49..94f2c79faa8c9 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/multi-extracts-bv-combined.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/multi-extracts-bv-combined.ll
@@ -9,9 +9,7 @@ define i32 @foo() {
 ; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <4 x i32> <i32 0, i32 undef, i32 1, i32 0>, i32 [[D]], i32 1
 ; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 1, i32 2, i32 3, i32 1, i32 1, i32 1>
 ; CHECK-NEXT:    [[TMP2:%.*]] = or <8 x i32> zeroinitializer, [[TMP1]]
-; CHECK-NEXT:    [[TMP3:%.*]] = add <8 x i32> zeroinitializer, [[TMP1]]
-; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <8 x i32> [[TMP2]], <8 x i32> [[TMP3]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 5, i32 6, i32 7>
-; CHECK-NEXT:    store <8 x i32> [[TMP4]], ptr getelementptr inbounds ([64 x i32], ptr null, i64 0, i64 15), align 4
+; CHECK-NEXT:    store <8 x i32> [[TMP2]], ptr getelementptr inbounds ([64 x i32], ptr null, i64 0, i64 15), align 4
 ; CHECK-NEXT:    ret i32 0
 ;
 entry:
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/non-scheduled-inst-reused-as-last-inst.ll b/llvm/test/Transforms/SLPVectorizer/X86/non-scheduled-inst-reused-as-last-inst.ll
index 1163c8219dabe..034fe82862950 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/non-scheduled-inst-reused-as-last-inst.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/non-scheduled-inst-reused-as-last-inst.ll
@@ -4,6 +4,24 @@
 ; RUN: -slp-skip-early-profitability-check < %s | FileCheck %s --check-prefixes=FORCED
 
 define void @foo() {
+; CHECK-LABEL: define void @foo() {
+; CHECK-NEXT:  bb:
+; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <2 x i32> <i32 poison, i32 0>, i32 0, i32 0
+; CHECK-NEXT:    br label [[BB1:%.*]]
+; CHECK:       bb1:
+; CHECK-NEXT:    [[TMP1:%.*]] = phi <2 x i32> [ zeroinitializer, [[BB:%.*]] ], [ [[TMP6:%.*]], [[BB4:%.*]] ]
+; CHECK-NEXT:    [[TMP2:%.*]] = shl <2 x i32> [[TMP1]], [[TMP0]]
+; CHECK-NEXT:    [[TMP5:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> [[TMP1]], <2 x i32> <i32 0, i32 3>
+; CHECK-NEXT:    [[TMP6]] = or <2 x i32> [[TMP5]], zeroinitializer
+; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <2 x i32> [[TMP6]], i32 0
+; CHECK-NEXT:    [[CALL:%.*]] = call i64 null(i32 [[TMP7]])
+; CHECK-NEXT:    br label [[BB4]]
+; CHECK:       bb4:
+; CHECK-NEXT:    br i1 false, label [[BB5:%.*]], label [[BB1]]
+; CHECK:       bb5:
+; CHECK-NEXT:    [[TMP8:%.*]] = phi <2 x i32> [ [[TMP2]], [[BB4]] ]
+; CHECK-NEXT:    ret void
+;
 ; FORCED-LABEL: define void @foo() {
 ; FORCED-NEXT:  bb:
 ; FORCED-NEXT:    [[TMP0:%.*]] = insertelement <2 x i32> <i32 poison, i32 0>, i32 0, i32 0
@@ -11,9 +29,7 @@ define void @foo() {
 ; FORCED:       bb1:
 ; FORCED-NEXT:    [[TMP1:%.*]] = phi <2 x i32> [ zeroinitializer, [[BB:%.*]] ], [ [[TMP6:%.*]], [[BB4:%.*]] ]
 ; FORCED-NEXT:    [[TMP2:%.*]] = shl <2 x i32> [[TMP1]], [[TMP0]]
-; FORCED-NEXT:    [[TMP3:%.*]] = or <2 x i32> [[TMP1]], [[TMP0]]
-; FORCED-NEXT:    [[TMP4:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> [[TMP3]], <2 x i32> <i32 0, i32 3>
-; FORCED-NEXT:    [[TMP5:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> [[TMP1]], <2 x i32> <i32 0, i32 3>
+; FORCED-NEXT:    [[TMP5:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> [[TMP1]], <2 x i32> <i32 0, i32 3>
 ; FORCED-NEXT:    [[TMP6]] = or <2 x i32> [[TMP5]], zeroinitializer
 ; FORCED-NEXT:    [[TMP7:%.*]] = extractelement <2 x i32> [[TMP6]], i32 0
 ; FORCED-NEXT:    [[CALL:%.*]] = call i64 null(i32 [[TMP7]])
@@ -21,29 +37,9 @@ define void @foo() {
 ; FORCED:       bb4:
 ; FORCED-NEXT:    br i1 false, label [[BB5:%.*]], label [[BB1]]
 ; FORCED:       bb5:
-; FORCED-NEXT:    [[TMP8:%.*]] = phi <2 x i32> [ [[TMP4]], [[BB4]] ]
+; FORCED-NEXT:    [[TMP8:%.*]] = phi <2 x i32> [ [[TMP2]], [[BB4]] ]
 ; FORCED-NEXT:    ret void
 ;
-; CHECK-LABEL: define void @foo() {
-; CHECK-NEXT:  bb:
-; CHECK-NEXT:    br label [[BB1:%.*]]
-; CHECK:       bb1:
-; CHECK-NEXT:    [[TMP1:%.*]] = phi <2 x i32> [ zeroinitializer, [[BB:%.*]] ], [ [[TMP6:%.*]], [[BB4:%.*]] ]
-; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <2 x i32> [[TMP1]], i32 0
-; CHECK-NEXT:    [[SHL:%.*]] = shl i32 [[TMP2]], 0
-; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <2 x i32> [[TMP1]], i32 [[SHL]], i32 0
-; CHECK-NEXT:    [[TMP6]] = or <2 x i32> [[TMP5]], zeroinitializer
-; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <2 x i32> [[TMP6]], i32 0
-; CHECK-NEXT:    [[CALL:%.*]] = call i64 null(i32 [[TMP7]])
-; CHECK-NEXT:    br label [[BB4]]
-; CHECK:       bb4:
-; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <2 x i32> [[TMP6]], i32 1
-; CHECK-NEXT:    br i1 false, label [[BB5:%.*]], label [[BB1]]
-; CHECK:       bb5:
-; CHECK-NEXT:    [[PHI6:%.*]] = phi i32 [ [[SHL]], [[BB4]] ]
-; CHECK-NEXT:    [[PHI7:%.*]] = phi i32 [ [[TMP8]], [[BB4]] ]
-; CHECK-NEXT:    ret void
-;
 bb:
   br label %bb1
 
diff --git a/llvm/test/Transforms/SLPVectorizer/alternate-opcode-sindle-bv.ll b/llvm/test/Transforms/SLPVectorizer/alternate-opcode-sindle-bv.ll
index c250029519590..32139a5f54816 100644
--- a/llvm/test/Transforms/SLPVectorizer/alternate-opcode-sindle-bv.ll
+++ b/llvm/test/Transforms/SLPVectorizer/alternate-opcode-sindle-bv.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
-; RUN: %if x86-registered-target %{ opt -S --passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s %}
-; RUN: %if aarch64-registered-target %{ opt -S --passes=slp-vectorizer -mtriple=aarch64-unknown-linux-gnu < %s | FileCheck %s %}
+; RUN: %if x86-registered-target %{ opt -S --passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s --check-prefix=X86 %}
+; RUN: %if aarch64-registered-target %{ opt -S --passes=slp-vectorizer -mtriple=aarch64-unknown-linux-gnu < %s | FileCheck %s --check-prefix=AARCH64 %}
 
 define <2 x i32> @test(i32 %arg) {
 ; CHECK-LABEL: define <2 x i32> @test(
@@ -14,6 +14,28 @@ define <2 x i32> @test(i32 %arg) {
 ; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x i32> [[TMP0]], i32 [[MUL]], i32 1
 ; CHECK-NEXT:    ret <2 x i32> [[TMP1]]
 ;
+; X86-LABEL: define <2 x i32> @test(
+; X86-SAME: i32 [[ARG:%.*]]) {
+; X86-NEXT:  bb:
+; X86-NEXT:    [[OR:%.*]] = or i32 [[ARG]], 0
+; X86-NEXT:    [[MUL:%.*]] = mul i32 0, 1
+; X86-NEXT:    [[MUL1:%.*]] = mul i32 [[OR]], [[MUL]]
+; X86-NEXT:    [[CMP:%.*]] = icmp ugt i32 0, [[MUL1]]
+; X86-NEXT:    [[TMP0:%.*]] = insertelement <2 x i32> poison, i32 [[OR]], i32 0
+; X86-NEXT:    [[TMP1:%.*]] = insertelement <2 x i32> [[TMP0]], i32 [[MUL]], i32 1
+; X86-NEXT:    ret <2 x i32> [[TMP1]]
+;
+; AARCH64-LABEL: define <2 x i32> @test(
+; AARCH64-SAME: i32 [[ARG:%.*]]) {
+; AARCH64-NEXT:  bb:
+; AARCH64-NEXT:    [[TMP0:%.*]] = insertelement <2 x i32> <i32 poison, i32 0>, i32 [[ARG]], i32 0
+; AARCH64-NEXT:    [[TMP1:%.*]] = or <2 x i32> [[TMP0]], zeroinitializer
+; AARCH64-NEXT:    [[TMP2:%.*]] = extractelement <2 x i32> [[TMP1]], i32 0
+; AARCH64-NEXT:    [[TMP3:%.*]] = extractelement <2 x i32> [[TMP1]], i32 1
+; AARCH64-NEXT:    [[MUL1:%.*]] = mul i32 [[TMP2]], [[TMP3]]
+; AARCH64-NEXT:    [[CMP:%.*]] = icmp ugt i32 0, [[MUL1]]
+; AARCH64-NEXT:    ret <2 x i32> [[TMP1]]
+;
 bb:
   %or = or i32 %arg, 0
   %mul = mul i32 0, 1
diff --git a/llvm/test/Transforms/SLPVectorizer/resized-alt-shuffle-after-minbw.ll b/llvm/test/Transforms/SLPVectorizer/resized-alt-shuffle-after-minbw.ll
index 056b6222cae72..caca410f056c1 100644
--- a/llvm/test/Transforms/SLPVectorizer/resized-alt-shuffle-after-minbw.ll
+++ b/llvm/test/Transforms/SLPVectorizer/resized-alt-shuffle-after-minbw.ll
@@ -6,11 +6,9 @@ define void @func(i32 %0) {
 ; CHECK-SAME: i32 [[TMP0:%.*]]) {
 ; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x i32> <i32 0, i32 poison, i32 0, i32 0>, i32 [[TMP0]], i32 1
 ; CHECK-NEXT:    [[TMP3:%.*]] = shl <4 x i32> [[TMP2]], zeroinitializer
-; CHECK-NEXT:    [[TMP4:%.*]] = or <4 x i32> [[TMP2]], zeroinitializer
-; CHECK-NEXT:    [[TMP5:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> [[TMP4]], <4 x i32> <i32 0, i32 1, i32 6, i32 7>
 ; CHECK-NEXT:    [[TMP6:%.*]] = shl i32 [[TMP0]], 0
 ; CHECK-NEXT:    [[TMP7:%.*]] = icmp eq i32 [[TMP6]], 0
-; CHECK-NEXT:    [[TMP8:%.*]] = shufflevector <4 x i32> [[TMP5]], <4 x i32> poison, <32 x i32> <i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 3>
+; CHECK-NEXT:    [[TMP8:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> poison, <32 x i32> <i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 3>
 ; CHECK-NEXT:    [[TMP9:%.*]] = sext i32 [[TMP6]] to i64
 ; CHECK-NEXT:    [[TMP10:%.*]] = or i64 [[TMP9]], 0
 ; CHECK-NEXT:    [[TMP11:%.*]] = trunc i64 [[TMP9]] to i32
diff --git a/llvm/test/Transforms/SLPVectorizer/shuffle-mask-resized.ll b/llvm/test/Transforms/SLPVectorizer/shuffle-mask-resized.ll
index 732b50396a460..1e3255f2187af 100644
--- a/llvm/test/Transforms/SLPVectorizer/shuffle-mask-resized.ll
+++ b/llvm/test/Transforms/SLPVectorizer/shuffle-mask-resized.ll
@@ -12,9 +12,7 @@ define i32 @test() {
 ; CHECK-NEXT:    br i1 false, label [[BB4:%.*]], label [[BB3]]
 ; CHECK:       bb3:
 ; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <2 x i32> [[TMP0]], <2 x i32> <i32 0, i32 poison>, <2 x i32> <i32 2, i32 1>
-; CHECK-NEXT:    [[TMP3:%.*]] = add <2 x i32> zeroinitializer, [[TMP2]]
-; CHECK-NEXT:    [[TMP4:%.*]] = or <2 x i32> zeroinitializer, [[TMP2]]
-; CHECK-NEXT:    [[TMP5]] = shufflevector <2 x i32> [[TMP3]], <2 x i32> [[TMP4]], <2 x i32> <i32 0, i32 3>
+; CHECK-NEXT:    [[TMP5]] = or <2 x i32> zeroinitializer, [[TMP2]]
 ; CHECK-NEXT:    br label [[BB1]]
 ; CHECK:       bb4:
 ; CHECK-NEXT:    [[TMP6:%.*]] = phi <8 x i32> [ [[TMP1]], [[BB1]] ]

>From 4a08497fcc1ec161ee2bb0686364405faecdef37 Mon Sep 17 00:00:00 2001
From: Han-Kuan Chen <hankuan.chen at sifive.com>
Date: Fri, 14 Feb 2025 00:18:15 -0800
Subject: [PATCH 06/38] support Xor

---
 .../lib/Transforms/Vectorize/SLPVectorizer.cpp |  2 +-
 .../X86/non-power-2-num-elems-reused.ll        | 18 +++++++++++++-----
 .../X86/reduced-val-vectorized-in-transform.ll |  6 +++---
 .../SLPVectorizer/X86/shuffle-mask-emission.ll |  6 ++----
 4 files changed, 19 insertions(+), 13 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index c26ac4a904d45..eb1a6fb55c9d1 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -859,7 +859,7 @@ class InterchangeableInstruction {
 class BinOpIsNoOp final : public InterchangeableInstruction {
   constexpr static std::initializer_list<unsigned> SupportedOp = {
       Instruction::Add,  Instruction::Sub, Instruction::Mul, Instruction::Shl,
-      Instruction::AShr, Instruction::And, Instruction::Or};
+      Instruction::AShr, Instruction::And, Instruction::Or,  Instruction::Xor};
   SmallVector<unsigned> CandidateOp = SupportedOp;
 
 public:
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/non-power-2-num-elems-reused.ll b/llvm/test/Transforms/SLPVectorizer/X86/non-power-2-num-elems-reused.ll
index 4ad02d47fb385..fbccb6914006a 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/non-power-2-num-elems-reused.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/non-power-2-num-elems-reused.ll
@@ -5,11 +5,19 @@ define i64 @test() {
 ; CHECK-LABEL: define i64 @test() {
 ; CHECK-NEXT:  [[ENTRY:.*:]]
 ; CHECK-NEXT:    [[OR54_I_I_6:%.*]] = or i32 0, 0
-; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <16 x i32> poison, i32 [[OR54_I_I_6]], i32 8
-; CHECK-NEXT:    [[TMP1:%.*]] = call <16 x i32> @llvm.vector.insert.v16i32.v8i32(<16 x i32> [[TMP0]], <8 x i32> zeroinitializer, i64 0)
-; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <16 x i32> [[TMP1]], <16 x i32> poison, <16 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5, i32 6, i32 7, i32 7, i32 8>
-; CHECK-NEXT:    [[TMP3:%.*]] = zext <16 x i32> [[TMP2]] to <16 x i64>
-; CHECK-NEXT:    [[TMP4:%.*]] = call i64 @llvm.vector.reduce.or.v16i64(<16 x i64> [[TMP3]])
+; CHECK-NEXT:    [[CONV193_1_I_6:%.*]] = zext i32 [[OR54_I_I_6]] to i64
+; CHECK-NEXT:    [[CONV193_I_7:%.*]] = zext i32 0 to i64
+; CHECK-NEXT:    [[TMP0:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.v8i64(<8 x i64> zeroinitializer, i64 0)
+; CHECK-NEXT:    [[RDX_OP:%.*]] = or <4 x i64> [[TMP0]], zeroinitializer
+; CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i64> @llvm.vector.insert.v8i64.v4i64(<8 x i64> zeroinitializer, <4 x i64> [[RDX_OP]], i64 0)
+; CHECK-NEXT:    [[OP_RDX:%.*]] = call i64 @llvm.vector.reduce.or.v8i64(<8 x i64> [[TMP1]])
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x i64> poison, i64 [[OP_RDX]], i32 0
+; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <2 x i64> [[TMP2]], i64 [[CONV193_I_7]], i32 1
+; CHECK-NEXT:    [[TMP7:%.*]] = or <2 x i64> [[TMP3]], zeroinitializer
+; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <2 x i64> [[TMP7]], i32 0
+; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <2 x i64> [[TMP7]], i32 1
+; CHECK-NEXT:    [[OP_RDX3:%.*]] = or i64 [[TMP5]], [[TMP6]]
+; CHECK-NEXT:    [[TMP4:%.*]] = or i64 [[OP_RDX3]], [[CONV193_1_I_6]]
 ; CHECK-NEXT:    ret i64 [[TMP4]]
 ;
 entry:
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reduced-val-vectorized-in-transform.ll b/llvm/test/Transforms/SLPVectorizer/X86/reduced-val-vectorized-in-transform.ll
index 81f3bf99f3fd8..7fe6941d52da7 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/reduced-val-vectorized-in-transform.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/reduced-val-vectorized-in-transform.ll
@@ -9,16 +9,16 @@ define i32 @test(i1 %cond) {
 ; CHECK:       [[BB]]:
 ; CHECK-NEXT:    [[P1:%.*]] = phi i32 [ [[OR92:%.*]], %[[BB]] ], [ 0, %[[ENTRY]] ]
 ; CHECK-NEXT:    [[TMP0:%.*]] = phi <2 x i32> [ [[TMP8:%.*]], %[[BB]] ], [ zeroinitializer, %[[ENTRY]] ]
-; CHECK-NEXT:    [[TMP1:%.*]] = or i32 1, 0
 ; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <2 x i32> [[TMP0]], <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
 ; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> <i32 poison, i32 poison, i32 0, i32 0>, <4 x i32> <i32 poison, i32 1, i32 6, i32 7>
 ; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <4 x i32> [[TMP3]], i32 [[P1]], i32 0
 ; CHECK-NEXT:    [[TMP5:%.*]] = or <4 x i32> zeroinitializer, [[TMP4]]
 ; CHECK-NEXT:    [[OR92]] = or i32 1, 0
 ; CHECK-NEXT:    [[TMP6:%.*]] = call i32 @llvm.vector.reduce.xor.v4i32(<4 x i32> [[TMP5]])
+; CHECK-NEXT:    [[TMP9:%.*]] = insertelement <2 x i32> <i32 poison, i32 1>, i32 [[TMP6]], i32 0
+; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <2 x i32> <i32 poison, i32 0>, i32 [[OR92]], i32 0
+; CHECK-NEXT:    [[TMP8]] = xor <2 x i32> [[TMP9]], [[TMP7]]
 ; CHECK-NEXT:    [[OP_RDX:%.*]] = xor i32 [[TMP6]], [[OR92]]
-; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <2 x i32> poison, i32 [[OP_RDX]], i32 0
-; CHECK-NEXT:    [[TMP8]] = insertelement <2 x i32> [[TMP7]], i32 [[TMP1]], i32 1
 ; CHECK-NEXT:    br i1 [[COND]], label %[[EXIT:.*]], label %[[BB]]
 ; CHECK:       [[EXIT]]:
 ; CHECK-NEXT:    ret i32 [[OP_RDX]]
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/shuffle-mask-emission.ll b/llvm/test/Transforms/SLPVectorizer/X86/shuffle-mask-emission.ll
index fcc295de62adf..70beef71b2e34 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/shuffle-mask-emission.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/shuffle-mask-emission.ll
@@ -7,10 +7,8 @@ define i1 @test() {
 ; CHECK-NEXT:    [[H_PROMOTED118_I_FR:%.*]] = freeze i32 1
 ; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <4 x i32> <i32 0, i32 0, i32 poison, i32 0>, i32 [[H_PROMOTED118_I_FR]], i32 2
 ; CHECK-NEXT:    [[TMP1:%.*]] = xor <4 x i32> zeroinitializer, [[TMP0]]
-; CHECK-NEXT:    [[TMP2:%.*]] = add <4 x i32> zeroinitializer, [[TMP0]]
-; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> <i32 0, i32 1, i32 6, i32 7>
-; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> [[TMP3]], <4 x i32> <i32 2, i32 2, i32 7, i32 2>
-; CHECK-NEXT:    [[TMP5:%.*]] = add <4 x i32> [[TMP3]], [[TMP4]]
+; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> [[TMP1]], <4 x i32> <i32 2, i32 2, i32 7, i32 2>
+; CHECK-NEXT:    [[TMP5:%.*]] = add <4 x i32> [[TMP1]], [[TMP2]]
 ; CHECK-NEXT:    [[TMP6:%.*]] = and <4 x i32> [[TMP5]], <i32 0, i32 1, i32 1, i32 1>
 ; CHECK-NEXT:    [[TMP7:%.*]] = icmp eq <4 x i32> [[TMP6]], <i32 1, i32 0, i32 0, i32 0>
 ; CHECK-NEXT:    [[TMP8:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP7]])

>From 6788da47bdb58422b7cad4251e160c3828e4a2ae Mon Sep 17 00:00:00 2001
From: Han-Kuan Chen <hankuan.chen at sifive.com>
Date: Sun, 16 Feb 2025 23:18:55 -0800
Subject: [PATCH 07/38] fix undef deprecator issue

---
 .../X86/extract-scalar-from-undef.ll          | 20 +++++++++----------
 1 file changed, 9 insertions(+), 11 deletions(-)

diff --git a/llvm/test/Transforms/SLPVectorizer/X86/extract-scalar-from-undef.ll b/llvm/test/Transforms/SLPVectorizer/X86/extract-scalar-from-undef.ll
index 56d2df11458d1..1c0b3f41d523b 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/extract-scalar-from-undef.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/extract-scalar-from-undef.ll
@@ -4,12 +4,10 @@
 define i64 @foo(i32 %tmp7) {
 ; CHECK-LABEL: @foo(
 ; CHECK-NEXT:  bb:
-; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x i32> <i32 0, i32 0, i32 poison, i32 0>, i32 [[TMP5:%.*]], i32 2
-; CHECK-NEXT:    [[TMP3:%.*]] = sub <4 x i32> [[TMP2]], zeroinitializer
-; CHECK-NEXT:    [[TMP13:%.*]] = call <8 x i32> @llvm.vector.insert.v8i32.v2i32(<8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 poison, i32 poison, i32 undef, i32 0>, <2 x i32> <i32 undef, i32 0>, i64 4)
-; CHECK-NEXT:    [[TMP11:%.*]] = insertelement <8 x i32> <i32 poison, i32 poison, i32 undef, i32 poison, i32 poison, i32 undef, i32 poison, i32 undef>, i32 undef, i32 6
-; CHECK-NEXT:    [[TMP12:%.*]] = call <8 x i32> @llvm.vector.insert.v8i32.v4i32(<8 x i32> poison, <4 x i32> [[TMP3]], i64 0)
-; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <8 x i32> [[TMP12]], <8 x i32> [[TMP11]], <8 x i32> <i32 0, i32 1, i32 poison, i32 2, i32 3, i32 poison, i32 14, i32 poison>
+; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <8 x i32> <i32 0, i32 0, i32 poison, i32 poison, i32 0, i32 poison, i32 poison, i32 poison>, i32 [[TMP8:%.*]], i32 3
+; CHECK-NEXT:    [[TMP4:%.*]] = sub <8 x i32> [[TMP0]], <i32 0, i32 0, i32 poison, i32 0, i32 0, i32 poison, i32 0, i32 poison>
+; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 poison, i32 poison, i32 poison, i32 0>, <8 x i32> [[TMP4]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 14, i32 poison, i32 poison, i32 7>
+; CHECK-NEXT:    [[TMP13:%.*]] = insertelement <8 x i32> [[TMP2]], i32 0, i32 5
 ; CHECK-NEXT:    [[TMP6:%.*]] = add nsw <8 x i32> [[TMP13]], [[TMP4]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = sub nsw <8 x i32> [[TMP13]], [[TMP4]]
 ; CHECK-NEXT:    [[TMP7:%.*]] = shufflevector <8 x i32> [[TMP6]], <8 x i32> [[TMP5]], <8 x i32> <i32 0, i32 9, i32 10, i32 11, i32 4, i32 5, i32 14, i32 15>
@@ -27,7 +25,7 @@ bb:
   %tmp4 = xor i32 %tmp3, 0
   %tmp6 = sub i32 0, 0
   %tmp8 = sub i32 %tmp7, 0
-  %tmp9 = sub nsw i32 0, undef
+  %tmp9 = sub nsw i32 0, poison
   %tmp10 = add nsw i32 0, %tmp6
   %tmp11 = sub nsw i32 0, %tmp8
   %tmp12 = add i32 0, %tmp10
@@ -42,10 +40,10 @@ bb:
   %tmp21 = add i32 %tmp20, %tmp17
   %tmp22 = sub i32 0, 0
   %tmp23 = add i32 0, 0
-  %tmp24 = sub i32 undef, 0
-  %tmp25 = add nsw i32 %tmp23, undef
+  %tmp24 = sub i32 poison, 0
+  %tmp25 = add nsw i32 %tmp23, poison
   %tmp26 = add nsw i32 %tmp24, %tmp22
-  %tmp27 = sub nsw i32 undef, %tmp24
+  %tmp27 = sub nsw i32 poison, %tmp24
   %tmp28 = add i32 0, %tmp25
   %tmp29 = xor i32 %tmp28, 0
   %tmp30 = add i32 0, %tmp26
@@ -56,7 +54,7 @@ bb:
   %tmp35 = add i32 %tmp34, %tmp29
   %tmp36 = add i32 %tmp35, 0
   %tmp37 = add i32 %tmp36, %tmp33
-  %tmp38 = sub nsw i32 0, undef
+  %tmp38 = sub nsw i32 0, poison
   %tmp39 = add i32 0, %tmp38
   %tmp40 = xor i32 %tmp39, 0
   %tmp41 = add i32 0, %tmp37

>From deeb10d1c5813696c9d9ecc9fa23d9cb7c2e1533 Mon Sep 17 00:00:00 2001
From: Han-Kuan Chen <hankuan.chen at sifive.com>
Date: Sun, 16 Feb 2025 23:32:23 -0800
Subject: [PATCH 08/38] clang-format

---
 llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index eb1a6fb55c9d1..82261740ed4ac 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -817,11 +817,13 @@ namespace {
 /// Derived classes implement specific interchange patterns by overriding the
 /// virtual methods to define their interchange logic.
 ///
-/// The class maintains a reference to the main instruction (MainOp) and provides
-/// methods to:
+/// The class maintains a reference to the main instruction (MainOp) and
+/// provides methods to:
 /// - Check if another instruction is interchangeable (isSame)
-/// - Get the opcode for the interchangeable form (getInterchangeableInstructionOpcode)
-/// - Get the operands for the interchangeable form (getInterchangeableInstructionOps)
+/// - Get the opcode for the interchangeable form
+/// (getInterchangeableInstructionOpcode)
+/// - Get the operands for the interchangeable form
+/// (getInterchangeableInstructionOps)
 class InterchangeableInstruction {
 protected:
   Instruction *const MainOp;

>From ace8e9132b51eb3a64d2ce89303b5a03daeda31e Mon Sep 17 00:00:00 2001
From: Han-Kuan Chen <hankuan.chen at sifive.com>
Date: Mon, 17 Feb 2025 04:59:00 -0800
Subject: [PATCH 09/38] apply comment

---
 .../Transforms/Vectorize/SLPVectorizer.cpp    | 22 ++++++++++---------
 1 file changed, 12 insertions(+), 10 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 82261740ed4ac..f8ce2406159d1 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -833,9 +833,9 @@ class InterchangeableInstruction {
     Constant *C;
     if (!match(I, m_BinOp(m_Value(), m_Constant(C))))
       return nullptr;
-    if (auto *CI = dyn_cast<ConstantInt>(C)) {
+    if (auto *CI = dyn_cast<ConstantInt>(C))
       return CI;
-    } else if (auto *CDV = dyn_cast<ConstantDataVector>(C)) {
+    if (auto *CDV = dyn_cast<ConstantDataVector>(C)) {
       if (auto *CI = dyn_cast_if_present<ConstantInt>(CDV->getSplatValue()))
         return CI;
     }
@@ -900,7 +900,8 @@ class BinOpIsNoOp final : public InterchangeableInstruction {
   }
   SmallVector<Value *>
   getInterchangeableInstructionOps(Instruction *I) override {
-    assert(is_contained(SupportedOp, I->getOpcode()));
+    assert(is_contained(SupportedOp, I->getOpcode()) &&
+           "Not supported opcode.");
     return {MainOp->getOperand(0),
             ConstantInt::get(MainOp->getOperand(1)->getType(),
                              I->getOpcode() == Instruction::Mul)};
@@ -1304,9 +1305,9 @@ static InstructionsState getSameOpcode(ArrayRef<Value *> VL,
             for (Value *V : VL) {
               if (isa<PoisonValue>(V))
                 continue;
-              if (cast<Instruction>(V)->getOpcode() ==
-                  InterchangeableInstructionOpcode)
-                return cast<Instruction>(V);
+              Instruction *Inst = cast<Instruction>(V);
+              if (Inst->getOpcode() == InterchangeableInstructionOpcode)
+                return Inst;
             }
           }
           llvm_unreachable(
@@ -2679,9 +2680,9 @@ class BoUpSLP {
       ArgSize = isa<IntrinsicInst>(MainOp) ? IntrinsicNumOperands : NumOperands;
       OpsVec.resize(NumOperands);
       unsigned NumLanes = VL.size();
-      for (unsigned OpIdx = 0; OpIdx != NumOperands; ++OpIdx)
-        OpsVec[OpIdx].resize(NumLanes);
-      for (unsigned Lane = 0; Lane != NumLanes; ++Lane) {
+      for (OperandDataVec &Ops : OpsVec)
+        Ops.resize(NumLanes);
+      for (unsigned Lane : seq<unsigned>(NumLanes)) {
         assert((isa<Instruction>(VL[Lane]) || isa<PoisonValue>(VL[Lane])) &&
                "Expected instruction or poison value");
         // Our tree has just 3 nodes: the root and two operands.
@@ -2700,7 +2701,8 @@ class BoUpSLP {
               if (auto *EI = dyn_cast<ExtractElementInst>(MainOp)) {
                 OpsVec[OpIdx][Lane] = {EI->getVectorOperand(), true, false};
                 continue;
-              } else if (auto *EV = dyn_cast<ExtractValueInst>(MainOp)) {
+              }
+              if (auto *EV = dyn_cast<ExtractValueInst>(MainOp)) {
                 OpsVec[OpIdx][Lane] = {EV->getAggregateOperand(), true, false};
                 continue;
               }

>From f422a5942931445fba3dd949521711457c3d9307 Mon Sep 17 00:00:00 2001
From: Han-Kuan Chen <hankuan.chen at sifive.com>
Date: Wed, 19 Feb 2025 00:33:17 -0800
Subject: [PATCH 10/38] Merge BinOpIsNoOp and MulAndShlWithConstantInt to
 InterchangeableBinOp.

---
 .../Transforms/Vectorize/SLPVectorizer.cpp    | 167 +++++++++++-------
 .../buildvector-postpone-for-dependency.ll    |   2 +-
 .../Transforms/SLPVectorizer/isOpcodeOrAlt.ll |   4 +-
 3 files changed, 108 insertions(+), 65 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index f8ce2406159d1..393cf84062eea 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -858,93 +858,140 @@ class InterchangeableInstruction {
   virtual ~InterchangeableInstruction() = default;
 };
 
-class BinOpIsNoOp final : public InterchangeableInstruction {
+class InterchangeableBinOp final : public InterchangeableInstruction {
+  using MaskType = std::uint_fast8_t;
   constexpr static std::initializer_list<unsigned> SupportedOp = {
       Instruction::Add,  Instruction::Sub, Instruction::Mul, Instruction::Shl,
       Instruction::AShr, Instruction::And, Instruction::Or,  Instruction::Xor};
-  SmallVector<unsigned> CandidateOp = SupportedOp;
+  // from high to low bit: Xor Or And Sub Add Mul AShr Shl
+  MaskType Mask = 0b11111111;
+
+  static MaskType opcodeToMask(unsigned Opcode) {
+    switch (Opcode) {
+    case Instruction::Shl:
+      return 0b1;
+    case Instruction::AShr:
+      return 0b10;
+    case Instruction::Mul:
+      return 0b100;
+    case Instruction::Add:
+      return 0b1000;
+    case Instruction::Sub:
+      return 0b10000;
+    case Instruction::And:
+      return 0b100000;
+    case Instruction::Or:
+      return 0b1000000;
+    case Instruction::Xor:
+      return 0b10000000;
+    }
+    llvm_unreachable("Unsupported opcode.");
+  }
+
+  bool tryAnd(MaskType X) {
+    if (Mask & X) {
+      Mask &= X;
+      return true;
+    }
+    return false;
+  }
 
 public:
   using InterchangeableInstruction::InterchangeableInstruction;
   bool isSame(Instruction *I) override {
     unsigned Opcode = I->getOpcode();
-    if (!is_contained(SupportedOp, Opcode))
+    if (!binary_search(SupportedOp, Opcode))
       return false;
     ConstantInt *CI = isBinOpWithConstantInt(I);
     if (CI) {
+      const APInt &Op1Int = CI->getValue();
       switch (Opcode) {
+      case Instruction::Shl:
+        if (Op1Int.isZero())
+          return true;
+        return tryAnd(0b101);
       case Instruction::Mul:
-        if (CI->getValue().isOne())
+        if (Op1Int.isOne())
           return true;
+        if (Op1Int.isPowerOf2())
+          return tryAnd(0b101);
         break;
       case Instruction::And:
-        if (CI->getValue().isAllOnes())
+        if (Op1Int.isAllOnes())
           return true;
         break;
       default:
-        if (CI->getValue().isZero())
+        if (Op1Int.isZero())
           return true;
+        break;
       }
     }
-    if (is_contained(CandidateOp, Opcode)) {
-      CandidateOp = {Opcode};
-      return true;
-    }
-    return false;
-  }
-  unsigned getInterchangeableInstructionOpcode() override {
-    assert(!CandidateOp.empty() && "Cannot find interchangeable instruction.");
-    if (is_contained(CandidateOp, MainOp->getOpcode()))
-      return MainOp->getOpcode();
-    return CandidateOp[0];
-  }
-  SmallVector<Value *>
-  getInterchangeableInstructionOps(Instruction *I) override {
-    assert(is_contained(SupportedOp, I->getOpcode()) &&
-           "Not supported opcode.");
-    return {MainOp->getOperand(0),
-            ConstantInt::get(MainOp->getOperand(1)->getType(),
-                             I->getOpcode() == Instruction::Mul)};
-  }
-};
-
-class MulAndShlWithConstantInt final : public InterchangeableInstruction {
-  constexpr static std::initializer_list<unsigned> SupportedOp = {
-      Instruction::Mul, Instruction::Shl};
-  SmallVector<unsigned> CandidateOp = SupportedOp;
-
-public:
-  using InterchangeableInstruction::InterchangeableInstruction;
-  bool isSame(Instruction *I) override {
-    unsigned Opcode = I->getOpcode();
-    if (!is_contained(SupportedOp, Opcode))
-      return false;
-    ConstantInt *CI = isBinOpWithConstantInt(I);
-    if (CI && (Opcode != Instruction::Mul || CI->getValue().isPowerOf2()))
-      return true;
-    if (is_contained(CandidateOp, Opcode)) {
-      CandidateOp = {Opcode};
-      return true;
-    }
-    return false;
+    return tryAnd(opcodeToMask(Opcode));
   }
   unsigned getInterchangeableInstructionOpcode() override {
-    assert(!CandidateOp.empty() && "Cannot find interchangeable instruction.");
-    if (is_contained(CandidateOp, MainOp->getOpcode()))
-      return MainOp->getOpcode();
-    return CandidateOp[0];
+    unsigned Opcode = MainOp->getOpcode();
+    if (Mask & opcodeToMask(Opcode))
+      return Opcode;
+    if (Mask & 0b1)
+      return Instruction::Shl;
+    if (Mask & 0b10)
+      return Instruction::AShr;
+    if (Mask & 0b100)
+      return Instruction::Mul;
+    if (Mask & 0b1000)
+      return Instruction::Add;
+    if (Mask & 0b10000)
+      return Instruction::Sub;
+    if (Mask & 0b100000)
+      return Instruction::And;
+    if (Mask & 0b1000000)
+      return Instruction::Or;
+    if (Mask & 0b10000000)
+      return Instruction::Xor;
+    llvm_unreachable("Cannot find interchangeable instruction.");
   }
   SmallVector<Value *>
   getInterchangeableInstructionOps(Instruction *I) override {
-    assert(is_contained(SupportedOp, I->getOpcode()));
-    if (MainOp->getOpcode() == I->getOpcode())
+    unsigned ToOpcode = I->getOpcode();
+    assert(binary_search(SupportedOp, ToOpcode) && "Unsupported opcode.");
+    unsigned FromOpcode = MainOp->getOpcode();
+    if (FromOpcode == ToOpcode)
       return SmallVector<Value *>(MainOp->operands());
     const APInt &Op1Int = isBinOpWithConstantInt(MainOp)->getValue();
+    unsigned Op1IntBitWidth = Op1Int.getBitWidth();
+    APInt RHSV;
+    switch (FromOpcode) {
+    case Instruction::Shl:
+      if (ToOpcode == Instruction::Mul) {
+        RHSV = APInt::getOneBitSet(Op1IntBitWidth, Op1Int.getZExtValue());
+      } else {
+        assert(Op1Int.isZero() && "Cannot convert the instruction.");
+        RHSV = ToOpcode == Instruction::And ? APInt::getAllOnes(Op1IntBitWidth)
+                                            : APInt::getZero(Op1IntBitWidth);
+      }
+      break;
+    case Instruction::Mul:
+      assert(Op1Int.isPowerOf2() && "Cannot convert the instruction.");
+      if (ToOpcode == Instruction::Shl) {
+        RHSV = APInt(Op1IntBitWidth, Op1Int.logBase2());
+      } else {
+        assert(Op1Int.isOne() && "Cannot convert the instruction.");
+        RHSV = ToOpcode == Instruction::And ? APInt::getAllOnes(Op1IntBitWidth)
+                                            : APInt::getZero(Op1IntBitWidth);
+      }
+      break;
+    case Instruction::And:
+      assert(Op1Int.isAllOnes() && "Cannot convert the instruction.");
+      RHSV = ToOpcode == Instruction::Mul
+                 ? APInt::getOneBitSet(Op1IntBitWidth, 0)
+                 : APInt::getZero(Op1IntBitWidth);
+      break;
+    default:
+      RHSV = APInt::getZero(Op1IntBitWidth);
+      break;
+    }
     return {MainOp->getOperand(0),
-            ConstantInt::get(MainOp->getOperand(1)->getType(),
-                             I->getOpcode() == Instruction::Mul
-                                 ? (1 << Op1Int.getZExtValue())
-                                 : Op1Int.logBase2())};
+            ConstantInt::get(MainOp->getOperand(1)->getType(), RHSV)};
   }
 };
 
@@ -952,10 +999,8 @@ static SmallVector<std::unique_ptr<InterchangeableInstruction>>
 getInterchangeableInstruction(Instruction *MainOp) {
   SmallVector<std::unique_ptr<InterchangeableInstruction>> Candidate;
   Candidate.push_back(std::make_unique<InterchangeableInstruction>(MainOp));
-  if (MainOp->isBinaryOp()) {
-    Candidate.push_back(std::make_unique<BinOpIsNoOp>(MainOp));
-    Candidate.push_back(std::make_unique<MulAndShlWithConstantInt>(MainOp));
-  }
+  if (MainOp->isBinaryOp())
+    Candidate.push_back(std::make_unique<InterchangeableBinOp>(MainOp));
   return Candidate;
 }
 
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/buildvector-postpone-for-dependency.ll b/llvm/test/Transforms/SLPVectorizer/X86/buildvector-postpone-for-dependency.ll
index aa424b9031e77..03a89e54e4212 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/buildvector-postpone-for-dependency.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/buildvector-postpone-for-dependency.ll
@@ -13,7 +13,7 @@ define void @test() {
 ; CHECK:       [[BB6]]:
 ; CHECK-NEXT:    [[TMP1:%.*]] = phi <2 x i32> [ zeroinitializer, %[[BB]] ], [ [[TMP8:%.*]], %[[BB6]] ]
 ; CHECK-NEXT:    [[TMP6:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
-; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <4 x i32> <i32 1, i32 0, i32 poison, i32 poison>, <4 x i32> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 5, i32 4>
+; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <4 x i32> <i32 0, i32 0, i32 poison, i32 poison>, <4 x i32> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 5, i32 4>
 ; CHECK-NEXT:    [[TMP4]] = mul <4 x i32> [[TMP3]], zeroinitializer
 ; CHECK-NEXT:    [[TMP7:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> <i32 0, i32 poison>, <2 x i32> <i32 2, i32 1>
 ; CHECK-NEXT:    [[TMP8]] = mul <2 x i32> zeroinitializer, [[TMP7]]
diff --git a/llvm/test/Transforms/SLPVectorizer/isOpcodeOrAlt.ll b/llvm/test/Transforms/SLPVectorizer/isOpcodeOrAlt.ll
index 623c9e816a59c..c3b0de084b748 100644
--- a/llvm/test/Transforms/SLPVectorizer/isOpcodeOrAlt.ll
+++ b/llvm/test/Transforms/SLPVectorizer/isOpcodeOrAlt.ll
@@ -8,9 +8,7 @@ define void @test(ptr %a, ptr %b) {
 ; CHECK-NEXT:    [[GEP4:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 0
 ; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x i32>, ptr [[GEP0]], align 4
 ; CHECK-NEXT:    [[TMP1:%.*]] = shl <4 x i32> [[TMP0]], <i32 1, i32 0, i32 1, i32 0>
-; CHECK-NEXT:    [[TMP2:%.*]] = mul <4 x i32> [[TMP0]], <i32 1, i32 0, i32 1, i32 0>
-; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> <i32 0, i32 1, i32 6, i32 3>
-; CHECK-NEXT:    store <4 x i32> [[TMP3]], ptr [[GEP4]], align 4
+; CHECK-NEXT:    store <4 x i32> [[TMP1]], ptr [[GEP4]], align 4
 ; CHECK-NEXT:    ret void
 ;
 entry:

>From 0e8d5670dca3701467b9116bfe5cb3e49af61ac0 Mon Sep 17 00:00:00 2001
From: Han-Kuan Chen <hankuan.chen at sifive.com>
Date: Wed, 19 Feb 2025 01:10:21 -0800
Subject: [PATCH 11/38] add SeenBefore

---
 .../Transforms/Vectorize/SLPVectorizer.cpp    | 22 +++++++++----------
 .../X86/shuffle-mask-emission.ll              |  2 +-
 .../alternate-opcode-sindle-bv.ll             |  4 ++--
 3 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 393cf84062eea..fb70ff7c72abc 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -865,6 +865,7 @@ class InterchangeableBinOp final : public InterchangeableInstruction {
       Instruction::AShr, Instruction::And, Instruction::Or,  Instruction::Xor};
   // from high to low bit: Xor Or And Sub Add Mul AShr Shl
   MaskType Mask = 0b11111111;
+  MaskType SeenBefore = 0;
 
   static MaskType opcodeToMask(unsigned Opcode) {
     switch (Opcode) {
@@ -902,6 +903,7 @@ class InterchangeableBinOp final : public InterchangeableInstruction {
     unsigned Opcode = I->getOpcode();
     if (!binary_search(SupportedOp, Opcode))
       return false;
+    SeenBefore |= opcodeToMask(Opcode);
     ConstantInt *CI = isBinOpWithConstantInt(I);
     if (CI) {
       const APInt &Op1Int = CI->getValue();
@@ -929,24 +931,22 @@ class InterchangeableBinOp final : public InterchangeableInstruction {
     return tryAnd(opcodeToMask(Opcode));
   }
   unsigned getInterchangeableInstructionOpcode() override {
-    unsigned Opcode = MainOp->getOpcode();
-    if (Mask & opcodeToMask(Opcode))
-      return Opcode;
-    if (Mask & 0b1)
+    MaskType Candidate = Mask & SeenBefore;
+    if (Candidate & 0b1)
       return Instruction::Shl;
-    if (Mask & 0b10)
+    if (Candidate & 0b10)
       return Instruction::AShr;
-    if (Mask & 0b100)
+    if (Candidate & 0b100)
       return Instruction::Mul;
-    if (Mask & 0b1000)
+    if (Candidate & 0b1000)
       return Instruction::Add;
-    if (Mask & 0b10000)
+    if (Candidate & 0b10000)
       return Instruction::Sub;
-    if (Mask & 0b100000)
+    if (Candidate & 0b100000)
       return Instruction::And;
-    if (Mask & 0b1000000)
+    if (Candidate & 0b1000000)
       return Instruction::Or;
-    if (Mask & 0b10000000)
+    if (Candidate & 0b10000000)
       return Instruction::Xor;
     llvm_unreachable("Cannot find interchangeable instruction.");
   }
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/shuffle-mask-emission.ll b/llvm/test/Transforms/SLPVectorizer/X86/shuffle-mask-emission.ll
index 70beef71b2e34..a17ccb4b46ef9 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/shuffle-mask-emission.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/shuffle-mask-emission.ll
@@ -6,7 +6,7 @@ define i1 @test() {
 ; CHECK-NEXT:  [[ENTRY:.*:]]
 ; CHECK-NEXT:    [[H_PROMOTED118_I_FR:%.*]] = freeze i32 1
 ; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <4 x i32> <i32 0, i32 0, i32 poison, i32 0>, i32 [[H_PROMOTED118_I_FR]], i32 2
-; CHECK-NEXT:    [[TMP1:%.*]] = xor <4 x i32> zeroinitializer, [[TMP0]]
+; CHECK-NEXT:    [[TMP1:%.*]] = add <4 x i32> zeroinitializer, [[TMP0]]
 ; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> [[TMP1]], <4 x i32> <i32 2, i32 2, i32 7, i32 2>
 ; CHECK-NEXT:    [[TMP5:%.*]] = add <4 x i32> [[TMP1]], [[TMP2]]
 ; CHECK-NEXT:    [[TMP6:%.*]] = and <4 x i32> [[TMP5]], <i32 0, i32 1, i32 1, i32 1>
diff --git a/llvm/test/Transforms/SLPVectorizer/alternate-opcode-sindle-bv.ll b/llvm/test/Transforms/SLPVectorizer/alternate-opcode-sindle-bv.ll
index 32139a5f54816..dacc49fcd6be8 100644
--- a/llvm/test/Transforms/SLPVectorizer/alternate-opcode-sindle-bv.ll
+++ b/llvm/test/Transforms/SLPVectorizer/alternate-opcode-sindle-bv.ll
@@ -28,8 +28,8 @@ define <2 x i32> @test(i32 %arg) {
 ; AARCH64-LABEL: define <2 x i32> @test(
 ; AARCH64-SAME: i32 [[ARG:%.*]]) {
 ; AARCH64-NEXT:  bb:
-; AARCH64-NEXT:    [[TMP0:%.*]] = insertelement <2 x i32> <i32 poison, i32 0>, i32 [[ARG]], i32 0
-; AARCH64-NEXT:    [[TMP1:%.*]] = or <2 x i32> [[TMP0]], zeroinitializer
+; AARCH64-NEXT:    [[TMP0:%.*]] = insertelement <2 x i32> <i32 poison, i32 1>, i32 [[ARG]], i32 0
+; AARCH64-NEXT:    [[TMP1:%.*]] = mul <2 x i32> [[TMP0]], zeroinitializer
 ; AARCH64-NEXT:    [[TMP2:%.*]] = extractelement <2 x i32> [[TMP1]], i32 0
 ; AARCH64-NEXT:    [[TMP3:%.*]] = extractelement <2 x i32> [[TMP1]], i32 1
 ; AARCH64-NEXT:    [[MUL1:%.*]] = mul i32 [[TMP2]], [[TMP3]]

>From bf43fff62826da4709be544a6f4c335632ef1f55 Mon Sep 17 00:00:00 2001
From: Han-Kuan Chen <hankuan.chen at sifive.com>
Date: Wed, 19 Feb 2025 01:44:20 -0800
Subject: [PATCH 12/38] make isBinOpWithConstantInt support left hand side
 operand

---
 .../Transforms/Vectorize/SLPVectorizer.cpp    | 96 +++++++++++--------
 .../AArch64/gather-with-minbith-user.ll       | 13 ++-
 .../X86/extract-scalar-from-undef.ll          |  4 +-
 .../X86/multi-extracts-bv-combined.ll         |  2 +-
 .../X86/reorder_diamond_match.ll              |  4 +-
 .../SLPVectorizer/shuffle-mask-resized.ll     |  2 +-
 6 files changed, 71 insertions(+), 50 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index fb70ff7c72abc..a303d2dcb4386 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -828,20 +828,6 @@ class InterchangeableInstruction {
 protected:
   Instruction *const MainOp;
 
-  /// Return non nullptr if the right operand of I is ConstantInt.
-  static ConstantInt *isBinOpWithConstantInt(Instruction *I) {
-    Constant *C;
-    if (!match(I, m_BinOp(m_Value(), m_Constant(C))))
-      return nullptr;
-    if (auto *CI = dyn_cast<ConstantInt>(C))
-      return CI;
-    if (auto *CDV = dyn_cast<ConstantDataVector>(C)) {
-      if (auto *CI = dyn_cast_if_present<ConstantInt>(CDV->getSplatValue()))
-        return CI;
-    }
-    return nullptr;
-  }
-
 public:
   InterchangeableInstruction(Instruction *MainOp) : MainOp(MainOp) {}
   virtual bool isSame(Instruction *I) {
@@ -867,6 +853,29 @@ class InterchangeableBinOp final : public InterchangeableInstruction {
   MaskType Mask = 0b11111111;
   MaskType SeenBefore = 0;
 
+  /// Return a non-nullptr if either operand of I is a ConstantInt.
+  static std::pair<ConstantInt *, unsigned>
+  isBinOpWithConstantInt(Instruction *I) {
+    unsigned Opcode = I->getOpcode();
+    unsigned Pos = 1;
+    Constant *C;
+    if (!match(I, m_BinOp(m_Value(), m_Constant(C)))) {
+      if (Opcode == Instruction::Sub || Opcode == Instruction::Shl ||
+          Opcode == Instruction::AShr)
+        return std::make_pair(nullptr, Pos);
+      if (!match(I, m_BinOp(m_Constant(C), m_Value())))
+        return std::make_pair(nullptr, Pos);
+      Pos = 0;
+    }
+    if (auto *CI = dyn_cast<ConstantInt>(C))
+      return std::make_pair(CI, Pos);
+    if (auto *CDV = dyn_cast<ConstantDataVector>(C)) {
+      if (auto *CI = dyn_cast_if_present<ConstantInt>(CDV->getSplatValue()))
+        return std::make_pair(CI, Pos);
+    }
+    return std::make_pair(nullptr, Pos);
+  }
+
   static MaskType opcodeToMask(unsigned Opcode) {
     switch (Opcode) {
     case Instruction::Shl:
@@ -904,26 +913,26 @@ class InterchangeableBinOp final : public InterchangeableInstruction {
     if (!binary_search(SupportedOp, Opcode))
       return false;
     SeenBefore |= opcodeToMask(Opcode);
-    ConstantInt *CI = isBinOpWithConstantInt(I);
+    ConstantInt *CI = isBinOpWithConstantInt(I).first;
     if (CI) {
-      const APInt &Op1Int = CI->getValue();
+      const APInt &CIValue = CI->getValue();
       switch (Opcode) {
       case Instruction::Shl:
-        if (Op1Int.isZero())
+        if (CIValue.isZero())
           return true;
         return tryAnd(0b101);
       case Instruction::Mul:
-        if (Op1Int.isOne())
+        if (CIValue.isOne())
           return true;
-        if (Op1Int.isPowerOf2())
+        if (CIValue.isPowerOf2())
           return tryAnd(0b101);
         break;
       case Instruction::And:
-        if (Op1Int.isAllOnes())
+        if (CIValue.isAllOnes())
           return true;
         break;
       default:
-        if (Op1Int.isZero())
+        if (CIValue.isZero())
           return true;
         break;
       }
@@ -957,41 +966,48 @@ class InterchangeableBinOp final : public InterchangeableInstruction {
     unsigned FromOpcode = MainOp->getOpcode();
     if (FromOpcode == ToOpcode)
       return SmallVector<Value *>(MainOp->operands());
-    const APInt &Op1Int = isBinOpWithConstantInt(MainOp)->getValue();
-    unsigned Op1IntBitWidth = Op1Int.getBitWidth();
-    APInt RHSV;
+    auto [CI, Pos] = isBinOpWithConstantInt(MainOp);
+    const APInt &FromCIValue = CI->getValue();
+    unsigned FromCIValueBitWidth = FromCIValue.getBitWidth();
+    APInt ToCIValue;
     switch (FromOpcode) {
     case Instruction::Shl:
       if (ToOpcode == Instruction::Mul) {
-        RHSV = APInt::getOneBitSet(Op1IntBitWidth, Op1Int.getZExtValue());
+        ToCIValue = APInt::getOneBitSet(FromCIValueBitWidth,
+                                        FromCIValue.getZExtValue());
       } else {
-        assert(Op1Int.isZero() && "Cannot convert the instruction.");
-        RHSV = ToOpcode == Instruction::And ? APInt::getAllOnes(Op1IntBitWidth)
-                                            : APInt::getZero(Op1IntBitWidth);
+        assert(FromCIValue.isZero() && "Cannot convert the instruction.");
+        ToCIValue = ToOpcode == Instruction::And
+                        ? APInt::getAllOnes(FromCIValueBitWidth)
+                        : APInt::getZero(FromCIValueBitWidth);
       }
       break;
     case Instruction::Mul:
-      assert(Op1Int.isPowerOf2() && "Cannot convert the instruction.");
+      assert(FromCIValue.isPowerOf2() && "Cannot convert the instruction.");
       if (ToOpcode == Instruction::Shl) {
-        RHSV = APInt(Op1IntBitWidth, Op1Int.logBase2());
+        ToCIValue = APInt(FromCIValueBitWidth, FromCIValue.logBase2());
       } else {
-        assert(Op1Int.isOne() && "Cannot convert the instruction.");
-        RHSV = ToOpcode == Instruction::And ? APInt::getAllOnes(Op1IntBitWidth)
-                                            : APInt::getZero(Op1IntBitWidth);
+        assert(FromCIValue.isOne() && "Cannot convert the instruction.");
+        ToCIValue = ToOpcode == Instruction::And
+                        ? APInt::getAllOnes(FromCIValueBitWidth)
+                        : APInt::getZero(FromCIValueBitWidth);
       }
       break;
     case Instruction::And:
-      assert(Op1Int.isAllOnes() && "Cannot convert the instruction.");
-      RHSV = ToOpcode == Instruction::Mul
-                 ? APInt::getOneBitSet(Op1IntBitWidth, 0)
-                 : APInt::getZero(Op1IntBitWidth);
+      assert(FromCIValue.isAllOnes() && "Cannot convert the instruction.");
+      ToCIValue = ToOpcode == Instruction::Mul
+                      ? APInt::getOneBitSet(FromCIValueBitWidth, 0)
+                      : APInt::getZero(FromCIValueBitWidth);
       break;
     default:
-      RHSV = APInt::getZero(Op1IntBitWidth);
+      ToCIValue = APInt::getZero(FromCIValueBitWidth);
       break;
     }
-    return {MainOp->getOperand(0),
-            ConstantInt::get(MainOp->getOperand(1)->getType(), RHSV)};
+    auto LHS = MainOp->getOperand(1 - Pos);
+    auto RHS = ConstantInt::get(MainOp->getOperand(Pos)->getType(), ToCIValue);
+    if (Pos == 1)
+      return SmallVector<Value *>({LHS, RHS});
+    return SmallVector<Value *>({RHS, LHS});
   }
 };
 
diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/gather-with-minbith-user.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/gather-with-minbith-user.ll
index 781954cbec2f7..aa4a219925634 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/gather-with-minbith-user.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/gather-with-minbith-user.ll
@@ -5,9 +5,16 @@ define void @h() {
 ; CHECK-LABEL: define void @h() {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr i8, ptr null, i64 16
-; CHECK-NEXT:    [[ARRAYIDX18:%.*]] = getelementptr i8, ptr null, i64 24
-; CHECK-NEXT:    store <4 x i16> zeroinitializer, ptr [[ARRAYIDX2]], align 2
-; CHECK-NEXT:    store <4 x i16> zeroinitializer, ptr [[ARRAYIDX18]], align 2
+; CHECK-NEXT:    [[TMP0:%.*]] = call <8 x i1> @llvm.vector.insert.v8i1.v2i1(<8 x i1> <i1 false, i1 false, i1 false, i1 false, i1 poison, i1 poison, i1 false, i1 false>, <2 x i1> zeroinitializer, i64 4)
+; CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.vector.insert.v8i1.v2i1(<8 x i1> <i1 poison, i1 poison, i1 poison, i1 poison, i1 false, i1 false, i1 poison, i1 poison>, <2 x i1> zeroinitializer, i64 0)
+; CHECK-NEXT:    [[TMP2:%.*]] = call <8 x i1> @llvm.vector.insert.v8i1.v2i1(<8 x i1> [[TMP1]], <2 x i1> zeroinitializer, i64 2)
+; CHECK-NEXT:    [[TMP3:%.*]] = call <8 x i1> @llvm.vector.insert.v8i1.v2i1(<8 x i1> [[TMP2]], <2 x i1> zeroinitializer, i64 6)
+; CHECK-NEXT:    [[TMP4:%.*]] = sub <8 x i1> [[TMP0]], [[TMP3]]
+; CHECK-NEXT:    [[TMP5:%.*]] = add <8 x i1> [[TMP0]], [[TMP3]]
+; CHECK-NEXT:    [[TMP6:%.*]] = shufflevector <8 x i1> [[TMP4]], <8 x i1> [[TMP5]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
+; CHECK-NEXT:    [[TMP7:%.*]] = or <8 x i1> [[TMP6]], zeroinitializer
+; CHECK-NEXT:    [[TMP8:%.*]] = zext <8 x i1> [[TMP7]] to <8 x i16>
+; CHECK-NEXT:    store <8 x i16> [[TMP8]], ptr [[ARRAYIDX2]], align 2
 ; CHECK-NEXT:    ret void
 ;
 entry:
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/extract-scalar-from-undef.ll b/llvm/test/Transforms/SLPVectorizer/X86/extract-scalar-from-undef.ll
index 1c0b3f41d523b..514d5f974cb16 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/extract-scalar-from-undef.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/extract-scalar-from-undef.ll
@@ -8,9 +8,9 @@ define i64 @foo(i32 %tmp7) {
 ; CHECK-NEXT:    [[TMP4:%.*]] = sub <8 x i32> [[TMP0]], <i32 0, i32 0, i32 poison, i32 0, i32 0, i32 poison, i32 0, i32 poison>
 ; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 poison, i32 poison, i32 poison, i32 0>, <8 x i32> [[TMP4]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 14, i32 poison, i32 poison, i32 7>
 ; CHECK-NEXT:    [[TMP13:%.*]] = insertelement <8 x i32> [[TMP2]], i32 0, i32 5
-; CHECK-NEXT:    [[TMP6:%.*]] = add nsw <8 x i32> [[TMP13]], [[TMP4]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = sub nsw <8 x i32> [[TMP13]], [[TMP4]]
-; CHECK-NEXT:    [[TMP7:%.*]] = shufflevector <8 x i32> [[TMP6]], <8 x i32> [[TMP5]], <8 x i32> <i32 0, i32 9, i32 10, i32 11, i32 4, i32 5, i32 14, i32 15>
+; CHECK-NEXT:    [[TMP6:%.*]] = add nsw <8 x i32> [[TMP13]], [[TMP4]]
+; CHECK-NEXT:    [[TMP7:%.*]] = shufflevector <8 x i32> [[TMP5]], <8 x i32> [[TMP6]], <8 x i32> <i32 8, i32 1, i32 2, i32 3, i32 12, i32 13, i32 6, i32 7>
 ; CHECK-NEXT:    [[TMP8:%.*]] = add <8 x i32> zeroinitializer, [[TMP7]]
 ; CHECK-NEXT:    [[TMP9:%.*]] = xor <8 x i32> [[TMP8]], zeroinitializer
 ; CHECK-NEXT:    [[TMP10:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP9]])
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/multi-extracts-bv-combined.ll b/llvm/test/Transforms/SLPVectorizer/X86/multi-extracts-bv-combined.ll
index 94f2c79faa8c9..230e165e43edc 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/multi-extracts-bv-combined.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/multi-extracts-bv-combined.ll
@@ -8,7 +8,7 @@ define i32 @foo() {
 ; CHECK-NEXT:    [[D:%.*]] = load i32, ptr null, align 4
 ; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <4 x i32> <i32 0, i32 undef, i32 1, i32 0>, i32 [[D]], i32 1
 ; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 1, i32 2, i32 3, i32 1, i32 1, i32 1>
-; CHECK-NEXT:    [[TMP2:%.*]] = or <8 x i32> zeroinitializer, [[TMP1]]
+; CHECK-NEXT:    [[TMP2:%.*]] = add <8 x i32> zeroinitializer, [[TMP1]]
 ; CHECK-NEXT:    store <8 x i32> [[TMP2]], ptr getelementptr inbounds ([64 x i32], ptr null, i64 0, i64 15), align 4
 ; CHECK-NEXT:    ret i32 0
 ;
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reorder_diamond_match.ll b/llvm/test/Transforms/SLPVectorizer/X86/reorder_diamond_match.ll
index 9682567b173c3..43977da19377d 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/reorder_diamond_match.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/reorder_diamond_match.ll
@@ -14,10 +14,8 @@ define void @test() {
 ; CHECK-NEXT:    [[TMP9:%.*]] = add <4 x i16> [[TMP7]], [[TMP8]]
 ; CHECK-NEXT:    [[TMP10:%.*]] = sub <4 x i16> [[TMP7]], [[TMP8]]
 ; CHECK-NEXT:    [[TMP11:%.*]] = shufflevector <4 x i16> [[TMP9]], <4 x i16> [[TMP10]], <4 x i32> <i32 1, i32 4, i32 3, i32 6>
-; CHECK-NEXT:    [[TMP12:%.*]] = add <4 x i16> zeroinitializer, [[TMP11]]
 ; CHECK-NEXT:    [[TMP13:%.*]] = sub <4 x i16> zeroinitializer, [[TMP11]]
-; CHECK-NEXT:    [[TMP14:%.*]] = shufflevector <4 x i16> [[TMP12]], <4 x i16> [[TMP13]], <4 x i32> <i32 0, i32 1, i32 6, i32 7>
-; CHECK-NEXT:    [[TMP15:%.*]] = sext <4 x i16> [[TMP14]] to <4 x i32>
+; CHECK-NEXT:    [[TMP15:%.*]] = sext <4 x i16> [[TMP13]] to <4 x i32>
 ; CHECK-NEXT:    store <4 x i32> [[TMP15]], ptr [[TMP2]], align 16
 ; CHECK-NEXT:    ret void
 ;
diff --git a/llvm/test/Transforms/SLPVectorizer/shuffle-mask-resized.ll b/llvm/test/Transforms/SLPVectorizer/shuffle-mask-resized.ll
index 1e3255f2187af..cf5927bf58327 100644
--- a/llvm/test/Transforms/SLPVectorizer/shuffle-mask-resized.ll
+++ b/llvm/test/Transforms/SLPVectorizer/shuffle-mask-resized.ll
@@ -12,7 +12,7 @@ define i32 @test() {
 ; CHECK-NEXT:    br i1 false, label [[BB4:%.*]], label [[BB3]]
 ; CHECK:       bb3:
 ; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <2 x i32> [[TMP0]], <2 x i32> <i32 0, i32 poison>, <2 x i32> <i32 2, i32 1>
-; CHECK-NEXT:    [[TMP5]] = or <2 x i32> zeroinitializer, [[TMP2]]
+; CHECK-NEXT:    [[TMP5]] = add <2 x i32> zeroinitializer, [[TMP2]]
 ; CHECK-NEXT:    br label [[BB1]]
 ; CHECK:       bb4:
 ; CHECK-NEXT:    [[TMP6:%.*]] = phi <8 x i32> [ [[TMP1]], [[BB1]] ]

>From 29c8cff49c65814c589f0f8499a22d093ea46dc4 Mon Sep 17 00:00:00 2001
From: Han-Kuan Chen <hankuan.chen at sifive.com>
Date: Wed, 19 Feb 2025 21:26:39 -0800
Subject: [PATCH 13/38] prefer AltOp instead of interchangeable instructions of
 MainOp

---
 .../Transforms/Vectorize/SLPVectorizer.cpp    | 21 ++++++++++++-------
 .../AArch64/gather-with-minbith-user.ll       | 11 +---------
 2 files changed, 14 insertions(+), 18 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index a303d2dcb4386..0242a3d8577a4 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -1036,17 +1036,20 @@ static bool getInterchangeableInstruction(
 
 static bool isConvertible(Instruction *I, Instruction *MainOp,
                           Instruction *AltOp) {
-  if (!I->isBinaryOp())
-    return I->getOpcode() == MainOp->getOpcode() ||
-           I->getOpcode() == AltOp->getOpcode();
   assert(MainOp && "MainOp cannot be nullptr.");
+  if (I->getOpcode() == MainOp->getOpcode())
+    return true;
+  assert(AltOp && "AltOp cannot be nullptr.");
+  if (I->getOpcode() == AltOp->getOpcode())
+    return true;
+  if (!I->isBinaryOp())
+    return false;
   SmallVector<std::unique_ptr<InterchangeableInstruction>> Candidate(
       getInterchangeableInstruction(I));
   for (std::unique_ptr<InterchangeableInstruction> &C : Candidate)
     if (C->isSame(I) && C->isSame(MainOp))
       return true;
   Candidate = getInterchangeableInstruction(I);
-  assert(AltOp && "AltOp cannot be nullptr.");
   for (std::unique_ptr<InterchangeableInstruction> &C : Candidate)
     if (C->isSame(I) && C->isSame(AltOp))
       return true;
@@ -1056,10 +1059,12 @@ static bool isConvertible(Instruction *I, Instruction *MainOp,
 static std::pair<Instruction *, SmallVector<Value *>>
 convertTo(Instruction *I, Instruction *MainOp, Instruction *AltOp) {
   assert(isConvertible(I, MainOp, AltOp) && "Cannot convert the instruction.");
-  if (!I->isBinaryOp())
-    return std::make_pair(I->getOpcode() == MainOp->getOpcode() ? MainOp
-                                                                : AltOp,
-                          SmallVector<Value *>(I->operands()));
+  if (I->getOpcode() == MainOp->getOpcode())
+    return std::make_pair(MainOp, SmallVector<Value *>(I->operands()));
+  // Prefer AltOp instead of interchangeable instruction of MainOp.
+  if (I->getOpcode() == AltOp->getOpcode())
+    return std::make_pair(AltOp, SmallVector<Value *>(I->operands()));
+  assert(I->isBinaryOp() && "Cannot convert the instruction.");
   SmallVector<std::unique_ptr<InterchangeableInstruction>> Candidate(
       getInterchangeableInstruction(I));
   for (std::unique_ptr<InterchangeableInstruction> &C : Candidate)
diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/gather-with-minbith-user.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/gather-with-minbith-user.ll
index aa4a219925634..3ebe920d17343 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/gather-with-minbith-user.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/gather-with-minbith-user.ll
@@ -5,16 +5,7 @@ define void @h() {
 ; CHECK-LABEL: define void @h() {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr i8, ptr null, i64 16
-; CHECK-NEXT:    [[TMP0:%.*]] = call <8 x i1> @llvm.vector.insert.v8i1.v2i1(<8 x i1> <i1 false, i1 false, i1 false, i1 false, i1 poison, i1 poison, i1 false, i1 false>, <2 x i1> zeroinitializer, i64 4)
-; CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.vector.insert.v8i1.v2i1(<8 x i1> <i1 poison, i1 poison, i1 poison, i1 poison, i1 false, i1 false, i1 poison, i1 poison>, <2 x i1> zeroinitializer, i64 0)
-; CHECK-NEXT:    [[TMP2:%.*]] = call <8 x i1> @llvm.vector.insert.v8i1.v2i1(<8 x i1> [[TMP1]], <2 x i1> zeroinitializer, i64 2)
-; CHECK-NEXT:    [[TMP3:%.*]] = call <8 x i1> @llvm.vector.insert.v8i1.v2i1(<8 x i1> [[TMP2]], <2 x i1> zeroinitializer, i64 6)
-; CHECK-NEXT:    [[TMP4:%.*]] = sub <8 x i1> [[TMP0]], [[TMP3]]
-; CHECK-NEXT:    [[TMP5:%.*]] = add <8 x i1> [[TMP0]], [[TMP3]]
-; CHECK-NEXT:    [[TMP6:%.*]] = shufflevector <8 x i1> [[TMP4]], <8 x i1> [[TMP5]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
-; CHECK-NEXT:    [[TMP7:%.*]] = or <8 x i1> [[TMP6]], zeroinitializer
-; CHECK-NEXT:    [[TMP8:%.*]] = zext <8 x i1> [[TMP7]] to <8 x i16>
-; CHECK-NEXT:    store <8 x i16> [[TMP8]], ptr [[ARRAYIDX2]], align 2
+; CHECK-NEXT:    store <8 x i16> zeroinitializer, ptr [[ARRAYIDX2]], align 2
 ; CHECK-NEXT:    ret void
 ;
 entry:

>From ea092b646c9939cc8484cf91379ff737b33adbe0 Mon Sep 17 00:00:00 2001
From: Han-Kuan Chen <hankuan.chen at sifive.com>
Date: Wed, 19 Feb 2025 23:13:34 -0800
Subject: [PATCH 14/38] rename getInterchangeableInstructionOpcode to getOpcode
 and getInterchangeableInstructionOps to getOperand

---
 .../Transforms/Vectorize/SLPVectorizer.cpp    | 26 +++++++------------
 1 file changed, 9 insertions(+), 17 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 0242a3d8577a4..6977cda0024f8 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -820,10 +820,8 @@ namespace {
 /// The class maintains a reference to the main instruction (MainOp) and
 /// provides methods to:
 /// - Check if another instruction is interchangeable (isSame)
-/// - Get the opcode for the interchangeable form
-/// (getInterchangeableInstructionOpcode)
-/// - Get the operands for the interchangeable form
-/// (getInterchangeableInstructionOps)
+/// - Get the opcode for the interchangeable form (getOpcode)
+/// - Get the operands for the interchangeable form (getOperand)
 class InterchangeableInstruction {
 protected:
   Instruction *const MainOp;
@@ -833,11 +831,8 @@ class InterchangeableInstruction {
   virtual bool isSame(Instruction *I) {
     return MainOp->getOpcode() == I->getOpcode();
   }
-  virtual unsigned getInterchangeableInstructionOpcode() {
-    return MainOp->getOpcode();
-  }
-  virtual SmallVector<Value *>
-  getInterchangeableInstructionOps(Instruction *I) {
+  virtual unsigned getOpcode() { return MainOp->getOpcode(); }
+  virtual SmallVector<Value *> getOperand(Instruction *I) {
     assert(MainOp->getOpcode() == I->getOpcode());
     return SmallVector<Value *>(MainOp->operands());
   }
@@ -939,7 +934,7 @@ class InterchangeableBinOp final : public InterchangeableInstruction {
     }
     return tryAnd(opcodeToMask(Opcode));
   }
-  unsigned getInterchangeableInstructionOpcode() override {
+  unsigned getOpcode() override {
     MaskType Candidate = Mask & SeenBefore;
     if (Candidate & 0b1)
       return Instruction::Shl;
@@ -959,8 +954,7 @@ class InterchangeableBinOp final : public InterchangeableInstruction {
       return Instruction::Xor;
     llvm_unreachable("Cannot find interchangeable instruction.");
   }
-  SmallVector<Value *>
-  getInterchangeableInstructionOps(Instruction *I) override {
+  SmallVector<Value *> getOperand(Instruction *I) override {
     unsigned ToOpcode = I->getOpcode();
     assert(binary_search(SupportedOp, ToOpcode) && "Unsupported opcode.");
     unsigned FromOpcode = MainOp->getOpcode();
@@ -1069,12 +1063,11 @@ convertTo(Instruction *I, Instruction *MainOp, Instruction *AltOp) {
       getInterchangeableInstruction(I));
   for (std::unique_ptr<InterchangeableInstruction> &C : Candidate)
     if (C->isSame(I) && C->isSame(MainOp))
-      return std::make_pair(MainOp,
-                            C->getInterchangeableInstructionOps(MainOp));
+      return std::make_pair(MainOp, C->getOperand(MainOp));
   Candidate = getInterchangeableInstruction(I);
   for (std::unique_ptr<InterchangeableInstruction> &C : Candidate)
     if (C->isSame(I) && C->isSame(AltOp))
-      return std::make_pair(AltOp, C->getInterchangeableInstructionOps(AltOp));
+      return std::make_pair(AltOp, C->getOperand(AltOp));
   llvm_unreachable("Cannot convert the instruction.");
 }
 
@@ -1366,8 +1359,7 @@ static InstructionsState getSameOpcode(ArrayRef<Value *> VL,
         [&](ArrayRef<std::unique_ptr<InterchangeableInstruction>> Candidate) {
           for (const std::unique_ptr<InterchangeableInstruction> &I :
                Candidate) {
-            unsigned InterchangeableInstructionOpcode =
-                I->getInterchangeableInstructionOpcode();
+            unsigned InterchangeableInstructionOpcode = I->getOpcode();
             for (Value *V : VL) {
               if (isa<PoisonValue>(V))
                 continue;

>From 968f3463f6833675e504b8fb159224cd6176089f Mon Sep 17 00:00:00 2001
From: Han-Kuan Chen <hankuan.chen at sifive.com>
Date: Thu, 20 Feb 2025 22:56:53 -0800
Subject: [PATCH 15/38] MainOp may not be the first instruction in a bundle

---
 .../Transforms/Vectorize/SLPVectorizer.cpp    | 19 +++++++------------
 1 file changed, 7 insertions(+), 12 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 6977cda0024f8..daa647f2a87bd 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -8795,12 +8795,8 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
 
   BlockScheduling &BS = *BSRef;
 
-  SmallVector<Value *> MainOpIsTheFirst(UniqueValues);
-  auto MainOpIter = find(MainOpIsTheFirst, S.getMainOp());
-  std::rotate(MainOpIsTheFirst.begin(), MainOpIter, std::next(MainOpIter));
-
   std::optional<ScheduleData *> Bundle =
-      BS.tryScheduleBundle(MainOpIsTheFirst, this, S);
+      BS.tryScheduleBundle(UniqueValues, this, S);
 #ifdef EXPENSIVE_CHECKS
   // Make sure we didn't break any internal invariants
   BS.verify();
@@ -17467,7 +17463,6 @@ BoUpSLP::BlockScheduling::buildBundle(ArrayRef<Value *> VL) {
 std::optional<BoUpSLP::ScheduleData *>
 BoUpSLP::BlockScheduling::tryScheduleBundle(ArrayRef<Value *> VL, BoUpSLP *SLP,
                                             const InstructionsState &S) {
-  assert(VL[0] == S.getMainOp() && "MainOp must be the first element of VL.");
   // No need to schedule PHIs, insertelement, extractelement and extractvalue
   // instructions.
   if (isa<PHINode>(S.getMainOp()) ||
@@ -17557,21 +17552,21 @@ BoUpSLP::BlockScheduling::tryScheduleBundle(ArrayRef<Value *> VL, BoUpSLP *SLP,
   auto *Bundle = buildBundle(VL);
   TryScheduleBundleImpl(ReSchedule, Bundle);
   if (!Bundle->isReady()) {
-    cancelScheduling(VL, S.getMainOp());
+    cancelScheduling(VL, Bundle->Inst);
     return std::nullopt;
   }
   return Bundle;
 }
 
 void BoUpSLP::BlockScheduling::cancelScheduling(ArrayRef<Value *> VL,
-                                                Value *OpValue) {
-  if (isa<PHINode>(OpValue) || isVectorLikeInstWithConstOps(OpValue) ||
+                                                Value *Inst) {
+  if (isa<PHINode>(Inst) || isVectorLikeInstWithConstOps(Inst) ||
       doesNotNeedToSchedule(VL))
     return;
 
-  if (doesNotNeedToBeScheduled(OpValue))
-    OpValue = *find_if_not(VL, doesNotNeedToBeScheduled);
-  ScheduleData *Bundle = getScheduleData(OpValue);
+  if (doesNotNeedToBeScheduled(Inst))
+    Inst = *find_if_not(VL, doesNotNeedToBeScheduled);
+  ScheduleData *Bundle = getScheduleData(Inst);
   LLVM_DEBUG(dbgs() << "SLP:  cancel scheduling of " << *Bundle << "\n");
   assert(!Bundle->IsScheduled &&
          "Can't cancel bundle which is already scheduled");

>From 3f067dc2779f269d623a7651b28556968e73f1e0 Mon Sep 17 00:00:00 2001
From: Han-Kuan Chen <hankuan.chen at sifive.com>
Date: Thu, 20 Feb 2025 22:57:25 -0800
Subject: [PATCH 16/38] apply comment

---
 .../Transforms/Vectorize/SLPVectorizer.cpp    | 21 +++++++++++--------
 1 file changed, 12 insertions(+), 9 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index daa647f2a87bd..0295a8f2f784f 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -824,16 +824,17 @@ namespace {
 /// - Get the operands for the interchangeable form (getOperand)
 class InterchangeableInstruction {
 protected:
-  Instruction *const MainOp;
+  Instruction *const MainOp = nullptr;
 
 public:
   InterchangeableInstruction(Instruction *MainOp) : MainOp(MainOp) {}
   virtual bool isSame(Instruction *I) {
     return MainOp->getOpcode() == I->getOpcode();
   }
-  virtual unsigned getOpcode() { return MainOp->getOpcode(); }
-  virtual SmallVector<Value *> getOperand(Instruction *I) {
-    assert(MainOp->getOpcode() == I->getOpcode());
+  virtual unsigned getOpcode() const { return MainOp->getOpcode(); }
+  virtual SmallVector<Value *> getOperand(Instruction *I) const {
+    assert(MainOp->getOpcode() == I->getOpcode() &&
+           "Cannot convert the instruction.");
     return SmallVector<Value *>(MainOp->operands());
   }
   virtual ~InterchangeableInstruction() = default;
@@ -841,6 +842,7 @@ class InterchangeableInstruction {
 
 class InterchangeableBinOp final : public InterchangeableInstruction {
   using MaskType = std::uint_fast8_t;
+  // Sort SupportedOp because it is used by binary_search.
   constexpr static std::initializer_list<unsigned> SupportedOp = {
       Instruction::Add,  Instruction::Sub, Instruction::Mul, Instruction::Shl,
       Instruction::AShr, Instruction::And, Instruction::Or,  Instruction::Xor};
@@ -934,7 +936,7 @@ class InterchangeableBinOp final : public InterchangeableInstruction {
     }
     return tryAnd(opcodeToMask(Opcode));
   }
-  unsigned getOpcode() override {
+  unsigned getOpcode() const override {
     MaskType Candidate = Mask & SeenBefore;
     if (Candidate & 0b1)
       return Instruction::Shl;
@@ -954,7 +956,7 @@ class InterchangeableBinOp final : public InterchangeableInstruction {
       return Instruction::Xor;
     llvm_unreachable("Cannot find interchangeable instruction.");
   }
-  SmallVector<Value *> getOperand(Instruction *I) override {
+  SmallVector<Value *> getOperand(Instruction *I) const override {
     unsigned ToOpcode = I->getOpcode();
     assert(binary_search(SupportedOp, ToOpcode) && "Unsupported opcode.");
     unsigned FromOpcode = MainOp->getOpcode();
@@ -997,8 +999,9 @@ class InterchangeableBinOp final : public InterchangeableInstruction {
       ToCIValue = APInt::getZero(FromCIValueBitWidth);
       break;
     }
-    auto LHS = MainOp->getOperand(1 - Pos);
-    auto RHS = ConstantInt::get(MainOp->getOperand(Pos)->getType(), ToCIValue);
+    Value *LHS = MainOp->getOperand(1 - Pos);
+    Constant *RHS =
+        ConstantInt::get(MainOp->getOperand(Pos)->getType(), ToCIValue);
     if (Pos == 1)
       return SmallVector<Value *>({LHS, RHS});
     return SmallVector<Value *>({RHS, LHS});
@@ -1363,7 +1366,7 @@ static InstructionsState getSameOpcode(ArrayRef<Value *> VL,
             for (Value *V : VL) {
               if (isa<PoisonValue>(V))
                 continue;
-              Instruction *Inst = cast<Instruction>(V);
+              auto *Inst = cast<Instruction>(V);
               if (Inst->getOpcode() == InterchangeableInstructionOpcode)
                 return Inst;
             }

>From 3cedcd43b8f7992b2b44efb7d749257f0c73e82d Mon Sep 17 00:00:00 2001
From: Han-Kuan Chen <hankuan.chen at sifive.com>
Date: Thu, 20 Feb 2025 23:35:28 -0800
Subject: [PATCH 17/38] do not use magic numbers

---
 .../Transforms/Vectorize/SLPVectorizer.cpp    | 55 ++++++++++++-------
 1 file changed, 35 insertions(+), 20 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 0295a8f2f784f..15036abe4f576 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -846,8 +846,23 @@ class InterchangeableBinOp final : public InterchangeableInstruction {
   constexpr static std::initializer_list<unsigned> SupportedOp = {
       Instruction::Add,  Instruction::Sub, Instruction::Mul, Instruction::Shl,
       Instruction::AShr, Instruction::And, Instruction::Or,  Instruction::Xor};
-  // from high to low bit: Xor Or And Sub Add Mul AShr Shl
-  MaskType Mask = 0b11111111;
+  enum : MaskType {
+    SHL_BIT = 0b1,
+    AShr_BIT = 0b10,
+    Mul_BIT = 0b100,
+    Add_BIT = 0b1000,
+    Sub_BIT = 0b10000,
+    And_BIT = 0b100000,
+    Or_BIT = 0b1000000,
+    Xor_BIT = 0b10000000,
+  };
+  // The bit it sets represents whether MainOp can be converted to.
+  MaskType Mask = Xor_BIT | Or_BIT | And_BIT | Sub_BIT | Add_BIT | Mul_BIT |
+                  AShr_BIT | SHL_BIT;
+  // We cannot create an interchangeable instruction that does not exist in VL.
+  // For example, VL [x + 0, y * 1] can be converted to [x << 0, y << 0], but
+  // 'shl' does not exist in VL. In the end, we convert VL to [x * 1, y * 1].
+  // SeenBefore is used to know what operations have been seen before.
   MaskType SeenBefore = 0;
 
   /// Return a non-nullptr if either operand of I is a ConstantInt.
@@ -876,21 +891,21 @@ class InterchangeableBinOp final : public InterchangeableInstruction {
   static MaskType opcodeToMask(unsigned Opcode) {
     switch (Opcode) {
     case Instruction::Shl:
-      return 0b1;
+      return SHL_BIT;
     case Instruction::AShr:
-      return 0b10;
+      return AShr_BIT;
     case Instruction::Mul:
-      return 0b100;
+      return Mul_BIT;
     case Instruction::Add:
-      return 0b1000;
+      return Add_BIT;
     case Instruction::Sub:
-      return 0b10000;
+      return Sub_BIT;
     case Instruction::And:
-      return 0b100000;
+      return And_BIT;
     case Instruction::Or:
-      return 0b1000000;
+      return Or_BIT;
     case Instruction::Xor:
-      return 0b10000000;
+      return Xor_BIT;
     }
     llvm_unreachable("Unsupported opcode.");
   }
@@ -917,12 +932,12 @@ class InterchangeableBinOp final : public InterchangeableInstruction {
       case Instruction::Shl:
         if (CIValue.isZero())
           return true;
-        return tryAnd(0b101);
+        return tryAnd(Mul_BIT | SHL_BIT);
       case Instruction::Mul:
         if (CIValue.isOne())
           return true;
         if (CIValue.isPowerOf2())
-          return tryAnd(0b101);
+          return tryAnd(Mul_BIT | SHL_BIT);
         break;
       case Instruction::And:
         if (CIValue.isAllOnes())
@@ -938,21 +953,21 @@ class InterchangeableBinOp final : public InterchangeableInstruction {
   }
   unsigned getOpcode() const override {
     MaskType Candidate = Mask & SeenBefore;
-    if (Candidate & 0b1)
+    if (Candidate & SHL_BIT)
       return Instruction::Shl;
-    if (Candidate & 0b10)
+    if (Candidate & AShr_BIT)
       return Instruction::AShr;
-    if (Candidate & 0b100)
+    if (Candidate & Mul_BIT)
       return Instruction::Mul;
-    if (Candidate & 0b1000)
+    if (Candidate & Add_BIT)
       return Instruction::Add;
-    if (Candidate & 0b10000)
+    if (Candidate & Sub_BIT)
       return Instruction::Sub;
-    if (Candidate & 0b100000)
+    if (Candidate & And_BIT)
       return Instruction::And;
-    if (Candidate & 0b1000000)
+    if (Candidate & Or_BIT)
       return Instruction::Or;
-    if (Candidate & 0b10000000)
+    if (Candidate & Xor_BIT)
       return Instruction::Xor;
     llvm_unreachable("Cannot find interchangeable instruction.");
   }

>From c573e92c5e094ae3118cc71e5387d1980899d9f6 Mon Sep 17 00:00:00 2001
From: Han-Kuan Chen <hankuan.chen at sifive.com>
Date: Thu, 20 Feb 2025 23:44:57 -0800
Subject: [PATCH 18/38] add const to isSame

---
 llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 15036abe4f576..816c7e13dd12c 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -828,7 +828,7 @@ class InterchangeableInstruction {
 
 public:
   InterchangeableInstruction(Instruction *MainOp) : MainOp(MainOp) {}
-  virtual bool isSame(Instruction *I) {
+  virtual bool isSame(Instruction *I) const {
     return MainOp->getOpcode() == I->getOpcode();
   }
   virtual unsigned getOpcode() const { return MainOp->getOpcode(); }
@@ -857,13 +857,13 @@ class InterchangeableBinOp final : public InterchangeableInstruction {
     Xor_BIT = 0b10000000,
   };
   // The bit it sets represents whether MainOp can be converted to.
-  MaskType Mask = Xor_BIT | Or_BIT | And_BIT | Sub_BIT | Add_BIT | Mul_BIT |
-                  AShr_BIT | SHL_BIT;
+  mutable MaskType Mask = Xor_BIT | Or_BIT | And_BIT | Sub_BIT | Add_BIT |
+                          Mul_BIT | AShr_BIT | SHL_BIT;
   // We cannot create an interchangeable instruction that does not exist in VL.
   // For example, VL [x + 0, y * 1] can be converted to [x << 0, y << 0], but
   // 'shl' does not exist in VL. In the end, we convert VL to [x * 1, y * 1].
   // SeenBefore is used to know what operations have been seen before.
-  MaskType SeenBefore = 0;
+  mutable MaskType SeenBefore = 0;
 
   /// Return a non-nullptr if either operand of I is a ConstantInt.
   static std::pair<ConstantInt *, unsigned>
@@ -910,7 +910,7 @@ class InterchangeableBinOp final : public InterchangeableInstruction {
     llvm_unreachable("Unsupported opcode.");
   }
 
-  bool tryAnd(MaskType X) {
+  bool tryAnd(MaskType X) const {
     if (Mask & X) {
       Mask &= X;
       return true;
@@ -920,7 +920,7 @@ class InterchangeableBinOp final : public InterchangeableInstruction {
 
 public:
   using InterchangeableInstruction::InterchangeableInstruction;
-  bool isSame(Instruction *I) override {
+  bool isSame(Instruction *I) const override {
     unsigned Opcode = I->getOpcode();
     if (!binary_search(SupportedOp, Opcode))
       return false;

>From ba9ab597de3f147097070354435a883d29d7818e Mon Sep 17 00:00:00 2001
From: Han-Kuan Chen <hankuan.chen at sifive.com>
Date: Sun, 23 Feb 2025 20:32:33 -0800
Subject: [PATCH 19/38] fix merge conflict

---
 .../gathered-delayed-nodes-with-reused-user.ll  | 17 +++--------------
 1 file changed, 3 insertions(+), 14 deletions(-)

diff --git a/llvm/test/Transforms/SLPVectorizer/X86/gathered-delayed-nodes-with-reused-user.ll b/llvm/test/Transforms/SLPVectorizer/X86/gathered-delayed-nodes-with-reused-user.ll
index 1e8734f58bdcd..5a9ea0d292fa0 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/gathered-delayed-nodes-with-reused-user.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/gathered-delayed-nodes-with-reused-user.ll
@@ -14,10 +14,10 @@ define i64 @foo() {
 ; CHECK:       bb3:
 ; CHECK-NEXT:    [[PHI4:%.*]] = phi i64 [ 0, [[BB:%.*]] ], [ 0, [[BB3]] ]
 ; CHECK-NEXT:    [[TMP0:%.*]] = phi <2 x i64> [ zeroinitializer, [[BB]] ], [ [[TMP3:%.*]], [[BB3]] ]
-; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x i64> <i64 poison, i64 0>, i64 [[PHI4]], i32 0
-; CHECK-NEXT:    [[TMP5]] = add <2 x i64> [[TMP0]], [[TMP2]]
 ; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <2 x i64> [[TMP0]], <2 x i64> <i64 poison, i64 0>, <2 x i32> <i32 0, i32 3>
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x i64> <i64 poison, i64 0>, i64 [[PHI4]], i32 0
 ; CHECK-NEXT:    [[TMP3]] = add <2 x i64> [[TMP4]], [[TMP2]]
+; CHECK-NEXT:    [[TMP5]] = add <2 x i64> [[TMP0]], [[TMP2]]
 ; CHECK-NEXT:    [[GETELEMENTPTR:%.*]] = getelementptr i64, ptr addrspace(1) null, i64 [[ADD7]]
 ; CHECK-NEXT:    [[OR:%.*]] = extractelement <2 x i64> [[TMP5]], i32 1
 ; CHECK-NEXT:    [[ICMP:%.*]] = icmp ult i64 [[OR]], 0
@@ -35,22 +35,11 @@ define i64 @foo() {
 ; FORCED-NEXT:    [[TMP1:%.*]] = phi <2 x i64> [ zeroinitializer, [[BB]] ], [ [[TMP7:%.*]], [[BB3]] ]
 ; FORCED-NEXT:    [[TMP6:%.*]] = shufflevector <2 x i64> [[TMP1]], <2 x i64> <i64 poison, i64 0>, <2 x i32> <i32 0, i32 3>
 ; FORCED-NEXT:    [[TMP2:%.*]] = insertelement <2 x i64> <i64 poison, i64 0>, i64 [[PHI5]], i32 0
-<<<<<<< HEAD
-; FORCED-NEXT:    [[TMP5]] = add <2 x i64> [[TMP1]], [[TMP2]]
-; FORCED-NEXT:    [[TMP6:%.*]] = shufflevector <2 x i64> [[TMP1]], <2 x i64> <i64 poison, i64 0>, <2 x i32> <i32 0, i32 3>
 ; FORCED-NEXT:    [[TMP7]] = add <2 x i64> [[TMP6]], [[TMP2]]
+; FORCED-NEXT:    [[TMP5]] = add <2 x i64> [[TMP1]], [[TMP2]]
 ; FORCED-NEXT:    [[GETELEMENTPTR:%.*]] = getelementptr i64, ptr addrspace(1) null, i64 [[ADD7]]
 ; FORCED-NEXT:    [[TMP8:%.*]] = extractelement <2 x i64> [[TMP5]], i32 1
 ; FORCED-NEXT:    [[ICMP:%.*]] = icmp ult i64 [[TMP8]], 0
-=======
-; FORCED-NEXT:    [[TMP7]] = add <2 x i64> [[TMP6]], [[TMP2]]
-; FORCED-NEXT:    [[TMP3:%.*]] = add <2 x i64> [[TMP1]], [[TMP2]]
-; FORCED-NEXT:    [[TMP4:%.*]] = or <2 x i64> [[TMP1]], [[TMP2]]
-; FORCED-NEXT:    [[TMP5]] = shufflevector <2 x i64> [[TMP3]], <2 x i64> [[TMP4]], <2 x i32> <i32 0, i32 3>
-; FORCED-NEXT:    [[GETELEMENTPTR:%.*]] = getelementptr i64, ptr addrspace(1) null, i64 [[TMP8]]
-; FORCED-NEXT:    [[TMP9:%.*]] = extractelement <2 x i64> [[TMP5]], i32 1
-; FORCED-NEXT:    [[ICMP:%.*]] = icmp ult i64 [[TMP9]], 0
->>>>>>> upstream/main
 ; FORCED-NEXT:    br i1 false, label [[BB3]], label [[BB1:%.*]]
 ;
 bb:

>From b5ae1807b42a97c7493cfdd19bbe36018923b449 Mon Sep 17 00:00:00 2001
From: Han-Kuan Chen <hankuan.chen at sifive.com>
Date: Tue, 25 Feb 2025 05:47:22 -0800
Subject: [PATCH 20/38] merge InterchangeableInstruction and
 InterchangeableBinOp

rename isSame to add
rename tryAnd to trySet
make Mask support MainOp_BIT
---
 .../Transforms/Vectorize/SLPVectorizer.cpp    | 169 +++++++-----------
 1 file changed, 62 insertions(+), 107 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 780202ab2bf5a..b096db156e9ad 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -814,34 +814,14 @@ namespace {
 /// equivalent forms. For example, multiplication by a power of 2 can be
 /// interchanged with a left shift.
 ///
-/// Derived classes implement specific interchange patterns by overriding the
-/// virtual methods to define their interchange logic.
-///
 /// The class maintains a reference to the main instruction (MainOp) and
 /// provides methods to:
-/// - Check if another instruction is interchangeable (isSame)
+/// - Check if the incoming instruction can use the same instruction as MainOp
+/// (add)
 /// - Get the opcode for the interchangeable form (getOpcode)
 /// - Get the operands for the interchangeable form (getOperand)
-class InterchangeableInstruction {
-protected:
-  Instruction *const MainOp = nullptr;
-
-public:
-  InterchangeableInstruction(Instruction *MainOp) : MainOp(MainOp) {}
-  virtual bool isSame(Instruction *I) const {
-    return MainOp->getOpcode() == I->getOpcode();
-  }
-  virtual unsigned getOpcode() const { return MainOp->getOpcode(); }
-  virtual SmallVector<Value *> getOperand(Instruction *I) const {
-    assert(MainOp->getOpcode() == I->getOpcode() &&
-           "Cannot convert the instruction.");
-    return SmallVector<Value *>(MainOp->operands());
-  }
-  virtual ~InterchangeableInstruction() = default;
-};
-
-class InterchangeableBinOp final : public InterchangeableInstruction {
-  using MaskType = std::uint_fast8_t;
+class InterchangeableBinOp {
+  using MaskType = std::uint_fast16_t;
   // Sort SupportedOp because it is used by binary_search.
   constexpr static std::initializer_list<unsigned> SupportedOp = {
       Instruction::Add,  Instruction::Sub, Instruction::Mul, Instruction::Shl,
@@ -855,15 +835,17 @@ class InterchangeableBinOp final : public InterchangeableInstruction {
     And_BIT = 0b100000,
     Or_BIT = 0b1000000,
     Xor_BIT = 0b10000000,
+    MainOp_BIT = 0b100000000
   };
+  Instruction *MainOp = nullptr;
   // The bit it sets represents whether MainOp can be converted to.
-  mutable MaskType Mask = Xor_BIT | Or_BIT | And_BIT | Sub_BIT | Add_BIT |
-                          Mul_BIT | AShr_BIT | SHL_BIT;
+  MaskType Mask = MainOp_BIT | Xor_BIT | Or_BIT | And_BIT | Sub_BIT | Add_BIT |
+                  Mul_BIT | AShr_BIT | SHL_BIT;
   // We cannot create an interchangeable instruction that does not exist in VL.
   // For example, VL [x + 0, y * 1] can be converted to [x << 0, y << 0], but
   // 'shl' does not exist in VL. In the end, we convert VL to [x * 1, y * 1].
   // SeenBefore is used to know what operations have been seen before.
-  mutable MaskType SeenBefore = 0;
+  MaskType SeenBefore = 0;
 
   /// Return a non-nullptr if either operand of I is a ConstantInt.
   static std::pair<ConstantInt *, unsigned>
@@ -910,7 +892,7 @@ class InterchangeableBinOp final : public InterchangeableInstruction {
     llvm_unreachable("Unsupported opcode.");
   }
 
-  bool tryAnd(MaskType X) const {
+  bool trySet(MaskType X) {
     if (Mask & X) {
       Mask &= X;
       return true;
@@ -919,39 +901,47 @@ class InterchangeableBinOp final : public InterchangeableInstruction {
   }
 
 public:
-  using InterchangeableInstruction::InterchangeableInstruction;
-  bool isSame(Instruction *I) const override {
+  InterchangeableBinOp(Instruction *MainOp) : MainOp(MainOp) {}
+  bool add(Instruction *I) {
     unsigned Opcode = I->getOpcode();
-    if (!binary_search(SupportedOp, Opcode))
+    assert(is_sorted(SupportedOp) && "SupportedOp is not sorted.");
+    if (!binary_search(SupportedOp, Opcode)) {
+      if (MainOp->getOpcode() == Opcode)
+        return trySet(MainOp_BIT);
       return false;
+    }
     SeenBefore |= opcodeToMask(Opcode);
     ConstantInt *CI = isBinOpWithConstantInt(I).first;
     if (CI) {
+      constexpr MaskType CanBeAll = Xor_BIT | Or_BIT | And_BIT | Sub_BIT |
+                                    Add_BIT | Mul_BIT | AShr_BIT | SHL_BIT;
       const APInt &CIValue = CI->getValue();
       switch (Opcode) {
       case Instruction::Shl:
         if (CIValue.isZero())
-          return true;
-        return tryAnd(Mul_BIT | SHL_BIT);
+          return trySet(CanBeAll);
+        return trySet(Mul_BIT | SHL_BIT);
       case Instruction::Mul:
         if (CIValue.isOne())
-          return true;
+          return trySet(CanBeAll);
         if (CIValue.isPowerOf2())
-          return tryAnd(Mul_BIT | SHL_BIT);
+          return trySet(Mul_BIT | SHL_BIT);
         break;
       case Instruction::And:
         if (CIValue.isAllOnes())
-          return true;
+          return trySet(CanBeAll);
         break;
       default:
         if (CIValue.isZero())
-          return true;
+          return trySet(CanBeAll);
         break;
       }
     }
-    return tryAnd(opcodeToMask(Opcode));
+    return trySet(opcodeToMask(Opcode));
   }
-  unsigned getOpcode() const override {
+  unsigned getOpcode() const {
+    if (Mask & MainOp_BIT)
+      return MainOp->getOpcode();
     MaskType Candidate = Mask & SeenBefore;
     if (Candidate & SHL_BIT)
       return Instruction::Shl;
@@ -971,12 +961,12 @@ class InterchangeableBinOp final : public InterchangeableInstruction {
       return Instruction::Xor;
     llvm_unreachable("Cannot find interchangeable instruction.");
   }
-  SmallVector<Value *> getOperand(Instruction *I) const override {
+  SmallVector<Value *> getOperand(Instruction *I) const {
     unsigned ToOpcode = I->getOpcode();
-    assert(binary_search(SupportedOp, ToOpcode) && "Unsupported opcode.");
     unsigned FromOpcode = MainOp->getOpcode();
     if (FromOpcode == ToOpcode)
       return SmallVector<Value *>(MainOp->operands());
+    assert(binary_search(SupportedOp, ToOpcode) && "Unsupported opcode.");
     auto [CI, Pos] = isBinOpWithConstantInt(MainOp);
     const APInt &FromCIValue = CI->getValue();
     unsigned FromCIValueBitWidth = FromCIValue.getBitWidth();
@@ -1023,27 +1013,12 @@ class InterchangeableBinOp final : public InterchangeableInstruction {
   }
 };
 
-static SmallVector<std::unique_ptr<InterchangeableInstruction>>
-getInterchangeableInstruction(Instruction *MainOp) {
-  SmallVector<std::unique_ptr<InterchangeableInstruction>> Candidate;
-  Candidate.push_back(std::make_unique<InterchangeableInstruction>(MainOp));
-  if (MainOp->isBinaryOp())
-    Candidate.push_back(std::make_unique<InterchangeableBinOp>(MainOp));
-  return Candidate;
-}
-
-static bool getInterchangeableInstruction(
-    SmallVector<std::unique_ptr<InterchangeableInstruction>> &Candidate,
-    Instruction *I) {
-  auto Iter = std::stable_partition(
-      Candidate.begin(), Candidate.end(),
-      [&](const std::unique_ptr<InterchangeableInstruction> &C) {
-        return C->isSame(I);
-      });
-  if (Iter == Candidate.begin())
-    return false;
-  Candidate.erase(Iter, Candidate.end());
-  return true;
+static std::optional<InterchangeableBinOp> isConvertible(Instruction *From,
+                                                         Instruction *To) {
+  InterchangeableBinOp Converter(From);
+  if (Converter.add(From) && Converter.add(To))
+    return Converter;
+  return {};
 }
 
 static bool isConvertible(Instruction *I, Instruction *MainOp,
@@ -1056,16 +1031,7 @@ static bool isConvertible(Instruction *I, Instruction *MainOp,
     return true;
   if (!I->isBinaryOp())
     return false;
-  SmallVector<std::unique_ptr<InterchangeableInstruction>> Candidate(
-      getInterchangeableInstruction(I));
-  for (std::unique_ptr<InterchangeableInstruction> &C : Candidate)
-    if (C->isSame(I) && C->isSame(MainOp))
-      return true;
-  Candidate = getInterchangeableInstruction(I);
-  for (std::unique_ptr<InterchangeableInstruction> &C : Candidate)
-    if (C->isSame(I) && C->isSame(AltOp))
-      return true;
-  return false;
+  return isConvertible(I, MainOp) || isConvertible(I, AltOp);
 }
 
 static std::pair<Instruction *, SmallVector<Value *>>
@@ -1077,15 +1043,12 @@ convertTo(Instruction *I, Instruction *MainOp, Instruction *AltOp) {
   if (I->getOpcode() == AltOp->getOpcode())
     return std::make_pair(AltOp, SmallVector<Value *>(I->operands()));
   assert(I->isBinaryOp() && "Cannot convert the instruction.");
-  SmallVector<std::unique_ptr<InterchangeableInstruction>> Candidate(
-      getInterchangeableInstruction(I));
-  for (std::unique_ptr<InterchangeableInstruction> &C : Candidate)
-    if (C->isSame(I) && C->isSame(MainOp))
-      return std::make_pair(MainOp, C->getOperand(MainOp));
-  Candidate = getInterchangeableInstruction(I);
-  for (std::unique_ptr<InterchangeableInstruction> &C : Candidate)
-    if (C->isSame(I) && C->isSame(AltOp))
-      return std::make_pair(AltOp, C->getOperand(AltOp));
+  std::optional<InterchangeableBinOp> Converter(isConvertible(I, MainOp));
+  if (Converter)
+    return std::make_pair(MainOp, Converter->getOperand(MainOp));
+  Converter = isConvertible(I, AltOp);
+  if (Converter)
+    return std::make_pair(AltOp, Converter->getOperand(AltOp));
   llvm_unreachable("Cannot convert the instruction.");
 }
 
@@ -1209,11 +1172,8 @@ static InstructionsState getSameOpcode(ArrayRef<Value *> VL,
   unsigned Opcode = MainOp->getOpcode();
   unsigned AltOpcode = Opcode;
 
-  SmallVector<std::unique_ptr<InterchangeableInstruction>>
-      InterchangeableInstructionCandidate(
-          getInterchangeableInstruction(MainOp));
-  SmallVector<std::unique_ptr<InterchangeableInstruction>>
-      AlternateInterchangeableInstructionCandidate;
+  InterchangeableBinOp InterchangeableConverter(MainOp);
+  std::optional<InterchangeableBinOp> AlternateInterchangeableConverter;
   bool SwappedPredsCompatible = IsCmpOp && [&]() {
     SetVector<unsigned> UniquePreds, UniqueNonSwappedPreds;
     UniquePreds.insert(BasePred);
@@ -1260,17 +1220,15 @@ static InstructionsState getSameOpcode(ArrayRef<Value *> VL,
       return InstructionsState::invalid();
     unsigned InstOpcode = I->getOpcode();
     if (IsBinOp && isa<BinaryOperator>(I)) {
-      if (getInterchangeableInstruction(InterchangeableInstructionCandidate, I))
+      if (InterchangeableConverter.add(I))
         continue;
-      if (AlternateInterchangeableInstructionCandidate.empty()) {
+      if (!AlternateInterchangeableConverter) {
         if (!isValidForAlternation(Opcode) ||
             !isValidForAlternation(InstOpcode))
           return InstructionsState::invalid();
-        AlternateInterchangeableInstructionCandidate =
-            getInterchangeableInstruction(I);
+        AlternateInterchangeableConverter = InterchangeableBinOp(I);
       }
-      if (getInterchangeableInstruction(
-              AlternateInterchangeableInstructionCandidate, I))
+      if (AlternateInterchangeableConverter->add(I))
         continue;
     } else if (IsCastOp && isa<CastInst>(I)) {
       Value *Op0 = MainOp->getOperand(0);
@@ -1374,25 +1332,22 @@ static InstructionsState getSameOpcode(ArrayRef<Value *> VL,
 
   if (IsBinOp) {
     auto FindOp =
-        [&](ArrayRef<std::unique_ptr<InterchangeableInstruction>> Candidate) {
-          for (const std::unique_ptr<InterchangeableInstruction> &I :
-               Candidate) {
-            unsigned InterchangeableInstructionOpcode = I->getOpcode();
-            for (Value *V : VL) {
-              if (isa<PoisonValue>(V))
-                continue;
-              auto *Inst = cast<Instruction>(V);
-              if (Inst->getOpcode() == InterchangeableInstructionOpcode)
-                return Inst;
-            }
+        [&](const InterchangeableBinOp &Converter) {
+          unsigned InterchangeableInstructionOpcode = Converter.getOpcode();
+          for (Value *V : VL) {
+            if (isa<PoisonValue>(V))
+              continue;
+            auto *Inst = cast<Instruction>(V);
+            if (Inst->getOpcode() == InterchangeableInstructionOpcode)
+              return Inst;
           }
           llvm_unreachable(
               "Cannot find the candidate instruction for InstructionsState.");
         };
-    MainOp = FindOp(InterchangeableInstructionCandidate);
-    AltOp = AlternateInterchangeableInstructionCandidate.empty()
-                ? MainOp
-                : FindOp(AlternateInterchangeableInstructionCandidate);
+    MainOp = FindOp(InterchangeableConverter);
+    AltOp = AlternateInterchangeableConverter
+                ? FindOp(*AlternateInterchangeableConverter)
+                : MainOp;
   }
   return InstructionsState(MainOp, AltOp);
 }

>From 4f0008372a7b12ac1965181e67a5aaf6b8827490 Mon Sep 17 00:00:00 2001
From: Han-Kuan Chen <hankuan.chen at sifive.com>
Date: Tue, 25 Feb 2025 06:40:14 -0800
Subject: [PATCH 21/38] reduce opcodeToMask(Opcode) usage

---
 llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index b096db156e9ad..bde09e5eb0d19 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -910,7 +910,8 @@ class InterchangeableBinOp {
         return trySet(MainOp_BIT);
       return false;
     }
-    SeenBefore |= opcodeToMask(Opcode);
+    MaskType opcodeMask = opcodeToMask(Opcode);
+    SeenBefore |= opcodeMask;
     ConstantInt *CI = isBinOpWithConstantInt(I).first;
     if (CI) {
       constexpr MaskType CanBeAll = Xor_BIT | Or_BIT | And_BIT | Sub_BIT |
@@ -937,7 +938,7 @@ class InterchangeableBinOp {
         break;
       }
     }
-    return trySet(opcodeToMask(Opcode));
+    return trySet(opcodeMask);
   }
   unsigned getOpcode() const {
     if (Mask & MainOp_BIT)

>From 20c5597cc6c6933f1f621c97c16c85d1253f99ff Mon Sep 17 00:00:00 2001
From: Han-Kuan Chen <hankuan.chen at sifive.com>
Date: Tue, 25 Feb 2025 08:39:20 -0800
Subject: [PATCH 22/38] use assert instead of llvm_unreachable

---
 llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index bde09e5eb0d19..5fb3ae8a42efc 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -1048,9 +1048,8 @@ convertTo(Instruction *I, Instruction *MainOp, Instruction *AltOp) {
   if (Converter)
     return std::make_pair(MainOp, Converter->getOperand(MainOp));
   Converter = isConvertible(I, AltOp);
-  if (Converter)
-    return std::make_pair(AltOp, Converter->getOperand(AltOp));
-  llvm_unreachable("Cannot convert the instruction.");
+  assert(Converter && "Cannot convert the instruction.");
+  return std::make_pair(AltOp, Converter->getOperand(AltOp));
 }
 
 /// Main data required for vectorization of instructions.

>From f6b0561892b5fd9274a99e7512c39082415f5cdf Mon Sep 17 00:00:00 2001
From: Han-Kuan Chen <hankuan.chen at sifive.com>
Date: Wed, 26 Feb 2025 20:32:02 -0800
Subject: [PATCH 23/38] rename mask and use LLVM_MARK_AS_BITMASK_ENUM

---
 .../Transforms/Vectorize/SLPVectorizer.cpp    | 67 ++++++++++---------
 1 file changed, 34 insertions(+), 33 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 5fb3ae8a42efc..2ecf8ebb57e2a 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -827,20 +827,21 @@ class InterchangeableBinOp {
       Instruction::Add,  Instruction::Sub, Instruction::Mul, Instruction::Shl,
       Instruction::AShr, Instruction::And, Instruction::Or,  Instruction::Xor};
   enum : MaskType {
-    SHL_BIT = 0b1,
-    AShr_BIT = 0b10,
-    Mul_BIT = 0b100,
-    Add_BIT = 0b1000,
-    Sub_BIT = 0b10000,
-    And_BIT = 0b100000,
-    Or_BIT = 0b1000000,
-    Xor_BIT = 0b10000000,
-    MainOp_BIT = 0b100000000
+    ShlBIT = 0b1,
+    AShrBIT = 0b10,
+    MulBIT = 0b100,
+    AddBIT = 0b1000,
+    SubBIT = 0b10000,
+    AndBIT = 0b100000,
+    OrBIT = 0b1000000,
+    XorBIT = 0b10000000,
+    MainOpBIT = 0b100000000,
+    LLVM_MARK_AS_BITMASK_ENUM(MainOpBIT)
   };
   Instruction *MainOp = nullptr;
   // The bit it sets represents whether MainOp can be converted to.
-  MaskType Mask = MainOp_BIT | Xor_BIT | Or_BIT | And_BIT | Sub_BIT | Add_BIT |
-                  Mul_BIT | AShr_BIT | SHL_BIT;
+  MaskType Mask = MainOpBIT | XorBIT | OrBIT | AndBIT | SubBIT | AddBIT |
+                  MulBIT | AShrBIT | ShlBIT;
   // We cannot create an interchangeable instruction that does not exist in VL.
   // For example, VL [x + 0, y * 1] can be converted to [x << 0, y << 0], but
   // 'shl' does not exist in VL. In the end, we convert VL to [x * 1, y * 1].
@@ -873,21 +874,21 @@ class InterchangeableBinOp {
   static MaskType opcodeToMask(unsigned Opcode) {
     switch (Opcode) {
     case Instruction::Shl:
-      return SHL_BIT;
+      return ShlBIT;
     case Instruction::AShr:
-      return AShr_BIT;
+      return AShrBIT;
     case Instruction::Mul:
-      return Mul_BIT;
+      return MulBIT;
     case Instruction::Add:
-      return Add_BIT;
+      return AddBIT;
     case Instruction::Sub:
-      return Sub_BIT;
+      return SubBIT;
     case Instruction::And:
-      return And_BIT;
+      return AndBIT;
     case Instruction::Or:
-      return Or_BIT;
+      return OrBIT;
     case Instruction::Xor:
-      return Xor_BIT;
+      return XorBIT;
     }
     llvm_unreachable("Unsupported opcode.");
   }
@@ -907,26 +908,26 @@ class InterchangeableBinOp {
     assert(is_sorted(SupportedOp) && "SupportedOp is not sorted.");
     if (!binary_search(SupportedOp, Opcode)) {
       if (MainOp->getOpcode() == Opcode)
-        return trySet(MainOp_BIT);
+        return trySet(MainOpBIT);
       return false;
     }
     MaskType opcodeMask = opcodeToMask(Opcode);
     SeenBefore |= opcodeMask;
     ConstantInt *CI = isBinOpWithConstantInt(I).first;
     if (CI) {
-      constexpr MaskType CanBeAll = Xor_BIT | Or_BIT | And_BIT | Sub_BIT |
-                                    Add_BIT | Mul_BIT | AShr_BIT | SHL_BIT;
+      constexpr MaskType CanBeAll =
+          XorBIT | OrBIT | AndBIT | SubBIT | AddBIT | MulBIT | AShrBIT | ShlBIT;
       const APInt &CIValue = CI->getValue();
       switch (Opcode) {
       case Instruction::Shl:
         if (CIValue.isZero())
           return trySet(CanBeAll);
-        return trySet(Mul_BIT | SHL_BIT);
+        return trySet(MulBIT | ShlBIT);
       case Instruction::Mul:
         if (CIValue.isOne())
           return trySet(CanBeAll);
         if (CIValue.isPowerOf2())
-          return trySet(Mul_BIT | SHL_BIT);
+          return trySet(MulBIT | ShlBIT);
         break;
       case Instruction::And:
         if (CIValue.isAllOnes())
@@ -941,24 +942,24 @@ class InterchangeableBinOp {
     return trySet(opcodeMask);
   }
   unsigned getOpcode() const {
-    if (Mask & MainOp_BIT)
+    if (Mask & MainOpBIT)
       return MainOp->getOpcode();
     MaskType Candidate = Mask & SeenBefore;
-    if (Candidate & SHL_BIT)
+    if (Candidate & ShlBIT)
       return Instruction::Shl;
-    if (Candidate & AShr_BIT)
+    if (Candidate & AShrBIT)
       return Instruction::AShr;
-    if (Candidate & Mul_BIT)
+    if (Candidate & MulBIT)
       return Instruction::Mul;
-    if (Candidate & Add_BIT)
+    if (Candidate & AddBIT)
       return Instruction::Add;
-    if (Candidate & Sub_BIT)
+    if (Candidate & SubBIT)
       return Instruction::Sub;
-    if (Candidate & And_BIT)
+    if (Candidate & AndBIT)
       return Instruction::And;
-    if (Candidate & Or_BIT)
+    if (Candidate & OrBIT)
       return Instruction::Or;
-    if (Candidate & Xor_BIT)
+    if (Candidate & XorBIT)
       return Instruction::Xor;
     llvm_unreachable("Cannot find interchangeable instruction.");
   }

>From 751cfd909b1ddd3d4b88b735f308ff6d997a5b47 Mon Sep 17 00:00:00 2001
From: Han-Kuan Chen <hankuan.chen at sifive.com>
Date: Wed, 26 Feb 2025 20:51:52 -0800
Subject: [PATCH 24/38] support more patterns for add and sub

---
 llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp  | 16 ++++++++++++++++
 .../SLPVectorizer/X86/propagate_ir_flags.ll      | 12 +++---------
 .../SLPVectorizer/X86/vect_copyable_in_binops.ll |  8 ++------
 3 files changed, 21 insertions(+), 15 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 2ecf8ebb57e2a..e540d0b350889 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -929,6 +929,11 @@ class InterchangeableBinOp {
         if (CIValue.isPowerOf2())
           return trySet(MulBIT | ShlBIT);
         break;
+      case Instruction::Add:
+      case Instruction::Sub:
+        if (CIValue.isZero())
+          return trySet(CanBeAll);
+        return trySet(SubBIT | AddBIT);
       case Instruction::And:
         if (CIValue.isAllOnes())
           return trySet(CanBeAll);
@@ -996,6 +1001,17 @@ class InterchangeableBinOp {
                         : APInt::getZero(FromCIValueBitWidth);
       }
       break;
+    case Instruction::Add:
+    case Instruction::Sub:
+      if (FromCIValue.isZero()) {
+        ToCIValue = APInt::getZero(FromCIValueBitWidth);
+      } else {
+        assert(is_contained({Instruction::Add, Instruction::Sub}, ToOpcode) &&
+               "Cannot convert the instruction.");
+        ToCIValue = FromCIValue;
+        ToCIValue.negate();
+      }
+      break;
     case Instruction::And:
       assert(FromCIValue.isAllOnes() && "Cannot convert the instruction.");
       ToCIValue = ToOpcode == Instruction::Mul
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/propagate_ir_flags.ll b/llvm/test/Transforms/SLPVectorizer/X86/propagate_ir_flags.ll
index cb02f4d10923c..ad8e905a8ca02 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/propagate_ir_flags.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/propagate_ir_flags.ll
@@ -330,9 +330,7 @@ define void @only_arcp(ptr %x) {
 define void @addsub_all_nsw(ptr %x) {
 ; CHECK-LABEL: @addsub_all_nsw(
 ; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr [[X:%.*]], align 4
-; CHECK-NEXT:    [[TMP3:%.*]] = add nsw <4 x i32> [[TMP2]], splat (i32 1)
-; CHECK-NEXT:    [[TMP4:%.*]] = sub nsw <4 x i32> [[TMP2]], splat (i32 1)
-; CHECK-NEXT:    [[TMP5:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> [[TMP4]], <4 x i32> <i32 0, i32 5, i32 2, i32 7>
+; CHECK-NEXT:    [[TMP5:%.*]] = add nsw <4 x i32> [[TMP2]], <i32 1, i32 -1, i32 1, i32 -1>
 ; CHECK-NEXT:    store <4 x i32> [[TMP5]], ptr [[X]], align 4
 ; CHECK-NEXT:    ret void
 ;
@@ -361,9 +359,7 @@ define void @addsub_all_nsw(ptr %x) {
 define void @addsub_some_nsw(ptr %x) {
 ; CHECK-LABEL: @addsub_some_nsw(
 ; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr [[X:%.*]], align 4
-; CHECK-NEXT:    [[TMP3:%.*]] = add nsw <4 x i32> [[TMP2]], splat (i32 1)
-; CHECK-NEXT:    [[TMP4:%.*]] = sub <4 x i32> [[TMP2]], splat (i32 1)
-; CHECK-NEXT:    [[TMP5:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> [[TMP4]], <4 x i32> <i32 0, i32 5, i32 2, i32 7>
+; CHECK-NEXT:    [[TMP5:%.*]] = add <4 x i32> [[TMP2]], <i32 1, i32 -1, i32 1, i32 -1>
 ; CHECK-NEXT:    store <4 x i32> [[TMP5]], ptr [[X]], align 4
 ; CHECK-NEXT:    ret void
 ;
@@ -392,9 +388,7 @@ define void @addsub_some_nsw(ptr %x) {
 define void @addsub_no_nsw(ptr %x) {
 ; CHECK-LABEL: @addsub_no_nsw(
 ; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr [[X:%.*]], align 4
-; CHECK-NEXT:    [[TMP3:%.*]] = add <4 x i32> [[TMP2]], splat (i32 1)
-; CHECK-NEXT:    [[TMP4:%.*]] = sub <4 x i32> [[TMP2]], splat (i32 1)
-; CHECK-NEXT:    [[TMP5:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> [[TMP4]], <4 x i32> <i32 0, i32 5, i32 2, i32 7>
+; CHECK-NEXT:    [[TMP5:%.*]] = add <4 x i32> [[TMP2]], <i32 1, i32 -1, i32 1, i32 -1>
 ; CHECK-NEXT:    store <4 x i32> [[TMP5]], ptr [[X]], align 4
 ; CHECK-NEXT:    ret void
 ;
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/vect_copyable_in_binops.ll b/llvm/test/Transforms/SLPVectorizer/X86/vect_copyable_in_binops.ll
index 869a9d1aee80e..4f3d551e21122 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/vect_copyable_in_binops.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/vect_copyable_in_binops.ll
@@ -192,9 +192,7 @@ define void @addsub0(ptr noalias %dst, ptr noalias %src) {
 ; CHECK-NEXT:    [[INCDEC_PTR3:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 2
 ; CHECK-NEXT:    store i32 [[TMP1]], ptr [[INCDEC_PTR1]], align 4
 ; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x i32>, ptr [[INCDEC_PTR2]], align 4
-; CHECK-NEXT:    [[TMP3:%.*]] = add nsw <2 x i32> [[TMP2]], <i32 -2, i32 -3>
-; CHECK-NEXT:    [[TMP4:%.*]] = sub nsw <2 x i32> [[TMP2]], <i32 -2, i32 -3>
-; CHECK-NEXT:    [[TMP5:%.*]] = shufflevector <2 x i32> [[TMP3]], <2 x i32> [[TMP4]], <2 x i32> <i32 0, i32 3>
+; CHECK-NEXT:    [[TMP5:%.*]] = add nsw <2 x i32> [[TMP2]], <i32 -2, i32 3>
 ; CHECK-NEXT:    store <2 x i32> [[TMP5]], ptr [[INCDEC_PTR3]], align 4
 ; CHECK-NEXT:    ret void
 ;
@@ -225,9 +223,7 @@ define void @addsub1(ptr noalias %dst, ptr noalias %src) {
 ; CHECK-NEXT:    [[INCDEC_PTR2:%.*]] = getelementptr inbounds i32, ptr [[SRC:%.*]], i64 2
 ; CHECK-NEXT:    [[INCDEC_PTR3:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i64 2
 ; CHECK-NEXT:    [[TMP0:%.*]] = load <2 x i32>, ptr [[SRC]], align 4
-; CHECK-NEXT:    [[TMP1:%.*]] = add nsw <2 x i32> [[TMP0]], splat (i32 -1)
-; CHECK-NEXT:    [[TMP2:%.*]] = sub nsw <2 x i32> [[TMP0]], splat (i32 -1)
-; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP2]], <2 x i32> <i32 0, i32 3>
+; CHECK-NEXT:    [[TMP3:%.*]] = add nsw <2 x i32> [[TMP0]], <i32 -1, i32 1>
 ; CHECK-NEXT:    store <2 x i32> [[TMP3]], ptr [[DST]], align 4
 ; CHECK-NEXT:    [[INCDEC_PTR4:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 3
 ; CHECK-NEXT:    [[TMP4:%.*]] = load i32, ptr [[INCDEC_PTR2]], align 4

>From 5e80a55136a460f6e40c1fa16eff8e84edbdc858 Mon Sep 17 00:00:00 2001
From: Han-Kuan Chen <hankuan.chen at sifive.com>
Date: Wed, 26 Feb 2025 21:06:54 -0800
Subject: [PATCH 25/38] add comment

---
 llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index e540d0b350889..7f819b9fb9cce 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -848,7 +848,11 @@ class InterchangeableBinOp {
   // SeenBefore is used to know what operations have been seen before.
   MaskType SeenBefore = 0;
 
-  /// Return a non-nullptr if either operand of I is a ConstantInt.
+  // Return a non-nullptr if either operand of I is a ConstantInt.
+  // The second return value represents the operand position. We check the
+  // right-hand side first (1). If the right-hand side is not a ConstantInt and
+  // the instruction is neither Sub, Shl, nor AShr, we then check the left-hand
+  // side (0).
   static std::pair<ConstantInt *, unsigned>
   isBinOpWithConstantInt(Instruction *I) {
     unsigned Opcode = I->getOpcode();
@@ -893,6 +897,9 @@ class InterchangeableBinOp {
     llvm_unreachable("Unsupported opcode.");
   }
 
+  // Return false allows getSameOpcode to find an alternate instruction.
+  // Directly setting the mask will destroy the mask state, preventing us from
+  // determining which instruction the MainOp should convert to.
   bool trySet(MaskType X) {
     if (Mask & X) {
       Mask &= X;
@@ -947,6 +954,7 @@ class InterchangeableBinOp {
     return trySet(opcodeMask);
   }
   unsigned getOpcode() const {
+    // MainOpBIT is set before SeenBefore. It must be the first one to check.
     if (Mask & MainOpBIT)
       return MainOp->getOpcode();
     MaskType Candidate = Mask & SeenBefore;

>From 0474893644a284d1cfedba0fe0c4d817a15b4cd5 Mon Sep 17 00:00:00 2001
From: Han-Kuan Chen <hankuan.chen at sifive.com>
Date: Mon, 3 Mar 2025 22:22:23 -0800
Subject: [PATCH 26/38] refactor InterchangeableBinOp and add
 getInterchangeableMask

---
 .../Transforms/Vectorize/SLPVectorizer.cpp    | 75 ++++++++++---------
 1 file changed, 39 insertions(+), 36 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 7f819b9fb9cce..72ade643b1f33 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -827,6 +827,7 @@ class InterchangeableBinOp {
       Instruction::Add,  Instruction::Sub, Instruction::Mul, Instruction::Shl,
       Instruction::AShr, Instruction::And, Instruction::Or,  Instruction::Xor};
   enum : MaskType {
+    NOBIT = 0,
     ShlBIT = 0b1,
     AShrBIT = 0b10,
     MulBIT = 0b100,
@@ -856,6 +857,7 @@ class InterchangeableBinOp {
   static std::pair<ConstantInt *, unsigned>
   isBinOpWithConstantInt(Instruction *I) {
     unsigned Opcode = I->getOpcode();
+    assert(binary_search(SupportedOp, Opcode) && "Unsupported opcode.");
     unsigned Pos = 1;
     Constant *C;
     if (!match(I, m_BinOp(m_Value(), m_Constant(C)))) {
@@ -875,7 +877,8 @@ class InterchangeableBinOp {
     return std::make_pair(nullptr, Pos);
   }
 
-  static MaskType opcodeToMask(unsigned Opcode) {
+  // Prefer Shl, AShr, Mul, Add, Sub, And, Or and Xor over MainOp.
+  MaskType opcodeToMask(unsigned Opcode) const {
     switch (Opcode) {
     case Instruction::Shl:
       return ShlBIT;
@@ -894,32 +897,13 @@ class InterchangeableBinOp {
     case Instruction::Xor:
       return XorBIT;
     }
-    llvm_unreachable("Unsupported opcode.");
+    return Opcode == MainOp->getOpcode() ? MainOpBIT : NOBIT;
   }
 
-  // Return false allows getSameOpcode to find an alternate instruction.
-  // Directly setting the mask will destroy the mask state, preventing us from
-  // determining which instruction the MainOp should convert to.
-  bool trySet(MaskType X) {
-    if (Mask & X) {
-      Mask &= X;
-      return true;
-    }
-    return false;
-  }
-
-public:
-  InterchangeableBinOp(Instruction *MainOp) : MainOp(MainOp) {}
-  bool add(Instruction *I) {
+  MaskType getInterchangeableMask(Instruction *I) const {
     unsigned Opcode = I->getOpcode();
-    assert(is_sorted(SupportedOp) && "SupportedOp is not sorted.");
-    if (!binary_search(SupportedOp, Opcode)) {
-      if (MainOp->getOpcode() == Opcode)
-        return trySet(MainOpBIT);
-      return false;
-    }
-    MaskType opcodeMask = opcodeToMask(Opcode);
-    SeenBefore |= opcodeMask;
+    if (!binary_search(SupportedOp, Opcode))
+      return opcodeToMask(Opcode);
     ConstantInt *CI = isBinOpWithConstantInt(I).first;
     if (CI) {
       constexpr MaskType CanBeAll =
@@ -928,36 +912,55 @@ class InterchangeableBinOp {
       switch (Opcode) {
       case Instruction::Shl:
         if (CIValue.isZero())
-          return trySet(CanBeAll);
-        return trySet(MulBIT | ShlBIT);
+          return CanBeAll;
+        return MulBIT | ShlBIT;
       case Instruction::Mul:
         if (CIValue.isOne())
-          return trySet(CanBeAll);
+          return CanBeAll;
         if (CIValue.isPowerOf2())
-          return trySet(MulBIT | ShlBIT);
+          return MulBIT | ShlBIT;
         break;
       case Instruction::Add:
       case Instruction::Sub:
         if (CIValue.isZero())
-          return trySet(CanBeAll);
-        return trySet(SubBIT | AddBIT);
+          return CanBeAll;
+        return SubBIT | AddBIT;
       case Instruction::And:
         if (CIValue.isAllOnes())
-          return trySet(CanBeAll);
+          return CanBeAll;
         break;
       default:
         if (CIValue.isZero())
-          return trySet(CanBeAll);
+          return CanBeAll;
         break;
       }
     }
-    return trySet(opcodeMask);
+    return opcodeToMask(Opcode);
+  }
+
+  // Return false allows getSameOpcode to find an alternate instruction.
+  // Directly setting the mask will destroy the mask state, preventing us from
+  // determining which instruction the MainOp should convert to.
+  bool trySet(MaskType X) {
+    if (Mask & X) {
+      Mask &= X;
+      return true;
+    }
+    return false;
+  }
+
+public:
+  InterchangeableBinOp(Instruction *MainOp) : MainOp(MainOp) {
+    assert(is_sorted(SupportedOp) && "SupportedOp is not sorted.");
+  }
+  bool add(Instruction *I) {
+    SeenBefore |= opcodeToMask(I->getOpcode());
+    return trySet(getInterchangeableMask(I));
   }
   unsigned getOpcode() const {
-    // MainOpBIT is set before SeenBefore. It must be the first one to check.
-    if (Mask & MainOpBIT)
-      return MainOp->getOpcode();
     MaskType Candidate = Mask & SeenBefore;
+    if (Candidate & MainOpBIT)
+      return MainOp->getOpcode();
     if (Candidate & ShlBIT)
       return Instruction::Shl;
     if (Candidate & AShrBIT)

>From ad7bec92d18a7bc908d6717bdb49b69843ab323c Mon Sep 17 00:00:00 2001
From: Han-Kuan Chen <hankuan.chen at sifive.com>
Date: Tue, 4 Mar 2025 00:22:05 -0800
Subject: [PATCH 27/38] add InterchangeableBinOp inside InstructionsState add
 InterchangeableBinOp::contain add InterchangeableBinOp::getMainOpConverter
 add InterchangeableBinOp::getAltOpConverter add
 InterchangeableBinOp::hasOpConverter

---
 .../Transforms/Vectorize/SLPVectorizer.cpp    | 35 +++++++++++++++++--
 1 file changed, 32 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 72ade643b1f33..5129d8653133f 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -957,6 +957,9 @@ class InterchangeableBinOp {
     SeenBefore |= opcodeToMask(I->getOpcode());
     return trySet(getInterchangeableMask(I));
   }
+  bool contain(Instruction *I) const {
+    return Mask & getInterchangeableMask(I);
+  }
   unsigned getOpcode() const {
     MaskType Candidate = Mask & SeenBefore;
     if (Candidate & MainOpBIT)
@@ -1085,6 +1088,9 @@ class InstructionsState {
   /// The main/alternate instruction. MainOp is also VL0.
   Instruction *MainOp = nullptr;
   Instruction *AltOp = nullptr;
+  // Only BinaryOperator will activate this.
+  std::optional<InterchangeableBinOp> MainOpConverter;
+  std::optional<InterchangeableBinOp> AltOpConverter;
 
 public:
   Instruction *getMainOp() const {
@@ -1102,11 +1108,26 @@ class InstructionsState {
 
   unsigned getAltOpcode() const { return getAltOp()->getOpcode(); }
 
+  const InterchangeableBinOp &getMainOpConverter() const {
+    assert(MainOpConverter && "MainOpConverter is not initialized.");
+    return *MainOpConverter;
+  }
+
+  const InterchangeableBinOp &getAltOpConverter() const {
+    assert(AltOpConverter && "AltOpConverter is not initialized.");
+    return *AltOpConverter;
+  }
+
+  bool hasOpConverter() const { return MainOpConverter && AltOpConverter; }
+
   /// Some of the instructions in the list have alternate opcodes.
   bool isAltShuffle() const { return getMainOp() != getAltOp(); }
 
   bool isOpcodeOrAlt(Instruction *I) const {
-    return isConvertible(I, MainOp, AltOp);
+    if (MainOpConverter)
+      return getMainOpConverter().contain(I) || getAltOpConverter().contain(I);
+    unsigned CheckedOpcode = I->getOpcode();
+    return getOpcode() == CheckedOpcode || getAltOpcode() == CheckedOpcode;
   }
 
   /// Checks if the current state is valid, i.e. has non-null MainOp
@@ -1115,8 +1136,12 @@ class InstructionsState {
   explicit operator bool() const { return valid(); }
 
   InstructionsState() = delete;
-  InstructionsState(Instruction *MainOp, Instruction *AltOp)
-      : MainOp(MainOp), AltOp(AltOp) {}
+  InstructionsState(
+      Instruction *MainOp, Instruction *AltOp,
+      const std::optional<InterchangeableBinOp> &MainOpConverter = {},
+      const std::optional<InterchangeableBinOp> &AltOpConverter = {})
+      : MainOp(MainOp), AltOp(AltOp), MainOpConverter(MainOpConverter),
+        AltOpConverter(AltOpConverter) {}
   static InstructionsState invalid() { return {nullptr, nullptr}; }
 };
 
@@ -1376,6 +1401,10 @@ static InstructionsState getSameOpcode(ArrayRef<Value *> VL,
     AltOp = AlternateInterchangeableConverter
                 ? FindOp(*AlternateInterchangeableConverter)
                 : MainOp;
+    return InstructionsState(MainOp, AltOp, InterchangeableConverter,
+                             AlternateInterchangeableConverter
+                                 ? AlternateInterchangeableConverter
+                                 : InterchangeableConverter);
   }
   return InstructionsState(MainOp, AltOp);
 }

>From ddcd456f749f1c4b12f7b96aa7c52197c0ac201a Mon Sep 17 00:00:00 2001
From: Han-Kuan Chen <hankuan.chen at sifive.com>
Date: Tue, 4 Mar 2025 01:22:53 -0800
Subject: [PATCH 28/38] refactor convertTo

---
 .../Transforms/Vectorize/SLPVectorizer.cpp    | 57 ++++++-------------
 1 file changed, 17 insertions(+), 40 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 5129d8653133f..a4e11280b52d5 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -1045,44 +1045,6 @@ class InterchangeableBinOp {
   }
 };
 
-static std::optional<InterchangeableBinOp> isConvertible(Instruction *From,
-                                                         Instruction *To) {
-  InterchangeableBinOp Converter(From);
-  if (Converter.add(From) && Converter.add(To))
-    return Converter;
-  return {};
-}
-
-static bool isConvertible(Instruction *I, Instruction *MainOp,
-                          Instruction *AltOp) {
-  assert(MainOp && "MainOp cannot be nullptr.");
-  if (I->getOpcode() == MainOp->getOpcode())
-    return true;
-  assert(AltOp && "AltOp cannot be nullptr.");
-  if (I->getOpcode() == AltOp->getOpcode())
-    return true;
-  if (!I->isBinaryOp())
-    return false;
-  return isConvertible(I, MainOp) || isConvertible(I, AltOp);
-}
-
-static std::pair<Instruction *, SmallVector<Value *>>
-convertTo(Instruction *I, Instruction *MainOp, Instruction *AltOp) {
-  assert(isConvertible(I, MainOp, AltOp) && "Cannot convert the instruction.");
-  if (I->getOpcode() == MainOp->getOpcode())
-    return std::make_pair(MainOp, SmallVector<Value *>(I->operands()));
-  // Prefer AltOp instead of interchangeable instruction of MainOp.
-  if (I->getOpcode() == AltOp->getOpcode())
-    return std::make_pair(AltOp, SmallVector<Value *>(I->operands()));
-  assert(I->isBinaryOp() && "Cannot convert the instruction.");
-  std::optional<InterchangeableBinOp> Converter(isConvertible(I, MainOp));
-  if (Converter)
-    return std::make_pair(MainOp, Converter->getOperand(MainOp));
-  Converter = isConvertible(I, AltOp);
-  assert(Converter && "Cannot convert the instruction.");
-  return std::make_pair(AltOp, Converter->getOperand(AltOp));
-}
-
 /// Main data required for vectorization of instructions.
 class InstructionsState {
   /// The main/alternate instruction. MainOp is also VL0.
@@ -1145,6 +1107,22 @@ class InstructionsState {
   static InstructionsState invalid() { return {nullptr, nullptr}; }
 };
 
+static std::pair<Instruction *, SmallVector<Value *>>
+convertTo(Instruction *I, const InstructionsState &S) {
+  Instruction *MainOp = S.getMainOp();
+  if (I->getOpcode() == MainOp->getOpcode())
+    return std::make_pair(MainOp, SmallVector<Value *>(I->operands()));
+  Instruction *AltOp = S.getAltOp();
+  // Prefer AltOp instead of interchangeable instruction of MainOp.
+  if (I->getOpcode() == AltOp->getOpcode())
+    return std::make_pair(AltOp, SmallVector<Value *>(I->operands()));
+  assert(S.hasOpConverter() && "Cannot convert the instruction.");
+  if (S.getMainOpConverter().contain(I))
+    return std::make_pair(MainOp, InterchangeableBinOp(I).getOperand(MainOp));
+  assert(S.getAltOpConverter().contain(I) && "Cannot convert the instruction.");
+  return std::make_pair(AltOp, InterchangeableBinOp(I).getOperand(AltOp));
+}
+
 } // end anonymous namespace
 
 /// \returns true if \p Opcode is allowed as part of the main/alternate
@@ -2808,8 +2786,7 @@ class BoUpSLP {
           }
           continue;
         }
-        auto [SelectedOp, Ops] =
-            convertTo(cast<Instruction>(VL[Lane]), MainOp, S.getAltOp());
+        auto [SelectedOp, Ops] = convertTo(cast<Instruction>(VL[Lane]), S);
         bool IsInverseOperation = !isCommutative(SelectedOp);
         for (unsigned OpIdx : seq<unsigned>(NumOperands)) {
           bool APO = (OpIdx == 0) ? false : IsInverseOperation;

>From 386c35539c0ff656fa4303fd2856efa219657ae6 Mon Sep 17 00:00:00 2001
From: Han-Kuan Chen <hankuan.chen at sifive.com>
Date: Tue, 4 Mar 2025 01:34:26 -0800
Subject: [PATCH 29/38] rename OpValue to Inst

---
 llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index a4e11280b52d5..6325032219d17 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -4596,7 +4596,7 @@ class BoUpSLP {
                       const InstructionsState &S);
 
     /// Un-bundles a group of instructions.
-    void cancelScheduling(ArrayRef<Value *> VL, Value *OpValue);
+    void cancelScheduling(ArrayRef<Value *> VL, Value *Inst);
 
     /// Allocates schedule data chunk.
     ScheduleData *allocateScheduleDataChunks();

>From 81698e4863dcb0e7c739d7ad778e210e363a13b9 Mon Sep 17 00:00:00 2001
From: Han-Kuan Chen <hankuan.chen at sifive.com>
Date: Wed, 5 Mar 2025 10:34:54 -0800
Subject: [PATCH 30/38] Revert "refactor convertTo"

This reverts commit ddcd456f749f1c4b12f7b96aa7c52197c0ac201a.
---
 .../Transforms/Vectorize/SLPVectorizer.cpp    | 57 +++++++++++++------
 1 file changed, 40 insertions(+), 17 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 6325032219d17..b3e66461fd55c 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -1045,6 +1045,44 @@ class InterchangeableBinOp {
   }
 };
 
+static std::optional<InterchangeableBinOp> isConvertible(Instruction *From,
+                                                         Instruction *To) {
+  InterchangeableBinOp Converter(From);
+  if (Converter.add(From) && Converter.add(To))
+    return Converter;
+  return {};
+}
+
+static bool isConvertible(Instruction *I, Instruction *MainOp,
+                          Instruction *AltOp) {
+  assert(MainOp && "MainOp cannot be nullptr.");
+  if (I->getOpcode() == MainOp->getOpcode())
+    return true;
+  assert(AltOp && "AltOp cannot be nullptr.");
+  if (I->getOpcode() == AltOp->getOpcode())
+    return true;
+  if (!I->isBinaryOp())
+    return false;
+  return isConvertible(I, MainOp) || isConvertible(I, AltOp);
+}
+
+static std::pair<Instruction *, SmallVector<Value *>>
+convertTo(Instruction *I, Instruction *MainOp, Instruction *AltOp) {
+  assert(isConvertible(I, MainOp, AltOp) && "Cannot convert the instruction.");
+  if (I->getOpcode() == MainOp->getOpcode())
+    return std::make_pair(MainOp, SmallVector<Value *>(I->operands()));
+  // Prefer AltOp instead of interchangeable instruction of MainOp.
+  if (I->getOpcode() == AltOp->getOpcode())
+    return std::make_pair(AltOp, SmallVector<Value *>(I->operands()));
+  assert(I->isBinaryOp() && "Cannot convert the instruction.");
+  std::optional<InterchangeableBinOp> Converter(isConvertible(I, MainOp));
+  if (Converter)
+    return std::make_pair(MainOp, Converter->getOperand(MainOp));
+  Converter = isConvertible(I, AltOp);
+  assert(Converter && "Cannot convert the instruction.");
+  return std::make_pair(AltOp, Converter->getOperand(AltOp));
+}
+
 /// Main data required for vectorization of instructions.
 class InstructionsState {
   /// The main/alternate instruction. MainOp is also VL0.
@@ -1107,22 +1145,6 @@ class InstructionsState {
   static InstructionsState invalid() { return {nullptr, nullptr}; }
 };
 
-static std::pair<Instruction *, SmallVector<Value *>>
-convertTo(Instruction *I, const InstructionsState &S) {
-  Instruction *MainOp = S.getMainOp();
-  if (I->getOpcode() == MainOp->getOpcode())
-    return std::make_pair(MainOp, SmallVector<Value *>(I->operands()));
-  Instruction *AltOp = S.getAltOp();
-  // Prefer AltOp instead of interchangeable instruction of MainOp.
-  if (I->getOpcode() == AltOp->getOpcode())
-    return std::make_pair(AltOp, SmallVector<Value *>(I->operands()));
-  assert(S.hasOpConverter() && "Cannot convert the instruction.");
-  if (S.getMainOpConverter().contain(I))
-    return std::make_pair(MainOp, InterchangeableBinOp(I).getOperand(MainOp));
-  assert(S.getAltOpConverter().contain(I) && "Cannot convert the instruction.");
-  return std::make_pair(AltOp, InterchangeableBinOp(I).getOperand(AltOp));
-}
-
 } // end anonymous namespace
 
 /// \returns true if \p Opcode is allowed as part of the main/alternate
@@ -2786,7 +2808,8 @@ class BoUpSLP {
           }
           continue;
         }
-        auto [SelectedOp, Ops] = convertTo(cast<Instruction>(VL[Lane]), S);
+        auto [SelectedOp, Ops] =
+            convertTo(cast<Instruction>(VL[Lane]), MainOp, S.getAltOp());
         bool IsInverseOperation = !isCommutative(SelectedOp);
         for (unsigned OpIdx : seq<unsigned>(NumOperands)) {
           bool APO = (OpIdx == 0) ? false : IsInverseOperation;

>From 9af04c0bc99a807fadaf89ba19a9230ad45aad1a Mon Sep 17 00:00:00 2001
From: Han-Kuan Chen <hankuan.chen at sifive.com>
Date: Wed, 5 Mar 2025 10:35:04 -0800
Subject: [PATCH 31/38] Revert "add InterchangeableBinOp inside
 InstructionsState"

This reverts commit ad7bec92d18a7bc908d6717bdb49b69843ab323c.
---
 .../Transforms/Vectorize/SLPVectorizer.cpp    | 35 ++-----------------
 1 file changed, 3 insertions(+), 32 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index b3e66461fd55c..65b7ecfd0da65 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -957,9 +957,6 @@ class InterchangeableBinOp {
     SeenBefore |= opcodeToMask(I->getOpcode());
     return trySet(getInterchangeableMask(I));
   }
-  bool contain(Instruction *I) const {
-    return Mask & getInterchangeableMask(I);
-  }
   unsigned getOpcode() const {
     MaskType Candidate = Mask & SeenBefore;
     if (Candidate & MainOpBIT)
@@ -1088,9 +1085,6 @@ class InstructionsState {
   /// The main/alternate instruction. MainOp is also VL0.
   Instruction *MainOp = nullptr;
   Instruction *AltOp = nullptr;
-  // Only BinaryOperator will activate this.
-  std::optional<InterchangeableBinOp> MainOpConverter;
-  std::optional<InterchangeableBinOp> AltOpConverter;
 
 public:
   Instruction *getMainOp() const {
@@ -1108,26 +1102,11 @@ class InstructionsState {
 
   unsigned getAltOpcode() const { return getAltOp()->getOpcode(); }
 
-  const InterchangeableBinOp &getMainOpConverter() const {
-    assert(MainOpConverter && "MainOpConverter is not initialized.");
-    return *MainOpConverter;
-  }
-
-  const InterchangeableBinOp &getAltOpConverter() const {
-    assert(AltOpConverter && "AltOpConverter is not initialized.");
-    return *AltOpConverter;
-  }
-
-  bool hasOpConverter() const { return MainOpConverter && AltOpConverter; }
-
   /// Some of the instructions in the list have alternate opcodes.
   bool isAltShuffle() const { return getMainOp() != getAltOp(); }
 
   bool isOpcodeOrAlt(Instruction *I) const {
-    if (MainOpConverter)
-      return getMainOpConverter().contain(I) || getAltOpConverter().contain(I);
-    unsigned CheckedOpcode = I->getOpcode();
-    return getOpcode() == CheckedOpcode || getAltOpcode() == CheckedOpcode;
+    return isConvertible(I, MainOp, AltOp);
   }
 
   /// Checks if the current state is valid, i.e. has non-null MainOp
@@ -1136,12 +1115,8 @@ class InstructionsState {
   explicit operator bool() const { return valid(); }
 
   InstructionsState() = delete;
-  InstructionsState(
-      Instruction *MainOp, Instruction *AltOp,
-      const std::optional<InterchangeableBinOp> &MainOpConverter = {},
-      const std::optional<InterchangeableBinOp> &AltOpConverter = {})
-      : MainOp(MainOp), AltOp(AltOp), MainOpConverter(MainOpConverter),
-        AltOpConverter(AltOpConverter) {}
+  InstructionsState(Instruction *MainOp, Instruction *AltOp)
+      : MainOp(MainOp), AltOp(AltOp) {}
   static InstructionsState invalid() { return {nullptr, nullptr}; }
 };
 
@@ -1401,10 +1376,6 @@ static InstructionsState getSameOpcode(ArrayRef<Value *> VL,
     AltOp = AlternateInterchangeableConverter
                 ? FindOp(*AlternateInterchangeableConverter)
                 : MainOp;
-    return InstructionsState(MainOp, AltOp, InterchangeableConverter,
-                             AlternateInterchangeableConverter
-                                 ? AlternateInterchangeableConverter
-                                 : InterchangeableConverter);
   }
   return InstructionsState(MainOp, AltOp);
 }

>From 62f0a1d96fa5aa34244171bd91d9c876aed8dc9e Mon Sep 17 00:00:00 2001
From: Han-Kuan Chen <hankuan.chen at sifive.com>
Date: Wed, 12 Mar 2025 01:31:51 -0700
Subject: [PATCH 32/38] reduce binary_search usage

---
 .../Transforms/Vectorize/SLPVectorizer.cpp    | 98 +++++++++----------
 1 file changed, 49 insertions(+), 49 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 65b7ecfd0da65..6930e115c6d06 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -827,7 +827,6 @@ class InterchangeableBinOp {
       Instruction::Add,  Instruction::Sub, Instruction::Mul, Instruction::Shl,
       Instruction::AShr, Instruction::And, Instruction::Or,  Instruction::Xor};
   enum : MaskType {
-    NOBIT = 0,
     ShlBIT = 0b1,
     AShrBIT = 0b10,
     MulBIT = 0b100,
@@ -877,33 +876,58 @@ class InterchangeableBinOp {
     return std::make_pair(nullptr, Pos);
   }
 
-  // Prefer Shl, AShr, Mul, Add, Sub, And, Or and Xor over MainOp.
-  MaskType opcodeToMask(unsigned Opcode) const {
+  // Return false allows getSameOpcode to find an alternate instruction.
+  // Directly setting the mask will destroy the mask state, preventing us from
+  // determining which instruction the MainOp should convert to.
+  bool trySet(MaskType X) {
+    if (Mask & X) {
+      Mask &= X;
+      return true;
+    }
+    return false;
+  }
+
+public:
+  InterchangeableBinOp(Instruction *MainOp) : MainOp(MainOp) {
+    assert(is_sorted(SupportedOp) && "SupportedOp is not sorted.");
+  }
+  bool add(Instruction *I) {
+    unsigned Opcode = I->getOpcode();
+    MaskType OpcodeInMaskForm;
+    // Prefer Shl, AShr, Mul, Add, Sub, And, Or and Xor over MainOp.
     switch (Opcode) {
     case Instruction::Shl:
-      return ShlBIT;
+      OpcodeInMaskForm = ShlBIT;
+      break;
     case Instruction::AShr:
-      return AShrBIT;
+      OpcodeInMaskForm = AShrBIT;
+      break;
     case Instruction::Mul:
-      return MulBIT;
+      OpcodeInMaskForm = MulBIT;
+      break;
     case Instruction::Add:
-      return AddBIT;
+      OpcodeInMaskForm = AddBIT;
+      break;
     case Instruction::Sub:
-      return SubBIT;
+      OpcodeInMaskForm = SubBIT;
+      break;
     case Instruction::And:
-      return AndBIT;
+      OpcodeInMaskForm = AndBIT;
+      break;
     case Instruction::Or:
-      return OrBIT;
+      OpcodeInMaskForm = OrBIT;
+      break;
     case Instruction::Xor:
-      return XorBIT;
+      OpcodeInMaskForm = XorBIT;
+      break;
+    default:
+      if (Opcode == MainOp->getOpcode()) {
+        SeenBefore |= MainOpBIT;
+        return trySet(MainOpBIT);
+      }
+      return false;
     }
-    return Opcode == MainOp->getOpcode() ? MainOpBIT : NOBIT;
-  }
-
-  MaskType getInterchangeableMask(Instruction *I) const {
-    unsigned Opcode = I->getOpcode();
-    if (!binary_search(SupportedOp, Opcode))
-      return opcodeToMask(Opcode);
+    SeenBefore |= OpcodeInMaskForm;
     ConstantInt *CI = isBinOpWithConstantInt(I).first;
     if (CI) {
       constexpr MaskType CanBeAll =
@@ -911,51 +935,27 @@ class InterchangeableBinOp {
       const APInt &CIValue = CI->getValue();
       switch (Opcode) {
       case Instruction::Shl:
-        if (CIValue.isZero())
-          return CanBeAll;
-        return MulBIT | ShlBIT;
+        return trySet(CIValue.isZero() ? CanBeAll : MulBIT | ShlBIT);
       case Instruction::Mul:
         if (CIValue.isOne())
-          return CanBeAll;
+          return trySet(CanBeAll);
         if (CIValue.isPowerOf2())
-          return MulBIT | ShlBIT;
+          return trySet(MulBIT | ShlBIT);
         break;
       case Instruction::Add:
       case Instruction::Sub:
-        if (CIValue.isZero())
-          return CanBeAll;
-        return SubBIT | AddBIT;
+        return trySet(CIValue.isZero() ? CanBeAll : SubBIT | AddBIT);
       case Instruction::And:
         if (CIValue.isAllOnes())
-          return CanBeAll;
+          return trySet(CanBeAll);
         break;
       default:
         if (CIValue.isZero())
-          return CanBeAll;
+          return trySet(CanBeAll);
         break;
       }
     }
-    return opcodeToMask(Opcode);
-  }
-
-  // Return false allows getSameOpcode to find an alternate instruction.
-  // Directly setting the mask will destroy the mask state, preventing us from
-  // determining which instruction the MainOp should convert to.
-  bool trySet(MaskType X) {
-    if (Mask & X) {
-      Mask &= X;
-      return true;
-    }
-    return false;
-  }
-
-public:
-  InterchangeableBinOp(Instruction *MainOp) : MainOp(MainOp) {
-    assert(is_sorted(SupportedOp) && "SupportedOp is not sorted.");
-  }
-  bool add(Instruction *I) {
-    SeenBefore |= opcodeToMask(I->getOpcode());
-    return trySet(getInterchangeableMask(I));
+    return trySet(OpcodeInMaskForm);
   }
   unsigned getOpcode() const {
     MaskType Candidate = Mask & SeenBefore;

>From 28f2d5865ef5c49ed048a09c7f6cd75b993984d3 Mon Sep 17 00:00:00 2001
From: Han-Kuan Chen <hankuan.chen at sifive.com>
Date: Wed, 12 Mar 2025 02:19:40 -0700
Subject: [PATCH 33/38] refactor isBinOpWithConstantInt

---
 .../Transforms/Vectorize/SLPVectorizer.cpp    | 27 +++++++------------
 1 file changed, 10 insertions(+), 17 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 6930e115c6d06..83096bb626959 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -855,25 +855,18 @@ class InterchangeableBinOp {
   // side (0).
   static std::pair<ConstantInt *, unsigned>
   isBinOpWithConstantInt(Instruction *I) {
+    assert(isa<BinaryOperator>(I));
     unsigned Opcode = I->getOpcode();
     assert(binary_search(SupportedOp, Opcode) && "Unsupported opcode.");
-    unsigned Pos = 1;
-    Constant *C;
-    if (!match(I, m_BinOp(m_Value(), m_Constant(C)))) {
-      if (Opcode == Instruction::Sub || Opcode == Instruction::Shl ||
-          Opcode == Instruction::AShr)
-        return std::make_pair(nullptr, Pos);
-      if (!match(I, m_BinOp(m_Constant(C), m_Value())))
-        return std::make_pair(nullptr, Pos);
-      Pos = 0;
-    }
-    if (auto *CI = dyn_cast<ConstantInt>(C))
-      return std::make_pair(CI, Pos);
-    if (auto *CDV = dyn_cast<ConstantDataVector>(C)) {
-      if (auto *CI = dyn_cast_if_present<ConstantInt>(CDV->getSplatValue()))
-        return std::make_pair(CI, Pos);
-    }
-    return std::make_pair(nullptr, Pos);
+    auto *BinOp = cast<BinaryOperator>(I);
+    if (auto *CI = dyn_cast<ConstantInt>(BinOp->getOperand(1)))
+      return {CI, 1};
+    if (Opcode == Instruction::Sub || Opcode == Instruction::Shl ||
+        Opcode == Instruction::AShr)
+      return {nullptr, 0};
+    if (auto *CI = dyn_cast<ConstantInt>(BinOp->getOperand(0)))
+      return {CI, 0};
+    return {nullptr, 0};
   }
 
   // Return false allows getSameOpcode to find an alternate instruction.

>From 8fff436233672f31b185439f3ac59e7b9cc18491 Mon Sep 17 00:00:00 2001
From: Han-Kuan Chen <hankuan.chen at sifive.com>
Date: Thu, 13 Mar 2025 01:27:47 -0700
Subject: [PATCH 34/38] remove static: functions are in anonymous namespace
 already

---
 llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 83096bb626959..19483edde1d8b 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -1035,16 +1035,15 @@ class InterchangeableBinOp {
   }
 };
 
-static std::optional<InterchangeableBinOp> isConvertible(Instruction *From,
-                                                         Instruction *To) {
+std::optional<InterchangeableBinOp> isConvertible(Instruction *From,
+                                                  Instruction *To) {
   InterchangeableBinOp Converter(From);
   if (Converter.add(From) && Converter.add(To))
     return Converter;
   return {};
 }
 
-static bool isConvertible(Instruction *I, Instruction *MainOp,
-                          Instruction *AltOp) {
+bool isConvertible(Instruction *I, Instruction *MainOp, Instruction *AltOp) {
   assert(MainOp && "MainOp cannot be nullptr.");
   if (I->getOpcode() == MainOp->getOpcode())
     return true;
@@ -1056,7 +1055,7 @@ static bool isConvertible(Instruction *I, Instruction *MainOp,
   return isConvertible(I, MainOp) || isConvertible(I, AltOp);
 }
 
-static std::pair<Instruction *, SmallVector<Value *>>
+std::pair<Instruction *, SmallVector<Value *>>
 convertTo(Instruction *I, Instruction *MainOp, Instruction *AltOp) {
   assert(isConvertible(I, MainOp, AltOp) && "Cannot convert the instruction.");
   if (I->getOpcode() == MainOp->getOpcode())

>From 9f9913da2750d19adf82b5ed324fdb478dc0c52d Mon Sep 17 00:00:00 2001
From: Han-Kuan Chen <hankuan.chen at sifive.com>
Date: Wed, 12 Mar 2025 04:34:17 -0700
Subject: [PATCH 35/38] refactor InterchangeableBinOp to BinOpSameOpcodeHelper

---
 .../Transforms/Vectorize/SLPVectorizer.cpp    | 377 +++++++++---------
 1 file changed, 193 insertions(+), 184 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 19483edde1d8b..76b19adce3128 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -809,18 +809,24 @@ static std::optional<unsigned> getExtractIndex(Instruction *E) {
 }
 
 namespace {
-
-/// Base class for representing instructions that can be interchanged with other
-/// equivalent forms. For example, multiplication by a power of 2 can be
-/// interchanged with a left shift.
+/// \returns true if \p Opcode is allowed as part of the main/alternate
+/// instruction for SLP vectorization.
 ///
-/// The class maintains a reference to the main instruction (MainOp) and
-/// provides methods to:
-/// - Check if the incoming instruction can use the same instruction as MainOp
-/// (add)
-/// - Get the opcode for the interchangeable form (getOpcode)
-/// - Get the operands for the interchangeable form (getOperand)
-class InterchangeableBinOp {
+/// Example of unsupported opcode is SDIV that can potentially cause UB if the
+/// "shuffled out" lane would result in division by zero.
+bool isValidForAlternation(unsigned Opcode) {
+  if (Instruction::isIntDivRem(Opcode))
+    return false;
+
+  return true;
+}
+
+/// Helper class that determines whether VL can use the same opcode.
+/// Alternate instructions are supported. In addition, it supports
+/// interchangeable instructions. An interchangeable instruction is one that
+/// can be converted to another instruction with the same semantics. For
+/// example, x << 1 is equal to x * 2, and x * 1 is equal to x | 0.
+class BinOpSameOpcodeHelper {
   using MaskType = std::uint_fast16_t;
   // Sort SupportedOp because it is used by binary_search.
   constexpr static std::initializer_list<unsigned> SupportedOp = {
@@ -838,16 +844,6 @@ class InterchangeableBinOp {
     MainOpBIT = 0b100000000,
     LLVM_MARK_AS_BITMASK_ENUM(MainOpBIT)
   };
-  Instruction *MainOp = nullptr;
-  // The bit it sets represents whether MainOp can be converted to.
-  MaskType Mask = MainOpBIT | XorBIT | OrBIT | AndBIT | SubBIT | AddBIT |
-                  MulBIT | AShrBIT | ShlBIT;
-  // We cannot create an interchangeable instruction that does not exist in VL.
-  // For example, VL [x + 0, y * 1] can be converted to [x << 0, y << 0], but
-  // 'shl' does not exist in VL. In the end, we convert VL to [x * 1, y * 1].
-  // SeenBefore is used to know what operations have been seen before.
-  MaskType SeenBefore = 0;
-
   // Return a non-nullptr if either operand of I is a ConstantInt.
   // The second return value represents the operand position. We check the
   // right-hand side first (1). If the right hand side is not a ConstantInt and
@@ -855,7 +851,6 @@ class InterchangeableBinOp {
   // side (0).
   static std::pair<ConstantInt *, unsigned>
   isBinOpWithConstantInt(Instruction *I) {
-    assert(isa<BinaryOperator>(I));
     unsigned Opcode = I->getOpcode();
     assert(binary_search(SupportedOp, Opcode) && "Unsupported opcode.");
     auto *BinOp = cast<BinaryOperator>(I);
@@ -868,23 +863,140 @@ class InterchangeableBinOp {
       return {CI, 0};
     return {nullptr, 0};
   }
-
-  // Return false allows getSameOpcode to find an alternate instruction.
-  // Directly setting the mask will destroy the mask state, preventing us from
-  // determining which instruction the MainOp should convert to.
-  bool trySet(MaskType X) {
-    if (Mask & X) {
-      Mask &= X;
-      return true;
+  struct InterchangeableInfo {
+    Instruction *I;
+    // The bit it sets represents whether MainOp can be converted to.
+    MaskType Mask = MainOpBIT | XorBIT | OrBIT | AndBIT | SubBIT | AddBIT |
+                    MulBIT | AShrBIT | ShlBIT;
+    // We cannot create an interchangeable instruction that does not exist in
+    // VL. For example, VL [x + 0, y * 1] can be converted to [x << 0, y << 0],
+    // but << does not exist in VL. In the end, we convert VL to [x * 1, y * 1].
+    // SeenBefore is used to know what operations have been seen before.
+    MaskType SeenBefore = 0;
+    InterchangeableInfo(Instruction *I) : I(I) {}
+    // Returning false allows BinOpSameOpcodeHelper to find an alternate
+    // instruction. Directly setting the mask would destroy the mask state,
+    // preventing us from determining which instruction it should convert to.
+    bool trySet(MaskType OpcodeInMaskForm, MaskType InterchangeableMask) {
+      if (Mask & InterchangeableMask) {
+        SeenBefore |= OpcodeInMaskForm;
+        Mask &= InterchangeableMask;
+        return true;
+      }
+      return false;
     }
-    return false;
+    bool equal(unsigned Opcode) {
+      if (Opcode == I->getOpcode())
+        return trySet(MainOpBIT, MainOpBIT);
+      return false;
+    }
+    unsigned getOpcode() const {
+      MaskType Candidate = Mask & SeenBefore;
+      if (Candidate & MainOpBIT)
+        return I->getOpcode();
+      if (Candidate & ShlBIT)
+        return Instruction::Shl;
+      if (Candidate & AShrBIT)
+        return Instruction::AShr;
+      if (Candidate & MulBIT)
+        return Instruction::Mul;
+      if (Candidate & AddBIT)
+        return Instruction::Add;
+      if (Candidate & SubBIT)
+        return Instruction::Sub;
+      if (Candidate & AndBIT)
+        return Instruction::And;
+      if (Candidate & OrBIT)
+        return Instruction::Or;
+      if (Candidate & XorBIT)
+        return Instruction::Xor;
+      llvm_unreachable("Cannot find interchangeable instruction.");
+    }
+    SmallVector<Value *> getOperand(Instruction *To) const {
+      unsigned ToOpcode = To->getOpcode();
+      unsigned FromOpcode = I->getOpcode();
+      if (FromOpcode == ToOpcode)
+        return SmallVector<Value *>(I->operands());
+      assert(binary_search(SupportedOp, ToOpcode) && "Unsupported opcode.");
+      auto [CI, Pos] = isBinOpWithConstantInt(I);
+      const APInt &FromCIValue = CI->getValue();
+      unsigned FromCIValueBitWidth = FromCIValue.getBitWidth();
+      APInt ToCIValue;
+      switch (FromOpcode) {
+      case Instruction::Shl:
+        if (ToOpcode == Instruction::Mul) {
+          ToCIValue = APInt::getOneBitSet(FromCIValueBitWidth,
+                                          FromCIValue.getZExtValue());
+        } else {
+          assert(FromCIValue.isZero() && "Cannot convert the instruction.");
+          ToCIValue = ToOpcode == Instruction::And
+                          ? APInt::getAllOnes(FromCIValueBitWidth)
+                          : APInt::getZero(FromCIValueBitWidth);
+        }
+        break;
+      case Instruction::Mul:
+        assert(FromCIValue.isPowerOf2() && "Cannot convert the instruction.");
+        if (ToOpcode == Instruction::Shl) {
+          ToCIValue = APInt(FromCIValueBitWidth, FromCIValue.logBase2());
+        } else {
+          assert(FromCIValue.isOne() && "Cannot convert the instruction.");
+          ToCIValue = ToOpcode == Instruction::And
+                          ? APInt::getAllOnes(FromCIValueBitWidth)
+                          : APInt::getZero(FromCIValueBitWidth);
+        }
+        break;
+      case Instruction::Add:
+      case Instruction::Sub:
+        if (FromCIValue.isZero()) {
+          ToCIValue = APInt::getZero(FromCIValueBitWidth);
+        } else {
+          assert(is_contained({Instruction::Add, Instruction::Sub}, ToOpcode) &&
+                 "Cannot convert the instruction.");
+          ToCIValue = FromCIValue;
+          ToCIValue.negate();
+        }
+        break;
+      case Instruction::And:
+        assert(FromCIValue.isAllOnes() && "Cannot convert the instruction.");
+        ToCIValue = ToOpcode == Instruction::Mul
+                        ? APInt::getOneBitSet(FromCIValueBitWidth, 0)
+                        : APInt::getZero(FromCIValueBitWidth);
+        break;
+      default:
+        assert(FromCIValue.isZero() && "Cannot convert the instruction.");
+        ToCIValue = APInt::getZero(FromCIValueBitWidth);
+        break;
+      }
+      Value *LHS = I->getOperand(1 - Pos);
+      Constant *RHS =
+          ConstantInt::get(I->getOperand(Pos)->getType(), ToCIValue);
+      if (Pos == 1)
+        return SmallVector<Value *>({LHS, RHS});
+      return SmallVector<Value *>({RHS, LHS});
+    }
+  };
+  InterchangeableInfo MainOp;
+  InterchangeableInfo AltOp;
+  bool isValidForAlternation(Instruction *I) const {
+    return ::isValidForAlternation(MainOp.I->getOpcode()) &&
+           ::isValidForAlternation(I->getOpcode());
+  }
+  bool initializeAltOp(Instruction *I) {
+    if (!AltOp.I) {
+      if (!isValidForAlternation(I))
+        return false;
+      AltOp.I = I;
+    }
+    return true;
   }
 
 public:
-  InterchangeableBinOp(Instruction *MainOp) : MainOp(MainOp) {
+  BinOpSameOpcodeHelper(Instruction *MainOp, Instruction *AltOp = nullptr)
+      : MainOp(MainOp), AltOp(AltOp) {
     assert(is_sorted(SupportedOp) && "SupportedOp is not sorted.");
   }
   bool add(Instruction *I) {
+    assert(isa<BinaryOperator>(I));
     unsigned Opcode = I->getOpcode();
     MaskType OpcodeInMaskForm;
     // Prefer Shl, AShr, Mul, Add, Sub, And, Or and Xor over MainOp.
@@ -914,13 +1026,10 @@ class InterchangeableBinOp {
       OpcodeInMaskForm = XorBIT;
       break;
     default:
-      if (Opcode == MainOp->getOpcode()) {
-        SeenBefore |= MainOpBIT;
-        return trySet(MainOpBIT);
-      }
-      return false;
+      return MainOp.equal(Opcode) ||
+             (initializeAltOp(I) && AltOp.equal(Opcode));
     }
-    SeenBefore |= OpcodeInMaskForm;
+    MaskType InterchangeableMask = OpcodeInMaskForm;
     ConstantInt *CI = isBinOpWithConstantInt(I).first;
     if (CI) {
       constexpr MaskType CanBeAll =
@@ -928,121 +1037,47 @@ class InterchangeableBinOp {
       const APInt &CIValue = CI->getValue();
       switch (Opcode) {
       case Instruction::Shl:
-        return trySet(CIValue.isZero() ? CanBeAll : MulBIT | ShlBIT);
+        InterchangeableMask = CIValue.isZero() ? CanBeAll : MulBIT | ShlBIT;
+        break;
       case Instruction::Mul:
-        if (CIValue.isOne())
-          return trySet(CanBeAll);
+        if (CIValue.isOne()) {
+          InterchangeableMask = CanBeAll;
+          break;
+        }
         if (CIValue.isPowerOf2())
-          return trySet(MulBIT | ShlBIT);
+          InterchangeableMask = MulBIT | ShlBIT;
         break;
       case Instruction::Add:
       case Instruction::Sub:
-        return trySet(CIValue.isZero() ? CanBeAll : SubBIT | AddBIT);
+        InterchangeableMask = CIValue.isZero() ? CanBeAll : SubBIT | AddBIT;
+        break;
       case Instruction::And:
         if (CIValue.isAllOnes())
-          return trySet(CanBeAll);
+          InterchangeableMask = CanBeAll;
         break;
       default:
         if (CIValue.isZero())
-          return trySet(CanBeAll);
+          InterchangeableMask = CanBeAll;
         break;
       }
     }
-    return trySet(OpcodeInMaskForm);
-  }
-  unsigned getOpcode() const {
-    MaskType Candidate = Mask & SeenBefore;
-    if (Candidate & MainOpBIT)
-      return MainOp->getOpcode();
-    if (Candidate & ShlBIT)
-      return Instruction::Shl;
-    if (Candidate & AShrBIT)
-      return Instruction::AShr;
-    if (Candidate & MulBIT)
-      return Instruction::Mul;
-    if (Candidate & AddBIT)
-      return Instruction::Add;
-    if (Candidate & SubBIT)
-      return Instruction::Sub;
-    if (Candidate & AndBIT)
-      return Instruction::And;
-    if (Candidate & OrBIT)
-      return Instruction::Or;
-    if (Candidate & XorBIT)
-      return Instruction::Xor;
-    llvm_unreachable("Cannot find interchangeable instruction.");
-  }
-  SmallVector<Value *> getOperand(Instruction *I) const {
-    unsigned ToOpcode = I->getOpcode();
-    unsigned FromOpcode = MainOp->getOpcode();
-    if (FromOpcode == ToOpcode)
-      return SmallVector<Value *>(MainOp->operands());
-    assert(binary_search(SupportedOp, ToOpcode) && "Unsupported opcode.");
-    auto [CI, Pos] = isBinOpWithConstantInt(MainOp);
-    const APInt &FromCIValue = CI->getValue();
-    unsigned FromCIValueBitWidth = FromCIValue.getBitWidth();
-    APInt ToCIValue;
-    switch (FromOpcode) {
-    case Instruction::Shl:
-      if (ToOpcode == Instruction::Mul) {
-        ToCIValue = APInt::getOneBitSet(FromCIValueBitWidth,
-                                        FromCIValue.getZExtValue());
-      } else {
-        assert(FromCIValue.isZero() && "Cannot convert the instruction.");
-        ToCIValue = ToOpcode == Instruction::And
-                        ? APInt::getAllOnes(FromCIValueBitWidth)
-                        : APInt::getZero(FromCIValueBitWidth);
-      }
-      break;
-    case Instruction::Mul:
-      assert(FromCIValue.isPowerOf2() && "Cannot convert the instruction.");
-      if (ToOpcode == Instruction::Shl) {
-        ToCIValue = APInt(FromCIValueBitWidth, FromCIValue.logBase2());
-      } else {
-        assert(FromCIValue.isOne() && "Cannot convert the instruction.");
-        ToCIValue = ToOpcode == Instruction::And
-                        ? APInt::getAllOnes(FromCIValueBitWidth)
-                        : APInt::getZero(FromCIValueBitWidth);
-      }
-      break;
-    case Instruction::Add:
-    case Instruction::Sub:
-      if (FromCIValue.isZero()) {
-        ToCIValue = APInt::getZero(FromCIValueBitWidth);
-      } else {
-        assert(is_contained({Instruction::Add, Instruction::Sub}, ToOpcode) &&
-               "Cannot convert the instruction.");
-        ToCIValue = FromCIValue;
-        ToCIValue.negate();
-      }
-      break;
-    case Instruction::And:
-      assert(FromCIValue.isAllOnes() && "Cannot convert the instruction.");
-      ToCIValue = ToOpcode == Instruction::Mul
-                      ? APInt::getOneBitSet(FromCIValueBitWidth, 0)
-                      : APInt::getZero(FromCIValueBitWidth);
-      break;
-    default:
-      ToCIValue = APInt::getZero(FromCIValueBitWidth);
-      break;
-    }
-    Value *LHS = MainOp->getOperand(1 - Pos);
-    Constant *RHS =
-        ConstantInt::get(MainOp->getOperand(Pos)->getType(), ToCIValue);
-    if (Pos == 1)
-      return SmallVector<Value *>({LHS, RHS});
-    return SmallVector<Value *>({RHS, LHS});
+    return MainOp.trySet(OpcodeInMaskForm, InterchangeableMask) ||
+           (initializeAltOp(I) &&
+            AltOp.trySet(OpcodeInMaskForm, InterchangeableMask));
+  }
+  unsigned getMainOpcode() const { return MainOp.getOpcode(); }
+  bool hasAltOp() const { return AltOp.I; }
+  unsigned getAltOpcode() const {
+    return hasAltOp() ? AltOp.getOpcode() : getMainOpcode();
+  }
+  SmallVector<Value *> getMainOperand(Instruction *I) const {
+    return MainOp.getOperand(I);
+  }
+  SmallVector<Value *> getAltOperand(Instruction *I) const {
+    return AltOp.getOperand(I);
   }
 };
 
-std::optional<InterchangeableBinOp> isConvertible(Instruction *From,
-                                                  Instruction *To) {
-  InterchangeableBinOp Converter(From);
-  if (Converter.add(From) && Converter.add(To))
-    return Converter;
-  return {};
-}
-
 bool isConvertible(Instruction *I, Instruction *MainOp, Instruction *AltOp) {
   assert(MainOp && "MainOp cannot be nullptr.");
   if (I->getOpcode() == MainOp->getOpcode())
@@ -1052,7 +1087,8 @@ bool isConvertible(Instruction *I, Instruction *MainOp, Instruction *AltOp) {
     return true;
   if (!I->isBinaryOp())
     return false;
-  return isConvertible(I, MainOp) || isConvertible(I, AltOp);
+  BinOpSameOpcodeHelper Converter(MainOp, AltOp);
+  return Converter.add(I) && Converter.add(MainOp) && Converter.add(AltOp);
 }
 
 std::pair<Instruction *, SmallVector<Value *>>
@@ -1064,12 +1100,10 @@ convertTo(Instruction *I, Instruction *MainOp, Instruction *AltOp) {
   if (I->getOpcode() == AltOp->getOpcode())
     return std::make_pair(AltOp, SmallVector<Value *>(I->operands()));
   assert(I->isBinaryOp() && "Cannot convert the instruction.");
-  std::optional<InterchangeableBinOp> Converter(isConvertible(I, MainOp));
-  if (Converter)
-    return std::make_pair(MainOp, Converter->getOperand(MainOp));
-  Converter = isConvertible(I, AltOp);
-  assert(Converter && "Cannot convert the instruction.");
-  return std::make_pair(AltOp, Converter->getOperand(AltOp));
+  BinOpSameOpcodeHelper Converter(I);
+  if (Converter.add(I) && Converter.add(MainOp) && !Converter.hasAltOp())
+    return std::make_pair(MainOp, Converter.getMainOperand(MainOp));
+  return std::make_pair(AltOp, Converter.getAltOperand(AltOp));
 }
 
 /// Main data required for vectorization of instructions.
@@ -1114,18 +1148,6 @@ class InstructionsState {
 
 } // end anonymous namespace
 
-/// \returns true if \p Opcode is allowed as part of the main/alternate
-/// instruction for SLP vectorization.
-///
-/// Example of unsupported opcode is SDIV that can potentially cause UB if the
-/// "shuffled out" lane would result in division by zero.
-static bool isValidForAlternation(unsigned Opcode) {
-  if (Instruction::isIntDivRem(Opcode))
-    return false;
-
-  return true;
-}
-
 static InstructionsState getSameOpcode(ArrayRef<Value *> VL,
                                        const TargetLibraryInfo &TLI);
 
@@ -1183,6 +1205,17 @@ static InstructionsState getSameOpcode(ArrayRef<Value *> VL,
       (VL.size() == 2 && InstCnt < 2))
     return InstructionsState::invalid();
 
+  auto FindInstructionWithOpcode = [&](unsigned Opcode) {
+    for (Value *V : VL) {
+      if (isa<PoisonValue>(V))
+        continue;
+      auto *Inst = cast<Instruction>(V);
+      if (Inst->getOpcode() == Opcode)
+        return Inst;
+    }
+    llvm_unreachable("Opcode not found.");
+  };
+
   bool IsCastOp = isa<CastInst>(MainOp);
   bool IsBinOp = isa<BinaryOperator>(MainOp);
   bool IsCmpOp = isa<CmpInst>(MainOp);
@@ -1192,8 +1225,7 @@ static InstructionsState getSameOpcode(ArrayRef<Value *> VL,
   unsigned Opcode = MainOp->getOpcode();
   unsigned AltOpcode = Opcode;
 
-  InterchangeableBinOp InterchangeableConverter(MainOp);
-  std::optional<InterchangeableBinOp> AlternateInterchangeableConverter;
+  BinOpSameOpcodeHelper BinOpHelper(MainOp);
   bool SwappedPredsCompatible = IsCmpOp && [&]() {
     SetVector<unsigned> UniquePreds, UniqueNonSwappedPreds;
     UniquePreds.insert(BasePred);
@@ -1240,15 +1272,7 @@ static InstructionsState getSameOpcode(ArrayRef<Value *> VL,
       return InstructionsState::invalid();
     unsigned InstOpcode = I->getOpcode();
     if (IsBinOp && isa<BinaryOperator>(I)) {
-      if (InterchangeableConverter.add(I))
-        continue;
-      if (!AlternateInterchangeableConverter) {
-        if (!isValidForAlternation(Opcode) ||
-            !isValidForAlternation(InstOpcode))
-          return InstructionsState::invalid();
-        AlternateInterchangeableConverter = InterchangeableBinOp(I);
-      }
-      if (AlternateInterchangeableConverter->add(I))
+      if (BinOpHelper.add(I))
         continue;
     } else if (IsCastOp && isa<CastInst>(I)) {
       Value *Op0 = MainOp->getOperand(0);
@@ -1351,23 +1375,8 @@ static InstructionsState getSameOpcode(ArrayRef<Value *> VL,
   }
 
   if (IsBinOp) {
-    auto FindOp =
-        [&](const InterchangeableBinOp &Converter) {
-          unsigned InterchangeableInstructionOpcode = Converter.getOpcode();
-          for (Value *V : VL) {
-            if (isa<PoisonValue>(V))
-              continue;
-            auto *Inst = cast<Instruction>(V);
-            if (Inst->getOpcode() == InterchangeableInstructionOpcode)
-              return Inst;
-          }
-          llvm_unreachable(
-              "Cannot find the candidate instruction for InstructionsState.");
-        };
-    MainOp = FindOp(InterchangeableConverter);
-    AltOp = AlternateInterchangeableConverter
-                ? FindOp(*AlternateInterchangeableConverter)
-                : MainOp;
+    MainOp = FindInstructionWithOpcode(BinOpHelper.getMainOpcode());
+    AltOp = FindInstructionWithOpcode(BinOpHelper.getAltOpcode());
   }
   return InstructionsState(MainOp, AltOp);
 }

>From 29f0813d34d0c326d8b8eb41bc599324a186d596 Mon Sep 17 00:00:00 2001
From: Han-Kuan Chen <hankuan.chen at sifive.com>
Date: Thu, 13 Mar 2025 02:43:20 -0700
Subject: [PATCH 36/38] fix merge

---
 .../Transforms/SLPVectorizer/X86/reorder_diamond_match.ll  | 7 -------
 1 file changed, 7 deletions(-)

diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reorder_diamond_match.ll b/llvm/test/Transforms/SLPVectorizer/X86/reorder_diamond_match.ll
index 3238bc509d9a3..fff2b72df613e 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/reorder_diamond_match.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/reorder_diamond_match.ll
@@ -11,16 +11,9 @@ define void @test() {
 ; CHECK-NEXT:    [[TMP6:%.*]] = shl <4 x i16> [[TMP5]], zeroinitializer
 ; CHECK-NEXT:    [[TMP7:%.*]] = add <4 x i16> [[TMP6]], zeroinitializer
 ; CHECK-NEXT:    [[TMP8:%.*]] = shufflevector <4 x i16> [[TMP7]], <4 x i16> poison, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
-<<<<<<< HEAD
-; CHECK-NEXT:    [[TMP9:%.*]] = add <4 x i16> [[TMP7]], [[TMP8]]
-; CHECK-NEXT:    [[TMP10:%.*]] = sub <4 x i16> [[TMP7]], [[TMP8]]
-; CHECK-NEXT:    [[TMP11:%.*]] = shufflevector <4 x i16> [[TMP9]], <4 x i16> [[TMP10]], <4 x i32> <i32 1, i32 4, i32 3, i32 6>
-=======
 ; CHECK-NEXT:    [[TMP9:%.*]] = add <4 x i16> [[TMP8]], [[TMP7]]
 ; CHECK-NEXT:    [[TMP10:%.*]] = sub <4 x i16> [[TMP8]], [[TMP7]]
 ; CHECK-NEXT:    [[TMP11:%.*]] = shufflevector <4 x i16> [[TMP9]], <4 x i16> [[TMP10]], <4 x i32> <i32 0, i32 5, i32 2, i32 7>
-; CHECK-NEXT:    [[TMP12:%.*]] = add <4 x i16> zeroinitializer, [[TMP11]]
->>>>>>> upstream/main
 ; CHECK-NEXT:    [[TMP13:%.*]] = sub <4 x i16> zeroinitializer, [[TMP11]]
 ; CHECK-NEXT:    [[TMP15:%.*]] = sext <4 x i16> [[TMP13]] to <4 x i32>
 ; CHECK-NEXT:    store <4 x i32> [[TMP15]], ptr [[TMP2]], align 16

>From f104cb0da4a9375c14ce9787019638113ed52a9d Mon Sep 17 00:00:00 2001
From: Han-Kuan Chen <hankuan.chen at sifive.com>
Date: Thu, 13 Mar 2025 19:20:24 +0800
Subject: [PATCH 37/38] Update llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Co-authored-by: Alexey Bataev <a.bataev at gmx.com>
---
 llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 0f2b63f25b431..20cdedd37946c 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -868,7 +868,7 @@ class BinOpSameOpcodeHelper {
     return {nullptr, 0};
   }
   struct InterchangeableInfo {
-    Instruction *I;
+    Instruction *I = nullptr;
     // The bit it sets represents whether MainOp can be converted to.
     MaskType Mask = MainOpBIT | XorBIT | OrBIT | AndBIT | SubBIT | AddBIT |
                     MulBIT | AShrBIT | ShlBIT;

>From 5b1c64ec3f4104247415c595463f05c2f9dca751 Mon Sep 17 00:00:00 2001
From: Han-Kuan Chen <hankuan.chen at sifive.com>
Date: Thu, 13 Mar 2025 04:23:33 -0700
Subject: [PATCH 38/38] remove CHECK

---
 .../SLPVectorizer/alternate-opcode-sindle-bv.ll       | 11 -----------
 1 file changed, 11 deletions(-)

diff --git a/llvm/test/Transforms/SLPVectorizer/alternate-opcode-sindle-bv.ll b/llvm/test/Transforms/SLPVectorizer/alternate-opcode-sindle-bv.ll
index dacc49fcd6be8..9b6511d0d8284 100644
--- a/llvm/test/Transforms/SLPVectorizer/alternate-opcode-sindle-bv.ll
+++ b/llvm/test/Transforms/SLPVectorizer/alternate-opcode-sindle-bv.ll
@@ -3,17 +3,6 @@
 ; RUN: %if aarch64-registered-target %{ opt -S --passes=slp-vectorizer -mtriple=aarch64-unknown-linux-gnu < %s | FileCheck %s --check-prefix=AARCH64 %}
 
 define <2 x i32> @test(i32 %arg) {
-; CHECK-LABEL: define <2 x i32> @test(
-; CHECK-SAME: i32 [[ARG:%.*]]) {
-; CHECK-NEXT:  bb:
-; CHECK-NEXT:    [[OR:%.*]] = or i32 [[ARG]], 0
-; CHECK-NEXT:    [[MUL:%.*]] = mul i32 0, 1
-; CHECK-NEXT:    [[MUL1:%.*]] = mul i32 [[OR]], [[MUL]]
-; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt i32 0, [[MUL1]]
-; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <2 x i32> poison, i32 [[OR]], i32 0
-; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x i32> [[TMP0]], i32 [[MUL]], i32 1
-; CHECK-NEXT:    ret <2 x i32> [[TMP1]]
-;
 ; X86-LABEL: define <2 x i32> @test(
 ; X86-SAME: i32 [[ARG:%.*]]) {
 ; X86-NEXT:  bb:



More information about the llvm-commits mailing list