[llvm] [InstCombine] Fold patterns which uses <2N x iM> type for comparisons on <N x i2M> vector types (PR #184328)

Tue Mar 10 21:02:35 PDT 2026

================
@@ -1296,6 +1296,190 @@ Value *InstCombinerImpl::foldUsingDistributiveLaws(BinaryOperator &I) {
   return SimplifySelectsFeedingBinaryOp(I, LHS, RHS);
 }
 
+// Folds patterns which use comparisons on the <2N x iM> type to implement a
+// <N x i2M> equality comparison.
+//
+// (A1, ..., AN) -> (A1Lower, A1Upper, ..., ANLower, ANUpper)
+// (B1, ..., BN) -> (B1Lower, B1Upper, ..., BNLower, BNUpper)
+// (Result1, ..., ResultN) -> (Result1, Result1, ..., ResultN, ResultN)
+//
+// where,
+//
+// ResultX = EqLowerX & EqUpperX
+// EqLowerX = AXLower == BXLower
+// EqUpperX = AXUpper == BXUpper
+//
+// Bitwise AND between the upper and lower parts can be achieved by performing
+// the operation between the original and shuffled equality vector.
+//
+// I is the root of the candidate pattern, ResultVecType is the <2N x iM> type
+// that the compared operands must have, and Builder emits the replacement.
+// Returns the replacement value (of type ResultVecType), or nullptr when the
+// pattern does not match.
+static Value *foldVecCmpEqOnHalfElementSize(Instruction &I,
+                                            FixedVectorType *ResultVecType,
+                                            InstCombiner::BuilderTy &Builder) {
+  // Check pattern existence
+  Value *L, *R, *ShufL, *ShufR;
+  CmpPredicate Pred, ShufPred;
+  ArrayRef<int> Mask;
+
+  // The direct and the shuffled comparison bind to separate variables. Reusing
+  // a single matcher for both would overwrite the first binding with the
+  // second, so nothing would verify that both sides are the same comparison.
+  auto Equal = m_SExtOrSelf(m_ICmp(Pred, m_Value(L), m_Value(R)));
+  auto ShuffledEqual =
+      m_SExtOrSelf(m_ICmp(ShufPred, m_Value(ShufL), m_Value(ShufR)));
+  auto Shuffle =
+      m_SExtOrSelf(m_Shuffle(ShuffledEqual, m_Poison(), m_Mask(Mask)));
+  if (!match(&I,
+             m_SExtOrSelf(m_CombineOr(m_c_And(Equal, Shuffle),
+                                      m_Select(Equal, Shuffle, m_Zero())))) ||
+      Pred != CmpInst::ICMP_EQ || ShufPred != CmpInst::ICMP_EQ)
+    return nullptr;
+
+  // Both comparisons must test the same pair of values. Equality is
+  // symmetric, so the shuffled comparison may list them in either order.
+  if (!((L == ShufL && R == ShufR) || (L == ShufR && R == ShufL)))
+    return nullptr;
+
+  // dyn_cast (rather than cast) turns a non-fixed vector operand into a
+  // regular bail-out instead of an assertion failure.
+  auto *OldVecType = dyn_cast<FixedVectorType>(L->getType());
+
+  if (!OldVecType || OldVecType != ResultVecType)
+    return nullptr;
+
+  // Elements are consumed in (lower, upper) pairs, so an odd element count can
+  // neither match the mask below nor be bitcast to the doubled element width.
+  const int MaskSize = static_cast<int>(Mask.size());
+  if (MaskSize % 2 != 0)
+    return nullptr;
+
+  // Example shuffle mask: {1, 0, 3, 2}
+  for (int Idx = 0; Idx < MaskSize; Idx += 2)
+    if (Mask[Idx] != Idx + 1 || Mask[Idx + 1] != Idx)
+      return nullptr;
+
+  LLVM_DEBUG(dbgs() << "IC: Folding Vn2im CmpEq using V2nim CmpEq pattern"
+                    << '\n');
+
+  // Perform folding: compare on <N x i2M>, sign-extend the <N x i1> result
+  // back to <N x i2M>, then reinterpret it as the original <2N x iM> type so
+  // that both halves of each pair carry the same all-ones / all-zeros value.
+  unsigned int OldElementCount = OldVecType->getElementCount().getFixedValue();
+  unsigned int OldElementWidth = OldVecType->getScalarSizeInBits();
+  Type *NewElementType = IntegerType::get(I.getContext(), OldElementWidth * 2);
+  Type *NewVecType =
+      VectorType::get(NewElementType, OldElementCount / 2, false);
+  Value *BitCastL = Builder.CreateBitCast(L, NewVecType);
+  Value *BitCastR = Builder.CreateBitCast(R, NewVecType);
+  Value *Cmp = Builder.CreateICmp(Pred, BitCastL, BitCastR);
+  Value *SExt = Builder.CreateSExt(Cmp, NewVecType);
+  Value *BitCastCmp = Builder.CreateBitCast(SExt, OldVecType);
+
+  return BitCastCmp;
+}
+
+// Folds patterns which use comparisons on <2N x iM> type for a <N x i2M>
+// greater / less than comparison.
+//
+// (A1, ..., AN) -> (A1Lower, A1Upper, ..., ANLower, ANUpper)
+// (B1, ..., BN) -> (B1Lower, B1Upper, ..., BNLower, BNUpper)
+// (Result1, ..., ResultN) -> (Result1, Result1, ..., ResultN, ResultN)
+//
+// where,
+//
+// ResultX = (GtLowerX & EqUpperX) | (GtUpperX)
+// GtLowerX = AXLower OP BXLower
+// GtUpperX = AXUpper OP BXUpper
+// EqUpperX = AXUpper EQ BXUpper
+//
+// Upper and lower parts are obtained through vector shuffles.
+//
+// Note that comparisons of the lower parts are always unsigned comparisons,
+// regardless of the resulting signedness. Also note that an unsigned comparison
+// can be derived from a signed comparison by flipping the MSB of both operands.
+static Value *foldVecCmpGtOnHalfElementSize(Instruction &I,
+                                            FixedVectorType *ResultVecType,
+                                            InstCombiner::BuilderTy &Builder) {
+  // Check pattern existence
+  Value *A, *B, *Greater1, *Greater2;
+  CmpPredicate PredEq;
+  ArrayRef<int> MaskLower, MaskUpper1, MaskUpper2;
+
+  auto GreaterLower = m_SExtOrSelf(m_Shuffle(m_SExtOrSelf(m_Value(Greater1)),
+                                             m_Poison(), m_Mask(MaskLower)));
+  auto GreaterUpper = m_SExtOrSelf(m_Shuffle(m_SExtOrSelf(m_Value(Greater2)),
+                                             m_Poison(), m_Mask(MaskUpper1)));
+  auto EqUpper = m_SExtOrSelf(
+      m_Shuffle(m_SExtOrSelf(m_c_ICmp(PredEq, m_Value(A), m_Value(B))),
+                m_Poison(), m_Mask(MaskUpper2)));
+  auto And =
+      m_SExtOrSelf(m_CombineOr(m_c_And(GreaterLower, EqUpper),
+                               m_Select(EqUpper, GreaterLower, m_Zero())));
+  auto Or = m_SExtOrSelf(m_c_Or(And, GreaterUpper));
+
+  if (!match(&I, Or) || Greater1 != Greater2 || MaskUpper1 != MaskUpper2 ||
+      PredEq != ICmpInst::ICMP_EQ)
+    return nullptr;
+
+  auto *OldVecType = cast<FixedVectorType>(A->getType());
+
+  if (OldVecType != ResultVecType)
+    return nullptr;
+
+  // Example lower shuffle mask: {0, 0, 2, 2}
+  // Example upper shuffle mask: {1, 1, 3, 3}
+  for (int I = 0; I < static_cast<int>(MaskLower.size()); I += 2)
+    if (MaskLower[I] != I || MaskLower[I + 1] != I || MaskUpper1[I] != I + 1 ||
+        MaskUpper1[I + 1] != I + 1)
+      return nullptr;
+
----------------
fuad1502 wrote:

Handled here: https://github.com/llvm/llvm-project/pull/184328/commits/db017d38e4ca0a73cca6889ebb6f20f0810ac2e2

I also added the alive proof for big endian in the PR description.

https://github.com/llvm/llvm-project/pull/184328