[llvm] Fold patterns which use v4i32 type for comparisons on v2i64 type (PR #184328)
Fuad Ismail via llvm-commits
llvm-commits at lists.llvm.org
Fri Mar 6 10:19:25 PST 2026
https://github.com/fuad1502 updated https://github.com/llvm/llvm-project/pull/184328
>From 5c444710bf7fd07d8405aa8b6802cff845f9994a Mon Sep 17 00:00:00 2001
From: Fuad Ismail <fuad1502 at gmail.com>
Date: Tue, 3 Mar 2026 07:15:43 +0700
Subject: [PATCH 01/13] Add lit test for folding the v4i32 equals-shuffle-and
 pattern to v2i64 equals
---
.../fold-v4i32-eq-shuffle-and-to-v2i64-eq.ll | 34 +++++++++++++++++++
1 file changed, 34 insertions(+)
create mode 100644 llvm/test/Transforms/VectorCombine/fold-v4i32-eq-shuffle-and-to-v2i64-eq.ll
diff --git a/llvm/test/Transforms/VectorCombine/fold-v4i32-eq-shuffle-and-to-v2i64-eq.ll b/llvm/test/Transforms/VectorCombine/fold-v4i32-eq-shuffle-and-to-v2i64-eq.ll
new file mode 100644
index 0000000000000..3c1b98af193f5
--- /dev/null
+++ b/llvm/test/Transforms/VectorCombine/fold-v4i32-eq-shuffle-and-to-v2i64-eq.ll
@@ -0,0 +1,34 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
+; RUN: opt < %s -passes=vector-combine -S | FileCheck %s
+
+define <4 x i32> @cmpeq_epi64_select(<4 x i32> noundef %a, <4 x i32> noundef %b) {
+; CHECK-LABEL: define <4 x i32> @cmpeq_epi64_select(
+; CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) {
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq <4 x i32> [[A]], [[B]]
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i1> [[CMP]], <4 x i1> poison, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
+; CHECK-NEXT: [[SHUFFLE:%.*]] = sext <4 x i1> [[TMP1]] to <4 x i32>
+; CHECK-NEXT: [[SELECT:%.*]] = select <4 x i1> [[CMP]], <4 x i32> [[SHUFFLE]], <4 x i32> zeroinitializer
+; CHECK-NEXT: ret <4 x i32> [[SELECT]]
+;
+ %cmp = icmp eq <4 x i32> %a, %b
+ %sext = sext <4 x i1> %cmp to <4 x i32>
+ %shuffle = shufflevector <4 x i32> %sext, <4 x i32> poison, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
+ %select = select <4 x i1> %cmp, <4 x i32> %shuffle, <4 x i32> zeroinitializer
+ ret <4 x i32> %select
+}
+
+define <4 x i32> @cmpeq_epi64_and(<4 x i32> noundef %a, <4 x i32> noundef %b) {
+; CHECK-LABEL: define <4 x i32> @cmpeq_epi64_and(
+; CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) {
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq <4 x i32> [[A]], [[B]]
+; CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
+; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[SEXT]], <4 x i32> poison, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
+; CHECK-NEXT: [[AND:%.*]] = and <4 x i32> [[SEXT]], [[SHUFFLE]]
+; CHECK-NEXT: ret <4 x i32> [[AND]]
+;
+ %cmp = icmp eq <4 x i32> %a, %b
+ %sext = sext <4 x i1> %cmp to <4 x i32>
+ %shuffle = shufflevector <4 x i32> %sext, <4 x i32> poison, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
+ %and = and <4 x i32> %sext, %shuffle
+ ret <4 x i32> %and
+}
>From b8f17f37a75293051498bac60b03aa40c0e3c365 Mon Sep 17 00:00:00 2001
From: Fuad Ismail <fuad1502 at gmail.com>
Date: Tue, 3 Mar 2026 12:08:15 +0700
Subject: [PATCH 02/13] Apply folding for v4i32 equals-shuffle-and pattern
---
.../Transforms/Vectorize/VectorCombine.cpp | 73 +++++++++++++++++++
.../fold-v4i32-eq-shuffle-and-to-v2i64-eq.ll | 18 +++--
2 files changed, 83 insertions(+), 8 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index 1f37e435b8080..ee29ce690a435 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -27,12 +27,15 @@
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Analysis/VectorUtils.h"
+#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
@@ -153,6 +156,7 @@ class VectorCombine {
bool foldEquivalentReductionCmp(Instruction &I);
bool foldSelectShuffle(Instruction &I, bool FromReduction = false);
bool foldInterleaveIntrinsics(Instruction &I);
+ bool foldEqualShuffleAnd(Instruction &I);
bool shrinkType(Instruction &I);
bool shrinkLoadForShuffles(Instruction &I);
bool shrinkPhiOfShuffles(Instruction &I);
@@ -5435,6 +5439,69 @@ bool VectorCombine::foldInterleaveIntrinsics(Instruction &I) {
return true;
}
+// Prior to SSE4.1, performing equality comparison on v2i64 types require a
+// comparison on v4i32 types using the following pattern:
+//
+// ...
+// %3 = icmp eq <4 x i32> %1, %2
+// %4 = sext <4 x i1> %3 to <4 x i32>
+// %5 = shufflevector <4 x i32> %4, <4 x i32> poison, <4 x i32> <i32 1, i32 0,
+// i32 3, i32 2> %6 = select <4 x i1> %3, <4 x i32> %5, <4 x i32>
+// zeroinitializer
+// ...
+//
+// We should detect such patterns and fold them to:
+//
+// %3 = bitcast <4 x i32> %1 to <2 x i64>
+// %4 = bitcast <4 x i32> %2 to <2 x i64>
+// %5 = icmp eq <2 x i64> %3, %4
+// %6 = bitcast <2 x i64> %5 to <4 x i32>
+//
+bool VectorCombine::foldEqualShuffleAnd(Instruction &I) {
+ // Check pattern existance
+ Value *L, *R;
+ CmpPredicate Pred;
+
+ auto Equal = m_ICmp(Pred, m_Value(L), m_Value(R));
+ SmallVector<int> Mask = {1, 0, 3, 2};
+ auto Shuffle =
+ m_CombineOr(m_SExt(m_Shuffle(Equal, m_Poison(), m_SpecificMask(Mask))),
+ m_Shuffle(m_SExt(Equal), m_Poison(), m_SpecificMask(Mask)));
+
+ if (!match(&I, m_CombineOr(m_And(m_SExt(Equal), Shuffle),
+ m_Select(Equal, Shuffle, m_ZeroInt()))) ||
+ !ICmpInst::isEquality(Pred) || !L->getType()->isVectorTy())
+ return false;
+
+ auto *OldVecType = cast<VectorType>(L->getType());
+
+ if (OldVecType->isScalableTy() ||
+ !OldVecType->getElementType()->isIntegerTy())
+ return false;
+
+ int ElementCount = OldVecType->getElementCount().getFixedValue();
+ int ElementBitWidth = OldVecType->getElementType()->getIntegerBitWidth();
+
+ if (ElementCount != 4 || ElementBitWidth != 32)
+ return false;
+
+ LLVM_DEBUG(dbgs() << "VC: Found equal-shuffle-and pattern" << '\n');
+
+ // Perform folding
+ IRBuilder Builder(&I);
+ auto *NewElementType = IntegerType::get(I.getContext(), ElementBitWidth * 2);
+ auto *NewVecType = VectorType::get(NewElementType, ElementCount / 2, false);
+ auto *BitCastL = Builder.CreateBitCast(L, NewVecType);
+ auto *BitCastR = Builder.CreateBitCast(R, NewVecType);
+ auto *Cmp = Builder.CreateICmp(Pred, BitCastL, BitCastR);
+ auto *SExt = Builder.CreateSExt(Cmp, NewVecType);
+ auto *BitCastCmp = Builder.CreateBitCast(SExt, OldVecType);
+
+ replaceValue(I, *BitCastCmp);
+
+ return false;
+}
+
// Attempt to shrink loads that are only used by shufflevector instructions.
bool VectorCombine::shrinkLoadForShuffles(Instruction &I) {
auto *OldLoad = dyn_cast<LoadInst>(&I);
@@ -5777,11 +5844,17 @@ bool VectorCombine::run() {
return true;
if (foldBitOpOfCastConstant(I))
return true;
+ if (foldEqualShuffleAnd(I))
+ return true;
break;
case Instruction::PHI:
if (shrinkPhiOfShuffles(I))
return true;
break;
+ case Instruction::Select:
+ if (foldEqualShuffleAnd(I))
+ return true;
+ break;
default:
if (shrinkType(I))
return true;
diff --git a/llvm/test/Transforms/VectorCombine/fold-v4i32-eq-shuffle-and-to-v2i64-eq.ll b/llvm/test/Transforms/VectorCombine/fold-v4i32-eq-shuffle-and-to-v2i64-eq.ll
index 3c1b98af193f5..42f3222ae3d27 100644
--- a/llvm/test/Transforms/VectorCombine/fold-v4i32-eq-shuffle-and-to-v2i64-eq.ll
+++ b/llvm/test/Transforms/VectorCombine/fold-v4i32-eq-shuffle-and-to-v2i64-eq.ll
@@ -4,10 +4,11 @@
define <4 x i32> @cmpeq_epi64_select(<4 x i32> noundef %a, <4 x i32> noundef %b) {
; CHECK-LABEL: define <4 x i32> @cmpeq_epi64_select(
; CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) {
-; CHECK-NEXT: [[CMP:%.*]] = icmp eq <4 x i32> [[A]], [[B]]
-; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i1> [[CMP]], <4 x i1> poison, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
-; CHECK-NEXT: [[SHUFFLE:%.*]] = sext <4 x i1> [[TMP1]] to <4 x i32>
-; CHECK-NEXT: [[SELECT:%.*]] = select <4 x i1> [[CMP]], <4 x i32> [[SHUFFLE]], <4 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[A]] to <2 x i64>
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[B]] to <2 x i64>
+; CHECK-NEXT: [[TMP3:%.*]] = icmp eq <2 x i64> [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[TMP4:%.*]] = sext <2 x i1> [[TMP3]] to <2 x i64>
+; CHECK-NEXT: [[SELECT:%.*]] = bitcast <2 x i64> [[TMP4]] to <4 x i32>
; CHECK-NEXT: ret <4 x i32> [[SELECT]]
;
%cmp = icmp eq <4 x i32> %a, %b
@@ -20,10 +21,11 @@ define <4 x i32> @cmpeq_epi64_select(<4 x i32> noundef %a, <4 x i32> noundef %b)
define <4 x i32> @cmpeq_epi64_and(<4 x i32> noundef %a, <4 x i32> noundef %b) {
; CHECK-LABEL: define <4 x i32> @cmpeq_epi64_and(
; CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) {
-; CHECK-NEXT: [[CMP:%.*]] = icmp eq <4 x i32> [[A]], [[B]]
-; CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
-; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[SEXT]], <4 x i32> poison, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
-; CHECK-NEXT: [[AND:%.*]] = and <4 x i32> [[SEXT]], [[SHUFFLE]]
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[A]] to <2 x i64>
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[B]] to <2 x i64>
+; CHECK-NEXT: [[TMP3:%.*]] = icmp eq <2 x i64> [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[TMP4:%.*]] = sext <2 x i1> [[TMP3]] to <2 x i64>
+; CHECK-NEXT: [[AND:%.*]] = bitcast <2 x i64> [[TMP4]] to <4 x i32>
; CHECK-NEXT: ret <4 x i32> [[AND]]
;
%cmp = icmp eq <4 x i32> %a, %b
>From 10ad4ba00e85fadba6255e53594144aa8973f068 Mon Sep 17 00:00:00 2001
From: Fuad Ismail <fuad1502 at gmail.com>
Date: Tue, 3 Mar 2026 13:13:33 +0700
Subject: [PATCH 03/13] Handle commuted 'and' instruction
---
llvm/lib/Transforms/Vectorize/VectorCombine.cpp | 2 +-
.../fold-v4i32-eq-shuffle-and-to-v2i64-eq.ll | 17 +++++++++++++++++
2 files changed, 18 insertions(+), 1 deletion(-)
diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index ee29ce690a435..13559610e37a2 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -5468,7 +5468,7 @@ bool VectorCombine::foldEqualShuffleAnd(Instruction &I) {
m_CombineOr(m_SExt(m_Shuffle(Equal, m_Poison(), m_SpecificMask(Mask))),
m_Shuffle(m_SExt(Equal), m_Poison(), m_SpecificMask(Mask)));
- if (!match(&I, m_CombineOr(m_And(m_SExt(Equal), Shuffle),
+ if (!match(&I, m_CombineOr(m_c_And(m_SExt(Equal), Shuffle),
m_Select(Equal, Shuffle, m_ZeroInt()))) ||
!ICmpInst::isEquality(Pred) || !L->getType()->isVectorTy())
return false;
diff --git a/llvm/test/Transforms/VectorCombine/fold-v4i32-eq-shuffle-and-to-v2i64-eq.ll b/llvm/test/Transforms/VectorCombine/fold-v4i32-eq-shuffle-and-to-v2i64-eq.ll
index 42f3222ae3d27..983a5a6708609 100644
--- a/llvm/test/Transforms/VectorCombine/fold-v4i32-eq-shuffle-and-to-v2i64-eq.ll
+++ b/llvm/test/Transforms/VectorCombine/fold-v4i32-eq-shuffle-and-to-v2i64-eq.ll
@@ -34,3 +34,20 @@ define <4 x i32> @cmpeq_epi64_and(<4 x i32> noundef %a, <4 x i32> noundef %b) {
%and = and <4 x i32> %sext, %shuffle
ret <4 x i32> %and
}
+
+define <4 x i32> @cmpeq_epi64_commutated_and(<4 x i32> noundef %a, <4 x i32> noundef %b) {
+; CHECK-LABEL: define <4 x i32> @cmpeq_epi64_commutated_and(
+; CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) {
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[A]] to <2 x i64>
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[B]] to <2 x i64>
+; CHECK-NEXT: [[TMP3:%.*]] = icmp eq <2 x i64> [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[TMP4:%.*]] = sext <2 x i1> [[TMP3]] to <2 x i64>
+; CHECK-NEXT: [[AND:%.*]] = bitcast <2 x i64> [[TMP4]] to <4 x i32>
+; CHECK-NEXT: ret <4 x i32> [[AND]]
+;
+ %cmp = icmp eq <4 x i32> %a, %b
+ %sext = sext <4 x i1> %cmp to <4 x i32>
+ %shuffle = shufflevector <4 x i32> %sext, <4 x i32> poison, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
+ %and = and <4 x i32> %shuffle, %sext
+ ret <4 x i32> %and
+}
>From 79bbff7d352bfd6f49990f76b2e04621695ab128 Mon Sep 17 00:00:00 2001
From: Fuad Ismail <fuad1502 at gmail.com>
Date: Tue, 3 Mar 2026 16:44:27 +0700
Subject: [PATCH 04/13] Don't fold when intermediate instructions have uses
 outside the pattern
---
.../Transforms/Vectorize/VectorCombine.cpp | 53 ++++++++++++-----
.../fold-v4i32-eq-shuffle-and-to-v2i64-eq.ll | 57 +++++++++++++++++++
2 files changed, 95 insertions(+), 15 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index 13559610e37a2..805aedfc61c04 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -5442,37 +5442,46 @@ bool VectorCombine::foldInterleaveIntrinsics(Instruction &I) {
// Prior to SSE4.1, performing equality comparison on v2i64 types require a
// comparison on v4i32 types using the following pattern:
//
-// ...
// %3 = icmp eq <4 x i32> %1, %2
+//
// %4 = sext <4 x i1> %3 to <4 x i32>
+//
// %5 = shufflevector <4 x i32> %4, <4 x i32> poison, <4 x i32> <i32 1, i32 0,
-// i32 3, i32 2> %6 = select <4 x i1> %3, <4 x i32> %5, <4 x i32>
-// zeroinitializer
-// ...
+// i32 3, i32 2>
+//
+// %6 = select <4 x i1> %3, <4 x i32> %5, <4 x i32> zeroinitializer
+//
+// OR
+//
+// %6 = and <4 x i32> %sext, %shuffle
//
-// We should detect such patterns and fold them to:
+// We should detect such patterns and fold them into:
//
// %3 = bitcast <4 x i32> %1 to <2 x i64>
+//
// %4 = bitcast <4 x i32> %2 to <2 x i64>
+//
// %5 = icmp eq <2 x i64> %3, %4
+//
// %6 = bitcast <2 x i64> %5 to <4 x i32>
//
bool VectorCombine::foldEqualShuffleAnd(Instruction &I) {
- // Check pattern existance
- Value *L, *R;
+ Value *Equal, *Shuffle, *L, *R;
CmpPredicate Pred;
-
- auto Equal = m_ICmp(Pred, m_Value(L), m_Value(R));
SmallVector<int> Mask = {1, 0, 3, 2};
- auto Shuffle =
- m_CombineOr(m_SExt(m_Shuffle(Equal, m_Poison(), m_SpecificMask(Mask))),
- m_Shuffle(m_SExt(Equal), m_Poison(), m_SpecificMask(Mask)));
- if (!match(&I, m_CombineOr(m_c_And(m_SExt(Equal), Shuffle),
- m_Select(Equal, Shuffle, m_ZeroInt()))) ||
- !ICmpInst::isEquality(Pred) || !L->getType()->isVectorTy())
+ // Check pattern existance
+ if (!match(&I, m_CombineOr(m_c_And(m_SExt(m_Value(Equal)),
+ m_SExtOrSelf(m_Value(Shuffle))),
+ m_Select(m_Value(Equal),
+ m_SExtOrSelf(m_Value(Shuffle)),
+ m_ZeroInt()))) ||
+ !match(Shuffle, m_Shuffle(m_SExtOrSelf(m_Specific(Equal)), m_Poison(),
+ m_SpecificMask(Mask))) ||
+ !match(Equal, m_ICmp(Pred, m_Value(L), m_Value(R))))
return false;
+ // Check argument type
auto *OldVecType = cast<VectorType>(L->getType());
if (OldVecType->isScalableTy() ||
@@ -5485,6 +5494,20 @@ bool VectorCombine::foldEqualShuffleAnd(Instruction &I) {
if (ElementCount != 4 || ElementBitWidth != 32)
return false;
+ // Check uses outside pattern
+ if (!Shuffle->hasOneUse())
+ return false;
+
+ for (auto *U : Equal->users()) {
+ if (U == &I || U == Shuffle)
+ continue;
+ if (!isa<llvm::CastInst>(U))
+ return false;
+ for (auto *U : U->users())
+ if (U != &I && U != Shuffle)
+ return false;
+ }
+
LLVM_DEBUG(dbgs() << "VC: Found equal-shuffle-and pattern" << '\n');
// Perform folding
diff --git a/llvm/test/Transforms/VectorCombine/fold-v4i32-eq-shuffle-and-to-v2i64-eq.ll b/llvm/test/Transforms/VectorCombine/fold-v4i32-eq-shuffle-and-to-v2i64-eq.ll
index 983a5a6708609..2d7e72d359973 100644
--- a/llvm/test/Transforms/VectorCombine/fold-v4i32-eq-shuffle-and-to-v2i64-eq.ll
+++ b/llvm/test/Transforms/VectorCombine/fold-v4i32-eq-shuffle-and-to-v2i64-eq.ll
@@ -51,3 +51,60 @@ define <4 x i32> @cmpeq_epi64_commutated_and(<4 x i32> noundef %a, <4 x i32> nou
%and = and <4 x i32> %shuffle, %sext
ret <4 x i32> %and
}
+
+declare void @use.v4i1(<4 x i1>)
+declare void @use.v4i32(<4 x i32>)
+
+define <4 x i32> @cmpeq_epi64_multi_use_cmp(<4 x i32> noundef %a, <4 x i32> noundef %b) {
+; CHECK-LABEL: define <4 x i32> @cmpeq_epi64_multi_use_cmp(
+; CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) {
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq <4 x i32> [[A]], [[B]]
+; CHECK-NEXT: call void @use.v4i1(<4 x i1> [[CMP]])
+; CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
+; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[SEXT]], <4 x i32> poison, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
+; CHECK-NEXT: [[AND:%.*]] = and <4 x i32> [[SHUFFLE]], [[SEXT]]
+; CHECK-NEXT: ret <4 x i32> [[AND]]
+;
+ %cmp = icmp eq <4 x i32> %a, %b
+ call void @use.v4i1(<4 x i1> %cmp)
+ %sext = sext <4 x i1> %cmp to <4 x i32>
+ %shuffle = shufflevector <4 x i32> %sext, <4 x i32> poison, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
+ %and = and <4 x i32> %shuffle, %sext
+ ret <4 x i32> %and
+}
+
+define <4 x i32> @cmpeq_epi64_multi_use_sext(<4 x i32> noundef %a, <4 x i32> noundef %b) {
+; CHECK-LABEL: define <4 x i32> @cmpeq_epi64_multi_use_sext(
+; CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) {
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq <4 x i32> [[A]], [[B]]
+; CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
+; CHECK-NEXT: call void @use.v4i32(<4 x i32> [[SEXT]])
+; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[SEXT]], <4 x i32> poison, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
+; CHECK-NEXT: [[AND:%.*]] = and <4 x i32> [[SHUFFLE]], [[SEXT]]
+; CHECK-NEXT: ret <4 x i32> [[AND]]
+;
+ %cmp = icmp eq <4 x i32> %a, %b
+ %sext = sext <4 x i1> %cmp to <4 x i32>
+ call void @use.v4i32(<4 x i32> %sext)
+ %shuffle = shufflevector <4 x i32> %sext, <4 x i32> poison, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
+ %and = and <4 x i32> %shuffle, %sext
+ ret <4 x i32> %and
+}
+
+define <4 x i32> @cmpeq_epi64_multi_use_shuffle(<4 x i32> noundef %a, <4 x i32> noundef %b) {
+; CHECK-LABEL: define <4 x i32> @cmpeq_epi64_multi_use_shuffle(
+; CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) {
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq <4 x i32> [[A]], [[B]]
+; CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
+; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[SEXT]], <4 x i32> poison, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
+; CHECK-NEXT: call void @use.v4i32(<4 x i32> [[SHUFFLE]])
+; CHECK-NEXT: [[AND:%.*]] = and <4 x i32> [[SHUFFLE]], [[SEXT]]
+; CHECK-NEXT: ret <4 x i32> [[AND]]
+;
+ %cmp = icmp eq <4 x i32> %a, %b
+ %sext = sext <4 x i1> %cmp to <4 x i32>
+ %shuffle = shufflevector <4 x i32> %sext, <4 x i32> poison, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
+ call void @use.v4i32(<4 x i32> %shuffle)
+ %and = and <4 x i32> %shuffle, %sext
+ ret <4 x i32> %and
+}
>From dd0432152c35d705e03983a55f5fae2758699564 Mon Sep 17 00:00:00 2001
From: Fuad Ismail <fuad1502 at gmail.com>
Date: Tue, 3 Mar 2026 18:10:25 +0700
Subject: [PATCH 05/13] Add negative test cases and add icmp condition code
check
---
.../Transforms/Vectorize/VectorCombine.cpp | 3 +-
.../fold-v4i32-eq-shuffle-and-to-v2i64-eq.ll | 64 +++++++++++++++++++
2 files changed, 66 insertions(+), 1 deletion(-)
diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index 805aedfc61c04..fd2ca3ef7c901 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -5478,7 +5478,8 @@ bool VectorCombine::foldEqualShuffleAnd(Instruction &I) {
m_ZeroInt()))) ||
!match(Shuffle, m_Shuffle(m_SExtOrSelf(m_Specific(Equal)), m_Poison(),
m_SpecificMask(Mask))) ||
- !match(Equal, m_ICmp(Pred, m_Value(L), m_Value(R))))
+ !match(Equal, m_ICmp(Pred, m_Value(L), m_Value(R))) ||
+ !CmpInst::isEquality(Pred))
return false;
// Check argument type
diff --git a/llvm/test/Transforms/VectorCombine/fold-v4i32-eq-shuffle-and-to-v2i64-eq.ll b/llvm/test/Transforms/VectorCombine/fold-v4i32-eq-shuffle-and-to-v2i64-eq.ll
index 2d7e72d359973..4e13afd360673 100644
--- a/llvm/test/Transforms/VectorCombine/fold-v4i32-eq-shuffle-and-to-v2i64-eq.ll
+++ b/llvm/test/Transforms/VectorCombine/fold-v4i32-eq-shuffle-and-to-v2i64-eq.ll
@@ -108,3 +108,67 @@ define <4 x i32> @cmpeq_epi64_multi_use_shuffle(<4 x i32> noundef %a, <4 x i32>
%and = and <4 x i32> %shuffle, %sext
ret <4 x i32> %and
}
+
+define <4 x i32> @cmpeq_epi64_select_neg_0(<4 x i32> noundef %a, <4 x i32> noundef %b) {
+; CHECK-LABEL: define <4 x i32> @cmpeq_epi64_select_neg_0(
+; CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) {
+; CHECK-NEXT: [[CMP:%.*]] = icmp sgt <4 x i32> [[A]], [[B]]
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i1> [[CMP]], <4 x i1> poison, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
+; CHECK-NEXT: [[SHUFFLE:%.*]] = sext <4 x i1> [[TMP1]] to <4 x i32>
+; CHECK-NEXT: [[SELECT:%.*]] = select <4 x i1> [[CMP]], <4 x i32> [[SHUFFLE]], <4 x i32> zeroinitializer
+; CHECK-NEXT: ret <4 x i32> [[SELECT]]
+;
+ %cmp = icmp sgt <4 x i32> %a, %b
+ %sext = sext <4 x i1> %cmp to <4 x i32>
+ %shuffle = shufflevector <4 x i32> %sext, <4 x i32> poison, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
+ %select = select <4 x i1> %cmp, <4 x i32> %shuffle, <4 x i32> zeroinitializer
+ ret <4 x i32> %select
+}
+
+define <4 x i32> @cmpeq_epi64_and_neg_1(<4 x i32> noundef %a, <4 x i32> noundef %b) {
+; CHECK-LABEL: define <4 x i32> @cmpeq_epi64_and_neg_1(
+; CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) {
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq <4 x i32> [[A]], [[B]]
+; CHECK-NEXT: [[SEXT:%.*]] = zext <4 x i1> [[CMP]] to <4 x i32>
+; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[SEXT]], <4 x i32> poison, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
+; CHECK-NEXT: [[AND:%.*]] = and <4 x i32> [[SEXT]], [[SHUFFLE]]
+; CHECK-NEXT: ret <4 x i32> [[AND]]
+;
+ %cmp = icmp eq <4 x i32> %a, %b
+ %sext = zext <4 x i1> %cmp to <4 x i32>
+ %shuffle = shufflevector <4 x i32> %sext, <4 x i32> poison, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
+ %and = and <4 x i32> %sext, %shuffle
+ ret <4 x i32> %and
+}
+
+define <4 x i32> @cmpeq_epi64_select_neg_2(<4 x i32> noundef %a, <4 x i32> noundef %b) {
+; CHECK-LABEL: define <4 x i32> @cmpeq_epi64_select_neg_2(
+; CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) {
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq <4 x i32> [[A]], [[B]]
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i1> [[CMP]], <4 x i1> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT: [[SHUFFLE:%.*]] = sext <4 x i1> [[TMP1]] to <4 x i32>
+; CHECK-NEXT: [[SELECT:%.*]] = select <4 x i1> [[CMP]], <4 x i32> [[SHUFFLE]], <4 x i32> zeroinitializer
+; CHECK-NEXT: ret <4 x i32> [[SELECT]]
+;
+ %cmp = icmp eq <4 x i32> %a, %b
+ %sext = sext <4 x i1> %cmp to <4 x i32>
+ %shuffle = shufflevector <4 x i32> %sext, <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+ %select = select <4 x i1> %cmp, <4 x i32> %shuffle, <4 x i32> zeroinitializer
+ ret <4 x i32> %select
+}
+
+define <4 x i32> @cmpeq_epi64_select_neg_3(<4 x i32> noundef %a, <4 x i32> noundef %b) {
+; CHECK-LABEL: define <4 x i32> @cmpeq_epi64_select_neg_3(
+; CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) {
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq <4 x i32> [[A]], [[B]]
+; CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
+; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[SEXT]], <4 x i32> poison, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
+; CHECK-NEXT: [[SELECT:%.*]] = select <4 x i1> [[CMP]], <4 x i32> [[SHUFFLE]], <4 x i32> [[SEXT]]
+; CHECK-NEXT: ret <4 x i32> [[SELECT]]
+;
+ %cmp = icmp eq <4 x i32> %a, %b
+ %sext = sext <4 x i1> %cmp to <4 x i32>
+ %shuffle = shufflevector <4 x i32> %sext, <4 x i32> poison, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
+ %select = select <4 x i1> %cmp, <4 x i32> %shuffle, <4 x i32> %sext
+ ret <4 x i32> %select
+}
>From 32b31c2f728bfbbfcc982db0550d1f5f289dc71f Mon Sep 17 00:00:00 2001
From: Fuad Ismail <fuad1502 at gmail.com>
Date: Tue, 3 Mar 2026 19:03:24 +0700
Subject: [PATCH 06/13] Remove unnecessary additional headers
---
llvm/lib/Transforms/Vectorize/VectorCombine.cpp | 3 ---
1 file changed, 3 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index fd2ca3ef7c901..9832b396bde71 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -27,15 +27,12 @@
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Analysis/VectorUtils.h"
-#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
-#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
>From 5f46a5f41a65ab7759110db98c5797e61ad5b4d0 Mon Sep 17 00:00:00 2001
From: Fuad Ismail <fuad1502 at gmail.com>
Date: Wed, 4 Mar 2026 18:06:25 +0700
Subject: [PATCH 07/13] Move v4i32 eq-shuffle-and folding to InstCombine
---
.../InstCombine/InstCombineAndOrXor.cpp | 3 +
.../InstCombine/InstCombineInternal.h | 2 +
.../InstCombine/InstCombineSelect.cpp | 3 +
.../InstCombine/InstructionCombining.cpp | 84 +++++++++++++++++
.../Transforms/Vectorize/VectorCombine.cpp | 94 -------------------
.../fold-v4i32-eq-shuffle-and-to-v2i64-eq.ll | 28 +++---
6 files changed, 106 insertions(+), 108 deletions(-)
rename llvm/test/Transforms/{VectorCombine => InstCombine}/fold-v4i32-eq-shuffle-and-to-v2i64-eq.ll (88%)
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index 073f094639fa0..30c1e8e8aca73 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -2898,6 +2898,9 @@ Instruction *InstCombinerImpl::visitAnd(BinaryOperator &I) {
/*SimplifyOnly*/ false, *this))
return BinaryOperator::CreateAnd(Op0, V);
+ if (auto *Folded = foldV4EqualShuffleAndToV2Equal(I))
+ return Folded;
+
return nullptr;
}
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
index 2e7758e952eaf..fed88cc84f46e 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
+++ b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
@@ -484,6 +484,8 @@ class LLVM_LIBRARY_VISIBILITY InstCombinerImpl final
return Sel;
}
+ Instruction *foldV4EqualShuffleAndToV2Equal(Instruction &I);
+
public:
/// Create and insert the idiom we use to indicate a block is unreachable
/// without having to rewrite the CFG from within InstCombine.
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
index 6707d1abf5ca0..598008a3d8d24 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -4982,5 +4982,8 @@ Instruction *InstCombinerImpl::visitSelectInst(SelectInst &SI) {
return CallInst::Create(Scmp, {CmpLHS, ConstantInt::get(SI.getType(), 0)});
}
+ if (auto *Folded = foldV4EqualShuffleAndToV2Equal(SI))
+ return Folded;
+
return nullptr;
}
diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index 0ca5da1bbf251..1c71bb5387ab5 100644
--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -1296,6 +1296,90 @@ Value *InstCombinerImpl::foldUsingDistributiveLaws(BinaryOperator &I) {
return SimplifySelectsFeedingBinaryOp(I, LHS, RHS);
}
+// Prior to SSE4.1, performing equality comparison on v2i64 types require a
+// comparison on v4i32 types using the following pattern:
+//
+// %3 = icmp eq <4 x i32> %1, %2
+//
+// %4 = sext <4 x i1> %3 to <4 x i32>
+//
+// %5 = shufflevector <4 x i32> %4, <4 x i32> poison, <4 x i32> <i32 1, i32 0,
+// i32 3, i32 2>
+//
+// %6 = select <4 x i1> %3, <4 x i32> %5, <4 x i32> zeroinitializer
+//
+// OR
+//
+// %6 = and <4 x i32> %sext, %shuffle
+//
+// We should detect such patterns and fold them into:
+//
+// %3 = bitcast <4 x i32> %1 to <2 x i64>
+//
+// %4 = bitcast <4 x i32> %2 to <2 x i64>
+//
+// %5 = icmp eq <2 x i64> %3, %4
+//
+// %6 = bitcast <2 x i64> %5 to <4 x i32>
+//
+Instruction *InstCombinerImpl::foldV4EqualShuffleAndToV2Equal(Instruction &I) {
+  Value *Equal, *Shuffle, *L, *R;
+  CmpPredicate Pred;
+  SmallVector<int> Mask = {1, 0, 3, 2};
+
+  // Check pattern existence
+  if (!match(&I,
+             m_CombineOr(m_c_And(m_SExt(m_Value(Equal)),
+                                 m_SExtOrSelf(m_Value(Shuffle))),
+                         m_Select(m_Value(Equal),
+                                  m_SExtOrSelf(m_Value(Shuffle)), m_Zero()))) ||
+      !match(Shuffle, m_Shuffle(m_SExtOrSelf(m_Specific(Equal)), m_Poison(),
+                                m_SpecificMask(Mask))) ||
+      !match(Equal, m_ICmp(Pred, m_Value(L), m_Value(R))) ||
+      Pred != CmpInst::ICMP_EQ)
+    return nullptr;
+
+  // Check argument type: only fixed-width integer vectors are handled.
+  auto *OldVecType = cast<VectorType>(L->getType());
+
+  if (OldVecType->isScalableTy() ||
+      !OldVecType->getElementType()->isIntegerTy())
+    return nullptr;
+
+  int ElementCount = OldVecType->getElementCount().getFixedValue();
+  int ElementBitWidth = OldVecType->getElementType()->getIntegerBitWidth();
+
+  if (ElementCount != 4 || ElementBitWidth != 32)
+    return nullptr;
+
+  // Bail out if intermediate instructions have uses outside the pattern.
+  if (!Shuffle->hasOneUse())
+    return nullptr;
+
+  for (auto *U : Equal->users()) {
+    if (U == &I || U == Shuffle)
+      continue;
+    if (!isa<CastInst>(U))
+      return nullptr;
+    for (auto *CastUser : U->users())
+      if (CastUser != &I && CastUser != Shuffle)
+        return nullptr;
+  }
+
+  LLVM_DEBUG(dbgs() << "IC: Folding equal-shuffle-and pattern" << '\n');
+
+  // Perform folding: compare the operands as <2 x i64> and bitcast back.
+  auto *NewElementType = IntegerType::get(I.getContext(), ElementBitWidth * 2);
+  auto *NewVecType = VectorType::get(NewElementType, ElementCount / 2, false);
+  auto *BitCastL = Builder.CreateBitCast(L, NewVecType);
+  auto *BitCastR = Builder.CreateBitCast(R, NewVecType);
+  auto *Cmp = Builder.CreateICmp(Pred, BitCastL, BitCastR);
+  auto *SExt = Builder.CreateSExt(Cmp, NewVecType);
+  auto *BitCastCmp = Builder.CreateBitCast(SExt, OldVecType);
+
+  return replaceInstUsesWith(I, BitCastCmp);
+}
+
static std::optional<std::pair<Value *, Value *>>
matchSymmetricPhiNodesPair(PHINode *LHS, PHINode *RHS) {
if (LHS->getParent() != RHS->getParent())
diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index 9832b396bde71..1f37e435b8080 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -153,7 +153,6 @@ class VectorCombine {
bool foldEquivalentReductionCmp(Instruction &I);
bool foldSelectShuffle(Instruction &I, bool FromReduction = false);
bool foldInterleaveIntrinsics(Instruction &I);
- bool foldEqualShuffleAnd(Instruction &I);
bool shrinkType(Instruction &I);
bool shrinkLoadForShuffles(Instruction &I);
bool shrinkPhiOfShuffles(Instruction &I);
@@ -5436,93 +5435,6 @@ bool VectorCombine::foldInterleaveIntrinsics(Instruction &I) {
return true;
}
-// Prior to SSE4.1, performing equality comparison on v2i64 types require a
-// comparison on v4i32 types using the following pattern:
-//
-// %3 = icmp eq <4 x i32> %1, %2
-//
-// %4 = sext <4 x i1> %3 to <4 x i32>
-//
-// %5 = shufflevector <4 x i32> %4, <4 x i32> poison, <4 x i32> <i32 1, i32 0,
-// i32 3, i32 2>
-//
-// %6 = select <4 x i1> %3, <4 x i32> %5, <4 x i32> zeroinitializer
-//
-// OR
-//
-// %6 = and <4 x i32> %sext, %shuffle
-//
-// We should detect such patterns and fold them into:
-//
-// %3 = bitcast <4 x i32> %1 to <2 x i64>
-//
-// %4 = bitcast <4 x i32> %2 to <2 x i64>
-//
-// %5 = icmp eq <2 x i64> %3, %4
-//
-// %6 = bitcast <2 x i64> %5 to <4 x i32>
-//
-bool VectorCombine::foldEqualShuffleAnd(Instruction &I) {
- Value *Equal, *Shuffle, *L, *R;
- CmpPredicate Pred;
- SmallVector<int> Mask = {1, 0, 3, 2};
-
- // Check pattern existance
- if (!match(&I, m_CombineOr(m_c_And(m_SExt(m_Value(Equal)),
- m_SExtOrSelf(m_Value(Shuffle))),
- m_Select(m_Value(Equal),
- m_SExtOrSelf(m_Value(Shuffle)),
- m_ZeroInt()))) ||
- !match(Shuffle, m_Shuffle(m_SExtOrSelf(m_Specific(Equal)), m_Poison(),
- m_SpecificMask(Mask))) ||
- !match(Equal, m_ICmp(Pred, m_Value(L), m_Value(R))) ||
- !CmpInst::isEquality(Pred))
- return false;
-
- // Check argument type
- auto *OldVecType = cast<VectorType>(L->getType());
-
- if (OldVecType->isScalableTy() ||
- !OldVecType->getElementType()->isIntegerTy())
- return false;
-
- int ElementCount = OldVecType->getElementCount().getFixedValue();
- int ElementBitWidth = OldVecType->getElementType()->getIntegerBitWidth();
-
- if (ElementCount != 4 || ElementBitWidth != 32)
- return false;
-
- // Check uses outside pattern
- if (!Shuffle->hasOneUse())
- return false;
-
- for (auto *U : Equal->users()) {
- if (U == &I || U == Shuffle)
- continue;
- if (!isa<llvm::CastInst>(U))
- return false;
- for (auto *U : U->users())
- if (U != &I && U != Shuffle)
- return false;
- }
-
- LLVM_DEBUG(dbgs() << "VC: Found equal-shuffle-and pattern" << '\n');
-
- // Perform folding
- IRBuilder Builder(&I);
- auto *NewElementType = IntegerType::get(I.getContext(), ElementBitWidth * 2);
- auto *NewVecType = VectorType::get(NewElementType, ElementCount / 2, false);
- auto *BitCastL = Builder.CreateBitCast(L, NewVecType);
- auto *BitCastR = Builder.CreateBitCast(R, NewVecType);
- auto *Cmp = Builder.CreateICmp(Pred, BitCastL, BitCastR);
- auto *SExt = Builder.CreateSExt(Cmp, NewVecType);
- auto *BitCastCmp = Builder.CreateBitCast(SExt, OldVecType);
-
- replaceValue(I, *BitCastCmp);
-
- return false;
-}
-
// Attempt to shrink loads that are only used by shufflevector instructions.
bool VectorCombine::shrinkLoadForShuffles(Instruction &I) {
auto *OldLoad = dyn_cast<LoadInst>(&I);
@@ -5865,17 +5777,11 @@ bool VectorCombine::run() {
return true;
if (foldBitOpOfCastConstant(I))
return true;
- if (foldEqualShuffleAnd(I))
- return true;
break;
case Instruction::PHI:
if (shrinkPhiOfShuffles(I))
return true;
break;
- case Instruction::Select:
- if (foldEqualShuffleAnd(I))
- return true;
- break;
default:
if (shrinkType(I))
return true;
diff --git a/llvm/test/Transforms/VectorCombine/fold-v4i32-eq-shuffle-and-to-v2i64-eq.ll b/llvm/test/Transforms/InstCombine/fold-v4i32-eq-shuffle-and-to-v2i64-eq.ll
similarity index 88%
rename from llvm/test/Transforms/VectorCombine/fold-v4i32-eq-shuffle-and-to-v2i64-eq.ll
rename to llvm/test/Transforms/InstCombine/fold-v4i32-eq-shuffle-and-to-v2i64-eq.ll
index 4e13afd360673..ff23a9d4e277c 100644
--- a/llvm/test/Transforms/VectorCombine/fold-v4i32-eq-shuffle-and-to-v2i64-eq.ll
+++ b/llvm/test/Transforms/InstCombine/fold-v4i32-eq-shuffle-and-to-v2i64-eq.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
-; RUN: opt < %s -passes=vector-combine -S | FileCheck %s
+; RUN: opt < %s -passes=instcombine -S | FileCheck %s
define <4 x i32> @cmpeq_epi64_select(<4 x i32> noundef %a, <4 x i32> noundef %b) {
; CHECK-LABEL: define <4 x i32> @cmpeq_epi64_select(
@@ -62,7 +62,7 @@ define <4 x i32> @cmpeq_epi64_multi_use_cmp(<4 x i32> noundef %a, <4 x i32> noun
; CHECK-NEXT: call void @use.v4i1(<4 x i1> [[CMP]])
; CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[SEXT]], <4 x i32> poison, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
-; CHECK-NEXT: [[AND:%.*]] = and <4 x i32> [[SHUFFLE]], [[SEXT]]
+; CHECK-NEXT: [[AND:%.*]] = select <4 x i1> [[CMP]], <4 x i32> [[SHUFFLE]], <4 x i32> zeroinitializer
; CHECK-NEXT: ret <4 x i32> [[AND]]
;
%cmp = icmp eq <4 x i32> %a, %b
@@ -80,7 +80,7 @@ define <4 x i32> @cmpeq_epi64_multi_use_sext(<4 x i32> noundef %a, <4 x i32> nou
; CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
; CHECK-NEXT: call void @use.v4i32(<4 x i32> [[SEXT]])
; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[SEXT]], <4 x i32> poison, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
-; CHECK-NEXT: [[AND:%.*]] = and <4 x i32> [[SHUFFLE]], [[SEXT]]
+; CHECK-NEXT: [[AND:%.*]] = select <4 x i1> [[CMP]], <4 x i32> [[SHUFFLE]], <4 x i32> zeroinitializer
; CHECK-NEXT: ret <4 x i32> [[AND]]
;
%cmp = icmp eq <4 x i32> %a, %b
@@ -98,7 +98,7 @@ define <4 x i32> @cmpeq_epi64_multi_use_shuffle(<4 x i32> noundef %a, <4 x i32>
; CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[SEXT]], <4 x i32> poison, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
; CHECK-NEXT: call void @use.v4i32(<4 x i32> [[SHUFFLE]])
-; CHECK-NEXT: [[AND:%.*]] = and <4 x i32> [[SHUFFLE]], [[SEXT]]
+; CHECK-NEXT: [[AND:%.*]] = select <4 x i1> [[CMP]], <4 x i32> [[SHUFFLE]], <4 x i32> zeroinitializer
; CHECK-NEXT: ret <4 x i32> [[AND]]
;
%cmp = icmp eq <4 x i32> %a, %b
@@ -113,8 +113,8 @@ define <4 x i32> @cmpeq_epi64_select_neg_0(<4 x i32> noundef %a, <4 x i32> nound
; CHECK-LABEL: define <4 x i32> @cmpeq_epi64_select_neg_0(
; CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) {
; CHECK-NEXT: [[CMP:%.*]] = icmp sgt <4 x i32> [[A]], [[B]]
-; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i1> [[CMP]], <4 x i1> poison, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
-; CHECK-NEXT: [[SHUFFLE:%.*]] = sext <4 x i1> [[TMP1]] to <4 x i32>
+; CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
+; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[SEXT]], <4 x i32> poison, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
; CHECK-NEXT: [[SELECT:%.*]] = select <4 x i1> [[CMP]], <4 x i32> [[SHUFFLE]], <4 x i32> zeroinitializer
; CHECK-NEXT: ret <4 x i32> [[SELECT]]
;
@@ -131,7 +131,7 @@ define <4 x i32> @cmpeq_epi64_and_neg_1(<4 x i32> noundef %a, <4 x i32> noundef
; CHECK-NEXT: [[CMP:%.*]] = icmp eq <4 x i32> [[A]], [[B]]
; CHECK-NEXT: [[SEXT:%.*]] = zext <4 x i1> [[CMP]] to <4 x i32>
; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[SEXT]], <4 x i32> poison, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
-; CHECK-NEXT: [[AND:%.*]] = and <4 x i32> [[SEXT]], [[SHUFFLE]]
+; CHECK-NEXT: [[AND:%.*]] = and <4 x i32> [[SHUFFLE]], [[SEXT]]
; CHECK-NEXT: ret <4 x i32> [[AND]]
;
%cmp = icmp eq <4 x i32> %a, %b
@@ -145,8 +145,8 @@ define <4 x i32> @cmpeq_epi64_select_neg_2(<4 x i32> noundef %a, <4 x i32> nound
; CHECK-LABEL: define <4 x i32> @cmpeq_epi64_select_neg_2(
; CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) {
; CHECK-NEXT: [[CMP:%.*]] = icmp eq <4 x i32> [[A]], [[B]]
-; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i1> [[CMP]], <4 x i1> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT: [[SHUFFLE:%.*]] = sext <4 x i1> [[TMP1]] to <4 x i32>
+; CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
+; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[SEXT]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; CHECK-NEXT: [[SELECT:%.*]] = select <4 x i1> [[CMP]], <4 x i32> [[SHUFFLE]], <4 x i32> zeroinitializer
; CHECK-NEXT: ret <4 x i32> [[SELECT]]
;
@@ -162,13 +162,13 @@ define <4 x i32> @cmpeq_epi64_select_neg_3(<4 x i32> noundef %a, <4 x i32> nound
; CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) {
; CHECK-NEXT: [[CMP:%.*]] = icmp eq <4 x i32> [[A]], [[B]]
; CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
-; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[SEXT]], <4 x i32> poison, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
-; CHECK-NEXT: [[SELECT:%.*]] = select <4 x i1> [[CMP]], <4 x i32> [[SHUFFLE]], <4 x i32> [[SEXT]]
-; CHECK-NEXT: ret <4 x i32> [[SELECT]]
+; CHECK-NEXT: [[SHUFFLE1:%.*]] = shufflevector <4 x i32> [[SEXT]], <4 x i32> poison, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
+; CHECK-NEXT: [[SHUFFLE:%.*]] = select <4 x i1> [[CMP]], <4 x i32> [[SHUFFLE1]], <4 x i32> splat (i32 1)
+; CHECK-NEXT: ret <4 x i32> [[SHUFFLE]]
;
%cmp = icmp eq <4 x i32> %a, %b
%sext = sext <4 x i1> %cmp to <4 x i32>
- %shuffle = shufflevector <4 x i32> %sext, <4 x i32> poison, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
- %select = select <4 x i1> %cmp, <4 x i32> %shuffle, <4 x i32> %sext
+ %shuffle = shufflevector <4 x i32> %sext, <4 x i32> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
+ %select = select <4 x i1> %cmp, <4 x i32> %shuffle, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
ret <4 x i32> %select
}
>From 4b9e070d638ff320b0f085715e85dd03703523c5 Mon Sep 17 00:00:00 2001
From: Fuad Ismail <fuad1502 at gmail.com>
Date: Thu, 5 Mar 2026 15:25:30 +0700
Subject: [PATCH 08/13] Add folding v2i64 cmpgt using v4i32 pattern test
---
.../fold-v2i64-cmpgt-using-v4i32-pattern.ll | 86 +++++++++++++++++++
1 file changed, 86 insertions(+)
create mode 100644 llvm/test/Transforms/InstCombine/fold-v2i64-cmpgt-using-v4i32-pattern.ll
diff --git a/llvm/test/Transforms/InstCombine/fold-v2i64-cmpgt-using-v4i32-pattern.ll b/llvm/test/Transforms/InstCombine/fold-v2i64-cmpgt-using-v4i32-pattern.ll
new file mode 100644
index 0000000000000..3442b004eff13
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/fold-v2i64-cmpgt-using-v4i32-pattern.ll
@@ -0,0 +1,86 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
+; RUN: opt < %s -passes=instcombine -S | FileCheck %s
+
+define <4 x i32> @alt_cmpgt_epi64(<4 x i32> noundef %a, <4 x i32> noundef %b) {
+; CHECK-LABEL: define <4 x i32> @alt_cmpgt_epi64(
+; CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) {
+; CHECK-NEXT: [[XOR_A:%.*]] = xor <4 x i32> [[A]], <i32 -2147483648, i32 0, i32 -2147483648, i32 0>
+; CHECK-NEXT: [[XOR_B:%.*]] = xor <4 x i32> [[B]], <i32 -2147483648, i32 0, i32 -2147483648, i32 0>
+; CHECK-NEXT: [[GT:%.*]] = icmp sgt <4 x i32> [[XOR_A]], [[XOR_B]]
+; CHECK-NEXT: [[SEXT_GT:%.*]] = sext <4 x i1> [[GT]] to <4 x i32>
+; CHECK-NEXT: [[GT_0:%.*]] = shufflevector <4 x i32> [[SEXT_GT]], <4 x i32> poison, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
+; CHECK-NEXT: [[GT_1:%.*]] = shufflevector <4 x i32> [[SEXT_GT]], <4 x i32> poison, <4 x i32> <i32 1, i32 1, i32 3, i32 3>
+; CHECK-NEXT: [[EQ:%.*]] = icmp eq <4 x i32> [[A]], [[B]]
+; CHECK-NEXT: [[SEXT_EQ:%.*]] = sext <4 x i1> [[EQ]] to <4 x i32>
+; CHECK-NEXT: [[EQ_0:%.*]] = shufflevector <4 x i32> [[SEXT_EQ]], <4 x i32> poison, <4 x i32> <i32 1, i32 1, i32 3, i32 3>
+; CHECK-NEXT: [[AND:%.*]] = and <4 x i32> [[GT_0]], [[EQ_0]]
+; CHECK-NEXT: [[OR:%.*]] = or <4 x i32> [[AND]], [[GT_1]]
+; CHECK-NEXT: ret <4 x i32> [[OR]]
+;
+ %xor.a = xor <4 x i32> %a, <i32 -2147483648, i32 0, i32 -2147483648, i32 0>
+ %xor.b = xor <4 x i32> %b, <i32 -2147483648, i32 0, i32 -2147483648, i32 0>
+ %gt = icmp sgt <4 x i32> %xor.a, %xor.b
+ %sext.gt = sext <4 x i1> %gt to <4 x i32>
+ %gt.0 = shufflevector <4 x i32> %sext.gt, <4 x i32> poison, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
+ %gt.1 = shufflevector <4 x i32> %sext.gt, <4 x i32> poison, <4 x i32> <i32 1, i32 1, i32 3, i32 3>
+ %eq = icmp eq <4 x i32> %a, %b
+ %sext.eq = sext <4 x i1> %eq to <4 x i32>
+ %eq.0 = shufflevector <4 x i32> %sext.eq, <4 x i32> poison, <4 x i32> <i32 1, i32 1, i32 3, i32 3>
+ %and = and <4 x i32> %gt.0, %eq.0
+ %or = or <4 x i32> %and, %gt.1
+ ret <4 x i32> %or
+}
+
+define <4 x i32> @alt_cmpgt_epu64_1(<4 x i32> noundef %a, <4 x i32> noundef %b) {
+; CHECK-LABEL: define <4 x i32> @alt_cmpgt_epu64_1(
+; CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) {
+; CHECK-NEXT: [[GT:%.*]] = icmp ugt <4 x i32> [[A]], [[B]]
+; CHECK-NEXT: [[SEXT_GT:%.*]] = sext <4 x i1> [[GT]] to <4 x i32>
+; CHECK-NEXT: [[GT_0:%.*]] = shufflevector <4 x i32> [[SEXT_GT]], <4 x i32> poison, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
+; CHECK-NEXT: [[GT_1:%.*]] = shufflevector <4 x i32> [[SEXT_GT]], <4 x i32> poison, <4 x i32> <i32 1, i32 1, i32 3, i32 3>
+; CHECK-NEXT: [[EQ:%.*]] = icmp eq <4 x i32> [[A]], [[B]]
+; CHECK-NEXT: [[SEXT_EQ:%.*]] = sext <4 x i1> [[EQ]] to <4 x i32>
+; CHECK-NEXT: [[EQ_0:%.*]] = shufflevector <4 x i32> [[SEXT_EQ]], <4 x i32> poison, <4 x i32> <i32 1, i32 1, i32 3, i32 3>
+; CHECK-NEXT: [[AND:%.*]] = and <4 x i32> [[GT_0]], [[EQ_0]]
+; CHECK-NEXT: [[OR:%.*]] = or <4 x i32> [[AND]], [[GT_1]]
+; CHECK-NEXT: ret <4 x i32> [[OR]]
+;
+ %gt = icmp ugt <4 x i32> %a, %b
+ %sext.gt = sext <4 x i1> %gt to <4 x i32>
+ %gt.0 = shufflevector <4 x i32> %sext.gt, <4 x i32> poison, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
+ %gt.1 = shufflevector <4 x i32> %sext.gt, <4 x i32> poison, <4 x i32> <i32 1, i32 1, i32 3, i32 3>
+ %eq = icmp eq <4 x i32> %a, %b
+ %sext.eq = sext <4 x i1> %eq to <4 x i32>
+ %eq.0 = shufflevector <4 x i32> %sext.eq, <4 x i32> poison, <4 x i32> <i32 1, i32 1, i32 3, i32 3>
+ %and = and <4 x i32> %gt.0, %eq.0
+ %or = or <4 x i32> %and, %gt.1
+ ret <4 x i32> %or
+}
+
+define <4 x i32> @alt_cmpgt_epu64_2(<4 x i32> noundef %a, <4 x i32> noundef %b) {
+; CHECK-LABEL: define <4 x i32> @alt_cmpgt_epu64_2(
+; CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) {
+; CHECK-NEXT: [[GT:%.*]] = icmp ugt <4 x i32> [[A]], [[B]]
+; CHECK-NEXT: [[SEXT_GT:%.*]] = sext <4 x i1> [[GT]] to <4 x i32>
+; CHECK-NEXT: [[GT_0:%.*]] = shufflevector <4 x i32> [[SEXT_GT]], <4 x i32> poison, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
+; CHECK-NEXT: [[GT_1:%.*]] = shufflevector <4 x i32> [[SEXT_GT]], <4 x i32> poison, <4 x i32> <i32 1, i32 1, i32 3, i32 3>
+; CHECK-NEXT: [[EQ:%.*]] = icmp eq <4 x i32> [[A]], [[B]]
+; CHECK-NEXT: [[SEXT_EQ:%.*]] = sext <4 x i1> [[EQ]] to <4 x i32>
+; CHECK-NEXT: [[EQ_0:%.*]] = shufflevector <4 x i32> [[SEXT_EQ]], <4 x i32> poison, <4 x i32> <i32 1, i32 1, i32 3, i32 3>
+; CHECK-NEXT: [[AND:%.*]] = and <4 x i32> [[GT_0]], [[EQ_0]]
+; CHECK-NEXT: [[OR:%.*]] = or <4 x i32> [[AND]], [[GT_1]]
+; CHECK-NEXT: ret <4 x i32> [[OR]]
+;
+ %xor.a = xor <4 x i32> %a, <i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648>
+ %xor.b = xor <4 x i32> %b, <i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648>
+ %gt = icmp sgt <4 x i32> %xor.a, %xor.b
+ %sext.gt = sext <4 x i1> %gt to <4 x i32>
+ %gt.0 = shufflevector <4 x i32> %sext.gt, <4 x i32> poison, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
+ %gt.1 = shufflevector <4 x i32> %sext.gt, <4 x i32> poison, <4 x i32> <i32 1, i32 1, i32 3, i32 3>
+ %eq = icmp eq <4 x i32> %a, %b
+ %sext.eq = sext <4 x i1> %eq to <4 x i32>
+ %eq.0 = shufflevector <4 x i32> %sext.eq, <4 x i32> poison, <4 x i32> <i32 1, i32 1, i32 3, i32 3>
+ %and = and <4 x i32> %gt.0, %eq.0
+ %or = or <4 x i32> %and, %gt.1
+ ret <4 x i32> %or
+}
>From 28599ed5b4ef8a838cb589700c28fc739ab86f55 Mon Sep 17 00:00:00 2001
From: Fuad Ismail <fuad1502 at gmail.com>
Date: Fri, 6 Mar 2026 16:40:03 +0700
Subject: [PATCH 09/13] Apply folding for v2i64 greater comparison using v4i32
pattern
---
.../InstCombine/InstCombineAndOrXor.cpp | 3 +
.../InstCombine/InstCombineInternal.h | 2 +
.../InstCombine/InstructionCombining.cpp | 79 +++++++++++++++++++
.../fold-v2i64-cmpgt-using-v4i32-pattern.ll | 44 ++++-------
4 files changed, 99 insertions(+), 29 deletions(-)
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index 30c1e8e8aca73..b872ac423f0c9 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -4652,6 +4652,9 @@ Instruction *InstCombinerImpl::visitOr(BinaryOperator &I) {
if (Value *Res = FoldOrOfSelectSmaxToAbs(I, Builder))
return replaceInstUsesWith(I, Res);
+ if (Instruction *Folded = foldV2CmpGtUsingV4CmpGtPattern(I))
+ return Folded;
+
return nullptr;
}
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
index fed88cc84f46e..fb79291977eb6 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
+++ b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
@@ -486,6 +486,8 @@ class LLVM_LIBRARY_VISIBILITY InstCombinerImpl final
Instruction *foldV4EqualShuffleAndToV2Equal(Instruction &I);
+ Instruction *foldV2CmpGtUsingV4CmpGtPattern(BinaryOperator &I);
+
public:
/// Create and insert the idiom we use to indicate a block is unreachable
/// without having to rewrite the CFG from within InstCombine.
diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index 1c71bb5387ab5..c2ef56ad67ce5 100644
--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -1380,6 +1380,85 @@ Instruction *InstCombinerImpl::foldV4EqualShuffleAndToV2Equal(Instruction &I) {
return replaceInstUsesWith(I, BitCastCmp);
}
+// Prior to SSE4.2, to perform greater (or less than) comparisons between two
+// v2i64 values, the comparison is performed on v4i32 values:
+//
+// (A1, A2) -> (A1Lower, A1Upper, A2Lower, A2Upper)
+// (B1, B2) -> (B1Lower, B1Upper, B2Lower, B2Upper)
+// (Result1, Result2) -> (Result1, Result1, Result2, Result2)
+//
+// where,
+//
+// ResultX = (GtLowerX & EqUpperX) | (GtUpperX)
+// GtLowerX = AXLower OP BXLower
+// GtUpperX = AXUpper OP BXUpper
+// EqUpperX = AXUpper EQ BXUpper
+//
+// Upper and lower values are obtained through vector shuffles.
+Instruction *
+InstCombinerImpl::foldV2CmpGtUsingV4CmpGtPattern(BinaryOperator &I) {
+ if (I.getOpcode() != Instruction::Or)
+ return nullptr;
+
+ auto *OldVecType = dyn_cast<VectorType>(I.getType());
+
+ if (!OldVecType || OldVecType->isScalableTy() ||
+ !OldVecType->getElementType()->isIntegerTy(32) ||
+ OldVecType->getElementCount().getFixedValue() != 4)
+ return nullptr;
+
+ Value *A, *B, *Greater1, *Greater2, *Greater;
+ CmpPredicate PredEq;
+ SmallVector<int> MaskLower = {0, 0, 2, 2};
+ SmallVector<int> MaskUpper = {1, 1, 3, 3};
+
+ auto GreaterLower = m_SExtOrSelf(m_Shuffle(
+ m_SExtOrSelf(m_Value(Greater1)), m_Poison(), m_SpecificMask(MaskLower)));
+ auto GreaterUpper = m_SExtOrSelf(m_Shuffle(
+ m_SExtOrSelf(m_Value(Greater2)), m_Poison(), m_SpecificMask(MaskUpper)));
+ auto EqUpper = m_SExtOrSelf(
+ m_Shuffle(m_SExtOrSelf(m_c_ICmp(PredEq, m_Value(A), m_Value(B))),
+ m_Poison(), m_SpecificMask(MaskUpper)));
+
+ if (!match(&I, m_c_Or(m_c_And(GreaterLower, EqUpper), GreaterUpper)) ||
+ Greater1 != Greater2 || PredEq != ICmpInst::ICMP_EQ)
+ return nullptr;
+
+ Greater = Greater1;
+
+ auto *Zero = ConstantInt::get(IntegerType::getInt32Ty(I.getContext()), 0);
+ auto *Flip =
+ ConstantInt::get(IntegerType::getInt32Ty(I.getContext()), 0x80000000);
+ auto *FlipLower = ConstantVector::get({Flip, Zero, Flip, Zero});
+ auto *FlipAll = ConstantVector::get({Flip, Flip, Flip, Flip});
+
+ CmpPredicate PredGt;
+ auto UGt = m_c_ICmp(PredGt, m_Specific(A), m_Specific(B));
+ auto UGtAlt = m_c_ICmp(PredGt, m_c_Xor(m_Specific(A), m_Specific(FlipAll)),
+ m_c_Xor(m_Specific(B), m_Specific(FlipAll)));
+ auto SGt = m_c_ICmp(PredGt, m_c_Xor(m_Specific(A), m_Specific(FlipLower)),
+ m_c_Xor(m_Specific(B), m_Specific(FlipLower)));
+
+ if (!(match(Greater, UGt) &&
+ (PredGt == ICmpInst::ICMP_UGT || PredGt == ICmpInst::ICMP_ULT)) &&
+ !((match(Greater, SGt) || match(Greater, UGtAlt)) &&
+ (PredGt == ICmpInst::ICMP_SGT || PredGt == ICmpInst::ICMP_SLT)))
+ return nullptr;
+
+ LLVM_DEBUG(dbgs() << "Found V2CmpGt using V4CmpGt pattern" << '\n');
+
+ // Perform folding
+ auto *NewElementType = IntegerType::get(I.getContext(), 64);
+ auto *NewVecType = VectorType::get(NewElementType, 2, false);
+ auto *BitCastA = Builder.CreateBitCast(A, NewVecType);
+ auto *BitCastB = Builder.CreateBitCast(B, NewVecType);
+ auto *Cmp = Builder.CreateICmp(PredGt, BitCastA, BitCastB);
+ auto *SExt = Builder.CreateSExt(Cmp, NewVecType);
+ auto *BitCastCmp = Builder.CreateBitCast(SExt, OldVecType);
+
+ return replaceInstUsesWith(I, BitCastCmp);
+}
+
static std::optional<std::pair<Value *, Value *>>
matchSymmetricPhiNodesPair(PHINode *LHS, PHINode *RHS) {
if (LHS->getParent() != RHS->getParent())
diff --git a/llvm/test/Transforms/InstCombine/fold-v2i64-cmpgt-using-v4i32-pattern.ll b/llvm/test/Transforms/InstCombine/fold-v2i64-cmpgt-using-v4i32-pattern.ll
index 3442b004eff13..1ca2c71f988f3 100644
--- a/llvm/test/Transforms/InstCombine/fold-v2i64-cmpgt-using-v4i32-pattern.ll
+++ b/llvm/test/Transforms/InstCombine/fold-v2i64-cmpgt-using-v4i32-pattern.ll
@@ -4,17 +4,11 @@
define <4 x i32> @alt_cmpgt_epi64(<4 x i32> noundef %a, <4 x i32> noundef %b) {
; CHECK-LABEL: define <4 x i32> @alt_cmpgt_epi64(
; CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) {
-; CHECK-NEXT: [[XOR_A:%.*]] = xor <4 x i32> [[A]], <i32 -2147483648, i32 0, i32 -2147483648, i32 0>
-; CHECK-NEXT: [[XOR_B:%.*]] = xor <4 x i32> [[B]], <i32 -2147483648, i32 0, i32 -2147483648, i32 0>
-; CHECK-NEXT: [[GT:%.*]] = icmp sgt <4 x i32> [[XOR_A]], [[XOR_B]]
-; CHECK-NEXT: [[SEXT_GT:%.*]] = sext <4 x i1> [[GT]] to <4 x i32>
-; CHECK-NEXT: [[GT_0:%.*]] = shufflevector <4 x i32> [[SEXT_GT]], <4 x i32> poison, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
-; CHECK-NEXT: [[GT_1:%.*]] = shufflevector <4 x i32> [[SEXT_GT]], <4 x i32> poison, <4 x i32> <i32 1, i32 1, i32 3, i32 3>
-; CHECK-NEXT: [[EQ:%.*]] = icmp eq <4 x i32> [[A]], [[B]]
-; CHECK-NEXT: [[SEXT_EQ:%.*]] = sext <4 x i1> [[EQ]] to <4 x i32>
-; CHECK-NEXT: [[EQ_0:%.*]] = shufflevector <4 x i32> [[SEXT_EQ]], <4 x i32> poison, <4 x i32> <i32 1, i32 1, i32 3, i32 3>
-; CHECK-NEXT: [[AND:%.*]] = and <4 x i32> [[GT_0]], [[EQ_0]]
-; CHECK-NEXT: [[OR:%.*]] = or <4 x i32> [[AND]], [[GT_1]]
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[A]] to <2 x i64>
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[B]] to <2 x i64>
+; CHECK-NEXT: [[TMP3:%.*]] = icmp sgt <2 x i64> [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[TMP4:%.*]] = sext <2 x i1> [[TMP3]] to <2 x i64>
+; CHECK-NEXT: [[OR:%.*]] = bitcast <2 x i64> [[TMP4]] to <4 x i32>
; CHECK-NEXT: ret <4 x i32> [[OR]]
;
%xor.a = xor <4 x i32> %a, <i32 -2147483648, i32 0, i32 -2147483648, i32 0>
@@ -34,15 +28,11 @@ define <4 x i32> @alt_cmpgt_epi64(<4 x i32> noundef %a, <4 x i32> noundef %b) {
define <4 x i32> @alt_cmpgt_epu64_1(<4 x i32> noundef %a, <4 x i32> noundef %b) {
; CHECK-LABEL: define <4 x i32> @alt_cmpgt_epu64_1(
; CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) {
-; CHECK-NEXT: [[GT:%.*]] = icmp ugt <4 x i32> [[A]], [[B]]
-; CHECK-NEXT: [[SEXT_GT:%.*]] = sext <4 x i1> [[GT]] to <4 x i32>
-; CHECK-NEXT: [[GT_0:%.*]] = shufflevector <4 x i32> [[SEXT_GT]], <4 x i32> poison, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
-; CHECK-NEXT: [[GT_1:%.*]] = shufflevector <4 x i32> [[SEXT_GT]], <4 x i32> poison, <4 x i32> <i32 1, i32 1, i32 3, i32 3>
-; CHECK-NEXT: [[EQ:%.*]] = icmp eq <4 x i32> [[A]], [[B]]
-; CHECK-NEXT: [[SEXT_EQ:%.*]] = sext <4 x i1> [[EQ]] to <4 x i32>
-; CHECK-NEXT: [[EQ_0:%.*]] = shufflevector <4 x i32> [[SEXT_EQ]], <4 x i32> poison, <4 x i32> <i32 1, i32 1, i32 3, i32 3>
-; CHECK-NEXT: [[AND:%.*]] = and <4 x i32> [[GT_0]], [[EQ_0]]
-; CHECK-NEXT: [[OR:%.*]] = or <4 x i32> [[AND]], [[GT_1]]
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[A]] to <2 x i64>
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[B]] to <2 x i64>
+; CHECK-NEXT: [[TMP3:%.*]] = icmp ugt <2 x i64> [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[TMP4:%.*]] = sext <2 x i1> [[TMP3]] to <2 x i64>
+; CHECK-NEXT: [[OR:%.*]] = bitcast <2 x i64> [[TMP4]] to <4 x i32>
; CHECK-NEXT: ret <4 x i32> [[OR]]
;
%gt = icmp ugt <4 x i32> %a, %b
@@ -60,15 +50,11 @@ define <4 x i32> @alt_cmpgt_epu64_1(<4 x i32> noundef %a, <4 x i32> noundef %b)
define <4 x i32> @alt_cmpgt_epu64_2(<4 x i32> noundef %a, <4 x i32> noundef %b) {
; CHECK-LABEL: define <4 x i32> @alt_cmpgt_epu64_2(
; CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) {
-; CHECK-NEXT: [[GT:%.*]] = icmp ugt <4 x i32> [[A]], [[B]]
-; CHECK-NEXT: [[SEXT_GT:%.*]] = sext <4 x i1> [[GT]] to <4 x i32>
-; CHECK-NEXT: [[GT_0:%.*]] = shufflevector <4 x i32> [[SEXT_GT]], <4 x i32> poison, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
-; CHECK-NEXT: [[GT_1:%.*]] = shufflevector <4 x i32> [[SEXT_GT]], <4 x i32> poison, <4 x i32> <i32 1, i32 1, i32 3, i32 3>
-; CHECK-NEXT: [[EQ:%.*]] = icmp eq <4 x i32> [[A]], [[B]]
-; CHECK-NEXT: [[SEXT_EQ:%.*]] = sext <4 x i1> [[EQ]] to <4 x i32>
-; CHECK-NEXT: [[EQ_0:%.*]] = shufflevector <4 x i32> [[SEXT_EQ]], <4 x i32> poison, <4 x i32> <i32 1, i32 1, i32 3, i32 3>
-; CHECK-NEXT: [[AND:%.*]] = and <4 x i32> [[GT_0]], [[EQ_0]]
-; CHECK-NEXT: [[OR:%.*]] = or <4 x i32> [[AND]], [[GT_1]]
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[A]] to <2 x i64>
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[B]] to <2 x i64>
+; CHECK-NEXT: [[TMP3:%.*]] = icmp ugt <2 x i64> [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[TMP4:%.*]] = sext <2 x i1> [[TMP3]] to <2 x i64>
+; CHECK-NEXT: [[OR:%.*]] = bitcast <2 x i64> [[TMP4]] to <4 x i32>
; CHECK-NEXT: ret <4 x i32> [[OR]]
;
%xor.a = xor <4 x i32> %a, <i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648>
>From 5af139970d24bccc8cf266543cced2d34d01543f Mon Sep 17 00:00:00 2001
From: Fuad Ismail <fuad1502 at gmail.com>
Date: Fri, 6 Mar 2026 19:28:08 +0700
Subject: [PATCH 10/13] Remove multi-use test and improve documentation
comments
---
.../InstCombine/InstructionCombining.cpp | 81 ++++++-------------
.../fold-v4i32-eq-shuffle-and-to-v2i64-eq.ll | 57 -------------
2 files changed, 26 insertions(+), 112 deletions(-)
diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index c2ef56ad67ce5..f56883e57740c 100644
--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -1296,46 +1296,32 @@ Value *InstCombinerImpl::foldUsingDistributiveLaws(BinaryOperator &I) {
return SimplifySelectsFeedingBinaryOp(I, LHS, RHS);
}
-// Prior to SSE4.1, performing equality comparison on v2i64 types require a
-// comparison on v4i32 types using the following pattern:
-//
-// %3 = icmp eq <4 x i32> %1, %2
-//
-// %4 = sext <4 x i1> %3 to <4 x i32>
-//
-// %5 = shufflevector <4 x i32> %4, <4 x i32> poison, <4 x i32> <i32 1, i32 0,
-// i32 3, i32 2>
-//
-// %6 = select <4 x i1> %3, <4 x i32> %5, <4 x i32> zeroinitializer
-//
-// OR
-//
-// %6 = and <4 x i32> %sext, %shuffle
-//
-// We should detect such patterns and fold them into:
-//
-// %3 = bitcast <4 x i32> %1 to <2 x i64>
+// Prior to SSE4.1, to perform equality comparisons between two
+// v2i64 values, the comparison is performed on v4i32 values:
//
-// %4 = bitcast <4 x i32> %2 to <2 x i64>
+// (A1, A2) -> (A1Lower, A1Upper, A2Lower, A2Upper)
+// (B1, B2) -> (B1Lower, B1Upper, B2Lower, B2Upper)
+// (Result1, Result2) -> (Result1, Result1, Result2, Result2)
//
-// %5 = icmp eq <2 x i64> %3, %4
+// where,
//
-// %6 = bitcast <2 x i64> %5 to <4 x i32>
+// ResultX = EqLowerX & EqUpperX
+// EqLowerX = AXLower == BXLower
+// EqUpperX = AXUpper == BXUpper
//
+// Bitwise AND between the upper and lower parts can be achieved by performing
+// the operation between the original and shuffled equality vector.
Instruction *InstCombinerImpl::foldV4EqualShuffleAndToV2Equal(Instruction &I) {
- Value *Equal, *Shuffle, *L, *R;
+ Value *L, *R;
CmpPredicate Pred;
SmallVector<int> Mask = {1, 0, 3, 2};
// Check pattern existance
- if (!match(&I,
- m_CombineOr(m_c_And(m_SExt(m_Value(Equal)),
- m_SExtOrSelf(m_Value(Shuffle))),
- m_Select(m_Value(Equal),
- m_SExtOrSelf(m_Value(Shuffle)), m_Zero()))) ||
- !match(Shuffle, m_Shuffle(m_SExtOrSelf(m_Specific(Equal)), m_Poison(),
- m_SpecificMask(Mask))) ||
- !match(Equal, m_ICmp(Pred, m_Value(L), m_Value(R))) ||
+ auto Equal = m_ICmp(Pred, m_Value(L), m_Value(R));
+ auto Shuffle = m_SExtOrSelf(
+ m_Shuffle(m_SExtOrSelf(Equal), m_Poison(), m_SpecificMask(Mask)));
+ if (!match(&I, m_CombineOr(m_c_And(m_SExt(Equal), Shuffle),
+ m_Select(Equal, Shuffle, m_Zero()))) ||
Pred != CmpInst::ICMP_EQ)
return nullptr;
@@ -1343,34 +1329,15 @@ Instruction *InstCombinerImpl::foldV4EqualShuffleAndToV2Equal(Instruction &I) {
auto *OldVecType = cast<VectorType>(L->getType());
if (OldVecType->isScalableTy() ||
- !OldVecType->getElementType()->isIntegerTy())
- return nullptr;
-
- int ElementCount = OldVecType->getElementCount().getFixedValue();
- int ElementBitWidth = OldVecType->getElementType()->getIntegerBitWidth();
-
- if (ElementCount != 4 || ElementBitWidth != 32)
- return nullptr;
-
- // Check uses outside pattern
- if (!Shuffle->hasOneUse())
+ !OldVecType->getElementType()->isIntegerTy(32) ||
+ OldVecType->getElementCount().getFixedValue() != 4)
return nullptr;
- for (auto *U : Equal->users()) {
- if (U == &I || U == Shuffle)
- continue;
- if (!isa<llvm::CastInst>(U))
- return nullptr;
- for (auto *U : U->users())
- if (U != &I && U != Shuffle)
- return nullptr;
- }
-
LLVM_DEBUG(dbgs() << "IC: Folding equal-shuffle-and pattern" << '\n');
// Perform folding
- auto *NewElementType = IntegerType::get(I.getContext(), ElementBitWidth * 2);
- auto *NewVecType = VectorType::get(NewElementType, ElementCount / 2, false);
+ auto *NewElementType = IntegerType::get(I.getContext(), 64);
+ auto *NewVecType = VectorType::get(NewElementType, 2, false);
auto *BitCastL = Builder.CreateBitCast(L, NewVecType);
auto *BitCastR = Builder.CreateBitCast(R, NewVecType);
auto *Cmp = Builder.CreateICmp(Pred, BitCastL, BitCastR);
@@ -1394,7 +1361,11 @@ Instruction *InstCombinerImpl::foldV4EqualShuffleAndToV2Equal(Instruction &I) {
// GtUpperX = AXUpper OP BXUpper
// EqUpperX = AXUpper EQ BXUpper
//
-// Upper and lower values are obtained through vector shuffles.
+// Upper and lower parts are obtained through vector shuffles.
+//
+// Note that comparisons of the lower parts are always unsigned comparisons
+// regardless of the resulting signedness. Also note that unsigned comparison
+// can be derived from signed comparison by flipping the MSB of both operands.
Instruction *
InstCombinerImpl::foldV2CmpGtUsingV4CmpGtPattern(BinaryOperator &I) {
if (I.getOpcode() != Instruction::Or)
diff --git a/llvm/test/Transforms/InstCombine/fold-v4i32-eq-shuffle-and-to-v2i64-eq.ll b/llvm/test/Transforms/InstCombine/fold-v4i32-eq-shuffle-and-to-v2i64-eq.ll
index ff23a9d4e277c..6300ff30f103c 100644
--- a/llvm/test/Transforms/InstCombine/fold-v4i32-eq-shuffle-and-to-v2i64-eq.ll
+++ b/llvm/test/Transforms/InstCombine/fold-v4i32-eq-shuffle-and-to-v2i64-eq.ll
@@ -52,63 +52,6 @@ define <4 x i32> @cmpeq_epi64_commutated_and(<4 x i32> noundef %a, <4 x i32> nou
ret <4 x i32> %and
}
-declare void @use.v4i1(<4 x i1>)
-declare void @use.v4i32(<4 x i32>)
-
-define <4 x i32> @cmpeq_epi64_multi_use_cmp(<4 x i32> noundef %a, <4 x i32> noundef %b) {
-; CHECK-LABEL: define <4 x i32> @cmpeq_epi64_multi_use_cmp(
-; CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) {
-; CHECK-NEXT: [[CMP:%.*]] = icmp eq <4 x i32> [[A]], [[B]]
-; CHECK-NEXT: call void @use.v4i1(<4 x i1> [[CMP]])
-; CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
-; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[SEXT]], <4 x i32> poison, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
-; CHECK-NEXT: [[AND:%.*]] = select <4 x i1> [[CMP]], <4 x i32> [[SHUFFLE]], <4 x i32> zeroinitializer
-; CHECK-NEXT: ret <4 x i32> [[AND]]
-;
- %cmp = icmp eq <4 x i32> %a, %b
- call void @use.v4i1(<4 x i1> %cmp)
- %sext = sext <4 x i1> %cmp to <4 x i32>
- %shuffle = shufflevector <4 x i32> %sext, <4 x i32> poison, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
- %and = and <4 x i32> %shuffle, %sext
- ret <4 x i32> %and
-}
-
-define <4 x i32> @cmpeq_epi64_multi_use_sext(<4 x i32> noundef %a, <4 x i32> noundef %b) {
-; CHECK-LABEL: define <4 x i32> @cmpeq_epi64_multi_use_sext(
-; CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) {
-; CHECK-NEXT: [[CMP:%.*]] = icmp eq <4 x i32> [[A]], [[B]]
-; CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
-; CHECK-NEXT: call void @use.v4i32(<4 x i32> [[SEXT]])
-; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[SEXT]], <4 x i32> poison, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
-; CHECK-NEXT: [[AND:%.*]] = select <4 x i1> [[CMP]], <4 x i32> [[SHUFFLE]], <4 x i32> zeroinitializer
-; CHECK-NEXT: ret <4 x i32> [[AND]]
-;
- %cmp = icmp eq <4 x i32> %a, %b
- %sext = sext <4 x i1> %cmp to <4 x i32>
- call void @use.v4i32(<4 x i32> %sext)
- %shuffle = shufflevector <4 x i32> %sext, <4 x i32> poison, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
- %and = and <4 x i32> %shuffle, %sext
- ret <4 x i32> %and
-}
-
-define <4 x i32> @cmpeq_epi64_multi_use_shuffle(<4 x i32> noundef %a, <4 x i32> noundef %b) {
-; CHECK-LABEL: define <4 x i32> @cmpeq_epi64_multi_use_shuffle(
-; CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) {
-; CHECK-NEXT: [[CMP:%.*]] = icmp eq <4 x i32> [[A]], [[B]]
-; CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
-; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[SEXT]], <4 x i32> poison, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
-; CHECK-NEXT: call void @use.v4i32(<4 x i32> [[SHUFFLE]])
-; CHECK-NEXT: [[AND:%.*]] = select <4 x i1> [[CMP]], <4 x i32> [[SHUFFLE]], <4 x i32> zeroinitializer
-; CHECK-NEXT: ret <4 x i32> [[AND]]
-;
- %cmp = icmp eq <4 x i32> %a, %b
- %sext = sext <4 x i1> %cmp to <4 x i32>
- %shuffle = shufflevector <4 x i32> %sext, <4 x i32> poison, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
- call void @use.v4i32(<4 x i32> %shuffle)
- %and = and <4 x i32> %shuffle, %sext
- ret <4 x i32> %and
-}
-
define <4 x i32> @cmpeq_epi64_select_neg_0(<4 x i32> noundef %a, <4 x i32> noundef %b) {
; CHECK-LABEL: define <4 x i32> @cmpeq_epi64_select_neg_0(
; CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) {
>From 834f12ce4ba7934e5efe8ab5d301e5349ad48010 Mon Sep 17 00:00:00 2001
From: Fuad Ismail <fuad1502 at gmail.com>
Date: Fri, 6 Mar 2026 23:33:21 +0700
Subject: [PATCH 11/13] Add commutated and negative test cases to fold v2i64
cmpgt using v4i32 pattern test
---
.../fold-v2i64-cmpgt-using-v4i32-pattern.ll | 82 +++++++++++++++++++
1 file changed, 82 insertions(+)
diff --git a/llvm/test/Transforms/InstCombine/fold-v2i64-cmpgt-using-v4i32-pattern.ll b/llvm/test/Transforms/InstCombine/fold-v2i64-cmpgt-using-v4i32-pattern.ll
index 1ca2c71f988f3..bd310db1a35a8 100644
--- a/llvm/test/Transforms/InstCombine/fold-v2i64-cmpgt-using-v4i32-pattern.ll
+++ b/llvm/test/Transforms/InstCombine/fold-v2i64-cmpgt-using-v4i32-pattern.ll
@@ -70,3 +70,85 @@ define <4 x i32> @alt_cmpgt_epu64_2(<4 x i32> noundef %a, <4 x i32> noundef %b)
%or = or <4 x i32> %and, %gt.1
ret <4 x i32> %or
}
+
+define <4 x i32> @alt_cmpgt_epi64_commutated_gt(<4 x i32> noundef %a, <4 x i32> noundef %b) {
+; CHECK-LABEL: define <4 x i32> @alt_cmpgt_epi64_commutated_gt(
+; CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) {
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[A]] to <2 x i64>
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[B]] to <2 x i64>
+; CHECK-NEXT: [[TMP3:%.*]] = icmp slt <2 x i64> [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[TMP4:%.*]] = sext <2 x i1> [[TMP3]] to <2 x i64>
+; CHECK-NEXT: [[OR:%.*]] = bitcast <2 x i64> [[TMP4]] to <4 x i32>
+; CHECK-NEXT: ret <4 x i32> [[OR]]
+;
+ %xor.a = xor <4 x i32> %a, <i32 -2147483648, i32 0, i32 -2147483648, i32 0>
+ %xor.b = xor <4 x i32> %b, <i32 -2147483648, i32 0, i32 -2147483648, i32 0>
+ %gt = icmp sgt <4 x i32> %xor.b, %xor.a
+ %sext.gt = sext <4 x i1> %gt to <4 x i32>
+ %gt.0 = shufflevector <4 x i32> %sext.gt, <4 x i32> poison, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
+ %gt.1 = shufflevector <4 x i32> %sext.gt, <4 x i32> poison, <4 x i32> <i32 1, i32 1, i32 3, i32 3>
+ %eq = icmp eq <4 x i32> %a, %b
+ %sext.eq = sext <4 x i1> %eq to <4 x i32>
+ %eq.0 = shufflevector <4 x i32> %sext.eq, <4 x i32> poison, <4 x i32> <i32 1, i32 1, i32 3, i32 3>
+ %and = and <4 x i32> %gt.0, %eq.0
+ %or = or <4 x i32> %and, %gt.1
+ ret <4 x i32> %or
+}
+
+define <4 x i32> @alt_cmpgt_epi64_neg_0(<4 x i32> noundef %a, <4 x i32> noundef %b) {
+; CHECK-LABEL: define <4 x i32> @alt_cmpgt_epi64_neg_0(
+; CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) {
+; CHECK-NEXT: [[XOR_A:%.*]] = xor <4 x i32> [[A]], <i32 -2147483648, i32 0, i32 -2147483648, i32 0>
+; CHECK-NEXT: [[XOR_B:%.*]] = xor <4 x i32> [[B]], <i32 -2147483648, i32 0, i32 -2147483648, i32 0>
+; CHECK-NEXT: [[GT:%.*]] = icmp sgt <4 x i32> [[XOR_B]], [[XOR_A]]
+; CHECK-NEXT: [[SEXT_GT:%.*]] = sext <4 x i1> [[GT]] to <4 x i32>
+; CHECK-NEXT: [[GT_1:%.*]] = shufflevector <4 x i32> [[SEXT_GT]], <4 x i32> poison, <4 x i32> <i32 1, i32 1, i32 3, i32 3>
+; CHECK-NEXT: [[EQ:%.*]] = icmp eq <4 x i32> [[A]], [[B]]
+; CHECK-NEXT: [[TMP1:%.*]] = and <4 x i1> [[GT]], [[EQ]]
+; CHECK-NEXT: [[TMP2:%.*]] = sext <4 x i1> [[TMP1]] to <4 x i32>
+; CHECK-NEXT: [[AND:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
+; CHECK-NEXT: [[OR:%.*]] = or <4 x i32> [[AND]], [[GT_1]]
+; CHECK-NEXT: ret <4 x i32> [[OR]]
+;
+ %xor.a = xor <4 x i32> %a, <i32 -2147483648, i32 0, i32 -2147483648, i32 0>
+ %xor.b = xor <4 x i32> %b, <i32 -2147483648, i32 0, i32 -2147483648, i32 0>
+ %gt = icmp sgt <4 x i32> %xor.b, %xor.a
+ %sext.gt = sext <4 x i1> %gt to <4 x i32>
+ %gt.0 = shufflevector <4 x i32> %sext.gt, <4 x i32> poison, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
+ %gt.1 = shufflevector <4 x i32> %sext.gt, <4 x i32> poison, <4 x i32> <i32 1, i32 1, i32 3, i32 3>
+ %eq = icmp eq <4 x i32> %a, %b
+ %sext.eq = sext <4 x i1> %eq to <4 x i32>
+ %eq.0 = shufflevector <4 x i32> %sext.eq, <4 x i32> poison, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
+ %and = and <4 x i32> %gt.0, %eq.0
+ %or = or <4 x i32> %and, %gt.1
+ ret <4 x i32> %or
+}
+
+define <4 x i32> @alt_cmpgt_epi64_neg_1(<4 x i32> noundef %a, <4 x i32> noundef %b) {
+; CHECK-LABEL: define <4 x i32> @alt_cmpgt_epi64_neg_1(
+; CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) {
+; CHECK-NEXT: [[XOR_A:%.*]] = xor <4 x i32> [[A]], <i32 -2147483648, i32 0, i32 -2147483648, i32 0>
+; CHECK-NEXT: [[XOR_B:%.*]] = xor <4 x i32> [[B]], <i32 -2147483648, i32 0, i32 -2147483648, i32 0>
+; CHECK-NEXT: [[GT:%.*]] = icmp sgt <4 x i32> [[XOR_B]], [[XOR_A]]
+; CHECK-NEXT: [[SEXT_GT:%.*]] = sext <4 x i1> [[GT]] to <4 x i32>
+; CHECK-NEXT: [[GT_0:%.*]] = shufflevector <4 x i32> [[SEXT_GT]], <4 x i32> poison, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
+; CHECK-NEXT: [[EQ:%.*]] = icmp eq <4 x i32> [[A]], [[B]]
+; CHECK-NEXT: [[TMP1:%.*]] = and <4 x i1> [[GT]], [[EQ]]
+; CHECK-NEXT: [[TMP2:%.*]] = sext <4 x i1> [[TMP1]] to <4 x i32>
+; CHECK-NEXT: [[AND:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <4 x i32> <i32 1, i32 1, i32 3, i32 3>
+; CHECK-NEXT: [[OR:%.*]] = or <4 x i32> [[AND]], [[GT_0]]
+; CHECK-NEXT: ret <4 x i32> [[OR]]
+;
+ %xor.a = xor <4 x i32> %a, <i32 -2147483648, i32 0, i32 -2147483648, i32 0>
+ %xor.b = xor <4 x i32> %b, <i32 -2147483648, i32 0, i32 -2147483648, i32 0>
+ %gt = icmp sgt <4 x i32> %xor.b, %xor.a
+ %sext.gt = sext <4 x i1> %gt to <4 x i32>
+ %gt.0 = shufflevector <4 x i32> %sext.gt, <4 x i32> poison, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
+ %gt.1 = shufflevector <4 x i32> %sext.gt, <4 x i32> poison, <4 x i32> <i32 1, i32 1, i32 3, i32 3>
+ %eq = icmp eq <4 x i32> %a, %b
+ %sext.eq = sext <4 x i1> %eq to <4 x i32>
+ %eq.0 = shufflevector <4 x i32> %sext.eq, <4 x i32> poison, <4 x i32> <i32 1, i32 1, i32 3, i32 3>
+ %and = and <4 x i32> %gt.1, %eq.0
+ %or = or <4 x i32> %and, %gt.0
+ ret <4 x i32> %or
+}
>From 0f1a660354ad125079d20eb431091ab383a4d8a5 Mon Sep 17 00:00:00 2001
From: Fuad Ismail <fuad1502 at gmail.com>
Date: Sat, 7 Mar 2026 00:48:50 +0700
Subject: [PATCH 12/13] Handle and transformed into select in folding v2i64
cmpgt using v4i32 pattern
---
.../InstCombine/InstructionCombining.cpp | 8 +++++--
.../fold-v2i64-cmpgt-using-v4i32-pattern.ll | 23 +++++++++++++++++++
2 files changed, 29 insertions(+), 2 deletions(-)
diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index f56883e57740c..d08c67a518fc4 100644
--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -1387,11 +1387,15 @@ InstCombinerImpl::foldV2CmpGtUsingV4CmpGtPattern(BinaryOperator &I) {
m_SExtOrSelf(m_Value(Greater1)), m_Poison(), m_SpecificMask(MaskLower)));
auto GreaterUpper = m_SExtOrSelf(m_Shuffle(
m_SExtOrSelf(m_Value(Greater2)), m_Poison(), m_SpecificMask(MaskUpper)));
- auto EqUpper = m_SExtOrSelf(
+ auto EqUpper = m_Shuffle(m_c_ICmp(PredEq, m_Value(A), m_Value(B)), m_Poison(),
+ m_SpecificMask(MaskUpper));
+ auto EqUpperSExt = m_SExtOrSelf(
m_Shuffle(m_SExtOrSelf(m_c_ICmp(PredEq, m_Value(A), m_Value(B))),
m_Poison(), m_SpecificMask(MaskUpper)));
- if (!match(&I, m_c_Or(m_c_And(GreaterLower, EqUpper), GreaterUpper)) ||
+ if (!match(&I, m_c_Or(m_CombineOr(m_c_And(GreaterLower, EqUpperSExt),
+ m_Select(EqUpper, GreaterLower, m_Zero())),
+ GreaterUpper)) ||
Greater1 != Greater2 || PredEq != ICmpInst::ICMP_EQ)
return nullptr;
diff --git a/llvm/test/Transforms/InstCombine/fold-v2i64-cmpgt-using-v4i32-pattern.ll b/llvm/test/Transforms/InstCombine/fold-v2i64-cmpgt-using-v4i32-pattern.ll
index bd310db1a35a8..8d1b8e40655c2 100644
--- a/llvm/test/Transforms/InstCombine/fold-v2i64-cmpgt-using-v4i32-pattern.ll
+++ b/llvm/test/Transforms/InstCombine/fold-v2i64-cmpgt-using-v4i32-pattern.ll
@@ -25,6 +25,29 @@ define <4 x i32> @alt_cmpgt_epi64(<4 x i32> noundef %a, <4 x i32> noundef %b) {
ret <4 x i32> %or
}
+define <4 x i32> @alt_cmpgt_epi64_select(<4 x i32> noundef %a, <4 x i32> noundef %b) {
+; CHECK-LABEL: define <4 x i32> @alt_cmpgt_epi64_select(
+; CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) {
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[A]] to <2 x i64>
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[B]] to <2 x i64>
+; CHECK-NEXT: [[TMP3:%.*]] = icmp sgt <2 x i64> [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[TMP4:%.*]] = sext <2 x i1> [[TMP3]] to <2 x i64>
+; CHECK-NEXT: [[OR:%.*]] = bitcast <2 x i64> [[TMP4]] to <4 x i32>
+; CHECK-NEXT: ret <4 x i32> [[OR]]
+;
+ %xor.a = xor <4 x i32> %a, <i32 -2147483648, i32 0, i32 -2147483648, i32 0>
+ %xor.b = xor <4 x i32> %b, <i32 -2147483648, i32 0, i32 -2147483648, i32 0>
+ %gt = icmp sgt <4 x i32> %xor.a, %xor.b
+ %sext.gt = sext <4 x i1> %gt to <4 x i32>
+ %gt.0 = shufflevector <4 x i32> %sext.gt, <4 x i32> poison, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
+ %gt.1 = shufflevector <4 x i32> %sext.gt, <4 x i32> poison, <4 x i32> <i32 1, i32 1, i32 3, i32 3>
+ %eq = icmp eq <4 x i32> %a, %b
+ %eq.0 = shufflevector <4 x i1> %eq, <4 x i1> poison, <4 x i32> <i32 1, i32 1, i32 3, i32 3>
+ %select = select <4 x i1> %eq.0, <4 x i32> %gt.0, <4 x i32> zeroinitializer
+ %or = or <4 x i32> %select, %gt.1
+ ret <4 x i32> %or
+}
+
define <4 x i32> @alt_cmpgt_epu64_1(<4 x i32> noundef %a, <4 x i32> noundef %b) {
; CHECK-LABEL: define <4 x i32> @alt_cmpgt_epu64_1(
; CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) {
>From 07cb34afbccf6aad25069cbd374003c9f176cc89 Mon Sep 17 00:00:00 2001
From: Fuad Ismail <fuad1502 at gmail.com>
Date: Sat, 7 Mar 2026 01:10:00 +0700
Subject: [PATCH 13/13] Check type upfront in folding equal-shuffle-and pattern
---
.../InstCombine/InstructionCombining.cpp | 25 +++++++++----------
1 file changed, 12 insertions(+), 13 deletions(-)
diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index d08c67a518fc4..abe6d96d0567d 100644
--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -1312,11 +1312,19 @@ Value *InstCombinerImpl::foldUsingDistributiveLaws(BinaryOperator &I) {
// Bitwise AND between the upper and lower parts can be achieved by performing
// the operation between the original and shuffled equality vector.
Instruction *InstCombinerImpl::foldV4EqualShuffleAndToV2Equal(Instruction &I) {
+ // Check argument type
+ auto *OldVecType = dyn_cast<VectorType>(I.getType());
+
+ if (!OldVecType || OldVecType->isScalableTy() ||
+ !OldVecType->getElementType()->isIntegerTy(32) ||
+ OldVecType->getElementCount().getFixedValue() != 4)
+ return nullptr;
+
+ // Check pattern existence
Value *L, *R;
CmpPredicate Pred;
SmallVector<int> Mask = {1, 0, 3, 2};
- // Check pattern existance
auto Equal = m_ICmp(Pred, m_Value(L), m_Value(R));
auto Shuffle = m_SExtOrSelf(
m_Shuffle(m_SExtOrSelf(Equal), m_Poison(), m_SpecificMask(Mask)));
@@ -1325,14 +1333,6 @@ Instruction *InstCombinerImpl::foldV4EqualShuffleAndToV2Equal(Instruction &I) {
Pred != CmpInst::ICMP_EQ)
return nullptr;
- // Check argument type
- auto *OldVecType = cast<VectorType>(L->getType());
-
- if (OldVecType->isScalableTy() ||
- !OldVecType->getElementType()->isIntegerTy(32) ||
- OldVecType->getElementCount().getFixedValue() != 4)
- return nullptr;
-
LLVM_DEBUG(dbgs() << "IC: Folding equal-shuffle-and pattern" << '\n');
// Perform folding
@@ -1368,9 +1368,7 @@ Instruction *InstCombinerImpl::foldV4EqualShuffleAndToV2Equal(Instruction &I) {
// can be derived from signed comparison by flipping the MSB of both operands.
Instruction *
InstCombinerImpl::foldV2CmpGtUsingV4CmpGtPattern(BinaryOperator &I) {
- if (I.getOpcode() != Instruction::Or)
- return nullptr;
-
+ // Check argument type
auto *OldVecType = dyn_cast<VectorType>(I.getType());
if (!OldVecType || OldVecType->isScalableTy() ||
@@ -1378,6 +1376,7 @@ InstCombinerImpl::foldV2CmpGtUsingV4CmpGtPattern(BinaryOperator &I) {
OldVecType->getElementCount().getFixedValue() != 4)
return nullptr;
+ // Check pattern existence
Value *A, *B, *Greater1, *Greater2, *Greater;
CmpPredicate PredEq;
SmallVector<int> MaskLower = {0, 0, 2, 2};
@@ -1420,7 +1419,7 @@ InstCombinerImpl::foldV2CmpGtUsingV4CmpGtPattern(BinaryOperator &I) {
(PredGt == ICmpInst::ICMP_SGT || PredGt == ICmpInst::ICMP_SLT)))
return nullptr;
- LLVM_DEBUG(dbgs() << "Found V2CmpGt using V4CmpGt pattern" << '\n');
+ LLVM_DEBUG(dbgs() << "IC: Folding V2CmpGt using V4CmpGt pattern" << '\n');
// Perform folding
auto *NewElementType = IntegerType::get(I.getContext(), 64);
More information about the llvm-commits
mailing list