[llvm] [InstCombine] Fold icmp (vreduce_(or|and) %x), (0|-1) (PR #182684)
via llvm-commits
llvm-commits at lists.llvm.org
Sat Feb 21 07:55:23 PST 2026
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-transforms
Author: Ramkumar Ramachandra (artagnon)
<details>
<summary>Changes</summary>
When an equality compare tests the result of a vector_reduce_or against zero (or a vector_reduce_and against all-ones), we can bitcast the whole vector to a scalar integer and compare that value directly against Zero or AllOnes.
It is profitable on all major targets: https://godbolt.org/z/o7ecKjbsK
---
Full diff: https://github.com/llvm/llvm-project/pull/182684.diff
2 Files Affected:
- (modified) llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp (+41)
- (added) llvm/test/Transforms/InstCombine/icmp-vector-bitwise-reductions.ll (+101)
``````````diff
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
index ec6ac25bb8b9c..546b3e11a075f 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -7523,6 +7523,44 @@ static Instruction *foldICmpInvariantGroup(ICmpInst &I) {
return nullptr;
}
+// Fold an equality icmp of a bitwise vector reduction against the value the
+// reduction can only produce when every lane agrees:
+//
+//   icmp eq/ne (vector_reduce_or  X), 0  -> icmp eq/ne (bitcast X to iN), 0
+//   icmp eq/ne (vector_reduce_and X), -1 -> icmp eq/ne (bitcast X to iN), -1
+//
+// This is sound because the OR-reduction is zero iff every bit of X is zero,
+// and the AND-reduction is all-ones iff every bit of X is one.  Returns the
+// replacement compare, or nullptr if the pattern does not match or the
+// bitcast target type would not be a legal integer for the target.
+static Instruction *foldICmpOfVectorReduce(ICmpInst &I, const DataLayout &DL,
+ IRBuilderBase &Builder) {
+ if (!ICmpInst::isEquality(I.getPredicate()))
+ return nullptr;
+
+ // The caller puts constants after non-constants.
+ Value *Op = I.getOperand(0);
+ Value *Const = I.getOperand(1);
+
+ // For Cond an equality condition, fold
+ //
+ // icmp (eq|ne) (vreduce_(or|and) Op), (Zero|AllOnes) ->
+ // icmp (eq|ne) Op, (Zero|AllOnes)
+ //
+ // with a bitcast.
+ Value *Vec;
+ if ((match(Const, m_ZeroInt()) &&
+ match(Op, m_Intrinsic<Intrinsic::vector_reduce_or>(m_Value(Vec)))) ||
+ (match(Const, m_AllOnes()) &&
+ match(Op, m_Intrinsic<Intrinsic::vector_reduce_and>(m_Value(Vec))))) {
+ // Scalable vectors are rejected here: their total bit width is unknown at
+ // compile time, so there is no fixed-width integer to bitcast to.
+ auto *VecTy = dyn_cast<FixedVectorType>(Vec->getType());
+ if (!VecTy)
+ return nullptr;
+ Type *VecEltTy = VecTy->getElementType();
+ unsigned ScalarBW =
+ DL.getTypeSizeInBits(VecEltTy) * VecTy->getNumElements();
+ // Only fold when the whole-vector-wide integer compare is legal for the
+ // target, so the scalar compare stays a single cheap operation.
+ if (!DL.fitsInLegalInteger(ScalarBW))
+ return nullptr;
+ Type *ScalarTy = IntegerType::get(I.getContext(), ScalarBW);
+ // Re-matching m_ZeroInt() distinguishes the reduce_or/zero arm from the
+ // reduce_and/all-ones arm; the new constant mirrors the original one at
+ // the widened bit width.
+ Value *NewConst = match(Const, m_ZeroInt())
+ ? ConstantInt::get(ScalarTy, 0)
+ : ConstantInt::getAllOnesValue(ScalarTy);
+ return CmpInst::Create(Instruction::ICmp, I.getPredicate(),
+ Builder.CreateBitCast(Vec, ScalarTy), NewConst);
+ }
+ return nullptr;
+}
+
/// This function folds patterns produced by lowering of reduce idioms, such as
/// llvm.vector.reduce.and which are lowered into instruction chains. This code
/// attempts to generate fewer number of scalar comparisons instead of vector
@@ -7998,6 +8036,9 @@ Instruction *InstCombinerImpl::visitICmpInst(ICmpInst &I) {
if (Instruction *Res = foldICmpOfUAddOv(I))
return Res;
+ if (Instruction *Res = foldICmpOfVectorReduce(I, DL, Builder))
+ return Res;
+
// The 'cmpxchg' instruction returns an aggregate containing the old value and
// an i1 which indicates whether or not we successfully did the swap.
//
diff --git a/llvm/test/Transforms/InstCombine/icmp-vector-bitwise-reductions.ll b/llvm/test/Transforms/InstCombine/icmp-vector-bitwise-reductions.ll
new file mode 100644
index 0000000000000..09da979b7e873
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/icmp-vector-bitwise-reductions.ll
@@ -0,0 +1,101 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
+; RUN: opt -p instcombine -S %s | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+; reduce.and == -1 holds iff every bit of %v is set, so the fold bitcasts the
+; whole vector to i32 and compares against -1.
+define i1 @vreduce_and_eq(<4 x i8> %v) {
+; CHECK-LABEL: define i1 @vreduce_and_eq(
+; CHECK-SAME: <4 x i8> [[V:%.*]]) {
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i8> [[V]] to i32
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP1]], -1
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %red = call i8 @llvm.vector.reduce.and.v4i8(<4 x i8> %v)
+ %cmp = icmp eq i8 %red, -1
+ ret i1 %cmp
+}
+
+; Same fold with the inverted (ne) predicate.
+define i1 @vreduce_and_ne(<4 x i8> %v) {
+; CHECK-LABEL: define i1 @vreduce_and_ne(
+; CHECK-SAME: <4 x i8> [[V:%.*]]) {
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i8> [[V]] to i32
+; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[TMP1]], -1
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %red = call i8 @llvm.vector.reduce.and.v4i8(<4 x i8> %v)
+ %cmp = icmp ne i8 %red, -1
+ ret i1 %cmp
+}
+
+; reduce.or == 0 holds iff every bit of %v is clear, so compare the bitcast
+; against zero.
+define i1 @vreduce_or_eq(<4 x i8> %v) {
+; CHECK-LABEL: define i1 @vreduce_or_eq(
+; CHECK-SAME: <4 x i8> [[V:%.*]]) {
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i8> [[V]] to i32
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP1]], 0
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %red = call i8 @llvm.vector.reduce.or.v4i8(<4 x i8> %v)
+ %cmp = icmp eq i8 %red, 0
+ ret i1 %cmp
+}
+
+; Same fold with the inverted (ne) predicate.
+define i1 @vreduce_or_ne(<4 x i8> %v) {
+; CHECK-LABEL: define i1 @vreduce_or_ne(
+; CHECK-SAME: <4 x i8> [[V:%.*]]) {
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i8> [[V]] to i32
+; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[TMP1]], 0
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %red = call i8 @llvm.vector.reduce.or.v4i8(<4 x i8> %v)
+ %cmp = icmp ne i8 %red, 0
+ ret i1 %cmp
+}
+
+; The vector load plus the new bitcast is further folded into a scalar i32
+; load, leaving a single scalar compare.
+define i1 @loaded_value(ptr %p) {
+; CHECK-LABEL: define i1 @loaded_value(
+; CHECK-SAME: ptr [[P:%.*]]) {
+; CHECK-NEXT: [[V1:%.*]] = load i32, ptr [[P]], align 4
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[V1]], -1
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %v = load <4 x i8>, ptr %p
+ %red = call i8 @llvm.vector.reduce.and.v4i8(<4 x i8> %v)
+ %cmp = icmp eq i8 %red, -1
+ ret i1 %cmp
+}
+
+; Negative test: <4 x i32> is 128 bits, which does not fit a legal integer
+; for the n8:16:32:64 datalayout above, so the fold must not fire.
+define i1 @vector_elt_type_legality(<4 x i32> %v) {
+; CHECK-LABEL: define i1 @vector_elt_type_legality(
+; CHECK-SAME: <4 x i32> [[V:%.*]]) {
+; CHECK-NEXT: [[RED:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[V]])
+; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[RED]], 0
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %red = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> %v)
+ %cmp = icmp ne i32 %red, 0
+ ret i1 %cmp
+}
+
+; Negative test: sgt is not an equality predicate, so the fold must not fire.
+define i1 @vreduce_and_sgt(<4 x i8> %v) {
+; CHECK-LABEL: define i1 @vreduce_and_sgt(
+; CHECK-SAME: <4 x i8> [[V:%.*]]) {
+; CHECK-NEXT: [[RED:%.*]] = call i8 @llvm.vector.reduce.and.v4i8(<4 x i8> [[V]])
+; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i8 [[RED]], -1
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %red = call i8 @llvm.vector.reduce.and.v4i8(<4 x i8> %v)
+ %cmp = icmp sgt i8 %red, -1
+ ret i1 %cmp
+}
+
+; Negative test: a scalable vector has no compile-time-known bit width to
+; bitcast to, so the fold must not fire.
+define i1 @scalable(<vscale x 4 x i8> %v) {
+; CHECK-LABEL: define i1 @scalable(
+; CHECK-SAME: <vscale x 4 x i8> [[V:%.*]]) {
+; CHECK-NEXT: [[RED:%.*]] = call i8 @llvm.vector.reduce.and.nxv4i8(<vscale x 4 x i8> [[V]])
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8 [[RED]], -1
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %red = call i8 @llvm.vector.reduce.and.nxv4i8(<vscale x 4 x i8> %v)
+ %cmp = icmp eq i8 %red, -1
+ ret i1 %cmp
+}
``````````
</details>
https://github.com/llvm/llvm-project/pull/182684
More information about the llvm-commits
mailing list