[llvm] fabaca1 - Revert "[InstCombine] Fold and-reduce idiom"
Hans Wennborg via llvm-commits
llvm-commits at lists.llvm.org
Fri Jan 28 03:18:27 PST 2022
Author: Hans Wennborg
Date: 2022-01-28T12:16:03+01:00
New Revision: fabaca10b86f77f7d2d34db91fa6b284da924395
URL: https://github.com/llvm/llvm-project/commit/fabaca10b86f77f7d2d34db91fa6b284da924395
DIFF: https://github.com/llvm/llvm-project/commit/fabaca10b86f77f7d2d34db91fa6b284da924395.diff
LOG: Revert "[InstCombine] Fold and-reduce idiom"
It causes builds to fail with
llvm/include/llvm/Support/Casting.h:269:
typename llvm::cast_retty<X, Y*>::ret_type llvm::cast(Y*)
[with X = llvm::IntegerType; Y = const llvm::Type; typename llvm::cast_retty<X, Y*>::ret_type = const llvm::IntegerType*]:
Assertion `isa<X>(Val) && "cast<Ty>() argument of incompatible type!"' failed.
See the code review for a link to a reproducer.
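The assertion comes from a cast<IntegerType> applied to a plain llvm::Type. In the removed foldReductionIdiom (see the diff below), the scalar width is computed via LHSTy->getElementType()->getIntegerBitWidth(), which asserts when the compared vector's element type is not an integer. As a hedged illustration only (the actual reproducer is linked from the code review), IR along these lines could plausibly reach that code path:

```
; Hypothetical example, not the actual reproducer: the inner icmp compares
; a vector of pointers, so the element type is not an IntegerType when the
; fold asks for its bit width.
define i1 @all_eq_ptrs(<2 x i8*> %lhs, <2 x i8*> %rhs) {
  %vec_ne = icmp ne <2 x i8*> %lhs, %rhs
  %scalar_ne = bitcast <2 x i1> %vec_ne to i2
  %all_eq = icmp eq i2 %scalar_ne, 0
  ret i1 %all_eq
}
```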
> This patch introduces folding of the and-reduce idiom and generates code
> that is easier to read and less costly in terms of icmp operations.
> The folding is
> ```
> icmp eq (bitcast(icmp ne (lhs, rhs)), 0)
> ```
> into
> ```
> icmp eq(bitcast(lhs), bitcast(rhs))
> ```
>
> See PR53419.
>
> Differential Revision: https://reviews.llvm.org/D118317
> Reviewed By: lebedev.ri, spatel
This reverts commit 8599bb0f26738ed88aae62aba57d82f7cf326cf9.
This also reverts the dependent change:
"[Test] Add 'ne' tests for and-reduce pattern folding"
This reverts commit a4aaa5995308ac2ba1bf180c9ce9c321cdb9f28a.
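For reference, the reverted fold (reconstructed from the comment of the now-removed foldReductionIdiom below; the function name is made up for illustration) would have rewritten the lowered and-reduce pattern

```
define i1 @all_eq(<8 x i8> %lhs, <8 x i8> %rhs) {
  %vec_ne = icmp ne <8 x i8> %lhs, %rhs
  %scalar_ne = bitcast <8 x i1> %vec_ne to i8
  %all_eq = icmp eq i8 %scalar_ne, 0
  ret i1 %all_eq
}
```

into a single scalar comparison, provided the resulting integer type (here i64) is legal for the target's data layout:

```
define i1 @all_eq(<8 x i8> %lhs, <8 x i8> %rhs) {
  %lhs.scalar = bitcast <8 x i8> %lhs to i64
  %rhs.scalar = bitcast <8 x i8> %rhs to i64
  %all_eq = icmp eq i64 %lhs.scalar, %rhs.scalar
  ret i1 %all_eq
}
```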
Added:
Modified:
llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
llvm/test/Transforms/InstCombine/icmp-vec.ll
llvm/test/Transforms/InstCombine/reduction-and-sext-zext-i1.ll
llvm/test/Transforms/InstCombine/reduction-or-sext-zext-i1.ll
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
index b9aff7c50bb29..fd58a44504b3c 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -5882,54 +5882,6 @@ static Instruction *foldICmpInvariantGroup(ICmpInst &I) {
return nullptr;
}
-/// This function folds patterns produced by lowering of reduce idioms, such as
-/// llvm.vector.reduce.and which are lowered into instruction chains. This code
-/// attempts to generate fewer number of scalar comparisons instead of vector
-/// comparisons when possible.
-static Instruction *foldReductionIdiom(ICmpInst &I,
- InstCombiner::BuilderTy &Builder,
- const DataLayout &DL) {
- if (I.getType()->isVectorTy())
- return nullptr;
- ICmpInst::Predicate OuterPred, InnerPred;
- Value *LHS, *RHS;
-
- // Match lowering of @llvm.vector.reduce.and. Turn
- /// %vec_ne = icmp ne <8 x i8> %lhs, %rhs
- /// %scalar_ne = bitcast <8 x i1> %vec_ne to i8
- /// %all_eq = icmp eq i8 %scalar_ne, 0
- ///
- /// into
- ///
- /// %lhs.scalar = bitcast <8 x i8> %lhs to i64
- /// %rhs.scalar = bitcast <8 x i8> %rhs to i64
- /// %all_eq = icmp eq i64 %lhs.scalar, %rhs.scalar
- if (!match(&I, m_ICmp(OuterPred,
- m_OneUse(m_BitCast(m_OneUse(
- m_ICmp(InnerPred, m_Value(LHS), m_Value(RHS))))),
- m_Zero())))
- return nullptr;
- auto *LHSTy = dyn_cast<FixedVectorType>(LHS->getType());
- if (!LHSTy)
- return nullptr;
- unsigned NumBits =
- LHSTy->getNumElements() * LHSTy->getElementType()->getIntegerBitWidth();
- // TODO: Relax this to "not wider than max legal integer type"?
- if (!DL.isLegalInteger(NumBits))
- return nullptr;
-
- // TODO: Generalize to isEquality and support other patterns.
- if (OuterPred == ICmpInst::ICMP_EQ && InnerPred == ICmpInst::ICMP_NE) {
- auto *ScalarTy = Builder.getIntNTy(NumBits);
- LHS = Builder.CreateBitCast(LHS, ScalarTy, LHS->getName() + ".scalar");
- RHS = Builder.CreateBitCast(RHS, ScalarTy, RHS->getName() + ".scalar");
- return ICmpInst::Create(Instruction::ICmp, ICmpInst::ICMP_EQ, LHS, RHS,
- I.getName());
- }
-
- return nullptr;
-}
-
Instruction *InstCombinerImpl::visitICmpInst(ICmpInst &I) {
bool Changed = false;
const SimplifyQuery Q = SQ.getWithInstruction(&I);
@@ -6172,9 +6124,6 @@ Instruction *InstCombinerImpl::visitICmpInst(ICmpInst &I) {
if (Instruction *Res = foldICmpInvariantGroup(I))
return Res;
- if (Instruction *Res = foldReductionIdiom(I, Builder, DL))
- return Res;
-
return Changed ? &I : nullptr;
}
diff --git a/llvm/test/Transforms/InstCombine/icmp-vec.ll b/llvm/test/Transforms/InstCombine/icmp-vec.ll
index 39067e68688f4..8c90fd3ae89e5 100644
--- a/llvm/test/Transforms/InstCombine/icmp-vec.ll
+++ b/llvm/test/Transforms/InstCombine/icmp-vec.ll
@@ -404,9 +404,9 @@ define <vscale x 2 x i1> @icmp_logical_or_scalablevec(<vscale x 2 x i64> %x, <vs
define i1 @eq_cast_eq-1(<2 x i4> %x, <2 x i4> %y) {
; CHECK-LABEL: @eq_cast_eq-1(
-; CHECK-NEXT: [[X_SCALAR:%.*]] = bitcast <2 x i4> [[X:%.*]] to i8
-; CHECK-NEXT: [[Y_SCALAR:%.*]] = bitcast <2 x i4> [[Y:%.*]] to i8
-; CHECK-NEXT: [[R:%.*]] = icmp eq i8 [[X_SCALAR]], [[Y_SCALAR]]
+; CHECK-NEXT: [[IC:%.*]] = icmp ne <2 x i4> [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i1> [[IC]] to i2
+; CHECK-NEXT: [[R:%.*]] = icmp eq i2 [[TMP1]], 0
; CHECK-NEXT: ret i1 [[R]]
;
%ic = icmp eq <2 x i4> %x, %y
diff --git a/llvm/test/Transforms/InstCombine/reduction-and-sext-zext-i1.ll b/llvm/test/Transforms/InstCombine/reduction-and-sext-zext-i1.ll
index a38d26b1f8bf5..40af5709796a0 100644
--- a/llvm/test/Transforms/InstCombine/reduction-and-sext-zext-i1.ll
+++ b/llvm/test/Transforms/InstCombine/reduction-and-sext-zext-i1.ll
@@ -100,12 +100,14 @@ define i64 @reduce_and_zext_external_use(<8 x i1> %x) {
define i1 @reduce_and_pointer_cast(i8* %arg, i8* %arg1) {
; CHECK-LABEL: @reduce_and_pointer_cast(
; CHECK-NEXT: bb:
-; CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[ARG1:%.*]] to i64*
-; CHECK-NEXT: [[LHS1:%.*]] = load i64, i64* [[TMP0]], align 8
-; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[ARG:%.*]] to i64*
-; CHECK-NEXT: [[RHS2:%.*]] = load i64, i64* [[TMP1]], align 8
-; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[LHS1]], [[RHS2]]
-; CHECK-NEXT: ret i1 [[TMP2]]
+; CHECK-NEXT: [[PTR1:%.*]] = bitcast i8* [[ARG1:%.*]] to <8 x i8>*
+; CHECK-NEXT: [[PTR2:%.*]] = bitcast i8* [[ARG:%.*]] to <8 x i8>*
+; CHECK-NEXT: [[LHS:%.*]] = load <8 x i8>, <8 x i8>* [[PTR1]], align 8
+; CHECK-NEXT: [[RHS:%.*]] = load <8 x i8>, <8 x i8>* [[PTR2]], align 8
+; CHECK-NEXT: [[CMP:%.*]] = icmp ne <8 x i8> [[LHS]], [[RHS]]
+; CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i1> [[CMP]] to i8
+; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i8 [[TMP0]], 0
+; CHECK-NEXT: ret i1 [[TMP1]]
;
bb:
%ptr1 = bitcast i8* %arg1 to <8 x i8>*
@@ -139,50 +141,6 @@ bb:
ret i1 %all_eq
}
-define i1 @reduce_and_pointer_cast_ne(i8* %arg, i8* %arg1) {
-; CHECK-LABEL: @reduce_and_pointer_cast_ne(
-; CHECK-NEXT: bb:
-; CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[ARG1:%.*]] to i64*
-; CHECK-NEXT: [[LHS1:%.*]] = load i64, i64* [[TMP0]], align 8
-; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[ARG:%.*]] to i64*
-; CHECK-NEXT: [[RHS2:%.*]] = load i64, i64* [[TMP1]], align 8
-; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i64 [[LHS1]], [[RHS2]]
-; CHECK-NEXT: ret i1 [[TMP2]]
-;
-bb:
- %ptr1 = bitcast i8* %arg1 to <8 x i8>*
- %ptr2 = bitcast i8* %arg to <8 x i8>*
- %lhs = load <8 x i8>, <8 x i8>* %ptr1
- %rhs = load <8 x i8>, <8 x i8>* %ptr2
- %cmp = icmp eq <8 x i8> %lhs, %rhs
- %all_eq = call i1 @llvm.vector.reduce.and.v8i32(<8 x i1> %cmp)
- %any_ne = xor i1 %all_eq, 1
- ret i1 %any_ne
-}
-
-define i1 @reduce_and_pointer_cast_ne_wide(i8* %arg, i8* %arg1) {
-; CHECK-LABEL: @reduce_and_pointer_cast_ne_wide(
-; CHECK-NEXT: bb:
-; CHECK-NEXT: [[PTR1:%.*]] = bitcast i8* [[ARG1:%.*]] to <8 x i16>*
-; CHECK-NEXT: [[PTR2:%.*]] = bitcast i8* [[ARG:%.*]] to <8 x i16>*
-; CHECK-NEXT: [[LHS:%.*]] = load <8 x i16>, <8 x i16>* [[PTR1]], align 16
-; CHECK-NEXT: [[RHS:%.*]] = load <8 x i16>, <8 x i16>* [[PTR2]], align 16
-; CHECK-NEXT: [[CMP:%.*]] = icmp ne <8 x i16> [[LHS]], [[RHS]]
-; CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i1> [[CMP]] to i8
-; CHECK-NEXT: [[TMP1:%.*]] = icmp ne i8 [[TMP0]], 0
-; CHECK-NEXT: ret i1 [[TMP1]]
-;
-bb:
- %ptr1 = bitcast i8* %arg1 to <8 x i16>*
- %ptr2 = bitcast i8* %arg to <8 x i16>*
- %lhs = load <8 x i16>, <8 x i16>* %ptr1
- %rhs = load <8 x i16>, <8 x i16>* %ptr2
- %cmp = icmp eq <8 x i16> %lhs, %rhs
- %all_eq = call i1 @llvm.vector.reduce.and.v8i32(<8 x i1> %cmp)
- %any_ne = xor i1 %all_eq, 1
- ret i1 %any_ne
-}
-
declare i1 @llvm.vector.reduce.and.v8i32(<8 x i1> %a)
declare i32 @llvm.vector.reduce.and.v4i32(<4 x i32> %a)
declare i64 @llvm.vector.reduce.and.v8i64(<8 x i64> %a)
diff --git a/llvm/test/Transforms/InstCombine/reduction-or-sext-zext-i1.ll b/llvm/test/Transforms/InstCombine/reduction-or-sext-zext-i1.ll
index 35174d4321565..6f5b34e02c793 100644
--- a/llvm/test/Transforms/InstCombine/reduction-or-sext-zext-i1.ll
+++ b/llvm/test/Transforms/InstCombine/reduction-or-sext-zext-i1.ll
@@ -100,11 +100,13 @@ define i64 @reduce_or_zext_external_use(<8 x i1> %x) {
define i1 @reduce_or_pointer_cast(i8* %arg, i8* %arg1) {
; CHECK-LABEL: @reduce_or_pointer_cast(
; CHECK-NEXT: bb:
-; CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[ARG1:%.*]] to i64*
-; CHECK-NEXT: [[LHS1:%.*]] = load i64, i64* [[TMP0]], align 8
-; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[ARG:%.*]] to i64*
-; CHECK-NEXT: [[RHS2:%.*]] = load i64, i64* [[TMP1]], align 8
-; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i64 [[LHS1]], [[RHS2]]
+; CHECK-NEXT: [[PTR1:%.*]] = bitcast i8* [[ARG1:%.*]] to <8 x i8>*
+; CHECK-NEXT: [[PTR2:%.*]] = bitcast i8* [[ARG:%.*]] to <8 x i8>*
+; CHECK-NEXT: [[LHS:%.*]] = load <8 x i8>, <8 x i8>* [[PTR1]], align 8
+; CHECK-NEXT: [[RHS:%.*]] = load <8 x i8>, <8 x i8>* [[PTR2]], align 8
+; CHECK-NEXT: [[CMP:%.*]] = icmp ne <8 x i8> [[LHS]], [[RHS]]
+; CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i1> [[CMP]] to i8
+; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i8 [[TMP0]], 0
; CHECK-NEXT: ret i1 [[DOTNOT]]
;
bb:
@@ -141,51 +143,6 @@ bb:
ret i1 %all_eq
}
-
-define i1 @reduce_or_pointer_cast_ne(i8* %arg, i8* %arg1) {
-; CHECK-LABEL: @reduce_or_pointer_cast_ne(
-; CHECK-NEXT: bb:
-; CHECK-NEXT: [[PTR1:%.*]] = bitcast i8* [[ARG1:%.*]] to <8 x i8>*
-; CHECK-NEXT: [[PTR2:%.*]] = bitcast i8* [[ARG:%.*]] to <8 x i8>*
-; CHECK-NEXT: [[LHS:%.*]] = load <8 x i8>, <8 x i8>* [[PTR1]], align 8
-; CHECK-NEXT: [[RHS:%.*]] = load <8 x i8>, <8 x i8>* [[PTR2]], align 8
-; CHECK-NEXT: [[CMP:%.*]] = icmp ne <8 x i8> [[LHS]], [[RHS]]
-; CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i1> [[CMP]] to i8
-; CHECK-NEXT: [[TMP1:%.*]] = icmp ne i8 [[TMP0]], 0
-; CHECK-NEXT: ret i1 [[TMP1]]
-;
-bb:
- %ptr1 = bitcast i8* %arg1 to <8 x i8>*
- %ptr2 = bitcast i8* %arg to <8 x i8>*
- %lhs = load <8 x i8>, <8 x i8>* %ptr1
- %rhs = load <8 x i8>, <8 x i8>* %ptr2
- %cmp = icmp ne <8 x i8> %lhs, %rhs
- %any_ne = call i1 @llvm.vector.reduce.or.v8i32(<8 x i1> %cmp)
- ret i1 %any_ne
-}
-
-define i1 @reduce_or_pointer_cast_ne_wide(i8* %arg, i8* %arg1) {
-; CHECK-LABEL: @reduce_or_pointer_cast_ne_wide(
-; CHECK-NEXT: bb:
-; CHECK-NEXT: [[PTR1:%.*]] = bitcast i8* [[ARG1:%.*]] to <8 x i16>*
-; CHECK-NEXT: [[PTR2:%.*]] = bitcast i8* [[ARG:%.*]] to <8 x i16>*
-; CHECK-NEXT: [[LHS:%.*]] = load <8 x i16>, <8 x i16>* [[PTR1]], align 16
-; CHECK-NEXT: [[RHS:%.*]] = load <8 x i16>, <8 x i16>* [[PTR2]], align 16
-; CHECK-NEXT: [[CMP:%.*]] = icmp ne <8 x i16> [[LHS]], [[RHS]]
-; CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i1> [[CMP]] to i8
-; CHECK-NEXT: [[TMP1:%.*]] = icmp ne i8 [[TMP0]], 0
-; CHECK-NEXT: ret i1 [[TMP1]]
-;
-bb:
- %ptr1 = bitcast i8* %arg1 to <8 x i16>*
- %ptr2 = bitcast i8* %arg to <8 x i16>*
- %lhs = load <8 x i16>, <8 x i16>* %ptr1
- %rhs = load <8 x i16>, <8 x i16>* %ptr2
- %cmp = icmp ne <8 x i16> %lhs, %rhs
- %any_ne = call i1 @llvm.vector.reduce.or.v8i32(<8 x i1> %cmp)
- ret i1 %any_ne
-}
-
declare i1 @llvm.vector.reduce.or.v8i32(<8 x i1> %a)
declare i32 @llvm.vector.reduce.or.v4i32(<4 x i32> %a)
declare i64 @llvm.vector.reduce.or.v8i64(<8 x i64> %a)