[llvm] e409204 - VectorCombine: teach foldExtractedCmps about samesign (#122883)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Jan 14 04:04:18 PST 2025
Author: Ramkumar Ramachandra
Date: 2025-01-14T12:04:14Z
New Revision: e409204a89c7fb1d1c040c18fac2fa8db9dfe893
URL: https://github.com/llvm/llvm-project/commit/e409204a89c7fb1d1c040c18fac2fa8db9dfe893
DIFF: https://github.com/llvm/llvm-project/commit/e409204a89c7fb1d1c040c18fac2fa8db9dfe893.diff
LOG: VectorCombine: teach foldExtractedCmps about samesign (#122883)
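Follow up on 4a0d53a (PatternMatch: migrate to CmpPredicate) and resolve
one of the FIXMEs it introduced: replace the exact predicate comparison in
foldExtractedCmps with CmpPredicate::getMatching, so that predicate pairs
such as `samesign ugt` and `sgt` (see the new tests) are also folded.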
Added:
Modified:
llvm/lib/Transforms/Vectorize/VectorCombine.cpp
llvm/test/Transforms/VectorCombine/X86/extract-cmp-binop.ll
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index ae2af6d3468799..d17be8e1ac79ee 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -1097,10 +1097,12 @@ bool VectorCombine::foldExtractedCmps(Instruction &I) {
Instruction *I0, *I1;
Constant *C0, *C1;
CmpPredicate P0, P1;
- // FIXME: Use CmpPredicate::getMatching here.
if (!match(B0, m_Cmp(P0, m_Instruction(I0), m_Constant(C0))) ||
- !match(B1, m_Cmp(P1, m_Instruction(I1), m_Constant(C1))) ||
- P0 != static_cast<CmpInst::Predicate>(P1))
+ !match(B1, m_Cmp(P1, m_Instruction(I1), m_Constant(C1))))
+ return false;
+
+ auto MatchingPred = CmpPredicate::getMatching(P0, P1);
+ if (!MatchingPred)
return false;
// The compare operands must be extracts of the same vector with constant
@@ -1121,7 +1123,7 @@ bool VectorCombine::foldExtractedCmps(Instruction &I) {
// The original scalar pattern is:
// binop i1 (cmp Pred (ext X, Index0), C0), (cmp Pred (ext X, Index1), C1)
- CmpInst::Predicate Pred = P0;
+ CmpInst::Predicate Pred = *MatchingPred;
unsigned CmpOpcode =
CmpInst::isFPPredicate(Pred) ? Instruction::FCmp : Instruction::ICmp;
auto *VecTy = dyn_cast<FixedVectorType>(X->getType());
diff --git a/llvm/test/Transforms/VectorCombine/X86/extract-cmp-binop.ll b/llvm/test/Transforms/VectorCombine/X86/extract-cmp-binop.ll
index 775f2d2da5721f..3346ebf0997f1d 100644
--- a/llvm/test/Transforms/VectorCombine/X86/extract-cmp-binop.ll
+++ b/llvm/test/Transforms/VectorCombine/X86/extract-cmp-binop.ll
@@ -66,6 +66,22 @@ define i1 @icmp_xor_v4i32(<4 x i32> %a) {
ret i1 %r
}
+define i1 @icmp_samesign_xor_v4i32(<4 x i32> %a) {
+; CHECK-LABEL: @icmp_samesign_xor_v4i32(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt <4 x i32> [[A:%.*]], <i32 poison, i32 -8, i32 poison, i32 42>
+; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x i1> [[TMP1]], <4 x i1> poison, <4 x i32> <i32 poison, i32 3, i32 poison, i32 poison>
+; CHECK-NEXT: [[TMP2:%.*]] = xor <4 x i1> [[SHIFT]], [[TMP1]]
+; CHECK-NEXT: [[R:%.*]] = extractelement <4 x i1> [[TMP2]], i64 1
+; CHECK-NEXT: ret i1 [[R]]
+;
+ %e1 = extractelement <4 x i32> %a, i32 3
+ %e2 = extractelement <4 x i32> %a, i32 1
+ %cmp1 = icmp samesign ugt i32 %e1, 42
+ %cmp2 = icmp sgt i32 %e2, -8
+ %r = xor i1 %cmp1, %cmp2
+ ret i1 %r
+}
+
; add is not canonical (should be xor), but that is ok.
define i1 @icmp_add_v8i32(<8 x i32> %a) {
@@ -146,6 +162,27 @@ define i1 @icmp_xor_v4i32_multiuse(<4 x i32> %a) {
ret i1 %r
}
+define i1 @icmp_samesign_xor_v4i32_multiuse(<4 x i32> %a) {
+; CHECK-LABEL: @icmp_samesign_xor_v4i32_multiuse(
+; CHECK-NEXT: [[E2:%.*]] = extractelement <4 x i32> [[A:%.*]], i32 1
+; CHECK-NEXT: call void @use(i32 [[E2]])
+; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt <4 x i32> [[A]], <i32 poison, i32 -8, i32 poison, i32 42>
+; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x i1> [[TMP1]], <4 x i1> poison, <4 x i32> <i32 poison, i32 3, i32 poison, i32 poison>
+; CHECK-NEXT: [[TMP2:%.*]] = xor <4 x i1> [[SHIFT]], [[TMP1]]
+; CHECK-NEXT: [[R:%.*]] = extractelement <4 x i1> [[TMP2]], i64 1
+; CHECK-NEXT: call void @use(i1 [[R]])
+; CHECK-NEXT: ret i1 [[R]]
+;
+ %e1 = extractelement <4 x i32> %a, i32 3
+ %e2 = extractelement <4 x i32> %a, i32 1
+ call void @use(i32 %e2)
+ %cmp1 = icmp sgt i32 %e1, 42
+ %cmp2 = icmp samesign ugt i32 %e2, -8
+ %r = xor i1 %cmp1, %cmp2
+ call void @use(i1 %r)
+ ret i1 %r
+}
+
; Negative test - this could CSE/simplify.
define i1 @same_extract_index(<4 x i32> %a) {
More information about the llvm-commits mailing list