[llvm] 05e838f - [VectorCombine] foldExtractedCmps - disable fold on non-commutative binops

Tue Nov 5 03:49:59 PST 2024

Author: Simon Pilgrim
Date: 2024-11-05T11:42:30Z
New Revision: 05e838f428555bcc4507bd37912da60ea9110ef6

URL: https://github.com/llvm/llvm-project/commit/05e838f428555bcc4507bd37912da60ea9110ef6
DIFF: https://github.com/llvm/llvm-project/commit/05e838f428555bcc4507bd37912da60ea9110ef6.diff

LOG: [VectorCombine] foldExtractedCmps - disable fold on non-commutative binops

The fold needs to be adjusted to correctly track the LHS/RHS operands, which will take some refactoring; for now, just disable the fold in this case.

Fixes #114901

Added: 
    

Modified: 
    llvm/lib/Transforms/Vectorize/VectorCombine.cpp
    llvm/test/Transforms/VectorCombine/X86/pr114901.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index 025234c54956b2..332c52040e21c4 100644

--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -1032,9 +1032,15 @@ bool VectorCombine::scalarizeBinopOrCmp(Instruction &I) {
 /// a vector into vector operations followed by extract. Note: The SLP pass
 /// may miss this pattern because of implementation problems.
 bool VectorCombine::foldExtractedCmps(Instruction &I) {
+  auto *BI = dyn_cast<BinaryOperator>(&I);
+
   // We are looking for a scalar binop of booleans.
   // binop i1 (cmp Pred I0, C0), (cmp Pred I1, C1)
-  if (!I.isBinaryOp() || !I.getType()->isIntegerTy(1))
+  if (!BI || !I.getType()->isIntegerTy(1))
+    return false;
+
+  // TODO: Support non-commutative binary ops.
+  if (!BI->isCommutative())
     return false;
 
   // The compare predicates should match, and each compare should have a
@@ -1113,8 +1119,7 @@ bool VectorCombine::foldExtractedCmps(Instruction &I) {
   Value *VCmp = Builder.CreateCmp(Pred, X, ConstantVector::get(CmpC));
 
   Value *Shuf = createShiftShuffle(VCmp, ExpensiveIndex, CheapIndex, Builder);
-  Value *VecLogic = Builder.CreateBinOp(cast<BinaryOperator>(I).getOpcode(),
-                                        VCmp, Shuf);
+  Value *VecLogic = Builder.CreateBinOp(BI->getOpcode(), VCmp, Shuf);
   Value *NewExt = Builder.CreateExtractElement(VecLogic, CheapIndex);
   replaceValue(I, *NewExt);
   ++NumVecCmpBO;

diff  --git a/llvm/test/Transforms/VectorCombine/X86/pr114901.ll b/llvm/test/Transforms/VectorCombine/X86/pr114901.ll
index 51614ebfc26bd4..4daa569358d533 100644
--- a/llvm/test/Transforms/VectorCombine/X86/pr114901.ll
+++ b/llvm/test/Transforms/VectorCombine/X86/pr114901.ll
@@ -2,7 +2,7 @@
 ; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mattr=sse2 | FileCheck %s --check-prefixes=SSE
 ; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mattr=avx2 | FileCheck %s --check-prefixes=AVX
 
-; FIXME: PR114901 - ensure that the ASHR node doesn't commute the operands.
+; PR114901 - ensure that the ASHR node doesn't commute the operands.
 define i1 @PR114901(<4 x i32> %a) {
 ; SSE-LABEL: define i1 @PR114901(
 ; SSE-SAME: <4 x i32> [[A:%.*]]) #[[ATTR0:[0-9]+]] {
@@ -15,10 +15,11 @@ define i1 @PR114901(<4 x i32> %a) {
 ;
 ; AVX-LABEL: define i1 @PR114901(
 ; AVX-SAME: <4 x i32> [[A:%.*]]) #[[ATTR0:[0-9]+]] {
-; AVX-NEXT:    [[TMP1:%.*]] = icmp sgt <4 x i32> [[A]], <i32 poison, i32 -8, i32 poison, i32 42>
-; AVX-NEXT:    [[SHIFT:%.*]] = shufflevector <4 x i1> [[TMP1]], <4 x i1> poison, <4 x i32> <i32 poison, i32 3, i32 poison, i32 poison>
-; AVX-NEXT:    [[TMP2:%.*]] = ashr <4 x i1> [[TMP1]], [[SHIFT]]
-; AVX-NEXT:    [[R:%.*]] = extractelement <4 x i1> [[TMP2]], i64 1
+; AVX-NEXT:    [[E1:%.*]] = extractelement <4 x i32> [[A]], i32 1
+; AVX-NEXT:    [[E3:%.*]] = extractelement <4 x i32> [[A]], i32 3
+; AVX-NEXT:    [[CMP1:%.*]] = icmp sgt i32 [[E1]], -8
+; AVX-NEXT:    [[CMP3:%.*]] = icmp sgt i32 [[E3]], 42
+; AVX-NEXT:    [[R:%.*]] = ashr i1 [[CMP3]], [[CMP1]]
 ; AVX-NEXT:    ret i1 [[R]]
 ;
   %e1 = extractelement <4 x i32> %a, i32 1