[llvm] e3a0775 - [VectorCombine] foldExtractedCmps - (re-)enable fold on non-commutative binops

Simon Pilgrim via llvm-commits llvm-commits at lists.llvm.org
Wed Nov 6 04:14:53 PST 2024


Author: Simon Pilgrim
Date: 2024-11-06T12:10:31Z
New Revision: e3a0775651190a23d8234615b9fdadd81c1c24bc

URL: https://github.com/llvm/llvm-project/commit/e3a0775651190a23d8234615b9fdadd81c1c24bc
DIFF: https://github.com/llvm/llvm-project/commit/e3a0775651190a23d8234615b9fdadd81c1c24bc.diff

LOG: [VectorCombine] foldExtractedCmps - (re-)enable fold on non-commutative binops

#114901 exposed that foldExtractedCmps didn't account for non-commutative binops, so the fold was disabled for them in 05e838f428555bcc4507bd37912da60ea9110ef6.

This patch re-enables support for non-commutative binops by ensuring that the LHS/RHS arg order of the binop is retained.

Added: 
    

Modified: 
    llvm/lib/Transforms/Vectorize/VectorCombine.cpp
    llvm/test/Transforms/VectorCombine/X86/extract-cmp-binop.ll
    llvm/test/Transforms/VectorCombine/X86/pr114901.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index 332c52040e21c4..4cb0b68eee1599 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -1039,10 +1039,6 @@ bool VectorCombine::foldExtractedCmps(Instruction &I) {
   if (!BI || !I.getType()->isIntegerTy(1))
     return false;
 
-  // TODO: Support non-commutative binary ops.
-  if (!BI->isCommutative())
-    return false;
-
   // The compare predicates should match, and each compare should have a
   // constant operand.
   Value *B0 = I.getOperand(0), *B1 = I.getOperand(1);
@@ -1066,6 +1062,8 @@ bool VectorCombine::foldExtractedCmps(Instruction &I) {
   ExtractElementInst *ConvertToShuf = getShuffleExtract(Ext0, Ext1);
   if (!ConvertToShuf)
     return false;
+  assert((ConvertToShuf == Ext0 || ConvertToShuf == Ext1) &&
+         "Unknown ExtractElementInst");
 
   // The original scalar pattern is:
   // binop i1 (cmp Pred (ext X, Index0), C0), (cmp Pred (ext X, Index1), C1)
@@ -1117,9 +1115,10 @@ bool VectorCombine::foldExtractedCmps(Instruction &I) {
   CmpC[Index0] = C0;
   CmpC[Index1] = C1;
   Value *VCmp = Builder.CreateCmp(Pred, X, ConstantVector::get(CmpC));
-
   Value *Shuf = createShiftShuffle(VCmp, ExpensiveIndex, CheapIndex, Builder);
-  Value *VecLogic = Builder.CreateBinOp(BI->getOpcode(), VCmp, Shuf);
+  Value *LHS = ConvertToShuf == Ext0 ? Shuf : VCmp;
+  Value *RHS = ConvertToShuf == Ext0 ? VCmp : Shuf;
+  Value *VecLogic = Builder.CreateBinOp(BI->getOpcode(), LHS, RHS);
   Value *NewExt = Builder.CreateExtractElement(VecLogic, CheapIndex);
   replaceValue(I, *NewExt);
   ++NumVecCmpBO;

diff  --git a/llvm/test/Transforms/VectorCombine/X86/extract-cmp-binop.ll b/llvm/test/Transforms/VectorCombine/X86/extract-cmp-binop.ll
index be5359f549ac94..775f2d2da5721f 100644
--- a/llvm/test/Transforms/VectorCombine/X86/extract-cmp-binop.ll
+++ b/llvm/test/Transforms/VectorCombine/X86/extract-cmp-binop.ll
@@ -54,7 +54,7 @@ define i1 @icmp_xor_v4i32(<4 x i32> %a) {
 ; CHECK-LABEL: @icmp_xor_v4i32(
 ; CHECK-NEXT:    [[TMP1:%.*]] = icmp sgt <4 x i32> [[A:%.*]], <i32 poison, i32 -8, i32 poison, i32 42>
 ; CHECK-NEXT:    [[SHIFT:%.*]] = shufflevector <4 x i1> [[TMP1]], <4 x i1> poison, <4 x i32> <i32 poison, i32 3, i32 poison, i32 poison>
-; CHECK-NEXT:    [[TMP2:%.*]] = xor <4 x i1> [[TMP1]], [[SHIFT]]
+; CHECK-NEXT:    [[TMP2:%.*]] = xor <4 x i1> [[SHIFT]], [[TMP1]]
 ; CHECK-NEXT:    [[R:%.*]] = extractelement <4 x i1> [[TMP2]], i64 1
 ; CHECK-NEXT:    ret i1 [[R]]
 ;
@@ -80,7 +80,7 @@ define i1 @icmp_add_v8i32(<8 x i32> %a) {
 ; AVX-LABEL: @icmp_add_v8i32(
 ; AVX-NEXT:    [[TMP1:%.*]] = icmp eq <8 x i32> [[A:%.*]], <i32 poison, i32 poison, i32 -8, i32 poison, i32 poison, i32 poison, i32 poison, i32 42>
 ; AVX-NEXT:    [[SHIFT:%.*]] = shufflevector <8 x i1> [[TMP1]], <8 x i1> poison, <8 x i32> <i32 poison, i32 poison, i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
-; AVX-NEXT:    [[TMP2:%.*]] = add <8 x i1> [[TMP1]], [[SHIFT]]
+; AVX-NEXT:    [[TMP2:%.*]] = add <8 x i1> [[SHIFT]], [[TMP1]]
 ; AVX-NEXT:    [[R:%.*]] = extractelement <8 x i1> [[TMP2]], i64 2
 ; AVX-NEXT:    ret i1 [[R]]
 ;
@@ -131,7 +131,7 @@ define i1 @icmp_xor_v4i32_multiuse(<4 x i32> %a) {
 ; CHECK-NEXT:    call void @use(i32 [[E2]])
 ; CHECK-NEXT:    [[TMP1:%.*]] = icmp sgt <4 x i32> [[A]], <i32 poison, i32 -8, i32 poison, i32 42>
 ; CHECK-NEXT:    [[SHIFT:%.*]] = shufflevector <4 x i1> [[TMP1]], <4 x i1> poison, <4 x i32> <i32 poison, i32 3, i32 poison, i32 poison>
-; CHECK-NEXT:    [[TMP2:%.*]] = xor <4 x i1> [[TMP1]], [[SHIFT]]
+; CHECK-NEXT:    [[TMP2:%.*]] = xor <4 x i1> [[SHIFT]], [[TMP1]]
 ; CHECK-NEXT:    [[R:%.*]] = extractelement <4 x i1> [[TMP2]], i64 1
 ; CHECK-NEXT:    call void @use(i1 [[R]])
 ; CHECK-NEXT:    ret i1 [[R]]

diff  --git a/llvm/test/Transforms/VectorCombine/X86/pr114901.ll b/llvm/test/Transforms/VectorCombine/X86/pr114901.ll
index 9c91c3c75bfcf8..d6917e1007cf4c 100644
--- a/llvm/test/Transforms/VectorCombine/X86/pr114901.ll
+++ b/llvm/test/Transforms/VectorCombine/X86/pr114901.ll
@@ -15,11 +15,10 @@ define i1 @PR114901(<4 x i32> %a) {
 ;
 ; AVX-LABEL: define i1 @PR114901(
 ; AVX-SAME: <4 x i32> [[A:%.*]]) #[[ATTR0:[0-9]+]] {
-; AVX-NEXT:    [[E1:%.*]] = extractelement <4 x i32> [[A]], i32 1
-; AVX-NEXT:    [[E3:%.*]] = extractelement <4 x i32> [[A]], i32 3
-; AVX-NEXT:    [[CMP1:%.*]] = icmp sgt i32 [[E1]], -8
-; AVX-NEXT:    [[CMP3:%.*]] = icmp sgt i32 [[E3]], 42
-; AVX-NEXT:    [[R:%.*]] = ashr i1 [[CMP3]], [[CMP1]]
+; AVX-NEXT:    [[TMP1:%.*]] = icmp sgt <4 x i32> [[A]], <i32 poison, i32 -8, i32 poison, i32 42>
+; AVX-NEXT:    [[SHIFT:%.*]] = shufflevector <4 x i1> [[TMP1]], <4 x i1> poison, <4 x i32> <i32 poison, i32 3, i32 poison, i32 poison>
+; AVX-NEXT:    [[TMP2:%.*]] = ashr <4 x i1> [[SHIFT]], [[TMP1]]
+; AVX-NEXT:    [[R:%.*]] = extractelement <4 x i1> [[TMP2]], i64 1
 ; AVX-NEXT:    ret i1 [[R]]
 ;
   %e1 = extractelement <4 x i32> %a, i32 1
@@ -42,11 +41,10 @@ define i1 @PR114901_flip(<4 x i32> %a) {
 ;
 ; AVX-LABEL: define i1 @PR114901_flip(
 ; AVX-SAME: <4 x i32> [[A:%.*]]) #[[ATTR0]] {
-; AVX-NEXT:    [[E1:%.*]] = extractelement <4 x i32> [[A]], i32 1
-; AVX-NEXT:    [[E3:%.*]] = extractelement <4 x i32> [[A]], i32 3
-; AVX-NEXT:    [[CMP1:%.*]] = icmp sgt i32 [[E1]], -8
-; AVX-NEXT:    [[CMP3:%.*]] = icmp sgt i32 [[E3]], 42
-; AVX-NEXT:    [[R:%.*]] = ashr i1 [[CMP1]], [[CMP3]]
+; AVX-NEXT:    [[TMP1:%.*]] = icmp sgt <4 x i32> [[A]], <i32 poison, i32 -8, i32 poison, i32 42>
+; AVX-NEXT:    [[SHIFT:%.*]] = shufflevector <4 x i1> [[TMP1]], <4 x i1> poison, <4 x i32> <i32 poison, i32 3, i32 poison, i32 poison>
+; AVX-NEXT:    [[TMP2:%.*]] = ashr <4 x i1> [[TMP1]], [[SHIFT]]
+; AVX-NEXT:    [[R:%.*]] = extractelement <4 x i1> [[TMP2]], i64 1
 ; AVX-NEXT:    ret i1 [[R]]
 ;
   %e1 = extractelement <4 x i32> %a, i32 1


        


More information about the llvm-commits mailing list