[llvm] e3a0775 - [VectorCombine] foldExtractedCmps - (re-)enable fold on non-commutative binops
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Wed Nov 6 04:14:53 PST 2024
Author: Simon Pilgrim
Date: 2024-11-06T12:10:31Z
New Revision: e3a0775651190a23d8234615b9fdadd81c1c24bc
URL: https://github.com/llvm/llvm-project/commit/e3a0775651190a23d8234615b9fdadd81c1c24bc
DIFF: https://github.com/llvm/llvm-project/commit/e3a0775651190a23d8234615b9fdadd81c1c24bc.diff
LOG: [VectorCombine] foldExtractedCmps - (re-)enable fold on non-commutative binops
#114901 exposed that foldExtractedCmps didn't account for non-commutative binops, so the fold was disabled for them by 05e838f428555bcc4507bd37912da60ea9110ef6.
This patch re-enables support for non-commutative binops by ensuring that the LHS/RHS arg order of the binop is retained.
Added:
Modified:
llvm/lib/Transforms/Vectorize/VectorCombine.cpp
llvm/test/Transforms/VectorCombine/X86/extract-cmp-binop.ll
llvm/test/Transforms/VectorCombine/X86/pr114901.ll
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index 332c52040e21c4..4cb0b68eee1599 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -1039,10 +1039,6 @@ bool VectorCombine::foldExtractedCmps(Instruction &I) {
if (!BI || !I.getType()->isIntegerTy(1))
return false;
- // TODO: Support non-commutative binary ops.
- if (!BI->isCommutative())
- return false;
-
// The compare predicates should match, and each compare should have a
// constant operand.
Value *B0 = I.getOperand(0), *B1 = I.getOperand(1);
@@ -1066,6 +1062,8 @@ bool VectorCombine::foldExtractedCmps(Instruction &I) {
ExtractElementInst *ConvertToShuf = getShuffleExtract(Ext0, Ext1);
if (!ConvertToShuf)
return false;
+ assert((ConvertToShuf == Ext0 || ConvertToShuf == Ext1) &&
+ "Unknown ExtractElementInst");
// The original scalar pattern is:
// binop i1 (cmp Pred (ext X, Index0), C0), (cmp Pred (ext X, Index1), C1)
@@ -1117,9 +1115,10 @@ bool VectorCombine::foldExtractedCmps(Instruction &I) {
CmpC[Index0] = C0;
CmpC[Index1] = C1;
Value *VCmp = Builder.CreateCmp(Pred, X, ConstantVector::get(CmpC));
-
Value *Shuf = createShiftShuffle(VCmp, ExpensiveIndex, CheapIndex, Builder);
- Value *VecLogic = Builder.CreateBinOp(BI->getOpcode(), VCmp, Shuf);
+ Value *LHS = ConvertToShuf == Ext0 ? Shuf : VCmp;
+ Value *RHS = ConvertToShuf == Ext0 ? VCmp : Shuf;
+ Value *VecLogic = Builder.CreateBinOp(BI->getOpcode(), LHS, RHS);
Value *NewExt = Builder.CreateExtractElement(VecLogic, CheapIndex);
replaceValue(I, *NewExt);
++NumVecCmpBO;
diff --git a/llvm/test/Transforms/VectorCombine/X86/extract-cmp-binop.ll b/llvm/test/Transforms/VectorCombine/X86/extract-cmp-binop.ll
index be5359f549ac94..775f2d2da5721f 100644
--- a/llvm/test/Transforms/VectorCombine/X86/extract-cmp-binop.ll
+++ b/llvm/test/Transforms/VectorCombine/X86/extract-cmp-binop.ll
@@ -54,7 +54,7 @@ define i1 @icmp_xor_v4i32(<4 x i32> %a) {
; CHECK-LABEL: @icmp_xor_v4i32(
; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt <4 x i32> [[A:%.*]], <i32 poison, i32 -8, i32 poison, i32 42>
; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x i1> [[TMP1]], <4 x i1> poison, <4 x i32> <i32 poison, i32 3, i32 poison, i32 poison>
-; CHECK-NEXT: [[TMP2:%.*]] = xor <4 x i1> [[TMP1]], [[SHIFT]]
+; CHECK-NEXT: [[TMP2:%.*]] = xor <4 x i1> [[SHIFT]], [[TMP1]]
; CHECK-NEXT: [[R:%.*]] = extractelement <4 x i1> [[TMP2]], i64 1
; CHECK-NEXT: ret i1 [[R]]
;
@@ -80,7 +80,7 @@ define i1 @icmp_add_v8i32(<8 x i32> %a) {
; AVX-LABEL: @icmp_add_v8i32(
; AVX-NEXT: [[TMP1:%.*]] = icmp eq <8 x i32> [[A:%.*]], <i32 poison, i32 poison, i32 -8, i32 poison, i32 poison, i32 poison, i32 poison, i32 42>
; AVX-NEXT: [[SHIFT:%.*]] = shufflevector <8 x i1> [[TMP1]], <8 x i1> poison, <8 x i32> <i32 poison, i32 poison, i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
-; AVX-NEXT: [[TMP2:%.*]] = add <8 x i1> [[TMP1]], [[SHIFT]]
+; AVX-NEXT: [[TMP2:%.*]] = add <8 x i1> [[SHIFT]], [[TMP1]]
; AVX-NEXT: [[R:%.*]] = extractelement <8 x i1> [[TMP2]], i64 2
; AVX-NEXT: ret i1 [[R]]
;
@@ -131,7 +131,7 @@ define i1 @icmp_xor_v4i32_multiuse(<4 x i32> %a) {
; CHECK-NEXT: call void @use(i32 [[E2]])
; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt <4 x i32> [[A]], <i32 poison, i32 -8, i32 poison, i32 42>
; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x i1> [[TMP1]], <4 x i1> poison, <4 x i32> <i32 poison, i32 3, i32 poison, i32 poison>
-; CHECK-NEXT: [[TMP2:%.*]] = xor <4 x i1> [[TMP1]], [[SHIFT]]
+; CHECK-NEXT: [[TMP2:%.*]] = xor <4 x i1> [[SHIFT]], [[TMP1]]
; CHECK-NEXT: [[R:%.*]] = extractelement <4 x i1> [[TMP2]], i64 1
; CHECK-NEXT: call void @use(i1 [[R]])
; CHECK-NEXT: ret i1 [[R]]
diff --git a/llvm/test/Transforms/VectorCombine/X86/pr114901.ll b/llvm/test/Transforms/VectorCombine/X86/pr114901.ll
index 9c91c3c75bfcf8..d6917e1007cf4c 100644
--- a/llvm/test/Transforms/VectorCombine/X86/pr114901.ll
+++ b/llvm/test/Transforms/VectorCombine/X86/pr114901.ll
@@ -15,11 +15,10 @@ define i1 @PR114901(<4 x i32> %a) {
;
; AVX-LABEL: define i1 @PR114901(
; AVX-SAME: <4 x i32> [[A:%.*]]) #[[ATTR0:[0-9]+]] {
-; AVX-NEXT: [[E1:%.*]] = extractelement <4 x i32> [[A]], i32 1
-; AVX-NEXT: [[E3:%.*]] = extractelement <4 x i32> [[A]], i32 3
-; AVX-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[E1]], -8
-; AVX-NEXT: [[CMP3:%.*]] = icmp sgt i32 [[E3]], 42
-; AVX-NEXT: [[R:%.*]] = ashr i1 [[CMP3]], [[CMP1]]
+; AVX-NEXT: [[TMP1:%.*]] = icmp sgt <4 x i32> [[A]], <i32 poison, i32 -8, i32 poison, i32 42>
+; AVX-NEXT: [[SHIFT:%.*]] = shufflevector <4 x i1> [[TMP1]], <4 x i1> poison, <4 x i32> <i32 poison, i32 3, i32 poison, i32 poison>
+; AVX-NEXT: [[TMP2:%.*]] = ashr <4 x i1> [[SHIFT]], [[TMP1]]
+; AVX-NEXT: [[R:%.*]] = extractelement <4 x i1> [[TMP2]], i64 1
; AVX-NEXT: ret i1 [[R]]
;
%e1 = extractelement <4 x i32> %a, i32 1
@@ -42,11 +41,10 @@ define i1 @PR114901_flip(<4 x i32> %a) {
;
; AVX-LABEL: define i1 @PR114901_flip(
; AVX-SAME: <4 x i32> [[A:%.*]]) #[[ATTR0]] {
-; AVX-NEXT: [[E1:%.*]] = extractelement <4 x i32> [[A]], i32 1
-; AVX-NEXT: [[E3:%.*]] = extractelement <4 x i32> [[A]], i32 3
-; AVX-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[E1]], -8
-; AVX-NEXT: [[CMP3:%.*]] = icmp sgt i32 [[E3]], 42
-; AVX-NEXT: [[R:%.*]] = ashr i1 [[CMP1]], [[CMP3]]
+; AVX-NEXT: [[TMP1:%.*]] = icmp sgt <4 x i32> [[A]], <i32 poison, i32 -8, i32 poison, i32 42>
+; AVX-NEXT: [[SHIFT:%.*]] = shufflevector <4 x i1> [[TMP1]], <4 x i1> poison, <4 x i32> <i32 poison, i32 3, i32 poison, i32 poison>
+; AVX-NEXT: [[TMP2:%.*]] = ashr <4 x i1> [[TMP1]], [[SHIFT]]
+; AVX-NEXT: [[R:%.*]] = extractelement <4 x i1> [[TMP2]], i64 1
; AVX-NEXT: ret i1 [[R]]
;
%e1 = extractelement <4 x i32> %a, i32 1
More information about the llvm-commits mailing list