[PATCH] D115392: [SLP] Don't vectorize div/rem with undef denominators
Simon Pilgrim via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Wed Dec 8 13:26:25 PST 2021
RKSimon created this revision.
RKSimon added reviewers: ABataev, spatel.
Herald added a subscriber: hiraditya.
RKSimon requested review of this revision.
Herald added a project: LLVM.
If any element of a bundle is a DIV/REM instruction with an undef denominator (and is therefore likely to fold to undef), don't vectorize the bundle, as the undef lane would poison the entire vector.
I'm not certain whether this IR can actually reach SLP, but we have tests for it, and I encountered further cases while altering the costs to try to fix the regressions in D111968 <https://reviews.llvm.org/D111968>.
Repository:
rG LLVM Github Monorepo
https://reviews.llvm.org/D115392
Files:
llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
llvm/test/Transforms/SLPVectorizer/X86/arith-div-undef.ll
Index: llvm/test/Transforms/SLPVectorizer/X86/arith-div-undef.ll
===================================================================
--- llvm/test/Transforms/SLPVectorizer/X86/arith-div-undef.ll
+++ llvm/test/Transforms/SLPVectorizer/X86/arith-div-undef.ll
@@ -3,7 +3,21 @@
define <8 x i32> @sdiv_v8i32_undefs(<8 x i32> %a) {
; CHECK-LABEL: @sdiv_v8i32_undefs(
-; CHECK-NEXT: ret <8 x i32> poison
+; CHECK-NEXT: [[A1:%.*]] = extractelement <8 x i32> [[A:%.*]], i32 1
+; CHECK-NEXT: [[A5:%.*]] = extractelement <8 x i32> [[A]], i32 5
+; CHECK-NEXT: [[AB1:%.*]] = sdiv i32 [[A1]], 4
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> undef, <2 x i32> <i32 2, i32 3>
+; CHECK-NEXT: [[TMP2:%.*]] = sdiv <2 x i32> [[TMP1]], <i32 8, i32 16>
+; CHECK-NEXT: [[AB5:%.*]] = sdiv i32 [[A5]], 4
+; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> undef, <2 x i32> <i32 6, i32 7>
+; CHECK-NEXT: [[TMP4:%.*]] = sdiv <2 x i32> [[TMP3]], <i32 8, i32 16>
+; CHECK-NEXT: [[R1:%.*]] = insertelement <8 x i32> poison, i32 [[AB1]], i32 1
+; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> poison, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+; CHECK-NEXT: [[R32:%.*]] = shufflevector <8 x i32> [[R1]], <8 x i32> [[TMP5]], <8 x i32> <i32 undef, i32 1, i32 8, i32 9, i32 undef, i32 undef, i32 undef, i32 undef>
+; CHECK-NEXT: [[R5:%.*]] = insertelement <8 x i32> [[R32]], i32 [[AB5]], i32 5
+; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> poison, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+; CHECK-NEXT: [[R71:%.*]] = shufflevector <8 x i32> [[R5]], <8 x i32> [[TMP6]], <8 x i32> <i32 undef, i32 1, i32 2, i32 3, i32 undef, i32 5, i32 8, i32 9>
+; CHECK-NEXT: ret <8 x i32> [[R71]]
;
%a0 = extractelement <8 x i32> %a, i32 0
%a1 = extractelement <8 x i32> %a, i32 1
Index: llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
===================================================================
--- llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -3848,6 +3848,21 @@
buildTree_rec(Right, Depth + 1, {TE, 1});
return;
}
+ case Instruction::UDiv:
+ case Instruction::SDiv:
+ case Instruction::URem:
+ case Instruction::SRem: {
+ if (any_of(VL, [](Value *V) {
+ return isa<UndefValue>(cast<Instruction>(V)->getOperand(1));
+ })) {
+ BS.cancelScheduling(VL, VL0);
+ newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx,
+ ReuseShuffleIndicies);
+ LLVM_DEBUG(dbgs() << "SLP: DIV/REM contains UNDEF denominators.\n");
+ return;
+ }
+ LLVM_FALLTHROUGH;
+ }
case Instruction::Select:
case Instruction::FNeg:
case Instruction::Add:
@@ -3856,11 +3871,7 @@
case Instruction::FSub:
case Instruction::Mul:
case Instruction::FMul:
- case Instruction::UDiv:
- case Instruction::SDiv:
case Instruction::FDiv:
- case Instruction::URem:
- case Instruction::SRem:
case Instruction::FRem:
case Instruction::Shl:
case Instruction::LShr:
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D115392.392914.patch
Type: text/x-patch
Size: 3268 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20211208/a1904e82/attachment.bin>
More information about the llvm-commits
mailing list