[PATCH] D115392: [SLP] Don't vectorize div/rem with undef denominators

Simon Pilgrim via Phabricator via llvm-commits llvm-commits at lists.llvm.org
Wed Dec 8 13:26:25 PST 2021


RKSimon created this revision.
RKSimon added reviewers: ABataev, spatel.
Herald added a subscriber: hiraditya.
RKSimon requested review of this revision.
Herald added a project: LLVM.

If any of the elements contains a DIV/REM node that is likely to fold to undef, then don't vectorize with it as it'll poison the entire vector.

I'm not certain if this IR can actually occur going into SLP or not, but we have tests for it, and I encountered further cases while altering the costs to try and fix the regressions in D111968 <https://reviews.llvm.org/D111968>.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D115392

Files:
  llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
  llvm/test/Transforms/SLPVectorizer/X86/arith-div-undef.ll


Index: llvm/test/Transforms/SLPVectorizer/X86/arith-div-undef.ll
===================================================================
--- llvm/test/Transforms/SLPVectorizer/X86/arith-div-undef.ll
+++ llvm/test/Transforms/SLPVectorizer/X86/arith-div-undef.ll
@@ -3,7 +3,21 @@
 
 define <8 x i32> @sdiv_v8i32_undefs(<8 x i32> %a) {
 ; CHECK-LABEL: @sdiv_v8i32_undefs(
-; CHECK-NEXT:    ret <8 x i32> poison
+; CHECK-NEXT:    [[A1:%.*]] = extractelement <8 x i32> [[A:%.*]], i32 1
+; CHECK-NEXT:    [[A5:%.*]] = extractelement <8 x i32> [[A]], i32 5
+; CHECK-NEXT:    [[AB1:%.*]] = sdiv i32 [[A1]], 4
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> undef, <2 x i32> <i32 2, i32 3>
+; CHECK-NEXT:    [[TMP2:%.*]] = sdiv <2 x i32> [[TMP1]], <i32 8, i32 16>
+; CHECK-NEXT:    [[AB5:%.*]] = sdiv i32 [[A5]], 4
+; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> undef, <2 x i32> <i32 6, i32 7>
+; CHECK-NEXT:    [[TMP4:%.*]] = sdiv <2 x i32> [[TMP3]], <i32 8, i32 16>
+; CHECK-NEXT:    [[R1:%.*]] = insertelement <8 x i32> poison, i32 [[AB1]], i32 1
+; CHECK-NEXT:    [[TMP5:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> poison, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+; CHECK-NEXT:    [[R32:%.*]] = shufflevector <8 x i32> [[R1]], <8 x i32> [[TMP5]], <8 x i32> <i32 undef, i32 1, i32 8, i32 9, i32 undef, i32 undef, i32 undef, i32 undef>
+; CHECK-NEXT:    [[R5:%.*]] = insertelement <8 x i32> [[R32]], i32 [[AB5]], i32 5
+; CHECK-NEXT:    [[TMP6:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> poison, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+; CHECK-NEXT:    [[R71:%.*]] = shufflevector <8 x i32> [[R5]], <8 x i32> [[TMP6]], <8 x i32> <i32 undef, i32 1, i32 2, i32 3, i32 undef, i32 5, i32 8, i32 9>
+; CHECK-NEXT:    ret <8 x i32> [[R71]]
 ;
   %a0 = extractelement <8 x i32> %a, i32 0
   %a1 = extractelement <8 x i32> %a, i32 1
Index: llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
===================================================================
--- llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -3848,6 +3848,21 @@
       buildTree_rec(Right, Depth + 1, {TE, 1});
       return;
     }
+    case Instruction::UDiv:
+    case Instruction::SDiv:
+    case Instruction::URem:
+    case Instruction::SRem: {
+      if (any_of(VL, [](Value *V) {
+            return isa<UndefValue>(cast<Instruction>(V)->getOperand(1));
+          })) {
+        BS.cancelScheduling(VL, VL0);
+        newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx,
+                     ReuseShuffleIndicies);
+        LLVM_DEBUG(dbgs() << "SLP: DIV/REM contains UNDEF denominators.\n");
+        return;
+      }
+      LLVM_FALLTHROUGH;
+    }
     case Instruction::Select:
     case Instruction::FNeg:
     case Instruction::Add:
@@ -3856,11 +3871,7 @@
     case Instruction::FSub:
     case Instruction::Mul:
     case Instruction::FMul:
-    case Instruction::UDiv:
-    case Instruction::SDiv:
     case Instruction::FDiv:
-    case Instruction::URem:
-    case Instruction::SRem:
     case Instruction::FRem:
     case Instruction::Shl:
     case Instruction::LShr:


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D115392.392914.patch
Type: text/x-patch
Size: 3268 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20211208/a1904e82/attachment.bin>


More information about the llvm-commits mailing list