[PATCH] D82049: Fix crash in VectorCombine when attempting to peephole ConstantVector sequences

Thu Jun 18 00:30:48 PDT 2020

clin1 updated this revision to Diff 271590.
clin1 edited the summary of this revision.
clin1 added a comment.

Updated to run instruction simplification (SimplifyInstructionsInBlock) on each basic block before the vector transformations. The insert-binop-with-constant test needed a few changes: InstructionSimplify checks for constant divisors with 0/undef elements and folds the entire result to undef.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D82049/new/

https://reviews.llvm.org/D82049

Files:
  llvm/lib/Transforms/Vectorize/VectorCombine.cpp
  llvm/test/Transforms/VectorCombine/X86/fold-extract.ll
  llvm/test/Transforms/VectorCombine/X86/insert-binop-with-constant.ll


Index: llvm/test/Transforms/VectorCombine/X86/insert-binop-with-constant.ll
===================================================================

--- llvm/test/Transforms/VectorCombine/X86/insert-binop-with-constant.ll
+++ llvm/test/Transforms/VectorCombine/X86/insert-binop-with-constant.ll
@@ -333,9 +333,7 @@
 
 define <2 x i64> @urem_constant_op1(i64 %x) {
 ; CHECK-LABEL: @urem_constant_op1(
-; CHECK-NEXT:    [[BO_SCALAR:%.*]] = urem i64 [[X:%.*]], 2
-; CHECK-NEXT:    [[BO:%.*]] = insertelement <2 x i64> <i64 undef, i64 0>, i64 [[BO_SCALAR]], i64 1
-; CHECK-NEXT:    ret <2 x i64> [[BO]]
+; CHECK-NEXT:    ret <2 x i64> undef
 ;
   %ins = insertelement <2 x i64> undef, i64 %x, i32 1
   %bo = urem <2 x i64> %ins, <i64 undef, i64 2>
@@ -377,9 +375,7 @@
 
 define <2 x i64> @srem_constant_op1(i64 %x) {
 ; CHECK-LABEL: @srem_constant_op1(
-; CHECK-NEXT:    [[BO_SCALAR:%.*]] = srem i64 [[X:%.*]], 2
-; CHECK-NEXT:    [[BO:%.*]] = insertelement <2 x i64> <i64 undef, i64 0>, i64 [[BO_SCALAR]], i64 1
-; CHECK-NEXT:    ret <2 x i64> [[BO]]
+; CHECK-NEXT:    ret <2 x i64> undef
 ;
   %ins = insertelement <2 x i64> undef, i64 %x, i32 1
   %bo = srem <2 x i64> %ins, <i64 undef, i64 2>
@@ -421,9 +417,7 @@
 
 define <2 x i64> @udiv_constant_op1(i64 %x) {
 ; CHECK-LABEL: @udiv_constant_op1(
-; CHECK-NEXT:    [[BO_SCALAR:%.*]] = udiv i64 [[X:%.*]], 2
-; CHECK-NEXT:    [[BO:%.*]] = insertelement <2 x i64> <i64 undef, i64 0>, i64 [[BO_SCALAR]], i64 1
-; CHECK-NEXT:    ret <2 x i64> [[BO]]
+; CHECK-NEXT:    ret <2 x i64> undef
 ;
   %ins = insertelement <2 x i64> undef, i64 %x, i32 1
   %bo = udiv <2 x i64> %ins, <i64 undef, i64 2>
@@ -465,9 +459,7 @@
 
 define <2 x i64> @sdiv_constant_op1(i64 %x) {
 ; CHECK-LABEL: @sdiv_constant_op1(
-; CHECK-NEXT:    [[BO_SCALAR:%.*]] = sdiv exact i64 [[X:%.*]], 2
-; CHECK-NEXT:    [[BO:%.*]] = insertelement <2 x i64> <i64 undef, i64 0>, i64 [[BO_SCALAR]], i64 1
-; CHECK-NEXT:    ret <2 x i64> [[BO]]
+; CHECK-NEXT:    ret <2 x i64> undef
 ;
   %ins = insertelement <2 x i64> undef, i64 %x, i32 1
   %bo = sdiv exact <2 x i64> %ins, <i64 undef, i64 2>
Index: llvm/test/Transforms/VectorCombine/X86/fold-extract.ll
===================================================================
--- /dev/null
+++ llvm/test/Transforms/VectorCombine/X86/fold-extract.ll
@@ -0,0 +1,23 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -S -vector-combine | FileCheck %s
+;
+; foldExtractExtract was crashing with ConstantVector operands.
+; Fold all extracts before applying VectorCombine patterns.
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; Function Attrs: norecurse nounwind uwtable
+define dso_local i32 @constant_fold_crash(<4 x i32> %x) local_unnamed_addr #0 {
+; CHECK-LABEL: @constant_fold_crash(
+; CHECK-NEXT:    [[B:%.*]] = extractelement <4 x i32> [[X:%.*]], i32 0
+; CHECK-NEXT:    [[C:%.*]] = add i32 17, [[B]]
+; CHECK-NEXT:    ret i32 [[C]]
+;
+
+  %a = extractelement <4 x i32> <i32 16, i32 17, i32 18, i32 19>, i32 1
+  %b = extractelement <4 x i32> %x, i32 0
+  %c = add i32 %a, %b
+  ret i32 %c
+}
+attributes #0 = { norecurse nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="core-avx2" "target-features"="+avx,+avx2,+bmi,+bmi2,+cx16,+cx8,+f16c,+fma,+fsgsbase,+fxsr,+invpcid,+lzcnt,+mmx,+movbe,+pclmul,+popcnt,+rdrnd,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt" "unsafe-fp-math"="false" "use-soft-float"="false" }
Index: llvm/lib/Transforms/Vectorize/VectorCombine.cpp
===================================================================
--- llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -416,6 +416,11 @@
     // Ignore unreachable basic blocks.
     if (!DT.isReachableFromEntry(&BB))
       continue;
+
+    // Fold what can be folded, to avoid Constants showing up in unexpected
+    // places.
+    MadeChange |= SimplifyInstructionsInBlock(&BB);
+
     // Do not delete instructions under here and invalidate the iterator.
     // Walk the block forwards to enable simple iterative chains of transforms.
     // TODO: It could be more efficient to remove dead instructions


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D82049.271590.patch
Type: text/x-patch
Size: 4566 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20200618/a131fded/attachment-0001.bin>