[llvm] r333782 - [InstCombine] fix vector shuffle transform to replace undef elements (PR37648)
Sanjay Patel via llvm-commits
llvm-commits at lists.llvm.org
Fri Jun 1 12:23:19 PDT 2018
Author: spatel
Date: Fri Jun 1 12:23:18 2018
New Revision: 333782
URL: http://llvm.org/viewvc/llvm-project?rev=333782&view=rev
Log:
[InstCombine] fix vector shuffle transform to replace undef elements (PR37648)
This bug:
https://bugs.llvm.org/show_bug.cgi?id=37648
...was introduced by the enhancement to this transform in rL332479.
The urem test shows the disaster potential: any undef divisor lane makes
the whole op undef.
The test diffs show that vector demanded elements already turns some, but not
all, of the potentially unused binop operands back into undef.
Modified:
llvm/trunk/lib/Transforms/InstCombine/InstructionCombining.cpp
llvm/trunk/test/Transforms/InstCombine/vec_shuffle.ll
Modified: llvm/trunk/lib/Transforms/InstCombine/InstructionCombining.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/InstCombine/InstructionCombining.cpp?rev=333782&r1=333781&r2=333782&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/InstCombine/InstructionCombining.cpp (original)
+++ llvm/trunk/lib/Transforms/InstCombine/InstructionCombining.cpp Fri Jun 1 12:23:18 2018
@@ -1421,6 +1421,22 @@ Value *InstCombiner::SimplifyVectorOp(Bi
}
}
if (MayChange) {
+ // It's not safe to use a vector with undef elements because the entire
+ // instruction can be folded to undef (for example, div/rem divisors).
+ // Replace undef lanes with the first non-undef element. Vector demanded
+ // elements can change those back to undef values if that is safe.
+ Constant *SafeDummyConstant = nullptr;
+ for (unsigned i = 0; i < VWidth; ++i) {
+ if (!isa<UndefValue>(NewVecC[i])) {
+ SafeDummyConstant = NewVecC[i];
+ break;
+ }
+ }
+ assert(SafeDummyConstant && "Undef constant vector was not simplified?");
+ for (unsigned i = 0; i < VWidth; ++i)
+ if (isa<UndefValue>(NewVecC[i]))
+ NewVecC[i] = SafeDummyConstant;
+
// Op(shuffle(V1, Mask), C) -> shuffle(Op(V1, NewC), Mask)
// Op(C, shuffle(V1, Mask)) -> shuffle(Op(NewC, V1), Mask)
Constant *NewC = ConstantVector::get(NewVecC);
Modified: llvm/trunk/test/Transforms/InstCombine/vec_shuffle.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/vec_shuffle.ll?rev=333782&r1=333781&r2=333782&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/vec_shuffle.ll (original)
+++ llvm/trunk/test/Transforms/InstCombine/vec_shuffle.ll Fri Jun 1 12:23:18 2018
@@ -452,7 +452,7 @@ define <4 x i32> @mul_const_splat(<4 x i
define <4 x i32> @lshr_const_half_splat(<4 x i32> %v) {
; CHECK-LABEL: @lshr_const_half_splat(
-; CHECK-NEXT: [[TMP1:%.*]] = lshr <4 x i32> <i32 undef, i32 8, i32 9, i32 undef>, [[V:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = lshr <4 x i32> <i32 8, i32 8, i32 9, i32 8>, [[V:%.*]]
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 2, i32 2>
; CHECK-NEXT: ret <4 x i32> [[TMP2]]
;
@@ -583,11 +583,11 @@ define <2 x i32*> @pr23113(<4 x i32*> %A
ret <2 x i32*> %1
}
-; FIXME: Unused lanes in the new binop should not kill the entire op.
+; Unused lanes in the new binop should not kill the entire op (although it may simplify anyway as shown here).
define <2 x i32> @PR37648(<2 x i32> %x) {
; CHECK-LABEL: @PR37648(
-; CHECK-NEXT: ret <2 x i32> undef
+; CHECK-NEXT: ret <2 x i32> zeroinitializer
;
%splat = shufflevector <2 x i32> %x, <2 x i32> undef, <2 x i32> zeroinitializer
%r = urem <2 x i32> %splat, <i32 1, i32 1>
@@ -596,7 +596,7 @@ define <2 x i32> @PR37648(<2 x i32> %x)
define <2 x float> @splat_first_fp(<2 x float> %x) {
; CHECK-LABEL: @splat_first_fp(
-; CHECK-NEXT: [[TMP1:%.*]] = fadd <2 x float> [[X:%.*]], <float 1.000000e+00, float undef>
+; CHECK-NEXT: [[TMP1:%.*]] = fadd <2 x float> [[X:%.*]], <float 1.000000e+00, float 1.000000e+00>
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> undef, <2 x i32> zeroinitializer
; CHECK-NEXT: ret <2 x float> [[TMP2]]
;
More information about the llvm-commits
mailing list