[PATCH] D38316: [InstCombine] replace bitcast to scalar + insertelement with widening shuffle + vector bitcast
Sanjay Patel via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Wed Sep 27 07:04:27 PDT 2017
spatel created this revision.
Herald added a subscriber: mcrosier.
insert undef, (bitcast vType X to scalar), C --> bitcast (shuffle X, undef, Mask)
I think this is a universal improvement for vector IR code because it removes a vector-to-scalar-to-vector transition, but I'm not sure if the pattern is relevant to anything besides x86 AVX. In the motivating example from PR34716 ( https://bugs.llvm.org/show_bug.cgi?id=34716 ), we have:
; Motivating input: two i32 scalars are packed into a <2 x i32>, bitcast to a
; scalar i64, inserted into lane 0 of an <8 x i64>, and then splatted.
define <8 x i64> @test(i32 %x0, i32 %x1) {
%1 = insertelement <2 x i32> undef, i32 %x0, i32 0 ; narrow vec insert, lane 0
%2 = insertelement <2 x i32> %1, i32 %x1, i32 1 ; narrow vec insert, lane 1
%3 = bitcast <2 x i32> %2 to i64 ; vector-to-scalar transition
%4 = insertelement <8 x i64> undef, i64 %3, i32 0 ; scalar-to-vector transition
%5 = shufflevector <8 x i64> %4, <8 x i64> undef, <8 x i32> zeroinitializer ; splat lane 0
ret <8 x i64> %5
}
This leads to inefficient movement between scalar GPRs and vector registers. With this patch, other vector instcombines will fire, reducing the IR to:
; Same function with the scalar i64 step removed: the i32 scalars are inserted
; directly into a <16 x i32>, which is bitcast vector-to-vector and splatted.
define <8 x i64> @test(i32 %x0, i32 %x1) {
%1 = insertelement <16 x i32> undef, i32 %x0, i32 0 ; wide vec insert, lane 0
%2 = insertelement <16 x i32> %1, i32 %x1, i32 1 ; wide vec insert, lane 1
%3 = bitcast <16 x i32> %2 to <8 x i64> ; free bitcast (vector-to-vector, no scalar step)
%4 = shufflevector <8 x i64> %3, <8 x i64> undef, <8 x i32> zeroinitializer ; splat lane 0
ret <8 x i64> %4
}
And through backend folds, a 32-bit AVX512 target could manage to load the two 32-bit scalars and splat them in a single instruction (although this doesn't quite happen yet):
vmovsd 4(%esp), %xmm0 # xmm0 = mem[0],zero
vbroadcastsd %xmm0, %zmm0
https://reviews.llvm.org/D38316
Files:
lib/Transforms/InstCombine/InstCombineVectorOps.cpp
test/Transforms/InstCombine/insert-extract-shuffle.ll
Index: test/Transforms/InstCombine/insert-extract-shuffle.ll
===================================================================
--- test/Transforms/InstCombine/insert-extract-shuffle.ll
+++ test/Transforms/InstCombine/insert-extract-shuffle.ll
@@ -283,3 +283,31 @@
%ret = select i1 %e, <4 x i32> %b, <4 x i32> zeroinitializer
ret <4 x i32> %ret
}
+
+; insert undef, (bitcast vType X to scalar), C --> bitcast (shuffle X, undef, Mask)
+; PR34716 - https://bugs.llvm.org/show_bug.cgi?id=34716
+
+define <2 x i64> @bitcast_vector_and_insert(<2 x float> %x){
+; CHECK-LABEL: @bitcast_vector_and_insert(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x float> %x, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
+; CHECK-NEXT: [[VEC:%.*]] = bitcast <4 x float> [[TMP1]] to <2 x i64>
+; CHECK-NEXT: ret <2 x i64> [[VEC]]
+;
+ %bc = bitcast <2 x float> %x to i64
+ %vec = insertelement <2 x i64> undef, i64 %bc, i32 0
+ ret <2 x i64> %vec
+}
+
+; Use weird types to show the shuffle mask is still correct for unusual cases.
+
+define <4 x i72> @bitcast_vector_and_insert_odd(<3 x i24> %x){
+; CHECK-LABEL: @bitcast_vector_and_insert_odd(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <3 x i24> %x, <3 x i24> undef, <12 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 0, i32 1, i32 2, i32 undef, i32 undef, i32 undef>
+; CHECK-NEXT: [[VEC:%.*]] = bitcast <12 x i24> [[TMP1]] to <4 x i72>
+; CHECK-NEXT: ret <4 x i72> [[VEC]]
+;
+ %bc = bitcast <3 x i24> %x to i72
+ %vec = insertelement <4 x i72> undef, i72 %bc, i32 2
+ ret <4 x i72> %vec
+}
+
Index: lib/Transforms/InstCombine/InstCombineVectorOps.cpp
===================================================================
--- lib/Transforms/InstCombine/InstCombineVectorOps.cpp
+++ lib/Transforms/InstCombine/InstCombineVectorOps.cpp
@@ -824,6 +824,38 @@
if (Instruction *Broadcast = foldInsSequenceIntoBroadcast(IE))
return Broadcast;
+ Value *X;
+ if (match(ScalarOp, m_OneUse(m_BitCast(m_Value(X)))) &&
+ isa<UndefValue>(VecOp)) {
+ auto *InsertIndex = dyn_cast<ConstantInt>(IdxOp);
+ auto *SrcVecTy = dyn_cast<VectorType>(X->getType());
+ if (InsertIndex && SrcVecTy) {
+ // We're bitcasting from vector to scalar and then inserting into a larger
+ // undef vector. This is really a subvector extension with undef elements,
+ // so use a size-extending shuffle to avoid the scalar conversion:
+ // insert undef, (bitcast vType X to scalar), C -->
+ // bitcast (shuffle X, undef, Mask)
+ unsigned NumSrcElts = SrcVecTy->getNumElements();
+ unsigned ExtRatio = IE.getType()->getBitWidth() / SrcVecTy->getBitWidth();
+ unsigned NumMaskVals = NumSrcElts * ExtRatio;
+ SmallVector<Constant *, 16> MaskValues(NumMaskVals);
+ for (unsigned i = 0; i != NumMaskVals; ++i) {
+ // The bitcast maps all of the elements of the source vector into one
+ // larger element of the resulting vector. All other elements are
+ // undefined. Example:
+ // insert <2 x i16> undef, (bitcast <2 x i8> X to i16), i32 0 -->
+ // bitcast (shuffle <2 x i8> X, undef, <0,1,-1,-1>) to <2 x i16>
+ if (i / NumSrcElts == InsertIndex->getZExtValue())
+ MaskValues[i] = Builder.getInt32(i % NumSrcElts);
+ else
+ MaskValues[i] = UndefValue::get(Builder.getInt32Ty());
+ }
+ Value *SV = Builder.CreateShuffleVector(X, UndefValue::get(SrcVecTy),
+ ConstantVector::get(MaskValues));
+ return new BitCastInst(SV, IE.getType());
+ }
+ }
+
return nullptr;
}
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D38316.116804.patch
Type: text/x-patch
Size: 3660 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20170927/10f0fc32/attachment.bin>
More information about the llvm-commits
mailing list