[llvm] r233941 - [AVX] Improve insertion of i8 or i16 into low element of 256-bit zero vector
Sanjay Patel
spatel at rotateright.com
Thu Apr 2 13:21:52 PDT 2015
Author: spatel
Date: Thu Apr 2 15:21:52 2015
New Revision: 233941
URL: http://llvm.org/viewvc/llvm-project?rev=233941&view=rev
Log:
[AVX] Improve insertion of i8 or i16 into low element of 256-bit zero vector
Without this patch, we split the 256-bit vector into halves and produced something like:
movzwl (%rdi), %eax
vmovd %eax, %xmm0
vxorps %xmm1, %xmm1, %xmm1
vblendps $15, %ymm0, %ymm1, %ymm0 ## ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
Now, we eliminate the xor and blend because those zeros are free with the vmovd:
movzwl (%rdi), %eax
vmovd %eax, %xmm0
This should be the final fix needed to resolve PR22685:
https://llvm.org/bugs/show_bug.cgi?id=22685
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v16.ll
llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v32.ll
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=233941&r1=233940&r2=233941&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Thu Apr 2 15:21:52 2015
@@ -5679,14 +5679,24 @@ X86TargetLowering::LowerBUILD_VECTOR(SDV
return getShuffleVectorZeroOrUndef(Item, 0, true, Subtarget, DAG);
}
+ // We can't directly insert an i8 or i16 into a vector, so zero extend
+ // it to i32 first.
if (ExtVT == MVT::i16 || ExtVT == MVT::i8) {
Item = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Item);
- Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32, Item);
if (VT.is256BitVector()) {
- SDValue ZeroVec = getZeroVector(MVT::v8i32, Subtarget, DAG, dl);
- Item = Insert128BitVector(ZeroVec, Item, 0, DAG, dl);
+ if (Subtarget->hasAVX()) {
+ Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v8i32, Item);
+ Item = getShuffleVectorZeroOrUndef(Item, 0, true, Subtarget, DAG);
+ } else {
+ // Without AVX, we need to extend to a 128-bit vector and then
+ // insert into the 256-bit vector.
+ Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32, Item);
+ SDValue ZeroVec = getZeroVector(MVT::v8i32, Subtarget, DAG, dl);
+ Item = Insert128BitVector(ZeroVec, Item, 0, DAG, dl);
+ }
} else {
assert(VT.is128BitVector() && "Expected an SSE value type!");
+ Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32, Item);
Item = getShuffleVectorZeroOrUndef(Item, 0, true, Subtarget, DAG);
}
return DAG.getNode(ISD::BITCAST, dl, VT, Item);
Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v16.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v16.ll?rev=233941&r1=233940&r2=233941&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v16.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v16.ll Thu Apr 2 15:21:52 2015
@@ -3249,3 +3249,15 @@ define <16 x i16> @shuffle_v16i16_23_uu_
%shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 23, i32 undef, i32 3, i32 undef, i32 20, i32 20, i32 5, i32 undef, i32 31, i32 undef, i32 11, i32 undef, i32 28, i32 28, i32 13, i32 undef>
ret <16 x i16> %shuffle
}
+
+define <16 x i16> @insert_v16i16_0elt_into_zero_vector(i16* %ptr) {
+; ALL-LABEL: insert_v16i16_0elt_into_zero_vector:
+; ALL: # BB#0:
+; ALL-NEXT: movzwl (%rdi), %eax
+; ALL-NEXT: vmovd %eax, %xmm0
+; ALL-NEXT: retq
+ %val = load i16, i16* %ptr
+ %i0 = insertelement <16 x i16> zeroinitializer, i16 %val, i32 0
+ ret <16 x i16> %i0
+}
+
Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v32.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v32.ll?rev=233941&r1=233940&r2=233941&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v32.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v32.ll Thu Apr 2 15:21:52 2015
@@ -656,8 +656,6 @@ define <32 x i8> @shuffle_v32i8_31_00_00
; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4,5,6,7]
; AVX2-NEXT: movl $15, %eax
; AVX2-NEXT: vmovd %eax, %xmm1
-; AVX2-NEXT: vpxor %ymm2, %ymm2, %ymm2
-; AVX2-NEXT: vpblendd $15, %ymm1, %ymm2, %ymm1
; AVX2-NEXT: vpshufb %ymm1, %ymm0, %ymm0
; AVX2-NEXT: retq
%shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 31, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
More information about the llvm-commits
mailing list