[llvm] r314792 - [X86][SSE] Add support for decoding PACKSS/PACKUS shuffle masks with UNDEF inputs
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Tue Oct 3 05:41:39 PDT 2017
Author: rksimon
Date: Tue Oct 3 05:41:39 2017
New Revision: 314792
URL: http://llvm.org/viewvc/llvm-project?rev=314792&view=rev
Log:
[X86][SSE] Add support for decoding PACKSS/PACKUS shuffle masks with UNDEF inputs
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/test/CodeGen/X86/avx2-vbroadcast.ll
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=314792&r1=314791&r2=314792&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Tue Oct 3 05:41:39 2017
@@ -5967,13 +5967,13 @@ static bool getFauxShuffleMask(SDValue N
// If we know input saturation won't happen we can treat this
// as a truncation shuffle.
if (Opcode == X86ISD::PACKSS) {
- if (DAG.ComputeNumSignBits(N0) <= NumBitsPerElt ||
- DAG.ComputeNumSignBits(N1) <= NumBitsPerElt)
+ if ((!N0.isUndef() && DAG.ComputeNumSignBits(N0) <= NumBitsPerElt) ||
+ (!N1.isUndef() && DAG.ComputeNumSignBits(N1) <= NumBitsPerElt))
return false;
} else {
APInt ZeroMask = APInt::getHighBitsSet(2 * NumBitsPerElt, NumBitsPerElt);
- if (!DAG.MaskedValueIsZero(N0, ZeroMask) ||
- !DAG.MaskedValueIsZero(N1, ZeroMask))
+ if ((!N0.isUndef() && !DAG.MaskedValueIsZero(N0, ZeroMask)) ||
+ (!N1.isUndef() && !DAG.MaskedValueIsZero(N1, ZeroMask)))
return false;
}
@@ -6042,6 +6042,14 @@ static void resolveTargetShuffleInputsAn
for (int i = 0, e = Inputs.size(); i < e; ++i) {
int lo = UsedInputs.size() * MaskWidth;
int hi = lo + MaskWidth;
+
+ // Strip UNDEF input usage.
+ if (Inputs[i].isUndef())
+ for (int &M : Mask)
+ if ((lo <= M) && (M < hi))
+ M = SM_SentinelUndef;
+
+ // Check for unused inputs.
if (any_of(Mask, [lo, hi](int i) { return (lo <= i) && (i < hi); })) {
UsedInputs.push_back(Inputs[i]);
continue;
Modified: llvm/trunk/test/CodeGen/X86/avx2-vbroadcast.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx2-vbroadcast.ll?rev=314792&r1=314791&r2=314792&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx2-vbroadcast.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx2-vbroadcast.ll Tue Oct 3 05:41:39 2017
@@ -267,16 +267,14 @@ define <16 x i16> @broadcast_mem_v4i16_v
; X32-AVX2-LABEL: broadcast_mem_v4i16_v16i16:
; X32-AVX2: ## BB#0:
; X32-AVX2-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-AVX2-NEXT: vpmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
-; X32-AVX2-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
-; X32-AVX2-NEXT: vpbroadcastq %xmm0, %ymm0
+; X32-AVX2-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
+; X32-AVX2-NEXT: vbroadcastsd %xmm0, %ymm0
; X32-AVX2-NEXT: retl
;
; X64-AVX2-LABEL: broadcast_mem_v4i16_v16i16:
; X64-AVX2: ## BB#0:
-; X64-AVX2-NEXT: vpmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
-; X64-AVX2-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
-; X64-AVX2-NEXT: vpbroadcastq %xmm0, %ymm0
+; X64-AVX2-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
+; X64-AVX2-NEXT: vbroadcastsd %xmm0, %ymm0
; X64-AVX2-NEXT: retq
;
; X32-AVX512VL-LABEL: broadcast_mem_v4i16_v16i16:
More information about the llvm-commits
mailing list