[llvm] a17de6b - [X86][SSE] truncateVectorWithPACK - upper undef for 128->64 packing
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Sun Apr 5 03:48:15 PDT 2020
Author: Simon Pilgrim
Date: 2020-04-05T11:47:36+01:00
New Revision: a17de6b91cffc6edf2126d4ac75234e06bf33925
URL: https://github.com/llvm/llvm-project/commit/a17de6b91cffc6edf2126d4ac75234e06bf33925
DIFF: https://github.com/llvm/llvm-project/commit/a17de6b91cffc6edf2126d4ac75234e06bf33925.diff
LOG: [X86][SSE] truncateVectorWithPACK - upper undef for 128->64 packing
If we're packing from 128 bits to 64 bits, only the lower 64 bits of the PACK result are extracted, and those come entirely from the LHS operand, so the RHS operand can be undef. This helps register allocation, since the input value no longer has to be used twice.
Added:
Modified:
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/test/CodeGen/X86/masked_store_trunc_ssat.ll
llvm/test/CodeGen/X86/masked_store_trunc_usat.ll
llvm/test/CodeGen/X86/vector-trunc-packus.ll
llvm/test/CodeGen/X86/vector-trunc-ssat.ll
llvm/test/CodeGen/X86/vector-trunc-usat.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index e2df09edd03f..59b5700d4804 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -20236,7 +20236,7 @@ static SDValue truncateVectorWithPACK(unsigned Opcode, EVT DstVT, SDValue In,
InVT = EVT::getVectorVT(Ctx, InVT, 128 / InVT.getSizeInBits());
OutVT = EVT::getVectorVT(Ctx, OutVT, 128 / OutVT.getSizeInBits());
In = DAG.getBitcast(InVT, In);
- SDValue Res = DAG.getNode(Opcode, DL, OutVT, In, In);
+ SDValue Res = DAG.getNode(Opcode, DL, OutVT, In, DAG.getUNDEF(InVT));
Res = extractSubVector(Res, 0, DAG, DL, 64);
return DAG.getBitcast(DstVT, Res);
}
diff --git a/llvm/test/CodeGen/X86/masked_store_trunc_ssat.ll b/llvm/test/CodeGen/X86/masked_store_trunc_ssat.ll
index f89bb072248b..1c1fddd361b0 100644
--- a/llvm/test/CodeGen/X86/masked_store_trunc_ssat.ll
+++ b/llvm/test/CodeGen/X86/masked_store_trunc_ssat.ll
@@ -1171,7 +1171,7 @@ define void @truncstore_v8i64_v8i8(<8 x i64> %x, <8 x i8>* %p, <8 x i32> %mask)
; SSE4-NEXT: andpd %xmm0, %xmm6
; SSE4-NEXT: packusdw %xmm7, %xmm6
; SSE4-NEXT: packusdw %xmm2, %xmm6
-; SSE4-NEXT: packuswb %xmm6, %xmm6
+; SSE4-NEXT: packuswb %xmm0, %xmm6
; SSE4-NEXT: pcmpeqd %xmm8, %xmm5
; SSE4-NEXT: pcmpeqd %xmm0, %xmm0
; SSE4-NEXT: pxor %xmm0, %xmm5
diff --git a/llvm/test/CodeGen/X86/masked_store_trunc_usat.ll b/llvm/test/CodeGen/X86/masked_store_trunc_usat.ll
index d231ae7d5ab4..993d96da5edc 100644
--- a/llvm/test/CodeGen/X86/masked_store_trunc_usat.ll
+++ b/llvm/test/CodeGen/X86/masked_store_trunc_usat.ll
@@ -870,7 +870,7 @@ define void @truncstore_v8i64_v8i8(<8 x i64> %x, <8 x i8>* %p, <8 x i32> %mask)
; SSE2-NEXT: por %xmm2, %xmm0
; SSE2-NEXT: packuswb %xmm1, %xmm0
; SSE2-NEXT: packuswb %xmm0, %xmm7
-; SSE2-NEXT: packuswb %xmm7, %xmm7
+; SSE2-NEXT: packuswb %xmm0, %xmm7
; SSE2-NEXT: pcmpeqd %xmm8, %xmm5
; SSE2-NEXT: pcmpeqd %xmm0, %xmm0
; SSE2-NEXT: pxor %xmm0, %xmm5
@@ -969,7 +969,7 @@ define void @truncstore_v8i64_v8i8(<8 x i64> %x, <8 x i8>* %p, <8 x i32> %mask)
; SSE4-NEXT: blendvpd %xmm0, %xmm2, %xmm6
; SSE4-NEXT: packusdw %xmm7, %xmm6
; SSE4-NEXT: packusdw %xmm6, %xmm1
-; SSE4-NEXT: packuswb %xmm1, %xmm1
+; SSE4-NEXT: packuswb %xmm0, %xmm1
; SSE4-NEXT: pcmpeqd %xmm8, %xmm5
; SSE4-NEXT: pcmpeqd %xmm0, %xmm0
; SSE4-NEXT: pxor %xmm0, %xmm5
@@ -4364,7 +4364,7 @@ define void @truncstore_v8i32_v8i8(<8 x i32> %x, <8 x i8>* %p, <8 x i32> %mask)
; SSE2-NEXT: pandn %xmm9, %xmm6
; SSE2-NEXT: por %xmm0, %xmm6
; SSE2-NEXT: packuswb %xmm4, %xmm6
-; SSE2-NEXT: packuswb %xmm6, %xmm6
+; SSE2-NEXT: packuswb %xmm0, %xmm6
; SSE2-NEXT: pcmpeqd %xmm8, %xmm3
; SSE2-NEXT: pcmpeqd %xmm0, %xmm0
; SSE2-NEXT: pxor %xmm0, %xmm3
diff --git a/llvm/test/CodeGen/X86/vector-trunc-packus.ll b/llvm/test/CodeGen/X86/vector-trunc-packus.ll
index fa9e394fcd47..0192038ed93b 100644
--- a/llvm/test/CodeGen/X86/vector-trunc-packus.ll
+++ b/llvm/test/CodeGen/X86/vector-trunc-packus.ll
@@ -3854,7 +3854,7 @@ define <8 x i8> @trunc_packus_v8i64_v8i8(<8 x i64>* %p0) "min-legal-vector-width
; SSE41-NEXT: blendvpd %xmm0, %xmm8, %xmm5
; SSE41-NEXT: packusdw %xmm3, %xmm5
; SSE41-NEXT: packusdw %xmm5, %xmm1
-; SSE41-NEXT: packuswb %xmm1, %xmm1
+; SSE41-NEXT: packuswb %xmm0, %xmm1
; SSE41-NEXT: movdqa %xmm1, %xmm0
; SSE41-NEXT: retq
;
diff --git a/llvm/test/CodeGen/X86/vector-trunc-ssat.ll b/llvm/test/CodeGen/X86/vector-trunc-ssat.ll
index 856107ac6f77..2f712b47131e 100644
--- a/llvm/test/CodeGen/X86/vector-trunc-ssat.ll
+++ b/llvm/test/CodeGen/X86/vector-trunc-ssat.ll
@@ -3637,7 +3637,7 @@ define <8 x i8> @trunc_ssat_v8i64_v8i8(<8 x i64>* %p0) "min-legal-vector-width"=
; SSE41-NEXT: andpd %xmm0, %xmm1
; SSE41-NEXT: packusdw %xmm4, %xmm1
; SSE41-NEXT: packusdw %xmm3, %xmm1
-; SSE41-NEXT: packuswb %xmm1, %xmm1
+; SSE41-NEXT: packuswb %xmm0, %xmm1
; SSE41-NEXT: movdqa %xmm1, %xmm0
; SSE41-NEXT: retq
;
diff --git a/llvm/test/CodeGen/X86/vector-trunc-usat.ll b/llvm/test/CodeGen/X86/vector-trunc-usat.ll
index e7bec7629a14..5e0d255b3ff0 100644
--- a/llvm/test/CodeGen/X86/vector-trunc-usat.ll
+++ b/llvm/test/CodeGen/X86/vector-trunc-usat.ll
@@ -2781,7 +2781,7 @@ define <8 x i8> @trunc_usat_v8i64_v8i8(<8 x i64>* %p0) {
; SSE41-NEXT: blendvpd %xmm0, %xmm8, %xmm2
; SSE41-NEXT: packusdw %xmm4, %xmm2
; SSE41-NEXT: packusdw %xmm2, %xmm1
-; SSE41-NEXT: packuswb %xmm1, %xmm1
+; SSE41-NEXT: packuswb %xmm0, %xmm1
; SSE41-NEXT: movdqa %xmm1, %xmm0
; SSE41-NEXT: retq
;
@@ -3807,24 +3807,23 @@ define void @trunc_usat_v4i32_v4i8_store(<4 x i32> %a0, <4 x i8> *%p1) {
define <8 x i8> @trunc_usat_v8i32_v8i8(<8 x i32> %a0) {
; SSE2-LABEL: trunc_usat_v8i32_v8i8:
; SSE2: # %bb.0:
-; SSE2-NEXT: movdqa {{.*#+}} xmm3 = [255,255,255,255]
-; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [2147483648,2147483648,2147483648,2147483648]
-; SSE2-NEXT: movdqa %xmm1, %xmm5
-; SSE2-NEXT: pxor %xmm4, %xmm5
-; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483903,2147483903,2147483903,2147483903]
-; SSE2-NEXT: movdqa %xmm2, %xmm6
-; SSE2-NEXT: pcmpgtd %xmm5, %xmm6
+; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [255,255,255,255]
+; SSE2-NEXT: movdqa {{.*#+}} xmm3 = [2147483648,2147483648,2147483648,2147483648]
+; SSE2-NEXT: movdqa %xmm1, %xmm4
+; SSE2-NEXT: pxor %xmm3, %xmm4
+; SSE2-NEXT: movdqa {{.*#+}} xmm5 = [2147483903,2147483903,2147483903,2147483903]
+; SSE2-NEXT: movdqa %xmm5, %xmm6
+; SSE2-NEXT: pcmpgtd %xmm4, %xmm6
; SSE2-NEXT: pand %xmm6, %xmm1
-; SSE2-NEXT: pandn %xmm3, %xmm6
+; SSE2-NEXT: pandn %xmm2, %xmm6
; SSE2-NEXT: por %xmm1, %xmm6
-; SSE2-NEXT: pxor %xmm0, %xmm4
-; SSE2-NEXT: pcmpgtd %xmm4, %xmm2
-; SSE2-NEXT: pand %xmm2, %xmm0
-; SSE2-NEXT: pandn %xmm3, %xmm2
-; SSE2-NEXT: por %xmm0, %xmm2
-; SSE2-NEXT: packuswb %xmm6, %xmm2
-; SSE2-NEXT: packuswb %xmm2, %xmm2
-; SSE2-NEXT: movdqa %xmm2, %xmm0
+; SSE2-NEXT: pxor %xmm0, %xmm3
+; SSE2-NEXT: pcmpgtd %xmm3, %xmm5
+; SSE2-NEXT: pand %xmm5, %xmm0
+; SSE2-NEXT: pandn %xmm2, %xmm5
+; SSE2-NEXT: por %xmm5, %xmm0
+; SSE2-NEXT: packuswb %xmm6, %xmm0
+; SSE2-NEXT: packuswb %xmm0, %xmm0
; SSE2-NEXT: retq
;
; SSSE3-LABEL: trunc_usat_v8i32_v8i8:
More information about the llvm-commits mailing list