[llvm] 9d3b19e - [X86] ReplaceNodeResults - relax the value type constraints for TRUNCATE widening
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Mon Aug 7 01:41:45 PDT 2023
Author: Simon Pilgrim
Date: 2023-08-07T09:41:38+01:00
New Revision: 9d3b19e8e9271c5c9e19549fe5e7223339e1fca4
URL: https://github.com/llvm/llvm-project/commit/9d3b19e8e9271c5c9e19549fe5e7223339e1fca4
DIFF: https://github.com/llvm/llvm-project/commit/9d3b19e8e9271c5c9e19549fe5e7223339e1fca4.diff
LOG: [X86] ReplaceNodeResults - relax the value type constraints for TRUNCATE widening
With SSSE3, widen the truncation whenever the source vector type is not legal, except for vXi64 -> vXi8 truncations with at most 4 elements (i.e. narrower than v8i64), where PSHUFB would be better.
Added:
Modified:
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/test/CodeGen/X86/masked_store_trunc.ll
llvm/test/CodeGen/X86/vector-trunc.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index ab36260ee658d6..9ca383e4742299 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -31983,6 +31983,7 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
EVT InVT = In.getValueType();
EVT InEltVT = InVT.getVectorElementType();
EVT EltVT = VT.getVectorElementType();
+ unsigned MinElts = VT.getVectorNumElements();
unsigned WidenNumElts = WidenVT.getVectorNumElements();
unsigned InBits = InVT.getSizeInBits();
@@ -32029,7 +32030,6 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
SmallVector<SDValue, 16> Ops(WidenNumElts, DAG.getUNDEF(EltVT));
// Use the original element count so we don't do more scalar opts than
// necessary.
- unsigned MinElts = VT.getVectorNumElements();
for (unsigned i=0; i < MinElts; ++i) {
SDValue Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, InEltVT, In,
DAG.getIntPtrConstant(i, dl));
@@ -32077,8 +32077,9 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
// this via type legalization.
if ((InEltVT == MVT::i16 || InEltVT == MVT::i32 || InEltVT == MVT::i64) &&
(EltVT == MVT::i8 || EltVT == MVT::i16 || EltVT == MVT::i32) &&
- (!Subtarget.hasSSSE3() || (InVT == MVT::v8i64 && VT == MVT::v8i8) ||
- (InVT == MVT::v4i64 && VT == MVT::v4i16 && !Subtarget.hasAVX()))) {
+ (!Subtarget.hasSSSE3() ||
+ (!isTypeLegal(InVT) &&
+ !(MinElts <= 4 && InEltVT == MVT::i64 && EltVT == MVT::i8)))) {
SDValue WidenIn = widenSubVector(In, false, Subtarget, DAG, dl,
InEltVT.getSizeInBits() * WidenNumElts);
Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, WidenVT, WidenIn));
diff --git a/llvm/test/CodeGen/X86/masked_store_trunc.ll b/llvm/test/CodeGen/X86/masked_store_trunc.ll
index 06594d88f80d11..11803e32ad437e 100644
--- a/llvm/test/CodeGen/X86/masked_store_trunc.ll
+++ b/llvm/test/CodeGen/X86/masked_store_trunc.ll
@@ -3720,10 +3720,11 @@ define void @truncstore_v8i32_v8i8(<8 x i32> %x, ptr %p, <8 x i32> %mask) {
; SSE4-LABEL: truncstore_v8i32_v8i8:
; SSE4: # %bb.0:
; SSE4-NEXT: pxor %xmm4, %xmm4
-; SSE4-NEXT: movdqa {{.*#+}} xmm5 = <0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u>
-; SSE4-NEXT: pshufb %xmm5, %xmm1
-; SSE4-NEXT: pshufb %xmm5, %xmm0
-; SSE4-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSE4-NEXT: movdqa {{.*#+}} xmm5 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
+; SSE4-NEXT: pand %xmm5, %xmm1
+; SSE4-NEXT: pand %xmm5, %xmm0
+; SSE4-NEXT: packusdw %xmm1, %xmm0
+; SSE4-NEXT: packuswb %xmm4, %xmm0
; SSE4-NEXT: pcmpeqd %xmm4, %xmm3
; SSE4-NEXT: pcmpeqd %xmm4, %xmm2
; SSE4-NEXT: packssdw %xmm3, %xmm2
diff --git a/llvm/test/CodeGen/X86/vector-trunc.ll b/llvm/test/CodeGen/X86/vector-trunc.ll
index 63f3d571edd4c7..91fe39722170d8 100644
--- a/llvm/test/CodeGen/X86/vector-trunc.ll
+++ b/llvm/test/CodeGen/X86/vector-trunc.ll
@@ -493,31 +493,23 @@ entry:
}
define void @trunc8i32_8i8(<8 x i32> %a) {
-; SSE2-LABEL: trunc8i32_8i8:
-; SSE2: # %bb.0: # %entry
-; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
-; SSE2-NEXT: pand %xmm2, %xmm1
-; SSE2-NEXT: pand %xmm2, %xmm0
-; SSE2-NEXT: packuswb %xmm1, %xmm0
-; SSE2-NEXT: packuswb %xmm0, %xmm0
-; SSE2-NEXT: movq %xmm0, (%rax)
-; SSE2-NEXT: retq
-;
-; SSSE3-LABEL: trunc8i32_8i8:
-; SSSE3: # %bb.0: # %entry
-; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = <0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u>
-; SSSE3-NEXT: pshufb %xmm2, %xmm1
-; SSSE3-NEXT: pshufb %xmm2, %xmm0
-; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
-; SSSE3-NEXT: movq %xmm0, (%rax)
-; SSSE3-NEXT: retq
+; SSE2-SSSE3-LABEL: trunc8i32_8i8:
+; SSE2-SSSE3: # %bb.0: # %entry
+; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
+; SSE2-SSSE3-NEXT: pand %xmm2, %xmm1
+; SSE2-SSSE3-NEXT: pand %xmm2, %xmm0
+; SSE2-SSSE3-NEXT: packuswb %xmm1, %xmm0
+; SSE2-SSSE3-NEXT: packuswb %xmm0, %xmm0
+; SSE2-SSSE3-NEXT: movq %xmm0, (%rax)
+; SSE2-SSSE3-NEXT: retq
;
; SSE41-LABEL: trunc8i32_8i8:
; SSE41: # %bb.0: # %entry
-; SSE41-NEXT: movdqa {{.*#+}} xmm2 = <0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u>
-; SSE41-NEXT: pshufb %xmm2, %xmm1
-; SSE41-NEXT: pshufb %xmm2, %xmm0
-; SSE41-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
+; SSE41-NEXT: pand %xmm2, %xmm1
+; SSE41-NEXT: pand %xmm2, %xmm0
+; SSE41-NEXT: packusdw %xmm1, %xmm0
+; SSE41-NEXT: packuswb %xmm0, %xmm0
; SSE41-NEXT: movq %xmm0, (%rax)
; SSE41-NEXT: retq
;
More information about the llvm-commits
mailing list