[llvm] 4a32c48 - [X86] LowerTRUNCATE - ensure we handle cases where we truncate to a sub-128bit type (PR66194)
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Wed Sep 13 05:18:26 PDT 2023
Author: Simon Pilgrim
Date: 2023-09-13T13:15:42+01:00
New Revision: 4a32c48280912306507a0f8466eae64dca672cfe
URL: https://github.com/llvm/llvm-project/commit/4a32c48280912306507a0f8466eae64dca672cfe
DIFF: https://github.com/llvm/llvm-project/commit/4a32c48280912306507a0f8466eae64dca672cfe.diff
LOG: [X86] LowerTRUNCATE - ensure we handle cases where we truncate to a sub-128bit type (PR66194)
Fixes #66194
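
For context: the early type-legalization path in LowerTRUNCATE previously only fired when the *source* type was illegal, so a truncate with a legal source but an illegal sub-128-bit result could fall through into lowering code that assumes a legal result type. A minimal sketch of the affected shape of input (the exact types here are illustrative, not taken from the original reproducer):

    ; Illustrative only: with AVX512 the source <8 x i64> is a legal type,
    ; but the 64-bit result <8 x i8> is not, so the type legalizer re-enters
    ; LowerTRUNCATE with an illegal destination type.
    define <8 x i8> @trunc_to_sub128(<8 x i64> %a) {
      %t = trunc <8 x i64> %a to <8 x i8>
      ret <8 x i8> %t
    }
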
Added:
Modified:
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/test/CodeGen/X86/vector-trunc.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 7a9ee45c6beed40..f810d788139848f 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -20369,7 +20369,7 @@ SDValue X86TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
// If we're called by the type legalizer, handle a few cases.
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- if (!TLI.isTypeLegal(InVT)) {
+ if (!TLI.isTypeLegal(VT) || !TLI.isTypeLegal(InVT)) {
if ((InVT == MVT::v8i64 || InVT == MVT::v16i32 || InVT == MVT::v16i64) &&
VT.is128BitVector() && Subtarget.hasAVX512()) {
assert((InVT == MVT::v16i64 || Subtarget.hasVLX()) &&
diff --git a/llvm/test/CodeGen/X86/vector-trunc.ll b/llvm/test/CodeGen/X86/vector-trunc.ll
index 91fe39722170d88..a1efa9d150346b6 100644
--- a/llvm/test/CodeGen/X86/vector-trunc.ll
+++ b/llvm/test/CodeGen/X86/vector-trunc.ll
@@ -1948,6 +1948,200 @@ define void @PR34773(ptr %a0, ptr %a1) {
ret void
}
+define i16 @PR66194(i8 %q) {
+; SSE2-LABEL: PR66194:
+; SSE2: # %bb.0: # %entry
+; SSE2-NEXT: xorl %eax, %eax
+; SSE2-NEXT: xorl %ecx, %ecx
+; SSE2-NEXT: testb %dil, %dil
+; SSE2-NEXT: setne %al
+; SSE2-NEXT: sete %cl
+; SSE2-NEXT: movl %ecx, %edx
+; SSE2-NEXT: shll $16, %edx
+; SSE2-NEXT: orl %eax, %edx
+; SSE2-NEXT: movd %edx, %xmm0
+; SSE2-NEXT: pinsrw $2, %eax, %xmm0
+; SSE2-NEXT: pinsrw $3, %eax, %xmm0
+; SSE2-NEXT: pinsrw $4, %ecx, %xmm0
+; SSE2-NEXT: pinsrw $5, %eax, %xmm0
+; SSE2-NEXT: pinsrw $6, %eax, %xmm0
+; SSE2-NEXT: pinsrw $7, %ecx, %xmm0
+; SSE2-NEXT: pcmpeqd %xmm1, %xmm1
+; SSE2-NEXT: psubw %xmm1, %xmm0
+; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; SSE2-NEXT: packuswb %xmm0, %xmm0
+; SSE2-NEXT: pxor %xmm1, %xmm1
+; SSE2-NEXT: psadbw %xmm0, %xmm1
+; SSE2-NEXT: movd %xmm1, %eax
+; SSE2-NEXT: # kill: def $ax killed $ax killed $eax
+; SSE2-NEXT: retq
+;
+; SSSE3-LABEL: PR66194:
+; SSSE3: # %bb.0: # %entry
+; SSSE3-NEXT: xorl %eax, %eax
+; SSSE3-NEXT: xorl %ecx, %ecx
+; SSSE3-NEXT: testb %dil, %dil
+; SSSE3-NEXT: setne %al
+; SSSE3-NEXT: sete %cl
+; SSSE3-NEXT: movl %ecx, %edx
+; SSSE3-NEXT: shll $16, %edx
+; SSSE3-NEXT: orl %eax, %edx
+; SSSE3-NEXT: movd %edx, %xmm0
+; SSSE3-NEXT: pinsrw $2, %eax, %xmm0
+; SSSE3-NEXT: pinsrw $3, %eax, %xmm0
+; SSSE3-NEXT: pinsrw $4, %ecx, %xmm0
+; SSSE3-NEXT: pinsrw $5, %eax, %xmm0
+; SSSE3-NEXT: pinsrw $6, %eax, %xmm0
+; SSSE3-NEXT: pinsrw $7, %ecx, %xmm0
+; SSSE3-NEXT: pcmpeqd %xmm1, %xmm1
+; SSSE3-NEXT: psubw %xmm1, %xmm0
+; SSSE3-NEXT: movdqa %xmm0, -{{[0-9]+}}(%rsp)
+; SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
+; SSSE3-NEXT: shll $8, %eax
+; SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx
+; SSSE3-NEXT: orl %eax, %ecx
+; SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
+; SSSE3-NEXT: shll $8, %eax
+; SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx
+; SSSE3-NEXT: orl %eax, %edx
+; SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
+; SSSE3-NEXT: shll $16, %eax
+; SSSE3-NEXT: orl %edx, %eax
+; SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx
+; SSSE3-NEXT: shll $24, %edx
+; SSSE3-NEXT: orl %eax, %edx
+; SSSE3-NEXT: movd %edx, %xmm0
+; SSSE3-NEXT: pinsrw $2, %ecx, %xmm0
+; SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
+; SSSE3-NEXT: shll $8, %eax
+; SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx
+; SSSE3-NEXT: orl %eax, %ecx
+; SSSE3-NEXT: pinsrw $3, %ecx, %xmm0
+; SSSE3-NEXT: pxor %xmm1, %xmm1
+; SSSE3-NEXT: psadbw %xmm0, %xmm1
+; SSSE3-NEXT: movd %xmm1, %eax
+; SSSE3-NEXT: # kill: def $ax killed $ax killed $eax
+; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: PR66194:
+; SSE41: # %bb.0: # %entry
+; SSE41-NEXT: xorl %eax, %eax
+; SSE41-NEXT: xorl %ecx, %ecx
+; SSE41-NEXT: testb %dil, %dil
+; SSE41-NEXT: setne %al
+; SSE41-NEXT: sete %cl
+; SSE41-NEXT: movd %eax, %xmm0
+; SSE41-NEXT: pinsrb $2, %ecx, %xmm0
+; SSE41-NEXT: pinsrb $4, %eax, %xmm0
+; SSE41-NEXT: pinsrb $6, %eax, %xmm0
+; SSE41-NEXT: pinsrb $8, %ecx, %xmm0
+; SSE41-NEXT: pinsrb $10, %eax, %xmm0
+; SSE41-NEXT: pinsrb $12, %eax, %xmm0
+; SSE41-NEXT: pinsrb $14, %ecx, %xmm0
+; SSE41-NEXT: pcmpeqd %xmm1, %xmm1
+; SSE41-NEXT: psubw %xmm1, %xmm0
+; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
+; SSE41-NEXT: pxor %xmm1, %xmm1
+; SSE41-NEXT: psadbw %xmm0, %xmm1
+; SSE41-NEXT: movd %xmm1, %eax
+; SSE41-NEXT: # kill: def $ax killed $ax killed $eax
+; SSE41-NEXT: retq
+;
+; AVX1-LABEL: PR66194:
+; AVX1: # %bb.0: # %entry
+; AVX1-NEXT: xorl %eax, %eax
+; AVX1-NEXT: testb %dil, %dil
+; AVX1-NEXT: setne %al
+; AVX1-NEXT: sete %cl
+; AVX1-NEXT: vmovd %eax, %xmm0
+; AVX1-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
+; AVX1-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
+; AVX1-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
+; AVX1-NEXT: vpinsrb $8, %ecx, %xmm0, %xmm0
+; AVX1-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0
+; AVX1-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0
+; AVX1-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
+; AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; AVX1-NEXT: vpsubw %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
+; AVX1-NEXT: vpaddw %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
+; AVX1-NEXT: vpaddw %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vpsrld $16, %xmm0, %xmm1
+; AVX1-NEXT: vpaddw %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vmovd %xmm0, %eax
+; AVX1-NEXT: # kill: def $ax killed $ax killed $eax
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: PR66194:
+; AVX2: # %bb.0: # %entry
+; AVX2-NEXT: xorl %eax, %eax
+; AVX2-NEXT: xorl %ecx, %ecx
+; AVX2-NEXT: testb %dil, %dil
+; AVX2-NEXT: setne %al
+; AVX2-NEXT: sete %cl
+; AVX2-NEXT: vmovd %eax, %xmm0
+; AVX2-NEXT: vpinsrw $1, %ecx, %xmm0, %xmm0
+; AVX2-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0
+; AVX2-NEXT: vpinsrw $3, %eax, %xmm0, %xmm0
+; AVX2-NEXT: vpinsrw $4, %ecx, %xmm0, %xmm0
+; AVX2-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0
+; AVX2-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0
+; AVX2-NEXT: vpinsrw $7, %ecx, %xmm0, %xmm0
+; AVX2-NEXT: vpaddw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
+; AVX2-NEXT: vpaddw %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
+; AVX2-NEXT: vpaddw %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: vpsrld $16, %xmm0, %xmm1
+; AVX2-NEXT: vpaddw %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: vmovd %xmm0, %eax
+; AVX2-NEXT: # kill: def $ax killed $ax killed $eax
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: PR66194:
+; AVX512: # %bb.0: # %entry
+; AVX512-NEXT: xorl %eax, %eax
+; AVX512-NEXT: xorl %ecx, %ecx
+; AVX512-NEXT: testb %dil, %dil
+; AVX512-NEXT: setne %al
+; AVX512-NEXT: sete %cl
+; AVX512-NEXT: vmovd %eax, %xmm0
+; AVX512-NEXT: vpinsrw $1, %ecx, %xmm0, %xmm0
+; AVX512-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0
+; AVX512-NEXT: vpinsrw $3, %eax, %xmm0, %xmm0
+; AVX512-NEXT: vpinsrw $4, %ecx, %xmm0, %xmm0
+; AVX512-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0
+; AVX512-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0
+; AVX512-NEXT: vpinsrw $7, %ecx, %xmm0, %xmm0
+; AVX512-NEXT: vpaddw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
+; AVX512-NEXT: vpaddw %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
+; AVX512-NEXT: vpaddw %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: vpsrld $16, %xmm0, %xmm1
+; AVX512-NEXT: vpaddw %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: vmovd %xmm0, %eax
+; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
+; AVX512-NEXT: retq
+entry:
+ %cmp12.i.13 = icmp ne i8 %q, 0
+ %cond.i15.13 = zext i1 %cmp12.i.13 to i16
+ %tobool.not.i.13 = icmp eq i8 %q, 0
+ %cond18.i.13 = zext i1 %tobool.not.i.13 to i16
+ %0 = insertelement <16 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>, i16 %cond.i15.13, i64 8
+ %1 = insertelement <16 x i16> %0, i16 %cond18.i.13, i64 9
+ %2 = insertelement <16 x i16> %1, i16 %cond.i15.13, i64 10
+ %3 = insertelement <16 x i16> %2, i16 %cond.i15.13, i64 11
+ %4 = insertelement <16 x i16> %3, i16 %cond18.i.13, i64 12
+ %5 = insertelement <16 x i16> %4, i16 %cond.i15.13, i64 13
+ %6 = insertelement <16 x i16> %5, i16 %cond.i15.13, i64 14
+ %7 = insertelement <16 x i16> %6, i16 %cond18.i.13, i64 15
+ %8 = tail call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> %7)
+ ret i16 %8
+}
+declare i16 @llvm.vector.reduce.add.v16i16(<16 x i16>)
+
; Store merging must not infinitely fight store splitting.
define void @store_merge_split(<8 x i32> %w1, <8 x i32> %w2, i64 %idx, ptr %p) align 2 {