[llvm] 8859a4f - [X86] LowerBUILD_VECTOR - don't use insert_element(constant, elt, idx) if we have a freeze(undef) element
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Fri Dec 8 02:29:05 PST 2023
Author: Simon Pilgrim
Date: 2023-12-08T10:28:56Z
New Revision: 8859a4f630cb90d28f5dab993aa4aef0e915cd74
URL: https://github.com/llvm/llvm-project/commit/8859a4f630cb90d28f5dab993aa4aef0e915cd74
DIFF: https://github.com/llvm/llvm-project/commit/8859a4f630cb90d28f5dab993aa4aef0e915cd74.diff
LOG: [X86] LowerBUILD_VECTOR - don't use insert_element(constant, elt, idx) if we have a freeze(undef) element
Fixes #74736
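
For context, a minimal sketch of the kind of lane scan the new FrozenUndefMask.isZero() guard relies on. This is an illustration only, not the code from X86ISelLowering.cpp: it assumes LowerBUILD_VECTOR records freeze(undef) operands in an APInt mask before choosing a lowering strategy (names mirror the diff below; Op is the BUILD_VECTOR node and NumElems its operand count). A freeze(undef) lane has a fixed but unknown value, so it cannot safely be folded into the all-constant vector used by the insert_element path, which is why that path is now skipped whenever the mask is non-zero.

    // Sketch: collect freeze(undef) lanes of a BUILD_VECTOR into a bit mask.
    // The patch below bails out of the insert-into-constant-vector lowering
    // whenever this mask has any bit set.
    APInt FrozenUndefMask = APInt::getZero(NumElems);
    for (unsigned I = 0; I != NumElems; ++I) {
      SDValue Elt = Op.getOperand(I);
      // ISD::FREEZE of an UNDEF operand: the lane is defined but unknown,
      // so it is neither a constant nor a plain undef.
      if (Elt.getOpcode() == ISD::FREEZE && Elt.getOperand(0).isUndef())
        FrozenUndefMask.setBit(I);
    }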
Added:
llvm/test/CodeGen/X86/pr74736.ll
Modified:
llvm/lib/Target/X86/X86ISelLowering.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 4fca5afb46dd2..620014c4c520a 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -8727,6 +8727,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
// constants. Insertion into a zero vector is handled as a special-case
// somewhere below here.
if (NumConstants == NumElems - 1 && NumNonZero != 1 &&
+ FrozenUndefMask.isZero() &&
(isOperationLegalOrCustom(ISD::INSERT_VECTOR_ELT, VT) ||
isOperationLegalOrCustom(ISD::VECTOR_SHUFFLE, VT))) {
// Create an all-constant vector. The variable element in the old
diff --git a/llvm/test/CodeGen/X86/pr74736.ll b/llvm/test/CodeGen/X86/pr74736.ll
new file mode 100644
index 0000000000000..3dfdbf102c953
--- /dev/null
+++ b/llvm/test/CodeGen/X86/pr74736.ll
@@ -0,0 +1,68 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc < %s -mtriple=x86_64-- -mattr=+sse2 | FileCheck %s --check-prefixes=SSE
+; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx2 | FileCheck %s --check-prefixes=AVX
+
+define void @main(<16 x i32> %0, i32 %1) {
+; SSE-LABEL: main:
+; SSE: # %bb.0: # %entry
+; SSE-NEXT: movd %edi, %xmm4
+; SSE-NEXT: movaps {{.*#+}} xmm0 = [1,0,0,0]
+; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,0],xmm4[1,0]
+; SSE-NEXT: paddd %xmm0, %xmm0
+; SSE-NEXT: paddd %xmm1, %xmm1
+; SSE-NEXT: paddd %xmm3, %xmm3
+; SSE-NEXT: paddd %xmm2, %xmm2
+; SSE-NEXT: pshufd {{.*#+}} xmm4 = xmm0[0,1,1,3]
+; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,0],xmm1[1,0]
+; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[1,3]
+; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,0],xmm2[1,0]
+; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,2],xmm2[1,3]
+; SSE-NEXT: shufps {{.*#+}} xmm2 = xmm2[3,0],xmm3[1,0]
+; SSE-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm3[1,3]
+; SSE-NEXT: xorps %xmm2, %xmm0
+; SSE-NEXT: xorps %xmm4, %xmm1
+; SSE-NEXT: xorps %xmm0, %xmm1
+; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
+; SSE-NEXT: pxor %xmm1, %xmm0
+; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
+; SSE-NEXT: pxor %xmm0, %xmm1
+; SSE-NEXT: movd %xmm1, 0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: main:
+; AVX: # %bb.0: # %entry
+; AVX-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; AVX-NEXT: vpblendd {{.*#+}} xmm2 = xmm2[0],xmm0[1,2,3]
+; AVX-NEXT: movl $1, %eax
+; AVX-NEXT: vpinsrd $1, %eax, %xmm2, %xmm2
+; AVX-NEXT: vpinsrd $3, %edi, %xmm2, %xmm2
+; AVX-NEXT: vpblendd {{.*#+}} ymm0 = ymm2[0,1,2,3],ymm0[4,5,6,7]
+; AVX-NEXT: vpaddd %ymm0, %ymm0, %ymm0
+; AVX-NEXT: vpaddd %ymm1, %ymm1, %ymm1
+; AVX-NEXT: vmovdqa {{.*#+}} ymm2 = [0,1,1,3,3,5,5,7]
+; AVX-NEXT: vpermd %ymm0, %ymm2, %ymm2
+; AVX-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1]
+; AVX-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,3,3,3,7,7,7,7]
+; AVX-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[0,1,1,3,4,5,5,7]
+; AVX-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7]
+; AVX-NEXT: vpxor %ymm0, %ymm2, %ymm0
+; AVX-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
+; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
+; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vmovd %xmm0, 0
+; AVX-NEXT: vzeroupper
+; AVX-NEXT: retq
+entry:
+ %2 = insertelement <16 x i32> %0, i32 1, i64 1
+ %3 = insertelement <16 x i32> %2, i32 %1, i64 3
+ %4 = insertelement <16 x i32> %3, i32 0, i64 0
+ %5 = shl <16 x i32> %4, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+ %6 = shufflevector <16 x i32> %5, <16 x i32> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7, i32 9, i32 9, i32 11, i32 11, i32 13, i32 13, i32 15>
+ %7 = tail call i32 @llvm.vector.reduce.xor.v16i32(<16 x i32> %6)
+ store i32 %7, ptr null, align 4
+ ret void
+}
+declare i32 @llvm.vector.reduce.xor.v16i32(<16 x i32>)