[llvm] 41bdb8c - [X86] Fold insert_vector_elt(undef, elt, 0) --> scalar_to_vector(elt)
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Mon Aug 15 06:56:42 PDT 2022
Author: Simon Pilgrim
Date: 2022-08-15T14:56:30+01:00
New Revision: 41bdb8cd36388ccd0c020798a7b9ce9014af753c
URL: https://github.com/llvm/llvm-project/commit/41bdb8cd36388ccd0c020798a7b9ce9014af753c
DIFF: https://github.com/llvm/llvm-project/commit/41bdb8cd36388ccd0c020798a7b9ce9014af753c.diff
LOG: [X86] Fold insert_vector_elt(undef, elt, 0) --> scalar_to_vector(elt)
I had hoped to make this a generic fold in DAGCombine, but there are quite a few regressions in Thumb2 MVE that need addressing first.
Fixes regressions from D106675.
Added:
Modified:
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/test/CodeGen/X86/avx-insertelt.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 88004c8281f3c..cb6a87ab9c782 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -47269,12 +47269,18 @@ static SDValue combineVectorInsert(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
EVT VT = N->getValueType(0);
- assert(((N->getOpcode() == X86ISD::PINSRB && VT == MVT::v16i8) ||
- (N->getOpcode() == X86ISD::PINSRW && VT == MVT::v8i16) ||
- N->getOpcode() == ISD::INSERT_VECTOR_ELT) &&
+ unsigned Opcode = N->getOpcode();
+ assert(((Opcode == X86ISD::PINSRB && VT == MVT::v16i8) ||
+ (Opcode == X86ISD::PINSRW && VT == MVT::v8i16) ||
+ Opcode == ISD::INSERT_VECTOR_ELT) &&
"Unexpected vector insertion");
- if (N->getOpcode() == X86ISD::PINSRB || N->getOpcode() == X86ISD::PINSRW) {
+ // Fold insert_vector_elt(undef, elt, 0) --> scalar_to_vector(elt).
+ if (Opcode == ISD::INSERT_VECTOR_ELT && N->getOperand(0).isUndef() &&
+ isNullConstant(N->getOperand(2)))
+ return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), VT, N->getOperand(1));
+
+ if (Opcode == X86ISD::PINSRB || Opcode == X86ISD::PINSRW) {
unsigned NumBitsPerElt = VT.getScalarSizeInBits();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (TLI.SimplifyDemandedBits(SDValue(N, 0),
diff --git a/llvm/test/CodeGen/X86/avx-insertelt.ll b/llvm/test/CodeGen/X86/avx-insertelt.ll
index 23b1a0312394b..d3ac3f1f64c83 100644
--- a/llvm/test/CodeGen/X86/avx-insertelt.ll
+++ b/llvm/test/CodeGen/X86/avx-insertelt.ll
@@ -422,7 +422,7 @@ define <8 x i32> @insert_i32_two_elts_of_high_subvector(<8 x i32> %x, i32 %s) {
define <4 x i64> @insert_i64_two_elts_of_high_subvector(<4 x i64> %x, i64 %s) {
; AVX-LABEL: insert_i64_two_elts_of_high_subvector:
; AVX: # %bb.0:
-; AVX-NEXT: vpinsrq $0, %rdi, %xmm0, %xmm1
+; AVX-NEXT: vmovq %rdi, %xmm1
; AVX-NEXT: vpinsrq $1, %rdi, %xmm1, %xmm1
; AVX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX-NEXT: retq
@@ -523,7 +523,7 @@ define <8 x i32> @insert_i32_two_elts_of_low_subvector(<8 x i32> %x, i32 %s) {
define <4 x i64> @insert_i64_two_elts_of_low_subvector(<4 x i64> %x, i64 %s) {
; AVX-LABEL: insert_i64_two_elts_of_low_subvector:
; AVX: # %bb.0:
-; AVX-NEXT: vpinsrq $0, %rdi, %xmm0, %xmm1
+; AVX-NEXT: vmovq %rdi, %xmm1
; AVX-NEXT: vpinsrq $1, %rdi, %xmm1, %xmm1
; AVX-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
; AVX-NEXT: retq
More information about the llvm-commits mailing list