[llvm] c711c65 - [X86] combineINSERT_SUBVECTOR - attempt to constant fold from constant pool loads (if we're not widening). (#129682)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Mar 4 03:29:03 PST 2025
Author: Simon Pilgrim
Date: 2025-03-04T11:29:00Z
New Revision: c711c65e57fe5e09b8321a675075dac9dbd12f82
URL: https://github.com/llvm/llvm-project/commit/c711c65e57fe5e09b8321a675075dac9dbd12f82
DIFF: https://github.com/llvm/llvm-project/commit/c711c65e57fe5e09b8321a675075dac9dbd12f82.diff
LOG: [X86] combineINSERT_SUBVECTOR - attempt to constant fold from constant pool loads (if we're not widening). (#129682)
Added:
Modified:
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/test/CodeGen/X86/vector-shuffle-combining-xop.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 0f82b541744d6..bbab43d4e92af 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -58684,6 +58684,21 @@ static SDValue combineINSERT_SUBVECTOR(SDNode *N, SelectionDAG &DAG,
}
}
+ // Attempt to constant fold (if we're not widening).
+ if (!Vec.isUndef() && !ISD::isBuildVectorAllZeros(Vec.getNode())) {
+ unsigned EltSizeInBits = OpVT.getScalarSizeInBits();
+ APInt VecUndefElts, SubUndefElts;
+ SmallVector<APInt, 16> VecEltBits, SubEltBits;
+ if (getTargetConstantBitsFromNode(Vec, EltSizeInBits, VecUndefElts,
+ VecEltBits) &&
+ getTargetConstantBitsFromNode(SubVec, EltSizeInBits, SubUndefElts,
+ SubEltBits)) {
+ VecUndefElts.insertBits(SubUndefElts, IdxVal);
+ llvm::copy(SubEltBits, VecEltBits.begin() + IdxVal);
+ return getConstVector(VecEltBits, VecUndefElts, OpVT, DAG, dl);
+ }
+ }
+
return SDValue();
}
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-combining-xop.ll b/llvm/test/CodeGen/X86/vector-shuffle-combining-xop.ll
index 483191fb32bdf..f53c7a3370174 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-combining-xop.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-combining-xop.ll
@@ -140,16 +140,11 @@ define <4 x double> @demandedelts_vpermil2pd256_as_shufpd(<4 x double> %a0, <4 x
; X86-NEXT: vshufpd {{.*#+}} ymm0 = ymm0[1,1,2,3]
; X86-NEXT: retl
;
-; X64-AVX-LABEL: demandedelts_vpermil2pd256_as_shufpd:
-; X64-AVX: # %bb.0:
-; X64-AVX-NEXT: vpermil2pd {{.*#+}} ymm0 = ymm1[0,0],ymm0[3],ymm1[3]
-; X64-AVX-NEXT: retq
-;
-; X64-AVX2-LABEL: demandedelts_vpermil2pd256_as_shufpd:
-; X64-AVX2: # %bb.0:
-; X64-AVX2-NEXT: vshufpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[3],ymm1[3]
-; X64-AVX2-NEXT: vshufpd {{.*#+}} ymm0 = ymm0[1,1,2,3]
-; X64-AVX2-NEXT: retq
+; X64-LABEL: demandedelts_vpermil2pd256_as_shufpd:
+; X64: # %bb.0:
+; X64-NEXT: vshufpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[3],ymm1[3]
+; X64-NEXT: vshufpd {{.*#+}} ymm0 = ymm0[1,1,2,3]
+; X64-NEXT: retq
%res0 = insertelement <4 x i64> <i64 0, i64 4, i64 2, i64 7>, i64 %a2, i32 0
%res1 = call <4 x double> @llvm.x86.xop.vpermil2pd.256(<4 x double> %a0, <4 x double> %a1, <4 x i64> %res0, i8 0)
%res2 = shufflevector <4 x double> %res1, <4 x double> undef, <4 x i32> <i32 1, i32 1, i32 2, i32 3>
More information about the llvm-commits mailing list