[llvm] c4051b2 - [X86] Fold vbroadcast(bitcast(vbroadcast(src))) -> bitcast(vbroadcast(vbroadcast(src)))
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Tue Oct 25 06:03:52 PDT 2022
Author: Simon Pilgrim
Date: 2022-10-25T14:03:43+01:00
New Revision: c4051b2606182d2cdd0cd0c3c70aa1aa4ce61dff
URL: https://github.com/llvm/llvm-project/commit/c4051b2606182d2cdd0cd0c3c70aa1aa4ce61dff
DIFF: https://github.com/llvm/llvm-project/commit/c4051b2606182d2cdd0cd0c3c70aa1aa4ce61dff.diff
LOG: [X86] Fold vbroadcast(bitcast(vbroadcast(src))) -> bitcast(vbroadcast(vbroadcast(src)))
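If the inner broadcast's scalar type is the same width as, or narrower than (and evenly divides), the outer broadcast's scalar type, then we can broadcast using the inner scalar type directly and bitcast the result back to the outer type. This works when the inner node is a vbroadcast_load as well.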
Added:
Modified:
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/test/CodeGen/X86/2012-01-12-extract-sv.ll
llvm/test/CodeGen/X86/combine-concatvectors.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 69d36eb2c0f41..c908ec6fb48fa 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -40291,6 +40291,21 @@ static SDValue combineTargetShuffle(SDValue N, SelectionDAG &DAG,
return DAG.getBitcast(VT, DAG.getNode(X86ISD::VBROADCAST, DL, NewVT, BC));
}
+ // vbroadcast(bitcast(vbroadcast(src))) -> bitcast(vbroadcast(src))
+ // If we're re-broadcasting a smaller type then broadcast with that type and
+ // bitcast.
+ // TODO: Do this for any splat?
+ if (Src.getOpcode() == ISD::BITCAST &&
+ (BC.getOpcode() == X86ISD::VBROADCAST ||
+ BC.getOpcode() == X86ISD::VBROADCAST_LOAD) &&
+ (VT.getScalarSizeInBits() % BCVT.getScalarSizeInBits()) == 0 &&
+ (VT.getSizeInBits() % BCVT.getSizeInBits()) == 0) {
+ MVT NewVT =
+ MVT::getVectorVT(BCVT.getSimpleVT().getScalarType(),
+ VT.getSizeInBits() / BCVT.getScalarSizeInBits());
+ return DAG.getBitcast(VT, DAG.getNode(X86ISD::VBROADCAST, DL, NewVT, BC));
+ }
+
// Reduce broadcast source vector to lowest 128-bits.
if (SrcVT.getSizeInBits() > 128)
return DAG.getNode(X86ISD::VBROADCAST, DL, VT,
diff --git a/llvm/test/CodeGen/X86/2012-01-12-extract-sv.ll b/llvm/test/CodeGen/X86/2012-01-12-extract-sv.ll
index 94e5484b9e93c..600f42a16a711 100644
--- a/llvm/test/CodeGen/X86/2012-01-12-extract-sv.ll
+++ b/llvm/test/CodeGen/X86/2012-01-12-extract-sv.ll
@@ -22,7 +22,7 @@ define void @endless_loop() {
; AVX2-NEXT: vbroadcastss (%eax), %xmm0
; AVX2-NEXT: vxorps %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vblendps {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
-; AVX2-NEXT: vbroadcastsd %xmm0, %ymm0
+; AVX2-NEXT: vbroadcastss %xmm0, %ymm0
; AVX2-NEXT: vxorps %xmm2, %xmm2, %xmm2
; AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm2[0,1,2,3,4,5,6],ymm0[7]
; AVX2-NEXT: vmovaps %ymm0, (%eax)
diff --git a/llvm/test/CodeGen/X86/combine-concatvectors.ll b/llvm/test/CodeGen/X86/combine-concatvectors.ll
index b9b8f413c97a3..eba77fcf41045 100644
--- a/llvm/test/CodeGen/X86/combine-concatvectors.ll
+++ b/llvm/test/CodeGen/X86/combine-concatvectors.ll
@@ -62,9 +62,8 @@ define void @concat_of_broadcast_v2f64_v4f64() {
; AVX2-NEXT: movl $1091567616, 30256(%rax) # imm = 0x41100000
; AVX2-NEXT: movabsq $4294967297, %rcx # imm = 0x100000001
; AVX2-NEXT: movq %rcx, 46348(%rax)
-; AVX2-NEXT: vbroadcastss {{.*#+}} xmm0 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
-; AVX2-NEXT: vbroadcastsd %xmm0, %ymm1
-; AVX2-NEXT: vmovups %ymm1, 48296(%rax)
+; AVX2-NEXT: vbroadcastss {{.*#+}} ymm0 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; AVX2-NEXT: vmovups %ymm0, 48296(%rax)
; AVX2-NEXT: vmovlps %xmm0, 47372(%rax)
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
More information about the llvm-commits mailing list