[llvm] c4051b2 - [X86] Fold vbroadcast(bitcast(vbroadcast(src))) -> bitcast(vbroadcast(vbroadcast(src)))

Simon Pilgrim via llvm-commits llvm-commits at lists.llvm.org
Tue Oct 25 06:03:52 PDT 2022


Author: Simon Pilgrim
Date: 2022-10-25T14:03:43+01:00
New Revision: c4051b2606182d2cdd0cd0c3c70aa1aa4ce61dff

URL: https://github.com/llvm/llvm-project/commit/c4051b2606182d2cdd0cd0c3c70aa1aa4ce61dff
DIFF: https://github.com/llvm/llvm-project/commit/c4051b2606182d2cdd0cd0c3c70aa1aa4ce61dff.diff

LOG: [X86] Fold vbroadcast(bitcast(vbroadcast(src))) -> bitcast(vbroadcast(vbroadcast(src)))

If the inner broadcast scalar type is smaller/same width as the outer broadcast scalar type then we can broadcast using the same inner type directly. Works for vbroadcast_load as well.

Added: 
    

Modified: 
    llvm/lib/Target/X86/X86ISelLowering.cpp
    llvm/test/CodeGen/X86/2012-01-12-extract-sv.ll
    llvm/test/CodeGen/X86/combine-concatvectors.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 69d36eb2c0f41..c908ec6fb48fa 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -40291,6 +40291,21 @@ static SDValue combineTargetShuffle(SDValue N, SelectionDAG &DAG,
       return DAG.getBitcast(VT, DAG.getNode(X86ISD::VBROADCAST, DL, NewVT, BC));
     }
 
+    // vbroadcast(bitcast(vbroadcast(src))) -> bitcast(vbroadcast(src))
+    // If we're re-broadcasting a smaller type then broadcast with that type and
+    // bitcast.
+    // TODO: Do this for any splat?
+    if (Src.getOpcode() == ISD::BITCAST &&
+        (BC.getOpcode() == X86ISD::VBROADCAST ||
+         BC.getOpcode() == X86ISD::VBROADCAST_LOAD) &&
+        (VT.getScalarSizeInBits() % BCVT.getScalarSizeInBits()) == 0 &&
+        (VT.getSizeInBits() % BCVT.getSizeInBits()) == 0) {
+      MVT NewVT =
+          MVT::getVectorVT(BCVT.getSimpleVT().getScalarType(),
+                           VT.getSizeInBits() / BCVT.getScalarSizeInBits());
+      return DAG.getBitcast(VT, DAG.getNode(X86ISD::VBROADCAST, DL, NewVT, BC));
+    }
+
     // Reduce broadcast source vector to lowest 128-bits.
     if (SrcVT.getSizeInBits() > 128)
       return DAG.getNode(X86ISD::VBROADCAST, DL, VT,

diff  --git a/llvm/test/CodeGen/X86/2012-01-12-extract-sv.ll b/llvm/test/CodeGen/X86/2012-01-12-extract-sv.ll
index 94e5484b9e93c..600f42a16a711 100644
--- a/llvm/test/CodeGen/X86/2012-01-12-extract-sv.ll
+++ b/llvm/test/CodeGen/X86/2012-01-12-extract-sv.ll
@@ -22,7 +22,7 @@ define void @endless_loop() {
 ; AVX2-NEXT:    vbroadcastss (%eax), %xmm0
 ; AVX2-NEXT:    vxorps %xmm1, %xmm1, %xmm1
 ; AVX2-NEXT:    vblendps {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
-; AVX2-NEXT:    vbroadcastsd %xmm0, %ymm0
+; AVX2-NEXT:    vbroadcastss %xmm0, %ymm0
 ; AVX2-NEXT:    vxorps %xmm2, %xmm2, %xmm2
 ; AVX2-NEXT:    vblendps {{.*#+}} ymm0 = ymm2[0,1,2,3,4,5,6],ymm0[7]
 ; AVX2-NEXT:    vmovaps %ymm0, (%eax)

diff  --git a/llvm/test/CodeGen/X86/combine-concatvectors.ll b/llvm/test/CodeGen/X86/combine-concatvectors.ll
index b9b8f413c97a3..eba77fcf41045 100644
--- a/llvm/test/CodeGen/X86/combine-concatvectors.ll
+++ b/llvm/test/CodeGen/X86/combine-concatvectors.ll
@@ -62,9 +62,8 @@ define void @concat_of_broadcast_v2f64_v4f64() {
 ; AVX2-NEXT:    movl $1091567616, 30256(%rax) # imm = 0x41100000
 ; AVX2-NEXT:    movabsq $4294967297, %rcx # imm = 0x100000001
 ; AVX2-NEXT:    movq %rcx, 46348(%rax)
-; AVX2-NEXT:    vbroadcastss {{.*#+}} xmm0 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
-; AVX2-NEXT:    vbroadcastsd %xmm0, %ymm1
-; AVX2-NEXT:    vmovups %ymm1, 48296(%rax)
+; AVX2-NEXT:    vbroadcastss {{.*#+}} ymm0 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; AVX2-NEXT:    vmovups %ymm0, 48296(%rax)
 ; AVX2-NEXT:    vmovlps %xmm0, 47372(%rax)
 ; AVX2-NEXT:    vzeroupper
 ; AVX2-NEXT:    retq


        


More information about the llvm-commits mailing list