[llvm] fd71692 - [DAGCombine] Fold Splat(bitcast(buildvector(x, ..))) to splat(x)
David Green via llvm-commits
llvm-commits at lists.llvm.org
Mon Dec 12 00:35:48 PST 2022
Author: David Green
Date: 2022-12-12T08:35:43Z
New Revision: fd716925eca2a65c79a4375fb46151816afc1809
URL: https://github.com/llvm/llvm-project/commit/fd716925eca2a65c79a4375fb46151816afc1809
DIFF: https://github.com/llvm/llvm-project/commit/fd716925eca2a65c79a4375fb46151816afc1809.diff
LOG: [DAGCombine] Fold Splat(bitcast(buildvector(x,..))) to splat(x)
This adds a fold which teaches the backend to fold
splat(bitcast(buildvector(x,..))) or
splat(bitcast(scalar_to_vector(x))) to a single splat.
This only handles lane 0 splats, which are only valid under LE, and
needs to be a little careful with the types it creates for the new
buildvector.
Differential Revision: https://reviews.llvm.org/D139611
Added:
Modified:
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
llvm/test/CodeGen/AArch64/arm64-dup.ll
llvm/test/CodeGen/AArch64/arm64-neon-3vdiff.ll
llvm/test/CodeGen/Thumb2/mve-vdup.ll
llvm/test/CodeGen/WebAssembly/simd-shuffle-bitcast.ll
llvm/test/CodeGen/X86/vector-shuffle-mmx.ll
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 587405d446917..f9a73351e2b3e 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -23021,6 +23021,23 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
if (auto *Idx = dyn_cast<ConstantSDNode>(N0.getOperand(2)))
if (Idx->getAPIntValue() == SplatIndex)
return DAG.getSplatBuildVector(VT, SDLoc(N), N0.getOperand(1));
+
+ // Look through a bitcast if LE and splatting lane 0, through to a
+ // scalar_to_vector or a build_vector.
+ if (N0.getOpcode() == ISD::BITCAST && N0.getOperand(0).hasOneUse() &&
+ SplatIndex == 0 && DAG.getDataLayout().isLittleEndian() &&
+ (N0.getOperand(0).getOpcode() == ISD::SCALAR_TO_VECTOR ||
+ N0.getOperand(0).getOpcode() == ISD::BUILD_VECTOR)) {
+ EVT N00VT = N0.getOperand(0).getValueType();
+ if (VT.getScalarSizeInBits() <= N00VT.getScalarSizeInBits() &&
+ VT.isInteger() && N00VT.isInteger()) {
+ EVT InVT =
+ TLI.getTypeToTransformTo(*DAG.getContext(), VT.getScalarType());
+ SDValue Op = DAG.getZExtOrTrunc(N0.getOperand(0).getOperand(0),
+ SDLoc(N), InVT);
+ return DAG.getSplatBuildVector(VT, SDLoc(N), Op);
+ }
+ }
}
// If this is a bit convert that changes the element type of the vector but
diff --git a/llvm/test/CodeGen/AArch64/arm64-dup.ll b/llvm/test/CodeGen/AArch64/arm64-dup.ll
index 2c3af5be816cc..0947730ebab0a 100644
--- a/llvm/test/CodeGen/AArch64/arm64-dup.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-dup.ll
@@ -508,8 +508,7 @@ define <4 x i32> @dup_const24(<2 x i32> %A, <2 x i32> %B, <4 x i32> %C) nounwind
define <8 x i16> @bitcast_i64_v8i16(i64 %a) {
; CHECK-LABEL: bitcast_i64_v8i16:
; CHECK: // %bb.0:
-; CHECK-NEXT: fmov d0, x0
-; CHECK-NEXT: dup.8h v0, v0[0]
+; CHECK-NEXT: dup.8h v0, w0
; CHECK-NEXT: ret
%b = bitcast i64 %a to <4 x i16>
%r = shufflevector <4 x i16> %b, <4 x i16> poison, <8 x i32> zeroinitializer
diff --git a/llvm/test/CodeGen/AArch64/arm64-neon-3vdiff.ll b/llvm/test/CodeGen/AArch64/arm64-neon-3vdiff.ll
index 0994036e105cc..fdbd6f815354f 100644
--- a/llvm/test/CodeGen/AArch64/arm64-neon-3vdiff.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-neon-3vdiff.ll
@@ -2538,12 +2538,11 @@ define <8 x i16> @cmplx_mul_combined_re_im(<8 x i16> noundef %a, i64 %scale.coer
; CHECK-LABEL: cmplx_mul_combined_re_im:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: lsr x8, x0, #16
+; CHECK-NEXT: adrp x9, .LCPI196_0
; CHECK-NEXT: fmov d4, x0
; CHECK-NEXT: rev32 v5.8h, v0.8h
-; CHECK-NEXT: fmov d1, x8
-; CHECK-NEXT: adrp x8, .LCPI196_0
-; CHECK-NEXT: dup v1.8h, v1.h[0]
-; CHECK-NEXT: ldr q3, [x8, :lo12:.LCPI196_0]
+; CHECK-NEXT: dup v1.8h, w8
+; CHECK-NEXT: ldr q3, [x9, :lo12:.LCPI196_0]
; CHECK-NEXT: sqneg v2.8h, v1.8h
; CHECK-NEXT: tbl v1.16b, { v1.16b, v2.16b }, v3.16b
; CHECK-NEXT: sqdmull v2.4s, v0.4h, v4.h[0]
diff --git a/llvm/test/CodeGen/Thumb2/mve-vdup.ll b/llvm/test/CodeGen/Thumb2/mve-vdup.ll
index 74944b3ab76a7..9ba3866ad4730 100644
--- a/llvm/test/CodeGen/Thumb2/mve-vdup.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-vdup.ll
@@ -399,8 +399,6 @@ define arm_aapcs_vfpcc <8 x i16> @bitcast_i64_v8i16(i64 %a) {
define arm_aapcs_vfpcc <8 x i16> @bitcast_i128_v8i16(i128 %a) {
; CHECK-LE-LABEL: bitcast_i128_v8i16:
; CHECK-LE: @ %bb.0:
-; CHECK-LE-NEXT: vmov.32 q0[0], r0
-; CHECK-LE-NEXT: vmov.u16 r0, q0[0]
; CHECK-LE-NEXT: vdup.16 q0, r0
; CHECK-LE-NEXT: bx lr
;
@@ -549,8 +547,6 @@ define arm_aapcs_vfpcc <8 x i16> @bitcast_v2f64_v8i16(<2 x i64> %a) {
define arm_aapcs_vfpcc <8 x i16> @other_max_case(i32 %blockSize) {
; CHECK-LE-LABEL: other_max_case:
; CHECK-LE: @ %bb.0:
-; CHECK-LE-NEXT: vmov.32 q0[0], r0
-; CHECK-LE-NEXT: vmov.u16 r0, q0[0]
; CHECK-LE-NEXT: vdup.16 q0, r0
; CHECK-LE-NEXT: bx lr
;
diff --git a/llvm/test/CodeGen/WebAssembly/simd-shuffle-bitcast.ll b/llvm/test/CodeGen/WebAssembly/simd-shuffle-bitcast.ll
index 662a7d74ace84..2aebae5231f4b 100644
--- a/llvm/test/CodeGen/WebAssembly/simd-shuffle-bitcast.ll
+++ b/llvm/test/CodeGen/WebAssembly/simd-shuffle-bitcast.ll
@@ -1,8 +1,7 @@
; RUN: llc < %s -asm-verbose=false -verify-machineinstrs -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=+simd128 | FileCheck %s
; Test that a splat shuffle of an fp-to-int bitcasted vector correctly
-; optimizes and lowers to a single splat instruction. Without a custom
-; DAG combine, this ends up doing both a splat and a shuffle.
+; optimizes and lowers to a single splat instruction.
target triple = "wasm32-unknown-unknown"
@@ -19,8 +18,8 @@ define <4 x i32> @f32x4_splat(float %x) {
; CHECK-LABEL: not_a_vec:
; CHECK-NEXT: .functype not_a_vec (i64, i64) -> (v128){{$}}
-; CHECK-NEXT: i64x2.splat $push[[L1:[0-9]+]]=, $0{{$}}
-; CHECK-NEXT: i8x16.shuffle $push[[R:[0-9]+]]=, $pop[[L1]], $2, 0, 1, 2, 3
+; CHECK-NEXT: i32.wrap_i64 $push[[L:[0-9]+]]=, $0
+; CHECK-NEXT: i32x4.splat $push[[R:[0-9]+]]=, $pop[[L]]
; CHECK-NEXT: return $pop[[R]]
define <4 x i32> @not_a_vec(i128 %x) {
%a = bitcast i128 %x to <4 x i32>
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-mmx.ll b/llvm/test/CodeGen/X86/vector-shuffle-mmx.ll
index d294757f63d2a..422f522e11f8c 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-mmx.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-mmx.ll
@@ -74,8 +74,8 @@ define void @test2() nounwind {
; X64-LABEL: test2:
; X64: ## %bb.0: ## %entry
; X64-NEXT: movq _tmp_V2i at GOTPCREL(%rip), %rax
-; X64-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
-; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
+; X64-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; X64-NEXT: movq %xmm0, (%rax)
; X64-NEXT: retq
entry:
More information about the llvm-commits
mailing list