[llvm] ec46232 - [DAGCombiner] Fold `ty1 extract_vector(ty2 splat(V)) -> ty1 splat(V)`
Sander de Smalen via llvm-commits
llvm-commits at lists.llvm.org
Wed Feb 9 06:31:21 PST 2022
Author: Sander de Smalen
Date: 2022-02-09T14:30:01Z
New Revision: ec462325178ae061e667c7f32aa5727eeb9a7218
URL: https://github.com/llvm/llvm-project/commit/ec462325178ae061e667c7f32aa5727eeb9a7218
DIFF: https://github.com/llvm/llvm-project/commit/ec462325178ae061e667c7f32aa5727eeb9a7218.diff
LOG: [DAGCombiner] Fold `ty1 extract_vector(ty2 splat(V)) -> ty1 splat(V)`
This seems like an obvious fold; it leads to a few codegen improvements in the AArch64 SVE and RISC-V RVV tests below.
Reviewed By: david-arm
Differential Revision: https://reviews.llvm.org/D118920
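For context, the new combine fires when an EXTRACT_SUBVECTOR's source is a SPLAT_VECTOR and either the splatted value is a constant or the splat has no other uses; the extract is then replaced by a splat of the (narrower) result type. A minimal IR sketch of the pattern, assuming an SVE-enabled AArch64 target and mirroring the extract_v2f32_nxv4f32_splat test added below (the function name here is just for illustration), looks like this:

  ; Splat a scalar across a scalable vector, then extract a fixed-width part.
  define <2 x float> @splat_then_extract(float %f) {
    %ins = insertelement <vscale x 4 x float> poison, float %f, i32 0
    %splat = shufflevector <vscale x 4 x float> %ins, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
    ; With this fold the extract becomes a <2 x float> splat of %f, so the
    ; whole function lowers to a single 'dup v0.2s, v0.s[0]' rather than
    ; materialising the scalable splat first (see the CHECK lines below).
    %ext = call <2 x float> @llvm.experimental.vector.extract.v2f32.nxv4f32(<vscale x 4 x float> %splat, i64 0)
    ret <2 x float> %ext
  }

  declare <2 x float> @llvm.experimental.vector.extract.v2f32.nxv4f32(<vscale x 4 x float>, i64)

Note that the constant-splat variants in the tests fold regardless of use count: the guard in the combine only requires a single use when the splatted value is not a constant.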
Added:
Modified:
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
llvm/test/CodeGen/AArch64/sve-extract-fixed-vector.ll
llvm/test/CodeGen/AArch64/sve-extract-scalable-vector.ll
llvm/test/CodeGen/AArch64/sve-insert-vector.ll
llvm/test/CodeGen/AArch64/sve-st1-addressing-mode-reg-imm.ll
llvm/test/CodeGen/RISCV/rvv/vadd-vp.ll
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index d3f6aaec459ac..8fd0001b4eb3f 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -21109,6 +21109,11 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode *N) {
}
}
+ // ty1 extract_vector(ty2 splat(V)) -> ty1 splat(V)
+ if (V.getOpcode() == ISD::SPLAT_VECTOR)
+ if (DAG.isConstantValueOfAnyType(V.getOperand(0)) || V.hasOneUse())
+ return DAG.getSplatVector(NVT, SDLoc(N), V.getOperand(0));
+
// Try to move vector bitcast after extract_subv by scaling extraction index:
// extract_subv (bitcast X), Index --> bitcast (extract_subv X, Index')
if (V.getOpcode() == ISD::BITCAST &&
diff --git a/llvm/test/CodeGen/AArch64/sve-extract-fixed-vector.ll b/llvm/test/CodeGen/AArch64/sve-extract-fixed-vector.ll
index 3be4b94dedd22..e63921040faff 100644
--- a/llvm/test/CodeGen/AArch64/sve-extract-fixed-vector.ll
+++ b/llvm/test/CodeGen/AArch64/sve-extract-fixed-vector.ll
@@ -424,6 +424,44 @@ entry:
ret <4 x i32> %out
}
+;
+; Extract fixed-width vector from a scalable vector splat.
+;
+
+define <2 x float> @extract_v2f32_nxv4f32_splat(float %f) {
+; CHECK-LABEL: extract_v2f32_nxv4f32_splat:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0
+; CHECK-NEXT: dup v0.2s, v0.s[0]
+; CHECK-NEXT: ret
+ %ins = insertelement <vscale x 4 x float> poison, float %f, i32 0
+ %splat = shufflevector <vscale x 4 x float> %ins, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
+ %ext = call <2 x float> @llvm.experimental.vector.extract.v2f32.nxv4f32(<vscale x 4 x float> %splat, i64 0)
+ ret <2 x float> %ext
+}
+
+define <2 x float> @extract_v2f32_nxv4f32_splat_const() {
+; CHECK-LABEL: extract_v2f32_nxv4f32_splat_const:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fmov v0.2s, #1.00000000
+; CHECK-NEXT: ret
+ %ins = insertelement <vscale x 4 x float> poison, float 1.0, i32 0
+ %splat = shufflevector <vscale x 4 x float> %ins, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
+ %ext = call <2 x float> @llvm.experimental.vector.extract.v2f32.nxv4f32(<vscale x 4 x float> %splat, i64 0)
+ ret <2 x float> %ext
+}
+
+define <4 x i32> @extract_v4i32_nxv8i32_splat_const() {
+; CHECK-LABEL: extract_v4i32_nxv8i32_splat_const:
+; CHECK: // %bb.0:
+; CHECK-NEXT: movi v0.4s, #1
+; CHECK-NEXT: ret
+ %ins = insertelement <vscale x 8 x i32> poison, i32 1, i32 0
+ %splat = shufflevector <vscale x 8 x i32> %ins, <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer
+ %ext = call <4 x i32> @llvm.experimental.vector.extract.v4i32.nxv8i32(<vscale x 8 x i32> %splat, i64 0)
+ ret <4 x i32> %ext
+}
+
attributes #0 = { vscale_range(2,2) }
attributes #1 = { vscale_range(8,8) }
@@ -442,3 +480,5 @@ declare <16 x i8> @llvm.experimental.vector.extract.v16i8.nxv4i8(<vscale x 4 x i
declare <16 x i8> @llvm.experimental.vector.extract.v16i8.nxv2i8(<vscale x 2 x i8>, i64)
declare <4 x i64> @llvm.experimental.vector.extract.v4i64.nxv2i64(<vscale x 2 x i64>, i64)
+declare <2 x float> @llvm.experimental.vector.extract.v2f32.nxv4f32(<vscale x 4 x float>, i64)
+declare <4 x i32> @llvm.experimental.vector.extract.v4i32.nxv8i32(<vscale x 8 x i32>, i64)
diff --git a/llvm/test/CodeGen/AArch64/sve-extract-scalable-vector.ll b/llvm/test/CodeGen/AArch64/sve-extract-scalable-vector.ll
index d2453d2aae60d..cd2bf99400bfe 100644
--- a/llvm/test/CodeGen/AArch64/sve-extract-scalable-vector.ll
+++ b/llvm/test/CodeGen/AArch64/sve-extract-scalable-vector.ll
@@ -1014,3 +1014,63 @@ define <vscale x 4 x bfloat> @extract_nxv4bf16_nxv16bf16_12(<vscale x 16 x bfloa
declare <vscale x 4 x bfloat> @llvm.experimental.vector.extract.nxv4bf16.nxv16bf16(<vscale x 16 x bfloat>, i64)
+
+;
+; Extract from a splat
+;
+define <vscale x 2 x float> @extract_nxv2f32_nxv4f32_splat(float %f) {
+; CHECK-LABEL: extract_nxv2f32_nxv4f32_splat:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $s0 killed $s0 def $z0
+; CHECK-NEXT: mov z0.s, s0
+; CHECK-NEXT: ret
+ %ins = insertelement <vscale x 4 x float> poison, float %f, i32 0
+ %splat = shufflevector <vscale x 4 x float> %ins, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
+ %ext = call <vscale x 2 x float> @llvm.experimental.vector.extract.nxv2f32.nxv4f32(<vscale x 4 x float> %splat, i64 0)
+ ret <vscale x 2 x float> %ext
+}
+
+define <vscale x 2 x float> @extract_nxv2f32_nxv4f32_splat_const() {
+; CHECK-LABEL: extract_nxv2f32_nxv4f32_splat_const:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fmov z0.s, #1.00000000
+; CHECK-NEXT: ret
+ %ins = insertelement <vscale x 4 x float> poison, float 1.0, i32 0
+ %splat = shufflevector <vscale x 4 x float> %ins, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
+ %ext = call <vscale x 2 x float> @llvm.experimental.vector.extract.nxv2f32.nxv4f32(<vscale x 4 x float> %splat, i64 0)
+ ret <vscale x 2 x float> %ext
+}
+
+define <vscale x 4 x i32> @extract_nxv4i32_nxv8i32_splat_const() {
+; CHECK-LABEL: extract_nxv4i32_nxv8i32_splat_const:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov z0.s, #1 // =0x1
+; CHECK-NEXT: ret
+ %ins = insertelement <vscale x 8 x i32> poison, i32 1, i32 0
+ %splat = shufflevector <vscale x 8 x i32> %ins, <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer
+ %ext = call <vscale x 4 x i32> @llvm.experimental.vector.extract.nxv4i32.nxv8i32(<vscale x 8 x i32> %splat, i64 0)
+ ret <vscale x 4 x i32> %ext
+}
+
+define <vscale x 2 x i1> @extract_nxv2i1_nxv16i1_all_ones() {
+; CHECK-LABEL: extract_nxv2i1_nxv16i1_all_ones:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: ret
+ %ins = insertelement <vscale x 16 x i1> poison, i1 1, i32 0
+ %splat = shufflevector <vscale x 16 x i1> %ins, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
+ %ext = call <vscale x 2 x i1> @llvm.experimental.vector.extract.nxv2i1.nxv16i1(<vscale x 16 x i1> %splat, i64 0)
+ ret <vscale x 2 x i1> %ext
+}
+
+define <vscale x 2 x i1> @extract_nxv2i1_nxv16i1_all_zero() {
+; CHECK-LABEL: extract_nxv2i1_nxv16i1_all_zero:
+; CHECK: // %bb.0:
+; CHECK-NEXT: pfalse p0.b
+; CHECK-NEXT: ret
+ %ext = call <vscale x 2 x i1> @llvm.experimental.vector.extract.nxv2i1.nxv16i1(<vscale x 16 x i1> zeroinitializer, i64 0)
+ ret <vscale x 2 x i1> %ext
+}
+
+declare <vscale x 2 x float> @llvm.experimental.vector.extract.nxv2f32.nxv4f32(<vscale x 4 x float>, i64)
+declare <vscale x 4 x i32> @llvm.experimental.vector.extract.nxv4i32.nxv8i32(<vscale x 8 x i32>, i64)
diff --git a/llvm/test/CodeGen/AArch64/sve-insert-vector.ll b/llvm/test/CodeGen/AArch64/sve-insert-vector.ll
index 594b3e0b2f8b0..b0b8dd15780cc 100644
--- a/llvm/test/CodeGen/AArch64/sve-insert-vector.ll
+++ b/llvm/test/CodeGen/AArch64/sve-insert-vector.ll
@@ -554,10 +554,7 @@ define <vscale x 16 x i1> @insert_nxv16i1_nxv4i1_into_zero(<vscale x 4 x i1> %sv
; CHECK-LABEL: insert_nxv16i1_nxv4i1_into_zero:
; CHECK: // %bb.0:
; CHECK-NEXT: pfalse p1.b
-; CHECK-NEXT: punpklo p2.h, p1.b
-; CHECK-NEXT: punpkhi p1.h, p1.b
-; CHECK-NEXT: punpkhi p2.h, p2.b
-; CHECK-NEXT: uzp1 p0.h, p0.h, p2.h
+; CHECK-NEXT: uzp1 p0.h, p0.h, p1.h
; CHECK-NEXT: uzp1 p0.b, p0.b, p1.b
; CHECK-NEXT: ret
%v0 = call <vscale x 16 x i1> @llvm.experimental.vector.insert.nx16i1.nxv4i1(<vscale x 16 x i1> zeroinitializer, <vscale x 4 x i1> %sv, i64 0)
diff --git a/llvm/test/CodeGen/AArch64/sve-st1-addressing-mode-reg-imm.ll b/llvm/test/CodeGen/AArch64/sve-st1-addressing-mode-reg-imm.ll
index 06f39a4ba934f..4702e36600292 100644
--- a/llvm/test/CodeGen/AArch64/sve-st1-addressing-mode-reg-imm.ll
+++ b/llvm/test/CodeGen/AArch64/sve-st1-addressing-mode-reg-imm.ll
@@ -134,11 +134,10 @@ define void @store_nxv6f32(<vscale x 6 x float>* %out) {
; CHECK-LABEL: store_nxv6f32:
; CHECK: // %bb.0:
; CHECK-NEXT: fmov z0.s, #1.00000000
-; CHECK-NEXT: ptrue p0.s
-; CHECK-NEXT: uunpklo z1.d, z0.s
-; CHECK-NEXT: ptrue p1.d
-; CHECK-NEXT: st1w { z0.s }, p0, [x0]
-; CHECK-NEXT: st1w { z1.d }, p1, [x0, #2, mul vl]
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: ptrue p1.s
+; CHECK-NEXT: st1w { z0.d }, p0, [x0, #2, mul vl]
+; CHECK-NEXT: st1w { z0.s }, p1, [x0]
; CHECK-NEXT: ret
%ins = insertelement <vscale x 6 x float> undef, float 1.0, i32 0
%splat = shufflevector <vscale x 6 x float> %ins, <vscale x 6 x float> undef, <vscale x 6 x i32> zeroinitializer
@@ -150,11 +149,10 @@ define void @store_nxv12f16(<vscale x 12 x half>* %out) {
; CHECK-LABEL: store_nxv12f16:
; CHECK: // %bb.0:
; CHECK-NEXT: fmov z0.h, #1.00000000
-; CHECK-NEXT: ptrue p0.h
-; CHECK-NEXT: uunpklo z1.s, z0.h
-; CHECK-NEXT: ptrue p1.s
-; CHECK-NEXT: st1h { z0.h }, p0, [x0]
-; CHECK-NEXT: st1h { z1.s }, p1, [x0, #2, mul vl]
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: ptrue p1.h
+; CHECK-NEXT: st1h { z0.s }, p0, [x0, #2, mul vl]
+; CHECK-NEXT: st1h { z0.h }, p1, [x0]
; CHECK-NEXT: ret
%ins = insertelement <vscale x 12 x half> undef, half 1.0, i32 0
%splat = shufflevector <vscale x 12 x half> %ins, <vscale x 12 x half> undef, <vscale x 12 x i32> zeroinitializer
diff --git a/llvm/test/CodeGen/RISCV/rvv/vadd-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vadd-vp.ll
index 42819bebe43de..2c1dcf14555a4 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vadd-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vadd-vp.ll
@@ -1572,33 +1572,26 @@ define <vscale x 32 x i32> @vadd_vi_nxv32i32(<vscale x 32 x i32> %va, <vscale x
ret <vscale x 32 x i32> %v
}
-; FIXME: We don't catch this as unmasked.
-
define <vscale x 32 x i32> @vadd_vi_nxv32i32_unmasked(<vscale x 32 x i32> %va, i32 zeroext %evl) {
; CHECK-LABEL: vadd_vi_nxv32i32_unmasked:
; CHECK: # %bb.0:
-; CHECK-NEXT: li a2, 0
; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: srli a4, a1, 2
-; CHECK-NEXT: vsetvli a3, zero, e8, m4, ta, mu
-; CHECK-NEXT: vmset.m v24
-; CHECK-NEXT: vsetvli a3, zero, e8, mf2, ta, mu
; CHECK-NEXT: slli a1, a1, 1
-; CHECK-NEXT: sub a3, a0, a1
-; CHECK-NEXT: vslidedown.vx v0, v24, a4
-; CHECK-NEXT: bltu a0, a3, .LBB119_2
+; CHECK-NEXT: mv a2, a0
+; CHECK-NEXT: bltu a0, a1, .LBB119_2
; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: mv a2, a3
+; CHECK-NEXT: mv a2, a1
; CHECK-NEXT: .LBB119_2:
+; CHECK-NEXT: li a3, 0
; CHECK-NEXT: vsetvli zero, a2, e32, m8, ta, mu
-; CHECK-NEXT: vadd.vi v16, v16, -1, v0.t
+; CHECK-NEXT: sub a1, a0, a1
+; CHECK-NEXT: vadd.vi v8, v8, -1
; CHECK-NEXT: bltu a0, a1, .LBB119_4
; CHECK-NEXT: # %bb.3:
-; CHECK-NEXT: mv a0, a1
+; CHECK-NEXT: mv a3, a1
; CHECK-NEXT: .LBB119_4:
-; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu
-; CHECK-NEXT: vmv1r.v v0, v24
-; CHECK-NEXT: vadd.vi v8, v8, -1, v0.t
+; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, mu
+; CHECK-NEXT: vadd.vi v16, v16, -1
; CHECK-NEXT: ret
%elt.head = insertelement <vscale x 32 x i32> poison, i32 -1, i32 0
%vb = shufflevector <vscale x 32 x i32> %elt.head, <vscale x 32 x i32> poison, <vscale x 32 x i32> zeroinitializer