[PATCH] D109001: [AArch64] Fold an sqadd of a sqdmull at lane 0 into an sqdmlal
Sam Tebbs via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Tue Aug 31 08:36:39 PDT 2021
samtebbs created this revision.
samtebbs added reviewers: dmgreen, SjoerdMeijer, NickGuy.
Herald added subscribers: hiraditya, kristof.beyls.
samtebbs requested review of this revision.
Herald added a project: LLVM.
Herald added a subscriber: llvm-commits.
This patch folds an sqadd(i32, vector_extract(sqdmull(v4i16, v4i16), 0)) into an sqdmlal. We already generate an sqdmlal for lanes greater than 0; this patch emits an sqdmlal of the same format for lane 0, which is necessary because the existing pattern doesn't match that case.
Repository:
rG LLVM Github Monorepo
https://reviews.llvm.org/D109001
Files:
llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
llvm/test/CodeGen/AArch64/arm64-neon-2velem.ll
Index: llvm/test/CodeGen/AArch64/arm64-neon-2velem.ll
===================================================================
--- llvm/test/CodeGen/AArch64/arm64-neon-2velem.ll
+++ llvm/test/CodeGen/AArch64/arm64-neon-2velem.ll
@@ -52,6 +52,8 @@
declare <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32>, <4 x i32>)
+declare i32 @llvm.aarch64.neon.sqadd.i32(i32, i32) #1
+
declare <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32>, <2 x i32>)
declare <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16>, <4 x i16>)
@@ -3235,6 +3237,25 @@
ret <4 x i32> %vqdmlal4.i
}
+define i32 @test_vqdmlal_lane_s16_0_i32(i32 %a, i16 %b, <4 x i16> %c) {
+; CHECK-LABEL: test_vqdmlal_lane_s16_0_i32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fmov s1, w1
+; CHECK-NEXT: fmov s2, w0
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: sqdmlal v2.4s, v1.4h, v0.h[0]
+; CHECK-NEXT: fmov w0, s2
+; CHECK-NEXT: ret
+entry:
+ %0 = insertelement <4 x i16> undef, i16 %b, i64 0
+ %1 = shufflevector <4 x i16> %c, <4 x i16> undef, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
+ %vqdmlXl = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %0, <4 x i16> %1)
+ %lane0 = extractelement <4 x i32> %vqdmlXl, i64 0
+ %vqdmlXl1 = tail call i32 @llvm.aarch64.neon.sqadd.i32(i32 %a, i32 %lane0)
+ ret i32 %vqdmlXl1
+}
+
+
define <2 x i64> @test_vqdmlal_lane_s32_0(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) {
; CHECK-LABEL: test_vqdmlal_lane_s32_0:
; CHECK: // %bb.0: // %entry
Index: llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
===================================================================
--- llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -1201,6 +1201,17 @@
(ADR_LSL_ZZZ_D_2 $Op1, $Op2)>;
def : Pat<(nxv2i64 (int_aarch64_sve_adrd nxv2i64:$Op1, nxv2i64:$Op2)),
(ADR_LSL_ZZZ_D_3 $Op1, $Op2)>;
+
+ def : Pat<(i32 (int_aarch64_neon_sqadd (i32 FPR32Op:$Rd),
+ (i32 (vector_extract (v4i32 (int_aarch64_neon_sqdmull
+ (v4i16 V64:$Rm),
+ (v4i16 V64:$Rn))),
+ (i64 0))))),
+ (EXTRACT_SUBREG (SQDMLALv4i16_indexed
+ (SUBREG_TO_REG (i32 0), FPR32Op:$Rd, ssub),
+ V64:$Rm, (INSERT_SUBREG
+ (v8i16 (IMPLICIT_DEF)), V64:$Rn, dsub),
+ (i64 0)), ssub)>;
} // End HasSVE
let Predicates = [HasSVEorStreamingSVE] in {
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D109001.369711.patch
Type: text/x-patch
Size: 2491 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20210831/f53558a7/attachment.bin>
More information about the llvm-commits
mailing list