[PATCH] D131700: [AArch64] Add pattern for SQDML*Lv1i32_indexed
OverMighty via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Sat Aug 13 08:52:52 PDT 2022
overmighty updated this revision to Diff 452428.
overmighty added a comment.
A separate `Pat` is now used instead of setting the pattern in the definitions of the `v1i32_indexed` instructions, so changing those instructions' register types is no longer required.
The tests have been updated.
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D131700/new/
https://reviews.llvm.org/D131700
Files:
llvm/lib/Target/AArch64/AArch64InstrFormats.td
llvm/test/CodeGen/AArch64/arm64-vmul.ll
Index: llvm/test/CodeGen/AArch64/arm64-vmul.ll
===================================================================
--- llvm/test/CodeGen/AArch64/arm64-vmul.ll
+++ llvm/test/CodeGen/AArch64/arm64-vmul.ll
@@ -2894,6 +2894,23 @@
ret <1 x double> %prod
}
+define i32 @sqdmlal_s(i16 %A, i16 %B, i32 %C) nounwind {
+; CHECK-LABEL: sqdmlal_s:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fmov s0, w1
+; CHECK-NEXT: fmov s1, w0
+; CHECK-NEXT: fmov s2, w2
+; CHECK-NEXT: sqdmlal.h s2, h1, v0[0]
+; CHECK-NEXT: fmov w0, s2
+; CHECK-NEXT: ret
+ %tmp1 = insertelement <4 x i16> undef, i16 %A, i64 0
+ %tmp2 = insertelement <4 x i16> undef, i16 %B, i64 0
+ %tmp3 = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2)
+ %tmp4 = extractelement <4 x i32> %tmp3, i64 0
+ %tmp5 = tail call i32 @llvm.aarch64.neon.sqadd.i32(i32 %C, i32 %tmp4)
+ ret i32 %tmp5
+}
+
define i64 @sqdmlal_d(i32 %A, i32 %B, i64 %C) nounwind {
; CHECK-LABEL: sqdmlal_d:
; CHECK: // %bb.0:
@@ -2908,6 +2925,23 @@
ret i64 %tmp5
}
+define i32 @sqdmlsl_s(i16 %A, i16 %B, i32 %C) nounwind {
+; CHECK-LABEL: sqdmlsl_s:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fmov s0, w1
+; CHECK-NEXT: fmov s1, w0
+; CHECK-NEXT: fmov s2, w2
+; CHECK-NEXT: sqdmlsl.h s2, h1, v0[0]
+; CHECK-NEXT: fmov w0, s2
+; CHECK-NEXT: ret
+ %tmp1 = insertelement <4 x i16> undef, i16 %A, i64 0
+ %tmp2 = insertelement <4 x i16> undef, i16 %B, i64 0
+ %tmp3 = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2)
+ %tmp4 = extractelement <4 x i32> %tmp3, i64 0
+ %tmp5 = tail call i32 @llvm.aarch64.neon.sqsub.i32(i32 %C, i32 %tmp4)
+ ret i32 %tmp5
+}
+
define i64 @sqdmlsl_d(i32 %A, i32 %B, i64 %C) nounwind {
; CHECK-LABEL: sqdmlsl_d:
; CHECK: // %bb.0:
Index: llvm/lib/Target/AArch64/AArch64InstrFormats.td
===================================================================
--- llvm/lib/Target/AArch64/AArch64InstrFormats.td
+++ llvm/lib/Target/AArch64/AArch64InstrFormats.td
@@ -8914,6 +8914,17 @@
let Inst{20} = idx{0};
}
+ def : Pat<(i32 (Accum (i32 FPR32Op:$Rd),
+ (i32 (vector_extract
+ (v4i32 (int_aarch64_neon_sqdmull
+ (v4i16 (scalar_to_vector (i32 FPR32Op:$Rn))),
+ (v4i16 V64:$Rm))),
+ VectorIndexH:$idx)))),
+ (!cast<Instruction>(NAME # v1i32_indexed)
+ (i32 FPR32Op:$Rd),
+ (EXTRACT_SUBREG FPR32Op:$Rn, hsub),
+ (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rm, dsub),
+ VectorIndexH:$idx)>;
def v1i64_indexed : BaseSIMDIndexedTied<1, U, 1, 0b10, opc,
FPR64Op, FPR32Op, V128, VectorIndexS,
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D131700.452428.patch
Type: text/x-patch
Size: 2907 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20220813/5944af3c/attachment.bin>
More information about the llvm-commits mailing list