[llvm] 3ca527d - [AArch64] Add a test case showing both dup and scalar_to_reg in the same function. NFC
David Green via llvm-commits
llvm-commits at lists.llvm.org
Wed Sep 24 03:22:04 PDT 2025
Author: David Green
Date: 2025-09-24T11:21:59+01:00
New Revision: 3ca527dd9d6c2acbd4e61995d3c797e53f00aec3
URL: https://github.com/llvm/llvm-project/commit/3ca527dd9d6c2acbd4e61995d3c797e53f00aec3
DIFF: https://github.com/llvm/llvm-project/commit/3ca527dd9d6c2acbd4e61995d3c797e53f00aec3.diff
LOG: [AArch64] Add a test case showing both dup and scalar_to_reg in the same function. NFC
Added:
Modified:
llvm/test/CodeGen/AArch64/aarch64-matrix-umull-smull.ll
Removed:
################################################################################
diff --git a/llvm/test/CodeGen/AArch64/aarch64-matrix-umull-smull.ll b/llvm/test/CodeGen/AArch64/aarch64-matrix-umull-smull.ll
index 8655bb1292ef7..cdde11042462b 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-matrix-umull-smull.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-matrix-umull-smull.ll
@@ -1365,7 +1365,72 @@ for.end12: ; preds = %vector.body
ret void
}
-declare i16 @llvm.vector.reduce.add.v8i16(<8 x i16>)
+define noundef <8 x i16> @cmplx_mul_combined_re_im(<8 x i16> noundef %a, i64 %scale.coerce) {
+; CHECK-SD-LABEL: cmplx_mul_combined_re_im:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: lsr x9, x0, #16
+; CHECK-SD-NEXT: adrp x8, .LCPI14_0
+; CHECK-SD-NEXT: dup v4.8h, w0
+; CHECK-SD-NEXT: dup v1.8h, w9
+; CHECK-SD-NEXT: fmov s3, w9
+; CHECK-SD-NEXT: sqneg v2.8h, v1.8h
+; CHECK-SD-NEXT: ldr q1, [x8, :lo12:.LCPI14_0]
+; CHECK-SD-NEXT: tbl v1.16b, { v2.16b, v3.16b }, v1.16b
+; CHECK-SD-NEXT: rev32 v2.8h, v0.8h
+; CHECK-SD-NEXT: sqdmull v3.4s, v0.4h, v4.4h
+; CHECK-SD-NEXT: sqdmull2 v0.4s, v0.8h, v4.8h
+; CHECK-SD-NEXT: sqdmlal v3.4s, v2.4h, v1.4h
+; CHECK-SD-NEXT: sqdmlal2 v0.4s, v2.8h, v1.8h
+; CHECK-SD-NEXT: uzp2 v0.8h, v3.8h, v0.8h
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: cmplx_mul_combined_re_im:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: lsr w9, w0, #16
+; CHECK-GI-NEXT: adrp x8, .LCPI14_0
+; CHECK-GI-NEXT: rev32 v4.8h, v0.8h
+; CHECK-GI-NEXT: dup v1.8h, w9
+; CHECK-GI-NEXT: fmov s3, w9
+; CHECK-GI-NEXT: sqneg v2.8h, v1.8h
+; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI14_0]
+; CHECK-GI-NEXT: tbl v1.16b, { v2.16b, v3.16b }, v1.16b
+; CHECK-GI-NEXT: mov d2, v0.d[1]
+; CHECK-GI-NEXT: dup v3.8h, w0
+; CHECK-GI-NEXT: sqdmull v2.4s, v2.4h, v3.4h
+; CHECK-GI-NEXT: sqdmull v5.4s, v4.4h, v1.4h
+; CHECK-GI-NEXT: sqdmlal v5.4s, v0.4h, v3.4h
+; CHECK-GI-NEXT: sqdmlal2 v2.4s, v4.8h, v1.8h
+; CHECK-GI-NEXT: uzp2 v0.8h, v5.8h, v2.8h
+; CHECK-GI-NEXT: ret
+entry: ; %scale.coerce carries two i16 values: bits 0-15 and bits 16-31 (extracted below)
+ %scale.sroa.0.0.extract.trunc = trunc i64 %scale.coerce to i16 ; low 16 bits of %scale.coerce
+ %scale.sroa.2.0.extract.shift23 = lshr i64 %scale.coerce, 16 ; move bits 16 and up down to bit 0
+ %scale.sroa.2.0.extract.trunc = trunc i64 %scale.sroa.2.0.extract.shift23 to i16 ; bits 16-31 of %scale.coerce
+ %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> poison, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6> ; swap each adjacent pair of lanes of %a
+ %vecinit.i24 = insertelement <8 x i16> poison, i16 %scale.sroa.0.0.extract.trunc, i64 0 ; low-half scalar in lane 0 only; remaining lanes poison
+ %vecinit.i = insertelement <8 x i16> poison, i16 %scale.sroa.2.0.extract.trunc, i64 0 ; high-half scalar in lane 0 only; consumed both splatted (%vecinit7.i) and unsplatted (%vbsl5.i)
+ %vecinit7.i = shufflevector <8 x i16> %vecinit.i, <8 x i16> poison, <8 x i32> zeroinitializer ; broadcast lane 0 to all 8 lanes
+ %vqnegq_v1.i = tail call noundef <8 x i16> @llvm.aarch64.neon.sqneg.v8i16(<8 x i16> %vecinit7.i) ; saturating negate of the splat
+ %vbsl5.i = shufflevector <8 x i16> %vqnegq_v1.i, <8 x i16> %vecinit.i, <8 x i32> <i32 0, i32 8, i32 2, i32 8, i32 4, i32 8, i32 6, i32 8> ; even lanes: negated value; odd lanes: lane 0 of %vecinit.i (mask index 8)
+ %shuffle.i40 = shufflevector <8 x i16> %a, <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3> ; lanes 0-3 of %a
+ %shuffle.i39 = shufflevector <8 x i16> %vecinit.i24, <8 x i16> poison, <4 x i32> zeroinitializer ; low-half scalar broadcast to 4 lanes
+ %vqdmull_v2.i36 = tail call noundef <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i40, <4 x i16> %shuffle.i39) ; saturating doubling widening multiply: lanes 0-3 of %a by the low-half splat
+ %shuffle.i44 = shufflevector <8 x i16> %a, <8 x i16> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7> ; lanes 4-7 of %a
+ %vqdmull_v2.i = tail call noundef <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i44, <4 x i16> %shuffle.i39) ; same multiply for lanes 4-7 of %a
+ %shuffle.i38 = shufflevector <8 x i16> %shuffle.i, <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3> ; lanes 0-3 of the pair-swapped %a
+ %shuffle.i37 = shufflevector <8 x i16> %vbsl5.i, <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3> ; lanes 0-3 of %vbsl5.i
+ %vqdmlal2.i45 = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i38, <4 x i16> %shuffle.i37) ; second product for the low half
+ %vqdmlal_v3.i46 = tail call noundef <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %vqdmull_v2.i36, <4 x i32> %vqdmlal2.i45) ; saturating add of the two low-half products
+ %shuffle.i42 = shufflevector <8 x i16> %shuffle.i, <8 x i16> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7> ; lanes 4-7 of the pair-swapped %a
+ %shuffle.i41 = shufflevector <8 x i16> %vbsl5.i, <8 x i16> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7> ; lanes 4-7 of %vbsl5.i
+ %vqdmlal2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i42, <4 x i16> %shuffle.i41) ; second product for the high half
+ %vqdmlal_v3.i = tail call noundef <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %vqdmull_v2.i, <4 x i32> %vqdmlal2.i) ; saturating add of the two high-half products
+ %0 = bitcast <4 x i32> %vqdmlal_v3.i46 to <8 x i16> ; reinterpret low-half sums as 8 x i16
+ %1 = bitcast <4 x i32> %vqdmlal_v3.i to <8 x i16> ; reinterpret high-half sums as 8 x i16
+ %shuffle.i35 = shufflevector <8 x i16> %0, <8 x i16> %1, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15> ; odd i16 lanes of %0 then %1 (presumably the upper 16 bits of each i32 on little-endian; lowered to uzp2 in the CHECK lines)
+ ret <8 x i16> %shuffle.i35
+}
+
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; CHECK: {{.*}}
More information about the llvm-commits mailing list