[llvm] [AArch64] Add tests for commutable [usp]mull, [us]addl, [us]abdl (PR #152512)
Cullen Rhodes via llvm-commits
llvm-commits at lists.llvm.org
Thu Aug 7 07:15:55 PDT 2025
https://github.com/c-rhodes created https://github.com/llvm/llvm-project/pull/152512
Precommit tests for PR #152158.
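Each new test runs the same widening operation twice with the operands swapped and keeps both results alive, so a follow-up change can verify the backend treats these instructions as commutable. As a minimal sketch of the pattern (modelled on the smull case in the patch below; the function name here is illustrative):

define void @commutable_smull_sketch(<2 x i32> %a, <2 x i32> %b, ptr %p) {
  ; Same intrinsic, operands swapped; both results are kept live via stores.
  %1 = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %a, <2 x i32> %b)
  %2 = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> %a)
  store <2 x i64> %1, ptr %p
  %q = getelementptr i8, ptr %p, i64 16
  store <2 x i64> %2, ptr %q
  ret void
}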
From 18daf9049db88d5cff79d7a63ade1a90152c2437 Mon Sep 17 00:00:00 2001
From: Cullen Rhodes <cullen.rhodes at arm.com>
Date: Tue, 5 Aug 2025 14:14:01 +0000
Subject: [PATCH] [AArch64] Add tests for commutable [usp]mull, [us]addl,
[us]abdl
Precommit tests for PR #152158.
---
.../test/CodeGen/AArch64/arm64-neon-3vdiff.ll | 52 ++++++++++++-
.../CodeGen/AArch64/arm64-neon-mul-div.ll | 13 ++++
llvm/test/CodeGen/AArch64/arm64-vabs.ll | 60 +++++++++++++++
llvm/test/CodeGen/AArch64/arm64-vmul.ll | 77 +++++++++++++++++--
4 files changed, 192 insertions(+), 10 deletions(-)
diff --git a/llvm/test/CodeGen/AArch64/arm64-neon-3vdiff.ll b/llvm/test/CodeGen/AArch64/arm64-neon-3vdiff.ll
index 256ff94830113..557fcbbb41961 100644
--- a/llvm/test/CodeGen/AArch64/arm64-neon-3vdiff.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-neon-3vdiff.ll
@@ -70,6 +70,30 @@ entry:
ret <2 x i64> %add.i
}
+define void @test_commutable_vaddl_s8(<8 x i8> %a, <8 x i8> %b, ptr %c) {
+; CHECK-SD-LABEL: test_commutable_vaddl_s8:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: saddl v0.8h, v0.8b, v1.8b
+; CHECK-SD-NEXT: stp q0, q0, [x0]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_commutable_vaddl_s8:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: saddl v2.8h, v0.8b, v1.8b
+; CHECK-GI-NEXT: saddl v0.8h, v1.8b, v0.8b
+; CHECK-GI-NEXT: stp q2, q0, [x0]
+; CHECK-GI-NEXT: ret
+entry:
+ %vmovl.i.i = sext <8 x i8> %a to <8 x i16>
+ %vmovl.i2.i = sext <8 x i8> %b to <8 x i16>
+ %add.i = add <8 x i16> %vmovl.i.i, %vmovl.i2.i
+ store <8 x i16> %add.i, ptr %c
+ %add.i2 = add <8 x i16> %vmovl.i2.i, %vmovl.i.i
+ %c.gep.1 = getelementptr i8, ptr %c, i64 16
+ store <8 x i16> %add.i2, ptr %c.gep.1
+ ret void
+}
+
define <8 x i16> @test_vaddl_u8(<8 x i8> %a, <8 x i8> %b) {
; CHECK-LABEL: test_vaddl_u8:
; CHECK: // %bb.0: // %entry
@@ -106,6 +130,30 @@ entry:
ret <2 x i64> %add.i
}
+define void @test_commutable_vaddl_u8(<8 x i8> %a, <8 x i8> %b, ptr %c) {
+; CHECK-SD-LABEL: test_commutable_vaddl_u8:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: uaddl v0.8h, v0.8b, v1.8b
+; CHECK-SD-NEXT: stp q0, q0, [x0]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_commutable_vaddl_u8:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: uaddl v2.8h, v0.8b, v1.8b
+; CHECK-GI-NEXT: uaddl v0.8h, v1.8b, v0.8b
+; CHECK-GI-NEXT: stp q2, q0, [x0]
+; CHECK-GI-NEXT: ret
+entry:
+ %vmovl.i.i = zext <8 x i8> %a to <8 x i16>
+ %vmovl.i2.i = zext <8 x i8> %b to <8 x i16>
+ %add.i = add <8 x i16> %vmovl.i.i, %vmovl.i2.i
+ store <8 x i16> %add.i, ptr %c
+ %add.i2 = add <8 x i16> %vmovl.i2.i, %vmovl.i.i
+ %c.gep.1 = getelementptr i8, ptr %c, i64 16
+ store <8 x i16> %add.i2, ptr %c.gep.1
+ ret void
+}
+
define <8 x i16> @test_vaddl_a8(<8 x i8> %a, <8 x i8> %b) {
; CHECK-SD-LABEL: test_vaddl_a8:
; CHECK-SD: // %bb.0: // %entry
@@ -2892,9 +2940,9 @@ define <8 x i16> @cmplx_mul_combined_re_im(<8 x i16> noundef %a, i64 %scale.coer
; CHECK-GI-LABEL: cmplx_mul_combined_re_im:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: lsr x9, x0, #16
-; CHECK-GI-NEXT: adrp x8, .LCPI196_0
+; CHECK-GI-NEXT: adrp x8, .LCPI198_0
; CHECK-GI-NEXT: rev32 v4.8h, v0.8h
-; CHECK-GI-NEXT: ldr q3, [x8, :lo12:.LCPI196_0]
+; CHECK-GI-NEXT: ldr q3, [x8, :lo12:.LCPI198_0]
; CHECK-GI-NEXT: fmov d1, x9
; CHECK-GI-NEXT: dup v2.8h, v1.h[0]
; CHECK-GI-NEXT: sqneg v1.8h, v2.8h
diff --git a/llvm/test/CodeGen/AArch64/arm64-neon-mul-div.ll b/llvm/test/CodeGen/AArch64/arm64-neon-mul-div.ll
index ecf3f69825c0e..e3515124a6d34 100644
--- a/llvm/test/CodeGen/AArch64/arm64-neon-mul-div.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-neon-mul-div.ll
@@ -1608,6 +1608,19 @@ define <16 x i8> @poly_mulv16i8(<16 x i8> %lhs, <16 x i8> %rhs) {
ret <16 x i8> %prod
}
+define <16 x i8> @commutable_poly_mul(<16 x i8> %lhs, <16 x i8> %rhs) {
+; CHECK-LABEL: commutable_poly_mul:
+; CHECK: // %bb.0:
+; CHECK-NEXT: pmul v2.16b, v0.16b, v1.16b
+; CHECK-NEXT: pmul v0.16b, v1.16b, v0.16b
+; CHECK-NEXT: add v0.16b, v2.16b, v0.16b
+; CHECK-NEXT: ret
+ %1 = call <16 x i8> @llvm.aarch64.neon.pmul.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
+ %2 = call <16 x i8> @llvm.aarch64.neon.pmul.v16i8(<16 x i8> %rhs, <16 x i8> %lhs)
+ %3 = add <16 x i8> %1, %2
+ ret <16 x i8> %3
+}
+
declare <4 x i16> @llvm.aarch64.neon.sqdmulh.v4i16(<4 x i16>, <4 x i16>)
declare <8 x i16> @llvm.aarch64.neon.sqdmulh.v8i16(<8 x i16>, <8 x i16>)
declare <2 x i32> @llvm.aarch64.neon.sqdmulh.v2i32(<2 x i32>, <2 x i32>)
diff --git a/llvm/test/CodeGen/AArch64/arm64-vabs.ll b/llvm/test/CodeGen/AArch64/arm64-vabs.ll
index 78881c80ccc10..8e3611d312612 100644
--- a/llvm/test/CodeGen/AArch64/arm64-vabs.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-vabs.ll
@@ -44,6 +44,36 @@ define <2 x i64> @sabdl2d(ptr %A, ptr %B) nounwind {
ret <2 x i64> %tmp4
}
+define void @commutable_sabdl(ptr %A, ptr %B, ptr %C) nounwind {
+; CHECK-SD-LABEL: commutable_sabdl:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: ldr d0, [x0]
+; CHECK-SD-NEXT: ldr d1, [x1]
+; CHECK-SD-NEXT: sabdl.8h v0, v1, v0
+; CHECK-SD-NEXT: str q0, [x2]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: commutable_sabdl:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: ldr d0, [x0]
+; CHECK-GI-NEXT: ldr d1, [x1]
+; CHECK-GI-NEXT: sabdl.8h v2, v0, v1
+; CHECK-GI-NEXT: sabdl.8h v0, v1, v0
+; CHECK-GI-NEXT: str q2, [x2]
+; CHECK-GI-NEXT: str q0, [x2]
+; CHECK-GI-NEXT: ret
+ %tmp1 = load <8 x i8>, ptr %A
+ %tmp2 = load <8 x i8>, ptr %B
+ %tmp3 = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
+ %tmp4 = zext <8 x i8> %tmp3 to <8 x i16>
+ store <8 x i16> %tmp4, ptr %C
+ %tmp5 = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> %tmp2, <8 x i8> %tmp1)
+ %tmp6 = zext <8 x i8> %tmp5 to <8 x i16>
+ %tmp7 = getelementptr i8, ptr %C, i64 16
+ store <8 x i16> %tmp6, ptr %C
+ ret void
+}
+
define <8 x i16> @sabdl2_8h(ptr %A, ptr %B) nounwind {
; CHECK-SD-LABEL: sabdl2_8h:
; CHECK-SD: // %bb.0:
@@ -155,6 +185,36 @@ define <2 x i64> @uabdl2d(ptr %A, ptr %B) nounwind {
ret <2 x i64> %tmp4
}
+define void @commutable_uabdl(ptr %A, ptr %B, ptr %C) nounwind {
+; CHECK-SD-LABEL: commutable_uabdl:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: ldr d0, [x0]
+; CHECK-SD-NEXT: ldr d1, [x1]
+; CHECK-SD-NEXT: uabdl.8h v0, v1, v0
+; CHECK-SD-NEXT: str q0, [x2]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: commutable_uabdl:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: ldr d0, [x0]
+; CHECK-GI-NEXT: ldr d1, [x1]
+; CHECK-GI-NEXT: uabdl.8h v2, v0, v1
+; CHECK-GI-NEXT: uabdl.8h v0, v1, v0
+; CHECK-GI-NEXT: str q2, [x2]
+; CHECK-GI-NEXT: str q0, [x2]
+; CHECK-GI-NEXT: ret
+ %tmp1 = load <8 x i8>, ptr %A
+ %tmp2 = load <8 x i8>, ptr %B
+ %tmp3 = call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
+ %tmp4 = zext <8 x i8> %tmp3 to <8 x i16>
+ store <8 x i16> %tmp4, ptr %C
+ %tmp5 = call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> %tmp2, <8 x i8> %tmp1)
+ %tmp6 = zext <8 x i8> %tmp5 to <8 x i16>
+ %tmp7 = getelementptr i8, ptr %C, i64 16
+ store <8 x i16> %tmp6, ptr %C
+ ret void
+}
+
define <8 x i16> @uabdl2_8h(ptr %A, ptr %B) nounwind {
; CHECK-SD-LABEL: uabdl2_8h:
; CHECK-SD: // %bb.0:
diff --git a/llvm/test/CodeGen/AArch64/arm64-vmul.ll b/llvm/test/CodeGen/AArch64/arm64-vmul.ll
index 07400bbb2f58c..cac980fd99b69 100644
--- a/llvm/test/CodeGen/AArch64/arm64-vmul.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-vmul.ll
@@ -3,6 +3,7 @@
; RUN: llc -mtriple=aarch64-none-elf -mattr=+aes -global-isel -global-isel-abort=2 2>&1 < %s | FileCheck %s --check-prefixes=CHECK,CHECK-GI
; CHECK-GI: warning: Instruction selection used fallback path for pmull8h
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for commutable_pmull8h
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqdmulh_1s
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_2s
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_4s
@@ -78,6 +79,21 @@ define <2 x i64> @smull2d(ptr %A, ptr %B) nounwind {
ret <2 x i64> %tmp3
}
+define void @commutable_smull(<2 x i32> %A, <2 x i32> %B, ptr %C) {
+; CHECK-LABEL: commutable_smull:
+; CHECK: // %bb.0:
+; CHECK-NEXT: smull v2.2d, v0.2s, v1.2s
+; CHECK-NEXT: smull v0.2d, v1.2s, v0.2s
+; CHECK-NEXT: stp q2, q0, [x0]
+; CHECK-NEXT: ret
+ %1 = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %A, <2 x i32> %B)
+ %2 = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %B, <2 x i32> %A)
+ store <2 x i64> %1, ptr %C
+ %3 = getelementptr i8, ptr %C, i64 16
+ store <2 x i64> %2, ptr %3
+ ret void
+}
+
declare <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8>, <8 x i8>) nounwind readnone
declare <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16>, <4 x i16>) nounwind readnone
declare <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32>, <2 x i32>) nounwind readnone
@@ -121,6 +137,21 @@ define <2 x i64> @umull2d(ptr %A, ptr %B) nounwind {
ret <2 x i64> %tmp3
}
+define void @commutable_umull(<2 x i32> %A, <2 x i32> %B, ptr %C) {
+; CHECK-LABEL: commutable_umull:
+; CHECK: // %bb.0:
+; CHECK-NEXT: umull v2.2d, v0.2s, v1.2s
+; CHECK-NEXT: umull v0.2d, v1.2s, v0.2s
+; CHECK-NEXT: stp q2, q0, [x0]
+; CHECK-NEXT: ret
+ %1 = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %A, <2 x i32> %B)
+ %2 = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %B, <2 x i32> %A)
+ store <2 x i64> %1, ptr %C
+ %3 = getelementptr i8, ptr %C, i64 16
+ store <2 x i64> %2, ptr %3
+ ret void
+}
+
declare <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8>, <8 x i8>) nounwind readnone
declare <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16>, <4 x i16>) nounwind readnone
declare <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32>, <2 x i32>) nounwind readnone
@@ -212,6 +243,21 @@ define <8 x i16> @pmull8h(ptr %A, ptr %B) nounwind {
ret <8 x i16> %tmp3
}
+define void @commutable_pmull8h(<8 x i8> %A, <8 x i8> %B, ptr %C) {
+; CHECK-LABEL: commutable_pmull8h:
+; CHECK: // %bb.0:
+; CHECK-NEXT: pmull v2.8h, v0.8b, v1.8b
+; CHECK-NEXT: pmull v0.8h, v1.8b, v0.8b
+; CHECK-NEXT: stp q2, q0, [x0]
+; CHECK-NEXT: ret
+ %1 = call <8 x i16> @llvm.aarch64.neon.pmull.v8i16(<8 x i8> %A, <8 x i8> %B)
+ %2 = call <8 x i16> @llvm.aarch64.neon.pmull.v8i16(<8 x i8> %B, <8 x i8> %A)
+ store <8 x i16> %1, ptr %C
+ %3 = getelementptr i8, ptr %C, i64 16
+ store <8 x i16> %2, ptr %3
+ ret void
+}
+
declare <8 x i16> @llvm.aarch64.neon.pmull.v8i16(<8 x i8>, <8 x i8>) nounwind readnone
define <4 x i16> @sqdmulh_4h(ptr %A, ptr %B) nounwind {
@@ -487,10 +533,10 @@ define void @smlal2d_chain_with_constant(ptr %dst, <2 x i32> %v1, <2 x i32> %v2,
; CHECK-GI-LABEL: smlal2d_chain_with_constant:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: mvn v3.8b, v2.8b
-; CHECK-GI-NEXT: adrp x8, .LCPI27_0
+; CHECK-GI-NEXT: adrp x8, .LCPI30_0
; CHECK-GI-NEXT: smull v1.2d, v1.2s, v3.2s
; CHECK-GI-NEXT: smlal v1.2d, v0.2s, v2.2s
-; CHECK-GI-NEXT: ldr q0, [x8, :lo12:.LCPI27_0]
+; CHECK-GI-NEXT: ldr q0, [x8, :lo12:.LCPI30_0]
; CHECK-GI-NEXT: add v0.2d, v1.2d, v0.2d
; CHECK-GI-NEXT: str q0, [x0]
; CHECK-GI-NEXT: ret
@@ -566,8 +612,8 @@ define void @smlsl2d_chain_with_constant(ptr %dst, <2 x i32> %v1, <2 x i32> %v2,
;
; CHECK-GI-LABEL: smlsl2d_chain_with_constant:
; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: adrp x8, .LCPI31_0
-; CHECK-GI-NEXT: ldr q3, [x8, :lo12:.LCPI31_0]
+; CHECK-GI-NEXT: adrp x8, .LCPI34_0
+; CHECK-GI-NEXT: ldr q3, [x8, :lo12:.LCPI34_0]
; CHECK-GI-NEXT: smlsl v3.2d, v0.2s, v2.2s
; CHECK-GI-NEXT: mvn v0.8b, v2.8b
; CHECK-GI-NEXT: smlsl v3.2d, v1.2s, v0.2s
@@ -829,10 +875,10 @@ define void @umlal2d_chain_with_constant(ptr %dst, <2 x i32> %v1, <2 x i32> %v2,
; CHECK-GI-LABEL: umlal2d_chain_with_constant:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: mvn v3.8b, v2.8b
-; CHECK-GI-NEXT: adrp x8, .LCPI43_0
+; CHECK-GI-NEXT: adrp x8, .LCPI46_0
; CHECK-GI-NEXT: umull v1.2d, v1.2s, v3.2s
; CHECK-GI-NEXT: umlal v1.2d, v0.2s, v2.2s
-; CHECK-GI-NEXT: ldr q0, [x8, :lo12:.LCPI43_0]
+; CHECK-GI-NEXT: ldr q0, [x8, :lo12:.LCPI46_0]
; CHECK-GI-NEXT: add v0.2d, v1.2d, v0.2d
; CHECK-GI-NEXT: str q0, [x0]
; CHECK-GI-NEXT: ret
@@ -908,8 +954,8 @@ define void @umlsl2d_chain_with_constant(ptr %dst, <2 x i32> %v1, <2 x i32> %v2,
;
; CHECK-GI-LABEL: umlsl2d_chain_with_constant:
; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: adrp x8, .LCPI47_0
-; CHECK-GI-NEXT: ldr q3, [x8, :lo12:.LCPI47_0]
+; CHECK-GI-NEXT: adrp x8, .LCPI50_0
+; CHECK-GI-NEXT: ldr q3, [x8, :lo12:.LCPI50_0]
; CHECK-GI-NEXT: umlsl v3.2d, v0.2s, v2.2s
; CHECK-GI-NEXT: mvn v0.8b, v2.8b
; CHECK-GI-NEXT: umlsl v3.2d, v1.2s, v0.2s
@@ -3222,6 +3268,21 @@ define <16 x i8> @test_pmull_high_64(<2 x i64> %l, <2 x i64> %r) nounwind {
ret <16 x i8> %val
}
+define <16 x i8> @test_commutable_pmull_64(i64 %l, i64 %r) nounwind {
+; CHECK-LABEL: test_commutable_pmull_64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fmov d0, x1
+; CHECK-NEXT: fmov d1, x0
+; CHECK-NEXT: pmull v2.1q, v1.1d, v0.1d
+; CHECK-NEXT: pmull v0.1q, v0.1d, v1.1d
+; CHECK-NEXT: add v0.16b, v2.16b, v0.16b
+; CHECK-NEXT: ret
+ %1 = call <16 x i8> @llvm.aarch64.neon.pmull64(i64 %l, i64 %r)
+ %2 = call <16 x i8> @llvm.aarch64.neon.pmull64(i64 %r, i64 %l)
+ %3 = add <16 x i8> %1, %2
+ ret <16 x i8> %3
+}
+
declare <16 x i8> @llvm.aarch64.neon.pmull64(i64, i64)
define <1 x i64> @test_mul_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) nounwind {