[llvm] [AArch64] Mark umull as commutative (PR #152158)
Cullen Rhodes via llvm-commits
llvm-commits at lists.llvm.org
Tue Aug 5 08:18:06 PDT 2025
https://github.com/c-rhodes created https://github.com/llvm/llvm-project/pull/152158
Fixes #61461.
From cfad0af2e9c6193eb5e2263fad23c7bb8f83ba79 Mon Sep 17 00:00:00 2001
From: Cullen Rhodes <cullen.rhodes at arm.com>
Date: Tue, 5 Aug 2025 14:14:01 +0000
Subject: [PATCH 1/2] [AArch64] Precommit test for commutable umull (gh-issue #61461)
---
llvm/test/CodeGen/AArch64/arm64-vmul.ll | 35 +++++++++++++++++++------
1 file changed, 27 insertions(+), 8 deletions(-)
diff --git a/llvm/test/CodeGen/AArch64/arm64-vmul.ll b/llvm/test/CodeGen/AArch64/arm64-vmul.ll
index 07400bbb2f58c..22a9b18b0863f 100644
--- a/llvm/test/CodeGen/AArch64/arm64-vmul.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-vmul.ll
@@ -121,6 +121,25 @@ define <2 x i64> @umull2d(ptr %A, ptr %B) nounwind {
ret <2 x i64> %tmp3
}
+define void @commutable_umull(ptr %A, ptr %B) {
+; CHECK-LABEL: commutable_umull:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldp d0, d1, [x0]
+; CHECK-NEXT: umull v2.2d, v0.2s, v1.2s
+; CHECK-NEXT: umull v0.2d, v1.2s, v0.2s
+; CHECK-NEXT: stp q2, q0, [x1]
+; CHECK-NEXT: ret
+ %1 = load <2 x i32>, ptr %A
+ %A.gep.1 = getelementptr i8, ptr %A, i64 8
+ %2 = load <2 x i32>, ptr %A.gep.1
+ %3 = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %1, <2 x i32> %2)
+ store <2 x i64> %3, ptr %B
+ %4 = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %2, <2 x i32> %1)
+ %B.gep.2 = getelementptr i8, ptr %B, i64 16
+ store <2 x i64> %4, ptr %B.gep.2
+ ret void
+}
+
declare <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8>, <8 x i8>) nounwind readnone
declare <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16>, <4 x i16>) nounwind readnone
declare <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32>, <2 x i32>) nounwind readnone
@@ -487,10 +506,10 @@ define void @smlal2d_chain_with_constant(ptr %dst, <2 x i32> %v1, <2 x i32> %v2,
; CHECK-GI-LABEL: smlal2d_chain_with_constant:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: mvn v3.8b, v2.8b
-; CHECK-GI-NEXT: adrp x8, .LCPI27_0
+; CHECK-GI-NEXT: adrp x8, .LCPI28_0
; CHECK-GI-NEXT: smull v1.2d, v1.2s, v3.2s
; CHECK-GI-NEXT: smlal v1.2d, v0.2s, v2.2s
-; CHECK-GI-NEXT: ldr q0, [x8, :lo12:.LCPI27_0]
+; CHECK-GI-NEXT: ldr q0, [x8, :lo12:.LCPI28_0]
; CHECK-GI-NEXT: add v0.2d, v1.2d, v0.2d
; CHECK-GI-NEXT: str q0, [x0]
; CHECK-GI-NEXT: ret
@@ -566,8 +585,8 @@ define void @smlsl2d_chain_with_constant(ptr %dst, <2 x i32> %v1, <2 x i32> %v2,
;
; CHECK-GI-LABEL: smlsl2d_chain_with_constant:
; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: adrp x8, .LCPI31_0
-; CHECK-GI-NEXT: ldr q3, [x8, :lo12:.LCPI31_0]
+; CHECK-GI-NEXT: adrp x8, .LCPI32_0
+; CHECK-GI-NEXT: ldr q3, [x8, :lo12:.LCPI32_0]
; CHECK-GI-NEXT: smlsl v3.2d, v0.2s, v2.2s
; CHECK-GI-NEXT: mvn v0.8b, v2.8b
; CHECK-GI-NEXT: smlsl v3.2d, v1.2s, v0.2s
@@ -829,10 +848,10 @@ define void @umlal2d_chain_with_constant(ptr %dst, <2 x i32> %v1, <2 x i32> %v2,
; CHECK-GI-LABEL: umlal2d_chain_with_constant:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: mvn v3.8b, v2.8b
-; CHECK-GI-NEXT: adrp x8, .LCPI43_0
+; CHECK-GI-NEXT: adrp x8, .LCPI44_0
; CHECK-GI-NEXT: umull v1.2d, v1.2s, v3.2s
; CHECK-GI-NEXT: umlal v1.2d, v0.2s, v2.2s
-; CHECK-GI-NEXT: ldr q0, [x8, :lo12:.LCPI43_0]
+; CHECK-GI-NEXT: ldr q0, [x8, :lo12:.LCPI44_0]
; CHECK-GI-NEXT: add v0.2d, v1.2d, v0.2d
; CHECK-GI-NEXT: str q0, [x0]
; CHECK-GI-NEXT: ret
@@ -908,8 +927,8 @@ define void @umlsl2d_chain_with_constant(ptr %dst, <2 x i32> %v1, <2 x i32> %v2,
;
; CHECK-GI-LABEL: umlsl2d_chain_with_constant:
; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: adrp x8, .LCPI47_0
-; CHECK-GI-NEXT: ldr q3, [x8, :lo12:.LCPI47_0]
+; CHECK-GI-NEXT: adrp x8, .LCPI48_0
+; CHECK-GI-NEXT: ldr q3, [x8, :lo12:.LCPI48_0]
; CHECK-GI-NEXT: umlsl v3.2d, v0.2s, v2.2s
; CHECK-GI-NEXT: mvn v0.8b, v2.8b
; CHECK-GI-NEXT: umlsl v3.2d, v1.2s, v0.2s
From e3b4f8923e06dbf58c372591a79ac19704722fed Mon Sep 17 00:00:00 2001
From: Cullen Rhodes <cullen.rhodes at arm.com>
Date: Tue, 24 Jun 2025 09:09:45 +0000
Subject: [PATCH 2/2] [AArch64] Mark umull as commutative
Fixes #61461.
---
llvm/include/llvm/IR/IntrinsicsAArch64.td | 4 +++-
llvm/lib/Target/AArch64/AArch64InstrInfo.td | 1 +
llvm/test/CodeGen/AArch64/arm64-vmul.ll | 5 ++---
3 files changed, 6 insertions(+), 4 deletions(-)
diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td
index ca6e2128812f7..955171ff70a74 100644
--- a/llvm/include/llvm/IR/IntrinsicsAArch64.td
+++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td
@@ -301,7 +301,9 @@ let TargetPrefix = "aarch64", IntrProperties = [IntrNoMem] in {
// Vector Long Multiply
def int_aarch64_neon_smull : AdvSIMD_2VectorArg_Long_Intrinsic;
- def int_aarch64_neon_umull : AdvSIMD_2VectorArg_Long_Intrinsic;
+ def int_aarch64_neon_umull : AdvSIMD_2VectorArg_Long_Intrinsic {
+ let IntrProperties = [IntrNoMem, Commutative];
+ }
def int_aarch64_neon_pmull : AdvSIMD_2VectorArg_Long_Intrinsic;
// 64-bit polynomial multiply really returns an i128, which is not legal. Fake
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index ac31236d8f2cf..4eec163c3c645 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -6835,6 +6835,7 @@ defm UMLAL : SIMDLongThreeVectorTiedBHS<1, 0b1000, "umlal",
TriOpFrag<(add node:$LHS, (AArch64umull node:$MHS, node:$RHS))>>;
defm UMLSL : SIMDLongThreeVectorTiedBHS<1, 0b1010, "umlsl",
TriOpFrag<(sub node:$LHS, (AArch64umull node:$MHS, node:$RHS))>>;
+let isCommutable = 1 in
defm UMULL : SIMDLongThreeVectorBHS<1, 0b1100, "umull", AArch64umull>;
defm USUBL : SIMDLongThreeVectorBHS<1, 0b0010, "usubl",
BinOpFrag<(sub (zanyext node:$LHS), (zanyext node:$RHS))>>;
diff --git a/llvm/test/CodeGen/AArch64/arm64-vmul.ll b/llvm/test/CodeGen/AArch64/arm64-vmul.ll
index 22a9b18b0863f..8a0b59ba4da69 100644
--- a/llvm/test/CodeGen/AArch64/arm64-vmul.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-vmul.ll
@@ -125,9 +125,8 @@ define void @commutable_umull(ptr %A, ptr %B) {
; CHECK-LABEL: commutable_umull:
; CHECK: // %bb.0:
; CHECK-NEXT: ldp d0, d1, [x0]
-; CHECK-NEXT: umull v2.2d, v0.2s, v1.2s
-; CHECK-NEXT: umull v0.2d, v1.2s, v0.2s
-; CHECK-NEXT: stp q2, q0, [x1]
+; CHECK-NEXT: umull v0.2d, v0.2s, v1.2s
+; CHECK-NEXT: stp q0, q0, [x1]
; CHECK-NEXT: ret
%1 = load <2 x i32>, ptr %A
%A.gep.1 = getelementptr i8, ptr %A, i64 8
More information about the llvm-commits mailing list