[llvm] [AArch64] Use sve instructions for fixed-width smulh/umulh. (PR #166168)

via llvm-commits llvm-commits at lists.llvm.org
Tue Dec 2 11:13:48 PST 2025


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-backend-aarch64

Author: David Green (davemgreen)

<details>
<summary>Changes</summary>

Like v2i64 mul and operations such as divide, we should be able to use the SVE
umulh and smulh instructions with 128-bit vectors, provided that we have
SVE/SVE2. There are a number of other instructions that should presumably be
treated the same way.

---

Patch is 22.85 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/166168.diff


3 Files Affected:

- (modified) llvm/lib/Target/AArch64/AArch64ISelLowering.cpp (+2-4) 
- (modified) llvm/test/CodeGen/AArch64/sve-int-mulh-pred.ll (+80-60) 
- (modified) llvm/test/CodeGen/AArch64/sve2-int-mulh.ll (+64-60) 


``````````diff
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index a83185d6ade20..f9b86a07fe6e3 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -1841,6 +1841,8 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
                     MVT::v4i32, MVT::v1i64, MVT::v2i64}) {
       setOperationAction(ISD::SDIV, VT, Custom);
       setOperationAction(ISD::UDIV, VT, Custom);
+      setOperationAction(ISD::MULHS, VT, Custom);
+      setOperationAction(ISD::MULHU, VT, Custom);
     }
 
     // NEON doesn't support 64-bit vector integer muls, but SVE does.
@@ -1877,10 +1879,6 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
       setOperationAction(ISD::CTLZ, MVT::v1i64, Custom);
       setOperationAction(ISD::CTLZ, MVT::v2i64, Custom);
       setOperationAction(ISD::CTTZ, MVT::v1i64, Custom);
-      setOperationAction(ISD::MULHS, MVT::v1i64, Custom);
-      setOperationAction(ISD::MULHS, MVT::v2i64, Custom);
-      setOperationAction(ISD::MULHU, MVT::v1i64, Custom);
-      setOperationAction(ISD::MULHU, MVT::v2i64, Custom);
       setOperationAction(ISD::SMAX, MVT::v1i64, Custom);
       setOperationAction(ISD::SMAX, MVT::v2i64, Custom);
       setOperationAction(ISD::SMIN, MVT::v1i64, Custom);
diff --git a/llvm/test/CodeGen/AArch64/sve-int-mulh-pred.ll b/llvm/test/CodeGen/AArch64/sve-int-mulh-pred.ll
index 146720febf486..3c817e5ddbd82 100644
--- a/llvm/test/CodeGen/AArch64/sve-int-mulh-pred.ll
+++ b/llvm/test/CodeGen/AArch64/sve-int-mulh-pred.ll
@@ -127,9 +127,11 @@ define <vscale x 2 x i64> @umulh_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %
 define <16 x i8> @smulh_v16i8(<16 x i8> %a, <16 x i8> %b) {
 ; CHECK-LABEL: smulh_v16i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    smull2 v2.8h, v0.16b, v1.16b
-; CHECK-NEXT:    smull v0.8h, v0.8b, v1.8b
-; CHECK-NEXT:    uzp2 v0.16b, v0.16b, v2.16b
+; CHECK-NEXT:    ptrue p0.b, vl16
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
+; CHECK-NEXT:    smulh z0.b, p0/m, z0.b, z1.b
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
   %1 = sext <16 x i8> %a to <16 x i16>
   %2 = sext <16 x i8> %b to <16 x i16>
@@ -142,9 +144,11 @@ define <16 x i8> @smulh_v16i8(<16 x i8> %a, <16 x i8> %b) {
 define <8 x i16> @smulh_v8i16(<8 x i16> %a, <8 x i16> %b) {
 ; CHECK-LABEL: smulh_v8i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    smull2 v2.4s, v0.8h, v1.8h
-; CHECK-NEXT:    smull v0.4s, v0.4h, v1.4h
-; CHECK-NEXT:    uzp2 v0.8h, v0.8h, v2.8h
+; CHECK-NEXT:    ptrue p0.h, vl8
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
+; CHECK-NEXT:    smulh z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
   %1 = sext <8 x i16> %a to <8 x i32>
   %2 = sext <8 x i16> %b to <8 x i32>
@@ -157,9 +161,11 @@ define <8 x i16> @smulh_v8i16(<8 x i16> %a, <8 x i16> %b) {
 define <4 x i32> @smulh_v4i32(<4 x i32> %a, <4 x i32> %b) {
 ; CHECK-LABEL: smulh_v4i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    smull2 v2.2d, v0.4s, v1.4s
-; CHECK-NEXT:    smull v0.2d, v0.2s, v1.2s
-; CHECK-NEXT:    uzp2 v0.4s, v0.4s, v2.4s
+; CHECK-NEXT:    ptrue p0.s, vl4
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
+; CHECK-NEXT:    smulh z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
   %1 = sext <4 x i32> %a to <4 x i64>
   %2 = sext <4 x i32> %b to <4 x i64>
@@ -172,15 +178,11 @@ define <4 x i32> @smulh_v4i32(<4 x i32> %a, <4 x i32> %b) {
 define <2 x i64> @smulh_v2i64(<2 x i64> %a, <2 x i64> %b) {
 ; CHECK-LABEL: smulh_v2i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov x8, v0.d[1]
-; CHECK-NEXT:    mov x9, v1.d[1]
-; CHECK-NEXT:    fmov x10, d0
-; CHECK-NEXT:    fmov x11, d1
-; CHECK-NEXT:    smulh x10, x10, x11
-; CHECK-NEXT:    smulh x8, x8, x9
-; CHECK-NEXT:    fmov d0, x10
-; CHECK-NEXT:    fmov d1, x8
-; CHECK-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-NEXT:    ptrue p0.d, vl2
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
+; CHECK-NEXT:    smulh z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
   %1 = sext <2 x i64> %a to <2 x i128>
   %2 = sext <2 x i64> %b to <2 x i128>
@@ -193,9 +195,11 @@ define <2 x i64> @smulh_v2i64(<2 x i64> %a, <2 x i64> %b) {
 define <16 x i8> @umulh_v16i8(<16 x i8> %a, <16 x i8> %b) {
 ; CHECK-LABEL: umulh_v16i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    umull2 v2.8h, v0.16b, v1.16b
-; CHECK-NEXT:    umull v0.8h, v0.8b, v1.8b
-; CHECK-NEXT:    uzp2 v0.16b, v0.16b, v2.16b
+; CHECK-NEXT:    ptrue p0.b, vl16
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
+; CHECK-NEXT:    umulh z0.b, p0/m, z0.b, z1.b
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
   %1 = zext <16 x i8> %a to <16 x i16>
   %2 = zext <16 x i8> %b to <16 x i16>
@@ -208,9 +212,11 @@ define <16 x i8> @umulh_v16i8(<16 x i8> %a, <16 x i8> %b) {
 define <8 x i16> @umulh_v8i16(<8 x i16> %a, <8 x i16> %b) {
 ; CHECK-LABEL: umulh_v8i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    umull2 v2.4s, v0.8h, v1.8h
-; CHECK-NEXT:    umull v0.4s, v0.4h, v1.4h
-; CHECK-NEXT:    uzp2 v0.8h, v0.8h, v2.8h
+; CHECK-NEXT:    ptrue p0.h, vl8
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
+; CHECK-NEXT:    umulh z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
   %1 = zext <8 x i16> %a to <8 x i32>
   %2 = zext <8 x i16> %b to <8 x i32>
@@ -223,9 +229,11 @@ define <8 x i16> @umulh_v8i16(<8 x i16> %a, <8 x i16> %b) {
 define <4 x i32> @umulh_v4i32(<4 x i32> %a, <4 x i32> %b) {
 ; CHECK-LABEL: umulh_v4i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    umull2 v2.2d, v0.4s, v1.4s
-; CHECK-NEXT:    umull v0.2d, v0.2s, v1.2s
-; CHECK-NEXT:    uzp2 v0.4s, v0.4s, v2.4s
+; CHECK-NEXT:    ptrue p0.s, vl4
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
+; CHECK-NEXT:    umulh z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
   %1 = zext <4 x i32> %a to <4 x i64>
   %2 = zext <4 x i32> %b to <4 x i64>
@@ -238,15 +246,11 @@ define <4 x i32> @umulh_v4i32(<4 x i32> %a, <4 x i32> %b) {
 define <2 x i64> @umulh_v2i64(<2 x i64> %a, <2 x i64> %b) {
 ; CHECK-LABEL: umulh_v2i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov x8, v0.d[1]
-; CHECK-NEXT:    mov x9, v1.d[1]
-; CHECK-NEXT:    fmov x10, d0
-; CHECK-NEXT:    fmov x11, d1
-; CHECK-NEXT:    umulh x10, x10, x11
-; CHECK-NEXT:    umulh x8, x8, x9
-; CHECK-NEXT:    fmov d0, x10
-; CHECK-NEXT:    fmov d1, x8
-; CHECK-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-NEXT:    ptrue p0.d, vl2
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
+; CHECK-NEXT:    umulh z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
   %1 = zext <2 x i64> %a to <2 x i128>
   %2 = zext <2 x i64> %b to <2 x i128>
@@ -263,8 +267,11 @@ define <2 x i64> @umulh_v2i64(<2 x i64> %a, <2 x i64> %b) {
 define <8 x i8> @smulh_v8i8(<8 x i8> %a, <8 x i8> %b) {
 ; CHECK-LABEL: smulh_v8i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    smull v0.8h, v0.8b, v1.8b
-; CHECK-NEXT:    shrn v0.8b, v0.8h, #8
+; CHECK-NEXT:    ptrue p0.b, vl8
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
+; CHECK-NEXT:    smulh z0.b, p0/m, z0.b, z1.b
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %1 = sext <8 x i8> %a to <8 x i16>
   %2 = sext <8 x i8> %b to <8 x i16>
@@ -277,8 +284,11 @@ define <8 x i8> @smulh_v8i8(<8 x i8> %a, <8 x i8> %b) {
 define <4 x i16> @smulh_v4i16(<4 x i16> %a, <4 x i16> %b) {
 ; CHECK-LABEL: smulh_v4i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    smull v0.4s, v0.4h, v1.4h
-; CHECK-NEXT:    shrn v0.4h, v0.4s, #16
+; CHECK-NEXT:    ptrue p0.h, vl4
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
+; CHECK-NEXT:    smulh z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %1 = sext <4 x i16> %a to <4 x i32>
   %2 = sext <4 x i16> %b to <4 x i32>
@@ -291,8 +301,11 @@ define <4 x i16> @smulh_v4i16(<4 x i16> %a, <4 x i16> %b) {
 define <2 x i32> @smulh_v2i32(<2 x i32> %a, <2 x i32> %b) {
 ; CHECK-LABEL: smulh_v2i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    smull v0.2d, v0.2s, v1.2s
-; CHECK-NEXT:    shrn v0.2s, v0.2d, #32
+; CHECK-NEXT:    ptrue p0.s, vl2
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
+; CHECK-NEXT:    smulh z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %1 = sext <2 x i32> %a to <2 x i64>
   %2 = sext <2 x i32> %b to <2 x i64>
@@ -305,12 +318,11 @@ define <2 x i32> @smulh_v2i32(<2 x i32> %a, <2 x i32> %b) {
 define <1 x i64> @smulh_v1i64(<1 x i64> %a, <1 x i64> %b) {
 ; CHECK-LABEL: smulh_v1i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT:    fmov x8, d0
-; CHECK-NEXT:    fmov x9, d1
-; CHECK-NEXT:    smulh x8, x8, x9
-; CHECK-NEXT:    fmov d0, x8
+; CHECK-NEXT:    ptrue p0.d, vl1
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
+; CHECK-NEXT:    smulh z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %1 = sext <1 x i64> %a to <1 x i128>
   %2 = sext <1 x i64> %b to <1 x i128>
@@ -323,8 +335,11 @@ define <1 x i64> @smulh_v1i64(<1 x i64> %a, <1 x i64> %b) {
 define <8 x i8> @umulh_v8i8(<8 x i8> %a, <8 x i8> %b) {
 ; CHECK-LABEL: umulh_v8i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    umull v0.8h, v0.8b, v1.8b
-; CHECK-NEXT:    shrn v0.8b, v0.8h, #8
+; CHECK-NEXT:    ptrue p0.b, vl8
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
+; CHECK-NEXT:    umulh z0.b, p0/m, z0.b, z1.b
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %1 = zext <8 x i8> %a to <8 x i16>
   %2 = zext <8 x i8> %b to <8 x i16>
@@ -337,8 +352,11 @@ define <8 x i8> @umulh_v8i8(<8 x i8> %a, <8 x i8> %b) {
 define <4 x i16> @umulh_v4i16(<4 x i16> %a, <4 x i16> %b) {
 ; CHECK-LABEL: umulh_v4i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    umull v0.4s, v0.4h, v1.4h
-; CHECK-NEXT:    shrn v0.4h, v0.4s, #16
+; CHECK-NEXT:    ptrue p0.h, vl4
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
+; CHECK-NEXT:    umulh z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %1 = zext <4 x i16> %a to <4 x i32>
   %2 = zext <4 x i16> %b to <4 x i32>
@@ -351,8 +369,11 @@ define <4 x i16> @umulh_v4i16(<4 x i16> %a, <4 x i16> %b) {
 define <2 x i32> @umulh_v2i32(<2 x i32> %a, <2 x i32> %b) {
 ; CHECK-LABEL: umulh_v2i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    umull v0.2d, v0.2s, v1.2s
-; CHECK-NEXT:    shrn v0.2s, v0.2d, #32
+; CHECK-NEXT:    ptrue p0.s, vl2
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
+; CHECK-NEXT:    umulh z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %1 = zext <2 x i32> %a to <2 x i64>
   %2 = zext <2 x i32> %b to <2 x i64>
@@ -365,12 +386,11 @@ define <2 x i32> @umulh_v2i32(<2 x i32> %a, <2 x i32> %b) {
 define <1 x i64> @umulh_v1i64(<1 x i64> %a, <1 x i64> %b) {
 ; CHECK-LABEL: umulh_v1i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT:    fmov x8, d0
-; CHECK-NEXT:    fmov x9, d1
-; CHECK-NEXT:    umulh x8, x8, x9
-; CHECK-NEXT:    fmov d0, x8
+; CHECK-NEXT:    ptrue p0.d, vl1
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
+; CHECK-NEXT:    umulh z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %1 = zext <1 x i64> %a to <1 x i128>
   %2 = zext <1 x i64> %b to <1 x i128>
diff --git a/llvm/test/CodeGen/AArch64/sve2-int-mulh.ll b/llvm/test/CodeGen/AArch64/sve2-int-mulh.ll
index d7534712b53a0..917d8e6ec22ef 100644
--- a/llvm/test/CodeGen/AArch64/sve2-int-mulh.ll
+++ b/llvm/test/CodeGen/AArch64/sve2-int-mulh.ll
@@ -119,9 +119,10 @@ define <vscale x 2 x i64> @umulh_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %
 define <16 x i8> @smulh_v16i8(<16 x i8> %a, <16 x i8> %b) {
 ; CHECK-LABEL: smulh_v16i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    smull2 v2.8h, v0.16b, v1.16b
-; CHECK-NEXT:    smull v0.8h, v0.8b, v1.8b
-; CHECK-NEXT:    uzp2 v0.16b, v0.16b, v2.16b
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
+; CHECK-NEXT:    smulh z0.b, z0.b, z1.b
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
   %1 = sext <16 x i8> %a to <16 x i16>
   %2 = sext <16 x i8> %b to <16 x i16>
@@ -134,9 +135,10 @@ define <16 x i8> @smulh_v16i8(<16 x i8> %a, <16 x i8> %b) {
 define <8 x i16> @smulh_v8i16(<8 x i16> %a, <8 x i16> %b) {
 ; CHECK-LABEL: smulh_v8i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    smull2 v2.4s, v0.8h, v1.8h
-; CHECK-NEXT:    smull v0.4s, v0.4h, v1.4h
-; CHECK-NEXT:    uzp2 v0.8h, v0.8h, v2.8h
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
+; CHECK-NEXT:    smulh z0.h, z0.h, z1.h
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
   %1 = sext <8 x i16> %a to <8 x i32>
   %2 = sext <8 x i16> %b to <8 x i32>
@@ -149,9 +151,10 @@ define <8 x i16> @smulh_v8i16(<8 x i16> %a, <8 x i16> %b) {
 define <4 x i32> @smulh_v4i32(<4 x i32> %a, <4 x i32> %b) {
 ; CHECK-LABEL: smulh_v4i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    smull2 v2.2d, v0.4s, v1.4s
-; CHECK-NEXT:    smull v0.2d, v0.2s, v1.2s
-; CHECK-NEXT:    uzp2 v0.4s, v0.4s, v2.4s
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
+; CHECK-NEXT:    smulh z0.s, z0.s, z1.s
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
   %1 = sext <4 x i32> %a to <4 x i64>
   %2 = sext <4 x i32> %b to <4 x i64>
@@ -164,15 +167,10 @@ define <4 x i32> @smulh_v4i32(<4 x i32> %a, <4 x i32> %b) {
 define <2 x i64> @smulh_v2i64(<2 x i64> %a, <2 x i64> %b) {
 ; CHECK-LABEL: smulh_v2i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov x8, v0.d[1]
-; CHECK-NEXT:    mov x9, v1.d[1]
-; CHECK-NEXT:    fmov x10, d0
-; CHECK-NEXT:    fmov x11, d1
-; CHECK-NEXT:    smulh x10, x10, x11
-; CHECK-NEXT:    smulh x8, x8, x9
-; CHECK-NEXT:    fmov d0, x10
-; CHECK-NEXT:    fmov d1, x8
-; CHECK-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
+; CHECK-NEXT:    smulh z0.d, z0.d, z1.d
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
   %1 = sext <2 x i64> %a to <2 x i128>
   %2 = sext <2 x i64> %b to <2 x i128>
@@ -185,9 +183,10 @@ define <2 x i64> @smulh_v2i64(<2 x i64> %a, <2 x i64> %b) {
 define <16 x i8> @umulh_v16i8(<16 x i8> %a, <16 x i8> %b) {
 ; CHECK-LABEL: umulh_v16i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    umull2 v2.8h, v0.16b, v1.16b
-; CHECK-NEXT:    umull v0.8h, v0.8b, v1.8b
-; CHECK-NEXT:    uzp2 v0.16b, v0.16b, v2.16b
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
+; CHECK-NEXT:    umulh z0.b, z0.b, z1.b
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
   %1 = zext <16 x i8> %a to <16 x i16>
   %2 = zext <16 x i8> %b to <16 x i16>
@@ -200,9 +199,10 @@ define <16 x i8> @umulh_v16i8(<16 x i8> %a, <16 x i8> %b) {
 define <8 x i16> @umulh_v8i16(<8 x i16> %a, <8 x i16> %b) {
 ; CHECK-LABEL: umulh_v8i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    umull2 v2.4s, v0.8h, v1.8h
-; CHECK-NEXT:    umull v0.4s, v0.4h, v1.4h
-; CHECK-NEXT:    uzp2 v0.8h, v0.8h, v2.8h
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
+; CHECK-NEXT:    umulh z0.h, z0.h, z1.h
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
   %1 = zext <8 x i16> %a to <8 x i32>
   %2 = zext <8 x i16> %b to <8 x i32>
@@ -215,9 +215,10 @@ define <8 x i16> @umulh_v8i16(<8 x i16> %a, <8 x i16> %b) {
 define <4 x i32> @umulh_v4i32(<4 x i32> %a, <4 x i32> %b) {
 ; CHECK-LABEL: umulh_v4i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    umull2 v2.2d, v0.4s, v1.4s
-; CHECK-NEXT:    umull v0.2d, v0.2s, v1.2s
-; CHECK-NEXT:    uzp2 v0.4s, v0.4s, v2.4s
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
+; CHECK-NEXT:    umulh z0.s, z0.s, z1.s
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
   %1 = zext <4 x i32> %a to <4 x i64>
   %2 = zext <4 x i32> %b to <4 x i64>
@@ -230,15 +231,10 @@ define <4 x i32> @umulh_v4i32(<4 x i32> %a, <4 x i32> %b) {
 define <2 x i64> @umulh_v2i64(<2 x i64> %a, <2 x i64> %b) {
 ; CHECK-LABEL: umulh_v2i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov x8, v0.d[1]
-; CHECK-NEXT:    mov x9, v1.d[1]
-; CHECK-NEXT:    fmov x10, d0
-; CHECK-NEXT:    fmov x11, d1
-; CHECK-NEXT:    umulh x10, x10, x11
-; CHECK-NEXT:    umulh x8, x8, x9
-; CHECK-NEXT:    fmov d0, x10
-; CHECK-NEXT:    fmov d1, x8
-; CHECK-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
+; CHECK-NEXT:    umulh z0.d, z0.d, z1.d
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
   %1 = zext <2 x i64> %a to <2 x i128>
   %2 = zext <2 x i64> %b to <2 x i128>
@@ -255,8 +251,10 @@ define <2 x i64> @umulh_v2i64(<2 x i64> %a, <2 x i64> %b) {
 define <8 x i8> @smulh_v8i8(<8 x i8> %a, <8 x i8> %b) {
 ; CHECK-LABEL: smulh_v8i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    smull v0.8h, v0.8b, v1.8b
-; CHECK-NEXT:    shrn v0.8b, v0.8h, #8
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
+; CHECK-NEXT:    smulh z0.b, z0.b, z1.b
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %1 = sext <8 x i8> %a to <8 x i16>
   %2 = sext <8 x i8> %b to <8 x i16>
@@ -269,8 +267,10 @@ define <8 x i8> @smulh_v8i8(<8 x i8> %a, <8 x i8> %b) {
 define <4 x i16> @smulh_v4i16(<4 x i16> %a, <4 x i16> %b) {
 ; CHECK-LABEL: smulh_v4i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    smull v0.4s, v0.4h, v1.4h
-; CHECK-NEXT:    shrn v0.4h, v0.4s, #16
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
+; CHECK-NEXT:    smulh z0.h, z0.h, z1.h
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %1 = sext <4 x i16> %a to <4 x i32>
   %2 = sext <4 x i16> %b to <4 x i32>
@@ -283,8 +283,10 @@ define <4 x i16> @smulh_v4i16(<4 x i16> %a, <4 x i16> %b) {
 define <2 x i32> @smulh_v2i32(<2 x i32> %a, <2 x i32> %b) {
 ; CHECK-LABEL: smulh_v2i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    smull v0.2d, v0.2s, v1.2s
-; CHECK-NEXT:    shrn v0.2s, v0.2d, #32
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
+; CHECK-NEXT:    smulh z0.s, z0.s, z1.s
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %1 = sext <2 x i32> %a to <2 x i64>
   %2 = sext <2 x i32> %b to <2 x i64>
@@ -297,12 +299,10 @@ define <2 x i32> @smulh_v2i32(<2 x i32> %a, <2 x i32> %b) {
 define <1 x i64> @smulh_v1i64(<1 x i64> %a, <1 x i64> %b) {
 ; CHECK-LABEL: smulh_v1i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
-;...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/166168


More information about the llvm-commits mailing list