[llvm] d340ef6 - [AArch64][SVE] Generate smull/umull instead of sve v2i64 mul
David Green via llvm-commits
llvm-commits at lists.llvm.org
Wed Apr 26 14:12:06 PDT 2023
Author: David Green
Date: 2023-04-26T22:12:00+01:00
New Revision: d340ef697d905f81ede9747cb64160177bc0c53c
URL: https://github.com/llvm/llvm-project/commit/d340ef697d905f81ede9747cb64160177bc0c53c
DIFF: https://github.com/llvm/llvm-project/commit/d340ef697d905f81ede9747cb64160177bc0c53c.diff
LOG: [AArch64][SVE] Generate smull/umull instead of sve v2i64 mul
A NEON smull/umull should be preferred over an SVE v2i64 mul with two extends.
It is both fewer instructions and a lower-cost multiply.
Differential Revision: https://reviews.llvm.org/D148248
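As a rough illustration (not taken from the patch; the function name below is hypothetical), the kind of IR this change affects is a v2i64 multiply whose operands are both extended from v2i32. On an SVE-enabled target this previously lowered to a predicated SVE mul; with this change it is expected to select the NEON smull instead:

    define <2 x i64> @mul_sext_v2i32(<2 x i32> %a, <2 x i32> %b) {
      %ea = sext <2 x i32> %a to <2 x i64>
      %eb = sext <2 x i32> %b to <2 x i64>
      ; expected to lower to: smull v0.2d, v0.2s, v1.2s
      %m = mul <2 x i64> %ea, %eb
      ret <2 x i64> %m
    }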
Added:
Modified:
llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
llvm/test/CodeGen/AArch64/aarch64-smull.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index a5c19eb1bf12..1a7adf62047d 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -4534,8 +4534,8 @@ SDValue AArch64TargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
// If SVE is available then i64 vector multiplications can also be made legal.
- bool OverrideNEON = VT == MVT::v2i64 || VT == MVT::v1i64 ||
- Subtarget->forceStreamingCompatibleSVE();
+ bool OverrideNEON =
+ VT == MVT::v1i64 || Subtarget->forceStreamingCompatibleSVE();
if (VT.isScalableVector() || useSVEForFixedLengthVectorVT(VT, OverrideNEON))
return LowerToPredicatedOp(Op, DAG, AArch64ISD::MUL_PRED);
@@ -4551,10 +4551,14 @@ SDValue AArch64TargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const {
unsigned NewOpc = selectUmullSmull(N0, N1, DAG, DL, isMLA);
if (!NewOpc) {
- if (VT == MVT::v2i64)
+ if (VT == MVT::v2i64) {
+ // If SVE is available then i64 vector multiplications can also be made
+ // legal.
+ if (Subtarget->hasSVE())
+ return LowerToPredicatedOp(Op, DAG, AArch64ISD::MUL_PRED);
// Fall through to expand this. It is not legal.
return SDValue();
- else
+ } else
// Other vector multiplications are legal.
return Op;
}
diff --git a/llvm/test/CodeGen/AArch64/aarch64-smull.ll b/llvm/test/CodeGen/AArch64/aarch64-smull.ll
index da0e428ec33d..e4d733fd7c1c 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-smull.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-smull.ll
@@ -1,5 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s -o -| FileCheck %s
+; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-NEON
+; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+sve < %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SVE
define <8 x i16> @smull_v8i8_v8i16(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: smull_v8i8_v8i16:
@@ -119,19 +120,32 @@ define <4 x i32> @smull_zext_v4i16_v4i32(ptr %A, ptr %B) nounwind {
}
define <2 x i64> @smull_zext_v2i32_v2i64(ptr %A, ptr %B) nounwind {
-; CHECK-LABEL: smull_zext_v2i32_v2i64:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ldr d0, [x1]
-; CHECK-NEXT: ldrh w8, [x0]
-; CHECK-NEXT: ldrh w11, [x0, #2]
-; CHECK-NEXT: sshll v0.2d, v0.2s, #0
-; CHECK-NEXT: fmov x9, d0
-; CHECK-NEXT: mov x10, v0.d[1]
-; CHECK-NEXT: smull x8, w8, w9
-; CHECK-NEXT: smull x9, w11, w10
-; CHECK-NEXT: fmov d0, x8
-; CHECK-NEXT: mov v0.d[1], x9
-; CHECK-NEXT: ret
+; CHECK-NEON-LABEL: smull_zext_v2i32_v2i64:
+; CHECK-NEON: // %bb.0:
+; CHECK-NEON-NEXT: ldr d0, [x1]
+; CHECK-NEON-NEXT: ldrh w8, [x0]
+; CHECK-NEON-NEXT: ldrh w11, [x0, #2]
+; CHECK-NEON-NEXT: sshll v0.2d, v0.2s, #0
+; CHECK-NEON-NEXT: fmov x9, d0
+; CHECK-NEON-NEXT: mov x10, v0.d[1]
+; CHECK-NEON-NEXT: smull x8, w8, w9
+; CHECK-NEON-NEXT: smull x9, w11, w10
+; CHECK-NEON-NEXT: fmov d0, x8
+; CHECK-NEON-NEXT: mov v0.d[1], x9
+; CHECK-NEON-NEXT: ret
+;
+; CHECK-SVE-LABEL: smull_zext_v2i32_v2i64:
+; CHECK-SVE: // %bb.0:
+; CHECK-SVE-NEXT: ldrh w8, [x0]
+; CHECK-SVE-NEXT: ptrue p0.d, vl2
+; CHECK-SVE-NEXT: ldr d1, [x1]
+; CHECK-SVE-NEXT: fmov d0, x8
+; CHECK-SVE-NEXT: ldrh w8, [x0, #2]
+; CHECK-SVE-NEXT: sshll v1.2d, v1.2s, #0
+; CHECK-SVE-NEXT: mov v0.d[1], x8
+; CHECK-SVE-NEXT: mul z0.d, p0/m, z0.d, z1.d
+; CHECK-SVE-NEXT: // kill: def $q0 killed $q0 killed $z0
+; CHECK-SVE-NEXT: ret
%load.A = load <2 x i16>, ptr %A
%load.B = load <2 x i32>, ptr %B
%zext.A = zext <2 x i16> %load.A to <2 x i64>
@@ -611,7 +625,7 @@ define <8 x i16> @smull_noextvec_v8i8_v8i16(<8 x i8> %arg) nounwind {
; Do not use SMULL if the BUILD_VECTOR element values are too big.
; CHECK-LABEL: smull_noextvec_v8i8_v8i16:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #64537
+; CHECK-NEXT: mov w8, #64537 // =0xfc19
; CHECK-NEXT: sshll v0.8h, v0.8b, #0
; CHECK-NEXT: dup v1.8h, w8
; CHECK-NEXT: mul v0.8h, v0.8h, v1.8h
@@ -635,7 +649,7 @@ define <4 x i32> @smull_extvec_v4i16_v4i32(<4 x i16> %arg) nounwind {
define <2 x i64> @smull_extvec_v2i32_v2i64(<2 x i32> %arg) nounwind {
; CHECK-LABEL: smull_extvec_v2i32_v2i64:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #-1234
+; CHECK-NEXT: mov w8, #-1234 // =0xfffffb2e
; CHECK-NEXT: dup v1.2s, w8
; CHECK-NEXT: smull v0.2d, v0.2s, v1.2s
; CHECK-NEXT: ret
@@ -659,7 +673,7 @@ define <8 x i16> @umull_noextvec_v8i8_v8i16(<8 x i8> %arg) nounwind {
; Do not use SMULL if the BUILD_VECTOR element values are too big.
; CHECK-LABEL: umull_noextvec_v8i8_v8i16:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #999
+; CHECK-NEXT: mov w8, #999 // =0x3e7
; CHECK-NEXT: ushll v0.8h, v0.8b, #0
; CHECK-NEXT: dup v1.8h, w8
; CHECK-NEXT: mul v0.8h, v0.8h, v1.8h
@@ -672,7 +686,7 @@ define <8 x i16> @umull_noextvec_v8i8_v8i16(<8 x i8> %arg) nounwind {
define <4 x i32> @umull_extvec_v4i16_v4i32(<4 x i16> %arg) nounwind {
; CHECK-LABEL: umull_extvec_v4i16_v4i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #1234
+; CHECK-NEXT: mov w8, #1234 // =0x4d2
; CHECK-NEXT: dup v1.4h, w8
; CHECK-NEXT: umull v0.4s, v0.4h, v1.4h
; CHECK-NEXT: ret
@@ -684,7 +698,7 @@ define <4 x i32> @umull_extvec_v4i16_v4i32(<4 x i16> %arg) nounwind {
define <2 x i64> @umull_extvec_v2i32_v2i64(<2 x i32> %arg) nounwind {
; CHECK-LABEL: umull_extvec_v2i32_v2i64:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #1234
+; CHECK-NEXT: mov w8, #1234 // =0x4d2
; CHECK-NEXT: dup v1.2s, w8
; CHECK-NEXT: umull v0.2d, v0.2s, v1.2s
; CHECK-NEXT: ret
@@ -709,7 +723,7 @@ define <8 x i16> @amull_extvec_v8i8_v8i16(<8 x i8> %arg) nounwind {
define <4 x i32> @amull_extvec_v4i16_v4i32(<4 x i16> %arg) nounwind {
; CHECK-LABEL: amull_extvec_v4i16_v4i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #1234
+; CHECK-NEXT: mov w8, #1234 // =0x4d2
; CHECK-NEXT: movi v1.2d, #0x00ffff0000ffff
; CHECK-NEXT: dup v2.4h, w8
; CHECK-NEXT: smull v0.4s, v0.4h, v2.4h
@@ -724,7 +738,7 @@ define <4 x i32> @amull_extvec_v4i16_v4i32(<4 x i16> %arg) nounwind {
define <2 x i64> @amull_extvec_v2i32_v2i64(<2 x i32> %arg) nounwind {
; CHECK-LABEL: amull_extvec_v2i32_v2i64:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #1234
+; CHECK-NEXT: mov w8, #1234 // =0x4d2
; CHECK-NEXT: movi v1.2d, #0x000000ffffffff
; CHECK-NEXT: dup v2.2s, w8
; CHECK-NEXT: smull v0.2d, v0.2s, v2.2s