[llvm] cc721db - [AArch64][Codegen] Improve small shufflevector/concat lowering for SME (#116662)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Nov 22 02:15:27 PST 2024
Author: Benjamin Maxwell
Date: 2024-11-22T10:15:23Z
New Revision: cc721dba4e94c1d28214f81da0f1af79b6ca4218
URL: https://github.com/llvm/llvm-project/commit/cc721dba4e94c1d28214f81da0f1af79b6ca4218
DIFF: https://github.com/llvm/llvm-project/commit/cc721dba4e94c1d28214f81da0f1af79b6ca4218.diff
LOG: [AArch64][Codegen] Improve small shufflevector/concat lowering for SME (#116662)
This now tries to widen the shuffle before generating a possibly
expensive SVE TBL. Widening may allow the shuffle to be matched as
something cheaper, like a ZIP1.
Added:
Modified:
llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-concat.ll
llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-extract-subvector.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 7ab3fc06715ec8..ed2d9a07cec630 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -29211,6 +29211,11 @@ SDValue AArch64TargetLowering::LowerFixedLengthVECTOR_SHUFFLEToSVE(
}
}
+ // Try to widen the shuffle before generating a possibly expensive SVE TBL.
+ // This may allow the shuffle to be matched as something cheaper like ZIP1.
+ if (SDValue WideOp = tryWidenMaskForShuffle(Op, DAG))
+ return WideOp;
+
// Avoid producing TBL instruction if we don't know SVE register minimal size,
// unless NEON is not available and we can assume minimal SVE register size is
// 128-bits.
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-concat.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-concat.ll
index 6e2ecfca9e963e..619840fc6afb28 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-concat.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-concat.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mattr=+sve2 -force-streaming-compatible < %s | FileCheck %s --check-prefixes=CHECK,SVE2
-; RUN: llc -mattr=+sme -force-streaming < %s | FileCheck %s --check-prefixes=CHECK,SME
+; RUN: llc -mattr=+sve2 -force-streaming-compatible < %s | FileCheck %s --check-prefixes=CHECK
+; RUN: llc -mattr=+sme -force-streaming < %s | FileCheck %s --check-prefixes=CHECK
; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
target triple = "aarch64-unknown-linux-gnu"
@@ -406,33 +406,13 @@ define void @concat_v8i64(ptr %a, ptr %b, ptr %c) {
;
define <4 x half> @concat_v4f16(<2 x half> %op1, <2 x half> %op2) {
-; SVE2-LABEL: concat_v4f16:
-; SVE2: // %bb.0:
-; SVE2-NEXT: cnth x8
-; SVE2-NEXT: adrp x9, .LCPI15_0
-; SVE2-NEXT: adrp x10, .LCPI15_1
-; SVE2-NEXT: mov z2.h, w8
-; SVE2-NEXT: ldr q3, [x9, :lo12:.LCPI15_0]
-; SVE2-NEXT: ldr q4, [x10, :lo12:.LCPI15_1]
-; SVE2-NEXT: ptrue p0.h, vl8
-; SVE2-NEXT: // kill: def $d1 killed $d1 killed $z0_z1 def $z0_z1
-; SVE2-NEXT: // kill: def $d0 killed $d0 killed $z0_z1 def $z0_z1
-; SVE2-NEXT: mad z2.h, p0/m, z3.h, z4.h
-; SVE2-NEXT: tbl z0.h, { z0.h, z1.h }, z2.h
-; SVE2-NEXT: // kill: def $d0 killed $d0 killed $z0
-; SVE2-NEXT: ret
-;
-; SME-LABEL: concat_v4f16:
-; SME: // %bb.0:
-; SME-NEXT: // kill: def $d1 killed $d1 def $z1
-; SME-NEXT: // kill: def $d0 killed $d0 def $z0
-; SME-NEXT: mov z2.h, z1.h[1]
-; SME-NEXT: mov z3.h, z0.h[1]
-; SME-NEXT: zip1 z1.h, z1.h, z2.h
-; SME-NEXT: zip1 z0.h, z0.h, z3.h
-; SME-NEXT: zip1 z0.s, z0.s, z1.s
-; SME-NEXT: // kill: def $d0 killed $d0 killed $z0
-; SME-NEXT: ret
+; CHECK-LABEL: concat_v4f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
+; CHECK-NEXT: zip1 z0.s, z0.s, z1.s
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
+; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: concat_v4f16:
; NONEON-NOSVE: // %bb.0:
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-extract-subvector.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-extract-subvector.ll
index a728cbe97056db..35dd827bbabc55 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-extract-subvector.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-extract-subvector.ll
@@ -276,10 +276,8 @@ define void @extract_subvector_v4i64(ptr %a, ptr %b) {
define <2 x half> @extract_subvector_v4f16(<4 x half> %op) {
; CHECK-LABEL: extract_subvector_v4f16:
; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI12_0
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI12_0]
-; CHECK-NEXT: tbl z0.h, { z0.h }, z1.h
+; CHECK-NEXT: mov z0.s, z0.s[1]
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
;
More information about the llvm-commits
mailing list