[llvm] [AArch64] Fix op mask detection in performZExtDeinterleaveShuffleCombine (PR #126054)
David Green via llvm-commits
llvm-commits at lists.llvm.org
Thu Feb 6 06:01:13 PST 2025
https://github.com/davemgreen updated https://github.com/llvm/llvm-project/pull/126054
>From 73137e420704b09fb45ed4f0cb8206539f0a6b33 Mon Sep 17 00:00:00 2001
From: David Green <david.green at arm.com>
Date: Thu, 6 Feb 2025 11:22:54 +0000
Subject: [PATCH 1/2] [AArch64] Add test cases for IsUndefDeInterleave. NFC
---
llvm/test/CodeGen/AArch64/zext-shuffle.ll | 142 ++++++++++++++++++++++
1 file changed, 142 insertions(+)
diff --git a/llvm/test/CodeGen/AArch64/zext-shuffle.ll b/llvm/test/CodeGen/AArch64/zext-shuffle.ll
index 2965996ddcb0260..b4346c7132be62e 100644
--- a/llvm/test/CodeGen/AArch64/zext-shuffle.ll
+++ b/llvm/test/CodeGen/AArch64/zext-shuffle.ll
@@ -543,3 +543,145 @@ define <8 x double> @uitofp_load_fadd(ptr %p) {
ret <8 x double> %c
}
+define <4 x i32> @isUndefDeInterleave_b0(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: isUndefDeInterleave_b0:
+; CHECK: // %bb.0:
+; CHECK-NEXT: movi v1.2d, #0x00ffff0000ffff
+; CHECK-NEXT: uzp1 v0.4s, v0.4s, v0.4s
+; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
+; CHECK-NEXT: ret
+ %2 = shufflevector <8 x i16> %a, <8 x i16> poison, <8 x i32> <i32 poison, i32 poison, i32 0, i32 4, i32 poison, i32 poison, i32 poison, i32 poison>
+ %s2 = shufflevector <8 x i16> %2, <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %3 = zext <4 x i16> %s2 to <4 x i32>
+ ret <4 x i32> %3
+}
+
+define <4 x i32> @isUndefDeInterleave_b1(<8 x i16> %a) {
+; CHECK-LABEL: isUndefDeInterleave_b1:
+; CHECK: // %bb.0:
+; CHECK-NEXT: uzp1 v0.4s, v0.4s, v0.4s
+; CHECK-NEXT: ushr v0.4s, v0.4s, #16
+; CHECK-NEXT: ret
+ %2 = shufflevector <8 x i16> %a, <8 x i16> poison, <8 x i32> <i32 poison, i32 poison, i32 1, i32 5, i32 poison, i32 poison, i32 poison, i32 poison>
+ %s2 = shufflevector <8 x i16> %2, <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %3 = zext <4 x i16> %s2 to <4 x i32>
+ ret <4 x i32> %3
+}
+
+define <4 x i32> @isUndefDeInterleave_b2(<8 x i16> %a) {
+; CHECK-LABEL: isUndefDeInterleave_b2:
+; CHECK: // %bb.0:
+; CHECK-NEXT: movi v1.2d, #0x00ffff0000ffff
+; CHECK-NEXT: uzp2 v0.4s, v0.4s, v0.4s
+; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
+; CHECK-NEXT: ret
+ %2 = shufflevector <8 x i16> %a, <8 x i16> poison, <8 x i32> <i32 poison, i32 poison, i32 2, i32 6, i32 poison, i32 poison, i32 poison, i32 poison>
+ %s2 = shufflevector <8 x i16> %2, <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %3 = zext <4 x i16> %s2 to <4 x i32>
+ ret <4 x i32> %3
+}
+
+define <4 x i32> @isUndefDeInterleave_b3(<8 x i16> %a) {
+; CHECK-LABEL: isUndefDeInterleave_b3:
+; CHECK: // %bb.0:
+; CHECK-NEXT: uzp2 v0.4s, v0.4s, v0.4s
+; CHECK-NEXT: ushr v0.4s, v0.4s, #16
+; CHECK-NEXT: ret
+ %2 = shufflevector <8 x i16> %a, <8 x i16> poison, <8 x i32> <i32 poison, i32 poison, i32 3, i32 7, i32 poison, i32 poison, i32 poison, i32 poison>
+ %s2 = shufflevector <8 x i16> %2, <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %3 = zext <4 x i16> %s2 to <4 x i32>
+ ret <4 x i32> %3
+}
+
+define <4 x i32> @isUndefDeInterleave_t0(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: isUndefDeInterleave_t0:
+; CHECK: // %bb.0:
+; CHECK-NEXT: movi v1.2d, #0x00ffff0000ffff
+; CHECK-NEXT: uzp1 v0.4s, v0.4s, v0.4s
+; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
+; CHECK-NEXT: ret
+ %2 = shufflevector <8 x i16> %a, <8 x i16> poison, <8 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 0, i32 4>
+ %s2 = shufflevector <8 x i16> %2, <8 x i16> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ %3 = zext <4 x i16> %s2 to <4 x i32>
+ ret <4 x i32> %3
+}
+
+define <4 x i32> @isUndefDeInterleave_t1(<8 x i16> %a) {
+; CHECK-LABEL: isUndefDeInterleave_t1:
+; CHECK: // %bb.0:
+; CHECK-NEXT: uzp1 v0.4s, v0.4s, v0.4s
+; CHECK-NEXT: ushr v0.4s, v0.4s, #16
+; CHECK-NEXT: ret
+ %2 = shufflevector <8 x i16> %a, <8 x i16> poison, <8 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 1, i32 5>
+ %s2 = shufflevector <8 x i16> %2, <8 x i16> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ %3 = zext <4 x i16> %s2 to <4 x i32>
+ ret <4 x i32> %3
+}
+
+define <4 x i32> @isUndefDeInterleave_t2(<8 x i16> %a) {
+; CHECK-LABEL: isUndefDeInterleave_t2:
+; CHECK: // %bb.0:
+; CHECK-NEXT: movi v1.2d, #0x00ffff0000ffff
+; CHECK-NEXT: uzp2 v0.4s, v0.4s, v0.4s
+; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
+; CHECK-NEXT: ret
+ %2 = shufflevector <8 x i16> %a, <8 x i16> poison, <8 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 2, i32 6>
+ %s2 = shufflevector <8 x i16> %2, <8 x i16> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ %3 = zext <4 x i16> %s2 to <4 x i32>
+ ret <4 x i32> %3
+}
+
+define <4 x i32> @isUndefDeInterleave_t3(<8 x i16> %a) {
+; CHECK-LABEL: isUndefDeInterleave_t3:
+; CHECK: // %bb.0:
+; CHECK-NEXT: uzp2 v0.4s, v0.4s, v0.4s
+; CHECK-NEXT: ushr v0.4s, v0.4s, #16
+; CHECK-NEXT: ret
+ %2 = shufflevector <8 x i16> %a, <8 x i16> poison, <8 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 3, i32 7>
+ %s2 = shufflevector <8 x i16> %2, <8 x i16> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ %3 = zext <4 x i16> %s2 to <4 x i32>
+ ret <4 x i32> %3
+}
+
+define <4 x i32> @isUndefDeInterleave_b0_bad(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: isUndefDeInterleave_b0_bad:
+; CHECK: // %bb.0:
+; CHECK-NEXT: movi v1.2d, #0x00ffff0000ffff
+; CHECK-NEXT: uzp1 v0.4s, v0.4s, v0.4s
+; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
+; CHECK-NEXT: ret
+ %2 = shufflevector <8 x i16> %a, <8 x i16> poison, <8 x i32> <i32 4, i32 4, i32 0, i32 4, i32 poison, i32 poison, i32 poison, i32 poison>
+ %s2 = shufflevector <8 x i16> %2, <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %3 = zext <4 x i16> %s2 to <4 x i32>
+ ret <4 x i32> %3
+}
+
+define <4 x i32> @isUndefDeInterleave_t1_bad(<8 x i16> %a) {
+; CHECK-LABEL: isUndefDeInterleave_t1_bad:
+; CHECK: // %bb.0:
+; CHECK-NEXT: uzp1 v0.4s, v0.4s, v0.4s
+; CHECK-NEXT: ushr v0.4s, v0.4s, #16
+; CHECK-NEXT: ret
+ %2 = shufflevector <8 x i16> %a, <8 x i16> poison, <8 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 4, i32 4, i32 1, i32 5>
+ %s2 = shufflevector <8 x i16> %2, <8 x i16> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ %3 = zext <4 x i16> %s2 to <4 x i32>
+ ret <4 x i32> %3
+}
+
+define i16 @undeftop(<8 x i16> %0) {
+; CHECK-LABEL: undeftop:
+; CHECK: // %bb.0:
+; CHECK-NEXT: movi v1.2d, #0x00ffff0000ffff
+; CHECK-NEXT: uzp1 v0.4s, v0.4s, v0.4s
+; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
+; CHECK-NEXT: add v0.4s, v0.4s, v0.4s
+; CHECK-NEXT: xtn v0.4h, v0.4s
+; CHECK-NEXT: umov w0, v0.h[0]
+; CHECK-NEXT: ret
+ %2 = shufflevector <8 x i16> %0, <8 x i16> zeroinitializer, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 9, i32 7, i32 5, i32 3>
+ %3 = zext <8 x i16> %2 to <8 x i64>
+ %new0 = add <8 x i64> %3, %3
+ %last = trunc <8 x i64> %new0 to <8 x i16>
+ %4 = extractelement <8 x i16> %last, i32 0
+ ret i16 %4
+}
>From da719496d639b008c7da3bb7ba32511281cd5a27 Mon Sep 17 00:00:00 2001
From: David Green <david.green at arm.com>
Date: Thu, 6 Feb 2025 14:00:55 +0000
Subject: [PATCH 2/2] [AArch64] Fix op mask detection in
performZExtDeinterleaveShuffleCombine
Given a zext from an extract vector, with a shuffle mask like <4, 0, 0, 4>, we
would previously recognize the top half as a deinterleave. In order to convert
into a uzp, we should also have been checking that the bottom half is undef.
Fixes #125989
---
.../Target/AArch64/AArch64ISelLowering.cpp | 3 +++
llvm/test/CodeGen/AArch64/zext-shuffle.ll | 19 ++++++++++---------
2 files changed, 13 insertions(+), 9 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 8617377ffc55b58..b3584ddf5b6ca46 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -22332,6 +22332,9 @@ static SDValue performZExtDeinterleaveShuffleCombine(SDNode *N,
if (!IsDeInterleave)
IsUndefDeInterleave =
Shuffle->getOperand(1).isUndef() &&
+ all_of(
+ Shuffle->getMask().slice(ExtOffset, VT.getVectorNumElements() / 2),
+ [](int M) { return M < 0; }) &&
ShuffleVectorInst::isDeInterleaveMaskOfFactor(
Shuffle->getMask().slice(ExtOffset + VT.getVectorNumElements() / 2,
VT.getVectorNumElements() / 2),
diff --git a/llvm/test/CodeGen/AArch64/zext-shuffle.ll b/llvm/test/CodeGen/AArch64/zext-shuffle.ll
index b4346c7132be62e..20d2071d7fe54df 100644
--- a/llvm/test/CodeGen/AArch64/zext-shuffle.ll
+++ b/llvm/test/CodeGen/AArch64/zext-shuffle.ll
@@ -646,9 +646,10 @@ define <4 x i32> @isUndefDeInterleave_t3(<8 x i16> %a) {
define <4 x i32> @isUndefDeInterleave_b0_bad(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: isUndefDeInterleave_b0_bad:
; CHECK: // %bb.0:
-; CHECK-NEXT: movi v1.2d, #0x00ffff0000ffff
-; CHECK-NEXT: uzp1 v0.4s, v0.4s, v0.4s
-; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
+; CHECK-NEXT: adrp x8, .LCPI40_0
+; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI40_0]
+; CHECK-NEXT: tbl v0.16b, { v0.16b }, v1.16b
+; CHECK-NEXT: ushll v0.4s, v0.4h, #0
; CHECK-NEXT: ret
%2 = shufflevector <8 x i16> %a, <8 x i16> poison, <8 x i32> <i32 4, i32 4, i32 0, i32 4, i32 poison, i32 poison, i32 poison, i32 poison>
%s2 = shufflevector <8 x i16> %2, <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
@@ -659,8 +660,10 @@ define <4 x i32> @isUndefDeInterleave_b0_bad(<8 x i16> %a, <8 x i16> %b) {
define <4 x i32> @isUndefDeInterleave_t1_bad(<8 x i16> %a) {
; CHECK-LABEL: isUndefDeInterleave_t1_bad:
; CHECK: // %bb.0:
-; CHECK-NEXT: uzp1 v0.4s, v0.4s, v0.4s
-; CHECK-NEXT: ushr v0.4s, v0.4s, #16
+; CHECK-NEXT: adrp x8, .LCPI41_0
+; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI41_0]
+; CHECK-NEXT: tbl v0.16b, { v0.16b }, v1.16b
+; CHECK-NEXT: ushll2 v0.4s, v0.8h, #0
; CHECK-NEXT: ret
%2 = shufflevector <8 x i16> %a, <8 x i16> poison, <8 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 4, i32 4, i32 1, i32 5>
%s2 = shufflevector <8 x i16> %2, <8 x i16> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
@@ -671,10 +674,8 @@ define <4 x i32> @isUndefDeInterleave_t1_bad(<8 x i16> %a) {
define i16 @undeftop(<8 x i16> %0) {
; CHECK-LABEL: undeftop:
; CHECK: // %bb.0:
-; CHECK-NEXT: movi v1.2d, #0x00ffff0000ffff
-; CHECK-NEXT: uzp1 v0.4s, v0.4s, v0.4s
-; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
-; CHECK-NEXT: add v0.4s, v0.4s, v0.4s
+; CHECK-NEXT: dup v0.8h, v0.h[4]
+; CHECK-NEXT: uaddl v0.4s, v0.4h, v0.4h
; CHECK-NEXT: xtn v0.4h, v0.4s
; CHECK-NEXT: umov w0, v0.h[0]
; CHECK-NEXT: ret
More information about the llvm-commits
mailing list