[llvm] [AArch64] Match ZIP and UZP starting from undef elements. NFC (PR #89578)

via llvm-commits llvm-commits at lists.llvm.org
Mon Apr 22 01:08:22 PDT 2024


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-backend-aarch64

Author: David Green (davemgreen)

<details>
<summary>Changes</summary>

If the first element of a zip/uzp mask is undef, the isZIPMask and isUZPMask functions have a 50% chance of picking the wrong "WhichResult" (they derive it from M[0] alone, so an undef first element always selects the second variant), meaning they fail to match a zip/uzp where they could. This patch alters the matching code to look for the first non-undef element first, so that WhichResult is chosen correctly.

---
Full diff: https://github.com/llvm/llvm-project/pull/89578.diff


3 Files Affected:

- (modified) llvm/lib/Target/AArch64/AArch64PerfectShuffle.h (+37-6) 
- (modified) llvm/test/CodeGen/AArch64/arm64-uzp.ll (+9-21) 
- (modified) llvm/test/CodeGen/AArch64/arm64-zip.ll (+3-15) 


``````````diff
diff --git a/llvm/lib/Target/AArch64/AArch64PerfectShuffle.h b/llvm/lib/Target/AArch64/AArch64PerfectShuffle.h
index 7abaead694d114..a143243a8d3bb3 100644
--- a/llvm/lib/Target/AArch64/AArch64PerfectShuffle.h
+++ b/llvm/lib/Target/AArch64/AArch64PerfectShuffle.h
@@ -6620,11 +6620,28 @@ static unsigned getPerfectShuffleCost(llvm::ArrayRef<int> M) {
   return (PFEntry >> 30) + 1;
 }
 
-inline bool isZIPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
+/// Return true for zip1 or zip2 masks of the form:
+///  <0,  8, 1,  9, 2, 10, 3, 11> or
+///  <4, 12, 5, 13, 6, 14, 7, 15>
+inline bool isZIPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResultOut) {
   unsigned NumElts = VT.getVectorNumElements();
   if (NumElts % 2 != 0)
     return false;
-  WhichResult = (M[0] == 0 ? 0 : 1);
+  // Check the first non-undef element for which half to use.
+  unsigned WhichResult = 2;
+  for (unsigned i = 0; i != NumElts / 2; i++) {
+    if (M[i * 2] >= 0) {
+      WhichResult = ((unsigned)M[i * 2] == i ? 0 : 1);
+      break;
+    } else if (M[i * 2 + 1] >= 0) {
+      WhichResult = ((unsigned)M[i * 2 + 1] == NumElts + i ? 0 : 1);
+      break;
+    }
+  }
+  if (WhichResult == 2)
+    return false;
+
+  // Check all elements match.
   unsigned Idx = WhichResult * NumElts / 2;
   for (unsigned i = 0; i != NumElts; i += 2) {
     if ((M[i] >= 0 && (unsigned)M[i] != Idx) ||
@@ -6632,20 +6649,34 @@ inline bool isZIPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
       return false;
     Idx += 1;
   }
-
+  WhichResultOut = WhichResult;
   return true;
 }
 
-inline bool isUZPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
+/// Return true for uzp1 or uzp2 masks of the form:
+///  <0, 2, 4, 6, 8, 10, 12, 14> or
+///  <1, 3, 5, 7, 9, 11, 13, 15>
+inline bool isUZPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResultOut) {
   unsigned NumElts = VT.getVectorNumElements();
-  WhichResult = (M[0] == 0 ? 0 : 1);
+  // Check the first non-undef element for which half to use.
+  unsigned WhichResult = 2;
+  for (unsigned i = 0; i != NumElts; i++) {
+    if (M[i] >= 0) {
+      WhichResult = ((unsigned)M[i] == i * 2 ? 0 : 1);
+      break;
+    }
+  }
+  if (WhichResult == 2)
+    return false;
+
+  // Check all elements match.
   for (unsigned i = 0; i != NumElts; ++i) {
     if (M[i] < 0)
       continue; // ignore UNDEF indices
     if ((unsigned)M[i] != 2 * i + WhichResult)
       return false;
   }
-
+  WhichResultOut = WhichResult;
   return true;
 }
 
diff --git a/llvm/test/CodeGen/AArch64/arm64-uzp.ll b/llvm/test/CodeGen/AArch64/arm64-uzp.ll
index 6e01ebc95a1cb8..49a51d96fbc841 100644
--- a/llvm/test/CodeGen/AArch64/arm64-uzp.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-uzp.ll
@@ -110,13 +110,9 @@ define <8 x i16> @vuzpQi16_undef1(<8 x i16> %A, <8 x i16> %B) nounwind {
 define <8 x i16> @vuzpQi16_undef0(<8 x i16> %A, <8 x i16> %B) nounwind {
 ; CHECK-LABEL: vuzpQi16_undef0:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    adrp x8, .LCPI8_0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
-; CHECK-NEXT:    ldr q2, [x8, :lo12:.LCPI8_0]
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
-; CHECK-NEXT:    uzp2.8h v3, v0, v1
-; CHECK-NEXT:    tbl.16b v0, { v0, v1 }, v2
-; CHECK-NEXT:    add.8h v0, v0, v3
+; CHECK-NEXT:    uzp1.8h v2, v0, v1
+; CHECK-NEXT:    uzp2.8h v0, v0, v1
+; CHECK-NEXT:    add.8h v0, v2, v0
 ; CHECK-NEXT:    ret
   %tmp3 = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> <i32 undef, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
   %tmp4 = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> <i32 undef, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
@@ -127,13 +123,9 @@ define <8 x i16> @vuzpQi16_undef0(<8 x i16> %A, <8 x i16> %B) nounwind {
 define <8 x i16> @vuzpQi16_undef01(<8 x i16> %A, <8 x i16> %B) nounwind {
 ; CHECK-LABEL: vuzpQi16_undef01:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    adrp x8, .LCPI9_0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
-; CHECK-NEXT:    ldr q2, [x8, :lo12:.LCPI9_0]
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
-; CHECK-NEXT:    uzp2.8h v3, v0, v1
-; CHECK-NEXT:    tbl.16b v0, { v0, v1 }, v2
-; CHECK-NEXT:    add.8h v0, v0, v3
+; CHECK-NEXT:    uzp1.8h v2, v0, v1
+; CHECK-NEXT:    uzp2.8h v0, v0, v1
+; CHECK-NEXT:    add.8h v0, v2, v0
 ; CHECK-NEXT:    ret
   %tmp3 = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> <i32 undef, i32 undef, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
   %tmp4 = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> <i32 undef, i32 undef, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
@@ -144,13 +136,9 @@ define <8 x i16> @vuzpQi16_undef01(<8 x i16> %A, <8 x i16> %B) nounwind {
 define <8 x i16> @vuzpQi16_undef012(<8 x i16> %A, <8 x i16> %B) nounwind {
 ; CHECK-LABEL: vuzpQi16_undef012:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    adrp x8, .LCPI10_0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
-; CHECK-NEXT:    ldr q2, [x8, :lo12:.LCPI10_0]
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
-; CHECK-NEXT:    uzp2.8h v3, v0, v1
-; CHECK-NEXT:    tbl.16b v0, { v0, v1 }, v2
-; CHECK-NEXT:    add.8h v0, v0, v3
+; CHECK-NEXT:    uzp1.8h v2, v0, v1
+; CHECK-NEXT:    uzp2.8h v0, v0, v1
+; CHECK-NEXT:    add.8h v0, v2, v0
 ; CHECK-NEXT:    ret
   %tmp3 = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 6, i32 8, i32 10, i32 12, i32 14>
   %tmp4 = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 7, i32 9, i32 11, i32 13, i32 15>
diff --git a/llvm/test/CodeGen/AArch64/arm64-zip.ll b/llvm/test/CodeGen/AArch64/arm64-zip.ll
index 349751dda461f9..4c771cbd2966cc 100644
--- a/llvm/test/CodeGen/AArch64/arm64-zip.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-zip.ll
@@ -142,11 +142,7 @@ define <16 x i8> @vzipQi8_undef(ptr %A, ptr %B) nounwind {
 define <8 x i16> @vzip1_undef_01(<8 x i16> %A, <8 x i16> %B) nounwind {
 ; CHECK-LABEL: vzip1_undef_01:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    adrp x8, .LCPI8_0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
-; CHECK-NEXT:    ldr q2, [x8, :lo12:.LCPI8_0]
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
-; CHECK-NEXT:    tbl.16b v0, { v0, v1 }, v2
+; CHECK-NEXT:    zip1.8h v0, v0, v1
 ; CHECK-NEXT:    ret
   %s = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> <i32 undef, i32 undef, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
   ret <8 x i16> %s
@@ -155,11 +151,7 @@ define <8 x i16> @vzip1_undef_01(<8 x i16> %A, <8 x i16> %B) nounwind {
 define <8 x i16> @vzip1_undef_0(<8 x i16> %A, <8 x i16> %B) nounwind {
 ; CHECK-LABEL: vzip1_undef_0:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    adrp x8, .LCPI9_0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
-; CHECK-NEXT:    ldr q2, [x8, :lo12:.LCPI9_0]
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
-; CHECK-NEXT:    tbl.16b v0, { v0, v1 }, v2
+; CHECK-NEXT:    zip1.8h v0, v0, v1
 ; CHECK-NEXT:    ret
   %s = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> <i32 undef, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
   ret <8 x i16> %s
@@ -177,11 +169,7 @@ define <8 x i16> @vzip1_undef_1(<8 x i16> %A, <8 x i16> %B) nounwind {
 define <8 x i16> @vzip1_undef_012(<8 x i16> %A, <8 x i16> %B) nounwind {
 ; CHECK-LABEL: vzip1_undef_012:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    adrp x8, .LCPI11_0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
-; CHECK-NEXT:    ldr q2, [x8, :lo12:.LCPI11_0]
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
-; CHECK-NEXT:    tbl.16b v0, { v0, v1 }, v2
+; CHECK-NEXT:    zip1.8h v0, v0, v1
 ; CHECK-NEXT:    ret
   %s = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 9, i32 2, i32 10, i32 3, i32 11>
   ret <8 x i16> %s

``````````

</details>


https://github.com/llvm/llvm-project/pull/89578


More information about the llvm-commits mailing list