[llvm] cebc960 - [AArch64] Match ZIP and UZP starting from undef elements. (#89578)

via llvm-commits llvm-commits at lists.llvm.org
Tue Apr 23 09:37:12 PDT 2024


Author: David Green
Date: 2024-04-23T17:37:07+01:00
New Revision: cebc9609d8cdc6f488693cd8e4a616b935b38d2c

URL: https://github.com/llvm/llvm-project/commit/cebc9609d8cdc6f488693cd8e4a616b935b38d2c
DIFF: https://github.com/llvm/llvm-project/commit/cebc9609d8cdc6f488693cd8e4a616b935b38d2c.diff

LOG: [AArch64] Match ZIP and UZP starting from undef elements. (#89578)

In case the first element of a zip/uzp mask is undef, the isZIPMask and
isUZPMask functions have a 50% chance of picking the wrong
"WhichResult", meaning they don't match a zip/uzp where they could. This
patch alters the matching code to first check for the first non-undef
element, to try and get WhichResult correct.

Added: 
    

Modified: 
    llvm/lib/Target/AArch64/AArch64PerfectShuffle.h
    llvm/test/CodeGen/AArch64/arm64-uzp.ll
    llvm/test/CodeGen/AArch64/arm64-zip.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AArch64/AArch64PerfectShuffle.h b/llvm/lib/Target/AArch64/AArch64PerfectShuffle.h
index 7abaead694d114..a143243a8d3bb3 100644
--- a/llvm/lib/Target/AArch64/AArch64PerfectShuffle.h
+++ b/llvm/lib/Target/AArch64/AArch64PerfectShuffle.h
@@ -6620,11 +6620,28 @@ static unsigned getPerfectShuffleCost(llvm::ArrayRef<int> M) {
   return (PFEntry >> 30) + 1;
 }
 
-inline bool isZIPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
+/// Return true for zip1 or zip2 masks of the form:
+///  <0,  8, 1,  9, 2, 10, 3, 11> or
+///  <4, 12, 5, 13, 6, 14, 7, 15>
+inline bool isZIPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResultOut) {
   unsigned NumElts = VT.getVectorNumElements();
   if (NumElts % 2 != 0)
     return false;
-  WhichResult = (M[0] == 0 ? 0 : 1);
+  // Check the first non-undef element for which half to use.
+  unsigned WhichResult = 2;
+  for (unsigned i = 0; i != NumElts / 2; i++) {
+    if (M[i * 2] >= 0) {
+      WhichResult = ((unsigned)M[i * 2] == i ? 0 : 1);
+      break;
+    } else if (M[i * 2 + 1] >= 0) {
+      WhichResult = ((unsigned)M[i * 2 + 1] == NumElts + i ? 0 : 1);
+      break;
+    }
+  }
+  if (WhichResult == 2)
+    return false;
+
+  // Check all elements match.
   unsigned Idx = WhichResult * NumElts / 2;
   for (unsigned i = 0; i != NumElts; i += 2) {
     if ((M[i] >= 0 && (unsigned)M[i] != Idx) ||
@@ -6632,20 +6649,34 @@ inline bool isZIPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
       return false;
     Idx += 1;
   }
-
+  WhichResultOut = WhichResult;
   return true;
 }
 
-inline bool isUZPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
+/// Return true for uzp1 or uzp2 masks of the form:
+///  <0, 2, 4, 6, 8, 10, 12, 14> or
+///  <1, 3, 5, 7, 9, 11, 13, 15>
+inline bool isUZPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResultOut) {
   unsigned NumElts = VT.getVectorNumElements();
-  WhichResult = (M[0] == 0 ? 0 : 1);
+  // Check the first non-undef element for which half to use.
+  unsigned WhichResult = 2;
+  for (unsigned i = 0; i != NumElts; i++) {
+    if (M[i] >= 0) {
+      WhichResult = ((unsigned)M[i] == i * 2 ? 0 : 1);
+      break;
+    }
+  }
+  if (WhichResult == 2)
+    return false;
+
+  // Check all elements match.
   for (unsigned i = 0; i != NumElts; ++i) {
     if (M[i] < 0)
       continue; // ignore UNDEF indices
     if ((unsigned)M[i] != 2 * i + WhichResult)
       return false;
   }
-
+  WhichResultOut = WhichResult;
   return true;
 }
 

diff  --git a/llvm/test/CodeGen/AArch64/arm64-uzp.ll b/llvm/test/CodeGen/AArch64/arm64-uzp.ll
index 6e01ebc95a1cb8..49a51d96fbc841 100644
--- a/llvm/test/CodeGen/AArch64/arm64-uzp.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-uzp.ll
@@ -110,13 +110,9 @@ define <8 x i16> @vuzpQi16_undef1(<8 x i16> %A, <8 x i16> %B) nounwind {
 define <8 x i16> @vuzpQi16_undef0(<8 x i16> %A, <8 x i16> %B) nounwind {
 ; CHECK-LABEL: vuzpQi16_undef0:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    adrp x8, .LCPI8_0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
-; CHECK-NEXT:    ldr q2, [x8, :lo12:.LCPI8_0]
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
-; CHECK-NEXT:    uzp2.8h v3, v0, v1
-; CHECK-NEXT:    tbl.16b v0, { v0, v1 }, v2
-; CHECK-NEXT:    add.8h v0, v0, v3
+; CHECK-NEXT:    uzp1.8h v2, v0, v1
+; CHECK-NEXT:    uzp2.8h v0, v0, v1
+; CHECK-NEXT:    add.8h v0, v2, v0
 ; CHECK-NEXT:    ret
   %tmp3 = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> <i32 undef, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
   %tmp4 = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> <i32 undef, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
@@ -127,13 +123,9 @@ define <8 x i16> @vuzpQi16_undef0(<8 x i16> %A, <8 x i16> %B) nounwind {
 define <8 x i16> @vuzpQi16_undef01(<8 x i16> %A, <8 x i16> %B) nounwind {
 ; CHECK-LABEL: vuzpQi16_undef01:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    adrp x8, .LCPI9_0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
-; CHECK-NEXT:    ldr q2, [x8, :lo12:.LCPI9_0]
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
-; CHECK-NEXT:    uzp2.8h v3, v0, v1
-; CHECK-NEXT:    tbl.16b v0, { v0, v1 }, v2
-; CHECK-NEXT:    add.8h v0, v0, v3
+; CHECK-NEXT:    uzp1.8h v2, v0, v1
+; CHECK-NEXT:    uzp2.8h v0, v0, v1
+; CHECK-NEXT:    add.8h v0, v2, v0
 ; CHECK-NEXT:    ret
   %tmp3 = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> <i32 undef, i32 undef, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
   %tmp4 = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> <i32 undef, i32 undef, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
@@ -144,13 +136,9 @@ define <8 x i16> @vuzpQi16_undef01(<8 x i16> %A, <8 x i16> %B) nounwind {
 define <8 x i16> @vuzpQi16_undef012(<8 x i16> %A, <8 x i16> %B) nounwind {
 ; CHECK-LABEL: vuzpQi16_undef012:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    adrp x8, .LCPI10_0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
-; CHECK-NEXT:    ldr q2, [x8, :lo12:.LCPI10_0]
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
-; CHECK-NEXT:    uzp2.8h v3, v0, v1
-; CHECK-NEXT:    tbl.16b v0, { v0, v1 }, v2
-; CHECK-NEXT:    add.8h v0, v0, v3
+; CHECK-NEXT:    uzp1.8h v2, v0, v1
+; CHECK-NEXT:    uzp2.8h v0, v0, v1
+; CHECK-NEXT:    add.8h v0, v2, v0
 ; CHECK-NEXT:    ret
   %tmp3 = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 6, i32 8, i32 10, i32 12, i32 14>
   %tmp4 = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 7, i32 9, i32 11, i32 13, i32 15>

diff  --git a/llvm/test/CodeGen/AArch64/arm64-zip.ll b/llvm/test/CodeGen/AArch64/arm64-zip.ll
index 349751dda461f9..4c771cbd2966cc 100644
--- a/llvm/test/CodeGen/AArch64/arm64-zip.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-zip.ll
@@ -142,11 +142,7 @@ define <16 x i8> @vzipQi8_undef(ptr %A, ptr %B) nounwind {
 define <8 x i16> @vzip1_undef_01(<8 x i16> %A, <8 x i16> %B) nounwind {
 ; CHECK-LABEL: vzip1_undef_01:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    adrp x8, .LCPI8_0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
-; CHECK-NEXT:    ldr q2, [x8, :lo12:.LCPI8_0]
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
-; CHECK-NEXT:    tbl.16b v0, { v0, v1 }, v2
+; CHECK-NEXT:    zip1.8h v0, v0, v1
 ; CHECK-NEXT:    ret
   %s = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> <i32 undef, i32 undef, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
   ret <8 x i16> %s
@@ -155,11 +151,7 @@ define <8 x i16> @vzip1_undef_01(<8 x i16> %A, <8 x i16> %B) nounwind {
 define <8 x i16> @vzip1_undef_0(<8 x i16> %A, <8 x i16> %B) nounwind {
 ; CHECK-LABEL: vzip1_undef_0:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    adrp x8, .LCPI9_0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
-; CHECK-NEXT:    ldr q2, [x8, :lo12:.LCPI9_0]
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
-; CHECK-NEXT:    tbl.16b v0, { v0, v1 }, v2
+; CHECK-NEXT:    zip1.8h v0, v0, v1
 ; CHECK-NEXT:    ret
   %s = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> <i32 undef, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
   ret <8 x i16> %s
@@ -177,11 +169,7 @@ define <8 x i16> @vzip1_undef_1(<8 x i16> %A, <8 x i16> %B) nounwind {
 define <8 x i16> @vzip1_undef_012(<8 x i16> %A, <8 x i16> %B) nounwind {
 ; CHECK-LABEL: vzip1_undef_012:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    adrp x8, .LCPI11_0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
-; CHECK-NEXT:    ldr q2, [x8, :lo12:.LCPI11_0]
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
-; CHECK-NEXT:    tbl.16b v0, { v0, v1 }, v2
+; CHECK-NEXT:    zip1.8h v0, v0, v1
 ; CHECK-NEXT:    ret
   %s = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 9, i32 2, i32 10, i32 3, i32 11>
   ret <8 x i16> %s


        


More information about the llvm-commits mailing list