[PATCH] D155281: [SVE][CodeGen] Improve codegen for some zero-extends of masked loads

Mon Jul 17 01:19:37 PDT 2023

This revision was landed with ongoing or failed builds.
This revision was automatically updated to reflect the committed changes.
Closed by commit rGcc68e05bd2cc: [SVE][CodeGen] Improve codegen for some zero-extends of masked loads (authored by david-arm).

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D155281/new/

https://reviews.llvm.org/D155281

Files:
  llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
  llvm/test/CodeGen/AArch64/sve-intrinsics-mask-ldst-ext.ll


Index: llvm/test/CodeGen/AArch64/sve-intrinsics-mask-ldst-ext.ll
===================================================================

--- llvm/test/CodeGen/AArch64/sve-intrinsics-mask-ldst-ext.ll
+++ llvm/test/CodeGen/AArch64/sve-intrinsics-mask-ldst-ext.ll
@@ -52,10 +52,9 @@
 define <vscale x 8 x i32> @masked_ld1b_nxv8i8_zext_i32(<vscale x 8 x i8> *%a, <vscale x 8 x i1> %mask) {
 ; CHECK-LABEL: masked_ld1b_nxv8i8_zext_i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ld1b { z0.h }, p0/z, [x0]
-; CHECK-NEXT:    uunpkhi z1.s, z0.h
-; CHECK-NEXT:    and z0.h, z0.h, #0xff
-; CHECK-NEXT:    uunpklo z0.s, z0.h
+; CHECK-NEXT:    ld1b { z1.h }, p0/z, [x0]
+; CHECK-NEXT:    uunpklo z0.s, z1.h
+; CHECK-NEXT:    uunpkhi z1.s, z1.h
 ; CHECK-NEXT:    ret
   %wide.masked.load = call <vscale x 8 x i8> @llvm.masked.load.nxv8i8.p0(ptr %a, i32 1, <vscale x 8 x i1> %mask, <vscale x 8 x i8> poison)
   %res = zext <vscale x 8 x i8> %wide.masked.load to <vscale x 8 x i32>
@@ -125,10 +124,9 @@
 define <vscale x 4 x i64> @masked_ld1b_nxv4i8_zext_i64(<vscale x 4 x i8> *%a, <vscale x 4 x i1> %mask) {
 ; CHECK-LABEL: masked_ld1b_nxv4i8_zext_i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ld1b { z0.s }, p0/z, [x0]
-; CHECK-NEXT:    uunpkhi z1.d, z0.s
-; CHECK-NEXT:    and z0.s, z0.s, #0xff
-; CHECK-NEXT:    uunpklo z0.d, z0.s
+; CHECK-NEXT:    ld1b { z1.s }, p0/z, [x0]
+; CHECK-NEXT:    uunpklo z0.d, z1.s
+; CHECK-NEXT:    uunpkhi z1.d, z1.s
 ; CHECK-NEXT:    ret
   %wide.masked.load = call <vscale x 4 x i8> @llvm.masked.load.nxv4i8.p0(ptr %a, i32 1, <vscale x 4 x i1> %mask, <vscale x 4 x i8> poison)
   %res = zext <vscale x 4 x i8> %wide.masked.load to <vscale x 4 x i64>
@@ -186,10 +184,9 @@
 define <vscale x 4 x i64> @masked_ld1h_nxv4i16_zext(<vscale x 4 x i16> *%a, <vscale x 4 x i1> %mask) {
 ; CHECK-LABEL: masked_ld1h_nxv4i16_zext:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ld1h { z0.s }, p0/z, [x0]
-; CHECK-NEXT:    uunpkhi z1.d, z0.s
-; CHECK-NEXT:    and z0.s, z0.s, #0xffff
-; CHECK-NEXT:    uunpklo z0.d, z0.s
+; CHECK-NEXT:    ld1h { z1.s }, p0/z, [x0]
+; CHECK-NEXT:    uunpklo z0.d, z1.s
+; CHECK-NEXT:    uunpkhi z1.d, z1.s
 ; CHECK-NEXT:    ret
   %wide.masked.load = call <vscale x 4 x i16> @llvm.masked.load.nxv4i16.p0(ptr %a, i32 1, <vscale x 4 x i1> %mask, <vscale x 4 x i16> poison)
   %res = zext <vscale x 4 x i16> %wide.masked.load to <vscale x 4 x i64>
Index: llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -17053,14 +17053,28 @@
 
     uint64_t ExtVal = C->getZExtValue();
 
+    auto MaskAndTypeMatch = [ExtVal](EVT VT) -> bool {
+      return ((ExtVal == 0xFF && VT == MVT::i8) ||
+              (ExtVal == 0xFFFF && VT == MVT::i16) ||
+              (ExtVal == 0xFFFFFFFF && VT == MVT::i32));
+    };
+
     // If the mask is fully covered by the unpack, we don't need to push
     // a new AND onto the operand
     EVT EltTy = UnpkOp->getValueType(0).getVectorElementType();
-    if ((ExtVal == 0xFF && EltTy == MVT::i8) ||
-        (ExtVal == 0xFFFF && EltTy == MVT::i16) ||
-        (ExtVal == 0xFFFFFFFF && EltTy == MVT::i32))
+    if (MaskAndTypeMatch(EltTy))
       return Src;
 
+    // If this is 'and (uunpklo/hi (extload MemTy -> ExtTy)), mask', then check
+    // to see if the mask is all-ones of size MemTy.
+    auto MaskedLoadOp = dyn_cast<MaskedLoadSDNode>(UnpkOp);
+    if (MaskedLoadOp && (MaskedLoadOp->getExtensionType() == ISD::ZEXTLOAD ||
+                         MaskedLoadOp->getExtensionType() == ISD::EXTLOAD)) {
+      EVT EltTy = MaskedLoadOp->getMemoryVT().getVectorElementType();
+      if (MaskAndTypeMatch(EltTy))
+        return Src;
+    }
+
     // Truncate to prevent a DUP with an over wide constant
     APInt Mask = C->getAPIntValue().trunc(EltTy.getSizeInBits());
 


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D155281.540899.patch
Type: text/x-patch
Size: 3918 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20230717/ad5b6922/attachment.bin>