[llvm-branch-commits] [llvm] [AArch64] Combine undef UZP and NVCAST away. (PR #204623)

via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Thu Jun 18 08:34:16 PDT 2026


llvmorg-github-actions[bot] wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-backend-aarch64

Author: Gaƫtan Bossu (gbossu)

<details>
<summary>Changes</summary>

These are used to lower insert_subvec nodes quite early in SDAG. After DAG combines run, it's possible that the inputs to these AArch64 nodes become UNDEF.

---
Full diff: https://github.com/llvm/llvm-project/pull/204623.diff


4 Files Affected:

- (modified) llvm/lib/Target/AArch64/AArch64ISelLowering.cpp (+17-5) 
- (modified) llvm/test/CodeGen/AArch64/sve-masked-gather-64b-unscaled.ll (+3-6) 
- (modified) llvm/test/CodeGen/AArch64/sve-masked-gather-legalize.ll (+1-2) 
- (modified) llvm/test/CodeGen/AArch64/sve-masked-gather.ll (+3-6) 


``````````diff
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 67ef911117eff..e5d9a79ccbacc 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -25641,6 +25641,12 @@ static SDValue performUzpCombine(SDNode *N, SelectionDAG &DAG,
   SDValue Op1 = N->getOperand(1);
   EVT ResVT = N->getValueType(0);
 
+  // UZP is used to lower insert_subvector quite early. When later DAG combines
+  // run, it's possible to actually end up with an insert of UNDEF into UNDEF,
+  // i.e. UZP1 UNDEF, UNDEF.
+  if (Op0.isUndef() && Op1.isUndef())
+    return DAG.getUNDEF(ResVT);
+
   // uzp(extract_lo(x), extract_hi(x)) -> extract_lo(uzp x, x)
   if (Op0.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
       Op1.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
@@ -28710,11 +28716,17 @@ static SDValue performDUPCombine(SDNode *N,
 
 /// Get rid of unnecessary NVCASTs (that don't change the type).
 static SDValue performNVCASTCombine(SDNode *N, SelectionDAG &DAG) {
-  if (N->getValueType(0) == N->getOperand(0).getValueType())
-    return N->getOperand(0);
-  if (N->getOperand(0).getOpcode() == AArch64ISD::NVCAST)
-    return DAG.getNode(AArch64ISD::NVCAST, SDLoc(N), N->getValueType(0),
-                       N->getOperand(0).getOperand(0));
+  EVT VT = N->getValueType(0);
+  SDValue Op = N->getOperand(0);
+
+  if (Op.isUndef())
+    return DAG.getUNDEF(VT);
+
+  if (VT == Op.getValueType())
+    return Op;
+
+  if (Op.getOpcode() == AArch64ISD::NVCAST)
+    return DAG.getNode(AArch64ISD::NVCAST, SDLoc(N), VT, Op.getOperand(0));
 
   return SDValue();
 }
diff --git a/llvm/test/CodeGen/AArch64/sve-masked-gather-64b-unscaled.ll b/llvm/test/CodeGen/AArch64/sve-masked-gather-64b-unscaled.ll
index b6552968a97f3..03e4d9361076d 100644
--- a/llvm/test/CodeGen/AArch64/sve-masked-gather-64b-unscaled.ll
+++ b/llvm/test/CodeGen/AArch64/sve-masked-gather-64b-unscaled.ll
@@ -134,10 +134,8 @@ define <vscale x 16 x i8> @masked_gather_nxv1i8(ptr %base, <vscale x 2 x i64> %w
 ; CHECK-NEXT:    uzp1 p0.d, p0.d, p1.d
 ; CHECK-NEXT:    ld1b { z0.d }, p0/z, [x0, z0.d]
 ; CHECK-NEXT:    uzp1 z0.s, z0.s, z0.s
-; CHECK-NEXT:    uzp1 z1.s, z0.s, z0.s
-; CHECK-NEXT:    uzp1 z0.h, z0.h, z1.h
-; CHECK-NEXT:    uzp1 z1.h, z1.h, z1.h
-; CHECK-NEXT:    uzp1 z0.b, z0.b, z1.b
+; CHECK-NEXT:    uzp1 z0.h, z0.h, z0.h
+; CHECK-NEXT:    uzp1 z0.b, z0.b, z0.b
 ; CHECK-NEXT:    ret
   %offsets = call <vscale x 1 x i64> @llvm.vector.extract.nxv1i64.nxv2i64(
       <vscale x 2 x i64> %wide.offsets, i64 0)
@@ -156,8 +154,7 @@ define <vscale x 8 x i16> @masked_gather_nxv1i16(ptr %base, <vscale x 2 x i64> %
 ; CHECK-NEXT:    uzp1 p0.d, p0.d, p1.d
 ; CHECK-NEXT:    ld1h { z0.d }, p0/z, [x0, z0.d]
 ; CHECK-NEXT:    uzp1 z0.s, z0.s, z0.s
-; CHECK-NEXT:    uzp1 z1.s, z0.s, z0.s
-; CHECK-NEXT:    uzp1 z0.h, z0.h, z1.h
+; CHECK-NEXT:    uzp1 z0.h, z0.h, z0.h
 ; CHECK-NEXT:    ret
   %offsets = call <vscale x 1 x i64> @llvm.vector.extract.nxv1i64.nxv2i64(
       <vscale x 2 x i64> %wide.offsets, i64 0)
diff --git a/llvm/test/CodeGen/AArch64/sve-masked-gather-legalize.ll b/llvm/test/CodeGen/AArch64/sve-masked-gather-legalize.ll
index dfe1ba0a0f05c..ff3cdddce5dda 100644
--- a/llvm/test/CodeGen/AArch64/sve-masked-gather-legalize.ll
+++ b/llvm/test/CodeGen/AArch64/sve-masked-gather-legalize.ll
@@ -210,8 +210,7 @@ define <vscale x 16 x i8> @masked_gather_nxv16i8_undef_hi_mask(ptr %base, <vscal
 ; CHECK-NEXT:    sunpklo z0.s, z0.h
 ; CHECK-NEXT:    ld1b { z0.s }, p0/z, [x0, z0.s, sxtw]
 ; CHECK-NEXT:    uzp1 z0.h, z0.h, z0.h
-; CHECK-NEXT:    uzp1 z1.h, z0.h, z0.h
-; CHECK-NEXT:    uzp1 z0.b, z0.b, z1.b
+; CHECK-NEXT:    uzp1 z0.b, z0.b, z0.b
 ; CHECK-NEXT:    ret
   %ptrs = getelementptr i8, ptr %base, <vscale x 16 x i8> %indices
   %mask.false.hi = call <vscale x 16 x i1> @llvm.vector.insert.nxv16i1.nxv4i1(<vscale x 16 x i1> splat (i1 false), <vscale x 4 x i1> %mask, i64 0)
diff --git a/llvm/test/CodeGen/AArch64/sve-masked-gather.ll b/llvm/test/CodeGen/AArch64/sve-masked-gather.ll
index 77329371c8e2c..fa671e9d5df9e 100644
--- a/llvm/test/CodeGen/AArch64/sve-masked-gather.ll
+++ b/llvm/test/CodeGen/AArch64/sve-masked-gather.ll
@@ -103,10 +103,8 @@ define <vscale x 16 x i8> @masked_gather_nxv1i8(<vscale x 2 x ptr> %wide.ptrs, <
 ; CHECK-NEXT:    uzp1 p0.d, p0.d, p1.d
 ; CHECK-NEXT:    ld1b { z0.d }, p0/z, [z0.d]
 ; CHECK-NEXT:    uzp1 z0.s, z0.s, z0.s
-; CHECK-NEXT:    uzp1 z1.s, z0.s, z0.s
-; CHECK-NEXT:    uzp1 z0.h, z0.h, z1.h
-; CHECK-NEXT:    uzp1 z1.h, z1.h, z1.h
-; CHECK-NEXT:    uzp1 z0.b, z0.b, z1.b
+; CHECK-NEXT:    uzp1 z0.h, z0.h, z0.h
+; CHECK-NEXT:    uzp1 z0.b, z0.b, z0.b
 ; CHECK-NEXT:    ret
   %ptrs = call <vscale x 1 x ptr> @llvm.vector.extract.nxv1p0.nxv2p0(
       <vscale x 2 x ptr> %wide.ptrs, i64 0)
@@ -124,8 +122,7 @@ define <vscale x 8 x i16> @masked_gather_nxv1i16(<vscale x 2 x ptr> %wide.ptrs,
 ; CHECK-NEXT:    uzp1 p0.d, p0.d, p1.d
 ; CHECK-NEXT:    ld1h { z0.d }, p0/z, [z0.d]
 ; CHECK-NEXT:    uzp1 z0.s, z0.s, z0.s
-; CHECK-NEXT:    uzp1 z1.s, z0.s, z0.s
-; CHECK-NEXT:    uzp1 z0.h, z0.h, z1.h
+; CHECK-NEXT:    uzp1 z0.h, z0.h, z0.h
 ; CHECK-NEXT:    ret
   %ptrs = call <vscale x 1 x ptr> @llvm.vector.extract.nxv1p0.nxv2p0(
       <vscale x 2 x ptr> %wide.ptrs, i64 0)

``````````

</details>


https://github.com/llvm/llvm-project/pull/204623


More information about the llvm-branch-commits mailing list