[llvm-branch-commits] [llvm] [AArch64] Combine undef UZP and NVCAST away. (PR #204623)
via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Thu Jun 18 08:34:16 PDT 2026
llvmorg-github-actions[bot] wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-aarch64
Author: Gaëtan Bossu (gbossu)
<details>
<summary>Changes</summary>
UZP and NVCAST nodes are used to lower insert_subvector nodes quite early in SelectionDAG. After DAG combines run, it's possible that the inputs to these AArch64 nodes become UNDEF, in which case the nodes themselves can be folded to UNDEF.
---
Full diff: https://github.com/llvm/llvm-project/pull/204623.diff
4 Files Affected:
- (modified) llvm/lib/Target/AArch64/AArch64ISelLowering.cpp (+17-5)
- (modified) llvm/test/CodeGen/AArch64/sve-masked-gather-64b-unscaled.ll (+3-6)
- (modified) llvm/test/CodeGen/AArch64/sve-masked-gather-legalize.ll (+1-2)
- (modified) llvm/test/CodeGen/AArch64/sve-masked-gather.ll (+3-6)
``````````diff
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 67ef911117eff..e5d9a79ccbacc 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -25641,6 +25641,12 @@ static SDValue performUzpCombine(SDNode *N, SelectionDAG &DAG,
SDValue Op1 = N->getOperand(1);
EVT ResVT = N->getValueType(0);
+ // UZP is used to lower insert_subvector quite early. When later DAG combines
+ // run, it's possible to actually end up with an insert of UNDEF into UNDEF,
+ // i.e. UZP1 UNDEF, UNDEF.
+ if (Op0.isUndef() && Op1.isUndef())
+ return DAG.getUNDEF(ResVT);
+
// uzp(extract_lo(x), extract_hi(x)) -> extract_lo(uzp x, x)
if (Op0.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
Op1.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
@@ -28710,11 +28716,17 @@ static SDValue performDUPCombine(SDNode *N,
/// Get rid of unnecessary NVCASTs (that don't change the type).
static SDValue performNVCASTCombine(SDNode *N, SelectionDAG &DAG) {
- if (N->getValueType(0) == N->getOperand(0).getValueType())
- return N->getOperand(0);
- if (N->getOperand(0).getOpcode() == AArch64ISD::NVCAST)
- return DAG.getNode(AArch64ISD::NVCAST, SDLoc(N), N->getValueType(0),
- N->getOperand(0).getOperand(0));
+ EVT VT = N->getValueType(0);
+ SDValue Op = N->getOperand(0);
+
+ if (Op.isUndef())
+ return DAG.getUNDEF(VT);
+
+ if (VT == Op.getValueType())
+ return Op;
+
+ if (Op.getOpcode() == AArch64ISD::NVCAST)
+ return DAG.getNode(AArch64ISD::NVCAST, SDLoc(N), VT, Op.getOperand(0));
return SDValue();
}
diff --git a/llvm/test/CodeGen/AArch64/sve-masked-gather-64b-unscaled.ll b/llvm/test/CodeGen/AArch64/sve-masked-gather-64b-unscaled.ll
index b6552968a97f3..03e4d9361076d 100644
--- a/llvm/test/CodeGen/AArch64/sve-masked-gather-64b-unscaled.ll
+++ b/llvm/test/CodeGen/AArch64/sve-masked-gather-64b-unscaled.ll
@@ -134,10 +134,8 @@ define <vscale x 16 x i8> @masked_gather_nxv1i8(ptr %base, <vscale x 2 x i64> %w
; CHECK-NEXT: uzp1 p0.d, p0.d, p1.d
; CHECK-NEXT: ld1b { z0.d }, p0/z, [x0, z0.d]
; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
-; CHECK-NEXT: uzp1 z1.s, z0.s, z0.s
-; CHECK-NEXT: uzp1 z0.h, z0.h, z1.h
-; CHECK-NEXT: uzp1 z1.h, z1.h, z1.h
-; CHECK-NEXT: uzp1 z0.b, z0.b, z1.b
+; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
+; CHECK-NEXT: uzp1 z0.b, z0.b, z0.b
; CHECK-NEXT: ret
%offsets = call <vscale x 1 x i64> @llvm.vector.extract.nxv1i64.nxv2i64(
<vscale x 2 x i64> %wide.offsets, i64 0)
@@ -156,8 +154,7 @@ define <vscale x 8 x i16> @masked_gather_nxv1i16(ptr %base, <vscale x 2 x i64> %
; CHECK-NEXT: uzp1 p0.d, p0.d, p1.d
; CHECK-NEXT: ld1h { z0.d }, p0/z, [x0, z0.d]
; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
-; CHECK-NEXT: uzp1 z1.s, z0.s, z0.s
-; CHECK-NEXT: uzp1 z0.h, z0.h, z1.h
+; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
; CHECK-NEXT: ret
%offsets = call <vscale x 1 x i64> @llvm.vector.extract.nxv1i64.nxv2i64(
<vscale x 2 x i64> %wide.offsets, i64 0)
diff --git a/llvm/test/CodeGen/AArch64/sve-masked-gather-legalize.ll b/llvm/test/CodeGen/AArch64/sve-masked-gather-legalize.ll
index dfe1ba0a0f05c..ff3cdddce5dda 100644
--- a/llvm/test/CodeGen/AArch64/sve-masked-gather-legalize.ll
+++ b/llvm/test/CodeGen/AArch64/sve-masked-gather-legalize.ll
@@ -210,8 +210,7 @@ define <vscale x 16 x i8> @masked_gather_nxv16i8_undef_hi_mask(ptr %base, <vscal
; CHECK-NEXT: sunpklo z0.s, z0.h
; CHECK-NEXT: ld1b { z0.s }, p0/z, [x0, z0.s, sxtw]
; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
-; CHECK-NEXT: uzp1 z1.h, z0.h, z0.h
-; CHECK-NEXT: uzp1 z0.b, z0.b, z1.b
+; CHECK-NEXT: uzp1 z0.b, z0.b, z0.b
; CHECK-NEXT: ret
%ptrs = getelementptr i8, ptr %base, <vscale x 16 x i8> %indices
%mask.false.hi = call <vscale x 16 x i1> @llvm.vector.insert.nxv16i1.nxv4i1(<vscale x 16 x i1> splat (i1 false), <vscale x 4 x i1> %mask, i64 0)
diff --git a/llvm/test/CodeGen/AArch64/sve-masked-gather.ll b/llvm/test/CodeGen/AArch64/sve-masked-gather.ll
index 77329371c8e2c..fa671e9d5df9e 100644
--- a/llvm/test/CodeGen/AArch64/sve-masked-gather.ll
+++ b/llvm/test/CodeGen/AArch64/sve-masked-gather.ll
@@ -103,10 +103,8 @@ define <vscale x 16 x i8> @masked_gather_nxv1i8(<vscale x 2 x ptr> %wide.ptrs, <
; CHECK-NEXT: uzp1 p0.d, p0.d, p1.d
; CHECK-NEXT: ld1b { z0.d }, p0/z, [z0.d]
; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
-; CHECK-NEXT: uzp1 z1.s, z0.s, z0.s
-; CHECK-NEXT: uzp1 z0.h, z0.h, z1.h
-; CHECK-NEXT: uzp1 z1.h, z1.h, z1.h
-; CHECK-NEXT: uzp1 z0.b, z0.b, z1.b
+; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
+; CHECK-NEXT: uzp1 z0.b, z0.b, z0.b
; CHECK-NEXT: ret
%ptrs = call <vscale x 1 x ptr> @llvm.vector.extract.nxv1p0.nxv2p0(
<vscale x 2 x ptr> %wide.ptrs, i64 0)
@@ -124,8 +122,7 @@ define <vscale x 8 x i16> @masked_gather_nxv1i16(<vscale x 2 x ptr> %wide.ptrs,
; CHECK-NEXT: uzp1 p0.d, p0.d, p1.d
; CHECK-NEXT: ld1h { z0.d }, p0/z, [z0.d]
; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
-; CHECK-NEXT: uzp1 z1.s, z0.s, z0.s
-; CHECK-NEXT: uzp1 z0.h, z0.h, z1.h
+; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
; CHECK-NEXT: ret
%ptrs = call <vscale x 1 x ptr> @llvm.vector.extract.nxv1p0.nxv2p0(
<vscale x 2 x ptr> %wide.ptrs, i64 0)
``````````
</details>
https://github.com/llvm/llvm-project/pull/204623
More information about the llvm-branch-commits
mailing list