[llvm] [LLVM][CodeGen] Remove AArch64ISD::MOVIedit. (PR #187320)
Paul Walker via llvm-commits
llvm-commits at lists.llvm.org
Wed Mar 18 09:22:44 PDT 2026
https://github.com/paulwalker-arm created https://github.com/llvm/llvm-project/pull/187320
This is the first in a series of patches to simplify the handling of vector constants.
While I hope to migrate everything to generic nodes (ISD::BUILD_VECTOR or ISD::SPLAT_VECTOR), there are several moving parts, so I've settled on initially using AArch64ISD::DUP as the canonical form.
The overall rationale for this work is to reduce special case constant handling and enable existing combines/patterns to kick in more often.
>From 14cf0e85812f18e544e204dc5e3ce008ee179722 Mon Sep 17 00:00:00 2001
From: Paul Walker <paul.walker at arm.com>
Date: Wed, 18 Mar 2026 15:23:08 +0000
Subject: [PATCH] [LLVM][CodeGen] Remove AArch64ISD::MOVIedit.
This is the first in a series of patches to simplify the handling of
vector constants.
While I hope to migrate everything to generic nodes (ISD::BUILD_VECTOR
or ISD::SPLAT_VECTOR) there are several moving parts and so I've
settled on initially using AArch64ISD::DUP as the canonical form.
The overall rationale for this work is to reduce special case constant
handling and enable existing combines/patterns to kick in more often.
---
.../Target/AArch64/AArch64ISelDAGToDAG.cpp | 23 ++-
.../Target/AArch64/AArch64ISelLowering.cpp | 45 +++---
llvm/lib/Target/AArch64/AArch64InstrInfo.td | 21 +--
llvm/test/CodeGen/AArch64/aarch64-smull.ll | 140 +++++++++++-------
.../AArch64/AArch64SelectionDAGTest.cpp | 16 --
llvm/utils/TableGen/FastISelEmitter.cpp | 4 +
6 files changed, 142 insertions(+), 107 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
index 1b706411791e9..6dc30b8b6bcbe 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -212,6 +212,8 @@ class AArch64DAGToDAGISel : public SelectionDAGISel {
return true;
}
+ bool SelectAdvSIMDModImmType10(SDValue N, SDValue &Imm);
+
bool SelectDupZeroOrUndef(SDValue N) {
switch(N->getOpcode()) {
case ISD::UNDEF:
@@ -635,8 +637,6 @@ static std::optional<APInt> DecodeNEONSplat(SDValue N) {
if (N->getOpcode() == AArch64ISD::DUP)
if (auto *Const = dyn_cast<ConstantSDNode>(N->getOperand(0)))
return Const->getAPIntValue().trunc(SplatWidth);
- // TODO: Recognize more splat-like NEON operations. See ConstantBuildVector
- // in AArch64ISelLowering. AArch64ISD::MOVIedit support will allow more folds.
return std::nullopt;
}
@@ -7957,6 +7957,25 @@ bool AArch64DAGToDAGISel::SelectCmpBranchExtOperand(SDValue N, SDValue &Reg,
return false;
}
+bool AArch64DAGToDAGISel::SelectAdvSIMDModImmType10(SDValue N, SDValue &Imm) {
+ if (N->getOpcode() != AArch64ISD::DUP ||
+ !isa<ConstantSDNode>(N->getOperand(0)))
+ return false;
+
+ unsigned ScalarSize = N->getValueType(0).getScalarSizeInBits();
+ APInt ScalarImm = N->getConstantOperandAPInt(0).trunc(ScalarSize);
+ APInt VectorImm = APInt::getSplat(64, ScalarImm);
+
+ uint64_t Value = VectorImm.getZExtValue();
+ if (AArch64_AM::isAdvSIMDModImmType10(Value)) {
+ Value = AArch64_AM::encodeAdvSIMDModImmType10(Value);
+ Imm = CurDAG->getTargetConstant(Value, SDLoc(N), MVT::i32);
+ return true;
+ }
+
+ return false;
+}
+
void AArch64DAGToDAGISel::PreprocessISelDAG() {
bool MadeChange = false;
for (SDNode &N : llvm::make_early_inc_range(CurDAG->allnodes())) {
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 38db1ac4a2fb9..d4775856acfea 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -2828,12 +2828,6 @@ void AArch64TargetLowering::computeKnownBitsForTargetNode(
Known.getBitWidth(), ~(~Op->getConstantOperandVal(0) << ShiftAmt)));
break;
}
- case AArch64ISD::MOVIedit: {
- Known = KnownBits::makeConstant(APInt(
- Known.getBitWidth(),
- AArch64_AM::decodeAdvSIMDModImmType10(Op->getConstantOperandVal(0))));
- break;
- }
case AArch64ISD::MVNIshift: {
Known = KnownBits::makeConstant(
APInt(Known.getBitWidth(),
@@ -15516,20 +15510,26 @@ static bool resolveBuildVector(BuildVectorSDNode *BVN, APInt &CnstBits,
}
// Try 64-bit splatted SIMD immediate.
-static SDValue tryAdvSIMDModImm64(unsigned NewOp, SDValue Op, SelectionDAG &DAG,
- const APInt &Bits) {
+static SDValue tryAdvSIMDModImm64(SDValue Op, SelectionDAG &DAG,
+ const APInt &Bits) {
if (Bits.getHiBits(64) == Bits.getLoBits(64)) {
- uint64_t Value = Bits.zextOrTrunc(64).getZExtValue();
- EVT VT = Op.getValueType();
- MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v2i64 : MVT::f64;
+ APInt SplatVal = Bits.trunc(64);
- if (AArch64_AM::isAdvSIMDModImmType10(Value)) {
- Value = AArch64_AM::encodeAdvSIMDModImmType10(Value);
+ if (AArch64_AM::isAdvSIMDModImmType10(SplatVal.getZExtValue())) {
+ EVT VT = Op.getValueType();
+ EVT SplatVT = (VT.getSizeInBits() == 128) ? MVT::v2i64 : MVT::v1i64;
+
+ // Pick a more natural (requires minimal casting) integer type.
+ if (Bits.isSplat(VT.getScalarSizeInBits())) {
+ SplatVT = VT.changeVectorElementTypeToInteger();
+ SplatVal = SplatVal.trunc(SplatVT.getScalarSizeInBits());
+ }
SDLoc DL(Op);
- SDValue Mov =
- DAG.getNode(NewOp, DL, MovTy, DAG.getConstant(Value, DL, MVT::i32));
- return DAG.getNode(AArch64ISD::NVCAST, DL, VT, Mov);
+ MVT ScalarVT = SplatVT.getScalarSizeInBits() == 64 ? MVT::i64 : MVT::i32;
+ SDValue Scalar = DAG.getConstant(SplatVal.getZExtValue(), DL, ScalarVT);
+ SDValue Splat = DAG.getNode(AArch64ISD::DUP, DL, SplatVT, Scalar);
+ return DAG.getNode(AArch64ISD::NVCAST, DL, VT, Splat);
}
}
@@ -16080,8 +16080,7 @@ static SDValue ConstantBuildVector(SDValue Op, SelectionDAG &DAG,
if (resolveBuildVector(BVN, DefBits, UndefBits)) {
auto TryMOVIWithBits = [&](APInt DefBits) {
SDValue NewOp;
- if ((NewOp =
- tryAdvSIMDModImm64(AArch64ISD::MOVIedit, Op, DAG, DefBits)) ||
+ if ((NewOp = tryAdvSIMDModImm64(Op, DAG, DefBits)) ||
(NewOp =
tryAdvSIMDModImm32(AArch64ISD::MOVIshift, Op, DAG, DefBits)) ||
(NewOp =
@@ -22137,7 +22136,6 @@ static SDValue tryExtendDUPToExtractHigh(SDValue N, SelectionDAG &DAG) {
case AArch64ISD::DUPLANE64:
case AArch64ISD::MOVI:
case AArch64ISD::MOVIshift:
- case AArch64ISD::MOVIedit:
case AArch64ISD::MOVImsl:
case AArch64ISD::MVNIshift:
case AArch64ISD::MVNImsl:
@@ -33244,8 +33242,8 @@ bool AArch64TargetLowering::canCreateUndefOrPoisonForTargetNode(
// TODO: Add more target nodes.
switch (Op.getOpcode()) {
+ case AArch64ISD::DUP:
case AArch64ISD::MOVI:
- case AArch64ISD::MOVIedit:
case AArch64ISD::MOVImsl:
case AArch64ISD::MOVIshift:
case AArch64ISD::MVNImsl:
@@ -33264,15 +33262,8 @@ bool AArch64TargetLowering::isTargetCanonicalConstantNode(SDValue Op) const {
Op.getOpcode() == AArch64ISD::MOVI ||
Op.getOpcode() == AArch64ISD::MOVIshift ||
Op.getOpcode() == AArch64ISD::MOVImsl ||
- Op.getOpcode() == AArch64ISD::MOVIedit ||
Op.getOpcode() == AArch64ISD::MVNIshift ||
Op.getOpcode() == AArch64ISD::MVNImsl ||
- // Ignoring fneg(movi(0)), because if it is folded to FPConstant(-0.0),
- // ISel will select fmov(mov i64 0x8000000000000000), resulting in a
- // fmov from fpr to gpr, which is more expensive than fneg(movi(0))
- (Op.getOpcode() == ISD::FNEG &&
- Op.getOperand(0).getOpcode() == AArch64ISD::MOVIedit &&
- Op.getOperand(0).getConstantOperandVal(0) == 0) ||
(Op.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
Op.getOperand(0).getOpcode() == AArch64ISD::DUP) ||
TargetLowering::isTargetCanonicalConstantNode(Op);
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index 44968b14b11a9..aa0d869524d7e 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -920,7 +920,6 @@ def AArch64trn1 : SDNode<"AArch64ISD::TRN1", SDT_AArch64Zip>;
def AArch64trn2 : SDNode<"AArch64ISD::TRN2", SDT_AArch64Zip>;
// Vector immediate moves
-def AArch64movi_edit : SDNode<"AArch64ISD::MOVIedit", SDT_AArch64MOVIedit>;
def AArch64movi_shift : SDNode<"AArch64ISD::MOVIshift", SDT_AArch64MOVIshift>;
def AArch64movi_msl : SDNode<"AArch64ISD::MOVImsl", SDT_AArch64MOVIshift>;
def AArch64mvni_shift : SDNode<"AArch64ISD::MVNIshift", SDT_AArch64MOVIshift>;
@@ -8708,24 +8707,26 @@ def FMOVv8f16_ns : SIMDModifiedImmVectorNoShift<1, 0, 1, 0b1111, V128, fpimm8,
// AdvSIMD MOVI
+def AdvSIMDModImmType10 : ComplexPattern<vAny, 1, "SelectAdvSIMDModImmType10", [AArch64dup]>;
+
// EDIT byte mask: scalar
let isReMaterializable = 1, isAsCheapAsAMove = 1 in
def MOVID : SIMDModifiedImmScalarNoShift<0, 1, 0b1110, "movi",
[(set FPR64:$Rd, simdimmtype10:$imm8)]>;
-// The movi_edit node has the immediate value already encoded, so we use
-// a plain imm0_255 here.
-def : Pat<(f64 (AArch64movi_edit imm0_255:$shift)),
- (MOVID imm0_255:$shift)>;
-// EDIT byte mask: 2d
+foreach VT = [v8i8, v4i16, v2i32, v1i64, v4f16, v2f32, v1f64, v4bf16] in
+ def : Pat<(VT (AdvSIMDModImmType10 imm0_255:$imm8)),
+ (MOVID imm0_255:$imm8)>;
-// The movi_edit node has the immediate value already encoded, so we use
-// a plain imm0_255 in the pattern
+// EDIT byte mask: 2d
let isReMaterializable = 1, isAsCheapAsAMove = 1 in
def MOVIv2d_ns : SIMDModifiedImmVectorNoShift<1, 1, 0, 0b1110, V128,
simdimmtype10,
- "movi", ".2d",
- [(set (v2i64 V128:$Rd), (AArch64movi_edit imm0_255:$imm8))]>;
+ "movi", ".2d", []>;
+
+foreach VT = [v16i8, v8i16, v4i32, v2i64, v8f16, v4f32, v2f64, v8bf16] in
+ def : Pat<(VT (AdvSIMDModImmType10 imm0_255:$imm8)),
+ (MOVIv2d_ns imm0_255:$imm8)>;
let Predicates = [HasNEON] in {
def : Pat<(f128 fpimm0), (f128 (MOVIv2d_ns (i32 0)))>;
diff --git a/llvm/test/CodeGen/AArch64/aarch64-smull.ll b/llvm/test/CodeGen/AArch64/aarch64-smull.ll
index eb5180e9b86e0..ca7f40d560afd 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-smull.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-smull.ll
@@ -378,11 +378,11 @@ define <4 x i32> @amull_v4i16_v4i32(ptr %A, ptr %B) nounwind {
;
; CHECK-SVE-LABEL: amull_v4i16_v4i32:
; CHECK-SVE: // %bb.0:
-; CHECK-SVE-NEXT: ldr d1, [x0]
-; CHECK-SVE-NEXT: ldr d2, [x1]
-; CHECK-SVE-NEXT: movi v0.2d, #0x00ffff0000ffff
-; CHECK-SVE-NEXT: smull v1.4s, v1.4h, v2.4h
-; CHECK-SVE-NEXT: and v0.16b, v1.16b, v0.16b
+; CHECK-SVE-NEXT: ldr d0, [x0]
+; CHECK-SVE-NEXT: ldr d1, [x1]
+; CHECK-SVE-NEXT: smull v0.4s, v0.4h, v1.4h
+; CHECK-SVE-NEXT: and z0.s, z0.s, #0xffff
+; CHECK-SVE-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-SVE-NEXT: ret
;
; CHECK-GI-LABEL: amull_v4i16_v4i32:
@@ -414,11 +414,11 @@ define <2 x i64> @amull_v2i32_v2i64(ptr %A, ptr %B) nounwind {
;
; CHECK-SVE-LABEL: amull_v2i32_v2i64:
; CHECK-SVE: // %bb.0:
-; CHECK-SVE-NEXT: ldr d1, [x0]
-; CHECK-SVE-NEXT: ldr d2, [x1]
-; CHECK-SVE-NEXT: movi v0.2d, #0x000000ffffffff
-; CHECK-SVE-NEXT: smull v1.2d, v1.2s, v2.2s
-; CHECK-SVE-NEXT: and v0.16b, v1.16b, v0.16b
+; CHECK-SVE-NEXT: ldr d0, [x0]
+; CHECK-SVE-NEXT: ldr d1, [x1]
+; CHECK-SVE-NEXT: smull v0.2d, v0.2s, v1.2s
+; CHECK-SVE-NEXT: and z0.d, z0.d, #0xffffffff
+; CHECK-SVE-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-SVE-NEXT: ret
;
; CHECK-GI-LABEL: amull_v2i32_v2i64:
@@ -602,8 +602,8 @@ define <4 x i32> @amlal_v4i16_v4i32(ptr %A, ptr %B, ptr %C) nounwind {
; CHECK-SVE-NEXT: ldr d1, [x1]
; CHECK-SVE-NEXT: ldr d2, [x2]
; CHECK-SVE-NEXT: smlal v0.4s, v1.4h, v2.4h
-; CHECK-SVE-NEXT: movi v1.2d, #0x00ffff0000ffff
-; CHECK-SVE-NEXT: and v0.16b, v0.16b, v1.16b
+; CHECK-SVE-NEXT: and z0.s, z0.s, #0xffff
+; CHECK-SVE-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-SVE-NEXT: ret
;
; CHECK-GI-LABEL: amlal_v4i16_v4i32:
@@ -643,8 +643,8 @@ define <2 x i64> @amlal_v2i32_v2i64(ptr %A, ptr %B, ptr %C) nounwind {
; CHECK-SVE-NEXT: ldr d1, [x1]
; CHECK-SVE-NEXT: ldr d2, [x2]
; CHECK-SVE-NEXT: smlal v0.2d, v1.2s, v2.2s
-; CHECK-SVE-NEXT: movi v1.2d, #0x000000ffffffff
-; CHECK-SVE-NEXT: and v0.16b, v0.16b, v1.16b
+; CHECK-SVE-NEXT: and z0.d, z0.d, #0xffffffff
+; CHECK-SVE-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-SVE-NEXT: ret
;
; CHECK-GI-LABEL: amlal_v2i32_v2i64:
@@ -831,8 +831,8 @@ define <4 x i32> @amlsl_v4i16_v4i32(ptr %A, ptr %B, ptr %C) nounwind {
; CHECK-SVE-NEXT: ldr d1, [x1]
; CHECK-SVE-NEXT: ldr d2, [x2]
; CHECK-SVE-NEXT: smlsl v0.4s, v1.4h, v2.4h
-; CHECK-SVE-NEXT: movi v1.2d, #0x00ffff0000ffff
-; CHECK-SVE-NEXT: and v0.16b, v0.16b, v1.16b
+; CHECK-SVE-NEXT: and z0.s, z0.s, #0xffff
+; CHECK-SVE-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-SVE-NEXT: ret
;
; CHECK-GI-LABEL: amlsl_v4i16_v4i32:
@@ -872,8 +872,8 @@ define <2 x i64> @amlsl_v2i32_v2i64(ptr %A, ptr %B, ptr %C) nounwind {
; CHECK-SVE-NEXT: ldr d1, [x1]
; CHECK-SVE-NEXT: ldr d2, [x2]
; CHECK-SVE-NEXT: smlsl v0.2d, v1.2s, v2.2s
-; CHECK-SVE-NEXT: movi v1.2d, #0x000000ffffffff
-; CHECK-SVE-NEXT: and v0.16b, v0.16b, v1.16b
+; CHECK-SVE-NEXT: and z0.d, z0.d, #0xffffffff
+; CHECK-SVE-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-SVE-NEXT: ret
;
; CHECK-GI-LABEL: amlsl_v2i32_v2i64:
@@ -1111,8 +1111,8 @@ define <4 x i32> @amull_extvec_v4i16_v4i32(<4 x i16> %arg) nounwind {
; CHECK-SVE-NEXT: mov w8, #1234 // =0x4d2
; CHECK-SVE-NEXT: dup v1.4h, w8
; CHECK-SVE-NEXT: smull v0.4s, v0.4h, v1.4h
-; CHECK-SVE-NEXT: movi v1.2d, #0x00ffff0000ffff
-; CHECK-SVE-NEXT: and v0.16b, v0.16b, v1.16b
+; CHECK-SVE-NEXT: and z0.s, z0.s, #0xffff
+; CHECK-SVE-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-SVE-NEXT: ret
;
; CHECK-GI-LABEL: amull_extvec_v4i16_v4i32:
@@ -1144,8 +1144,8 @@ define <2 x i64> @amull_extvec_v2i32_v2i64(<2 x i32> %arg) nounwind {
; CHECK-SVE-NEXT: mov w8, #1234 // =0x4d2
; CHECK-SVE-NEXT: dup v1.2s, w8
; CHECK-SVE-NEXT: smull v0.2d, v0.2s, v1.2s
-; CHECK-SVE-NEXT: movi v1.2d, #0x000000ffffffff
-; CHECK-SVE-NEXT: and v0.16b, v0.16b, v1.16b
+; CHECK-SVE-NEXT: and z0.d, z0.d, #0xffffffff
+; CHECK-SVE-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-SVE-NEXT: ret
;
; CHECK-GI-LABEL: amull_extvec_v2i32_v2i64:
@@ -1457,11 +1457,12 @@ define <8 x i32> @amull2_i16(<8 x i16> %arg1, <8 x i16> %arg2) {
;
; CHECK-SVE-LABEL: amull2_i16:
; CHECK-SVE: // %bb.0:
-; CHECK-SVE-NEXT: movi v2.2d, #0x00ffff0000ffff
-; CHECK-SVE-NEXT: smull v3.4s, v0.4h, v1.4h
-; CHECK-SVE-NEXT: smull2 v0.4s, v0.8h, v1.8h
-; CHECK-SVE-NEXT: and v1.16b, v0.16b, v2.16b
-; CHECK-SVE-NEXT: and v0.16b, v3.16b, v2.16b
+; CHECK-SVE-NEXT: smull v2.4s, v0.4h, v1.4h
+; CHECK-SVE-NEXT: smull2 v1.4s, v0.8h, v1.8h
+; CHECK-SVE-NEXT: and z2.s, z2.s, #0xffff
+; CHECK-SVE-NEXT: and z1.s, z1.s, #0xffff
+; CHECK-SVE-NEXT: // kill: def $q1 killed $q1 killed $z1
+; CHECK-SVE-NEXT: mov v0.16b, v2.16b
; CHECK-SVE-NEXT: ret
;
; CHECK-GI-LABEL: amull2_i16:
@@ -1491,11 +1492,12 @@ define <4 x i64> @amull2_i32(<4 x i32> %arg1, <4 x i32> %arg2) {
;
; CHECK-SVE-LABEL: amull2_i32:
; CHECK-SVE: // %bb.0:
-; CHECK-SVE-NEXT: movi v2.2d, #0x000000ffffffff
-; CHECK-SVE-NEXT: smull v3.2d, v0.2s, v1.2s
-; CHECK-SVE-NEXT: smull2 v0.2d, v0.4s, v1.4s
-; CHECK-SVE-NEXT: and v1.16b, v0.16b, v2.16b
-; CHECK-SVE-NEXT: and v0.16b, v3.16b, v2.16b
+; CHECK-SVE-NEXT: smull v2.2d, v0.2s, v1.2s
+; CHECK-SVE-NEXT: smull2 v1.2d, v0.4s, v1.4s
+; CHECK-SVE-NEXT: and z2.d, z2.d, #0xffffffff
+; CHECK-SVE-NEXT: and z1.d, z1.d, #0xffffffff
+; CHECK-SVE-NEXT: // kill: def $q1 killed $q1 killed $z1
+; CHECK-SVE-NEXT: mov v0.16b, v2.16b
; CHECK-SVE-NEXT: ret
;
; CHECK-GI-LABEL: amull2_i32:
@@ -1663,13 +1665,29 @@ entry:
}
define <4 x i32> @umull_and_v4i32(<4 x i16> %src1, <4 x i32> %src2) {
-; CHECK-LABEL: umull_and_v4i32:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: movi v2.2d, #0x0000ff000000ff
-; CHECK-NEXT: and v1.16b, v1.16b, v2.16b
-; CHECK-NEXT: xtn v1.4h, v1.4s
-; CHECK-NEXT: umull v0.4s, v0.4h, v1.4h
-; CHECK-NEXT: ret
+; CHECK-NEON-LABEL: umull_and_v4i32:
+; CHECK-NEON: // %bb.0: // %entry
+; CHECK-NEON-NEXT: movi v2.2d, #0x0000ff000000ff
+; CHECK-NEON-NEXT: and v1.16b, v1.16b, v2.16b
+; CHECK-NEON-NEXT: xtn v1.4h, v1.4s
+; CHECK-NEON-NEXT: umull v0.4s, v0.4h, v1.4h
+; CHECK-NEON-NEXT: ret
+;
+; CHECK-SVE-LABEL: umull_and_v4i32:
+; CHECK-SVE: // %bb.0: // %entry
+; CHECK-SVE-NEXT: // kill: def $q1 killed $q1 def $z1
+; CHECK-SVE-NEXT: and z1.s, z1.s, #0xff
+; CHECK-SVE-NEXT: xtn v1.4h, v1.4s
+; CHECK-SVE-NEXT: umull v0.4s, v0.4h, v1.4h
+; CHECK-SVE-NEXT: ret
+;
+; CHECK-GI-LABEL: umull_and_v4i32:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: movi v2.2d, #0x0000ff000000ff
+; CHECK-GI-NEXT: and v1.16b, v1.16b, v2.16b
+; CHECK-GI-NEXT: xtn v1.4h, v1.4s
+; CHECK-GI-NEXT: umull v0.4s, v0.4h, v1.4h
+; CHECK-GI-NEXT: ret
entry:
%in1 = zext <4 x i16> %src1 to <4 x i32>
%in2 = and <4 x i32> %src2, <i32 255, i32 255, i32 255, i32 255>
@@ -1690,9 +1708,10 @@ define <8 x i32> @umull_and_v8i32(<8 x i16> %src1, <8 x i32> %src2) {
;
; CHECK-SVE-LABEL: umull_and_v8i32:
; CHECK-SVE: // %bb.0: // %entry
-; CHECK-SVE-NEXT: movi v3.2d, #0x0000ff000000ff
-; CHECK-SVE-NEXT: and v2.16b, v2.16b, v3.16b
-; CHECK-SVE-NEXT: and v1.16b, v1.16b, v3.16b
+; CHECK-SVE-NEXT: // kill: def $q2 killed $q2 def $z2
+; CHECK-SVE-NEXT: // kill: def $q1 killed $q1 def $z1
+; CHECK-SVE-NEXT: and z2.s, z2.s, #0xff
+; CHECK-SVE-NEXT: and z1.s, z1.s, #0xff
; CHECK-SVE-NEXT: uzp1 v2.8h, v1.8h, v2.8h
; CHECK-SVE-NEXT: umull2 v1.4s, v0.8h, v2.8h
; CHECK-SVE-NEXT: umull v0.4s, v0.4h, v2.4h
@@ -1752,13 +1771,29 @@ entry:
}
define <2 x i64> @umull_and_v2i64(<2 x i32> %src1, <2 x i64> %src2) {
-; CHECK-LABEL: umull_and_v2i64:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: movi v2.2d, #0x000000000000ff
-; CHECK-NEXT: and v1.16b, v1.16b, v2.16b
-; CHECK-NEXT: xtn v1.2s, v1.2d
-; CHECK-NEXT: umull v0.2d, v0.2s, v1.2s
-; CHECK-NEXT: ret
+; CHECK-NEON-LABEL: umull_and_v2i64:
+; CHECK-NEON: // %bb.0: // %entry
+; CHECK-NEON-NEXT: movi v2.2d, #0x000000000000ff
+; CHECK-NEON-NEXT: and v1.16b, v1.16b, v2.16b
+; CHECK-NEON-NEXT: xtn v1.2s, v1.2d
+; CHECK-NEON-NEXT: umull v0.2d, v0.2s, v1.2s
+; CHECK-NEON-NEXT: ret
+;
+; CHECK-SVE-LABEL: umull_and_v2i64:
+; CHECK-SVE: // %bb.0: // %entry
+; CHECK-SVE-NEXT: // kill: def $q1 killed $q1 def $z1
+; CHECK-SVE-NEXT: and z1.d, z1.d, #0xff
+; CHECK-SVE-NEXT: xtn v1.2s, v1.2d
+; CHECK-SVE-NEXT: umull v0.2d, v0.2s, v1.2s
+; CHECK-SVE-NEXT: ret
+;
+; CHECK-GI-LABEL: umull_and_v2i64:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: movi v2.2d, #0x000000000000ff
+; CHECK-GI-NEXT: and v1.16b, v1.16b, v2.16b
+; CHECK-GI-NEXT: xtn v1.2s, v1.2d
+; CHECK-GI-NEXT: umull v0.2d, v0.2s, v1.2s
+; CHECK-GI-NEXT: ret
entry:
%in1 = zext <2 x i32> %src1 to <2 x i64>
%in2 = and <2 x i64> %src2, <i64 255, i64 255>
@@ -1779,9 +1814,10 @@ define <4 x i64> @umull_and_v4i64(<4 x i32> %src1, <4 x i64> %src2) {
;
; CHECK-SVE-LABEL: umull_and_v4i64:
; CHECK-SVE: // %bb.0: // %entry
-; CHECK-SVE-NEXT: movi v3.2d, #0x000000000000ff
-; CHECK-SVE-NEXT: and v2.16b, v2.16b, v3.16b
-; CHECK-SVE-NEXT: and v1.16b, v1.16b, v3.16b
+; CHECK-SVE-NEXT: // kill: def $q2 killed $q2 def $z2
+; CHECK-SVE-NEXT: // kill: def $q1 killed $q1 def $z1
+; CHECK-SVE-NEXT: and z2.d, z2.d, #0xff
+; CHECK-SVE-NEXT: and z1.d, z1.d, #0xff
; CHECK-SVE-NEXT: uzp1 v2.4s, v1.4s, v2.4s
; CHECK-SVE-NEXT: umull2 v1.2d, v0.4s, v2.4s
; CHECK-SVE-NEXT: umull v0.2d, v0.2s, v2.2s
diff --git a/llvm/unittests/Target/AArch64/AArch64SelectionDAGTest.cpp b/llvm/unittests/Target/AArch64/AArch64SelectionDAGTest.cpp
index b0c48e8c97995..fd0cf14af878d 100644
--- a/llvm/unittests/Target/AArch64/AArch64SelectionDAGTest.cpp
+++ b/llvm/unittests/Target/AArch64/AArch64SelectionDAGTest.cpp
@@ -563,22 +563,6 @@ TEST_F(AArch64SelectionDAGTest, ComputeKnownBits_MOVI) {
auto N165 = DAG->getConstant(0x000000A5, Loc, IntSca32VT);
KnownBits Known;
- auto OpMOVIedit64 = DAG->getNode(AArch64ISD::MOVIedit, Loc, IntVec64VT, N165);
- Known = DAG->computeKnownBits(OpMOVIedit64);
- EXPECT_EQ(Known.Zero, APInt(64, 0x00FF00FFFF00FF00));
- EXPECT_EQ(Known.One, APInt(64, 0xFF00FF0000FF00FF));
-
- auto OpMOVIedit128 =
- DAG->getNode(AArch64ISD::MOVIedit, Loc, Int2Vec64VT, N165);
- Known = DAG->computeKnownBits(OpMOVIedit128);
- EXPECT_EQ(Known.Zero, APInt(64, 0x00FF00FFFF00FF00));
- EXPECT_EQ(Known.One, APInt(64, 0xFF00FF0000FF00FF));
-
- auto FrMOVIedit128 = DAG->getFreeze(OpMOVIedit128);
- Known = DAG->computeKnownBits(FrMOVIedit128);
- EXPECT_EQ(Known.Zero, APInt(64, 0x00FF00FFFF00FF00));
- EXPECT_EQ(Known.One, APInt(64, 0xFF00FF0000FF00FF));
-
auto N264 = DAG->getConstant(264, Loc, IntSca32VT);
auto OpMOVImsl64 =
DAG->getNode(AArch64ISD::MOVImsl, Loc, Int2Vec32VT, N165, N264);
diff --git a/llvm/utils/TableGen/FastISelEmitter.cpp b/llvm/utils/TableGen/FastISelEmitter.cpp
index 381e857fca822..d5b63c4de3c3d 100644
--- a/llvm/utils/TableGen/FastISelEmitter.cpp
+++ b/llvm/utils/TableGen/FastISelEmitter.cpp
@@ -497,6 +497,10 @@ void FastISelMap::collectPatterns(const CodeGenDAGPatterns &CGP) {
continue;
const Record *InstPatOp = InstPatNode.getOperator();
+ // Only SDNode operators can be mapped to a SelectionDAG opcode name.
+ if (!InstPatOp->isSubClassOf("SDNode"))
+ continue;
+
StringRef OpcodeName = CGP.getSDNodeInfo(InstPatOp).getEnumName();
MVT RetVT = MVT::isVoid;
if (InstPatNode.getNumTypes())
More information about the llvm-commits
mailing list