[llvm] [AArch64] Add lowering for misc NEON intrinsics (PR #183050)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Mar 4 05:35:30 PST 2026
https://github.com/Lukacma updated https://github.com/llvm/llvm-project/pull/183050
From fb34b86096f9181266485ad8ff2f5761d447de33 Mon Sep 17 00:00:00 2001
From: Marian Lukac <Marian.Lukac at arm.com>
Date: Tue, 24 Feb 2026 12:32:17 +0000
Subject: [PATCH 1/4] [AArch64] Add lowering for misc NEON intrinsics
This patch adds custom lowering for the following NEON intrinsics
to enable better codegen for convert and load/store operations:
- suqadd
- usqadd
- sqabs
- sqneg
- abs
---
.../Target/AArch64/AArch64ISelLowering.cpp | 13 +-
.../lib/Target/AArch64/AArch64InstrFormats.td | 24 ++-
llvm/lib/Target/AArch64/AArch64InstrInfo.td | 12 +-
llvm/test/CodeGen/AArch64/arm64-int-neon.ll | 138 +++++++++++++++++-
llvm/test/CodeGen/AArch64/arm64-neon-copy.ll | 11 +-
llvm/test/CodeGen/AArch64/arm64-vqadd.ll | 32 ++--
6 files changed, 191 insertions(+), 39 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 0e24f022b6fd0..90abba251198c 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -6460,9 +6460,10 @@ SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
EVT Ty = Op.getValueType();
if (Ty == MVT::i64) {
SDValue Result =
- DAG.getNode(ISD::BITCAST, DL, MVT::v1i64, Op.getOperand(1));
+ DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v1i64, Op.getOperand(1));
Result = DAG.getNode(ISD::ABS, DL, MVT::v1i64, Result);
- return DAG.getNode(ISD::BITCAST, DL, MVT::i64, Result);
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i64, Result,
+ DAG.getConstant(0, DL, MVT::i64));
} else if (Ty.isVector() && Ty.isInteger() && isTypeLegal(Ty)) {
return DAG.getNode(ISD::ABS, DL, Ty, Op.getOperand(1));
} else {
@@ -6641,6 +6642,10 @@ SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
return DAG.getNode(ISD::UADDSAT, DL, Op.getValueType(), Op.getOperand(1),
Op.getOperand(2));
return lowerIntNeonIntrinsic(Op, AArch64ISD::UQADD, DAG);
+ case Intrinsic::aarch64_neon_suqadd:
+ return lowerIntNeonIntrinsic(Op, AArch64ISD::SUQADD, DAG);
+ case Intrinsic::aarch64_neon_usqadd:
+ return lowerIntNeonIntrinsic(Op, AArch64ISD::USQADD, DAG);
case Intrinsic::aarch64_neon_uqsub:
if (Op.getValueType().isVector())
return DAG.getNode(ISD::USUBSAT, DL, Op.getValueType(), Op.getOperand(1),
@@ -6648,6 +6653,10 @@ SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
return lowerIntNeonIntrinsic(Op, AArch64ISD::UQSUB, DAG);
case Intrinsic::aarch64_neon_sqdmulls_scalar:
return lowerIntNeonIntrinsic(Op, AArch64ISD::SQDMULL, DAG);
+ case Intrinsic::aarch64_neon_sqabs:
+ return lowerIntNeonIntrinsic(Op, AArch64ISD::SQABS, DAG);
+ case Intrinsic::aarch64_neon_sqneg:
+ return lowerIntNeonIntrinsic(Op, AArch64ISD::SQNEG, DAG);
case Intrinsic::aarch64_sve_whilelt:
return optimizeIncrementingWhile(Op.getNode(), DAG, /*IsSigned=*/true,
/*IsEqual=*/false);
diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
index 19332507efe1d..b094ce291931d 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
@@ -8123,32 +8123,40 @@ multiclass SIMDFPTwoScalarCVT<bit U, bit S, bits<5> opc, string asm,
}
multiclass SIMDTwoScalarBHSD<bit U, bits<5> opc, string asm,
- SDPatternOperator OpNode = null_frag> {
+ SDPatternOperator OpNode, SDPatternOperator G_OpNode> {
let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in {
def v1i64 : BaseSIMDTwoScalar<U, 0b11, 0b00, opc, FPR64, FPR64, asm,
- [(set (i64 FPR64:$Rd), (OpNode (i64 FPR64:$Rn)))]>;
+ [(set (i64 FPR64:$Rd), (G_OpNode (i64 FPR64:$Rn)))]>;
def v1i32 : BaseSIMDTwoScalar<U, 0b10, 0b00, opc, FPR32, FPR32, asm,
- [(set (i32 FPR32:$Rd), (OpNode (i32 FPR32:$Rn)))]>;
+ [(set (i32 FPR32:$Rd), (G_OpNode (i32 FPR32:$Rn)))]>;
def v1i16 : BaseSIMDTwoScalar<U, 0b01, 0b00, opc, FPR16, FPR16, asm, []>;
def v1i8 : BaseSIMDTwoScalar<U, 0b00, 0b00, opc, FPR8 , FPR8 , asm, []>;
}
- def : Pat<(v1i64 (OpNode (v1i64 FPR64:$Rn))),
+ def : Pat<(v1i64 (G_OpNode (v1i64 FPR64:$Rn))),
+ (!cast<Instruction>(NAME # v1i64) FPR64:$Rn)>;
+ def : Pat<(f32 (OpNode (f32 FPR32:$Rn))),
+ (!cast<Instruction>(NAME # v1i32) FPR32:$Rn)>;
+ def : Pat<(f64 (OpNode (f64 FPR64:$Rn))),
(!cast<Instruction>(NAME # v1i64) FPR64:$Rn)>;
}
multiclass SIMDTwoScalarBHSDTied<bit U, bits<5> opc, string asm,
- Intrinsic OpNode> {
+ SDPatternOperator OpNode, Intrinsic G_OpNode> {
let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in {
def v1i64 : BaseSIMDTwoScalarTied<U, 0b11, opc, FPR64, FPR64, asm,
- [(set (i64 FPR64:$dst), (OpNode (i64 FPR64:$Rd), (i64 FPR64:$Rn)))]>;
+ [(set (i64 FPR64:$dst), (G_OpNode (i64 FPR64:$Rd), (i64 FPR64:$Rn)))]>;
def v1i32 : BaseSIMDTwoScalarTied<U, 0b10, opc, FPR32, FPR32, asm,
- [(set (i32 FPR32:$dst), (OpNode (i32 FPR32:$Rd), (i32 FPR32:$Rn)))]>;
+ [(set (i32 FPR32:$dst), (G_OpNode (i32 FPR32:$Rd), (i32 FPR32:$Rn)))]>;
def v1i16 : BaseSIMDTwoScalarTied<U, 0b01, opc, FPR16, FPR16, asm, []>;
def v1i8 : BaseSIMDTwoScalarTied<U, 0b00, opc, FPR8 , FPR8 , asm, []>;
}
- def : Pat<(v1i64 (OpNode (v1i64 FPR64:$Rd), (v1i64 FPR64:$Rn))),
+ def : Pat<(v1i64 (G_OpNode (v1i64 FPR64:$Rd), (v1i64 FPR64:$Rn))),
+ (!cast<Instruction>(NAME # v1i64) FPR64:$Rd, FPR64:$Rn)>;
+ def : Pat<(f32 (OpNode (f32 FPR32:$Rd), (f32 FPR32:$Rn))),
+ (!cast<Instruction>(NAME # v1i32) FPR32:$Rd, FPR32:$Rn)>;
+ def : Pat<(f64 (OpNode (f64 FPR64:$Rd), (f64 FPR64:$Rn))),
(!cast<Instruction>(NAME # v1i64) FPR64:$Rd, FPR64:$Rn)>;
}
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index dfa53a39d5934..0c7ccc0152e9c 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -1028,12 +1028,16 @@ def AArch64sqrshl: SDNode<"AArch64ISD::SQRSHL", SDTFPBinOp>;
def AArch64sqshl: SDNode<"AArch64ISD::SQSHL", SDTFPBinOp>;
def AArch64sqsub: SDNode<"AArch64ISD::SQSUB", SDTFPBinOp>;
def AArch64uqadd: SDNode<"AArch64ISD::UQADD", SDTFPBinOp>;
+def AArch64suqadd: SDNode<"AArch64ISD::SUQADD", SDTFPBinOp>;
+def AArch64usqadd: SDNode<"AArch64ISD::USQADD", SDTFPBinOp>;
def AArch64uqrshl: SDNode<"AArch64ISD::UQRSHL", SDTFPBinOp>;
def AArch64uqshl: SDNode<"AArch64ISD::UQSHL", SDTFPBinOp>;
def AArch64uqsub: SDNode<"AArch64ISD::UQSUB", SDTFPBinOp>;
def AArch64sqdmull: SDNode<"AArch64ISD::SQDMULL",
SDTypeProfile<1, 2, [ SDTCisSameAs<1, 2>,
SDTCisFP<0>, SDTCisFP<1>]>>;
+def AArch64sqabs: SDNode<"AArch64ISD::SQABS", SDTFPUnaryOp>;
+def AArch64sqneg: SDNode<"AArch64ISD::SQNEG", SDTFPUnaryOp>;
def AArch64sqshrun: SDNode<"AArch64ISD::SQSHRUN", SDTFPTruncRoundOp>;
def AArch64sqrshrun: SDNode<"AArch64ISD::SQRSHRUN", SDTFPTruncRoundOp>;
def AArch64sqshrn: SDNode<"AArch64ISD::SQSHRN", SDTFPTruncRoundOp>;
@@ -6646,15 +6650,15 @@ defm FRSQRTE : SIMDFPTwoScalar< 1, 1, 0b11101, "frsqrte">;
defm NEG : SIMDTwoScalarD< 1, 0b01011, "neg",
UnOpFrag<(sub immAllZerosV, node:$LHS)> >;
defm SCVTF : SIMDFPTwoScalarCVT< 0, 0, 0b11101, "scvtf", AArch64sitof>;
-defm SQABS : SIMDTwoScalarBHSD< 0, 0b00111, "sqabs", int_aarch64_neon_sqabs>;
-defm SQNEG : SIMDTwoScalarBHSD< 1, 0b00111, "sqneg", int_aarch64_neon_sqneg>;
+defm SQABS : SIMDTwoScalarBHSD< 0, 0b00111, "sqabs", AArch64sqabs, int_aarch64_neon_sqabs>;
+defm SQNEG : SIMDTwoScalarBHSD< 1, 0b00111, "sqneg", AArch64sqneg, int_aarch64_neon_sqneg>;
defm SQXTN : SIMDTwoScalarMixedBHS< 0, 0b10100, "sqxtn", int_aarch64_neon_scalar_sqxtn>;
defm SQXTUN : SIMDTwoScalarMixedBHS< 1, 0b10010, "sqxtun", int_aarch64_neon_scalar_sqxtun>;
-defm SUQADD : SIMDTwoScalarBHSDTied< 0, 0b00011, "suqadd",
+defm SUQADD : SIMDTwoScalarBHSDTied< 0, 0b00011, "suqadd", AArch64suqadd,
int_aarch64_neon_suqadd>;
defm UCVTF : SIMDFPTwoScalarCVT< 1, 0, 0b11101, "ucvtf", AArch64uitof>;
defm UQXTN : SIMDTwoScalarMixedBHS<1, 0b10100, "uqxtn", int_aarch64_neon_scalar_uqxtn>;
-defm USQADD : SIMDTwoScalarBHSDTied< 1, 0b00011, "usqadd",
+defm USQADD : SIMDTwoScalarBHSDTied< 1, 0b00011, "usqadd", AArch64usqadd,
int_aarch64_neon_usqadd>;
// Floating-point conversion patterns.
diff --git a/llvm/test/CodeGen/AArch64/arm64-int-neon.ll b/llvm/test/CodeGen/AArch64/arm64-int-neon.ll
index 40716c1fffa7c..c728654d9e851 100644
--- a/llvm/test/CodeGen/AArch64/arm64-int-neon.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-int-neon.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
-; RUN: llc < %s -mtriple aarch64-unknown-unknown -mattr=+fprcvt,+fullfp16 | FileCheck %s --check-prefixes=CHECK
+; RUN: llc < %s -mtriple aarch64-unknown-unknown -mattr=+fprcvt,+fullfp16 | FileCheck %s --check-prefixes=CHECK,CHECK-SD
; RUN: llc < %s -mtriple aarch64-unknown-unknown -global-isel -global-isel-abort=2 -mattr=+fprcvt,+fullfp16 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
; CHECK-GI: warning: Instruction selection used fallback path for test_uqadd_s32
@@ -205,6 +205,62 @@ entry:
ret void
}
+define void @test_suqadd_s32(float noundef %a, ptr %dst) {
+; CHECK-LABEL: test_suqadd_s32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fcvtzs s0, s0
+; CHECK-NEXT: suqadd s0, s0
+; CHECK-NEXT: str s0, [x0]
+; CHECK-NEXT: ret
+entry:
+ %cvt = tail call i32 @llvm.aarch64.neon.fcvtzs.i32.f32(float %a)
+ %res = tail call i32 @llvm.aarch64.neon.suqadd.i32(i32 %cvt, i32 %cvt)
+ store i32 %res, ptr %dst, align 4
+ ret void
+}
+
+define void @test_suqadd_s64(float noundef %a, ptr %dst) {
+; CHECK-LABEL: test_suqadd_s64:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fcvtzs d0, s0
+; CHECK-NEXT: suqadd d0, d0
+; CHECK-NEXT: str d0, [x0]
+; CHECK-NEXT: ret
+entry:
+ %cvt = tail call i64 @llvm.aarch64.neon.fcvtzs.i64.f32(float %a)
+ %res = tail call i64 @llvm.aarch64.neon.suqadd.i64(i64 %cvt, i64 %cvt)
+ store i64 %res, ptr %dst, align 8
+ ret void
+}
+
+define void @test_usqadd_s32(float noundef %a, ptr %dst) {
+; CHECK-LABEL: test_usqadd_s32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fcvtzs s0, s0
+; CHECK-NEXT: usqadd s0, s0
+; CHECK-NEXT: str s0, [x0]
+; CHECK-NEXT: ret
+entry:
+ %cvt = tail call i32 @llvm.aarch64.neon.fcvtzs.i32.f32(float %a)
+ %res = tail call i32 @llvm.aarch64.neon.usqadd.i32(i32 %cvt, i32 %cvt)
+ store i32 %res, ptr %dst, align 4
+ ret void
+}
+
+define void @test_usqadd_s64(float noundef %a, ptr %dst) {
+; CHECK-LABEL: test_usqadd_s64:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fcvtzs d0, s0
+; CHECK-NEXT: usqadd d0, d0
+; CHECK-NEXT: str d0, [x0]
+; CHECK-NEXT: ret
+entry:
+ %cvt = tail call i64 @llvm.aarch64.neon.fcvtzs.i64.f32(float %a)
+ %res = tail call i64 @llvm.aarch64.neon.usqadd.i64(i64 %cvt, i64 %cvt)
+ store i64 %res, ptr %dst, align 8
+ ret void
+}
+
define void @test_sqadd_s32(float noundef %a, ptr %dst) {
; CHECK-LABEL: test_sqadd_s32:
; CHECK: // %bb.0: // %entry
@@ -289,6 +345,28 @@ entry:
ret void
}
+define void @test_abs_s64(float noundef %a, ptr %dst) {
+; CHECK-SD-LABEL: test_abs_s64:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: fcvtzs d0, s0
+; CHECK-SD-NEXT: abs d0, d0
+; CHECK-SD-NEXT: str d0, [x0]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_abs_s64:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: fcvtzs x8, s0
+; CHECK-GI-NEXT: cmp x8, #0
+; CHECK-GI-NEXT: cneg x8, x8, le
+; CHECK-GI-NEXT: str x8, [x0]
+; CHECK-GI-NEXT: ret
+entry:
+ %cvt = tail call i64 @llvm.aarch64.neon.fcvtzs.i64.f32(float %a)
+ %res = tail call i64 @llvm.aarch64.neon.abs.i64(i64 %cvt)
+ store i64 %res, ptr %dst, align 8
+ ret void
+}
+
define void @test_uqsub_s32(float noundef %a, ptr %dst) {
; CHECK-LABEL: test_uqsub_s32:
; CHECK: // %bb.0: // %entry
@@ -386,7 +464,61 @@ entry:
ret void
}
+define void @test_sqabs_s32(float noundef %a, ptr %dst) {
+; CHECK-LABEL: test_sqabs_s32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fcvtzs s0, s0
+; CHECK-NEXT: sqabs s0, s0
+; CHECK-NEXT: str s0, [x0]
+; CHECK-NEXT: ret
+entry:
+ %cvt = tail call i32 @llvm.aarch64.neon.fcvtzs.i32.f32(float %a)
+ %res = tail call i32 @llvm.aarch64.neon.sqabs.i32(i32 %cvt)
+ store i32 %res, ptr %dst, align 4
+ ret void
+}
+
+define void @test_sqabs_s64(float noundef %a, ptr %dst) {
+; CHECK-LABEL: test_sqabs_s64:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fcvtzs d0, s0
+; CHECK-NEXT: sqabs d0, d0
+; CHECK-NEXT: str d0, [x0]
+; CHECK-NEXT: ret
+entry:
+ %cvt = tail call i64 @llvm.aarch64.neon.fcvtzs.i64.f32(float %a)
+ %res = tail call i64 @llvm.aarch64.neon.sqabs.i64(i64 %cvt)
+ store i64 %res, ptr %dst, align 8
+ ret void
+}
+
+define void @test_sqneg_s32(float noundef %a, ptr %dst) {
+; CHECK-LABEL: test_sqneg_s32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fcvtzs s0, s0
+; CHECK-NEXT: sqneg s0, s0
+; CHECK-NEXT: str s0, [x0]
+; CHECK-NEXT: ret
+entry:
+ %cvt = tail call i32 @llvm.aarch64.neon.fcvtzs.i32.f32(float %a)
+ %res = tail call i32 @llvm.aarch64.neon.sqneg.i32(i32 %cvt)
+ store i32 %res, ptr %dst, align 4
+ ret void
+}
+
+define void @test_sqneg_s64(float noundef %a, ptr %dst) {
+; CHECK-LABEL: test_sqneg_s64:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fcvtzs d0, s0
+; CHECK-NEXT: sqneg d0, d0
+; CHECK-NEXT: str d0, [x0]
+; CHECK-NEXT: ret
+entry:
+ %cvt = tail call i64 @llvm.aarch64.neon.fcvtzs.i64.f32(float %a)
+ %res = tail call i64 @llvm.aarch64.neon.sqneg.i64(i64 %cvt)
+ store i64 %res, ptr %dst, align 8
+ ret void
+}
+
attributes #0 = { "target-features"="+rdm" }
-;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
-; CHECK-GI: {{.*}}
diff --git a/llvm/test/CodeGen/AArch64/arm64-neon-copy.ll b/llvm/test/CodeGen/AArch64/arm64-neon-copy.ll
index c2f39fb14ee24..4f3666c58d317 100644
--- a/llvm/test/CodeGen/AArch64/arm64-neon-copy.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-neon-copy.ll
@@ -1658,12 +1658,11 @@ entry:
define <2 x i32> @test_concat_diff_v1i32_v1i32(i32 %a, i32 %b) {
; CHECK-LABEL: test_concat_diff_v1i32_v1i32:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: fmov s0, w1
-; CHECK-NEXT: fmov s1, w0
-; CHECK-NEXT: sqabs s2, s0
-; CHECK-NEXT: sqabs s0, s1
-; CHECK-NEXT: fmov w8, s2
-; CHECK-NEXT: mov v0.s[1], w8
+; CHECK-NEXT: fmov s1, w1
+; CHECK-NEXT: fmov s0, w0
+; CHECK-NEXT: sqabs s0, s0
+; CHECK-NEXT: sqabs s1, s1
+; CHECK-NEXT: mov v0.s[1], v1.s[0]
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT: ret
entry:
diff --git a/llvm/test/CodeGen/AArch64/arm64-vqadd.ll b/llvm/test/CodeGen/AArch64/arm64-vqadd.ll
index ff1fedad43393..c1cd1d5c47e32 100644
--- a/llvm/test/CodeGen/AArch64/arm64-vqadd.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-vqadd.ll
@@ -340,10 +340,10 @@ define <2 x i64> @usqadd2d(ptr %A, ptr %B) nounwind {
define i64 @usqadd_d(i64 %l, i64 %r) nounwind {
; CHECK-LABEL: usqadd_d:
; CHECK: // %bb.0:
-; CHECK-NEXT: fmov d0, x0
-; CHECK-NEXT: fmov d1, x1
-; CHECK-NEXT: usqadd d0, d1
-; CHECK-NEXT: fmov x0, d0
+; CHECK-NEXT: fmov d0, x1
+; CHECK-NEXT: fmov d1, x0
+; CHECK-NEXT: usqadd d1, d0
+; CHECK-NEXT: fmov x0, d1
; CHECK-NEXT: ret
%sum = call i64 @llvm.aarch64.neon.usqadd.i64(i64 %l, i64 %r)
ret i64 %sum
@@ -352,10 +352,10 @@ define i64 @usqadd_d(i64 %l, i64 %r) nounwind {
define i32 @usqadd_s(i32 %l, i32 %r) nounwind {
; CHECK-LABEL: usqadd_s:
; CHECK: // %bb.0:
-; CHECK-NEXT: fmov s0, w0
-; CHECK-NEXT: fmov s1, w1
-; CHECK-NEXT: usqadd s0, s1
-; CHECK-NEXT: fmov w0, s0
+; CHECK-NEXT: fmov s0, w1
+; CHECK-NEXT: fmov s1, w0
+; CHECK-NEXT: usqadd s1, s0
+; CHECK-NEXT: fmov w0, s1
; CHECK-NEXT: ret
%sum = call i32 @llvm.aarch64.neon.usqadd.i32(i32 %l, i32 %r)
ret i32 %sum
@@ -482,10 +482,10 @@ define <2 x i64> @suqadd2d(ptr %A, ptr %B) nounwind {
define i64 @suqadd_d(i64 %l, i64 %r) nounwind {
; CHECK-LABEL: suqadd_d:
; CHECK: // %bb.0:
-; CHECK-NEXT: fmov d0, x0
-; CHECK-NEXT: fmov d1, x1
-; CHECK-NEXT: suqadd d0, d1
-; CHECK-NEXT: fmov x0, d0
+; CHECK-NEXT: fmov d0, x1
+; CHECK-NEXT: fmov d1, x0
+; CHECK-NEXT: suqadd d1, d0
+; CHECK-NEXT: fmov x0, d1
; CHECK-NEXT: ret
%sum = call i64 @llvm.aarch64.neon.suqadd.i64(i64 %l, i64 %r)
ret i64 %sum
@@ -494,10 +494,10 @@ define i64 @suqadd_d(i64 %l, i64 %r) nounwind {
define i32 @suqadd_s(i32 %l, i32 %r) nounwind {
; CHECK-LABEL: suqadd_s:
; CHECK: // %bb.0:
-; CHECK-NEXT: fmov s0, w0
-; CHECK-NEXT: fmov s1, w1
-; CHECK-NEXT: suqadd s0, s1
-; CHECK-NEXT: fmov w0, s0
+; CHECK-NEXT: fmov s0, w1
+; CHECK-NEXT: fmov s1, w0
+; CHECK-NEXT: suqadd s1, s0
+; CHECK-NEXT: fmov w0, s1
; CHECK-NEXT: ret
%sum = call i32 @llvm.aarch64.neon.suqadd.i32(i32 %l, i32 %r)
ret i32 %sum
From c4aed5cb346c18943ee5d48b56e6d1bfa66e0a9e Mon Sep 17 00:00:00 2001
From: Marian Lukac <Marian.Lukac at arm.com>
Date: Tue, 24 Feb 2026 13:11:20 +0000
Subject: [PATCH 2/4] Fix tests
---
llvm/test/CodeGen/AArch64/arm64-int-neon.ll | 10 +++++++++-
1 file changed, 9 insertions(+), 1 deletion(-)
diff --git a/llvm/test/CodeGen/AArch64/arm64-int-neon.ll b/llvm/test/CodeGen/AArch64/arm64-int-neon.ll
index c728654d9e851..309a17798b13c 100644
--- a/llvm/test/CodeGen/AArch64/arm64-int-neon.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-int-neon.ll
@@ -2,12 +2,20 @@
; RUN: llc < %s -mtriple aarch64-unknown-unknown -mattr=+fprcvt,+fullfp16 | FileCheck %s --check-prefixes=CHECK,CHECK-SD
; RUN: llc < %s -mtriple aarch64-unknown-unknown -global-isel -global-isel-abort=2 -mattr=+fprcvt,+fullfp16 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
-; CHECK-GI: warning: Instruction selection used fallback path for test_uqadd_s32
+; CHECK-GI: warning: Instruction selection used fallback path for test_suqadd_s32
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_suqadd_s64
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_usqadd_s32
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_usqadd_s64
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_uqadd_s32
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_uqadd_s64
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_uqsub_s32
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_uqsub_s64
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_sqdmulls_scalar
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_sqdmulh_scalar
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_sqabs_s32
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_sqabs_s64
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_sqneg_s32
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_sqneg_s64
define void @test_sqrshl_s32(float noundef %a, ptr %dst){
; CHECK-LABEL: test_sqrshl_s32:
From 6e4622212da699dc68dd5abb0292c374324f3211 Mon Sep 17 00:00:00 2001
From: Marian Lukac <Marian.Lukac at arm.com>
Date: Wed, 25 Feb 2026 11:44:44 +0000
Subject: [PATCH 3/4] Remove unnecessary patterns
---
llvm/lib/Target/AArch64/AArch64InstrFormats.td | 4 ----
1 file changed, 4 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
index b094ce291931d..e85b536afad98 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
@@ -8133,8 +8133,6 @@ multiclass SIMDTwoScalarBHSD<bit U, bits<5> opc, string asm,
def v1i8 : BaseSIMDTwoScalar<U, 0b00, 0b00, opc, FPR8 , FPR8 , asm, []>;
}
- def : Pat<(v1i64 (G_OpNode (v1i64 FPR64:$Rn))),
- (!cast<Instruction>(NAME # v1i64) FPR64:$Rn)>;
def : Pat<(f32 (OpNode (f32 FPR32:$Rn))),
(!cast<Instruction>(NAME # v1i32) FPR32:$Rn)>;
def : Pat<(f64 (OpNode (f64 FPR64:$Rn))),
@@ -8152,8 +8150,6 @@ multiclass SIMDTwoScalarBHSDTied<bit U, bits<5> opc, string asm,
def v1i8 : BaseSIMDTwoScalarTied<U, 0b00, opc, FPR8 , FPR8 , asm, []>;
}
- def : Pat<(v1i64 (G_OpNode (v1i64 FPR64:$Rd), (v1i64 FPR64:$Rn))),
- (!cast<Instruction>(NAME # v1i64) FPR64:$Rd, FPR64:$Rn)>;
def : Pat<(f32 (OpNode (f32 FPR32:$Rd), (f32 FPR32:$Rn))),
(!cast<Instruction>(NAME # v1i32) FPR32:$Rd, FPR32:$Rn)>;
def : Pat<(f64 (OpNode (f64 FPR64:$Rd), (f64 FPR64:$Rn))),
From 08738a063a4846a50e9b1ff8a62690075cf43bf2 Mon Sep 17 00:00:00 2001
From: Marian Lukac <Marian.Lukac at arm.com>
Date: Wed, 25 Feb 2026 15:06:18 +0000
Subject: [PATCH 4/4] Revert "Remove unnecessary patterns"
This reverts commit 6e4622212da699dc68dd5abb0292c374324f3211.
---
llvm/lib/Target/AArch64/AArch64InstrFormats.td | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
index e85b536afad98..b094ce291931d 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
@@ -8133,6 +8133,8 @@ multiclass SIMDTwoScalarBHSD<bit U, bits<5> opc, string asm,
def v1i8 : BaseSIMDTwoScalar<U, 0b00, 0b00, opc, FPR8 , FPR8 , asm, []>;
}
+ def : Pat<(v1i64 (G_OpNode (v1i64 FPR64:$Rn))),
+ (!cast<Instruction>(NAME # v1i64) FPR64:$Rn)>;
def : Pat<(f32 (OpNode (f32 FPR32:$Rn))),
(!cast<Instruction>(NAME # v1i32) FPR32:$Rn)>;
def : Pat<(f64 (OpNode (f64 FPR64:$Rn))),
@@ -8150,6 +8152,8 @@ multiclass SIMDTwoScalarBHSDTied<bit U, bits<5> opc, string asm,
def v1i8 : BaseSIMDTwoScalarTied<U, 0b00, opc, FPR8 , FPR8 , asm, []>;
}
+ def : Pat<(v1i64 (G_OpNode (v1i64 FPR64:$Rd), (v1i64 FPR64:$Rn))),
+ (!cast<Instruction>(NAME # v1i64) FPR64:$Rd, FPR64:$Rn)>;
def : Pat<(f32 (OpNode (f32 FPR32:$Rd), (f32 FPR32:$Rn))),
(!cast<Instruction>(NAME # v1i32) FPR32:$Rd, FPR32:$Rn)>;
def : Pat<(f64 (OpNode (f64 FPR64:$Rd), (f64 FPR64:$Rn))),
More information about the llvm-commits
mailing list