[llvm] [AArch64] Expand UADDLV patterns to handle two-step i8->i16->i32 extends (PR #146078)
Igor Kirillov via llvm-commits
llvm-commits at lists.llvm.org
Tue Jul 8 09:03:35 PDT 2025
https://github.com/igogo-x86 updated https://github.com/llvm/llvm-project/pull/146078
>From 13ddd5abb115af2b65bfe1d9311e88a789fae80f Mon Sep 17 00:00:00 2001
From: Igor Kirillov <igor.kirillov at arm.com>
Date: Fri, 27 Jun 2025 13:43:35 +0000
Subject: [PATCH 1/2] [AArch64] Expand UADDLV patterns to handle two-step
i8->i16->i32 extends
Closes #142961
---
.../Target/AArch64/AArch64ISelLowering.cpp | 35 +++++++++++++++----
llvm/test/CodeGen/AArch64/neon-sad.ll | 22 ++++++++++++
2 files changed, 50 insertions(+), 7 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index cdb68684b3856..3afdfc74d0ec1 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -18052,6 +18052,15 @@ static SDValue foldVectorXorShiftIntoCmp(SDNode *N, SelectionDAG &DAG,
// v16i32 abs(
// v16i32 sub(
// v16i32 [sign|zero]_extend(v16i8 a), v16i32 [sign|zero]_extend(v16i8 b))))
+//
+// or
+//
+// i32 vecreduce_add(
+// v16i32 zext(
+// v16i16 abs(
+// v16i16 sub(
+// v16i16 [sign|zero]_extend(v16i8 a), v16i16 [sign|zero]_extend(v16i8 b)))))
+//
// =================>
// i32 vecreduce_add(
// v4i32 UADDLP(
@@ -18067,23 +18076,35 @@ static SDValue performVecReduceAddCombineWithUADDLP(SDNode *N,
return SDValue();
SDValue VecReduceOp0 = N->getOperand(0);
+ bool SawTrailingZext = false;
+ // Look through an optional post-ABS ZEXT from v16i16 -> v16i32.
+ if (VecReduceOp0.getOpcode() == ISD::ZERO_EXTEND &&
+ VecReduceOp0->getValueType(0) == MVT::v16i32 &&
+ VecReduceOp0->getOperand(0)->getOpcode() == ISD::ABS &&
+ VecReduceOp0->getOperand(0)->getValueType(0) == MVT::v16i16) {
+ SawTrailingZext = true;
+ VecReduceOp0 = VecReduceOp0.getOperand(0);
+ }
+
+ // ABS/SUB and the inner extends are v16i16 if a ZEXT was skipped, else v16i32.
+ MVT AbsInputVT = SawTrailingZext ? MVT::v16i16 : MVT::v16i32;
+ // Assumed v16i16 or v16i32 abs input
unsigned Opcode = VecReduceOp0.getOpcode();
- // Assumed v16i32 abs
- if (Opcode != ISD::ABS || VecReduceOp0->getValueType(0) != MVT::v16i32)
+ if (Opcode != ISD::ABS || VecReduceOp0->getValueType(0) != AbsInputVT)
return SDValue();
SDValue ABS = VecReduceOp0;
- // Assumed v16i32 sub
+ // Assumed v16i16 or v16i32 sub
if (ABS->getOperand(0)->getOpcode() != ISD::SUB ||
- ABS->getOperand(0)->getValueType(0) != MVT::v16i32)
+ ABS->getOperand(0)->getValueType(0) != AbsInputVT)
return SDValue();
SDValue SUB = ABS->getOperand(0);
unsigned Opcode0 = SUB->getOperand(0).getOpcode();
unsigned Opcode1 = SUB->getOperand(1).getOpcode();
- // Assumed v16i32 type
- if (SUB->getOperand(0)->getValueType(0) != MVT::v16i32 ||
- SUB->getOperand(1)->getValueType(0) != MVT::v16i32)
+ // Assumed v16i16 or v16i32 type
+ if (SUB->getOperand(0)->getValueType(0) != AbsInputVT ||
+ SUB->getOperand(1)->getValueType(0) != AbsInputVT)
return SDValue();
// Assumed zext or sext
diff --git a/llvm/test/CodeGen/AArch64/neon-sad.ll b/llvm/test/CodeGen/AArch64/neon-sad.ll
index c0cfe8d8ca3cc..1f0ca9ca7ad36 100644
--- a/llvm/test/CodeGen/AArch64/neon-sad.ll
+++ b/llvm/test/CodeGen/AArch64/neon-sad.ll
@@ -45,3 +45,25 @@ entry:
%6 = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %5)
ret i32 %6
}
+
+define i32 @test_sad_v16i8_two_step_zext(ptr noundef readonly %a, ptr noundef readonly %b) {
+; CHECK-LABEL: test_sad_v16i8_two_step_zext:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ldr q0, [x0]
+; CHECK-NEXT: ldr q1, [x1]
+; CHECK-NEXT: uabdl v2.8h, v1.8b, v0.8b
+; CHECK-NEXT: uabal2 v2.8h, v1.16b, v0.16b
+; CHECK-NEXT: uaddlv s0, v2.8h
+; CHECK-NEXT: fmov w0, s0
+; CHECK-NEXT: ret
+entry:
+ %0 = load <16 x i8>, ptr %a
+ %1 = zext <16 x i8> %0 to <16 x i16>
+ %2 = load <16 x i8>, ptr %b
+ %3 = zext <16 x i8> %2 to <16 x i16>
+ %4 = sub nsw <16 x i16> %3, %1
+ %5 = tail call <16 x i16> @llvm.abs.v16i16(<16 x i16> %4, i1 false)
+ %6 = zext <16 x i16> %5 to <16 x i32>
+ %7 = tail call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %6)
+ ret i32 %7
+}
>From c830ff2706da1723f89bd84249e6ce91d5040556 Mon Sep 17 00:00:00 2001
From: Igor Kirillov <igor.kirillov at arm.com>
Date: Tue, 8 Jul 2025 16:01:48 +0000
Subject: [PATCH 2/2] Add test
---
llvm/test/CodeGen/AArch64/neon-sad.ll | 22 ++++++++++++++++++++++
1 file changed, 22 insertions(+)
diff --git a/llvm/test/CodeGen/AArch64/neon-sad.ll b/llvm/test/CodeGen/AArch64/neon-sad.ll
index 1f0ca9ca7ad36..f21fafea50451 100644
--- a/llvm/test/CodeGen/AArch64/neon-sad.ll
+++ b/llvm/test/CodeGen/AArch64/neon-sad.ll
@@ -67,3 +67,25 @@ entry:
%7 = tail call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %6)
ret i32 %7
}
+
+define i32 @test_sad_v16i8_two_step_sext(ptr noundef readonly %a, ptr noundef readonly %b) {
+; CHECK-LABEL: test_sad_v16i8_two_step_sext:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ldr q0, [x0]
+; CHECK-NEXT: ldr q1, [x1]
+; CHECK-NEXT: sabdl v2.8h, v1.8b, v0.8b
+; CHECK-NEXT: sabal2 v2.8h, v1.16b, v0.16b
+; CHECK-NEXT: uaddlv s0, v2.8h
+; CHECK-NEXT: fmov w0, s0
+; CHECK-NEXT: ret
+entry:
+ %0 = load <16 x i8>, ptr %a
+ %1 = sext <16 x i8> %0 to <16 x i16>
+ %2 = load <16 x i8>, ptr %b
+ %3 = sext <16 x i8> %2 to <16 x i16>
+ %4 = sub nsw <16 x i16> %3, %1
+ %5 = tail call <16 x i16> @llvm.abs.v16i16(<16 x i16> %4, i1 false)
+ %6 = zext <16 x i16> %5 to <16 x i32>
+ %7 = tail call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %6)
+ ret i32 %7
+}
More information about the llvm-commits
mailing list