[llvm] [AArch64][GlobalISel] Added support for hadd family of intrinsics (PR #163985)
Joshua Rodriguez via llvm-commits
llvm-commits at lists.llvm.org
Fri Oct 17 09:07:08 PDT 2025
https://github.com/JoshdRod created https://github.com/llvm/llvm-project/pull/163985
GlobalISel now selects hadd family of intrinsics, without falling back to SDAG.
>From dbadd11c63313208236a54937d766eaed3f60a8c Mon Sep 17 00:00:00 2001
From: Josh Rodriguez <josh.rodriguez at arm.com>
Date: Thu, 16 Oct 2025 15:47:02 +0000
Subject: [PATCH 1/5] [AArch64][GlobalISel] Added uhadd intrinsic support
GlobalISel now selects uhadd intrinsic, without falling back to SDAG.
Note that GlobalISel-generated code involving uhadd seems to be inefficient when compared to SDAG.
---
llvm/lib/Target/AArch64/AArch64InstrGISel.td | 8 ++++++
.../AArch64/GISel/AArch64LegalizerInfo.cpp | 2 ++
llvm/test/CodeGen/AArch64/freeze.ll | 25 +++++++++++++------
3 files changed, 27 insertions(+), 8 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64InstrGISel.td b/llvm/lib/Target/AArch64/AArch64InstrGISel.td
index fe8419301b306..9e5019c5ed378 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrGISel.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrGISel.td
@@ -239,6 +239,12 @@ def G_USDOT : AArch64GenericInstruction {
let hasSideEffects = 0;
}
+def G_UHADD : AArch64GenericInstruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type0:$src1, type1:$src2);
+ let hasSideEffects = 0;
+}
+
// Generic instruction for the BSP pseudo. It is expanded into BSP, which
// expands into BSL/BIT/BIF after register allocation.
def G_BSP : AArch64GenericInstruction {
@@ -286,6 +292,8 @@ def : GINodeEquiv<G_UDOT, AArch64udot>;
def : GINodeEquiv<G_SDOT, AArch64sdot>;
def : GINodeEquiv<G_USDOT, AArch64usdot>;
+def : GINodeEquiv<G_UHADD, avgflooru>;
+
def : GINodeEquiv<G_EXTRACT_VECTOR_ELT, vector_extract>;
def : GINodeEquiv<G_AARCH64_PREFETCH, AArch64Prefetch>;
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index 05a431312472e..4e24ed8e02f16 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -1825,6 +1825,8 @@ bool AArch64LegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
return LowerBinOp(TargetOpcode::G_ABDS);
case Intrinsic::aarch64_neon_uabd:
return LowerBinOp(TargetOpcode::G_ABDU);
+ case Intrinsic::aarch64_neon_uhadd:
+ return LowerBinOp(AArch64::G_UHADD);
case Intrinsic::aarch64_neon_abs: {
// Lower the intrinsic to G_ABS.
MIB.buildInstr(TargetOpcode::G_ABS, {MI.getOperand(0)}, {MI.getOperand(2)});
diff --git a/llvm/test/CodeGen/AArch64/freeze.ll b/llvm/test/CodeGen/AArch64/freeze.ll
index fb909fec90434..f5e2ffd7361ce 100644
--- a/llvm/test/CodeGen/AArch64/freeze.ll
+++ b/llvm/test/CodeGen/AArch64/freeze.ll
@@ -3,7 +3,6 @@
; RUN: llc -mtriple=aarch64-unknown-linux-gnu -global-isel -global-isel-abort=2 2>&1 < %s | FileCheck %s --check-prefixes=CHECK,CHECK-GI
; CHECK-GI: warning: Instruction selection used fallback path for freeze_v2i8
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for freeze_uhadd
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for freeze_urhadd
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for freeze_shadd
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for freeze_srhadd
@@ -435,13 +434,23 @@ define <8 x i16> @freeze_abds(<8 x i16> %a, <8 x i16> %b) {
}
define <8 x i16> @freeze_uhadd(<8 x i16> %a0, <8 x i16> %a1) {
-; CHECK-LABEL: freeze_uhadd:
-; CHECK: // %bb.0:
-; CHECK-NEXT: movi v2.8h, #15
-; CHECK-NEXT: and v0.16b, v0.16b, v2.16b
-; CHECK-NEXT: and v1.16b, v1.16b, v2.16b
-; CHECK-NEXT: uhadd v0.8h, v0.8h, v1.8h
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: freeze_uhadd:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: movi v2.8h, #15
+; CHECK-SD-NEXT: and v0.16b, v0.16b, v2.16b
+; CHECK-SD-NEXT: and v1.16b, v1.16b, v2.16b
+; CHECK-SD-NEXT: uhadd v0.8h, v0.8h, v1.8h
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: freeze_uhadd:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: movi v2.8h, #15
+; CHECK-GI-NEXT: and v0.16b, v0.16b, v2.16b
+; CHECK-GI-NEXT: and v1.16b, v1.16b, v2.16b
+; CHECK-GI-NEXT: movi v2.8h, #31
+; CHECK-GI-NEXT: uhadd v0.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT: and v0.16b, v0.16b, v2.16b
+; CHECK-GI-NEXT: ret
%m0 = and <8 x i16> %a0, splat (i16 15)
%m1 = and <8 x i16> %a1, splat (i16 15)
%avg = call <8 x i16> @llvm.aarch64.neon.uhadd.v8i16(<8 x i16> %m0, <8 x i16> %m1)
>From cd051d156f3543799b02dfdaefb01ea1de48bc0f Mon Sep 17 00:00:00 2001
From: Josh Rodriguez <josh.rodriguez at arm.com>
Date: Fri, 17 Oct 2025 13:13:22 +0000
Subject: [PATCH 2/5] [AArch64][GlobalISel] Added urhadd intrinsic support
GlobalISel now selects urhadd intrinsic, without falling back to SDAG.
Note that GlobalISel-generated code involving urhadd seems to be inefficient when compared to SDAG.
---
llvm/lib/Target/AArch64/AArch64InstrGISel.td | 7 ++++++
.../AArch64/GISel/AArch64LegalizerInfo.cpp | 2 ++
llvm/test/CodeGen/AArch64/freeze.ll | 25 +++++++++++++------
3 files changed, 26 insertions(+), 8 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64InstrGISel.td b/llvm/lib/Target/AArch64/AArch64InstrGISel.td
index 9e5019c5ed378..4f7c82e45c2fa 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrGISel.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrGISel.td
@@ -245,6 +245,12 @@ def G_UHADD : AArch64GenericInstruction {
let hasSideEffects = 0;
}
+def G_URHADD : AArch64GenericInstruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type0:$src1, type1:$src2);
+ let hasSideEffects = 0;
+}
+
// Generic instruction for the BSP pseudo. It is expanded into BSP, which
// expands into BSL/BIT/BIF after register allocation.
def G_BSP : AArch64GenericInstruction {
@@ -293,6 +299,7 @@ def : GINodeEquiv<G_SDOT, AArch64sdot>;
def : GINodeEquiv<G_USDOT, AArch64usdot>;
def : GINodeEquiv<G_UHADD, avgflooru>;
+def : GINodeEquiv<G_URHADD, avgceilu>;
def : GINodeEquiv<G_EXTRACT_VECTOR_ELT, vector_extract>;
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index 4e24ed8e02f16..9f840bfff1178 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -1827,6 +1827,8 @@ bool AArch64LegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
return LowerBinOp(TargetOpcode::G_ABDU);
case Intrinsic::aarch64_neon_uhadd:
return LowerBinOp(AArch64::G_UHADD);
+ case Intrinsic::aarch64_neon_urhadd:
+ return LowerBinOp(AArch64::G_URHADD);
case Intrinsic::aarch64_neon_abs: {
// Lower the intrinsic to G_ABS.
MIB.buildInstr(TargetOpcode::G_ABS, {MI.getOperand(0)}, {MI.getOperand(2)});
diff --git a/llvm/test/CodeGen/AArch64/freeze.ll b/llvm/test/CodeGen/AArch64/freeze.ll
index f5e2ffd7361ce..e2ae046da1467 100644
--- a/llvm/test/CodeGen/AArch64/freeze.ll
+++ b/llvm/test/CodeGen/AArch64/freeze.ll
@@ -3,7 +3,6 @@
; RUN: llc -mtriple=aarch64-unknown-linux-gnu -global-isel -global-isel-abort=2 2>&1 < %s | FileCheck %s --check-prefixes=CHECK,CHECK-GI
; CHECK-GI: warning: Instruction selection used fallback path for freeze_v2i8
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for freeze_urhadd
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for freeze_shadd
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for freeze_srhadd
@@ -460,13 +459,23 @@ define <8 x i16> @freeze_uhadd(<8 x i16> %a0, <8 x i16> %a1) {
}
define <8 x i16> @freeze_urhadd(<8 x i16> %a0, <8 x i16> %a1) {
-; CHECK-LABEL: freeze_urhadd:
-; CHECK: // %bb.0:
-; CHECK-NEXT: movi v2.8h, #15
-; CHECK-NEXT: and v0.16b, v0.16b, v2.16b
-; CHECK-NEXT: and v1.16b, v1.16b, v2.16b
-; CHECK-NEXT: urhadd v0.8h, v0.8h, v1.8h
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: freeze_urhadd:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: movi v2.8h, #15
+; CHECK-SD-NEXT: and v0.16b, v0.16b, v2.16b
+; CHECK-SD-NEXT: and v1.16b, v1.16b, v2.16b
+; CHECK-SD-NEXT: urhadd v0.8h, v0.8h, v1.8h
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: freeze_urhadd:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: movi v2.8h, #15
+; CHECK-GI-NEXT: and v0.16b, v0.16b, v2.16b
+; CHECK-GI-NEXT: and v1.16b, v1.16b, v2.16b
+; CHECK-GI-NEXT: movi v2.8h, #31
+; CHECK-GI-NEXT: urhadd v0.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT: and v0.16b, v0.16b, v2.16b
+; CHECK-GI-NEXT: ret
%m0 = and <8 x i16> %a0, splat (i16 15)
%m1 = and <8 x i16> %a1, splat (i16 15)
%avg = call <8 x i16> @llvm.aarch64.neon.urhadd.v8i16(<8 x i16> %m0, <8 x i16> %m1)
>From def160d1e1c7f5c82cd93222df474fcff7344503 Mon Sep 17 00:00:00 2001
From: Josh Rodriguez <josh.rodriguez at arm.com>
Date: Fri, 17 Oct 2025 15:06:58 +0000
Subject: [PATCH 3/5] [AArch64][GlobalISel] Added shadd intrinsic support
GlobalISel now selects shadd intrinsic, without falling back to SDAG. Note
that GlobalISel-generated code involving shadd seems to be inefficient when
compared to SDAG.
---
llvm/lib/Target/AArch64/AArch64InstrGISel.td | 7 ++++++
.../AArch64/GISel/AArch64LegalizerInfo.cpp | 2 ++
llvm/test/CodeGen/AArch64/freeze.ll | 22 +++++++++++++------
3 files changed, 24 insertions(+), 7 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64InstrGISel.td b/llvm/lib/Target/AArch64/AArch64InstrGISel.td
index 4f7c82e45c2fa..9b0efa30cfee5 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrGISel.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrGISel.td
@@ -251,6 +251,12 @@ def G_URHADD : AArch64GenericInstruction {
let hasSideEffects = 0;
}
+def G_SHADD : AArch64GenericInstruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type0:$src1, type1:$src2);
+ let hasSideEffects = 0;
+}
+
// Generic instruction for the BSP pseudo. It is expanded into BSP, which
// expands into BSL/BIT/BIF after register allocation.
def G_BSP : AArch64GenericInstruction {
@@ -300,6 +306,7 @@ def : GINodeEquiv<G_USDOT, AArch64usdot>;
def : GINodeEquiv<G_UHADD, avgflooru>;
def : GINodeEquiv<G_URHADD, avgceilu>;
+def : GINodeEquiv<G_SHADD, avgfloors>;
def : GINodeEquiv<G_EXTRACT_VECTOR_ELT, vector_extract>;
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index 9f840bfff1178..a11784fc0ca12 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -1829,6 +1829,8 @@ bool AArch64LegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
return LowerBinOp(AArch64::G_UHADD);
case Intrinsic::aarch64_neon_urhadd:
return LowerBinOp(AArch64::G_URHADD);
+ case Intrinsic::aarch64_neon_shadd:
+ return LowerBinOp(AArch64::G_SHADD);
case Intrinsic::aarch64_neon_abs: {
// Lower the intrinsic to G_ABS.
MIB.buildInstr(TargetOpcode::G_ABS, {MI.getOperand(0)}, {MI.getOperand(2)});
diff --git a/llvm/test/CodeGen/AArch64/freeze.ll b/llvm/test/CodeGen/AArch64/freeze.ll
index e2ae046da1467..dffd89143d16b 100644
--- a/llvm/test/CodeGen/AArch64/freeze.ll
+++ b/llvm/test/CodeGen/AArch64/freeze.ll
@@ -3,7 +3,6 @@
; RUN: llc -mtriple=aarch64-unknown-linux-gnu -global-isel -global-isel-abort=2 2>&1 < %s | FileCheck %s --check-prefixes=CHECK,CHECK-GI
; CHECK-GI: warning: Instruction selection used fallback path for freeze_v2i8
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for freeze_shadd
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for freeze_srhadd
%struct.T = type { i32, i32 }
@@ -485,12 +484,21 @@ define <8 x i16> @freeze_urhadd(<8 x i16> %a0, <8 x i16> %a1) {
}
define <8 x i16> @freeze_shadd(<8 x i8> %a0, <8 x i16> %a1) {
-; CHECK-LABEL: freeze_shadd:
-; CHECK: // %bb.0:
-; CHECK-NEXT: sshll v0.8h, v0.8b, #0
-; CHECK-NEXT: sshr v1.8h, v1.8h, #8
-; CHECK-NEXT: shadd v0.8h, v0.8h, v1.8h
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: freeze_shadd:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: sshll v0.8h, v0.8b, #0
+; CHECK-SD-NEXT: sshr v1.8h, v1.8h, #8
+; CHECK-SD-NEXT: shadd v0.8h, v0.8h, v1.8h
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: freeze_shadd:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: sshll v0.8h, v0.8b, #0
+; CHECK-GI-NEXT: sshr v1.8h, v1.8h, #8
+; CHECK-GI-NEXT: shadd v0.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT: shl v0.8h, v0.8h, #8
+; CHECK-GI-NEXT: sshr v0.8h, v0.8h, #8
+; CHECK-GI-NEXT: ret
%x0 = sext <8 x i8> %a0 to <8 x i16>
%x1 = ashr <8 x i16> %a1, splat (i16 8)
%avg = call <8 x i16> @llvm.aarch64.neon.shadd.v8i16(<8 x i16> %x0, <8 x i16> %x1)
>From 1cf088b935fdce8cc8c89f21545cb6a04fb3d6cd Mon Sep 17 00:00:00 2001
From: Josh Rodriguez <josh.rodriguez at arm.com>
Date: Fri, 17 Oct 2025 15:46:32 +0000
Subject: [PATCH 4/5] [AArch64][GlobalISel] Added srhadd intrinsic support
GlobalISel now selects srhadd intrinsic, without falling back to SDAG. Note
that GlobalISel-generated code involving srhadd seems to be inefficient when
compared to SDAG.
---
llvm/lib/Target/AArch64/AArch64InstrGISel.td | 7 ++++++
.../AArch64/GISel/AArch64LegalizerInfo.cpp | 2 ++
llvm/test/CodeGen/AArch64/freeze.ll | 22 +++++++++++++------
3 files changed, 24 insertions(+), 7 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64InstrGISel.td b/llvm/lib/Target/AArch64/AArch64InstrGISel.td
index 9b0efa30cfee5..187ef00eebd3c 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrGISel.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrGISel.td
@@ -257,6 +257,12 @@ def G_SHADD : AArch64GenericInstruction {
let hasSideEffects = 0;
}
+def G_SRHADD : AArch64GenericInstruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type0:$src1, type1:$src2);
+ let hasSideEffects = 0;
+}
+
// Generic instruction for the BSP pseudo. It is expanded into BSP, which
// expands into BSL/BIT/BIF after register allocation.
def G_BSP : AArch64GenericInstruction {
@@ -307,6 +313,7 @@ def : GINodeEquiv<G_USDOT, AArch64usdot>;
def : GINodeEquiv<G_UHADD, avgflooru>;
def : GINodeEquiv<G_URHADD, avgceilu>;
def : GINodeEquiv<G_SHADD, avgfloors>;
+def : GINodeEquiv<G_SRHADD, avgceils>;
def : GINodeEquiv<G_EXTRACT_VECTOR_ELT, vector_extract>;
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index a11784fc0ca12..d2abdd0662b60 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -1831,6 +1831,8 @@ bool AArch64LegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
return LowerBinOp(AArch64::G_URHADD);
case Intrinsic::aarch64_neon_shadd:
return LowerBinOp(AArch64::G_SHADD);
+ case Intrinsic::aarch64_neon_srhadd:
+ return LowerBinOp(AArch64::G_SRHADD);
case Intrinsic::aarch64_neon_abs: {
// Lower the intrinsic to G_ABS.
MIB.buildInstr(TargetOpcode::G_ABS, {MI.getOperand(0)}, {MI.getOperand(2)});
diff --git a/llvm/test/CodeGen/AArch64/freeze.ll b/llvm/test/CodeGen/AArch64/freeze.ll
index dffd89143d16b..136ac8b0a2aa1 100644
--- a/llvm/test/CodeGen/AArch64/freeze.ll
+++ b/llvm/test/CodeGen/AArch64/freeze.ll
@@ -3,7 +3,6 @@
; RUN: llc -mtriple=aarch64-unknown-linux-gnu -global-isel -global-isel-abort=2 2>&1 < %s | FileCheck %s --check-prefixes=CHECK,CHECK-GI
; CHECK-GI: warning: Instruction selection used fallback path for freeze_v2i8
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for freeze_srhadd
%struct.T = type { i32, i32 }
@@ -509,12 +508,21 @@ define <8 x i16> @freeze_shadd(<8 x i8> %a0, <8 x i16> %a1) {
}
define <8 x i16> @freeze_srhadd(<8 x i8> %a0, <8 x i16> %a1) {
-; CHECK-LABEL: freeze_srhadd:
-; CHECK: // %bb.0:
-; CHECK-NEXT: sshll v0.8h, v0.8b, #0
-; CHECK-NEXT: sshr v1.8h, v1.8h, #8
-; CHECK-NEXT: srhadd v0.8h, v0.8h, v1.8h
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: freeze_srhadd:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: sshll v0.8h, v0.8b, #0
+; CHECK-SD-NEXT: sshr v1.8h, v1.8h, #8
+; CHECK-SD-NEXT: srhadd v0.8h, v0.8h, v1.8h
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: freeze_srhadd:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: sshll v0.8h, v0.8b, #0
+; CHECK-GI-NEXT: sshr v1.8h, v1.8h, #8
+; CHECK-GI-NEXT: srhadd v0.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT: shl v0.8h, v0.8h, #8
+; CHECK-GI-NEXT: sshr v0.8h, v0.8h, #8
+; CHECK-GI-NEXT: ret
%x0 = sext <8 x i8> %a0 to <8 x i16>
%x1 = ashr <8 x i16> %a1, splat (i16 8)
%avg = call <8 x i16> @llvm.aarch64.neon.srhadd.v8i16(<8 x i16> %x0, <8 x i16> %x1)
>From 7998bebc36bc914d56bb3042a5cb3323766eae2b Mon Sep 17 00:00:00 2001
From: Josh Rodriguez <josh.rodriguez at arm.com>
Date: Fri, 17 Oct 2025 15:48:31 +0000
Subject: [PATCH 5/5] [AArch64][GlobalISel] Modified llc test to check
generation from both SDAG and GISel Note that GlobalISel-generated code
involving the hadd family of intrinsics seems to be inefficient when compared
to SDAG.
---
.../AArch64/aarch64-known-bits-hadd.ll | 171 +++++++++++++-----
1 file changed, 123 insertions(+), 48 deletions(-)
diff --git a/llvm/test/CodeGen/AArch64/aarch64-known-bits-hadd.ll b/llvm/test/CodeGen/AArch64/aarch64-known-bits-hadd.ll
index f900f0209a108..a6fbaf01c5476 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-known-bits-hadd.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-known-bits-hadd.ll
@@ -1,5 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
-; RUN: llc -mtriple=aarch64 < %s | FileCheck %s
+; RUN: llc < %s -mtriple=aarch64 | FileCheck %s --check-prefixes=CHECK,CHECK-SD
+; RUN: llc < %s -mtriple=aarch64 -global-isel | FileCheck %s --check-prefixes=CHECK,CHECK-GI
declare <8 x i16> @llvm.aarch64.neon.uhadd.v8i16(<8 x i16>, <8 x i16>)
declare <8 x i16> @llvm.aarch64.neon.urhadd.v8i16(<8 x i16>, <8 x i16>)
@@ -7,11 +8,20 @@ declare <8 x i16> @llvm.aarch64.neon.shadd.v8i16(<8 x i16>, <8 x i16>)
declare <8 x i16> @llvm.aarch64.neon.srhadd.v8i16(<8 x i16>, <8 x i16>)
define <8 x i16> @haddu_zext(<8 x i8> %a0, <8 x i8> %a1) {
-; CHECK-LABEL: haddu_zext:
-; CHECK: // %bb.0:
-; CHECK-NEXT: uhadd v0.8b, v0.8b, v1.8b
-; CHECK-NEXT: ushll v0.8h, v0.8b, #0
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: haddu_zext:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: uhadd v0.8b, v0.8b, v1.8b
+; CHECK-SD-NEXT: ushll v0.8h, v0.8b, #0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: haddu_zext:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: ushll v0.8h, v0.8b, #0
+; CHECK-GI-NEXT: ushll v1.8h, v1.8b, #0
+; CHECK-GI-NEXT: mvni v2.8h, #254, lsl #8
+; CHECK-GI-NEXT: uhadd v0.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT: and v0.16b, v0.16b, v2.16b
+; CHECK-GI-NEXT: ret
%x0 = zext <8 x i8> %a0 to <8 x i16>
%x1 = zext <8 x i8> %a1 to <8 x i16>
%hadd = call <8 x i16> @llvm.aarch64.neon.uhadd.v8i16(<8 x i16> %x0, <8 x i16> %x1)
@@ -20,11 +30,20 @@ define <8 x i16> @haddu_zext(<8 x i8> %a0, <8 x i8> %a1) {
}
define <8 x i16> @rhaddu_zext(<8 x i8> %a0, <8 x i8> %a1) {
-; CHECK-LABEL: rhaddu_zext:
-; CHECK: // %bb.0:
-; CHECK-NEXT: urhadd v0.8b, v0.8b, v1.8b
-; CHECK-NEXT: ushll v0.8h, v0.8b, #0
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: rhaddu_zext:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: urhadd v0.8b, v0.8b, v1.8b
+; CHECK-SD-NEXT: ushll v0.8h, v0.8b, #0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: rhaddu_zext:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: ushll v0.8h, v0.8b, #0
+; CHECK-GI-NEXT: ushll v1.8h, v1.8b, #0
+; CHECK-GI-NEXT: mvni v2.8h, #254, lsl #8
+; CHECK-GI-NEXT: urhadd v0.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT: and v0.16b, v0.16b, v2.16b
+; CHECK-GI-NEXT: ret
%x0 = zext <8 x i8> %a0 to <8 x i16>
%x1 = zext <8 x i8> %a1 to <8 x i16>
%hadd = call <8 x i16> @llvm.aarch64.neon.urhadd.v8i16(<8 x i16> %x0, <8 x i16> %x1)
@@ -33,11 +52,20 @@ define <8 x i16> @rhaddu_zext(<8 x i8> %a0, <8 x i8> %a1) {
}
define <8 x i16> @hadds_zext(<8 x i8> %a0, <8 x i8> %a1) {
-; CHECK-LABEL: hadds_zext:
-; CHECK: // %bb.0:
-; CHECK-NEXT: uhadd v0.8b, v0.8b, v1.8b
-; CHECK-NEXT: ushll v0.8h, v0.8b, #0
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: hadds_zext:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: uhadd v0.8b, v0.8b, v1.8b
+; CHECK-SD-NEXT: ushll v0.8h, v0.8b, #0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: hadds_zext:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: ushll v0.8h, v0.8b, #0
+; CHECK-GI-NEXT: ushll v1.8h, v1.8b, #0
+; CHECK-GI-NEXT: mvni v2.8h, #254, lsl #8
+; CHECK-GI-NEXT: shadd v0.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT: and v0.16b, v0.16b, v2.16b
+; CHECK-GI-NEXT: ret
%x0 = zext <8 x i8> %a0 to <8 x i16>
%x1 = zext <8 x i8> %a1 to <8 x i16>
%hadd = call <8 x i16> @llvm.aarch64.neon.shadd.v8i16(<8 x i16> %x0, <8 x i16> %x1)
@@ -46,12 +74,21 @@ define <8 x i16> @hadds_zext(<8 x i8> %a0, <8 x i8> %a1) {
}
define <8 x i16> @shaddu_zext(<8 x i8> %a0, <8 x i8> %a1) {
-; CHECK-LABEL: shaddu_zext:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ushll v0.8h, v0.8b, #0
-; CHECK-NEXT: ushll v1.8h, v1.8b, #0
-; CHECK-NEXT: srhadd v0.8h, v0.8h, v1.8h
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: shaddu_zext:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: ushll v0.8h, v0.8b, #0
+; CHECK-SD-NEXT: ushll v1.8h, v1.8b, #0
+; CHECK-SD-NEXT: srhadd v0.8h, v0.8h, v1.8h
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: shaddu_zext:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: ushll v0.8h, v0.8b, #0
+; CHECK-GI-NEXT: ushll v1.8h, v1.8b, #0
+; CHECK-GI-NEXT: mvni v2.8h, #254, lsl #8
+; CHECK-GI-NEXT: srhadd v0.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT: and v0.16b, v0.16b, v2.16b
+; CHECK-GI-NEXT: ret
%x0 = zext <8 x i8> %a0 to <8 x i16>
%x1 = zext <8 x i8> %a1 to <8 x i16>
%hadd = call <8 x i16> @llvm.aarch64.neon.srhadd.v8i16(<8 x i16> %x0, <8 x i16> %x1)
@@ -62,13 +99,22 @@ define <8 x i16> @shaddu_zext(<8 x i8> %a0, <8 x i8> %a1) {
; ; negative tests
define <8 x i16> @haddu_sext(<8 x i8> %a0, <8 x i8> %a1) {
-; CHECK-LABEL: haddu_sext:
-; CHECK: // %bb.0:
-; CHECK-NEXT: sshll v0.8h, v0.8b, #0
-; CHECK-NEXT: sshll v1.8h, v1.8b, #0
-; CHECK-NEXT: uhadd v0.8h, v0.8h, v1.8h
-; CHECK-NEXT: bic v0.8h, #254, lsl #8
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: haddu_sext:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: sshll v0.8h, v0.8b, #0
+; CHECK-SD-NEXT: sshll v1.8h, v1.8b, #0
+; CHECK-SD-NEXT: uhadd v0.8h, v0.8h, v1.8h
+; CHECK-SD-NEXT: bic v0.8h, #254, lsl #8
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: haddu_sext:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: sshll v0.8h, v0.8b, #0
+; CHECK-GI-NEXT: sshll v1.8h, v1.8b, #0
+; CHECK-GI-NEXT: mvni v2.8h, #254, lsl #8
+; CHECK-GI-NEXT: uhadd v0.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT: and v0.16b, v0.16b, v2.16b
+; CHECK-GI-NEXT: ret
%x0 = sext <8 x i8> %a0 to <8 x i16>
%x1 = sext <8 x i8> %a1 to <8 x i16>
%hadd = call <8 x i16> @llvm.aarch64.neon.uhadd.v8i16(<8 x i16> %x0, <8 x i16> %x1)
@@ -77,13 +123,22 @@ define <8 x i16> @haddu_sext(<8 x i8> %a0, <8 x i8> %a1) {
}
define <8 x i16> @urhadd_sext(<8 x i8> %a0, <8 x i8> %a1) {
-; CHECK-LABEL: urhadd_sext:
-; CHECK: // %bb.0:
-; CHECK-NEXT: sshll v0.8h, v0.8b, #0
-; CHECK-NEXT: sshll v1.8h, v1.8b, #0
-; CHECK-NEXT: urhadd v0.8h, v0.8h, v1.8h
-; CHECK-NEXT: bic v0.8h, #254, lsl #8
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: urhadd_sext:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: sshll v0.8h, v0.8b, #0
+; CHECK-SD-NEXT: sshll v1.8h, v1.8b, #0
+; CHECK-SD-NEXT: urhadd v0.8h, v0.8h, v1.8h
+; CHECK-SD-NEXT: bic v0.8h, #254, lsl #8
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: urhadd_sext:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: sshll v0.8h, v0.8b, #0
+; CHECK-GI-NEXT: sshll v1.8h, v1.8b, #0
+; CHECK-GI-NEXT: mvni v2.8h, #254, lsl #8
+; CHECK-GI-NEXT: urhadd v0.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT: and v0.16b, v0.16b, v2.16b
+; CHECK-GI-NEXT: ret
%x0 = sext <8 x i8> %a0 to <8 x i16>
%x1 = sext <8 x i8> %a1 to <8 x i16>
%hadd = call <8 x i16> @llvm.aarch64.neon.urhadd.v8i16(<8 x i16> %x0, <8 x i16> %x1)
@@ -92,12 +147,21 @@ define <8 x i16> @urhadd_sext(<8 x i8> %a0, <8 x i8> %a1) {
}
define <8 x i16> @hadds_sext(<8 x i8> %a0, <8 x i8> %a1) {
-; CHECK-LABEL: hadds_sext:
-; CHECK: // %bb.0:
-; CHECK-NEXT: shadd v0.8b, v0.8b, v1.8b
-; CHECK-NEXT: sshll v0.8h, v0.8b, #0
-; CHECK-NEXT: bic v0.8h, #254, lsl #8
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: hadds_sext:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: shadd v0.8b, v0.8b, v1.8b
+; CHECK-SD-NEXT: sshll v0.8h, v0.8b, #0
+; CHECK-SD-NEXT: bic v0.8h, #254, lsl #8
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: hadds_sext:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: sshll v0.8h, v0.8b, #0
+; CHECK-GI-NEXT: sshll v1.8h, v1.8b, #0
+; CHECK-GI-NEXT: mvni v2.8h, #254, lsl #8
+; CHECK-GI-NEXT: shadd v0.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT: and v0.16b, v0.16b, v2.16b
+; CHECK-GI-NEXT: ret
%x0 = sext <8 x i8> %a0 to <8 x i16>
%x1 = sext <8 x i8> %a1 to <8 x i16>
%hadd = call <8 x i16> @llvm.aarch64.neon.shadd.v8i16(<8 x i16> %x0, <8 x i16> %x1)
@@ -106,15 +170,26 @@ define <8 x i16> @hadds_sext(<8 x i8> %a0, <8 x i8> %a1) {
}
define <8 x i16> @shaddu_sext(<8 x i8> %a0, <8 x i8> %a1) {
-; CHECK-LABEL: shaddu_sext:
-; CHECK: // %bb.0:
-; CHECK-NEXT: srhadd v0.8b, v0.8b, v1.8b
-; CHECK-NEXT: sshll v0.8h, v0.8b, #0
-; CHECK-NEXT: bic v0.8h, #254, lsl #8
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: shaddu_sext:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: srhadd v0.8b, v0.8b, v1.8b
+; CHECK-SD-NEXT: sshll v0.8h, v0.8b, #0
+; CHECK-SD-NEXT: bic v0.8h, #254, lsl #8
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: shaddu_sext:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: sshll v0.8h, v0.8b, #0
+; CHECK-GI-NEXT: sshll v1.8h, v1.8b, #0
+; CHECK-GI-NEXT: mvni v2.8h, #254, lsl #8
+; CHECK-GI-NEXT: srhadd v0.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT: and v0.16b, v0.16b, v2.16b
+; CHECK-GI-NEXT: ret
%x0 = sext <8 x i8> %a0 to <8 x i16>
%x1 = sext <8 x i8> %a1 to <8 x i16>
%hadd = call <8 x i16> @llvm.aarch64.neon.srhadd.v8i16(<8 x i16> %x0, <8 x i16> %x1)
%res = and <8 x i16> %hadd, <i16 511, i16 511, i16 511, i16 511, i16 511, i16 511, i16 511, i16 511>
ret <8 x i16> %res
}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; CHECK: {{.*}}
More information about the llvm-commits
mailing list