[llvm] [AArch64][GlobalISel] Added support for hadd family of intrinsics (PR #163985)
Joshua Rodriguez via llvm-commits
llvm-commits at lists.llvm.org
Mon Oct 27 06:55:40 PDT 2025
https://github.com/JoshdRod updated https://github.com/llvm/llvm-project/pull/163985
>From 653666b1422017dec90e27bfddb0bb581028c58d Mon Sep 17 00:00:00 2001
From: Josh Rodriguez <josh.rodriguez at arm.com>
Date: Thu, 16 Oct 2025 15:47:02 +0000
Subject: [PATCH 1/7] [AArch64][GlobalISel] Added uhadd intrinsic support
GlobalISel now selects uhadd intrinsic, without falling back to SDAG.
Note that GlobalISel-generated code involving uhadd seems to be inefficient when compared to SDAG.
---
llvm/lib/Target/AArch64/AArch64InstrGISel.td | 8 ++++++
.../AArch64/GISel/AArch64LegalizerInfo.cpp | 2 ++
llvm/test/CodeGen/AArch64/freeze.ll | 25 +++++++++++++------
3 files changed, 27 insertions(+), 8 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64InstrGISel.td b/llvm/lib/Target/AArch64/AArch64InstrGISel.td
index fe8419301b306..9e5019c5ed378 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrGISel.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrGISel.td
@@ -239,6 +239,12 @@ def G_USDOT : AArch64GenericInstruction {
let hasSideEffects = 0;
}
+def G_UHADD : AArch64GenericInstruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type0:$src1, type1:$src2);
+ let hasSideEffects = 0;
+}
+
// Generic instruction for the BSP pseudo. It is expanded into BSP, which
// expands into BSL/BIT/BIF after register allocation.
def G_BSP : AArch64GenericInstruction {
@@ -286,6 +292,8 @@ def : GINodeEquiv<G_UDOT, AArch64udot>;
def : GINodeEquiv<G_SDOT, AArch64sdot>;
def : GINodeEquiv<G_USDOT, AArch64usdot>;
+def : GINodeEquiv<G_UHADD, avgflooru>;
+
def : GINodeEquiv<G_EXTRACT_VECTOR_ELT, vector_extract>;
def : GINodeEquiv<G_AARCH64_PREFETCH, AArch64Prefetch>;
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index 05a431312472e..4e24ed8e02f16 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -1825,6 +1825,8 @@ bool AArch64LegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
return LowerBinOp(TargetOpcode::G_ABDS);
case Intrinsic::aarch64_neon_uabd:
return LowerBinOp(TargetOpcode::G_ABDU);
+ case Intrinsic::aarch64_neon_uhadd:
+ return LowerBinOp(AArch64::G_UHADD);
case Intrinsic::aarch64_neon_abs: {
// Lower the intrinsic to G_ABS.
MIB.buildInstr(TargetOpcode::G_ABS, {MI.getOperand(0)}, {MI.getOperand(2)});
diff --git a/llvm/test/CodeGen/AArch64/freeze.ll b/llvm/test/CodeGen/AArch64/freeze.ll
index fb909fec90434..f5e2ffd7361ce 100644
--- a/llvm/test/CodeGen/AArch64/freeze.ll
+++ b/llvm/test/CodeGen/AArch64/freeze.ll
@@ -3,7 +3,6 @@
; RUN: llc -mtriple=aarch64-unknown-linux-gnu -global-isel -global-isel-abort=2 2>&1 < %s | FileCheck %s --check-prefixes=CHECK,CHECK-GI
; CHECK-GI: warning: Instruction selection used fallback path for freeze_v2i8
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for freeze_uhadd
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for freeze_urhadd
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for freeze_shadd
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for freeze_srhadd
@@ -435,13 +434,23 @@ define <8 x i16> @freeze_abds(<8 x i16> %a, <8 x i16> %b) {
}
define <8 x i16> @freeze_uhadd(<8 x i16> %a0, <8 x i16> %a1) {
-; CHECK-LABEL: freeze_uhadd:
-; CHECK: // %bb.0:
-; CHECK-NEXT: movi v2.8h, #15
-; CHECK-NEXT: and v0.16b, v0.16b, v2.16b
-; CHECK-NEXT: and v1.16b, v1.16b, v2.16b
-; CHECK-NEXT: uhadd v0.8h, v0.8h, v1.8h
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: freeze_uhadd:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: movi v2.8h, #15
+; CHECK-SD-NEXT: and v0.16b, v0.16b, v2.16b
+; CHECK-SD-NEXT: and v1.16b, v1.16b, v2.16b
+; CHECK-SD-NEXT: uhadd v0.8h, v0.8h, v1.8h
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: freeze_uhadd:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: movi v2.8h, #15
+; CHECK-GI-NEXT: and v0.16b, v0.16b, v2.16b
+; CHECK-GI-NEXT: and v1.16b, v1.16b, v2.16b
+; CHECK-GI-NEXT: movi v2.8h, #31
+; CHECK-GI-NEXT: uhadd v0.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT: and v0.16b, v0.16b, v2.16b
+; CHECK-GI-NEXT: ret
%m0 = and <8 x i16> %a0, splat (i16 15)
%m1 = and <8 x i16> %a1, splat (i16 15)
%avg = call <8 x i16> @llvm.aarch64.neon.uhadd.v8i16(<8 x i16> %m0, <8 x i16> %m1)
>From a160fa42841869b658bf040f56f55dc607f4d8ad Mon Sep 17 00:00:00 2001
From: Josh Rodriguez <josh.rodriguez at arm.com>
Date: Fri, 17 Oct 2025 13:13:22 +0000
Subject: [PATCH 2/7] [AArch64][GlobalISel] Added urhadd intrinsic support
GlobalISel now selects urhadd intrinsic, without falling back to SDAG.
Note that GlobalISel-generated code involving urhadd seems to be inefficient when compared to SDAG.
---
llvm/lib/Target/AArch64/AArch64InstrGISel.td | 7 ++++++
.../AArch64/GISel/AArch64LegalizerInfo.cpp | 2 ++
llvm/test/CodeGen/AArch64/freeze.ll | 25 +++++++++++++------
3 files changed, 26 insertions(+), 8 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64InstrGISel.td b/llvm/lib/Target/AArch64/AArch64InstrGISel.td
index 9e5019c5ed378..4f7c82e45c2fa 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrGISel.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrGISel.td
@@ -245,6 +245,12 @@ def G_UHADD : AArch64GenericInstruction {
let hasSideEffects = 0;
}
+def G_URHADD : AArch64GenericInstruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type0:$src1, type1:$src2);
+ let hasSideEffects = 0;
+}
+
// Generic instruction for the BSP pseudo. It is expanded into BSP, which
// expands into BSL/BIT/BIF after register allocation.
def G_BSP : AArch64GenericInstruction {
@@ -293,6 +299,7 @@ def : GINodeEquiv<G_SDOT, AArch64sdot>;
def : GINodeEquiv<G_USDOT, AArch64usdot>;
def : GINodeEquiv<G_UHADD, avgflooru>;
+def : GINodeEquiv<G_URHADD, avgceilu>;
def : GINodeEquiv<G_EXTRACT_VECTOR_ELT, vector_extract>;
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index 4e24ed8e02f16..9f840bfff1178 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -1827,6 +1827,8 @@ bool AArch64LegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
return LowerBinOp(TargetOpcode::G_ABDU);
case Intrinsic::aarch64_neon_uhadd:
return LowerBinOp(AArch64::G_UHADD);
+ case Intrinsic::aarch64_neon_urhadd:
+ return LowerBinOp(AArch64::G_URHADD);
case Intrinsic::aarch64_neon_abs: {
// Lower the intrinsic to G_ABS.
MIB.buildInstr(TargetOpcode::G_ABS, {MI.getOperand(0)}, {MI.getOperand(2)});
diff --git a/llvm/test/CodeGen/AArch64/freeze.ll b/llvm/test/CodeGen/AArch64/freeze.ll
index f5e2ffd7361ce..e2ae046da1467 100644
--- a/llvm/test/CodeGen/AArch64/freeze.ll
+++ b/llvm/test/CodeGen/AArch64/freeze.ll
@@ -3,7 +3,6 @@
; RUN: llc -mtriple=aarch64-unknown-linux-gnu -global-isel -global-isel-abort=2 2>&1 < %s | FileCheck %s --check-prefixes=CHECK,CHECK-GI
; CHECK-GI: warning: Instruction selection used fallback path for freeze_v2i8
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for freeze_urhadd
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for freeze_shadd
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for freeze_srhadd
@@ -460,13 +459,23 @@ define <8 x i16> @freeze_uhadd(<8 x i16> %a0, <8 x i16> %a1) {
}
define <8 x i16> @freeze_urhadd(<8 x i16> %a0, <8 x i16> %a1) {
-; CHECK-LABEL: freeze_urhadd:
-; CHECK: // %bb.0:
-; CHECK-NEXT: movi v2.8h, #15
-; CHECK-NEXT: and v0.16b, v0.16b, v2.16b
-; CHECK-NEXT: and v1.16b, v1.16b, v2.16b
-; CHECK-NEXT: urhadd v0.8h, v0.8h, v1.8h
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: freeze_urhadd:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: movi v2.8h, #15
+; CHECK-SD-NEXT: and v0.16b, v0.16b, v2.16b
+; CHECK-SD-NEXT: and v1.16b, v1.16b, v2.16b
+; CHECK-SD-NEXT: urhadd v0.8h, v0.8h, v1.8h
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: freeze_urhadd:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: movi v2.8h, #15
+; CHECK-GI-NEXT: and v0.16b, v0.16b, v2.16b
+; CHECK-GI-NEXT: and v1.16b, v1.16b, v2.16b
+; CHECK-GI-NEXT: movi v2.8h, #31
+; CHECK-GI-NEXT: urhadd v0.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT: and v0.16b, v0.16b, v2.16b
+; CHECK-GI-NEXT: ret
%m0 = and <8 x i16> %a0, splat (i16 15)
%m1 = and <8 x i16> %a1, splat (i16 15)
%avg = call <8 x i16> @llvm.aarch64.neon.urhadd.v8i16(<8 x i16> %m0, <8 x i16> %m1)
>From 64f37c4bdcec6fa78659e3962e947a27fdb7aa88 Mon Sep 17 00:00:00 2001
From: Josh Rodriguez <josh.rodriguez at arm.com>
Date: Fri, 17 Oct 2025 15:06:58 +0000
Subject: [PATCH 3/7] [AArch64][GlobalISel] Added shadd intrinsic support
GlobalISel now selects shadd intrinsic, without falling back to SDAG. Note
that GlobalISel-generated code involving shadd seems to be inefficient when
compared to SDAG.
---
llvm/lib/Target/AArch64/AArch64InstrGISel.td | 7 ++++++
.../AArch64/GISel/AArch64LegalizerInfo.cpp | 2 ++
llvm/test/CodeGen/AArch64/freeze.ll | 22 +++++++++++++------
3 files changed, 24 insertions(+), 7 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64InstrGISel.td b/llvm/lib/Target/AArch64/AArch64InstrGISel.td
index 4f7c82e45c2fa..9b0efa30cfee5 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrGISel.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrGISel.td
@@ -251,6 +251,12 @@ def G_URHADD : AArch64GenericInstruction {
let hasSideEffects = 0;
}
+def G_SHADD : AArch64GenericInstruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type0:$src1, type1:$src2);
+ let hasSideEffects = 0;
+}
+
// Generic instruction for the BSP pseudo. It is expanded into BSP, which
// expands into BSL/BIT/BIF after register allocation.
def G_BSP : AArch64GenericInstruction {
@@ -300,6 +306,7 @@ def : GINodeEquiv<G_USDOT, AArch64usdot>;
def : GINodeEquiv<G_UHADD, avgflooru>;
def : GINodeEquiv<G_URHADD, avgceilu>;
+def : GINodeEquiv<G_SHADD, avgfloors>;
def : GINodeEquiv<G_EXTRACT_VECTOR_ELT, vector_extract>;
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index 9f840bfff1178..a11784fc0ca12 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -1829,6 +1829,8 @@ bool AArch64LegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
return LowerBinOp(AArch64::G_UHADD);
case Intrinsic::aarch64_neon_urhadd:
return LowerBinOp(AArch64::G_URHADD);
+ case Intrinsic::aarch64_neon_shadd:
+ return LowerBinOp(AArch64::G_SHADD);
case Intrinsic::aarch64_neon_abs: {
// Lower the intrinsic to G_ABS.
MIB.buildInstr(TargetOpcode::G_ABS, {MI.getOperand(0)}, {MI.getOperand(2)});
diff --git a/llvm/test/CodeGen/AArch64/freeze.ll b/llvm/test/CodeGen/AArch64/freeze.ll
index e2ae046da1467..dffd89143d16b 100644
--- a/llvm/test/CodeGen/AArch64/freeze.ll
+++ b/llvm/test/CodeGen/AArch64/freeze.ll
@@ -3,7 +3,6 @@
; RUN: llc -mtriple=aarch64-unknown-linux-gnu -global-isel -global-isel-abort=2 2>&1 < %s | FileCheck %s --check-prefixes=CHECK,CHECK-GI
; CHECK-GI: warning: Instruction selection used fallback path for freeze_v2i8
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for freeze_shadd
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for freeze_srhadd
%struct.T = type { i32, i32 }
@@ -485,12 +484,21 @@ define <8 x i16> @freeze_urhadd(<8 x i16> %a0, <8 x i16> %a1) {
}
define <8 x i16> @freeze_shadd(<8 x i8> %a0, <8 x i16> %a1) {
-; CHECK-LABEL: freeze_shadd:
-; CHECK: // %bb.0:
-; CHECK-NEXT: sshll v0.8h, v0.8b, #0
-; CHECK-NEXT: sshr v1.8h, v1.8h, #8
-; CHECK-NEXT: shadd v0.8h, v0.8h, v1.8h
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: freeze_shadd:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: sshll v0.8h, v0.8b, #0
+; CHECK-SD-NEXT: sshr v1.8h, v1.8h, #8
+; CHECK-SD-NEXT: shadd v0.8h, v0.8h, v1.8h
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: freeze_shadd:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: sshll v0.8h, v0.8b, #0
+; CHECK-GI-NEXT: sshr v1.8h, v1.8h, #8
+; CHECK-GI-NEXT: shadd v0.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT: shl v0.8h, v0.8h, #8
+; CHECK-GI-NEXT: sshr v0.8h, v0.8h, #8
+; CHECK-GI-NEXT: ret
%x0 = sext <8 x i8> %a0 to <8 x i16>
%x1 = ashr <8 x i16> %a1, splat (i16 8)
%avg = call <8 x i16> @llvm.aarch64.neon.shadd.v8i16(<8 x i16> %x0, <8 x i16> %x1)
>From a9899a1357f6b608ade53e0ff410db6df51b9800 Mon Sep 17 00:00:00 2001
From: Josh Rodriguez <josh.rodriguez at arm.com>
Date: Fri, 17 Oct 2025 15:46:32 +0000
Subject: [PATCH 4/7] [AArch64][GlobalISel] Added srhadd intrinsic support
GlobalISel now selects srhadd intrinsic, without falling back to SDAG. Note
that GlobalISel-generated code involving srhadd seems to be inefficient when
compared to SDAG.
---
llvm/lib/Target/AArch64/AArch64InstrGISel.td | 7 ++++++
.../AArch64/GISel/AArch64LegalizerInfo.cpp | 2 ++
llvm/test/CodeGen/AArch64/freeze.ll | 22 +++++++++++++------
3 files changed, 24 insertions(+), 7 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64InstrGISel.td b/llvm/lib/Target/AArch64/AArch64InstrGISel.td
index 9b0efa30cfee5..187ef00eebd3c 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrGISel.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrGISel.td
@@ -257,6 +257,12 @@ def G_SHADD : AArch64GenericInstruction {
let hasSideEffects = 0;
}
+def G_SRHADD : AArch64GenericInstruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type0:$src1, type1:$src2);
+ let hasSideEffects = 0;
+}
+
// Generic instruction for the BSP pseudo. It is expanded into BSP, which
// expands into BSL/BIT/BIF after register allocation.
def G_BSP : AArch64GenericInstruction {
@@ -307,6 +313,7 @@ def : GINodeEquiv<G_USDOT, AArch64usdot>;
def : GINodeEquiv<G_UHADD, avgflooru>;
def : GINodeEquiv<G_URHADD, avgceilu>;
def : GINodeEquiv<G_SHADD, avgfloors>;
+def : GINodeEquiv<G_SRHADD, avgceils>;
def : GINodeEquiv<G_EXTRACT_VECTOR_ELT, vector_extract>;
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index a11784fc0ca12..d2abdd0662b60 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -1831,6 +1831,8 @@ bool AArch64LegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
return LowerBinOp(AArch64::G_URHADD);
case Intrinsic::aarch64_neon_shadd:
return LowerBinOp(AArch64::G_SHADD);
+ case Intrinsic::aarch64_neon_srhadd:
+ return LowerBinOp(AArch64::G_SRHADD);
case Intrinsic::aarch64_neon_abs: {
// Lower the intrinsic to G_ABS.
MIB.buildInstr(TargetOpcode::G_ABS, {MI.getOperand(0)}, {MI.getOperand(2)});
diff --git a/llvm/test/CodeGen/AArch64/freeze.ll b/llvm/test/CodeGen/AArch64/freeze.ll
index dffd89143d16b..136ac8b0a2aa1 100644
--- a/llvm/test/CodeGen/AArch64/freeze.ll
+++ b/llvm/test/CodeGen/AArch64/freeze.ll
@@ -3,7 +3,6 @@
; RUN: llc -mtriple=aarch64-unknown-linux-gnu -global-isel -global-isel-abort=2 2>&1 < %s | FileCheck %s --check-prefixes=CHECK,CHECK-GI
; CHECK-GI: warning: Instruction selection used fallback path for freeze_v2i8
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for freeze_srhadd
%struct.T = type { i32, i32 }
@@ -509,12 +508,21 @@ define <8 x i16> @freeze_shadd(<8 x i8> %a0, <8 x i16> %a1) {
}
define <8 x i16> @freeze_srhadd(<8 x i8> %a0, <8 x i16> %a1) {
-; CHECK-LABEL: freeze_srhadd:
-; CHECK: // %bb.0:
-; CHECK-NEXT: sshll v0.8h, v0.8b, #0
-; CHECK-NEXT: sshr v1.8h, v1.8h, #8
-; CHECK-NEXT: srhadd v0.8h, v0.8h, v1.8h
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: freeze_srhadd:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: sshll v0.8h, v0.8b, #0
+; CHECK-SD-NEXT: sshr v1.8h, v1.8h, #8
+; CHECK-SD-NEXT: srhadd v0.8h, v0.8h, v1.8h
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: freeze_srhadd:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: sshll v0.8h, v0.8b, #0
+; CHECK-GI-NEXT: sshr v1.8h, v1.8h, #8
+; CHECK-GI-NEXT: srhadd v0.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT: shl v0.8h, v0.8h, #8
+; CHECK-GI-NEXT: sshr v0.8h, v0.8h, #8
+; CHECK-GI-NEXT: ret
%x0 = sext <8 x i8> %a0 to <8 x i16>
%x1 = ashr <8 x i16> %a1, splat (i16 8)
%avg = call <8 x i16> @llvm.aarch64.neon.srhadd.v8i16(<8 x i16> %x0, <8 x i16> %x1)
>From 255822304b863381a9604086dfbc4986873d1ed8 Mon Sep 17 00:00:00 2001
From: Josh Rodriguez <josh.rodriguez at arm.com>
Date: Fri, 17 Oct 2025 15:48:31 +0000
Subject: [PATCH 5/7] [AArch64][GlobalISel] Modified llc test to check
generation from both SDAG and GISel Note that GlobalISel-generated code
involving the hadd family of intrinsics seems to be inefficient when compared
to SDAG.
---
.../AArch64/aarch64-known-bits-hadd.ll | 171 +++++++++++++-----
1 file changed, 123 insertions(+), 48 deletions(-)
diff --git a/llvm/test/CodeGen/AArch64/aarch64-known-bits-hadd.ll b/llvm/test/CodeGen/AArch64/aarch64-known-bits-hadd.ll
index f900f0209a108..a6fbaf01c5476 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-known-bits-hadd.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-known-bits-hadd.ll
@@ -1,5 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
-; RUN: llc -mtriple=aarch64 < %s | FileCheck %s
+; RUN: llc < %s -mtriple=aarch64 | FileCheck %s --check-prefixes=CHECK,CHECK-SD
+; RUN: llc < %s -mtriple=aarch64 -global-isel | FileCheck %s --check-prefixes=CHECK,CHECK-GI
declare <8 x i16> @llvm.aarch64.neon.uhadd.v8i16(<8 x i16>, <8 x i16>)
declare <8 x i16> @llvm.aarch64.neon.urhadd.v8i16(<8 x i16>, <8 x i16>)
@@ -7,11 +8,20 @@ declare <8 x i16> @llvm.aarch64.neon.shadd.v8i16(<8 x i16>, <8 x i16>)
declare <8 x i16> @llvm.aarch64.neon.srhadd.v8i16(<8 x i16>, <8 x i16>)
define <8 x i16> @haddu_zext(<8 x i8> %a0, <8 x i8> %a1) {
-; CHECK-LABEL: haddu_zext:
-; CHECK: // %bb.0:
-; CHECK-NEXT: uhadd v0.8b, v0.8b, v1.8b
-; CHECK-NEXT: ushll v0.8h, v0.8b, #0
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: haddu_zext:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: uhadd v0.8b, v0.8b, v1.8b
+; CHECK-SD-NEXT: ushll v0.8h, v0.8b, #0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: haddu_zext:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: ushll v0.8h, v0.8b, #0
+; CHECK-GI-NEXT: ushll v1.8h, v1.8b, #0
+; CHECK-GI-NEXT: mvni v2.8h, #254, lsl #8
+; CHECK-GI-NEXT: uhadd v0.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT: and v0.16b, v0.16b, v2.16b
+; CHECK-GI-NEXT: ret
%x0 = zext <8 x i8> %a0 to <8 x i16>
%x1 = zext <8 x i8> %a1 to <8 x i16>
%hadd = call <8 x i16> @llvm.aarch64.neon.uhadd.v8i16(<8 x i16> %x0, <8 x i16> %x1)
@@ -20,11 +30,20 @@ define <8 x i16> @haddu_zext(<8 x i8> %a0, <8 x i8> %a1) {
}
define <8 x i16> @rhaddu_zext(<8 x i8> %a0, <8 x i8> %a1) {
-; CHECK-LABEL: rhaddu_zext:
-; CHECK: // %bb.0:
-; CHECK-NEXT: urhadd v0.8b, v0.8b, v1.8b
-; CHECK-NEXT: ushll v0.8h, v0.8b, #0
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: rhaddu_zext:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: urhadd v0.8b, v0.8b, v1.8b
+; CHECK-SD-NEXT: ushll v0.8h, v0.8b, #0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: rhaddu_zext:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: ushll v0.8h, v0.8b, #0
+; CHECK-GI-NEXT: ushll v1.8h, v1.8b, #0
+; CHECK-GI-NEXT: mvni v2.8h, #254, lsl #8
+; CHECK-GI-NEXT: urhadd v0.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT: and v0.16b, v0.16b, v2.16b
+; CHECK-GI-NEXT: ret
%x0 = zext <8 x i8> %a0 to <8 x i16>
%x1 = zext <8 x i8> %a1 to <8 x i16>
%hadd = call <8 x i16> @llvm.aarch64.neon.urhadd.v8i16(<8 x i16> %x0, <8 x i16> %x1)
@@ -33,11 +52,20 @@ define <8 x i16> @rhaddu_zext(<8 x i8> %a0, <8 x i8> %a1) {
}
define <8 x i16> @hadds_zext(<8 x i8> %a0, <8 x i8> %a1) {
-; CHECK-LABEL: hadds_zext:
-; CHECK: // %bb.0:
-; CHECK-NEXT: uhadd v0.8b, v0.8b, v1.8b
-; CHECK-NEXT: ushll v0.8h, v0.8b, #0
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: hadds_zext:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: uhadd v0.8b, v0.8b, v1.8b
+; CHECK-SD-NEXT: ushll v0.8h, v0.8b, #0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: hadds_zext:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: ushll v0.8h, v0.8b, #0
+; CHECK-GI-NEXT: ushll v1.8h, v1.8b, #0
+; CHECK-GI-NEXT: mvni v2.8h, #254, lsl #8
+; CHECK-GI-NEXT: shadd v0.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT: and v0.16b, v0.16b, v2.16b
+; CHECK-GI-NEXT: ret
%x0 = zext <8 x i8> %a0 to <8 x i16>
%x1 = zext <8 x i8> %a1 to <8 x i16>
%hadd = call <8 x i16> @llvm.aarch64.neon.shadd.v8i16(<8 x i16> %x0, <8 x i16> %x1)
@@ -46,12 +74,21 @@ define <8 x i16> @hadds_zext(<8 x i8> %a0, <8 x i8> %a1) {
}
define <8 x i16> @shaddu_zext(<8 x i8> %a0, <8 x i8> %a1) {
-; CHECK-LABEL: shaddu_zext:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ushll v0.8h, v0.8b, #0
-; CHECK-NEXT: ushll v1.8h, v1.8b, #0
-; CHECK-NEXT: srhadd v0.8h, v0.8h, v1.8h
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: shaddu_zext:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: ushll v0.8h, v0.8b, #0
+; CHECK-SD-NEXT: ushll v1.8h, v1.8b, #0
+; CHECK-SD-NEXT: srhadd v0.8h, v0.8h, v1.8h
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: shaddu_zext:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: ushll v0.8h, v0.8b, #0
+; CHECK-GI-NEXT: ushll v1.8h, v1.8b, #0
+; CHECK-GI-NEXT: mvni v2.8h, #254, lsl #8
+; CHECK-GI-NEXT: srhadd v0.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT: and v0.16b, v0.16b, v2.16b
+; CHECK-GI-NEXT: ret
%x0 = zext <8 x i8> %a0 to <8 x i16>
%x1 = zext <8 x i8> %a1 to <8 x i16>
%hadd = call <8 x i16> @llvm.aarch64.neon.srhadd.v8i16(<8 x i16> %x0, <8 x i16> %x1)
@@ -62,13 +99,22 @@ define <8 x i16> @shaddu_zext(<8 x i8> %a0, <8 x i8> %a1) {
; ; negative tests
define <8 x i16> @haddu_sext(<8 x i8> %a0, <8 x i8> %a1) {
-; CHECK-LABEL: haddu_sext:
-; CHECK: // %bb.0:
-; CHECK-NEXT: sshll v0.8h, v0.8b, #0
-; CHECK-NEXT: sshll v1.8h, v1.8b, #0
-; CHECK-NEXT: uhadd v0.8h, v0.8h, v1.8h
-; CHECK-NEXT: bic v0.8h, #254, lsl #8
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: haddu_sext:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: sshll v0.8h, v0.8b, #0
+; CHECK-SD-NEXT: sshll v1.8h, v1.8b, #0
+; CHECK-SD-NEXT: uhadd v0.8h, v0.8h, v1.8h
+; CHECK-SD-NEXT: bic v0.8h, #254, lsl #8
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: haddu_sext:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: sshll v0.8h, v0.8b, #0
+; CHECK-GI-NEXT: sshll v1.8h, v1.8b, #0
+; CHECK-GI-NEXT: mvni v2.8h, #254, lsl #8
+; CHECK-GI-NEXT: uhadd v0.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT: and v0.16b, v0.16b, v2.16b
+; CHECK-GI-NEXT: ret
%x0 = sext <8 x i8> %a0 to <8 x i16>
%x1 = sext <8 x i8> %a1 to <8 x i16>
%hadd = call <8 x i16> @llvm.aarch64.neon.uhadd.v8i16(<8 x i16> %x0, <8 x i16> %x1)
@@ -77,13 +123,22 @@ define <8 x i16> @haddu_sext(<8 x i8> %a0, <8 x i8> %a1) {
}
define <8 x i16> @urhadd_sext(<8 x i8> %a0, <8 x i8> %a1) {
-; CHECK-LABEL: urhadd_sext:
-; CHECK: // %bb.0:
-; CHECK-NEXT: sshll v0.8h, v0.8b, #0
-; CHECK-NEXT: sshll v1.8h, v1.8b, #0
-; CHECK-NEXT: urhadd v0.8h, v0.8h, v1.8h
-; CHECK-NEXT: bic v0.8h, #254, lsl #8
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: urhadd_sext:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: sshll v0.8h, v0.8b, #0
+; CHECK-SD-NEXT: sshll v1.8h, v1.8b, #0
+; CHECK-SD-NEXT: urhadd v0.8h, v0.8h, v1.8h
+; CHECK-SD-NEXT: bic v0.8h, #254, lsl #8
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: urhadd_sext:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: sshll v0.8h, v0.8b, #0
+; CHECK-GI-NEXT: sshll v1.8h, v1.8b, #0
+; CHECK-GI-NEXT: mvni v2.8h, #254, lsl #8
+; CHECK-GI-NEXT: urhadd v0.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT: and v0.16b, v0.16b, v2.16b
+; CHECK-GI-NEXT: ret
%x0 = sext <8 x i8> %a0 to <8 x i16>
%x1 = sext <8 x i8> %a1 to <8 x i16>
%hadd = call <8 x i16> @llvm.aarch64.neon.urhadd.v8i16(<8 x i16> %x0, <8 x i16> %x1)
@@ -92,12 +147,21 @@ define <8 x i16> @urhadd_sext(<8 x i8> %a0, <8 x i8> %a1) {
}
define <8 x i16> @hadds_sext(<8 x i8> %a0, <8 x i8> %a1) {
-; CHECK-LABEL: hadds_sext:
-; CHECK: // %bb.0:
-; CHECK-NEXT: shadd v0.8b, v0.8b, v1.8b
-; CHECK-NEXT: sshll v0.8h, v0.8b, #0
-; CHECK-NEXT: bic v0.8h, #254, lsl #8
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: hadds_sext:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: shadd v0.8b, v0.8b, v1.8b
+; CHECK-SD-NEXT: sshll v0.8h, v0.8b, #0
+; CHECK-SD-NEXT: bic v0.8h, #254, lsl #8
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: hadds_sext:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: sshll v0.8h, v0.8b, #0
+; CHECK-GI-NEXT: sshll v1.8h, v1.8b, #0
+; CHECK-GI-NEXT: mvni v2.8h, #254, lsl #8
+; CHECK-GI-NEXT: shadd v0.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT: and v0.16b, v0.16b, v2.16b
+; CHECK-GI-NEXT: ret
%x0 = sext <8 x i8> %a0 to <8 x i16>
%x1 = sext <8 x i8> %a1 to <8 x i16>
%hadd = call <8 x i16> @llvm.aarch64.neon.shadd.v8i16(<8 x i16> %x0, <8 x i16> %x1)
@@ -106,15 +170,26 @@ define <8 x i16> @hadds_sext(<8 x i8> %a0, <8 x i8> %a1) {
}
define <8 x i16> @shaddu_sext(<8 x i8> %a0, <8 x i8> %a1) {
-; CHECK-LABEL: shaddu_sext:
-; CHECK: // %bb.0:
-; CHECK-NEXT: srhadd v0.8b, v0.8b, v1.8b
-; CHECK-NEXT: sshll v0.8h, v0.8b, #0
-; CHECK-NEXT: bic v0.8h, #254, lsl #8
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: shaddu_sext:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: srhadd v0.8b, v0.8b, v1.8b
+; CHECK-SD-NEXT: sshll v0.8h, v0.8b, #0
+; CHECK-SD-NEXT: bic v0.8h, #254, lsl #8
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: shaddu_sext:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: sshll v0.8h, v0.8b, #0
+; CHECK-GI-NEXT: sshll v1.8h, v1.8b, #0
+; CHECK-GI-NEXT: mvni v2.8h, #254, lsl #8
+; CHECK-GI-NEXT: srhadd v0.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT: and v0.16b, v0.16b, v2.16b
+; CHECK-GI-NEXT: ret
%x0 = sext <8 x i8> %a0 to <8 x i16>
%x1 = sext <8 x i8> %a1 to <8 x i16>
%hadd = call <8 x i16> @llvm.aarch64.neon.srhadd.v8i16(<8 x i16> %x0, <8 x i16> %x1)
%res = and <8 x i16> %hadd, <i16 511, i16 511, i16 511, i16 511, i16 511, i16 511, i16 511, i16 511>
ret <8 x i16> %res
}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; CHECK: {{.*}}
>From 75539ff0117abb934d0880723ff7d6315ea0d697 Mon Sep 17 00:00:00 2001
From: Josh Rodriguez <josh.rodriguez at arm.com>
Date: Tue, 21 Oct 2025 14:36:01 +0000
Subject: [PATCH 6/7] [AArch64][GlobalISel] Modified gMIR instruction names to
match SDAG equivalents.
---
llvm/lib/Target/AArch64/AArch64InstrGISel.td | 16 ++++++++--------
.../AArch64/GISel/AArch64LegalizerInfo.cpp | 8 ++++----
2 files changed, 12 insertions(+), 12 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64InstrGISel.td b/llvm/lib/Target/AArch64/AArch64InstrGISel.td
index 187ef00eebd3c..ad071a1cce7f1 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrGISel.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrGISel.td
@@ -239,25 +239,25 @@ def G_USDOT : AArch64GenericInstruction {
let hasSideEffects = 0;
}
-def G_UHADD : AArch64GenericInstruction {
+def G_AVGFLOORU : AArch64GenericInstruction {
let OutOperandList = (outs type0:$dst);
let InOperandList = (ins type0:$src1, type1:$src2);
let hasSideEffects = 0;
}
-def G_URHADD : AArch64GenericInstruction {
+def G_AVGCEILU : AArch64GenericInstruction {
let OutOperandList = (outs type0:$dst);
let InOperandList = (ins type0:$src1, type1:$src2);
let hasSideEffects = 0;
}
-def G_SHADD : AArch64GenericInstruction {
+def G_AVGFLOORS : AArch64GenericInstruction {
let OutOperandList = (outs type0:$dst);
let InOperandList = (ins type0:$src1, type1:$src2);
let hasSideEffects = 0;
}
-def G_SRHADD : AArch64GenericInstruction {
+def G_AVGCEILS : AArch64GenericInstruction {
let OutOperandList = (outs type0:$dst);
let InOperandList = (ins type0:$src1, type1:$src2);
let hasSideEffects = 0;
@@ -310,10 +310,10 @@ def : GINodeEquiv<G_UDOT, AArch64udot>;
def : GINodeEquiv<G_SDOT, AArch64sdot>;
def : GINodeEquiv<G_USDOT, AArch64usdot>;
-def : GINodeEquiv<G_UHADD, avgflooru>;
-def : GINodeEquiv<G_URHADD, avgceilu>;
-def : GINodeEquiv<G_SHADD, avgfloors>;
-def : GINodeEquiv<G_SRHADD, avgceils>;
+def : GINodeEquiv<G_AVGFLOORU, avgflooru>;
+def : GINodeEquiv<G_AVGCEILU, avgceilu>;
+def : GINodeEquiv<G_AVGFLOORS, avgfloors>;
+def : GINodeEquiv<G_AVGCEILS, avgceils>;
def : GINodeEquiv<G_EXTRACT_VECTOR_ELT, vector_extract>;
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index d2abdd0662b60..1d865e3cd85aa 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -1826,13 +1826,13 @@ bool AArch64LegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
case Intrinsic::aarch64_neon_uabd:
return LowerBinOp(TargetOpcode::G_ABDU);
case Intrinsic::aarch64_neon_uhadd:
- return LowerBinOp(AArch64::G_UHADD);
+ return LowerBinOp(AArch64::G_AVGFLOORU);
case Intrinsic::aarch64_neon_urhadd:
- return LowerBinOp(AArch64::G_URHADD);
+ return LowerBinOp(AArch64::G_AVGCEILU);
case Intrinsic::aarch64_neon_shadd:
- return LowerBinOp(AArch64::G_SHADD);
+ return LowerBinOp(AArch64::G_AVGFLOORS);
case Intrinsic::aarch64_neon_srhadd:
- return LowerBinOp(AArch64::G_SRHADD);
+ return LowerBinOp(AArch64::G_AVGCEILS);
case Intrinsic::aarch64_neon_abs: {
// Lower the intrinsic to G_ABS.
MIB.buildInstr(TargetOpcode::G_ABS, {MI.getOperand(0)}, {MI.getOperand(2)});
>From eee05eaf94f85f0da23af7e6d27706e34fd69632 Mon Sep 17 00:00:00 2001
From: Josh Rodriguez <josh.rodriguez at arm.com>
Date: Mon, 27 Oct 2025 13:55:11 +0000
Subject: [PATCH 7/7] [AArch64][GlobalISel] Converted intrinsics to generic
form
---
llvm/include/llvm/Support/TargetOpcodes.def | 11 ++++++++
llvm/include/llvm/Target/GenericOpcodes.td | 28 +++++++++++++++++++
.../Target/GlobalISel/SelectionDAGCompat.td | 4 +++
llvm/lib/Target/AArch64/AArch64InstrGISel.td | 24 ----------------
.../AArch64/GISel/AArch64LegalizerInfo.cpp | 10 +++----
.../GlobalISel/legalizer-info-validation.mir | 20 +++++++++++++
.../match-table-cxx.td | 2 +-
.../GlobalISelEmitter/GlobalISelEmitter.td | 2 +-
llvm/test/TableGen/get-named-operand-idx.td | 3 +-
9 files changed, 72 insertions(+), 32 deletions(-)
diff --git a/llvm/include/llvm/Support/TargetOpcodes.def b/llvm/include/llvm/Support/TargetOpcodes.def
index e55314568d683..d7a2e899ffd6f 100644
--- a/llvm/include/llvm/Support/TargetOpcodes.def
+++ b/llvm/include/llvm/Support/TargetOpcodes.def
@@ -295,6 +295,17 @@ HANDLE_TARGET_OPCODE(G_ABDS)
/// Generic absolute difference unsigned instruction.
HANDLE_TARGET_OPCODE(G_ABDU)
+/// Generic vector average with truncate unsigned instruction.
+HANDLE_TARGET_OPCODE(G_AVGFLOORU)
+
+/// Generic vector average with round unsigned instruction.
+HANDLE_TARGET_OPCODE(G_AVGCEILU)
+
+/// Generic vector average with truncate signed instruction.
+HANDLE_TARGET_OPCODE(G_AVGFLOORS)
+
+/// Generic vector average with round signed instruction.
+HANDLE_TARGET_OPCODE(G_AVGCEILS)
HANDLE_TARGET_OPCODE(G_IMPLICIT_DEF)
diff --git a/llvm/include/llvm/Target/GenericOpcodes.td b/llvm/include/llvm/Target/GenericOpcodes.td
index e3f995d53484f..b847e0425cf2b 100644
--- a/llvm/include/llvm/Target/GenericOpcodes.td
+++ b/llvm/include/llvm/Target/GenericOpcodes.td
@@ -423,6 +423,34 @@ def G_ABDU : GenericInstruction {
let isCommutable = true;
}
+// Generic vector average truncated unsigned.
+def G_AVGFLOORU : GenericInstruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type0:$src1, type0:$src2);
+ let hasSideEffects = 0;
+}
+
+// Generic vector average rounded unsigned.
+def G_AVGCEILU : GenericInstruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type0:$src1, type0:$src2);
+ let hasSideEffects = 0;
+}
+
+// Generic vector average truncated signed.
+def G_AVGFLOORS : GenericInstruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type0:$src1, type0:$src2);
+ let hasSideEffects = 0;
+}
+
+// Generic vector average rounded signed.
+def G_AVGCEILS : GenericInstruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type0:$src1, type0:$src2);
+ let hasSideEffects = 0;
+}
+
/// Funnel 'double' shifts take 3 operands, 2 inputs and the shift amount.
/// fshl(X,Y,Z): (X << (Z % bitwidth)) | (Y >> (bitwidth - (Z % bitwidth)))
def G_FSHL : GenericInstruction {
diff --git a/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td b/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td
index c0d480294dd8b..137b291d25d35 100644
--- a/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td
+++ b/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td
@@ -83,6 +83,10 @@ def : GINodeEquiv<G_LSHR, srl>;
def : GINodeEquiv<G_ASHR, sra>;
def : GINodeEquiv<G_ABDS, abds>;
def : GINodeEquiv<G_ABDU, abdu>;
+def : GINodeEquiv<G_AVGFLOORU, avgflooru>;
+def : GINodeEquiv<G_AVGCEILU, avgceilu>;
+def : GINodeEquiv<G_AVGFLOORS, avgfloors>;
+def : GINodeEquiv<G_AVGCEILS, avgceils>;
def : GINodeEquiv<G_SADDSAT, saddsat>;
def : GINodeEquiv<G_UADDSAT, uaddsat>;
def : GINodeEquiv<G_SSUBSAT, ssubsat>;
diff --git a/llvm/lib/Target/AArch64/AArch64InstrGISel.td b/llvm/lib/Target/AArch64/AArch64InstrGISel.td
index ad071a1cce7f1..a9e16b2210d2a 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrGISel.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrGISel.td
@@ -239,30 +239,6 @@ def G_USDOT : AArch64GenericInstruction {
let hasSideEffects = 0;
}
-def G_AVGFLOORU : AArch64GenericInstruction {
- let OutOperandList = (outs type0:$dst);
- let InOperandList = (ins type0:$src1, type1:$src2);
- let hasSideEffects = 0;
-}
-
-def G_AVGCEILU : AArch64GenericInstruction {
- let OutOperandList = (outs type0:$dst);
- let InOperandList = (ins type0:$src1, type1:$src2);
- let hasSideEffects = 0;
-}
-
-def G_AVGFLOORS : AArch64GenericInstruction {
- let OutOperandList = (outs type0:$dst);
- let InOperandList = (ins type0:$src1, type1:$src2);
- let hasSideEffects = 0;
-}
-
-def G_AVGCEILS : AArch64GenericInstruction {
- let OutOperandList = (outs type0:$dst);
- let InOperandList = (ins type0:$src1, type1:$src2);
- let hasSideEffects = 0;
-}
-
// Generic instruction for the BSP pseudo. It is expanded into BSP, which
// expands into BSL/BIT/BIF after register allocation.
def G_BSP : AArch64GenericInstruction {
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index 1d865e3cd85aa..9c798f0485e6b 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -289,7 +289,7 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
.moreElementsToNextPow2(0)
.lower();
- getActionDefinitionsBuilder({G_ABDS, G_ABDU})
+ getActionDefinitionsBuilder({G_ABDS, G_ABDU, G_AVGFLOORU, G_AVGCEILU, G_AVGFLOORS, G_AVGCEILS})
.legalFor({v8s8, v16s8, v4s16, v8s16, v2s32, v4s32})
.lower();
@@ -1826,13 +1826,13 @@ bool AArch64LegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
case Intrinsic::aarch64_neon_uabd:
return LowerBinOp(TargetOpcode::G_ABDU);
case Intrinsic::aarch64_neon_uhadd:
- return LowerBinOp(AArch64::G_AVGFLOORU);
+ return LowerBinOp(TargetOpcode::G_AVGFLOORU);
case Intrinsic::aarch64_neon_urhadd:
- return LowerBinOp(AArch64::G_AVGCEILU);
+ return LowerBinOp(TargetOpcode::G_AVGCEILU);
case Intrinsic::aarch64_neon_shadd:
- return LowerBinOp(AArch64::G_AVGFLOORS);
+ return LowerBinOp(TargetOpcode::G_AVGFLOORS);
case Intrinsic::aarch64_neon_srhadd:
- return LowerBinOp(AArch64::G_AVGCEILS);
+ return LowerBinOp(TargetOpcode::G_AVGCEILS);
case Intrinsic::aarch64_neon_abs: {
// Lower the intrinsic to G_ABS.
MIB.buildInstr(TargetOpcode::G_ABS, {MI.getOperand(0)}, {MI.getOperand(2)});
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir
index 896603d6eb20d..800b575dc7920 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir
@@ -79,6 +79,26 @@
# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected
# DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected
#
+# DEBUG-NEXT: G_AVGFLOORU (opcode {{[0-9]+}}): 1 type index, 0 imm indices
+# DEBUG-NEXT: .. opcode {{[0-9]+}} is aliased to {{[0-9]+}}
+# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected
+# DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected
+#
+# DEBUG-NEXT: G_AVGCEILU (opcode {{[0-9]+}}): 1 type index, 0 imm indices
+# DEBUG-NEXT: .. opcode {{[0-9]+}} is aliased to {{[0-9]+}}
+# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected
+# DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected
+#
+# DEBUG-NEXT: G_AVGFLOORS (opcode {{[0-9]+}}): 1 type index, 0 imm indices
+# DEBUG-NEXT: .. opcode {{[0-9]+}} is aliased to {{[0-9]+}}
+# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected
+# DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected
+#
+# DEBUG-NEXT: G_AVGCEILS (opcode {{[0-9]+}}): 1 type index, 0 imm indices
+# DEBUG-NEXT: .. opcode {{[0-9]+}} is aliased to {{[0-9]+}}
+# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected
+# DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected
+#
# DEBUG-NEXT: G_IMPLICIT_DEF (opcode {{[0-9]+}}): 1 type index, 0 imm indices
# DEBUG-NEXT: .. the first uncovered type index: {{[0-9]+}}, OK
# DEBUG-NEXT: .. the first uncovered imm index: {{[0-9]+}}, OK
diff --git a/llvm/test/TableGen/GlobalISelCombinerEmitter/match-table-cxx.td b/llvm/test/TableGen/GlobalISelCombinerEmitter/match-table-cxx.td
index 18960b43ab97d..df645c28ace9b 100644
--- a/llvm/test/TableGen/GlobalISelCombinerEmitter/match-table-cxx.td
+++ b/llvm/test/TableGen/GlobalISelCombinerEmitter/match-table-cxx.td
@@ -96,7 +96,7 @@ def MyCombiner: GICombiner<"GenMyCombiner", [
// CHECK: const uint8_t *GenMyCombiner::getMatchTable() const {
// CHECK-NEXT: constexpr static uint8_t MatchTable0[] = {
-// CHECK-NEXT: /* 0 */ GIM_SwitchOpcode, /*MI*/0, /*[*/GIMT_Encode2(99), GIMT_Encode2(211), /*)*//*default:*//*Label 5*/ GIMT_Encode4(524),
+// CHECK-NEXT: /* 0 */ GIM_SwitchOpcode, /*MI*/0, /*[*/GIMT_Encode2(103), GIMT_Encode2(215), /*)*//*default:*//*Label 5*/ GIMT_Encode4(524),
// CHECK-NEXT: /* 10 */ /*TargetOpcode::G_STORE*//*Label 0*/ GIMT_Encode4(458), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0),
// CHECK-NEXT: /* 182 */ /*TargetOpcode::G_SEXT*//*Label 1*/ GIMT_Encode4(476), GIMT_Encode4(0),
// CHECK-NEXT: /* 190 */ /*TargetOpcode::G_ZEXT*//*Label 2*/ GIMT_Encode4(488), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0),
diff --git a/llvm/test/TableGen/GlobalISelEmitter/GlobalISelEmitter.td b/llvm/test/TableGen/GlobalISelEmitter/GlobalISelEmitter.td
index fdabc53a3ff3b..64ca63da3b6f0 100644
--- a/llvm/test/TableGen/GlobalISelEmitter/GlobalISelEmitter.td
+++ b/llvm/test/TableGen/GlobalISelEmitter/GlobalISelEmitter.td
@@ -535,7 +535,7 @@ def : Pat<(frag GPR32:$src1, complex:$src2, complex:$src3),
// R00O-NEXT: GIM_Reject,
// R00O: // Label [[DEFAULT_NUM]]: @[[DEFAULT]]
// R00O-NEXT: GIM_Reject,
-// R00O-NEXT: }; // Size: 1902 bytes
+// R00O-NEXT: }; // Size: 1918 bytes
def INSNBOB : I<(outs GPR32:$dst), (ins GPR32:$src1, GPR32:$src2, GPR32:$src3, GPR32:$src4),
[(set GPR32:$dst,
diff --git a/llvm/test/TableGen/get-named-operand-idx.td b/llvm/test/TableGen/get-named-operand-idx.td
index e6f6331cd9c48..59693eba50bdc 100644
--- a/llvm/test/TableGen/get-named-operand-idx.td
+++ b/llvm/test/TableGen/get-named-operand-idx.td
@@ -89,7 +89,8 @@ def InstD : InstBase {
// CHECK-NEXT: 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
// CHECK-NEXT: 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
// CHECK-NEXT: 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-// CHECK-NEXT: 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 2, 0,
+// CHECK-NEXT: 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+// CHECK-NEXT: 1, 2, 2, 0,
// CHECK-NEXT: };
// CHECK-NEXT: return InstructionIndex[Opcode];
// CHECK-NEXT: }
More information about the llvm-commits
mailing list