[llvm] [AArch64][GlobalISel] Added support for hadd family of intrinsics (PR #163985)
Joshua Rodriguez via llvm-commits
llvm-commits at lists.llvm.org
Wed Nov 5 02:58:20 PST 2025
https://github.com/JoshdRod updated https://github.com/llvm/llvm-project/pull/163985
>From d4fd27582fa9d428911d56deff26131917b33661 Mon Sep 17 00:00:00 2001
From: Josh Rodriguez <josh.rodriguez at arm.com>
Date: Thu, 16 Oct 2025 15:47:02 +0000
Subject: [PATCH 01/11] [AArch64][GlobalISel] Added uhadd intrinsic support
GlobalISel now selects uhadd intrinsic, without falling back to SDAG.
Note that GlobalISel-generated code involving uhadd seems to be inefficient when compared to SDAG.
---
llvm/lib/Target/AArch64/AArch64InstrGISel.td | 8 ++++++
.../AArch64/GISel/AArch64LegalizerInfo.cpp | 2 ++
llvm/test/CodeGen/AArch64/freeze.ll | 25 +++++++++++++------
3 files changed, 27 insertions(+), 8 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64InstrGISel.td b/llvm/lib/Target/AArch64/AArch64InstrGISel.td
index 30b7b03f7a69a..a80390011f986 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrGISel.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrGISel.td
@@ -239,6 +239,12 @@ def G_USDOT : AArch64GenericInstruction {
let hasSideEffects = 0;
}
+def G_UHADD : AArch64GenericInstruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type0:$src1, type1:$src2);
+ let hasSideEffects = 0;
+}
+
// Generic instruction for the BSP pseudo. It is expanded into BSP, which
// expands into BSL/BIT/BIF after register allocation.
def G_BSP : AArch64GenericInstruction {
@@ -286,6 +292,8 @@ def : GINodeEquiv<G_UDOT, AArch64udot>;
def : GINodeEquiv<G_SDOT, AArch64sdot>;
def : GINodeEquiv<G_USDOT, AArch64usdot>;
+def : GINodeEquiv<G_UHADD, avgflooru>;
+
def : GINodeEquiv<G_EXTRACT_VECTOR_ELT, vector_extract>;
def : GINodeEquiv<G_AARCH64_PREFETCH, AArch64Prefetch>;
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index 5f93847bc680e..44ed11c396dbd 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -1817,6 +1817,8 @@ bool AArch64LegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
return LowerBinOp(TargetOpcode::G_ABDS);
case Intrinsic::aarch64_neon_uabd:
return LowerBinOp(TargetOpcode::G_ABDU);
+ case Intrinsic::aarch64_neon_uhadd:
+ return LowerBinOp(AArch64::G_UHADD);
case Intrinsic::aarch64_neon_abs: {
// Lower the intrinsic to G_ABS.
MIB.buildInstr(TargetOpcode::G_ABS, {MI.getOperand(0)}, {MI.getOperand(2)});
diff --git a/llvm/test/CodeGen/AArch64/freeze.ll b/llvm/test/CodeGen/AArch64/freeze.ll
index fb909fec90434..f5e2ffd7361ce 100644
--- a/llvm/test/CodeGen/AArch64/freeze.ll
+++ b/llvm/test/CodeGen/AArch64/freeze.ll
@@ -3,7 +3,6 @@
; RUN: llc -mtriple=aarch64-unknown-linux-gnu -global-isel -global-isel-abort=2 2>&1 < %s | FileCheck %s --check-prefixes=CHECK,CHECK-GI
; CHECK-GI: warning: Instruction selection used fallback path for freeze_v2i8
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for freeze_uhadd
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for freeze_urhadd
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for freeze_shadd
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for freeze_srhadd
@@ -435,13 +434,23 @@ define <8 x i16> @freeze_abds(<8 x i16> %a, <8 x i16> %b) {
}
define <8 x i16> @freeze_uhadd(<8 x i16> %a0, <8 x i16> %a1) {
-; CHECK-LABEL: freeze_uhadd:
-; CHECK: // %bb.0:
-; CHECK-NEXT: movi v2.8h, #15
-; CHECK-NEXT: and v0.16b, v0.16b, v2.16b
-; CHECK-NEXT: and v1.16b, v1.16b, v2.16b
-; CHECK-NEXT: uhadd v0.8h, v0.8h, v1.8h
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: freeze_uhadd:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: movi v2.8h, #15
+; CHECK-SD-NEXT: and v0.16b, v0.16b, v2.16b
+; CHECK-SD-NEXT: and v1.16b, v1.16b, v2.16b
+; CHECK-SD-NEXT: uhadd v0.8h, v0.8h, v1.8h
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: freeze_uhadd:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: movi v2.8h, #15
+; CHECK-GI-NEXT: and v0.16b, v0.16b, v2.16b
+; CHECK-GI-NEXT: and v1.16b, v1.16b, v2.16b
+; CHECK-GI-NEXT: movi v2.8h, #31
+; CHECK-GI-NEXT: uhadd v0.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT: and v0.16b, v0.16b, v2.16b
+; CHECK-GI-NEXT: ret
%m0 = and <8 x i16> %a0, splat (i16 15)
%m1 = and <8 x i16> %a1, splat (i16 15)
%avg = call <8 x i16> @llvm.aarch64.neon.uhadd.v8i16(<8 x i16> %m0, <8 x i16> %m1)
>From 279919cf16fbcba23dd8e9f7849076cb0cc744ee Mon Sep 17 00:00:00 2001
From: Josh Rodriguez <josh.rodriguez at arm.com>
Date: Fri, 17 Oct 2025 13:13:22 +0000
Subject: [PATCH 02/11] [AArch64][GlobalISel] Added urhadd intrinsic support
GlobalISel now selects urhadd intrinsic, without falling back to SDAG.
Note that GlobalISel-generated code involving urhadd seems to be inefficient when compared to SDAG.
---
llvm/lib/Target/AArch64/AArch64InstrGISel.td | 7 ++++++
.../AArch64/GISel/AArch64LegalizerInfo.cpp | 2 ++
llvm/test/CodeGen/AArch64/freeze.ll | 25 +++++++++++++------
3 files changed, 26 insertions(+), 8 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64InstrGISel.td b/llvm/lib/Target/AArch64/AArch64InstrGISel.td
index a80390011f986..68f921e030429 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrGISel.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrGISel.td
@@ -245,6 +245,12 @@ def G_UHADD : AArch64GenericInstruction {
let hasSideEffects = 0;
}
+def G_URHADD : AArch64GenericInstruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type0:$src1, type1:$src2);
+ let hasSideEffects = 0;
+}
+
// Generic instruction for the BSP pseudo. It is expanded into BSP, which
// expands into BSL/BIT/BIF after register allocation.
def G_BSP : AArch64GenericInstruction {
@@ -293,6 +299,7 @@ def : GINodeEquiv<G_SDOT, AArch64sdot>;
def : GINodeEquiv<G_USDOT, AArch64usdot>;
def : GINodeEquiv<G_UHADD, avgflooru>;
+def : GINodeEquiv<G_URHADD, avgceilu>;
def : GINodeEquiv<G_EXTRACT_VECTOR_ELT, vector_extract>;
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index 44ed11c396dbd..f579c6f5ba091 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -1819,6 +1819,8 @@ bool AArch64LegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
return LowerBinOp(TargetOpcode::G_ABDU);
case Intrinsic::aarch64_neon_uhadd:
return LowerBinOp(AArch64::G_UHADD);
+ case Intrinsic::aarch64_neon_urhadd:
+ return LowerBinOp(AArch64::G_URHADD);
case Intrinsic::aarch64_neon_abs: {
// Lower the intrinsic to G_ABS.
MIB.buildInstr(TargetOpcode::G_ABS, {MI.getOperand(0)}, {MI.getOperand(2)});
diff --git a/llvm/test/CodeGen/AArch64/freeze.ll b/llvm/test/CodeGen/AArch64/freeze.ll
index f5e2ffd7361ce..e2ae046da1467 100644
--- a/llvm/test/CodeGen/AArch64/freeze.ll
+++ b/llvm/test/CodeGen/AArch64/freeze.ll
@@ -3,7 +3,6 @@
; RUN: llc -mtriple=aarch64-unknown-linux-gnu -global-isel -global-isel-abort=2 2>&1 < %s | FileCheck %s --check-prefixes=CHECK,CHECK-GI
; CHECK-GI: warning: Instruction selection used fallback path for freeze_v2i8
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for freeze_urhadd
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for freeze_shadd
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for freeze_srhadd
@@ -460,13 +459,23 @@ define <8 x i16> @freeze_uhadd(<8 x i16> %a0, <8 x i16> %a1) {
}
define <8 x i16> @freeze_urhadd(<8 x i16> %a0, <8 x i16> %a1) {
-; CHECK-LABEL: freeze_urhadd:
-; CHECK: // %bb.0:
-; CHECK-NEXT: movi v2.8h, #15
-; CHECK-NEXT: and v0.16b, v0.16b, v2.16b
-; CHECK-NEXT: and v1.16b, v1.16b, v2.16b
-; CHECK-NEXT: urhadd v0.8h, v0.8h, v1.8h
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: freeze_urhadd:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: movi v2.8h, #15
+; CHECK-SD-NEXT: and v0.16b, v0.16b, v2.16b
+; CHECK-SD-NEXT: and v1.16b, v1.16b, v2.16b
+; CHECK-SD-NEXT: urhadd v0.8h, v0.8h, v1.8h
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: freeze_urhadd:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: movi v2.8h, #15
+; CHECK-GI-NEXT: and v0.16b, v0.16b, v2.16b
+; CHECK-GI-NEXT: and v1.16b, v1.16b, v2.16b
+; CHECK-GI-NEXT: movi v2.8h, #31
+; CHECK-GI-NEXT: urhadd v0.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT: and v0.16b, v0.16b, v2.16b
+; CHECK-GI-NEXT: ret
%m0 = and <8 x i16> %a0, splat (i16 15)
%m1 = and <8 x i16> %a1, splat (i16 15)
%avg = call <8 x i16> @llvm.aarch64.neon.urhadd.v8i16(<8 x i16> %m0, <8 x i16> %m1)
>From faf2d20728f9cf3fdb9ba16fdd8ca601511caa66 Mon Sep 17 00:00:00 2001
From: Josh Rodriguez <josh.rodriguez at arm.com>
Date: Fri, 17 Oct 2025 15:06:58 +0000
Subject: [PATCH 03/11] [AArch64][GlobalISel] Added shadd intrinsic support
GlobalISel now selects shadd intrinsic, without falling back to SDAG. Note
that GlobalISel-generated code involving shadd seems to be inefficient when
compared to SDAG.
---
llvm/lib/Target/AArch64/AArch64InstrGISel.td | 7 ++++++
.../AArch64/GISel/AArch64LegalizerInfo.cpp | 2 ++
llvm/test/CodeGen/AArch64/freeze.ll | 22 +++++++++++++------
3 files changed, 24 insertions(+), 7 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64InstrGISel.td b/llvm/lib/Target/AArch64/AArch64InstrGISel.td
index 68f921e030429..2c2c403d96d72 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrGISel.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrGISel.td
@@ -251,6 +251,12 @@ def G_URHADD : AArch64GenericInstruction {
let hasSideEffects = 0;
}
+def G_SHADD : AArch64GenericInstruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type0:$src1, type1:$src2);
+ let hasSideEffects = 0;
+}
+
// Generic instruction for the BSP pseudo. It is expanded into BSP, which
// expands into BSL/BIT/BIF after register allocation.
def G_BSP : AArch64GenericInstruction {
@@ -300,6 +306,7 @@ def : GINodeEquiv<G_USDOT, AArch64usdot>;
def : GINodeEquiv<G_UHADD, avgflooru>;
def : GINodeEquiv<G_URHADD, avgceilu>;
+def : GINodeEquiv<G_SHADD, avgfloors>;
def : GINodeEquiv<G_EXTRACT_VECTOR_ELT, vector_extract>;
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index f579c6f5ba091..14f592b895c9e 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -1821,6 +1821,8 @@ bool AArch64LegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
return LowerBinOp(AArch64::G_UHADD);
case Intrinsic::aarch64_neon_urhadd:
return LowerBinOp(AArch64::G_URHADD);
+ case Intrinsic::aarch64_neon_shadd:
+ return LowerBinOp(AArch64::G_SHADD);
case Intrinsic::aarch64_neon_abs: {
// Lower the intrinsic to G_ABS.
MIB.buildInstr(TargetOpcode::G_ABS, {MI.getOperand(0)}, {MI.getOperand(2)});
diff --git a/llvm/test/CodeGen/AArch64/freeze.ll b/llvm/test/CodeGen/AArch64/freeze.ll
index e2ae046da1467..dffd89143d16b 100644
--- a/llvm/test/CodeGen/AArch64/freeze.ll
+++ b/llvm/test/CodeGen/AArch64/freeze.ll
@@ -3,7 +3,6 @@
; RUN: llc -mtriple=aarch64-unknown-linux-gnu -global-isel -global-isel-abort=2 2>&1 < %s | FileCheck %s --check-prefixes=CHECK,CHECK-GI
; CHECK-GI: warning: Instruction selection used fallback path for freeze_v2i8
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for freeze_shadd
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for freeze_srhadd
%struct.T = type { i32, i32 }
@@ -485,12 +484,21 @@ define <8 x i16> @freeze_urhadd(<8 x i16> %a0, <8 x i16> %a1) {
}
define <8 x i16> @freeze_shadd(<8 x i8> %a0, <8 x i16> %a1) {
-; CHECK-LABEL: freeze_shadd:
-; CHECK: // %bb.0:
-; CHECK-NEXT: sshll v0.8h, v0.8b, #0
-; CHECK-NEXT: sshr v1.8h, v1.8h, #8
-; CHECK-NEXT: shadd v0.8h, v0.8h, v1.8h
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: freeze_shadd:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: sshll v0.8h, v0.8b, #0
+; CHECK-SD-NEXT: sshr v1.8h, v1.8h, #8
+; CHECK-SD-NEXT: shadd v0.8h, v0.8h, v1.8h
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: freeze_shadd:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: sshll v0.8h, v0.8b, #0
+; CHECK-GI-NEXT: sshr v1.8h, v1.8h, #8
+; CHECK-GI-NEXT: shadd v0.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT: shl v0.8h, v0.8h, #8
+; CHECK-GI-NEXT: sshr v0.8h, v0.8h, #8
+; CHECK-GI-NEXT: ret
%x0 = sext <8 x i8> %a0 to <8 x i16>
%x1 = ashr <8 x i16> %a1, splat (i16 8)
%avg = call <8 x i16> @llvm.aarch64.neon.shadd.v8i16(<8 x i16> %x0, <8 x i16> %x1)
>From 701522aa4ff39827737b89af902131cd591da5c0 Mon Sep 17 00:00:00 2001
From: Josh Rodriguez <josh.rodriguez at arm.com>
Date: Fri, 17 Oct 2025 15:46:32 +0000
Subject: [PATCH 04/11] [AArch64][GlobalISel] Added srhadd intrinsic support
GlobalISel now selects srhadd intrinsic, without falling back to SDAG. Note
that GlobalISel-generated code involving srhadd seems to be inefficient when
compared to SDAG.
---
llvm/lib/Target/AArch64/AArch64InstrGISel.td | 7 ++++++
.../AArch64/GISel/AArch64LegalizerInfo.cpp | 2 ++
llvm/test/CodeGen/AArch64/freeze.ll | 22 +++++++++++++------
3 files changed, 24 insertions(+), 7 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64InstrGISel.td b/llvm/lib/Target/AArch64/AArch64InstrGISel.td
index 2c2c403d96d72..e44e31845380a 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrGISel.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrGISel.td
@@ -257,6 +257,12 @@ def G_SHADD : AArch64GenericInstruction {
let hasSideEffects = 0;
}
+def G_SRHADD : AArch64GenericInstruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type0:$src1, type1:$src2);
+ let hasSideEffects = 0;
+}
+
// Generic instruction for the BSP pseudo. It is expanded into BSP, which
// expands into BSL/BIT/BIF after register allocation.
def G_BSP : AArch64GenericInstruction {
@@ -307,6 +313,7 @@ def : GINodeEquiv<G_USDOT, AArch64usdot>;
def : GINodeEquiv<G_UHADD, avgflooru>;
def : GINodeEquiv<G_URHADD, avgceilu>;
def : GINodeEquiv<G_SHADD, avgfloors>;
+def : GINodeEquiv<G_SRHADD, avgceils>;
def : GINodeEquiv<G_EXTRACT_VECTOR_ELT, vector_extract>;
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index 14f592b895c9e..c8d31bbbc8b9a 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -1823,6 +1823,8 @@ bool AArch64LegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
return LowerBinOp(AArch64::G_URHADD);
case Intrinsic::aarch64_neon_shadd:
return LowerBinOp(AArch64::G_SHADD);
+ case Intrinsic::aarch64_neon_srhadd:
+ return LowerBinOp(AArch64::G_SRHADD);
case Intrinsic::aarch64_neon_abs: {
// Lower the intrinsic to G_ABS.
MIB.buildInstr(TargetOpcode::G_ABS, {MI.getOperand(0)}, {MI.getOperand(2)});
diff --git a/llvm/test/CodeGen/AArch64/freeze.ll b/llvm/test/CodeGen/AArch64/freeze.ll
index dffd89143d16b..136ac8b0a2aa1 100644
--- a/llvm/test/CodeGen/AArch64/freeze.ll
+++ b/llvm/test/CodeGen/AArch64/freeze.ll
@@ -3,7 +3,6 @@
; RUN: llc -mtriple=aarch64-unknown-linux-gnu -global-isel -global-isel-abort=2 2>&1 < %s | FileCheck %s --check-prefixes=CHECK,CHECK-GI
; CHECK-GI: warning: Instruction selection used fallback path for freeze_v2i8
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for freeze_srhadd
%struct.T = type { i32, i32 }
@@ -509,12 +508,21 @@ define <8 x i16> @freeze_shadd(<8 x i8> %a0, <8 x i16> %a1) {
}
define <8 x i16> @freeze_srhadd(<8 x i8> %a0, <8 x i16> %a1) {
-; CHECK-LABEL: freeze_srhadd:
-; CHECK: // %bb.0:
-; CHECK-NEXT: sshll v0.8h, v0.8b, #0
-; CHECK-NEXT: sshr v1.8h, v1.8h, #8
-; CHECK-NEXT: srhadd v0.8h, v0.8h, v1.8h
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: freeze_srhadd:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: sshll v0.8h, v0.8b, #0
+; CHECK-SD-NEXT: sshr v1.8h, v1.8h, #8
+; CHECK-SD-NEXT: srhadd v0.8h, v0.8h, v1.8h
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: freeze_srhadd:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: sshll v0.8h, v0.8b, #0
+; CHECK-GI-NEXT: sshr v1.8h, v1.8h, #8
+; CHECK-GI-NEXT: srhadd v0.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT: shl v0.8h, v0.8h, #8
+; CHECK-GI-NEXT: sshr v0.8h, v0.8h, #8
+; CHECK-GI-NEXT: ret
%x0 = sext <8 x i8> %a0 to <8 x i16>
%x1 = ashr <8 x i16> %a1, splat (i16 8)
%avg = call <8 x i16> @llvm.aarch64.neon.srhadd.v8i16(<8 x i16> %x0, <8 x i16> %x1)
>From 09b24f5afc26f983b1f8d15e7fb96ac9bfdba7b8 Mon Sep 17 00:00:00 2001
From: Josh Rodriguez <josh.rodriguez at arm.com>
Date: Fri, 17 Oct 2025 15:48:31 +0000
Subject: [PATCH 05/11] [AArch64][GlobalISel] Modified llc test to check
generation from both SDAG and GISel. Note that GlobalISel-generated code
involving the hadd family of intrinsics seems to be inefficient when compared
to SDAG.
---
.../AArch64/aarch64-known-bits-hadd.ll | 171 +++++++++++++-----
1 file changed, 123 insertions(+), 48 deletions(-)
diff --git a/llvm/test/CodeGen/AArch64/aarch64-known-bits-hadd.ll b/llvm/test/CodeGen/AArch64/aarch64-known-bits-hadd.ll
index f900f0209a108..a6fbaf01c5476 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-known-bits-hadd.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-known-bits-hadd.ll
@@ -1,5 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
-; RUN: llc -mtriple=aarch64 < %s | FileCheck %s
+; RUN: llc < %s -mtriple=aarch64 | FileCheck %s --check-prefixes=CHECK,CHECK-SD
+; RUN: llc < %s -mtriple=aarch64 -global-isel | FileCheck %s --check-prefixes=CHECK,CHECK-GI
declare <8 x i16> @llvm.aarch64.neon.uhadd.v8i16(<8 x i16>, <8 x i16>)
declare <8 x i16> @llvm.aarch64.neon.urhadd.v8i16(<8 x i16>, <8 x i16>)
@@ -7,11 +8,20 @@ declare <8 x i16> @llvm.aarch64.neon.shadd.v8i16(<8 x i16>, <8 x i16>)
declare <8 x i16> @llvm.aarch64.neon.srhadd.v8i16(<8 x i16>, <8 x i16>)
define <8 x i16> @haddu_zext(<8 x i8> %a0, <8 x i8> %a1) {
-; CHECK-LABEL: haddu_zext:
-; CHECK: // %bb.0:
-; CHECK-NEXT: uhadd v0.8b, v0.8b, v1.8b
-; CHECK-NEXT: ushll v0.8h, v0.8b, #0
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: haddu_zext:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: uhadd v0.8b, v0.8b, v1.8b
+; CHECK-SD-NEXT: ushll v0.8h, v0.8b, #0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: haddu_zext:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: ushll v0.8h, v0.8b, #0
+; CHECK-GI-NEXT: ushll v1.8h, v1.8b, #0
+; CHECK-GI-NEXT: mvni v2.8h, #254, lsl #8
+; CHECK-GI-NEXT: uhadd v0.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT: and v0.16b, v0.16b, v2.16b
+; CHECK-GI-NEXT: ret
%x0 = zext <8 x i8> %a0 to <8 x i16>
%x1 = zext <8 x i8> %a1 to <8 x i16>
%hadd = call <8 x i16> @llvm.aarch64.neon.uhadd.v8i16(<8 x i16> %x0, <8 x i16> %x1)
@@ -20,11 +30,20 @@ define <8 x i16> @haddu_zext(<8 x i8> %a0, <8 x i8> %a1) {
}
define <8 x i16> @rhaddu_zext(<8 x i8> %a0, <8 x i8> %a1) {
-; CHECK-LABEL: rhaddu_zext:
-; CHECK: // %bb.0:
-; CHECK-NEXT: urhadd v0.8b, v0.8b, v1.8b
-; CHECK-NEXT: ushll v0.8h, v0.8b, #0
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: rhaddu_zext:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: urhadd v0.8b, v0.8b, v1.8b
+; CHECK-SD-NEXT: ushll v0.8h, v0.8b, #0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: rhaddu_zext:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: ushll v0.8h, v0.8b, #0
+; CHECK-GI-NEXT: ushll v1.8h, v1.8b, #0
+; CHECK-GI-NEXT: mvni v2.8h, #254, lsl #8
+; CHECK-GI-NEXT: urhadd v0.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT: and v0.16b, v0.16b, v2.16b
+; CHECK-GI-NEXT: ret
%x0 = zext <8 x i8> %a0 to <8 x i16>
%x1 = zext <8 x i8> %a1 to <8 x i16>
%hadd = call <8 x i16> @llvm.aarch64.neon.urhadd.v8i16(<8 x i16> %x0, <8 x i16> %x1)
@@ -33,11 +52,20 @@ define <8 x i16> @rhaddu_zext(<8 x i8> %a0, <8 x i8> %a1) {
}
define <8 x i16> @hadds_zext(<8 x i8> %a0, <8 x i8> %a1) {
-; CHECK-LABEL: hadds_zext:
-; CHECK: // %bb.0:
-; CHECK-NEXT: uhadd v0.8b, v0.8b, v1.8b
-; CHECK-NEXT: ushll v0.8h, v0.8b, #0
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: hadds_zext:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: uhadd v0.8b, v0.8b, v1.8b
+; CHECK-SD-NEXT: ushll v0.8h, v0.8b, #0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: hadds_zext:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: ushll v0.8h, v0.8b, #0
+; CHECK-GI-NEXT: ushll v1.8h, v1.8b, #0
+; CHECK-GI-NEXT: mvni v2.8h, #254, lsl #8
+; CHECK-GI-NEXT: shadd v0.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT: and v0.16b, v0.16b, v2.16b
+; CHECK-GI-NEXT: ret
%x0 = zext <8 x i8> %a0 to <8 x i16>
%x1 = zext <8 x i8> %a1 to <8 x i16>
%hadd = call <8 x i16> @llvm.aarch64.neon.shadd.v8i16(<8 x i16> %x0, <8 x i16> %x1)
@@ -46,12 +74,21 @@ define <8 x i16> @hadds_zext(<8 x i8> %a0, <8 x i8> %a1) {
}
define <8 x i16> @shaddu_zext(<8 x i8> %a0, <8 x i8> %a1) {
-; CHECK-LABEL: shaddu_zext:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ushll v0.8h, v0.8b, #0
-; CHECK-NEXT: ushll v1.8h, v1.8b, #0
-; CHECK-NEXT: srhadd v0.8h, v0.8h, v1.8h
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: shaddu_zext:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: ushll v0.8h, v0.8b, #0
+; CHECK-SD-NEXT: ushll v1.8h, v1.8b, #0
+; CHECK-SD-NEXT: srhadd v0.8h, v0.8h, v1.8h
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: shaddu_zext:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: ushll v0.8h, v0.8b, #0
+; CHECK-GI-NEXT: ushll v1.8h, v1.8b, #0
+; CHECK-GI-NEXT: mvni v2.8h, #254, lsl #8
+; CHECK-GI-NEXT: srhadd v0.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT: and v0.16b, v0.16b, v2.16b
+; CHECK-GI-NEXT: ret
%x0 = zext <8 x i8> %a0 to <8 x i16>
%x1 = zext <8 x i8> %a1 to <8 x i16>
%hadd = call <8 x i16> @llvm.aarch64.neon.srhadd.v8i16(<8 x i16> %x0, <8 x i16> %x1)
@@ -62,13 +99,22 @@ define <8 x i16> @shaddu_zext(<8 x i8> %a0, <8 x i8> %a1) {
; ; negative tests
define <8 x i16> @haddu_sext(<8 x i8> %a0, <8 x i8> %a1) {
-; CHECK-LABEL: haddu_sext:
-; CHECK: // %bb.0:
-; CHECK-NEXT: sshll v0.8h, v0.8b, #0
-; CHECK-NEXT: sshll v1.8h, v1.8b, #0
-; CHECK-NEXT: uhadd v0.8h, v0.8h, v1.8h
-; CHECK-NEXT: bic v0.8h, #254, lsl #8
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: haddu_sext:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: sshll v0.8h, v0.8b, #0
+; CHECK-SD-NEXT: sshll v1.8h, v1.8b, #0
+; CHECK-SD-NEXT: uhadd v0.8h, v0.8h, v1.8h
+; CHECK-SD-NEXT: bic v0.8h, #254, lsl #8
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: haddu_sext:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: sshll v0.8h, v0.8b, #0
+; CHECK-GI-NEXT: sshll v1.8h, v1.8b, #0
+; CHECK-GI-NEXT: mvni v2.8h, #254, lsl #8
+; CHECK-GI-NEXT: uhadd v0.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT: and v0.16b, v0.16b, v2.16b
+; CHECK-GI-NEXT: ret
%x0 = sext <8 x i8> %a0 to <8 x i16>
%x1 = sext <8 x i8> %a1 to <8 x i16>
%hadd = call <8 x i16> @llvm.aarch64.neon.uhadd.v8i16(<8 x i16> %x0, <8 x i16> %x1)
@@ -77,13 +123,22 @@ define <8 x i16> @haddu_sext(<8 x i8> %a0, <8 x i8> %a1) {
}
define <8 x i16> @urhadd_sext(<8 x i8> %a0, <8 x i8> %a1) {
-; CHECK-LABEL: urhadd_sext:
-; CHECK: // %bb.0:
-; CHECK-NEXT: sshll v0.8h, v0.8b, #0
-; CHECK-NEXT: sshll v1.8h, v1.8b, #0
-; CHECK-NEXT: urhadd v0.8h, v0.8h, v1.8h
-; CHECK-NEXT: bic v0.8h, #254, lsl #8
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: urhadd_sext:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: sshll v0.8h, v0.8b, #0
+; CHECK-SD-NEXT: sshll v1.8h, v1.8b, #0
+; CHECK-SD-NEXT: urhadd v0.8h, v0.8h, v1.8h
+; CHECK-SD-NEXT: bic v0.8h, #254, lsl #8
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: urhadd_sext:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: sshll v0.8h, v0.8b, #0
+; CHECK-GI-NEXT: sshll v1.8h, v1.8b, #0
+; CHECK-GI-NEXT: mvni v2.8h, #254, lsl #8
+; CHECK-GI-NEXT: urhadd v0.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT: and v0.16b, v0.16b, v2.16b
+; CHECK-GI-NEXT: ret
%x0 = sext <8 x i8> %a0 to <8 x i16>
%x1 = sext <8 x i8> %a1 to <8 x i16>
%hadd = call <8 x i16> @llvm.aarch64.neon.urhadd.v8i16(<8 x i16> %x0, <8 x i16> %x1)
@@ -92,12 +147,21 @@ define <8 x i16> @urhadd_sext(<8 x i8> %a0, <8 x i8> %a1) {
}
define <8 x i16> @hadds_sext(<8 x i8> %a0, <8 x i8> %a1) {
-; CHECK-LABEL: hadds_sext:
-; CHECK: // %bb.0:
-; CHECK-NEXT: shadd v0.8b, v0.8b, v1.8b
-; CHECK-NEXT: sshll v0.8h, v0.8b, #0
-; CHECK-NEXT: bic v0.8h, #254, lsl #8
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: hadds_sext:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: shadd v0.8b, v0.8b, v1.8b
+; CHECK-SD-NEXT: sshll v0.8h, v0.8b, #0
+; CHECK-SD-NEXT: bic v0.8h, #254, lsl #8
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: hadds_sext:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: sshll v0.8h, v0.8b, #0
+; CHECK-GI-NEXT: sshll v1.8h, v1.8b, #0
+; CHECK-GI-NEXT: mvni v2.8h, #254, lsl #8
+; CHECK-GI-NEXT: shadd v0.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT: and v0.16b, v0.16b, v2.16b
+; CHECK-GI-NEXT: ret
%x0 = sext <8 x i8> %a0 to <8 x i16>
%x1 = sext <8 x i8> %a1 to <8 x i16>
%hadd = call <8 x i16> @llvm.aarch64.neon.shadd.v8i16(<8 x i16> %x0, <8 x i16> %x1)
@@ -106,15 +170,26 @@ define <8 x i16> @hadds_sext(<8 x i8> %a0, <8 x i8> %a1) {
}
define <8 x i16> @shaddu_sext(<8 x i8> %a0, <8 x i8> %a1) {
-; CHECK-LABEL: shaddu_sext:
-; CHECK: // %bb.0:
-; CHECK-NEXT: srhadd v0.8b, v0.8b, v1.8b
-; CHECK-NEXT: sshll v0.8h, v0.8b, #0
-; CHECK-NEXT: bic v0.8h, #254, lsl #8
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: shaddu_sext:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: srhadd v0.8b, v0.8b, v1.8b
+; CHECK-SD-NEXT: sshll v0.8h, v0.8b, #0
+; CHECK-SD-NEXT: bic v0.8h, #254, lsl #8
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: shaddu_sext:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: sshll v0.8h, v0.8b, #0
+; CHECK-GI-NEXT: sshll v1.8h, v1.8b, #0
+; CHECK-GI-NEXT: mvni v2.8h, #254, lsl #8
+; CHECK-GI-NEXT: srhadd v0.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT: and v0.16b, v0.16b, v2.16b
+; CHECK-GI-NEXT: ret
%x0 = sext <8 x i8> %a0 to <8 x i16>
%x1 = sext <8 x i8> %a1 to <8 x i16>
%hadd = call <8 x i16> @llvm.aarch64.neon.srhadd.v8i16(<8 x i16> %x0, <8 x i16> %x1)
%res = and <8 x i16> %hadd, <i16 511, i16 511, i16 511, i16 511, i16 511, i16 511, i16 511, i16 511>
ret <8 x i16> %res
}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; CHECK: {{.*}}
>From f48cbfec7e7dd36f8248993e96849e7380dd4606 Mon Sep 17 00:00:00 2001
From: Josh Rodriguez <josh.rodriguez at arm.com>
Date: Tue, 21 Oct 2025 14:36:01 +0000
Subject: [PATCH 06/11] [AArch64][GlobalISel] Modified gMIR instruction names
to match SDAG equivalents.
---
llvm/lib/Target/AArch64/AArch64InstrGISel.td | 16 ++++++++--------
.../AArch64/GISel/AArch64LegalizerInfo.cpp | 8 ++++----
2 files changed, 12 insertions(+), 12 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64InstrGISel.td b/llvm/lib/Target/AArch64/AArch64InstrGISel.td
index e44e31845380a..d055e28f41e35 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrGISel.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrGISel.td
@@ -239,25 +239,25 @@ def G_USDOT : AArch64GenericInstruction {
let hasSideEffects = 0;
}
-def G_UHADD : AArch64GenericInstruction {
+def G_AVGFLOORU : AArch64GenericInstruction {
let OutOperandList = (outs type0:$dst);
let InOperandList = (ins type0:$src1, type1:$src2);
let hasSideEffects = 0;
}
-def G_URHADD : AArch64GenericInstruction {
+def G_AVGCEILU : AArch64GenericInstruction {
let OutOperandList = (outs type0:$dst);
let InOperandList = (ins type0:$src1, type1:$src2);
let hasSideEffects = 0;
}
-def G_SHADD : AArch64GenericInstruction {
+def G_AVGFLOORS : AArch64GenericInstruction {
let OutOperandList = (outs type0:$dst);
let InOperandList = (ins type0:$src1, type1:$src2);
let hasSideEffects = 0;
}
-def G_SRHADD : AArch64GenericInstruction {
+def G_AVGCEILS : AArch64GenericInstruction {
let OutOperandList = (outs type0:$dst);
let InOperandList = (ins type0:$src1, type1:$src2);
let hasSideEffects = 0;
@@ -310,10 +310,10 @@ def : GINodeEquiv<G_UDOT, AArch64udot>;
def : GINodeEquiv<G_SDOT, AArch64sdot>;
def : GINodeEquiv<G_USDOT, AArch64usdot>;
-def : GINodeEquiv<G_UHADD, avgflooru>;
-def : GINodeEquiv<G_URHADD, avgceilu>;
-def : GINodeEquiv<G_SHADD, avgfloors>;
-def : GINodeEquiv<G_SRHADD, avgceils>;
+def : GINodeEquiv<G_AVGFLOORU, avgflooru>;
+def : GINodeEquiv<G_AVGCEILU, avgceilu>;
+def : GINodeEquiv<G_AVGFLOORS, avgfloors>;
+def : GINodeEquiv<G_AVGCEILS, avgceils>;
def : GINodeEquiv<G_EXTRACT_VECTOR_ELT, vector_extract>;
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index c8d31bbbc8b9a..204f4b2b4c2de 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -1818,13 +1818,13 @@ bool AArch64LegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
case Intrinsic::aarch64_neon_uabd:
return LowerBinOp(TargetOpcode::G_ABDU);
case Intrinsic::aarch64_neon_uhadd:
- return LowerBinOp(AArch64::G_UHADD);
+ return LowerBinOp(AArch64::G_AVGFLOORU);
case Intrinsic::aarch64_neon_urhadd:
- return LowerBinOp(AArch64::G_URHADD);
+ return LowerBinOp(AArch64::G_AVGCEILU);
case Intrinsic::aarch64_neon_shadd:
- return LowerBinOp(AArch64::G_SHADD);
+ return LowerBinOp(AArch64::G_AVGFLOORS);
case Intrinsic::aarch64_neon_srhadd:
- return LowerBinOp(AArch64::G_SRHADD);
+ return LowerBinOp(AArch64::G_AVGCEILS);
case Intrinsic::aarch64_neon_abs: {
// Lower the intrinsic to G_ABS.
MIB.buildInstr(TargetOpcode::G_ABS, {MI.getOperand(0)}, {MI.getOperand(2)});
>From cc40eb7f9968628348959186a5ff7780c4207c83 Mon Sep 17 00:00:00 2001
From: Josh Rodriguez <josh.rodriguez at arm.com>
Date: Mon, 27 Oct 2025 13:55:11 +0000
Subject: [PATCH 07/11] [AArch64][GlobalISel] Converted intrinsics to
machine-independent form
---
llvm/include/llvm/Support/TargetOpcodes.def | 11 ++++++++
llvm/include/llvm/Target/GenericOpcodes.td | 28 +++++++++++++++++++
.../Target/GlobalISel/SelectionDAGCompat.td | 4 +++
llvm/lib/Target/AArch64/AArch64InstrGISel.td | 24 ----------------
.../AArch64/GISel/AArch64LegalizerInfo.cpp | 10 +++----
.../GlobalISel/legalizer-info-validation.mir | 20 +++++++++++++
.../match-table-cxx.td | 2 +-
.../GlobalISelEmitter/GlobalISelEmitter.td | 2 +-
llvm/test/TableGen/get-named-operand-idx.td | 3 +-
9 files changed, 72 insertions(+), 32 deletions(-)
diff --git a/llvm/include/llvm/Support/TargetOpcodes.def b/llvm/include/llvm/Support/TargetOpcodes.def
index e55314568d683..d7a2e899ffd6f 100644
--- a/llvm/include/llvm/Support/TargetOpcodes.def
+++ b/llvm/include/llvm/Support/TargetOpcodes.def
@@ -295,6 +295,17 @@ HANDLE_TARGET_OPCODE(G_ABDS)
/// Generic absolute difference unsigned instruction.
HANDLE_TARGET_OPCODE(G_ABDU)
+/// Generic vector average with truncate unsigned instruction.
+HANDLE_TARGET_OPCODE(G_AVGFLOORU)
+
+/// Generic vector average with round unsigned instruction.
+HANDLE_TARGET_OPCODE(G_AVGCEILU)
+
+/// Generic vector average with truncate signed instruction.
+HANDLE_TARGET_OPCODE(G_AVGFLOORS)
+
+/// Generic vector average with round signed instruction.
+HANDLE_TARGET_OPCODE(G_AVGCEILS)
HANDLE_TARGET_OPCODE(G_IMPLICIT_DEF)
diff --git a/llvm/include/llvm/Target/GenericOpcodes.td b/llvm/include/llvm/Target/GenericOpcodes.td
index e3f995d53484f..b847e0425cf2b 100644
--- a/llvm/include/llvm/Target/GenericOpcodes.td
+++ b/llvm/include/llvm/Target/GenericOpcodes.td
@@ -423,6 +423,34 @@ def G_ABDU : GenericInstruction {
let isCommutable = true;
}
+// Generic vector average truncated unsigned.
+def G_AVGFLOORU : GenericInstruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type0:$src1, type0:$src2);
+ let hasSideEffects = 0;
+}
+
+// Generic vector average rounded unsigned.
+def G_AVGCEILU : GenericInstruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type0:$src1, type0:$src2);
+ let hasSideEffects = 0;
+}
+
+// Generic vector average truncated signed.
+def G_AVGFLOORS : GenericInstruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type0:$src1, type0:$src2);
+ let hasSideEffects = 0;
+}
+
+// Generic vector average rounded signed.
+def G_AVGCEILS : GenericInstruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type0:$src1, type0:$src2);
+ let hasSideEffects = 0;
+}
+
/// Funnel 'double' shifts take 3 operands, 2 inputs and the shift amount.
/// fshl(X,Y,Z): (X << (Z % bitwidth)) | (Y >> (bitwidth - (Z % bitwidth)))
def G_FSHL : GenericInstruction {
diff --git a/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td b/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td
index c0d480294dd8b..137b291d25d35 100644
--- a/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td
+++ b/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td
@@ -83,6 +83,10 @@ def : GINodeEquiv<G_LSHR, srl>;
def : GINodeEquiv<G_ASHR, sra>;
def : GINodeEquiv<G_ABDS, abds>;
def : GINodeEquiv<G_ABDU, abdu>;
+def : GINodeEquiv<G_AVGFLOORU, avgflooru>;
+def : GINodeEquiv<G_AVGCEILU, avgceilu>;
+def : GINodeEquiv<G_AVGFLOORS, avgfloors>;
+def : GINodeEquiv<G_AVGCEILS, avgceils>;
def : GINodeEquiv<G_SADDSAT, saddsat>;
def : GINodeEquiv<G_UADDSAT, uaddsat>;
def : GINodeEquiv<G_SSUBSAT, ssubsat>;
diff --git a/llvm/lib/Target/AArch64/AArch64InstrGISel.td b/llvm/lib/Target/AArch64/AArch64InstrGISel.td
index d055e28f41e35..7791eda6cd14a 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrGISel.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrGISel.td
@@ -239,30 +239,6 @@ def G_USDOT : AArch64GenericInstruction {
let hasSideEffects = 0;
}
-def G_AVGFLOORU : AArch64GenericInstruction {
- let OutOperandList = (outs type0:$dst);
- let InOperandList = (ins type0:$src1, type1:$src2);
- let hasSideEffects = 0;
-}
-
-def G_AVGCEILU : AArch64GenericInstruction {
- let OutOperandList = (outs type0:$dst);
- let InOperandList = (ins type0:$src1, type1:$src2);
- let hasSideEffects = 0;
-}
-
-def G_AVGFLOORS : AArch64GenericInstruction {
- let OutOperandList = (outs type0:$dst);
- let InOperandList = (ins type0:$src1, type1:$src2);
- let hasSideEffects = 0;
-}
-
-def G_AVGCEILS : AArch64GenericInstruction {
- let OutOperandList = (outs type0:$dst);
- let InOperandList = (ins type0:$src1, type1:$src2);
- let hasSideEffects = 0;
-}
-
// Generic instruction for the BSP pseudo. It is expanded into BSP, which
// expands into BSL/BIT/BIF after register allocation.
def G_BSP : AArch64GenericInstruction {
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index 204f4b2b4c2de..2e64b7ba0bd5a 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -289,7 +289,7 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
.moreElementsToNextPow2(0)
.lower();
- getActionDefinitionsBuilder({G_ABDS, G_ABDU})
+ getActionDefinitionsBuilder({G_ABDS, G_ABDU, G_AVGFLOORU, G_AVGCEILU, G_AVGFLOORS, G_AVGCEILS})
.legalFor({v8s8, v16s8, v4s16, v8s16, v2s32, v4s32})
.lower();
@@ -1818,13 +1818,13 @@ bool AArch64LegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
case Intrinsic::aarch64_neon_uabd:
return LowerBinOp(TargetOpcode::G_ABDU);
case Intrinsic::aarch64_neon_uhadd:
- return LowerBinOp(AArch64::G_AVGFLOORU);
+ return LowerBinOp(TargetOpcode::G_AVGFLOORU);
case Intrinsic::aarch64_neon_urhadd:
- return LowerBinOp(AArch64::G_AVGCEILU);
+ return LowerBinOp(TargetOpcode::G_AVGCEILU);
case Intrinsic::aarch64_neon_shadd:
- return LowerBinOp(AArch64::G_AVGFLOORS);
+ return LowerBinOp(TargetOpcode::G_AVGFLOORS);
case Intrinsic::aarch64_neon_srhadd:
- return LowerBinOp(AArch64::G_AVGCEILS);
+ return LowerBinOp(TargetOpcode::G_AVGCEILS);
case Intrinsic::aarch64_neon_abs: {
// Lower the intrinsic to G_ABS.
MIB.buildInstr(TargetOpcode::G_ABS, {MI.getOperand(0)}, {MI.getOperand(2)});
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir
index 896603d6eb20d..800b575dc7920 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir
@@ -79,6 +79,26 @@
# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected
# DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected
#
+# DEBUG-NEXT: G_AVGFLOORU (opcode {{[0-9]+}}): 1 type index, 0 imm indices
+# DEBUG-NEXT: .. opcode {{[0-9]+}} is aliased to {{[0-9]+}}
+# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected
+# DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected
+#
+# DEBUG-NEXT: G_AVGCEILU (opcode {{[0-9]+}}): 1 type index, 0 imm indices
+# DEBUG-NEXT: .. opcode {{[0-9]+}} is aliased to {{[0-9]+}}
+# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected
+# DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected
+#
+# DEBUG-NEXT: G_AVGFLOORS (opcode {{[0-9]+}}): 1 type index, 0 imm indices
+# DEBUG-NEXT: .. opcode {{[0-9]+}} is aliased to {{[0-9]+}}
+# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected
+# DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected
+#
+# DEBUG-NEXT: G_AVGCEILS (opcode {{[0-9]+}}): 1 type index, 0 imm indices
+# DEBUG-NEXT: .. opcode {{[0-9]+}} is aliased to {{[0-9]+}}
+# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected
+# DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected
+#
# DEBUG-NEXT: G_IMPLICIT_DEF (opcode {{[0-9]+}}): 1 type index, 0 imm indices
# DEBUG-NEXT: .. the first uncovered type index: {{[0-9]+}}, OK
# DEBUG-NEXT: .. the first uncovered imm index: {{[0-9]+}}, OK
diff --git a/llvm/test/TableGen/GlobalISelCombinerEmitter/match-table-cxx.td b/llvm/test/TableGen/GlobalISelCombinerEmitter/match-table-cxx.td
index 18960b43ab97d..df645c28ace9b 100644
--- a/llvm/test/TableGen/GlobalISelCombinerEmitter/match-table-cxx.td
+++ b/llvm/test/TableGen/GlobalISelCombinerEmitter/match-table-cxx.td
@@ -96,7 +96,7 @@ def MyCombiner: GICombiner<"GenMyCombiner", [
// CHECK: const uint8_t *GenMyCombiner::getMatchTable() const {
// CHECK-NEXT: constexpr static uint8_t MatchTable0[] = {
-// CHECK-NEXT: /* 0 */ GIM_SwitchOpcode, /*MI*/0, /*[*/GIMT_Encode2(99), GIMT_Encode2(211), /*)*//*default:*//*Label 5*/ GIMT_Encode4(524),
+// CHECK-NEXT: /* 0 */ GIM_SwitchOpcode, /*MI*/0, /*[*/GIMT_Encode2(103), GIMT_Encode2(215), /*)*//*default:*//*Label 5*/ GIMT_Encode4(524),
// CHECK-NEXT: /* 10 */ /*TargetOpcode::G_STORE*//*Label 0*/ GIMT_Encode4(458), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0),
// CHECK-NEXT: /* 182 */ /*TargetOpcode::G_SEXT*//*Label 1*/ GIMT_Encode4(476), GIMT_Encode4(0),
// CHECK-NEXT: /* 190 */ /*TargetOpcode::G_ZEXT*//*Label 2*/ GIMT_Encode4(488), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0),
diff --git a/llvm/test/TableGen/GlobalISelEmitter/GlobalISelEmitter.td b/llvm/test/TableGen/GlobalISelEmitter/GlobalISelEmitter.td
index fdabc53a3ff3b..64ca63da3b6f0 100644
--- a/llvm/test/TableGen/GlobalISelEmitter/GlobalISelEmitter.td
+++ b/llvm/test/TableGen/GlobalISelEmitter/GlobalISelEmitter.td
@@ -535,7 +535,7 @@ def : Pat<(frag GPR32:$src1, complex:$src2, complex:$src3),
// R00O-NEXT: GIM_Reject,
// R00O: // Label [[DEFAULT_NUM]]: @[[DEFAULT]]
// R00O-NEXT: GIM_Reject,
-// R00O-NEXT: }; // Size: 1902 bytes
+// R00O-NEXT: }; // Size: 1918 bytes
def INSNBOB : I<(outs GPR32:$dst), (ins GPR32:$src1, GPR32:$src2, GPR32:$src3, GPR32:$src4),
[(set GPR32:$dst,
diff --git a/llvm/test/TableGen/get-named-operand-idx.td b/llvm/test/TableGen/get-named-operand-idx.td
index e6f6331cd9c48..59693eba50bdc 100644
--- a/llvm/test/TableGen/get-named-operand-idx.td
+++ b/llvm/test/TableGen/get-named-operand-idx.td
@@ -89,7 +89,8 @@ def InstD : InstBase {
// CHECK-NEXT: 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
// CHECK-NEXT: 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
// CHECK-NEXT: 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-// CHECK-NEXT: 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 2, 0,
+// CHECK-NEXT: 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+// CHECK-NEXT: 1, 2, 2, 0,
// CHECK-NEXT: };
// CHECK-NEXT: return InstructionIndex[Opcode];
// CHECK-NEXT: }
>From bc312fc63d46163f9e5c1acc6c97d3c511cbe86c Mon Sep 17 00:00:00 2001
From: Josh Rodriguez <josh.rodriguez at arm.com>
Date: Tue, 28 Oct 2025 09:17:55 +0000
Subject: [PATCH 08/11] [AArch64][GlobalISel] Fixed formatting
---
llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index 2e64b7ba0bd5a..942455e1942b8 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -289,7 +289,8 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
.moreElementsToNextPow2(0)
.lower();
- getActionDefinitionsBuilder({G_ABDS, G_ABDU, G_AVGFLOORU, G_AVGCEILU, G_AVGFLOORS, G_AVGCEILS})
+ getActionDefinitionsBuilder(
+ {G_ABDS, G_ABDU, G_AVGFLOORU, G_AVGCEILU, G_AVGFLOORS, G_AVGCEILS})
.legalFor({v8s8, v16s8, v4s16, v8s16, v2s32, v4s32})
.lower();
>From dc04caaac14bb8be0f937b5cafe2e57b97b08bac Mon Sep 17 00:00:00 2001
From: Josh Rodriguez <josh.rodriguez at arm.com>
Date: Tue, 4 Nov 2025 09:19:02 +0000
Subject: [PATCH 09/11] [AArch64][GlobalISel] Renamed GISel nodes for
consistency
---
llvm/include/llvm/Support/TargetOpcodes.def | 8 ++++----
llvm/include/llvm/Target/GenericOpcodes.td | 8 ++++----
.../llvm/Target/GlobalISel/SelectionDAGCompat.td | 8 ++++----
llvm/lib/Target/AArch64/AArch64InstrGISel.td | 8 ++++----
llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp | 10 +++++-----
.../AArch64/GlobalISel/legalizer-info-validation.mir | 8 ++++----
6 files changed, 25 insertions(+), 25 deletions(-)
diff --git a/llvm/include/llvm/Support/TargetOpcodes.def b/llvm/include/llvm/Support/TargetOpcodes.def
index d7a2e899ffd6f..0d43dce5d6357 100644
--- a/llvm/include/llvm/Support/TargetOpcodes.def
+++ b/llvm/include/llvm/Support/TargetOpcodes.def
@@ -296,16 +296,16 @@ HANDLE_TARGET_OPCODE(G_ABDS)
HANDLE_TARGET_OPCODE(G_ABDU)
/// Generic vector average with truncate unsigned instruction.
-HANDLE_TARGET_OPCODE(G_AVGFLOORU)
+HANDLE_TARGET_OPCODE(G_UAVGFLOOR)
/// Generic vector average with round unsigned instruction.
-HANDLE_TARGET_OPCODE(G_AVGCEILU)
+HANDLE_TARGET_OPCODE(G_UAVGCEIL)
/// Generic vector average with truncate signed instruction.
-HANDLE_TARGET_OPCODE(G_AVGFLOORS)
+HANDLE_TARGET_OPCODE(G_SAVGFLOOR)
/// Generic vector average with round signed instruction.
-HANDLE_TARGET_OPCODE(G_AVGCEILS)
+HANDLE_TARGET_OPCODE(G_SAVGCEIL)
HANDLE_TARGET_OPCODE(G_IMPLICIT_DEF)
diff --git a/llvm/include/llvm/Target/GenericOpcodes.td b/llvm/include/llvm/Target/GenericOpcodes.td
index b847e0425cf2b..1b65b8b73527d 100644
--- a/llvm/include/llvm/Target/GenericOpcodes.td
+++ b/llvm/include/llvm/Target/GenericOpcodes.td
@@ -424,28 +424,28 @@ def G_ABDU : GenericInstruction {
}
// Generic vector average truncated unsigned.
-def G_AVGFLOORU : GenericInstruction {
+def G_UAVGFLOOR : GenericInstruction {
let OutOperandList = (outs type0:$dst);
let InOperandList = (ins type0:$src1, type0:$src2);
let hasSideEffects = 0;
}
// Generic vector average rounded unsigned.
-def G_AVGCEILU : GenericInstruction {
+def G_UAVGCEIL : GenericInstruction {
let OutOperandList = (outs type0:$dst);
let InOperandList = (ins type0:$src1, type0:$src2);
let hasSideEffects = 0;
}
// Generic vector average truncated signed.
-def G_AVGFLOORS : GenericInstruction {
+def G_SAVGFLOOR : GenericInstruction {
let OutOperandList = (outs type0:$dst);
let InOperandList = (ins type0:$src1, type0:$src2);
let hasSideEffects = 0;
}
// Generic vector average rounded signed.
-def G_AVGCEILS : GenericInstruction {
+def G_SAVGCEIL : GenericInstruction {
let OutOperandList = (outs type0:$dst);
let InOperandList = (ins type0:$src1, type0:$src2);
let hasSideEffects = 0;
diff --git a/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td b/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td
index 137b291d25d35..a69e089779315 100644
--- a/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td
+++ b/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td
@@ -83,10 +83,10 @@ def : GINodeEquiv<G_LSHR, srl>;
def : GINodeEquiv<G_ASHR, sra>;
def : GINodeEquiv<G_ABDS, abds>;
def : GINodeEquiv<G_ABDU, abdu>;
-def : GINodeEquiv<G_AVGFLOORU, avgflooru>;
-def : GINodeEquiv<G_AVGCEILU, avgceilu>;
-def : GINodeEquiv<G_AVGFLOORS, avgfloors>;
-def : GINodeEquiv<G_AVGCEILS, avgceils>;
+def : GINodeEquiv<G_UAVGFLOOR, avgflooru>;
+def : GINodeEquiv<G_UAVGCEIL, avgceilu>;
+def : GINodeEquiv<G_SAVGFLOOR, avgfloors>;
+def : GINodeEquiv<G_SAVGCEIL, avgceils>;
def : GINodeEquiv<G_SADDSAT, saddsat>;
def : GINodeEquiv<G_UADDSAT, uaddsat>;
def : GINodeEquiv<G_SSUBSAT, ssubsat>;
diff --git a/llvm/lib/Target/AArch64/AArch64InstrGISel.td b/llvm/lib/Target/AArch64/AArch64InstrGISel.td
index 7791eda6cd14a..dffff27ce94aa 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrGISel.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrGISel.td
@@ -286,10 +286,10 @@ def : GINodeEquiv<G_UDOT, AArch64udot>;
def : GINodeEquiv<G_SDOT, AArch64sdot>;
def : GINodeEquiv<G_USDOT, AArch64usdot>;
-def : GINodeEquiv<G_AVGFLOORU, avgflooru>;
-def : GINodeEquiv<G_AVGCEILU, avgceilu>;
-def : GINodeEquiv<G_AVGFLOORS, avgfloors>;
-def : GINodeEquiv<G_AVGCEILS, avgceils>;
+def : GINodeEquiv<G_UAVGFLOOR, avgflooru>;
+def : GINodeEquiv<G_UAVGCEIL, avgceilu>;
+def : GINodeEquiv<G_SAVGFLOOR, avgfloors>;
+def : GINodeEquiv<G_SAVGCEIL, avgceils>;
def : GINodeEquiv<G_EXTRACT_VECTOR_ELT, vector_extract>;
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index 942455e1942b8..6af3fd9c65984 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -290,7 +290,7 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
.lower();
getActionDefinitionsBuilder(
- {G_ABDS, G_ABDU, G_AVGFLOORU, G_AVGCEILU, G_AVGFLOORS, G_AVGCEILS})
+ {G_ABDS, G_ABDU, G_UAVGFLOOR, G_UAVGCEIL, G_SAVGFLOOR, G_SAVGCEIL})
.legalFor({v8s8, v16s8, v4s16, v8s16, v2s32, v4s32})
.lower();
@@ -1819,13 +1819,13 @@ bool AArch64LegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
case Intrinsic::aarch64_neon_uabd:
return LowerBinOp(TargetOpcode::G_ABDU);
case Intrinsic::aarch64_neon_uhadd:
- return LowerBinOp(TargetOpcode::G_AVGFLOORU);
+ return LowerBinOp(TargetOpcode::G_UAVGFLOOR);
case Intrinsic::aarch64_neon_urhadd:
- return LowerBinOp(TargetOpcode::G_AVGCEILU);
+ return LowerBinOp(TargetOpcode::G_UAVGCEIL);
case Intrinsic::aarch64_neon_shadd:
- return LowerBinOp(TargetOpcode::G_AVGFLOORS);
+ return LowerBinOp(TargetOpcode::G_SAVGFLOOR);
case Intrinsic::aarch64_neon_srhadd:
- return LowerBinOp(TargetOpcode::G_AVGCEILS);
+ return LowerBinOp(TargetOpcode::G_SAVGCEIL);
case Intrinsic::aarch64_neon_abs: {
// Lower the intrinsic to G_ABS.
MIB.buildInstr(TargetOpcode::G_ABS, {MI.getOperand(0)}, {MI.getOperand(2)});
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir
index 800b575dc7920..7edebd576d268 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir
@@ -79,22 +79,22 @@
# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected
# DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected
#
-# DEBUG-NEXT: G_AVGFLOORU (opcode {{[0-9]+}}): 1 type index, 0 imm indices
+# DEBUG-NEXT: G_UAVGFLOOR (opcode {{[0-9]+}}): 1 type index, 0 imm indices
# DEBUG-NEXT: .. opcode {{[0-9]+}} is aliased to {{[0-9]+}}
# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected
# DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected
#
-# DEBUG-NEXT: G_AVGCEILU (opcode {{[0-9]+}}): 1 type index, 0 imm indices
+# DEBUG-NEXT: G_UAVGCEIL (opcode {{[0-9]+}}): 1 type index, 0 imm indices
# DEBUG-NEXT: .. opcode {{[0-9]+}} is aliased to {{[0-9]+}}
# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected
# DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected
#
-# DEBUG-NEXT: G_AVGFLOORS (opcode {{[0-9]+}}): 1 type index, 0 imm indices
+# DEBUG-NEXT: G_SAVGFLOOR (opcode {{[0-9]+}}): 1 type index, 0 imm indices
# DEBUG-NEXT: .. opcode {{[0-9]+}} is aliased to {{[0-9]+}}
# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected
# DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected
#
-# DEBUG-NEXT: G_AVGCEILS (opcode {{[0-9]+}}): 1 type index, 0 imm indices
+# DEBUG-NEXT: G_SAVGCEIL (opcode {{[0-9]+}}): 1 type index, 0 imm indices
# DEBUG-NEXT: .. opcode {{[0-9]+}} is aliased to {{[0-9]+}}
# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected
# DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected
>From 61d7a971cdfb605c43c5d76269cf6cf84f798bd2 Mon Sep 17 00:00:00 2001
From: Josh Rodriguez <josh.rodriguez at arm.com>
Date: Wed, 5 Nov 2025 09:57:54 +0000
Subject: [PATCH 10/11] [GlobalISel] Added documentation for gMIR instructions
into GenericOpcode.rst
---
llvm/docs/GlobalISel/GenericOpcode.rst | 13 +++++++++++++
1 file changed, 13 insertions(+)
diff --git a/llvm/docs/GlobalISel/GenericOpcode.rst b/llvm/docs/GlobalISel/GenericOpcode.rst
index 661a11537cf57..72cb8c6efca10 100644
--- a/llvm/docs/GlobalISel/GenericOpcode.rst
+++ b/llvm/docs/GlobalISel/GenericOpcode.rst
@@ -511,6 +511,19 @@ Compute the absolute difference (signed and unsigned), e.g. trunc(abs(ext(x)-ext
%0:_(s33) = G_ABDS %2, %3
%1:_(s33) = G_ABDU %4, %5
+G_UAVGFLOOR, G_UAVGCEIL, G_SAVGFLOOR, G_SAVGCEIL
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Compute the average of corresponding elements in two vectors (signed and unsigned).
+The FLOOR variants truncate the result, e.g. trunc(shr(ext(a)+ext(b), 1)); the CEIL variants round it up, e.g. trunc(shr(ext(a)+ext(b)+1, 1)).
+
+.. code-block:: none
+
+ %0:_(<4 x s16>) = G_UAVGFLOOR %4:_(<4 x s16>), %5:_(<4 x s16>)
+ %1:_(<4 x s16>) = G_UAVGCEIL %6:_(<4 x s16>), %7:_(<4 x s16>)
+ %2:_(<4 x s16>) = G_SAVGFLOOR %8:_(<4 x s16>), %9:_(<4 x s16>)
+ %3:_(<4 x s16>) = G_SAVGCEIL %10:_(<4 x s16>), %11:_(<4 x s16>)
+
Floating Point Operations
-------------------------
>From f11c039a8aa23277f5efe35033b9bd392a9af56e Mon Sep 17 00:00:00 2001
From: Josh Rodriguez <josh.rodriguez at arm.com>
Date: Wed, 5 Nov 2025 10:56:50 +0000
Subject: [PATCH 11/11] [AArch64][GlobalISel] Modified trunc-avg-fold.ll to
separately test SDAG and GISel generated code
The test file contains only the CHECK-SD and CHECK-GI prefixes, as a shared CHECK prefix is not needed.
---
llvm/test/CodeGen/AArch64/trunc-avg-fold.ll | 69 +++++++++++++++------
1 file changed, 50 insertions(+), 19 deletions(-)
diff --git a/llvm/test/CodeGen/AArch64/trunc-avg-fold.ll b/llvm/test/CodeGen/AArch64/trunc-avg-fold.ll
index 54fcae4ba28b7..0a72bbccf0ed2 100644
--- a/llvm/test/CodeGen/AArch64/trunc-avg-fold.ll
+++ b/llvm/test/CodeGen/AArch64/trunc-avg-fold.ll
@@ -1,11 +1,20 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
-; RUN: llc -mtriple=aarch64-- -O2 -mattr=+neon < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-- -O2 -mattr=+neon < %s | FileCheck %s --check-prefixes=CHECK-SD
+; RUN: llc -mtriple=aarch64-- -O2 -mattr=+neon -global-isel < %s | FileCheck %s --check-prefixes=CHECK-GI
define <8 x i8> @avgceil_u_i8_to_i16(<8 x i8> %a, <8 x i8> %b) {
-; CHECK-LABEL: avgceil_u_i8_to_i16:
-; CHECK: // %bb.0:
-; CHECK-NEXT: urhadd v0.8b, v0.8b, v1.8b
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: avgceil_u_i8_to_i16:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: urhadd v0.8b, v0.8b, v1.8b
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: avgceil_u_i8_to_i16:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: ushll v0.8h, v0.8b, #0
+; CHECK-GI-NEXT: ushll v1.8h, v1.8b, #0
+; CHECK-GI-NEXT: urhadd v0.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT: xtn v0.8b, v0.8h
+; CHECK-GI-NEXT: ret
%a16 = zext <8 x i8> %a to <8 x i16>
%b16 = zext <8 x i8> %b to <8 x i16>
%avg16 = call <8 x i16> @llvm.aarch64.neon.urhadd.v8i16(<8 x i16> %a16, <8 x i16> %b16)
@@ -15,10 +24,18 @@ define <8 x i8> @avgceil_u_i8_to_i16(<8 x i8> %a, <8 x i8> %b) {
define <8 x i8> @test_avgceil_s(<8 x i8> %a, <8 x i8> %b) {
-; CHECK-LABEL: test_avgceil_s:
-; CHECK: // %bb.0:
-; CHECK-NEXT: srhadd v0.8b, v0.8b, v1.8b
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_avgceil_s:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: srhadd v0.8b, v0.8b, v1.8b
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_avgceil_s:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: sshll v0.8h, v0.8b, #0
+; CHECK-GI-NEXT: sshll v1.8h, v1.8b, #0
+; CHECK-GI-NEXT: srhadd v0.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT: xtn v0.8b, v0.8h
+; CHECK-GI-NEXT: ret
%a16 = sext <8 x i8> %a to <8 x i16>
%b16 = sext <8 x i8> %b to <8 x i16>
%avg16 = call <8 x i16> @llvm.aarch64.neon.srhadd.v8i16(<8 x i16> %a16, <8 x i16> %b16)
@@ -27,10 +44,18 @@ define <8 x i8> @test_avgceil_s(<8 x i8> %a, <8 x i8> %b) {
}
define <8 x i8> @avgfloor_u_i8_to_i16(<8 x i8> %a, <8 x i8> %b) {
-; CHECK-LABEL: avgfloor_u_i8_to_i16:
-; CHECK: // %bb.0:
-; CHECK-NEXT: uhadd v0.8b, v0.8b, v1.8b
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: avgfloor_u_i8_to_i16:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: uhadd v0.8b, v0.8b, v1.8b
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: avgfloor_u_i8_to_i16:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: ushll v0.8h, v0.8b, #0
+; CHECK-GI-NEXT: ushll v1.8h, v1.8b, #0
+; CHECK-GI-NEXT: uhadd v0.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT: xtn v0.8b, v0.8h
+; CHECK-GI-NEXT: ret
%a16 = zext <8 x i8> %a to <8 x i16>
%b16 = zext <8 x i8> %b to <8 x i16>
%avg16 = call <8 x i16> @llvm.aarch64.neon.uhadd.v8i16(<8 x i16> %a16, <8 x i16> %b16)
@@ -39,15 +64,21 @@ define <8 x i8> @avgfloor_u_i8_to_i16(<8 x i8> %a, <8 x i8> %b) {
}
define <8 x i8> @test_avgfloor_s(<8 x i8> %a, <8 x i8> %b) {
-; CHECK-LABEL: test_avgfloor_s:
-; CHECK: // %bb.0:
-; CHECK-NEXT: shadd v0.8b, v0.8b, v1.8b
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_avgfloor_s:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: shadd v0.8b, v0.8b, v1.8b
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_avgfloor_s:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: sshll v0.8h, v0.8b, #0
+; CHECK-GI-NEXT: sshll v1.8h, v1.8b, #0
+; CHECK-GI-NEXT: shadd v0.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT: xtn v0.8b, v0.8h
+; CHECK-GI-NEXT: ret
%a16 = sext <8 x i8> %a to <8 x i16>
%b16 = sext <8 x i8> %b to <8 x i16>
%avg16 = call <8 x i16> @llvm.aarch64.neon.shadd.v8i16(<8 x i16> %a16, <8 x i16> %b16)
%res = trunc <8 x i16> %avg16 to <8 x i8>
ret <8 x i8> %res
}
-
-
More information about the llvm-commits
mailing list