[llvm] [AArch64] Optimized generated assembly for bool to svbool_t conversions (PR #83001)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Feb 28 03:15:50 PST 2024
https://github.com/Lukacma updated https://github.com/llvm/llvm-project/pull/83001
>From 3c4270c28d42ac798c3674bc51f16ca30ae0320d Mon Sep 17 00:00:00 2001
From: Marian Lukac <Marian.Lukac at arm.com>
Date: Mon, 26 Feb 2024 13:03:52 +0000
Subject: [PATCH 1/2] [AArch64] Optimized generated assembly for bool to
svbool_t conversions
The original assembly was generating an `AND(WHILELO, SPLAT 1)` pattern when only `WHILELO` was necessary.
---
.../Target/AArch64/AArch64ISelLowering.cpp | 1 +
.../AArch64/sve-intrinsics-reinterpret.ll | 42 ++++++++++++++++++-
2 files changed, 42 insertions(+), 1 deletion(-)
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index a3b7e3128ac1a4..dba3a787734721 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -276,6 +276,7 @@ static bool isZeroingInactiveLanes(SDValue Op) {
if (ISD::isConstantSplatVectorAllOnes(Op.getNode()))
return true;
return false;
+ case ISD::SPLAT_VECTOR:
case AArch64ISD::PTRUE:
case AArch64ISD::SETCC_MERGE_ZERO:
return true;
diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-reinterpret.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-reinterpret.ll
index 82bf756f822898..c7c102f5d567d9 100644
--- a/llvm/test/CodeGen/AArch64/sve-intrinsics-reinterpret.ll
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-reinterpret.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme < %s | FileCheck %s
@@ -150,6 +150,46 @@ define <vscale x 16 x i1> @chained_reinterpret() {
ret <vscale x 16 x i1> %out
}
+define <vscale x 16 x i1> @reinterpret_scalar_bool_h(i1 %x){
+; CHECK-LABEL: reinterpret_scalar_bool_h:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
+; CHECK-NEXT: sbfx x8, x0, #0, #1
+; CHECK-NEXT: whilelo p0.h, xzr, x8
+; CHECK-NEXT: ret
+ %.splatinsert = insertelement <vscale x 8 x i1> poison, i1 %x, i64 0
+ %.splat = shufflevector <vscale x 8 x i1> %.splatinsert, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
+ %out = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %.splat)
+ ret <vscale x 16 x i1> %out
+}
+
+define <vscale x 16 x i1> @reinterpret_scalar_bool_s(i1 %x){
+; CHECK-LABEL: reinterpret_scalar_bool_s:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
+; CHECK-NEXT: sbfx x8, x0, #0, #1
+; CHECK-NEXT: whilelo p0.s, xzr, x8
+; CHECK-NEXT: ret
+ %.splatinsert = insertelement <vscale x 4 x i1> poison, i1 %x, i64 0
+ %.splat = shufflevector <vscale x 4 x i1> %.splatinsert, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
+ %out = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %.splat)
+ ret <vscale x 16 x i1> %out
+}
+
+define <vscale x 16 x i1> @reinterpret_scalar_bool_q(i1 %x){
+; CHECK-LABEL: reinterpret_scalar_bool_q:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
+; CHECK-NEXT: sbfx x8, x0, #0, #1
+; CHECK-NEXT: whilelo p0.d, xzr, x8
+; CHECK-NEXT: ret
+ %.splatinsert = insertelement <vscale x 2 x i1> poison, i1 %x, i64 0
+ %.splat = shufflevector <vscale x 2 x i1> %.splatinsert, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
+ %out = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %.splat)
+ ret <vscale x 16 x i1> %out
+}
+
+
declare <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 immarg)
declare <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 immarg)
declare <vscale x 8 x i1> @llvm.aarch64.sve.cmpgt.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
>From 8724b327c830ec9caa6ab75f6a21f1da49fcb641 Mon Sep 17 00:00:00 2001
From: Marian Lukac <Marian.Lukac at arm.com>
Date: Wed, 28 Feb 2024 11:11:19 +0000
Subject: [PATCH 2/2] Cleaned up default behaviour of isZeroingInactiveLanes
and added seeing through bitcasts
---
llvm/lib/Target/AArch64/AArch64ISelLowering.cpp | 9 +++++----
1 file changed, 5 insertions(+), 4 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index dba3a787734721..5b1be2705d3740 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -269,13 +269,14 @@ static bool isMergePassthruOpcode(unsigned Opc) {
// Returns true if inactive lanes are known to be zeroed by construction.
static bool isZeroingInactiveLanes(SDValue Op) {
+ // Skip bitcast nodes
+ while (Op->getOpcode() == ISD::BITCAST)
+ Op = Op->getOperand(0);
+
switch (Op.getOpcode()) {
default:
- // We guarantee i1 splat_vectors to zero the other lanes by
- // implementing it with ptrue and possibly a punpklo for nxv1i1.
- if (ISD::isConstantSplatVectorAllOnes(Op.getNode()))
- return true;
return false;
+ // We guarantee i1 splat_vectors to zero the other lanes
case ISD::SPLAT_VECTOR:
case AArch64ISD::PTRUE:
case AArch64ISD::SETCC_MERGE_ZERO:
More information about the llvm-commits mailing list