[llvm] [AArch64] Utilize `XAR` for certain vector rotates (PR #137629)
Rajveer Singh Bharadwaj via llvm-commits
llvm-commits at lists.llvm.org
Thu May 8 03:28:37 PDT 2025
https://github.com/Rajveer100 updated https://github.com/llvm/llvm-project/pull/137629
>From 97bd76587ba3a91f51f1a648e5ee871ce75db455 Mon Sep 17 00:00:00 2001
From: Rajveer <rajveer.developer at icloud.com>
Date: Mon, 28 Apr 2025 18:42:32 +0530
Subject: [PATCH] [AArch64] Utilize `XAR` for certain vector rotates
Resolves #137162
When the rotated value is not the result of an `XOR`, still select `XAR`
by passing a zeroed register as the second operand (since x ^ 0 == x).
---
.../Target/AArch64/AArch64ISelDAGToDAG.cpp | 54 +++++++++++++++----
llvm/test/CodeGen/AArch64/sve2-xar.ll | 13 +++--
llvm/test/CodeGen/AArch64/xar.ll | 18 +++++++
3 files changed, 68 insertions(+), 17 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
index 40944e3d43d6b..96fa85179d023 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -4532,7 +4532,9 @@ bool AArch64DAGToDAGISel::trySelectXAR(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
+
EVT VT = N->getValueType(0);
+ SDLoc DL(N);
// Essentially: rotr (xor(x, y), imm) -> xar (x, y, imm)
// Rotate by a constant is a funnel shift in IR which is expanded to
@@ -4558,10 +4560,18 @@ bool AArch64DAGToDAGISel::trySelectXAR(SDNode *N) {
!TLI->isAllActivePredicate(*CurDAG, N1.getOperand(0)))
return false;
- SDValue XOR = N0.getOperand(1);
- if (XOR.getOpcode() != ISD::XOR || XOR != N1.getOperand(1))
+ if (N0.getOperand(1) != N1.getOperand(1))
return false;
+ SDValue R1, R2;
+ bool IsXOROperand = true;
+ if (N0.getOperand(1).getOpcode() != ISD::XOR) {
+ IsXOROperand = false;
+ } else {
+ R1 = N0.getOperand(1).getOperand(0);
+ R2 = N1.getOperand(1).getOperand(1);
+ }
+
APInt ShlAmt, ShrAmt;
if (!ISD::isConstantSplatVector(N0.getOperand(2).getNode(), ShlAmt) ||
!ISD::isConstantSplatVector(N1.getOperand(2).getNode(), ShrAmt))
@@ -4570,11 +4580,23 @@ bool AArch64DAGToDAGISel::trySelectXAR(SDNode *N) {
if (ShlAmt + ShrAmt != VT.getScalarSizeInBits())
return false;
- SDLoc DL(N);
+ if (!IsXOROperand) {
+ SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i64);
+ SDNode *MOV = CurDAG->getMachineNode(AArch64::MOVIv2d_ns, DL, VT, Zero);
+ SDValue MOVIV = SDValue(MOV, 0);
+
+ SDValue ZSub = CurDAG->getTargetConstant(AArch64::zsub, DL, MVT::i32);
+ SDNode *SubRegToReg = CurDAG->getMachineNode(AArch64::SUBREG_TO_REG, DL,
+ VT, Zero, MOVIV, ZSub);
+
+ R1 = N1->getOperand(1);
+ R2 = SDValue(SubRegToReg, 0);
+ }
+
SDValue Imm =
CurDAG->getTargetConstant(ShrAmt.getZExtValue(), DL, MVT::i32);
- SDValue Ops[] = {XOR.getOperand(0), XOR.getOperand(1), Imm};
+ SDValue Ops[] = {R1, R2, Imm};
if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::Int>(
VT, {AArch64::XAR_ZZZI_B, AArch64::XAR_ZZZI_H, AArch64::XAR_ZZZI_S,
AArch64::XAR_ZZZI_D})) {
@@ -4591,24 +4613,36 @@ bool AArch64DAGToDAGISel::trySelectXAR(SDNode *N) {
N1->getOpcode() != AArch64ISD::VLSHR)
return false;
- if (N0->getOperand(0) != N1->getOperand(0) ||
- N1->getOperand(0)->getOpcode() != ISD::XOR)
+ if (N0->getOperand(0) != N1->getOperand(0))
return false;
- SDValue XOR = N0.getOperand(0);
- SDValue R1 = XOR.getOperand(0);
- SDValue R2 = XOR.getOperand(1);
+ SDValue R1, R2;
+ bool IsXOROperand = true;
+ if (N1->getOperand(0)->getOpcode() != ISD::XOR) {
+ IsXOROperand = false;
+ } else {
+ SDValue XOR = N0.getOperand(0);
+ R1 = XOR.getOperand(0);
+ R2 = XOR.getOperand(1);
+ }
unsigned HsAmt = N0.getConstantOperandVal(1);
unsigned ShAmt = N1.getConstantOperandVal(1);
- SDLoc DL = SDLoc(N0.getOperand(1));
SDValue Imm = CurDAG->getTargetConstant(
ShAmt, DL, N0.getOperand(1).getValueType(), false);
if (ShAmt + HsAmt != 64)
return false;
+ if (!IsXOROperand) {
+ SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i64);
+ SDNode *MOV = CurDAG->getMachineNode(AArch64::MOVIv2d_ns, DL, VT, Zero);
+ SDValue MOVIV = SDValue(MOV, 0);
+ R1 = N1->getOperand(0);
+ R2 = MOVIV;
+ }
+
SDValue Ops[] = {R1, R2, Imm};
CurDAG->SelectNodeTo(N, AArch64::XAR, N0.getValueType(), Ops);
diff --git a/llvm/test/CodeGen/AArch64/sve2-xar.ll b/llvm/test/CodeGen/AArch64/sve2-xar.ll
index e5a240b7a53fd..4d7b76ce985f9 100644
--- a/llvm/test/CodeGen/AArch64/sve2-xar.ll
+++ b/llvm/test/CodeGen/AArch64/sve2-xar.ll
@@ -170,13 +170,12 @@ define <vscale x 2 x i64> @xar_nxv2i64_l_neg1(<vscale x 2 x i64> %x, <vscale x 2
; OR instead of an XOR.
; TODO: We could use usra instruction here for SVE2.
define <vscale x 2 x i64> @xar_nxv2i64_l_neg2(<vscale x 2 x i64> %x, <vscale x 2 x i64> %y) {
-; CHECK-LABEL: xar_nxv2i64_l_neg2:
-; CHECK: // %bb.0:
-; CHECK-NEXT: orr z0.d, z0.d, z1.d
-; CHECK-NEXT: lsr z1.d, z0.d, #4
-; CHECK-NEXT: lsl z0.d, z0.d, #60
-; CHECK-NEXT: orr z0.d, z0.d, z1.d
-; CHECK-NEXT: ret
+; SVE2-LABEL: xar_nxv2i64_l_neg2:
+; SVE2: // %bb.0:
+; SVE2-NEXT: movi v2.2d, #0000000000000000
+; SVE2-NEXT: orr z0.d, z0.d, z1.d
+; SVE2-NEXT: xar z0.d, z0.d, z2.d, #4
+; SVE2-NEXT: ret
%a = or <vscale x 2 x i64> %x, %y
%b = call <vscale x 2 x i64> @llvm.fshl.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %a, <vscale x 2 x i64> splat (i64 60))
ret <vscale x 2 x i64> %b
diff --git a/llvm/test/CodeGen/AArch64/xar.ll b/llvm/test/CodeGen/AArch64/xar.ll
index d050eaf6646de..3631344cfde8e 100644
--- a/llvm/test/CodeGen/AArch64/xar.ll
+++ b/llvm/test/CodeGen/AArch64/xar.ll
@@ -19,4 +19,22 @@ define <2 x i64> @xar(<2 x i64> %x, <2 x i64> %y) {
ret <2 x i64> %b
}
+define <2 x i64> @xar_instead_of_or(<2 x i64> %r) {
+; SHA3-LABEL: xar_instead_of_or:
+; SHA3: // %bb.0: // %entry
+; SHA3-NEXT: movi v1.2d, #0000000000000000
+; SHA3-NEXT: xar v0.2d, v0.2d, v1.2d, #39
+; SHA3-NEXT: ret
+;
+; NOSHA3-LABEL: xar_instead_of_or:
+; NOSHA3: // %bb.0: // %entry
+; NOSHA3-NEXT: shl v1.2d, v0.2d, #25
+; NOSHA3-NEXT: usra v1.2d, v0.2d, #39
+; NOSHA3-NEXT: mov v0.16b, v1.16b
+; NOSHA3-NEXT: ret
+entry:
+ %or = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %r, <2 x i64> %r, <2 x i64> splat (i64 25))
+ ret <2 x i64> %or
+}
+
declare <2 x i64> @llvm.fshl.v2i64(<2 x i64>, <2 x i64>, <2 x i64>)
More information about the llvm-commits
mailing list