[llvm] [GlobalISel][AArch64] Add support for sli/sri intrinsics (PR #173364)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Dec 23 06:19:44 PST 2025
llvmbot wrote:
@llvm/pr-subscribers-backend-aarch64
Author: Joshua Rodriguez (JoshdRod)
Changes:
GISel previously failed to lower the sli and sri families of intrinsics, falling back to SelectionDAG. This patch fixes that.
- sli/sri intrinsics are now lowered to G_SLI / G_SRI GlobalISel nodes during legalisation.
MIRBuilder is used rather than LowerTriOp to build the instruction here, as LowerTriOp treats all operands as registers, whilst the last operand of sli/sri is an immediate value (see the first function in the IR example after this list).
- The intrinsics are treated as "always defines fp" and "always uses fp" in the register bank info, to ensure vectors are always placed in floating-point registers.
This fixes an issue where 1-element vectors were placed in general-purpose registers, as IR translation converts these vectors to i64s (see the second function in the IR example after this list).
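A minimal IR example, adapted from the tests added in this patch (the function names are illustrative only): the first call exercises the immediate shift operand that LowerTriOp could not preserve, and the second exercises the `<1 x i64>` case that previously landed in a general-purpose register.

```llvm
declare <8 x i8> @llvm.aarch64.neon.vsli.v8i8(<8 x i8>, <8 x i8>, i32)
declare <1 x i64> @llvm.aarch64.neon.vsri.v1i64(<1 x i64>, <1 x i64>, i32)

; Legalised to G_SLI; the shift amount (i32 1) must survive as an
; immediate operand, which is why the lowering builds the instruction
; directly with MIRBuilder.
define <8 x i8> @sli8b_example(<8 x i8> %a, <8 x i8> %b) {
  %r = call <8 x i8> @llvm.aarch64.neon.vsli.v8i8(<8 x i8> %a, <8 x i8> %b, i32 1)
  ret <8 x i8> %r
}

; IR translation turns <1 x i64> into i64. Marking G_SRI as always
; defining/using fp keeps the value on a floating-point register, so this
; selects "sri d0, d1, #1".
define <1 x i64> @sri1d_example(<1 x i64> %a, <1 x i64> %b) {
  %r = call <1 x i64> @llvm.aarch64.neon.vsri.v1i64(<1 x i64> %a, <1 x i64> %b, i32 1)
  ret <1 x i64> %r
}
```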
This PR is a replica of #171448, which was merged into the wrong branch.
---
Full diff: https://github.com/llvm/llvm-project/pull/173364.diff
4 Files Affected:
- (modified) llvm/lib/Target/AArch64/AArch64InstrGISel.td (+14)
- (modified) llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp (+14)
- (modified) llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp (+4)
- (modified) llvm/test/CodeGen/AArch64/arm64-vshift.ll (+116-15)
``````````diff
diff --git a/llvm/lib/Target/AArch64/AArch64InstrGISel.td b/llvm/lib/Target/AArch64/AArch64InstrGISel.td
index 742141b28c502..497306dabaa97 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrGISel.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrGISel.td
@@ -270,6 +270,18 @@ def G_URSHR_I: AArch64GenericInstruction {
let hasSideEffects = 0;
}
+def G_SLI: AArch64GenericInstruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type0:$src1, type0:$src2, type1:$src3);
+ let hasSideEffects = 0;
+}
+
+def G_SRI: AArch64GenericInstruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type0:$src1, type0:$src2, type1:$src3);
+ let hasSideEffects = 0;
+}
+
// Generic instruction for the BSP pseudo. It is expanded into BSP, which
// expands into BSL/BIT/BIF after register allocation.
def G_BSP : AArch64GenericInstruction {
@@ -321,6 +333,8 @@ def : GINodeEquiv<G_USDOT, AArch64usdot>;
def : GINodeEquiv<G_SQSHLU_I, AArch64sqshlui>;
def : GINodeEquiv<G_SRSHR_I, AArch64srshri>;
def : GINodeEquiv<G_URSHR_I, AArch64urshri>;
+def : GINodeEquiv<G_SLI, AArch64vsli>;
+def : GINodeEquiv<G_SRI, AArch64vsri>;
def : GINodeEquiv<G_EXTRACT_VECTOR_ELT, vector_extract>;
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index fa12818390bdd..1ae0b99416a29 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -1949,6 +1949,20 @@ bool AArch64LegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
}
return false;
}
+ case Intrinsic::aarch64_neon_vsli: {
+ MIB.buildInstr(
+ AArch64::G_SLI, {MI.getOperand(0)},
+ {MI.getOperand(2), MI.getOperand(3), MI.getOperand(4).getImm()});
+ MI.eraseFromParent();
+ break;
+ }
+ case Intrinsic::aarch64_neon_vsri: {
+ MIB.buildInstr(
+ AArch64::G_SRI, {MI.getOperand(0)},
+ {MI.getOperand(2), MI.getOperand(3), MI.getOperand(4).getImm()});
+ MI.eraseFromParent();
+ break;
+ }
case Intrinsic::aarch64_neon_abs: {
// Lower the intrinsic to G_ABS.
MIB.buildInstr(TargetOpcode::G_ABS, {MI.getOperand(0)}, {MI.getOperand(2)});
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
index f21f6023dc181..c5885b53b513b 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
@@ -575,6 +575,8 @@ bool AArch64RegisterBankInfo::onlyUsesFP(const MachineInstr &MI,
case TargetOpcode::G_LROUND:
case TargetOpcode::G_LLROUND:
case AArch64::G_PMULL:
+ case AArch64::G_SLI:
+ case AArch64::G_SRI:
return true;
case TargetOpcode::G_INTRINSIC:
switch (cast<GIntrinsic>(MI).getIntrinsicID()) {
@@ -613,6 +615,8 @@ bool AArch64RegisterBankInfo::onlyDefinesFP(const MachineInstr &MI,
case TargetOpcode::G_INSERT_VECTOR_ELT:
case TargetOpcode::G_BUILD_VECTOR:
case TargetOpcode::G_BUILD_VECTOR_TRUNC:
+ case AArch64::G_SLI:
+ case AArch64::G_SRI:
return true;
case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
switch (cast<GIntrinsic>(MI).getIntrinsicID()) {
diff --git a/llvm/test/CodeGen/AArch64/arm64-vshift.ll b/llvm/test/CodeGen/AArch64/arm64-vshift.ll
index a316a4bc543b5..c5d2fb1d3749d 100644
--- a/llvm/test/CodeGen/AArch64/arm64-vshift.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-vshift.ll
@@ -2,15 +2,12 @@
; RUN: llc < %s -mtriple=arm64-eabi -global-isel=0 | FileCheck %s --check-prefixes=CHECK,CHECK-SD
; RUN: llc < %s -mtriple=arm64-eabi -global-isel=1 -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
-; CHECK-GI: warning: Instruction selection used fallback path for sli8b
-; CHECK-GI NEXT: warning: Instruction selection used fallback path for sli4h
-; CHECK-GI NEXT: warning: Instruction selection used fallback path for sli2s
-; CHECK-GI NEXT: warning: Instruction selection used fallback path for sli1d
-; CHECK-GI NEXT: warning: Instruction selection used fallback path for sli1d_imm0
-; CHECK-GI NEXT: warning: Instruction selection used fallback path for sli16b
-; CHECK-GI NEXT: warning: Instruction selection used fallback path for sli8h
-; CHECK-GI NEXT: warning: Instruction selection used fallback path for sli4s
-; CHECK-GI NEXT: warning: Instruction selection used fallback path for sli2d
+; CHECK-GI: warning: Instruction selection used fallback path for sqshrn1s
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqshrun1s
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqrshrn1s
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqrshrun1s
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for uqrshrn1s
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for uqshrn1s
define <8 x i8> @sqshl8b(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: sqshl8b:
@@ -4288,6 +4285,110 @@ declare <8 x i16> @llvm.aarch64.neon.vsli.v8i16(<8 x i16>, <8 x i16>, i32) nounw
declare <4 x i32> @llvm.aarch64.neon.vsli.v4i32(<4 x i32>, <4 x i32>, i32) nounwind readnone
declare <2 x i64> @llvm.aarch64.neon.vsli.v2i64(<2 x i64>, <2 x i64>, i32) nounwind readnone
+define <8 x i8> @sri8b(ptr %A, ptr %B) nounwind {
+; CHECK-LABEL: sri8b:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr d0, [x0]
+; CHECK-NEXT: ldr d1, [x1]
+; CHECK-NEXT: sri v0.8b, v1.8b, #1
+; CHECK-NEXT: ret
+ %tmp1 = load <8 x i8>, ptr %A
+ %tmp2 = load <8 x i8>, ptr %B
+ %tmp3 = call <8 x i8> @llvm.aarch64.neon.vsri.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2, i32 1)
+ ret <8 x i8> %tmp3
+}
+
+define <4 x i16> @sri4h(ptr %A, ptr %B) nounwind {
+; CHECK-LABEL: sri4h:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr d0, [x0]
+; CHECK-NEXT: ldr d1, [x1]
+; CHECK-NEXT: sri v0.4h, v1.4h, #1
+; CHECK-NEXT: ret
+ %tmp1 = load <4 x i16>, ptr %A
+ %tmp2 = load <4 x i16>, ptr %B
+ %tmp3 = call <4 x i16> @llvm.aarch64.neon.vsri.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2, i32 1)
+ ret <4 x i16> %tmp3
+}
+
+define <2 x i32> @sri2s(ptr %A, ptr %B) nounwind {
+; CHECK-LABEL: sri2s:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr d0, [x0]
+; CHECK-NEXT: ldr d1, [x1]
+; CHECK-NEXT: sri v0.2s, v1.2s, #1
+; CHECK-NEXT: ret
+ %tmp1 = load <2 x i32>, ptr %A
+ %tmp2 = load <2 x i32>, ptr %B
+ %tmp3 = call <2 x i32> @llvm.aarch64.neon.vsri.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2, i32 1)
+ ret <2 x i32> %tmp3
+}
+
+define <1 x i64> @sri1d(ptr %A, ptr %B) nounwind {
+; CHECK-LABEL: sri1d:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr d0, [x0]
+; CHECK-NEXT: ldr d1, [x1]
+; CHECK-NEXT: sri d0, d1, #1
+; CHECK-NEXT: ret
+ %tmp1 = load <1 x i64>, ptr %A
+ %tmp2 = load <1 x i64>, ptr %B
+ %tmp3 = call <1 x i64> @llvm.aarch64.neon.vsri.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2, i32 1)
+ ret <1 x i64> %tmp3
+}
+
+define <16 x i8> @sri16b(ptr %A, ptr %B) nounwind {
+; CHECK-LABEL: sri16b:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr q0, [x0]
+; CHECK-NEXT: ldr q1, [x1]
+; CHECK-NEXT: sri v0.16b, v1.16b, #1
+; CHECK-NEXT: ret
+ %tmp1 = load <16 x i8>, ptr %A
+ %tmp2 = load <16 x i8>, ptr %B
+ %tmp3 = call <16 x i8> @llvm.aarch64.neon.vsri.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2, i32 1)
+ ret <16 x i8> %tmp3
+}
+
+define <8 x i16> @sri8h(ptr %A, ptr %B) nounwind {
+; CHECK-LABEL: sri8h:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr q0, [x0]
+; CHECK-NEXT: ldr q1, [x1]
+; CHECK-NEXT: sri v0.8h, v1.8h, #1
+; CHECK-NEXT: ret
+ %tmp1 = load <8 x i16>, ptr %A
+ %tmp2 = load <8 x i16>, ptr %B
+ %tmp3 = call <8 x i16> @llvm.aarch64.neon.vsri.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2, i32 1)
+ ret <8 x i16> %tmp3
+}
+
+define <4 x i32> @sri4s(ptr %A, ptr %B) nounwind {
+; CHECK-LABEL: sri4s:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr q0, [x0]
+; CHECK-NEXT: ldr q1, [x1]
+; CHECK-NEXT: sri v0.4s, v1.4s, #1
+; CHECK-NEXT: ret
+ %tmp1 = load <4 x i32>, ptr %A
+ %tmp2 = load <4 x i32>, ptr %B
+ %tmp3 = call <4 x i32> @llvm.aarch64.neon.vsri.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2, i32 1)
+ ret <4 x i32> %tmp3
+}
+
+define <2 x i64> @sri2d(ptr %A, ptr %B) nounwind {
+; CHECK-LABEL: sri2d:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr q0, [x0]
+; CHECK-NEXT: ldr q1, [x1]
+; CHECK-NEXT: sri v0.2d, v1.2d, #1
+; CHECK-NEXT: ret
+ %tmp1 = load <2 x i64>, ptr %A
+ %tmp2 = load <2 x i64>, ptr %B
+ %tmp3 = call <2 x i64> @llvm.aarch64.neon.vsri.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2, i32 1)
+ ret <2 x i64> %tmp3
+}
+
define <1 x i64> @ashr_v1i64(<1 x i64> %a, <1 x i64> %b) {
; CHECK-SD-LABEL: ashr_v1i64:
; CHECK-SD: // %bb.0:
@@ -4532,9 +4633,9 @@ define <4 x i16> @lshr_trunc_v4i64_v4i16(<4 x i64> %a) {
;
; CHECK-GI-LABEL: lshr_trunc_v4i64_v4i16:
; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: adrp x8, .LCPI270_0
+; CHECK-GI-NEXT: adrp x8, .LCPI278_0
; CHECK-GI-NEXT: uzp1 v0.4s, v0.4s, v1.4s
-; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI270_0]
+; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI278_0]
; CHECK-GI-NEXT: uzp1 v2.4s, v2.4s, v2.4s
; CHECK-GI-NEXT: neg v1.4s, v2.4s
; CHECK-GI-NEXT: ushl v0.4s, v0.4s, v1.4s
@@ -4573,9 +4674,9 @@ define <4 x i16> @ashr_trunc_v4i64_v4i16(<4 x i64> %a) {
;
; CHECK-GI-LABEL: ashr_trunc_v4i64_v4i16:
; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: adrp x8, .LCPI272_0
+; CHECK-GI-NEXT: adrp x8, .LCPI280_0
; CHECK-GI-NEXT: uzp1 v0.4s, v0.4s, v1.4s
-; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI272_0]
+; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI280_0]
; CHECK-GI-NEXT: uzp1 v2.4s, v2.4s, v2.4s
; CHECK-GI-NEXT: neg v1.4s, v2.4s
; CHECK-GI-NEXT: sshl v0.4s, v0.4s, v1.4s
@@ -4613,9 +4714,9 @@ define <4 x i16> @shl_trunc_v4i64_v4i16(<4 x i64> %a) {
;
; CHECK-GI-LABEL: shl_trunc_v4i64_v4i16:
; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: adrp x8, .LCPI274_0
+; CHECK-GI-NEXT: adrp x8, .LCPI282_0
; CHECK-GI-NEXT: uzp1 v0.4s, v0.4s, v1.4s
-; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI274_0]
+; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI282_0]
; CHECK-GI-NEXT: uzp1 v1.4s, v2.4s, v2.4s
; CHECK-GI-NEXT: xtn v0.4h, v0.4s
; CHECK-GI-NEXT: xtn v1.4h, v1.4s
``````````
https://github.com/llvm/llvm-project/pull/173364
More information about the llvm-commits mailing list