[llvm] [AArch64][GlobalISel] Select TBL/TBX Intrinsics (PR #92914)
via llvm-commits
llvm-commits at lists.llvm.org
Tue May 21 06:15:39 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-aarch64
Author: None (chuongg3)
<details>
<summary>Changes</summary>
---
Patch is 55.38 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/92914.diff
2 Files Affected:
- (modified) llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp (+66)
- (modified) llvm/test/CodeGen/AArch64/arm64-tbl.ll (+563-329)
``````````diff
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
index 3b3c1fc8b27bf..b7af19ef139c5 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
@@ -227,6 +227,8 @@ class AArch64InstructionSelector : public InstructionSelector {
bool selectReduction(MachineInstr &I, MachineRegisterInfo &MRI);
bool selectMOPS(MachineInstr &I, MachineRegisterInfo &MRI);
bool selectUSMovFromExtend(MachineInstr &I, MachineRegisterInfo &MRI);
+ void SelectTable(MachineInstr &I, MachineRegisterInfo &MRI, unsigned NumVecs,
+ unsigned Opc, bool isExt);
bool selectIndexedExtLoad(MachineInstr &I, MachineRegisterInfo &MRI);
bool selectIndexedLoad(MachineInstr &I, MachineRegisterInfo &MRI);
@@ -6537,6 +6539,48 @@ bool AArch64InstructionSelector::selectIntrinsic(MachineInstr &I,
I.eraseFromParent();
return true;
}
+ case Intrinsic::aarch64_neon_tbl2:
+ SelectTable(I, MRI, 2,
+ MRI.getType(I.getOperand(0).getReg()) == LLT::fixed_vector(8, 8)
+ ? AArch64::TBLv8i8Two
+ : AArch64::TBLv16i8Two,
+ false);
+ return true;
+ case Intrinsic::aarch64_neon_tbl3:
+ SelectTable(I, MRI, 3,
+ MRI.getType(I.getOperand(0).getReg()) == LLT::fixed_vector(8, 8)
+ ? AArch64::TBLv8i8Three
+ : AArch64::TBLv16i8Three,
+ false);
+ return true;
+ case Intrinsic::aarch64_neon_tbl4:
+ SelectTable(I, MRI, 4,
+ MRI.getType(I.getOperand(0).getReg()) == LLT::fixed_vector(8, 8)
+ ? AArch64::TBLv8i8Four
+ : AArch64::TBLv16i8Four,
+ false);
+ return true;
+ case Intrinsic::aarch64_neon_tbx2:
+ SelectTable(I, MRI, 2,
+ MRI.getType(I.getOperand(0).getReg()) == LLT::fixed_vector(8, 8)
+ ? AArch64::TBXv8i8Two
+ : AArch64::TBXv16i8Two,
+ true);
+ return true;
+ case Intrinsic::aarch64_neon_tbx3:
+ SelectTable(I, MRI, 3,
+ MRI.getType(I.getOperand(0).getReg()) == LLT::fixed_vector(8, 8)
+ ? AArch64::TBXv8i8Three
+ : AArch64::TBXv16i8Three,
+ true);
+ return true;
+ case Intrinsic::aarch64_neon_tbx4:
+ SelectTable(I, MRI, 4,
+ MRI.getType(I.getOperand(0).getReg()) == LLT::fixed_vector(8, 8)
+ ? AArch64::TBXv8i8Four
+ : AArch64::TBXv16i8Four,
+ true);
+ return true;
case Intrinsic::swift_async_context_addr:
auto Sub = MIB.buildInstr(AArch64::SUBXri, {I.getOperand(0).getReg()},
{Register(AArch64::FP)})
@@ -6552,6 +6596,28 @@ bool AArch64InstructionSelector::selectIntrinsic(MachineInstr &I,
return false;
}
+void AArch64InstructionSelector::SelectTable(MachineInstr &I,
+ MachineRegisterInfo &MRI,
+ unsigned NumVec, unsigned Opc,
+ bool isExt) {
+ // Create the REG_SEQUENCE
+ SmallVector<Register, 4> Regs;
+ for (unsigned i = 0; i < NumVec; i++)
+ Regs.push_back(I.getOperand(i + 2 + isExt).getReg());
+ Register RegSeq = createQTuple(Regs, MIB);
+
+ Register DstReg = I.getOperand(0).getReg();
+ Register IdxReg = I.getOperand(2 + NumVec + isExt).getReg();
+ MachineInstrBuilder Instr;
+ if (isExt) {
+ Register Reg = I.getOperand(2).getReg();
+ Instr = MIB.buildInstr(Opc, {DstReg}, {Reg, RegSeq, IdxReg});
+ } else
+ Instr = MIB.buildInstr(Opc, {DstReg}, {RegSeq, IdxReg});
+ constrainSelectedInstRegOperands(*Instr, TII, TRI, RBI);
+ I.eraseFromParent();
+}
+
InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectShiftA_32(const MachineOperand &Root) const {
auto MaybeImmed = getImmedFromMO(Root);
diff --git a/llvm/test/CodeGen/AArch64/arm64-tbl.ll b/llvm/test/CodeGen/AArch64/arm64-tbl.ll
index b89232c03f136..a5675df982d65 100644
--- a/llvm/test/CodeGen/AArch64/arm64-tbl.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-tbl.ll
@@ -1,5 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s
+; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s --check-prefixes=CHECK,CHECK-SD
+; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
define <8 x i8> @tbl1_8b(<16 x i8> %A, <8 x i8> %B) nounwind {
; CHECK-LABEL: tbl1_8b:
@@ -20,136 +21,189 @@ define <16 x i8> @tbl1_16b(<16 x i8> %A, <16 x i8> %B) nounwind {
}
define <8 x i8> @tbl2_8b(<16 x i8> %A, <16 x i8> %B, <8 x i8> %C) {
-; CHECK-LABEL: tbl2_8b:
-; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
-; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
-; CHECK-NEXT: tbl.8b v0, { v0, v1 }, v2
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: tbl2_8b:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
+; CHECK-SD-NEXT: tbl.8b v0, { v0, v1 }, v2
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: tbl2_8b:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
+; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-GI-NEXT: tbl.8b v0, { v0, v1 }, v2
+; CHECK-GI-NEXT: ret
%tmp3 = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> %A, <16 x i8> %B, <8 x i8> %C)
ret <8 x i8> %tmp3
}
define <16 x i8> @tbl2_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C) {
-; CHECK-LABEL: tbl2_16b:
-; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
-; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
-; CHECK-NEXT: tbl.16b v0, { v0, v1 }, v2
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: tbl2_16b:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
+; CHECK-SD-NEXT: tbl.16b v0, { v0, v1 }, v2
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: tbl2_16b:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
+; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-GI-NEXT: tbl.16b v0, { v0, v1 }, v2
+; CHECK-GI-NEXT: ret
%tmp3 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C)
ret <16 x i8> %tmp3
}
define <8 x i8> @tbl3_8b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <8 x i8> %D) {
-; CHECK-LABEL: tbl3_8b:
-; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-NEXT: tbl.8b v0, { v0, v1, v2 }, v3
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: tbl3_8b:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT: tbl.8b v0, { v0, v1, v2 }, v3
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: tbl3_8b:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT: tbl.8b v0, { v0, v1, v2 }, v3
+; CHECK-GI-NEXT: ret
%tmp3 = call <8 x i8> @llvm.aarch64.neon.tbl3.v8i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <8 x i8> %D)
ret <8 x i8> %tmp3
}
define <16 x i8> @tbl3_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D) {
-; CHECK-LABEL: tbl3_16b:
-; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-NEXT: tbl.16b v0, { v0, v1, v2 }, v3
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: tbl3_16b:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT: tbl.16b v0, { v0, v1, v2 }, v3
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: tbl3_16b:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT: tbl.16b v0, { v0, v1, v2 }, v3
+; CHECK-GI-NEXT: ret
%tmp3 = call <16 x i8> @llvm.aarch64.neon.tbl3.v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D)
ret <16 x i8> %tmp3
}
define <8 x i8> @tbl4_8b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <8 x i8> %E) {
-; CHECK-LABEL: tbl4_8b:
-; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-NEXT: tbl.8b v0, { v0, v1, v2, v3 }, v4
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: tbl4_8b:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: tbl.8b v0, { v0, v1, v2, v3 }, v4
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: tbl4_8b:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT: // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT: tbl.8b v0, { v0, v1, v2, v3 }, v4
+; CHECK-GI-NEXT: ret
%tmp3 = call <8 x i8> @llvm.aarch64.neon.tbl4.v8i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <8 x i8> %E)
ret <8 x i8> %tmp3
}
define <16 x i8> @tbl4_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E) {
-; CHECK-LABEL: tbl4_16b:
-; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-NEXT: tbl.16b v0, { v0, v1, v2, v3 }, v4
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: tbl4_16b:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: tbl.16b v0, { v0, v1, v2, v3 }, v4
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: tbl4_16b:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT: // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT: tbl.16b v0, { v0, v1, v2, v3 }, v4
+; CHECK-GI-NEXT: ret
%tmp3 = call <16 x i8> @llvm.aarch64.neon.tbl4.v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E)
ret <16 x i8> %tmp3
}
-; CHECK-LABEL: .LCPI8_0:
-; CHECK-NEXT: .byte 0 // 0x0
-; CHECK-NEXT: .byte 4 // 0x4
-; CHECK-NEXT: .byte 8 // 0x8
-; CHECK-NEXT: .byte 12 // 0xc
-; CHECK-NEXT: .byte 255 // 0xff
-; CHECK-NEXT: .byte 255 // 0xff
-; CHECK-NEXT: .byte 255 // 0xff
-; CHECK-NEXT: .byte 255 // 0xff
define <8 x i8> @shuffled_tbl2_to_tbl4_v8i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d) {
-; CHECK-LABEL: shuffled_tbl2_to_tbl4_v8i8:
-; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI8_0
-; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
-; CHECK-NEXT: // kill: def $q3 killed $q3 killed $q2_q3 def $q2_q3
-; CHECK-NEXT: ldr d4, [x8, :lo12:.LCPI8_0]
-; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
-; CHECK-NEXT: // kill: def $q2 killed $q2 killed $q2_q3 def $q2_q3
-; CHECK-NEXT: tbl.8b v0, { v0, v1 }, v4
-; CHECK-NEXT: tbl.8b v1, { v2, v3 }, v4
-; CHECK-NEXT: mov.s v0[1], v1[1]
-; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: shuffled_tbl2_to_tbl4_v8i8:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: adrp x8, .LCPI8_0
+; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-SD-NEXT: // kill: def $q3 killed $q3 killed $q2_q3 def $q2_q3
+; CHECK-SD-NEXT: ldr d4, [x8, :lo12:.LCPI8_0]
+; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
+; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q2_q3 def $q2_q3
+; CHECK-SD-NEXT: tbl.8b v0, { v0, v1 }, v4
+; CHECK-SD-NEXT: tbl.8b v1, { v2, v3 }, v4
+; CHECK-SD-NEXT: mov.s v0[1], v1[1]
+; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: shuffled_tbl2_to_tbl4_v8i8:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: adrp x8, .LCPI8_1
+; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
+; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q2_q3 def $q2_q3
+; CHECK-GI-NEXT: ldr d4, [x8, :lo12:.LCPI8_1]
+; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-GI-NEXT: // kill: def $q3 killed $q3 killed $q2_q3 def $q2_q3
+; CHECK-GI-NEXT: adrp x8, .LCPI8_0
+; CHECK-GI-NEXT: tbl.8b v0, { v0, v1 }, v4
+; CHECK-GI-NEXT: tbl.8b v1, { v2, v3 }, v4
+; CHECK-GI-NEXT: mov.d v0[1], v1[0]
+; CHECK-GI-NEXT: ldr d1, [x8, :lo12:.LCPI8_0]
+; CHECK-GI-NEXT: tbl.16b v0, { v0 }, v1
+; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-GI-NEXT: ret
%t1 = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> %a, <16 x i8> %b, <8 x i8> <i8 0, i8 4, i8 8, i8 12, i8 -1, i8 -1, i8 -1, i8 -1>)
%t2 = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> %c, <16 x i8> %d, <8 x i8> <i8 0, i8 4, i8 8, i8 12, i8 -1, i8 -1, i8 -1, i8 -1>)
%s = shufflevector <8 x i8> %t1, <8 x i8> %t2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
ret <8 x i8> %s
}
-; CHECK-LABEL: .LCPI9_0:
-; CHECK-NEXT: .byte 0 // 0x0
-; CHECK-NEXT: .byte 4 // 0x4
-; CHECK-NEXT: .byte 8 // 0x8
-; CHECK-NEXT: .byte 12 // 0xc
-; CHECK-NEXT: .byte 16 // 0x10
-; CHECK-NEXT: .byte 20 // 0x14
-; CHECK-NEXT: .byte 24 // 0x18
-; CHECK-NEXT: .byte 28 // 0x1c
-; CHECK-NEXT: .byte 32 // 0x20
-; CHECK-NEXT: .byte 36 // 0x24
-; CHECK-NEXT: .byte 40 // 0x28
-; CHECK-NEXT: .byte 44 // 0x2c
-; CHECK-NEXT: .byte 48 // 0x30
-; CHECK-NEXT: .byte 52 // 0x34
-; CHECK-NEXT: .byte 56 // 0x38
-; CHECK-NEXT: .byte 60 // 0x3c
-
define <16 x i8> @shuffled_tbl2_to_tbl4(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d) {
-; CHECK-LABEL: shuffled_tbl2_to_tbl4:
-; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-NEXT: adrp x8, .LCPI9_0
-; CHECK-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-NEXT: ldr q4, [x8, :lo12:.LCPI9_0]
-; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-NEXT: tbl.16b v0, { v0, v1, v2, v3 }, v4
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: shuffled_tbl2_to_tbl4:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: adrp x8, .LCPI9_0
+; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: ldr q4, [x8, :lo12:.LCPI9_0]
+; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: tbl.16b v0, { v0, v1, v2, v3 }, v4
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: shuffled_tbl2_to_tbl4:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: adrp x8, .LCPI9_1
+; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
+; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q2_q3 def $q2_q3
+; CHECK-GI-NEXT: ldr q4, [x8, :lo12:.LCPI9_1]
+; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-GI-NEXT: // kill: def $q3 killed $q3 killed $q2_q3 def $q2_q3
+; CHECK-GI-NEXT: adrp x8, .LCPI9_0
+; CHECK-GI-NEXT: tbl.16b v0, { v0, v1 }, v4
+; CHECK-GI-NEXT: tbl.16b v1, { v2, v3 }, v4
+; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI9_0]
+; CHECK-GI-NEXT: tbl.16b v0, { v0, v1 }, v2
+; CHECK-GI-NEXT: ret
%t1 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
%t2 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %c, <16 x i8> %d, <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
%s = shufflevector <16 x i8> %t1, <16 x i8> %t2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
@@ -157,38 +211,72 @@ define <16 x i8> @shuffled_tbl2_to_tbl4(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c
}
define <16 x i8> @shuffled_tbl2_to_tbl4_nonconst_first_mask(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d, i8 %v) {
-; CHECK-LABEL: shuffled_tbl2_to_tbl4_nonconst_first_mask:
-; CHECK: // %bb.0:
-; CHECK-NEXT: fmov s4, w0
-; CHECK-NEXT: mov w8, #32 // =0x20
-; CHECK-NEXT: // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-NEXT...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/92914
More information about the llvm-commits mailing list