[llvm] [AArch64][GlobalISel] Legalize ptr shuffle vector to s64 (PR #116013)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Nov 13 01:33:06 PST 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-globalisel
Author: David Green (davemgreen)
<details>
<summary>Changes</summary>
This converts all ptr element shuffle vectors to s64, so that the existing vector legalization handling can lower them as needed. This prevents a lot of fallbacks that currently try to generate things like `<2 x ptr> G_EXT`.
I'm not sure if bitcast/inttoptr/ptrtoint is intended to be necessary for vectors of pointers, but this patch uses buildCast for the casts, which now generates a ptrtoint/inttoptr for pointer vectors.
---
Full diff: https://github.com/llvm/llvm-project/pull/116013.diff
8 Files Affected:
- (modified) llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h (+2)
- (modified) llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp (+37)
- (modified) llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp (+4-3)
- (modified) llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp (+15-4)
- (modified) llvm/test/CodeGen/AArch64/GlobalISel/legalize-shuffle-vector.mir (+5-2)
- (modified) llvm/test/CodeGen/AArch64/arm64-ext.ll (+1-3)
- (modified) llvm/test/CodeGen/AArch64/neon-perm.ll (+1-8)
- (modified) llvm/test/CodeGen/AArch64/shufflevector.ll (+13-12)
``````````diff
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
index f682b20816d57f..2384b22c052662 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
@@ -378,6 +378,8 @@ class LegalizerHelper {
LLT CastTy);
LegalizeResult bitcastConcatVector(MachineInstr &MI, unsigned TypeIdx,
LLT CastTy);
+ LegalizeResult bitcastShuffleVector(MachineInstr &MI, unsigned TypeIdx,
+ LLT CastTy);
LegalizeResult bitcastExtractSubvector(MachineInstr &MI, unsigned TypeIdx,
LLT CastTy);
LegalizeResult bitcastInsertSubvector(MachineInstr &MI, unsigned TypeIdx,
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index 062dbbe904de33..e7abeb61bd354a 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -3697,6 +3697,41 @@ LegalizerHelper::bitcastConcatVector(MachineInstr &MI, unsigned TypeIdx,
return Legalized;
}
+// This bitcasts a shuffle vector to a different type currently of the same
+// element size. Mostly used to legalize ptr vectors, where ptrtoint/inttoptr
+// will be used instead.
+//
+// <16 x p0> = G_SHUFFLE_VECTOR <4 x p0>, <4 x p0>, mask
+// ===>
+// <4 x s64> = G_PTRTOINT <4 x p0>
+// <4 x s64> = G_PTRTOINT <4 x p0>
+// <16 x s64> = G_SHUFFLE_VECTOR <4 x s64>, <4 x s64>, mask
+// <16 x p0> = G_INTTOPTR <16 x s64>
+LegalizerHelper::LegalizeResult
+LegalizerHelper::bitcastShuffleVector(MachineInstr &MI, unsigned TypeIdx,
+ LLT CastTy) {
+ auto *ShuffleMI = cast<GShuffleVector>(&MI);
+ LLT DstTy = MRI.getType(ShuffleMI->getReg(0));
+ LLT SrcTy = MRI.getType(ShuffleMI->getReg(1));
+
+ // We currently only handle vectors of the same size.
+ if (TypeIdx != 0 ||
+ CastTy.getScalarSizeInBits() != DstTy.getScalarSizeInBits() ||
+ CastTy.getElementCount() != DstTy.getElementCount())
+ return UnableToLegalize;
+
+ LLT NewSrcTy = SrcTy.changeElementType(CastTy.getScalarType());
+
+ auto Inp1 = MIRBuilder.buildCast(NewSrcTy, ShuffleMI->getReg(1));
+ auto Inp2 = MIRBuilder.buildCast(NewSrcTy, ShuffleMI->getReg(2));
+ auto Shuf =
+ MIRBuilder.buildShuffleVector(CastTy, Inp1, Inp2, ShuffleMI->getMask());
+ MIRBuilder.buildCast(ShuffleMI->getReg(0), Shuf);
+
+ MI.eraseFromParent();
+ return Legalized;
+}
+
/// This attempts to bitcast G_EXTRACT_SUBVECTOR to CastTy.
///
/// <vscale x 8 x i1> = G_EXTRACT_SUBVECTOR <vscale x 16 x i1>, N
@@ -4133,6 +4168,8 @@ LegalizerHelper::bitcast(MachineInstr &MI, unsigned TypeIdx, LLT CastTy) {
return bitcastInsertVectorElt(MI, TypeIdx, CastTy);
case TargetOpcode::G_CONCAT_VECTORS:
return bitcastConcatVector(MI, TypeIdx, CastTy);
+ case TargetOpcode::G_SHUFFLE_VECTOR:
+ return bitcastShuffleVector(MI, TypeIdx, CastTy);
case TargetOpcode::G_EXTRACT_SUBVECTOR:
return bitcastExtractSubvector(MI, TypeIdx, CastTy);
case TargetOpcode::G_INSERT_SUBVECTOR:
diff --git a/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp b/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
index d910e33ac40f65..be347006a81f92 100644
--- a/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
@@ -600,12 +600,13 @@ MachineInstrBuilder MachineIRBuilder::buildCast(const DstOp &Dst,
return buildCopy(Dst, Src);
unsigned Opcode;
- if (SrcTy.isPointer() && DstTy.isScalar())
+ if (SrcTy.isPointerOrPointerVector())
Opcode = TargetOpcode::G_PTRTOINT;
- else if (DstTy.isPointer() && SrcTy.isScalar())
+ else if (DstTy.isPointerOrPointerVector())
Opcode = TargetOpcode::G_INTTOPTR;
else {
- assert(!SrcTy.isPointer() && !DstTy.isPointer() && "n G_ADDRCAST yet");
+ assert(!SrcTy.isPointerOrPointerVector() &&
+ !DstTy.isPointerOrPointerVector() && "no G_ADDRCAST yet");
Opcode = TargetOpcode::G_BITCAST;
}
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index d42ecc1c72dce9..355cb45750427d 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -838,13 +838,15 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
getActionDefinitionsBuilder(G_PTRTOINT)
.legalFor({{s64, p0}, {v2s64, v2p0}})
.widenScalarToNextPow2(0, 64)
- .clampScalar(0, s64, s64);
+ .clampScalar(0, s64, s64)
+ .clampMaxNumElements(0, s64, 2);
getActionDefinitionsBuilder(G_INTTOPTR)
.unsupportedIf([&](const LegalityQuery &Query) {
return Query.Types[0].getSizeInBits() != Query.Types[1].getSizeInBits();
})
- .legalFor({{p0, s64}, {v2p0, v2s64}});
+ .legalFor({{p0, s64}, {v2p0, v2s64}})
+ .clampMaxNumElements(1, s64, 2);
// Casts for 32 and 64-bit width type are just copies.
// Same for 128-bit width type, except they are on the FPR bank.
@@ -1051,7 +1053,7 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
if (DstTy != SrcTy)
return false;
return llvm::is_contained(
- {v2s64, v2p0, v2s32, v4s32, v4s16, v16s8, v8s8, v8s16}, DstTy);
+ {v2s64, v2s32, v4s32, v4s16, v16s8, v8s8, v8s16}, DstTy);
})
// G_SHUFFLE_VECTOR can have scalar sources (from 1 x s vectors), we
// just want those lowered into G_BUILD_VECTOR
@@ -1077,7 +1079,16 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
.clampNumElements(0, v8s8, v16s8)
.clampNumElements(0, v4s16, v8s16)
.clampNumElements(0, v4s32, v4s32)
- .clampNumElements(0, v2s64, v2s64);
+ .clampNumElements(0, v2s64, v2s64)
+ .bitcastIf(
+ // Bitcast pointers vector to i64.
+ [=](const LegalityQuery &Query) {
+ return Query.Types[0].isPointerVector();
+ },
+ [=](const LegalityQuery &Query) {
+ const LLT DstTy = Query.Types[0];
+ return std::pair(0, LLT::vector(DstTy.getElementCount(), 64));
+ });
getActionDefinitionsBuilder(G_CONCAT_VECTORS)
.legalFor({{v4s32, v2s32}, {v8s16, v4s16}, {v16s8, v8s8}})
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-shuffle-vector.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-shuffle-vector.mir
index c92718f9e9b3c7..2464026aa125b5 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-shuffle-vector.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-shuffle-vector.mir
@@ -59,8 +59,11 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x p0>) = COPY $q0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p0>) = COPY $q1
- ; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<2 x p0>) = G_SHUFFLE_VECTOR [[COPY]](<2 x p0>), [[COPY1]], shufflemask(0, 0)
- ; CHECK-NEXT: $q0 = COPY [[SHUF]](<2 x p0>)
+ ; CHECK-NEXT: [[PTRTOINT:%[0-9]+]]:_(<2 x s64>) = G_PTRTOINT [[COPY]](<2 x p0>)
+ ; CHECK-NEXT: [[PTRTOINT1:%[0-9]+]]:_(<2 x s64>) = G_PTRTOINT [[COPY1]](<2 x p0>)
+ ; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<2 x s64>) = G_SHUFFLE_VECTOR [[PTRTOINT]](<2 x s64>), [[PTRTOINT1]], shufflemask(0, 0)
+ ; CHECK-NEXT: [[INTTOPTR:%[0-9]+]]:_(<2 x p0>) = G_INTTOPTR [[SHUF]](<2 x s64>)
+ ; CHECK-NEXT: $q0 = COPY [[INTTOPTR]](<2 x p0>)
; CHECK-NEXT: RET_ReallyLR implicit $q0
%0:_(<2 x p0>) = COPY $q0
%1:_(<2 x p0>) = COPY $q1
diff --git a/llvm/test/CodeGen/AArch64/arm64-ext.ll b/llvm/test/CodeGen/AArch64/arm64-ext.ll
index 932b94a91095a8..f403b68bc93b52 100644
--- a/llvm/test/CodeGen/AArch64/arm64-ext.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-ext.ll
@@ -1,8 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc < %s -mtriple=arm64-eabi -global-isel=0 | FileCheck %s --check-prefixes=CHECK,CHECK-SD
-; RUN: llc < %s -mtriple=arm64-eabi -global-isel=1 -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
-
-; CHECK-GI: warning: Instruction selection used fallback path for test_v2p0
+; RUN: llc < %s -mtriple=arm64-eabi -global-isel=1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
define <8 x i8> @test_vextd(<8 x i8> %tmp1, <8 x i8> %tmp2) {
; CHECK-LABEL: test_vextd:
diff --git a/llvm/test/CodeGen/AArch64/neon-perm.ll b/llvm/test/CodeGen/AArch64/neon-perm.ll
index def0f15790a9ba..7218204ba844ca 100644
--- a/llvm/test/CodeGen/AArch64/neon-perm.ll
+++ b/llvm/test/CodeGen/AArch64/neon-perm.ll
@@ -1,13 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s --check-prefixes=CHECK,CHECK-SD
-; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
-
-; CHECK-GI: warning: Instruction selection used fallback path for test_vuzp1q_p0
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vuzp2q_p0
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vzip1q_p0
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vzip2q_p0
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vtrn1q_p0
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vtrn2q_p0
+; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon -global-isel | FileCheck %s --check-prefixes=CHECK,CHECK-GI
%struct.int8x8x2_t = type { [2 x <8 x i8>] }
%struct.int16x4x2_t = type { [2 x <4 x i16>] }
diff --git a/llvm/test/CodeGen/AArch64/shufflevector.ll b/llvm/test/CodeGen/AArch64/shufflevector.ll
index 69d3174581e3ef..532d7d1b1e6d56 100644
--- a/llvm/test/CodeGen/AArch64/shufflevector.ll
+++ b/llvm/test/CodeGen/AArch64/shufflevector.ll
@@ -1,11 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
; RUN: llc -mtriple=aarch64-none-linux-gnu %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD
-; RUN: llc -mtriple=aarch64-none-linux-gnu -global-isel -global-isel-abort=2 %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
-
-; CHECK-GI: warning: Instruction selection used fallback path for shufflevector_v2p0
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for shufflevector_v2p0_zeroes
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for shufflevector_v4p0
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for shufflevector_v4p0_zeroes
+; RUN: llc -mtriple=aarch64-none-linux-gnu -global-isel %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-GI
; ===== Legal Vector Types =====
@@ -393,12 +388,18 @@ define <4 x i64> @shufflevector_v4i64(<4 x i64> %a, <4 x i64> %b) {
}
define <4 x ptr> @shufflevector_v4p0(<4 x ptr> %a, <4 x ptr> %b) {
-; CHECK-LABEL: shufflevector_v4p0:
-; CHECK: // %bb.0:
-; CHECK-NEXT: zip2 v2.2d, v2.2d, v3.2d
-; CHECK-NEXT: zip2 v0.2d, v0.2d, v1.2d
-; CHECK-NEXT: mov v1.16b, v2.16b
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: shufflevector_v4p0:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: zip2 v2.2d, v2.2d, v3.2d
+; CHECK-SD-NEXT: zip2 v0.2d, v0.2d, v1.2d
+; CHECK-SD-NEXT: mov v1.16b, v2.16b
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: shufflevector_v4p0:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: zip2 v0.2d, v0.2d, v1.2d
+; CHECK-GI-NEXT: zip2 v1.2d, v2.2d, v3.2d
+; CHECK-GI-NEXT: ret
%c = shufflevector <4 x ptr> %a, <4 x ptr> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
ret <4 x ptr> %c
}
``````````
</details>
https://github.com/llvm/llvm-project/pull/116013
More information about the llvm-commits
mailing list