[llvm] [AArch64][GlobalISel] Support more types for TRUNC (PR #66927)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Sep 20 09:29:37 PDT 2023
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-globalisel
<details>
<summary>Changes</summary>
G_TRUNC will get lowered into trunc(merge(trunc(unmerge), trunc(unmerge))) if the source is larger than 128 bits or the truncation is more than half of the current bit size.
Now mirrors ZEXT/SEXT code more closely for vector types.
---
Patch is 28.78 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/66927.diff
6 Files Affected:
- (modified) llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h (+1)
- (modified) llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp (+54-7)
- (modified) llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp (+13-52)
- (modified) llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h (-1)
- (added) llvm/test/CodeGen/AArch64/xtn.ll (+473)
- (modified) llvm/test/CodeGen/AArch64/zext.ll (+83-55)
``````````diff
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
index aec603a225d779e..a2cf325d1ef7ae7 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
@@ -376,6 +376,7 @@ class LegalizerHelper {
LegalizeResult lowerFunnelShiftAsShifts(MachineInstr &MI);
LegalizeResult lowerFunnelShift(MachineInstr &MI);
LegalizeResult lowerEXT(MachineInstr &MI);
+ LegalizeResult lowerTRUNC(MachineInstr &MI);
LegalizeResult lowerRotateWithReverseRotate(MachineInstr &MI);
LegalizeResult lowerRotate(MachineInstr &MI);
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index 0c3f558ac2a6419..24bdd819dec2e31 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -3620,6 +3620,8 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) {
case G_SEXT:
case G_ANYEXT:
return lowerEXT(MI);
+ case G_TRUNC:
+ return lowerTRUNC(MI);
GISEL_VECREDUCE_CASES_NONSEQ
return lowerVectorReduction(MI);
}
@@ -4964,13 +4966,7 @@ LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx,
MI.eraseFromParent();
return Legalized;
}
- case TargetOpcode::G_TRUNC: {
- Observer.changingInstr(MI);
- moreElementsVectorSrc(MI, MoreTy, 1);
- moreElementsVectorDst(MI, MoreTy, 0);
- Observer.changedInstr(MI);
- return Legalized;
- }
+ case TargetOpcode::G_TRUNC:
case TargetOpcode::G_FPTRUNC:
case TargetOpcode::G_FPEXT: {
if (TypeIdx != 0)
@@ -6019,6 +6015,57 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerEXT(MachineInstr &MI) {
return UnableToLegalize;
}
+LegalizerHelper::LegalizeResult LegalizerHelper::lowerTRUNC(MachineInstr &MI) {
+ // MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
+ MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
+ // Similar to how operand splitting is done in SelectionDAG, we can handle
+ // %res(v8s8) = G_TRUNC %in(v8s32) by generating:
+ // %inlo(<4 x s32>), %inhi(<4 x s32>) = G_UNMERGE %in(<8 x s32>)
+ // %lo16(<4 x s16>) = G_TRUNC %inlo
+ // %hi16(<4 x s16>) = G_TRUNC %inhi
+ // %in16(<8 x s16>) = G_CONCAT_VECTORS %lo16, %hi16
+ // %res(<8 x s8>) = G_TRUNC %in16
+
+ Register DstReg = MI.getOperand(0).getReg();
+ Register SrcReg = MI.getOperand(1).getReg();
+ LLT DstTy = MRI.getType(DstReg);
+ LLT SrcTy = MRI.getType(SrcReg);
+
+ assert(DstTy.isVector() && "This should be a vector operation");
+
+ // Split input type.
+ LLT SplitSrcTy =
+ SrcTy.changeElementCount(SrcTy.getElementCount().divideCoefficientBy(2));
+
+ // First, split the source into two smaller vectors.
+ SmallVector<Register, 2> SplitSrcs;
+ extractParts(SrcReg, SplitSrcTy, 2, SplitSrcs);
+
+ // Truncate the splits into intermediate narrower elements.
+ LLT InterTy;
+ if (DstTy.getScalarSizeInBits() * 2 < SrcTy.getScalarSizeInBits())
+ InterTy = SplitSrcTy.changeElementSize(DstTy.getScalarSizeInBits() * 2);
+ else
+ InterTy = SplitSrcTy.changeElementSize(DstTy.getScalarSizeInBits());
+ for (unsigned I = 0; I < SplitSrcs.size(); ++I) {
+ SplitSrcs[I] = MIRBuilder.buildTrunc(InterTy, SplitSrcs[I]).getReg(0);
+ }
+
+ // Combine the new truncates into one vector
+ auto Merge = MIRBuilder.buildMergeLikeInstr(
+ DstTy.changeElementSize(InterTy.getScalarSizeInBits()), SplitSrcs);
+
+ // Truncate the new vector to the final result type
+ if (DstTy.getScalarSizeInBits() * 2 < SrcTy.getScalarSizeInBits())
+ MIRBuilder.buildTrunc(MI.getOperand(0).getReg(), Merge.getReg(0));
+ else
+ MIRBuilder.buildCopy(MI.getOperand(0).getReg(), Merge.getReg(0));
+
+ MI.eraseFromParent();
+
+ return Legalized;
+}
+
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerRotateWithReverseRotate(MachineInstr &MI) {
auto [Dst, DstTy, Src, SrcTy, Amt, AmtTy] = MI.getFirst3RegLLTs();
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index d07de82de1335af..0114a8b8968a8c4 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -542,14 +542,22 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
});
getActionDefinitionsBuilder(G_TRUNC)
+ .legalFor({{v2s32, v2s64}, {v4s16, v4s32}, {v8s8, v8s16}})
+ .moreElementsToNextPow2(0)
+ .clampMaxNumElements(0, s8, 8)
+ .clampMaxNumElements(0, s16, 4)
+ .clampMaxNumElements(0, s32, 2)
.minScalarOrEltIf(
[=](const LegalityQuery &Query) { return Query.Types[0].isVector(); },
0, s8)
- .customIf([=](const LegalityQuery &Query) {
+ .lowerIf([=](const LegalityQuery &Query) {
LLT DstTy = Query.Types[0];
LLT SrcTy = Query.Types[1];
- return DstTy == v8s8 && SrcTy.getSizeInBits() > 128;
+ return DstTy.isVector() && (SrcTy.getSizeInBits() > 128 ||
+ (DstTy.getScalarSizeInBits() * 2 <
+ SrcTy.getScalarSizeInBits()));
})
+
.alwaysLegal();
getActionDefinitionsBuilder(G_SEXT_INREG)
@@ -997,8 +1005,6 @@ bool AArch64LegalizerInfo::legalizeCustom(LegalizerHelper &Helper,
return legalizeShlAshrLshr(MI, MRI, MIRBuilder, Observer);
case TargetOpcode::G_GLOBAL_VALUE:
return legalizeSmallCMGlobalValue(MI, MRI, MIRBuilder, Observer);
- case TargetOpcode::G_TRUNC:
- return legalizeVectorTrunc(MI, Helper);
case TargetOpcode::G_SBFX:
case TargetOpcode::G_UBFX:
return legalizeBitfieldExtract(MI, MRI, Helper);
@@ -1097,54 +1103,6 @@ bool AArch64LegalizerInfo::legalizeRotate(MachineInstr &MI,
return true;
}
-static void extractParts(Register Reg, MachineRegisterInfo &MRI,
- MachineIRBuilder &MIRBuilder, LLT Ty, int NumParts,
- SmallVectorImpl<Register> &VRegs) {
- for (int I = 0; I < NumParts; ++I)
- VRegs.push_back(MRI.createGenericVirtualRegister(Ty));
- MIRBuilder.buildUnmerge(VRegs, Reg);
-}
-
-bool AArch64LegalizerInfo::legalizeVectorTrunc(
- MachineInstr &MI, LegalizerHelper &Helper) const {
- MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
- MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
- // Similar to how operand splitting is done in SelectiondDAG, we can handle
- // %res(v8s8) = G_TRUNC %in(v8s32) by generating:
- // %inlo(<4x s32>), %inhi(<4 x s32>) = G_UNMERGE %in(<8 x s32>)
- // %lo16(<4 x s16>) = G_TRUNC %inlo
- // %hi16(<4 x s16>) = G_TRUNC %inhi
- // %in16(<8 x s16>) = G_CONCAT_VECTORS %lo16, %hi16
- // %res(<8 x s8>) = G_TRUNC %in16
-
- Register DstReg = MI.getOperand(0).getReg();
- Register SrcReg = MI.getOperand(1).getReg();
- LLT DstTy = MRI.getType(DstReg);
- LLT SrcTy = MRI.getType(SrcReg);
- assert(llvm::has_single_bit<uint32_t>(DstTy.getSizeInBits()) &&
- llvm::has_single_bit<uint32_t>(SrcTy.getSizeInBits()));
-
- // Split input type.
- LLT SplitSrcTy =
- SrcTy.changeElementCount(SrcTy.getElementCount().divideCoefficientBy(2));
- // First, split the source into two smaller vectors.
- SmallVector<Register, 2> SplitSrcs;
- extractParts(SrcReg, MRI, MIRBuilder, SplitSrcTy, 2, SplitSrcs);
-
- // Truncate the splits into intermediate narrower elements.
- LLT InterTy = SplitSrcTy.changeElementSize(DstTy.getScalarSizeInBits() * 2);
- for (unsigned I = 0; I < SplitSrcs.size(); ++I)
- SplitSrcs[I] = MIRBuilder.buildTrunc(InterTy, SplitSrcs[I]).getReg(0);
-
- auto Concat = MIRBuilder.buildConcatVectors(
- DstTy.changeElementSize(DstTy.getScalarSizeInBits() * 2), SplitSrcs);
-
- Helper.Observer.changingInstr(MI);
- MI.getOperand(1).setReg(Concat.getReg(0));
- Helper.Observer.changedInstr(MI);
- return true;
-}
-
bool AArch64LegalizerInfo::legalizeSmallCMGlobalValue(
MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &MIRBuilder,
GISelChangeObserver &Observer) const {
@@ -1314,6 +1272,9 @@ bool AArch64LegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
return true;
}
+ case Intrinsic::experimental_vector_reverse:
+ // TODO: Add support for vector_reverse
+ return false;
}
return true;
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h
index 853d5a2305ac68a..e6c9182da912dba 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h
@@ -46,7 +46,6 @@ class AArch64LegalizerInfo : public LegalizerInfo {
bool legalizeSmallCMGlobalValue(MachineInstr &MI, MachineRegisterInfo &MRI,
MachineIRBuilder &MIRBuilder,
GISelChangeObserver &Observer) const;
- bool legalizeVectorTrunc(MachineInstr &MI, LegalizerHelper &Helper) const;
bool legalizeBitfieldExtract(MachineInstr &MI, MachineRegisterInfo &MRI,
LegalizerHelper &Helper) const;
bool legalizeRotate(MachineInstr &MI, MachineRegisterInfo &MRI,
diff --git a/llvm/test/CodeGen/AArch64/xtn.ll b/llvm/test/CodeGen/AArch64/xtn.ll
new file mode 100644
index 000000000000000..0dd4e3644b78356
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/xtn.ll
@@ -0,0 +1,473 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
+; RUN: llc -mtriple=aarch64 -verify-machineinstrs %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-SD
+; RUN: llc -mtriple=aarch64 -global-isel -global-isel-abort=2 -verify-machineinstrs %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
+
+define i8 @xtn_i16_to_i8(i16 %a) {
+; CHECK-LABEL: xtn_i16_to_i8:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ret
+entry:
+ %arg1 = trunc i16 %a to i8
+ ret i8 %arg1
+}
+
+define i8 @xtn_i32_to_i8(i32 %a) {
+; CHECK-LABEL: xtn_i32_to_i8:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ret
+entry:
+ %arg1 = trunc i32 %a to i8
+ ret i8 %arg1
+}
+
+define i8 @xtn_i64_to_i8(i64 %a) {
+; CHECK-LABEL: xtn_i64_to_i8:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
+; CHECK-NEXT: ret
+entry:
+ %arg1 = trunc i64 %a to i8
+ ret i8 %arg1
+}
+
+define i8 @xtn_i128_to_i8(i128 %a) {
+; CHECK-LABEL: xtn_i128_to_i8:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
+; CHECK-NEXT: ret
+entry:
+ %arg1 = trunc i128 %a to i8
+ ret i8 %arg1
+}
+
+define i16 @xtn_i32_to_i16(i32 %a) {
+; CHECK-LABEL: xtn_i32_to_i16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ret
+entry:
+ %arg1 = trunc i32 %a to i16
+ ret i16 %arg1
+}
+
+define i16 @xtn_i64_to_i16(i64 %a) {
+; CHECK-LABEL: xtn_i64_to_i16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
+; CHECK-NEXT: ret
+entry:
+ %arg1 = trunc i64 %a to i16
+ ret i16 %arg1
+}
+
+define i16 @xtn_i128_to_i16(i128 %a) {
+; CHECK-LABEL: xtn_i128_to_i16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
+; CHECK-NEXT: ret
+entry:
+ %arg1 = trunc i128 %a to i16
+ ret i16 %arg1
+}
+
+define i32 @xtn_i64_to_i32(i64 %a) {
+; CHECK-LABEL: xtn_i64_to_i32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
+; CHECK-NEXT: ret
+entry:
+ %arg1 = trunc i64 %a to i32
+ ret i32 %arg1
+}
+
+define i32 @xtn_i128_to_i32(i128 %a) {
+; CHECK-LABEL: xtn_i128_to_i32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
+; CHECK-NEXT: ret
+entry:
+ %arg1 = trunc i128 %a to i32
+ ret i32 %arg1
+}
+
+define i64 @xtn_i128_to_i64(i128 %a) {
+; CHECK-LABEL: xtn_i128_to_i64:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ret
+entry:
+ %arg1 = trunc i128 %a to i64
+ ret i64 %arg1
+}
+
+define <2 x i8> @xtn_v2i16_v2i8(<2 x i16> %a) {
+; CHECK-LABEL: xtn_v2i16_v2i8:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ret
+entry:
+ %arg1 = trunc <2 x i16> %a to <2 x i8>
+ ret <2 x i8> %arg1
+}
+
+define <2 x i8> @xtn_v2i32_v2i8(<2 x i32> %a) {
+; CHECK-LABEL: xtn_v2i32_v2i8:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ret
+entry:
+ %arg1 = trunc <2 x i32> %a to <2 x i8>
+ ret <2 x i8> %arg1
+}
+
+define <2 x i8> @xtn_v2i64_v2i8(<2 x i64> %a) {
+; CHECK-LABEL: xtn_v2i64_v2i8:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: xtn v0.2s, v0.2d
+; CHECK-NEXT: ret
+entry:
+ %arg1 = trunc <2 x i64> %a to <2 x i8>
+ ret <2 x i8> %arg1
+}
+
+define <2 x i8> @xtn_v2i128_v2i8(<2 x i128> %a) {
+; CHECK-SD-LABEL: xtn_v2i128_v2i8:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: fmov s0, w0
+; CHECK-SD-NEXT: mov v0.s[1], w2
+; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: xtn_v2i128_v2i8:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: fmov d0, x0
+; CHECK-GI-NEXT: mov v0.d[1], x2
+; CHECK-GI-NEXT: xtn v0.2s, v0.2d
+; CHECK-GI-NEXT: ret
+entry:
+ %arg1 = trunc <2 x i128> %a to <2 x i8>
+ ret <2 x i8> %arg1
+}
+
+define <2 x i16> @xtn_v2i32_v2i16(<2 x i32> %a) {
+; CHECK-LABEL: xtn_v2i32_v2i16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ret
+entry:
+ %arg1 = trunc <2 x i32> %a to <2 x i16>
+ ret <2 x i16> %arg1
+}
+
+define <2 x i16> @xtn_v2i64_v2i16(<2 x i64> %a) {
+; CHECK-LABEL: xtn_v2i64_v2i16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: xtn v0.2s, v0.2d
+; CHECK-NEXT: ret
+entry:
+ %arg1 = trunc <2 x i64> %a to <2 x i16>
+ ret <2 x i16> %arg1
+}
+
+define <2 x i16> @xtn_v2i128_v2i16(<2 x i128> %a) {
+; CHECK-SD-LABEL: xtn_v2i128_v2i16:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: fmov s0, w0
+; CHECK-SD-NEXT: mov v0.s[1], w2
+; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: xtn_v2i128_v2i16:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: fmov d0, x0
+; CHECK-GI-NEXT: mov v0.d[1], x2
+; CHECK-GI-NEXT: xtn v0.2s, v0.2d
+; CHECK-GI-NEXT: ret
+entry:
+ %arg1 = trunc <2 x i128> %a to <2 x i16>
+ ret <2 x i16> %arg1
+}
+
+define <2 x i32> @xtn_v2i64_v2i32(<2 x i64> %a) {
+; CHECK-LABEL: xtn_v2i64_v2i32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: xtn v0.2s, v0.2d
+; CHECK-NEXT: ret
+entry:
+ %arg1 = trunc <2 x i64> %a to <2 x i32>
+ ret <2 x i32> %arg1
+}
+
+define <2 x i32> @xtn_v2i128_v2i32(<2 x i128> %a) {
+; CHECK-SD-LABEL: xtn_v2i128_v2i32:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: fmov s0, w0
+; CHECK-SD-NEXT: mov v0.s[1], w2
+; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: xtn_v2i128_v2i32:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: fmov d0, x0
+; CHECK-GI-NEXT: mov v0.d[1], x2
+; CHECK-GI-NEXT: xtn v0.2s, v0.2d
+; CHECK-GI-NEXT: ret
+entry:
+ %arg1 = trunc <2 x i128> %a to <2 x i32>
+ ret <2 x i32> %arg1
+}
+
+define <2 x i64> @xtn_v2i128_v2i64(<2 x i128> %a) {
+; CHECK-LABEL: xtn_v2i128_v2i64:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fmov d0, x0
+; CHECK-NEXT: mov v0.d[1], x2
+; CHECK-NEXT: ret
+entry:
+ %arg1 = trunc <2 x i128> %a to <2 x i64>
+ ret <2 x i64> %arg1
+}
+
+define <3 x i8> @xtn_v3i16_v3i8(<3 x i16> %a) {
+; CHECK-SD-LABEL: xtn_v3i16_v3i8:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT: umov w0, v0.h[0]
+; CHECK-SD-NEXT: umov w1, v0.h[1]
+; CHECK-SD-NEXT: umov w2, v0.h[2]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: xtn_v3i16_v3i8:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT: mov h1, v0.h[1]
+; CHECK-GI-NEXT: mov h2, v0.h[2]
+; CHECK-GI-NEXT: fmov w0, s0
+; CHECK-GI-NEXT: fmov w1, s1
+; CHECK-GI-NEXT: fmov w2, s2
+; CHECK-GI-NEXT: ret
+entry:
+ %arg1 = trunc <3 x i16> %a to <3 x i8>
+ ret <3 x i8> %arg1
+}
+
+define <3 x i8> @xtn_v3i32_v3i8(<3 x i32> %a) {
+; CHECK-SD-LABEL: xtn_v3i32_v3i8:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: xtn v0.4h, v0.4s
+; CHECK-SD-NEXT: umov w0, v0.h[0]
+; CHECK-SD-NEXT: umov w1, v0.h[1]
+; CHECK-SD-NEXT: umov w2, v0.h[2]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: xtn_v3i32_v3i8:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: mov s1, v0.s[1]
+; CHECK-GI-NEXT: mov s2, v0.s[2]
+; CHECK-GI-NEXT: fmov w0, s0
+; CHECK-GI-NEXT: fmov w1, s1
+; CHECK-GI-NEXT: fmov w2, s2
+; CHECK-GI-NEXT: ret
+entry:
+ %arg1 = trunc <3 x i32> %a to <3 x i8>
+ ret <3 x i8> %arg1
+}
+
+define <3 x i8> @xtn_v3i64_v3i8(<3 x i64> %a) {
+; CHECK-SD-LABEL: xtn_v3i64_v3i8:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1
+; CHECK-SD-NEXT: // kill: def $d2 killed $d2 def $q2
+; CHECK-SD-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-SD-NEXT: xtn v1.2s, v2.2d
+; CHECK-SD-NEXT: xtn v0.2s, v0.2d
+; CHECK-SD-NEXT: fmov w2, s1
+; CHECK-SD-NEXT: mov w1, v0.s[1]
+; CHECK-SD-NEXT: fmov w0, s0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: xtn_v3i64_v3i8:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: fmov x0, d0
+; CHECK-GI-NEXT: fmov x1, d1
+; CHECK-GI-NEXT: fmov x2, d2
+; CHECK-GI-NEXT: // kill: def $w0 killed $w0 killed $x0
+; CHECK-GI-NEXT: // kill: def $w1 killed $w1 killed $x1
+; CHECK-GI-NEXT: // kill: def $w2 killed $w2 killed $x2
+; CHECK-GI-NEXT: ret
+entry:
+ %arg1 = trunc <3 x i64> %a to <3 x i8>
+ ret <3 x i8> %arg1
+}
+
+define <3 x i16> @xtn_v3i32_v3i16(<3 x i32> %a) {
+; CHECK-SD-LABEL: xtn_v3i32_v3i16:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: xtn v0.4h, v0.4s
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: xtn_v3i32_v3i16:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: mov s1, v0.s[1]
+; CHECK-GI-NEXT: mov s2, v0.s[2]
+; CHECK-GI-NEXT: mov v0.h[1], v1.h[0]
+; CHECK-GI-NEXT: mov v0.h[2], v2.h[0]
+; CHECK-GI-NEXT: mov v0.h[3], v0.h[0]
+; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-GI-NEXT: ret
+entry:
+ %arg1 = trunc <3 x i32> %a to <3 x i16>
+ ret <3 x i16> %arg1
+}
+
+define <3 x i16> @xtn_v3i64_v3i16(<3 x i64> %a) {
+; CHECK-SD-LABEL: xtn_v3i64_v3i16:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1
+; CHECK-SD-NEXT: // kill: def $d2 killed $d2 def $q2
+; CHECK-SD-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-SD-NEXT: uzp1 v0.4s, v0.4s, v2.4s
+; CHECK-SD-NEXT: xtn v0.4h, v0.4s
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: xtn_v3i64_v3i16:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: fmov x8, d0
+; CHECK-GI-NEXT: fmov x9, d1
+; CHECK-GI-NEXT: fmov s0, w8
+; CHECK-GI-NEXT: fmov s1, w9
+; CHECK-GI-NEXT: fmov x8, d2
+; CHECK-GI-NEXT: mov v0.h[1], v1.h[0]
+; CHECK-GI-NEXT: fmov s1, w8
+; CHECK-GI-NEXT: mov v0.h[2], v1.h[0]
+; CHECK-GI-NEXT: mov v0.h[3], v0.h[0]
+; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-GI-NEXT: ret
+entry:
+ %arg1 = trunc <3 x i64> %a to <3 x i16>
+ ret <3 x i16> %arg1
+}
+
+define <3 x i32> @xtn_v3i64_v3i32(<3 x i64> %a) {
+; CHECK-SD-LABEL: xtn_v3i64_v3i32:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1
+; CHECK-SD-NEXT: // kill: def $d2 killed $d2 def $q2
+; CHECK-SD-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-SD-NEXT: uzp1 v0.4s, v0.4s, v2.4s
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: xtn_v3i64_v3i32:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: fmov x8, d0
+; CHECK-GI-NEXT: fmov x9, d1
+; CHECK-GI-NEXT: fmov s0, w8
+; CHECK-GI-NEXT: fmov x8, d2
+; CHECK-GI-NEXT: mov v0.s[1], w9
+; CHECK-GI-NEXT: mov v0.s[2], w8
+; CHECK-GI-NEXT: mov v0.s[3], w8
+; CHECK-GI-NEXT: ret
+entry:
+ %arg1 = trunc <3 x i64> %a to <3 x i32>
+ ret <3 x i32> %arg1
+}
+
+define <4 x i8> @xtn_v4i16_v4i8(<4 x i16> %a) {
+; CHECK-LABEL: xtn_v4i16_v4i8:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ret
+e...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/66927
More information about the llvm-commits
mailing list