[llvm] [AArch64][GlobalISel] Support more types for TRUNC (PR #66927)

via llvm-commits llvm-commits at lists.llvm.org
Wed Sep 20 09:29:37 PDT 2023


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-llvm-globalisel

<details>
<summary>Changes</summary>

G_TRUNC will get lowered into trunc(merge(trunc(unmerge), trunc(unmerge))) if the source is larger than 128 bits or the truncation is more than half of the current bit size.

Now mirrors ZEXT/SEXT code more closely for vector types.

---

Patch is 28.78 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/66927.diff


6 Files Affected:

- (modified) llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h (+1) 
- (modified) llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp (+54-7) 
- (modified) llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp (+13-52) 
- (modified) llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h (-1) 
- (added) llvm/test/CodeGen/AArch64/xtn.ll (+473) 
- (modified) llvm/test/CodeGen/AArch64/zext.ll (+83-55) 


``````````diff
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
index aec603a225d779e..a2cf325d1ef7ae7 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
@@ -376,6 +376,7 @@ class LegalizerHelper {
   LegalizeResult lowerFunnelShiftAsShifts(MachineInstr &MI);
   LegalizeResult lowerFunnelShift(MachineInstr &MI);
   LegalizeResult lowerEXT(MachineInstr &MI);
+  LegalizeResult lowerTRUNC(MachineInstr &MI);
   LegalizeResult lowerRotateWithReverseRotate(MachineInstr &MI);
   LegalizeResult lowerRotate(MachineInstr &MI);
 
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index 0c3f558ac2a6419..24bdd819dec2e31 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -3620,6 +3620,8 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) {
   case G_SEXT:
   case G_ANYEXT:
     return lowerEXT(MI);
+  case G_TRUNC:
+    return lowerTRUNC(MI);
   GISEL_VECREDUCE_CASES_NONSEQ
     return lowerVectorReduction(MI);
   }
@@ -4964,13 +4966,7 @@ LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx,
     MI.eraseFromParent();
     return Legalized;
   }
-  case TargetOpcode::G_TRUNC: {
-    Observer.changingInstr(MI);
-    moreElementsVectorSrc(MI, MoreTy, 1);
-    moreElementsVectorDst(MI, MoreTy, 0);
-    Observer.changedInstr(MI);
-    return Legalized;
-  }
+  case TargetOpcode::G_TRUNC:
   case TargetOpcode::G_FPTRUNC:
   case TargetOpcode::G_FPEXT: {
     if (TypeIdx != 0)
@@ -6019,6 +6015,57 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerEXT(MachineInstr &MI) {
   return UnableToLegalize;
 }
 
+LegalizerHelper::LegalizeResult LegalizerHelper::lowerTRUNC(MachineInstr &MI) {
+  // MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
+  MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
+  // Similar to how operand splitting is done in SelectionDAG, we can handle
+  // %res(v8s8) = G_TRUNC %in(v8s32) by generating:
+  //   %inlo(<4x s32>), %inhi(<4 x s32>) = G_UNMERGE %in(<8 x s32>)
+  //   %lo16(<4 x s16>) = G_TRUNC %inlo
+  //   %hi16(<4 x s16>) = G_TRUNC %inhi
+  //   %in16(<8 x s16>) = G_CONCAT_VECTORS %lo16, %hi16
+  //   %res(<8 x s8>) = G_TRUNC %in16
+
+  Register DstReg = MI.getOperand(0).getReg();
+  Register SrcReg = MI.getOperand(1).getReg();
+  LLT DstTy = MRI.getType(DstReg);
+  LLT SrcTy = MRI.getType(SrcReg);
+
+  assert(DstTy.isVector() && "This should be a vector operation");
+
+  // Split input type.
+  LLT SplitSrcTy =
+      SrcTy.changeElementCount(SrcTy.getElementCount().divideCoefficientBy(2));
+
+  // First, split the source into two smaller vectors.
+  SmallVector<Register, 2> SplitSrcs;
+  extractParts(SrcReg, SplitSrcTy, 2, SplitSrcs);
+
+  // Truncate the splits into intermediate narrower elements.
+  LLT InterTy;
+  if (DstTy.getScalarSizeInBits() * 2 < SrcTy.getScalarSizeInBits())
+    InterTy = SplitSrcTy.changeElementSize(DstTy.getScalarSizeInBits() * 2);
+  else
+    InterTy = SplitSrcTy.changeElementSize(DstTy.getScalarSizeInBits());
+  for (unsigned I = 0; I < SplitSrcs.size(); ++I) {
+    SplitSrcs[I] = MIRBuilder.buildTrunc(InterTy, SplitSrcs[I]).getReg(0);
+  }
+
+  // Combine the new truncates into one vector
+  auto Merge = MIRBuilder.buildMergeLikeInstr(
+      DstTy.changeElementSize(InterTy.getScalarSizeInBits()), SplitSrcs);
+
+  // Truncate the new vector to the final result type
+  if (DstTy.getScalarSizeInBits() * 2 < SrcTy.getScalarSizeInBits())
+    MIRBuilder.buildTrunc(MI.getOperand(0).getReg(), Merge.getReg(0));
+  else
+    MIRBuilder.buildCopy(MI.getOperand(0).getReg(), Merge.getReg(0));
+
+  MI.eraseFromParent();
+
+  return Legalized;
+}
+
 LegalizerHelper::LegalizeResult
 LegalizerHelper::lowerRotateWithReverseRotate(MachineInstr &MI) {
   auto [Dst, DstTy, Src, SrcTy, Amt, AmtTy] = MI.getFirst3RegLLTs();
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index d07de82de1335af..0114a8b8968a8c4 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -542,14 +542,22 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
       });
 
   getActionDefinitionsBuilder(G_TRUNC)
+      .legalFor({{v2s32, v2s64}, {v4s16, v4s32}, {v8s8, v8s16}})
+      .moreElementsToNextPow2(0)
+      .clampMaxNumElements(0, s8, 8)
+      .clampMaxNumElements(0, s16, 4)
+      .clampMaxNumElements(0, s32, 2)
       .minScalarOrEltIf(
           [=](const LegalityQuery &Query) { return Query.Types[0].isVector(); },
           0, s8)
-      .customIf([=](const LegalityQuery &Query) {
+      .lowerIf([=](const LegalityQuery &Query) {
         LLT DstTy = Query.Types[0];
         LLT SrcTy = Query.Types[1];
-        return DstTy == v8s8 && SrcTy.getSizeInBits() > 128;
+        return DstTy.isVector() && (SrcTy.getSizeInBits() > 128 ||
+                                    (DstTy.getScalarSizeInBits() * 2 <
+                                     SrcTy.getScalarSizeInBits()));
       })
+
       .alwaysLegal();
 
   getActionDefinitionsBuilder(G_SEXT_INREG)
@@ -997,8 +1005,6 @@ bool AArch64LegalizerInfo::legalizeCustom(LegalizerHelper &Helper,
     return legalizeShlAshrLshr(MI, MRI, MIRBuilder, Observer);
   case TargetOpcode::G_GLOBAL_VALUE:
     return legalizeSmallCMGlobalValue(MI, MRI, MIRBuilder, Observer);
-  case TargetOpcode::G_TRUNC:
-    return legalizeVectorTrunc(MI, Helper);
   case TargetOpcode::G_SBFX:
   case TargetOpcode::G_UBFX:
     return legalizeBitfieldExtract(MI, MRI, Helper);
@@ -1097,54 +1103,6 @@ bool AArch64LegalizerInfo::legalizeRotate(MachineInstr &MI,
   return true;
 }
 
-static void extractParts(Register Reg, MachineRegisterInfo &MRI,
-                         MachineIRBuilder &MIRBuilder, LLT Ty, int NumParts,
-                         SmallVectorImpl<Register> &VRegs) {
-  for (int I = 0; I < NumParts; ++I)
-    VRegs.push_back(MRI.createGenericVirtualRegister(Ty));
-  MIRBuilder.buildUnmerge(VRegs, Reg);
-}
-
-bool AArch64LegalizerInfo::legalizeVectorTrunc(
-    MachineInstr &MI, LegalizerHelper &Helper) const {
-  MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
-  MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
-  // Similar to how operand splitting is done in SelectiondDAG, we can handle
-  // %res(v8s8) = G_TRUNC %in(v8s32) by generating:
-  //   %inlo(<4x s32>), %inhi(<4 x s32>) = G_UNMERGE %in(<8 x s32>)
-  //   %lo16(<4 x s16>) = G_TRUNC %inlo
-  //   %hi16(<4 x s16>) = G_TRUNC %inhi
-  //   %in16(<8 x s16>) = G_CONCAT_VECTORS %lo16, %hi16
-  //   %res(<8 x s8>) = G_TRUNC %in16
-
-  Register DstReg = MI.getOperand(0).getReg();
-  Register SrcReg = MI.getOperand(1).getReg();
-  LLT DstTy = MRI.getType(DstReg);
-  LLT SrcTy = MRI.getType(SrcReg);
-  assert(llvm::has_single_bit<uint32_t>(DstTy.getSizeInBits()) &&
-         llvm::has_single_bit<uint32_t>(SrcTy.getSizeInBits()));
-
-  // Split input type.
-  LLT SplitSrcTy =
-      SrcTy.changeElementCount(SrcTy.getElementCount().divideCoefficientBy(2));
-  // First, split the source into two smaller vectors.
-  SmallVector<Register, 2> SplitSrcs;
-  extractParts(SrcReg, MRI, MIRBuilder, SplitSrcTy, 2, SplitSrcs);
-
-  // Truncate the splits into intermediate narrower elements.
-  LLT InterTy = SplitSrcTy.changeElementSize(DstTy.getScalarSizeInBits() * 2);
-  for (unsigned I = 0; I < SplitSrcs.size(); ++I)
-    SplitSrcs[I] = MIRBuilder.buildTrunc(InterTy, SplitSrcs[I]).getReg(0);
-
-  auto Concat = MIRBuilder.buildConcatVectors(
-      DstTy.changeElementSize(DstTy.getScalarSizeInBits() * 2), SplitSrcs);
-
-  Helper.Observer.changingInstr(MI);
-  MI.getOperand(1).setReg(Concat.getReg(0));
-  Helper.Observer.changedInstr(MI);
-  return true;
-}
-
 bool AArch64LegalizerInfo::legalizeSmallCMGlobalValue(
     MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &MIRBuilder,
     GISelChangeObserver &Observer) const {
@@ -1314,6 +1272,9 @@ bool AArch64LegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
 
     return true;
   }
+  case Intrinsic::experimental_vector_reverse:
+    // TODO: Add support for vector_reverse
+    return false;
   }
 
   return true;
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h
index 853d5a2305ac68a..e6c9182da912dba 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h
@@ -46,7 +46,6 @@ class AArch64LegalizerInfo : public LegalizerInfo {
   bool legalizeSmallCMGlobalValue(MachineInstr &MI, MachineRegisterInfo &MRI,
                                   MachineIRBuilder &MIRBuilder,
                                   GISelChangeObserver &Observer) const;
-  bool legalizeVectorTrunc(MachineInstr &MI, LegalizerHelper &Helper) const;
   bool legalizeBitfieldExtract(MachineInstr &MI, MachineRegisterInfo &MRI,
                                LegalizerHelper &Helper) const;
   bool legalizeRotate(MachineInstr &MI, MachineRegisterInfo &MRI,
diff --git a/llvm/test/CodeGen/AArch64/xtn.ll b/llvm/test/CodeGen/AArch64/xtn.ll
new file mode 100644
index 000000000000000..0dd4e3644b78356
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/xtn.ll
@@ -0,0 +1,473 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
+; RUN: llc -mtriple=aarch64 -verify-machineinstrs %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-SD
+; RUN: llc -mtriple=aarch64 -global-isel -global-isel-abort=2 -verify-machineinstrs %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
+
+define i8 @xtn_i16_to_i8(i16 %a) {
+; CHECK-LABEL: xtn_i16_to_i8:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ret
+entry:
+  %arg1 = trunc i16 %a to i8
+  ret i8 %arg1
+}
+
+define i8 @xtn_i32_to_i8(i32 %a) {
+; CHECK-LABEL: xtn_i32_to_i8:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ret
+entry:
+  %arg1 = trunc i32 %a to i8
+  ret i8 %arg1
+}
+
+define i8 @xtn_i64_to_i8(i64 %a) {
+; CHECK-LABEL: xtn_i64_to_i8:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
+; CHECK-NEXT:    ret
+entry:
+  %arg1 = trunc i64 %a to i8
+  ret i8 %arg1
+}
+
+define i8 @xtn_i128_to_i8(i128 %a) {
+; CHECK-LABEL: xtn_i128_to_i8:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
+; CHECK-NEXT:    ret
+entry:
+  %arg1 = trunc i128 %a to i8
+  ret i8 %arg1
+}
+
+define i16 @xtn_i32_to_i16(i32 %a) {
+; CHECK-LABEL: xtn_i32_to_i16:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ret
+entry:
+  %arg1 = trunc i32 %a to i16
+  ret i16 %arg1
+}
+
+define i16 @xtn_i64_to_i16(i64 %a) {
+; CHECK-LABEL: xtn_i64_to_i16:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
+; CHECK-NEXT:    ret
+entry:
+  %arg1 = trunc i64 %a to i16
+  ret i16 %arg1
+}
+
+define i16 @xtn_i128_to_i16(i128 %a) {
+; CHECK-LABEL: xtn_i128_to_i16:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
+; CHECK-NEXT:    ret
+entry:
+  %arg1 = trunc i128 %a to i16
+  ret i16 %arg1
+}
+
+define i32 @xtn_i64_to_i32(i64 %a) {
+; CHECK-LABEL: xtn_i64_to_i32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
+; CHECK-NEXT:    ret
+entry:
+  %arg1 = trunc i64 %a to i32
+  ret i32 %arg1
+}
+
+define i32 @xtn_i128_to_i32(i128 %a) {
+; CHECK-LABEL: xtn_i128_to_i32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
+; CHECK-NEXT:    ret
+entry:
+  %arg1 = trunc i128 %a to i32
+  ret i32 %arg1
+}
+
+define i64 @xtn_i128_to_i64(i128 %a) {
+; CHECK-LABEL: xtn_i128_to_i64:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ret
+entry:
+  %arg1 = trunc i128 %a to i64
+  ret i64 %arg1
+}
+
+define <2 x i8> @xtn_v2i16_v2i8(<2 x i16> %a) {
+; CHECK-LABEL: xtn_v2i16_v2i8:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ret
+entry:
+  %arg1 = trunc <2 x i16> %a to <2 x i8>
+  ret <2 x i8> %arg1
+}
+
+define <2 x i8> @xtn_v2i32_v2i8(<2 x i32> %a) {
+; CHECK-LABEL: xtn_v2i32_v2i8:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ret
+entry:
+  %arg1 = trunc <2 x i32> %a to <2 x i8>
+  ret <2 x i8> %arg1
+}
+
+define <2 x i8> @xtn_v2i64_v2i8(<2 x i64> %a) {
+; CHECK-LABEL: xtn_v2i64_v2i8:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    xtn v0.2s, v0.2d
+; CHECK-NEXT:    ret
+entry:
+  %arg1 = trunc <2 x i64> %a to <2 x i8>
+  ret <2 x i8> %arg1
+}
+
+define <2 x i8> @xtn_v2i128_v2i8(<2 x i128> %a) {
+; CHECK-SD-LABEL: xtn_v2i128_v2i8:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    fmov s0, w0
+; CHECK-SD-NEXT:    mov v0.s[1], w2
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: xtn_v2i128_v2i8:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    fmov d0, x0
+; CHECK-GI-NEXT:    mov v0.d[1], x2
+; CHECK-GI-NEXT:    xtn v0.2s, v0.2d
+; CHECK-GI-NEXT:    ret
+entry:
+  %arg1 = trunc <2 x i128> %a to <2 x i8>
+  ret <2 x i8> %arg1
+}
+
+define <2 x i16> @xtn_v2i32_v2i16(<2 x i32> %a) {
+; CHECK-LABEL: xtn_v2i32_v2i16:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ret
+entry:
+  %arg1 = trunc <2 x i32> %a to <2 x i16>
+  ret <2 x i16> %arg1
+}
+
+define <2 x i16> @xtn_v2i64_v2i16(<2 x i64> %a) {
+; CHECK-LABEL: xtn_v2i64_v2i16:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    xtn v0.2s, v0.2d
+; CHECK-NEXT:    ret
+entry:
+  %arg1 = trunc <2 x i64> %a to <2 x i16>
+  ret <2 x i16> %arg1
+}
+
+define <2 x i16> @xtn_v2i128_v2i16(<2 x i128> %a) {
+; CHECK-SD-LABEL: xtn_v2i128_v2i16:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    fmov s0, w0
+; CHECK-SD-NEXT:    mov v0.s[1], w2
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: xtn_v2i128_v2i16:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    fmov d0, x0
+; CHECK-GI-NEXT:    mov v0.d[1], x2
+; CHECK-GI-NEXT:    xtn v0.2s, v0.2d
+; CHECK-GI-NEXT:    ret
+entry:
+  %arg1 = trunc <2 x i128> %a to <2 x i16>
+  ret <2 x i16> %arg1
+}
+
+define <2 x i32> @xtn_v2i64_v2i32(<2 x i64> %a) {
+; CHECK-LABEL: xtn_v2i64_v2i32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    xtn v0.2s, v0.2d
+; CHECK-NEXT:    ret
+entry:
+  %arg1 = trunc <2 x i64> %a to <2 x i32>
+  ret <2 x i32> %arg1
+}
+
+define <2 x i32> @xtn_v2i128_v2i32(<2 x i128> %a) {
+; CHECK-SD-LABEL: xtn_v2i128_v2i32:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    fmov s0, w0
+; CHECK-SD-NEXT:    mov v0.s[1], w2
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: xtn_v2i128_v2i32:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    fmov d0, x0
+; CHECK-GI-NEXT:    mov v0.d[1], x2
+; CHECK-GI-NEXT:    xtn v0.2s, v0.2d
+; CHECK-GI-NEXT:    ret
+entry:
+  %arg1 = trunc <2 x i128> %a to <2 x i32>
+  ret <2 x i32> %arg1
+}
+
+define <2 x i64> @xtn_v2i128_v2i64(<2 x i128> %a) {
+; CHECK-LABEL: xtn_v2i128_v2i64:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    fmov d0, x0
+; CHECK-NEXT:    mov v0.d[1], x2
+; CHECK-NEXT:    ret
+entry:
+  %arg1 = trunc <2 x i128> %a to <2 x i64>
+  ret <2 x i64> %arg1
+}
+
+define <3 x i8> @xtn_v3i16_v3i8(<3 x i16> %a) {
+; CHECK-SD-LABEL: xtn_v3i16_v3i8:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT:    umov w0, v0.h[0]
+; CHECK-SD-NEXT:    umov w1, v0.h[1]
+; CHECK-SD-NEXT:    umov w2, v0.h[2]
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: xtn_v3i16_v3i8:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT:    mov h1, v0.h[1]
+; CHECK-GI-NEXT:    mov h2, v0.h[2]
+; CHECK-GI-NEXT:    fmov w0, s0
+; CHECK-GI-NEXT:    fmov w1, s1
+; CHECK-GI-NEXT:    fmov w2, s2
+; CHECK-GI-NEXT:    ret
+entry:
+  %arg1 = trunc <3 x i16> %a to <3 x i8>
+  ret <3 x i8> %arg1
+}
+
+define <3 x i8> @xtn_v3i32_v3i8(<3 x i32> %a) {
+; CHECK-SD-LABEL: xtn_v3i32_v3i8:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    xtn v0.4h, v0.4s
+; CHECK-SD-NEXT:    umov w0, v0.h[0]
+; CHECK-SD-NEXT:    umov w1, v0.h[1]
+; CHECK-SD-NEXT:    umov w2, v0.h[2]
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: xtn_v3i32_v3i8:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    mov s1, v0.s[1]
+; CHECK-GI-NEXT:    mov s2, v0.s[2]
+; CHECK-GI-NEXT:    fmov w0, s0
+; CHECK-GI-NEXT:    fmov w1, s1
+; CHECK-GI-NEXT:    fmov w2, s2
+; CHECK-GI-NEXT:    ret
+entry:
+  %arg1 = trunc <3 x i32> %a to <3 x i8>
+  ret <3 x i8> %arg1
+}
+
+define <3 x i8> @xtn_v3i64_v3i8(<3 x i64> %a) {
+; CHECK-SD-LABEL: xtn_v3i64_v3i8:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 def $q2
+; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-SD-NEXT:    xtn v1.2s, v2.2d
+; CHECK-SD-NEXT:    xtn v0.2s, v0.2d
+; CHECK-SD-NEXT:    fmov w2, s1
+; CHECK-SD-NEXT:    mov w1, v0.s[1]
+; CHECK-SD-NEXT:    fmov w0, s0
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: xtn_v3i64_v3i8:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    fmov x0, d0
+; CHECK-GI-NEXT:    fmov x1, d1
+; CHECK-GI-NEXT:    fmov x2, d2
+; CHECK-GI-NEXT:    // kill: def $w0 killed $w0 killed $x0
+; CHECK-GI-NEXT:    // kill: def $w1 killed $w1 killed $x1
+; CHECK-GI-NEXT:    // kill: def $w2 killed $w2 killed $x2
+; CHECK-GI-NEXT:    ret
+entry:
+  %arg1 = trunc <3 x i64> %a to <3 x i8>
+  ret <3 x i8> %arg1
+}
+
+define <3 x i16> @xtn_v3i32_v3i16(<3 x i32> %a) {
+; CHECK-SD-LABEL: xtn_v3i32_v3i16:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    xtn v0.4h, v0.4s
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: xtn_v3i32_v3i16:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    mov s1, v0.s[1]
+; CHECK-GI-NEXT:    mov s2, v0.s[2]
+; CHECK-GI-NEXT:    mov v0.h[1], v1.h[0]
+; CHECK-GI-NEXT:    mov v0.h[2], v2.h[0]
+; CHECK-GI-NEXT:    mov v0.h[3], v0.h[0]
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-GI-NEXT:    ret
+entry:
+  %arg1 = trunc <3 x i32> %a to <3 x i16>
+  ret <3 x i16> %arg1
+}
+
+define <3 x i16> @xtn_v3i64_v3i16(<3 x i64> %a) {
+; CHECK-SD-LABEL: xtn_v3i64_v3i16:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 def $q2
+; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-SD-NEXT:    uzp1 v0.4s, v0.4s, v2.4s
+; CHECK-SD-NEXT:    xtn v0.4h, v0.4s
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: xtn_v3i64_v3i16:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    fmov x8, d0
+; CHECK-GI-NEXT:    fmov x9, d1
+; CHECK-GI-NEXT:    fmov s0, w8
+; CHECK-GI-NEXT:    fmov s1, w9
+; CHECK-GI-NEXT:    fmov x8, d2
+; CHECK-GI-NEXT:    mov v0.h[1], v1.h[0]
+; CHECK-GI-NEXT:    fmov s1, w8
+; CHECK-GI-NEXT:    mov v0.h[2], v1.h[0]
+; CHECK-GI-NEXT:    mov v0.h[3], v0.h[0]
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-GI-NEXT:    ret
+entry:
+  %arg1 = trunc <3 x i64> %a to <3 x i16>
+  ret <3 x i16> %arg1
+}
+
+define <3 x i32> @xtn_v3i64_v3i32(<3 x i64> %a) {
+; CHECK-SD-LABEL: xtn_v3i64_v3i32:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 def $q2
+; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-SD-NEXT:    uzp1 v0.4s, v0.4s, v2.4s
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: xtn_v3i64_v3i32:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    fmov x8, d0
+; CHECK-GI-NEXT:    fmov x9, d1
+; CHECK-GI-NEXT:    fmov s0, w8
+; CHECK-GI-NEXT:    fmov x8, d2
+; CHECK-GI-NEXT:    mov v0.s[1], w9
+; CHECK-GI-NEXT:    mov v0.s[2], w8
+; CHECK-GI-NEXT:    mov v0.s[3], w8
+; CHECK-GI-NEXT:    ret
+entry:
+  %arg1 = trunc <3 x i64> %a to <3 x i32>
+  ret <3 x i32> %arg1
+}
+
+define <4 x i8> @xtn_v4i16_v4i8(<4 x i16> %a) {
+; CHECK-LABEL: xtn_v4i16_v4i8:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ret
+e...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/66927


More information about the llvm-commits mailing list