[llvm] [GlobalIsel] Push cast through build vector (PR #104634)

via llvm-commits llvm-commits at lists.llvm.org
Fri Aug 16 12:07:20 PDT 2024


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-amdgpu

@llvm/pr-subscribers-llvm-globalisel

Author: Thorsten Schütt (tschuett)

<details>
<summary>Changes</summary>

Credits: https://github.com/llvm/llvm-project/pull/100563

---

Patch is 116.23 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/104634.diff


16 Files Affected:

- (modified) llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h (+3) 
- (modified) llvm/include/llvm/Target/GlobalISel/Combine.td (+16-1) 
- (modified) llvm/lib/CodeGen/GlobalISel/CombinerHelperCasts.cpp (+39) 
- (modified) llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp (+1) 
- (modified) llvm/test/CodeGen/AArch64/GlobalISel/combine-cast.mir (+92) 
- (modified) llvm/test/CodeGen/AArch64/GlobalISel/combine-extract-vec-elt.mir (+2-2) 
- (modified) llvm/test/CodeGen/AArch64/GlobalISel/combine-with-flags.mir (+33-12) 
- (modified) llvm/test/CodeGen/AArch64/arm64-subvector-extend.ll (+272-184) 
- (modified) llvm/test/CodeGen/AArch64/arm64-vadd.ll (+31-15) 
- (modified) llvm/test/CodeGen/AArch64/neon-extadd.ll (+226-150) 
- (modified) llvm/test/CodeGen/AArch64/sext.ll (+207-147) 
- (modified) llvm/test/CodeGen/AArch64/vecreduce-add.ll (+396-268) 
- (modified) llvm/test/CodeGen/AArch64/xtn.ll (+17-29) 
- (modified) llvm/test/CodeGen/AArch64/zext.ll (+146-117) 
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/combine-shl-from-extend-narrow.postlegal.mir (+6-6) 
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/combine-shl-from-extend-narrow.prelegal.mir (+4-6) 


``````````diff
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
index 32effc536eb35d..9b62d6067be39c 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -900,6 +900,9 @@ class CombinerHelper {
   bool matchExtOfExt(const MachineInstr &FirstMI, const MachineInstr &SecondMI,
                      BuildFnTy &MatchInfo);
 
+  bool matchCastOfBuildVector(const MachineInstr &CastMI,
+                              const MachineInstr &BVMI, BuildFnTy &MatchInfo);
+
 private:
   /// Checks for legality of an indexed variant of \p LdSt.
   bool isIndexedLoadStoreLegal(GLoadStore &LdSt) const;
diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
index 814c5e789cb374..c95f542757c66b 100644
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -1857,6 +1857,18 @@ def anyext_of_anyext : ext_of_ext_opcodes<G_ANYEXT, G_ANYEXT>;
 def anyext_of_zext : ext_of_ext_opcodes<G_ANYEXT, G_ZEXT>;
 def anyext_of_sext : ext_of_ext_opcodes<G_ANYEXT, G_SEXT>;
 
+// Push cast through build vector.
+class buildvector_of_opcode<Instruction castOpcode> : GICombineRule <
+  (defs root:$root, build_fn_matchinfo:$matchinfo),
+  (match (G_BUILD_VECTOR $bv, GIVariadic<>:$unused):$Build,
+         (castOpcode $root, $bv):$Cast,
+         [{ return Helper.matchCastOfBuildVector(*${Cast}, *${Build}, ${matchinfo}); }]),
+  (apply [{ Helper.applyBuildFn(*${Cast}, ${matchinfo}); }])>;
+
+def buildvector_of_zext : buildvector_of_opcode<G_ZEXT>;
+def buildvector_of_anyext : buildvector_of_opcode<G_ANYEXT>;
+def buildvector_of_truncate : buildvector_of_opcode<G_TRUNC>;
+
 def cast_combines: GICombineGroup<[
   truncate_of_zext,
   truncate_of_sext,
@@ -1870,7 +1882,10 @@ def cast_combines: GICombineGroup<[
   sext_of_anyext,
   anyext_of_anyext,
   anyext_of_zext,
-  anyext_of_sext
+  anyext_of_sext,
+  buildvector_of_zext,
+  buildvector_of_anyext,
+  buildvector_of_truncate
 ]>;
 
 
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelperCasts.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelperCasts.cpp
index 494d8da84445d1..e3208c16cfd586 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelperCasts.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelperCasts.cpp
@@ -273,3 +273,42 @@ bool CombinerHelper::matchExtOfExt(const MachineInstr &FirstMI,
 
   return false;
 }
+
+bool CombinerHelper::matchCastOfBuildVector(const MachineInstr &CastMI,
+                                            const MachineInstr &BVMI,
+                                            BuildFnTy &MatchInfo) {
+  const GExtOrTruncOp *Cast = cast<GExtOrTruncOp>(&CastMI);
+  const GBuildVector *BV = cast<GBuildVector>(&BVMI);
+
+  if (!MRI.hasOneNonDBGUse(BV->getReg(0)))
+    return false;
+
+  Register Dst = Cast->getReg(0);
+  // The type of the new build vector.
+  LLT DstTy = MRI.getType(Dst);
+  // The scalar or element type of the new build vector.
+  LLT ElemTy = DstTy.getScalarType();
+  // The scalar or element type of the old build vector.
+  LLT InputElemTy = MRI.getType(BV->getReg(0)).getScalarType();
+
+  // Check legality of new build vector, the scalar casts, and profitability of
+  // the many casts.
+  if (!isLegalOrBeforeLegalizer(
+          {TargetOpcode::G_BUILD_VECTOR, {DstTy, ElemTy}}) ||
+      !isLegalOrBeforeLegalizer({Cast->getOpcode(), {ElemTy, InputElemTy}}) ||
+      !isCastFree(Cast->getOpcode(), ElemTy, InputElemTy))
+    return false;
+
+  MatchInfo = [=](MachineIRBuilder &B) {
+    SmallVector<Register> Casts;
+    unsigned Elements = BV->getNumSources();
+    for (unsigned I = 0; I < Elements; ++I)
+      Casts.push_back(
+          B.buildInstr(Cast->getOpcode(), {ElemTy}, {BV->getSourceReg(I)})
+              .getReg(0));
+
+    B.buildBuildVector(Dst, Casts);
+  };
+
+  return true;
+}
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index d3c5742cee3eb4..33a1fa1ad04fdf 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -953,6 +953,7 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
       .clampNumElements(0, v2s64, v2s64)
       .minScalarOrElt(0, s8)
       .widenVectorEltsToVectorMinSize(0, 64)
+      .widenScalarOrEltToNextPow2(0)
       .minScalarSameAs(1, 0);
 
   getActionDefinitionsBuilder(G_BUILD_VECTOR_TRUNC).lower();
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-cast.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-cast.mir
index 0f436127ea2eb6..9eef79a9c4bbee 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/combine-cast.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-cast.mir
@@ -129,3 +129,95 @@ body:             |
     %res:_(<2 x s64>) = G_SELECT %cond(<2 x s32>), %bv, %bv2
     %small:_(<2 x s32>) = G_TRUNC %res(<2 x s64>)
     $x0 = COPY %small(<2 x s32>)
+...
+---
+name:            test_combine_trunc_build_vector
+legalized: true
+body:             |
+  bb.1:
+    ; CHECK-PRE-LABEL: name: test_combine_trunc_build_vector
+    ; CHECK-PRE: %arg1:_(s64) = COPY $x0
+    ; CHECK-PRE-NEXT: %arg2:_(s64) = COPY $x0
+    ; CHECK-PRE-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC %arg1(s64)
+    ; CHECK-PRE-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC %arg2(s64)
+    ; CHECK-PRE-NEXT: %small:_(<2 x s32>) = G_BUILD_VECTOR [[TRUNC]](s32), [[TRUNC1]](s32)
+    ; CHECK-PRE-NEXT: $x0 = COPY %small(<2 x s32>)
+    ;
+    ; CHECK-POST-LABEL: name: test_combine_trunc_build_vector
+    ; CHECK-POST: %arg1:_(s64) = COPY $x0
+    ; CHECK-POST-NEXT: %arg2:_(s64) = COPY $x0
+    ; CHECK-POST-NEXT: %bv:_(<2 x s64>) = G_BUILD_VECTOR %arg1(s64), %arg2(s64)
+    ; CHECK-POST-NEXT: %small:_(<2 x s32>) = G_TRUNC %bv(<2 x s64>)
+    ; CHECK-POST-NEXT: $x0 = COPY %small(<2 x s32>)
+    %arg1:_(s64) = COPY $x0
+    %arg2:_(s64) = COPY $x0
+    %bv:_(<2 x s64>) = G_BUILD_VECTOR %arg1(s64), %arg2(s64)
+    %small:_(<2 x s32>) = G_TRUNC %bv(<2 x s64>)
+    $x0 = COPY %small(<2 x s32>)
+...
+---
+name:            test_combine_zext_build_vector
+legalized: true
+body:             |
+  bb.1:
+    ; CHECK-PRE-LABEL: name: test_combine_zext_build_vector
+    ; CHECK-PRE: %arg1:_(s32) = COPY $w0
+    ; CHECK-PRE-NEXT: %arg2:_(s32) = COPY $w0
+    ; CHECK-PRE-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT %arg1(s32)
+    ; CHECK-PRE-NEXT: [[ZEXT1:%[0-9]+]]:_(s64) = G_ZEXT %arg2(s32)
+    ; CHECK-PRE-NEXT: %large:_(<2 x s64>) = G_BUILD_VECTOR [[ZEXT]](s64), [[ZEXT1]](s64)
+    ; CHECK-PRE-NEXT: $q0 = COPY %large(<2 x s64>)
+    ;
+    ; CHECK-POST-LABEL: name: test_combine_zext_build_vector
+    ; CHECK-POST: %arg1:_(s32) = COPY $w0
+    ; CHECK-POST-NEXT: %arg2:_(s32) = COPY $w0
+    ; CHECK-POST-NEXT: %bv:_(<2 x s32>) = G_BUILD_VECTOR %arg1(s32), %arg2(s32)
+    ; CHECK-POST-NEXT: %large:_(<2 x s64>) = G_ZEXT %bv(<2 x s32>)
+    ; CHECK-POST-NEXT: $q0 = COPY %large(<2 x s64>)
+    %arg1:_(s32) = COPY $w0
+    %arg2:_(s32) = COPY $w0
+    %bv:_(<2 x s32>) = G_BUILD_VECTOR %arg1(s32), %arg2(s32)
+    %large:_(<2 x s64>) = G_ZEXT %bv(<2 x s32>)
+    $q0 = COPY %large(<2 x s64>)
+...
+---
+name:            test_combine_anyext_build_vector
+legalized: true
+body:             |
+  bb.1:
+    ; CHECK-PRE-LABEL: name: test_combine_anyext_build_vector
+    ; CHECK-PRE: %arg1:_(s32) = COPY $w0
+    ; CHECK-PRE-NEXT: %arg2:_(s32) = COPY $w0
+    ; CHECK-PRE-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT %arg1(s32)
+    ; CHECK-PRE-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT %arg2(s32)
+    ; CHECK-PRE-NEXT: %large:_(<2 x s64>) = G_BUILD_VECTOR [[ANYEXT]](s64), [[ANYEXT1]](s64)
+    ; CHECK-PRE-NEXT: $q0 = COPY %large(<2 x s64>)
+    ;
+    ; CHECK-POST-LABEL: name: test_combine_anyext_build_vector
+    ; CHECK-POST: %arg1:_(s32) = COPY $w0
+    ; CHECK-POST-NEXT: %arg2:_(s32) = COPY $w0
+    ; CHECK-POST-NEXT: %bv:_(<2 x s32>) = G_BUILD_VECTOR %arg1(s32), %arg2(s32)
+    ; CHECK-POST-NEXT: %large:_(<2 x s64>) = G_ANYEXT %bv(<2 x s32>)
+    ; CHECK-POST-NEXT: $q0 = COPY %large(<2 x s64>)
+    %arg1:_(s32) = COPY $w0
+    %arg2:_(s32) = COPY $w0
+    %bv:_(<2 x s32>) = G_BUILD_VECTOR %arg1(s32), %arg2(s32)
+    %large:_(<2 x s64>) = G_ANYEXT %bv(<2 x s32>)
+    $q0 = COPY %large(<2 x s64>)
+...
+---
+name:            test_combine_sext_build_vector
+legalized: true
+body:             |
+  bb.1:
+    ; CHECK-LABEL: name: test_combine_sext_build_vector
+    ; CHECK: %arg1:_(s32) = COPY $w0
+    ; CHECK-NEXT: %arg2:_(s32) = COPY $w0
+    ; CHECK-NEXT: %bv:_(<2 x s32>) = G_BUILD_VECTOR %arg1(s32), %arg2(s32)
+    ; CHECK-NEXT: %large:_(<2 x s64>) = G_SEXT %bv(<2 x s32>)
+    ; CHECK-NEXT: $q0 = COPY %large(<2 x s64>)
+    %arg1:_(s32) = COPY $w0
+    %arg2:_(s32) = COPY $w0
+    %bv:_(<2 x s32>) = G_BUILD_VECTOR %arg1(s32), %arg2(s32)
+    %large:_(<2 x s64>) = G_SEXT %bv(<2 x s32>)
+    $q0 = COPY %large(<2 x s64>)
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-extract-vec-elt.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-extract-vec-elt.mir
index 70241e71aa593f..c98dcf6ccb7966 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/combine-extract-vec-elt.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-extract-vec-elt.mir
@@ -49,8 +49,8 @@ body:             |
     ; CHECK: liveins: $x0, $x1
     ; CHECK-NEXT: {{  $}}
     ; CHECK-NEXT: %arg1:_(s64) = COPY $x0
-    ; CHECK-NEXT: %extract:_(s32) = G_TRUNC %arg1(s64)
-    ; CHECK-NEXT: %zext:_(s64) = G_ZEXT %extract(s32)
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC %arg1(s64)
+    ; CHECK-NEXT: %zext:_(s64) = G_ZEXT [[TRUNC]](s32)
     ; CHECK-NEXT: $x0 = COPY %zext(s64)
     ; CHECK-NEXT: RET_ReallyLR implicit $x0
     %arg1:_(s64) = COPY $x0
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-with-flags.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-with-flags.mir
index 6eece5c56258dc..8cb44605246ffa 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/combine-with-flags.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-with-flags.mir
@@ -60,8 +60,11 @@ body:             |
     ; CHECK-NEXT: {{  $}}
     ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
     ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
-    ; CHECK-NEXT: %bv0:_(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY]](s32), [[COPY1]](s32)
-    ; CHECK-NEXT: $q0 = COPY %bv0(<4 x s32>)
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
+    ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
+    ; CHECK-NEXT: %trunc:_(<4 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC]](s16), [[TRUNC1]](s16)
+    ; CHECK-NEXT: %zext:_(<4 x s32>) = G_ZEXT %trunc(<4 x s16>)
+    ; CHECK-NEXT: $q0 = COPY %zext(<4 x s32>)
     ; CHECK-NEXT: RET_ReallyLR implicit $w0
     %0:_(s32) = COPY $w0
     %1:_(s32) = COPY $w1
@@ -165,8 +168,13 @@ body:             |
     ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
     ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $w2
     ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $w3
-    ; CHECK-NEXT: %bv0:_(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
-    ; CHECK-NEXT: $q0 = COPY %bv0(<4 x s32>)
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
+    ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
+    ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32)
+    ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32)
+    ; CHECK-NEXT: %t:_(<4 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16)
+    ; CHECK-NEXT: %s:_(<4 x s32>) = G_SEXT %t(<4 x s16>)
+    ; CHECK-NEXT: $q0 = COPY %s(<4 x s32>)
     %0:_(s32) = COPY $w0
     %1:_(s32) = COPY $w1
     %2:_(s32) = COPY $w2
@@ -188,8 +196,11 @@ body:             |
     ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
     ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $w2
     ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $w3
-    ; CHECK-NEXT: %bv0:_(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
-    ; CHECK-NEXT: %t:_(<4 x s16>) = G_TRUNC %bv0(<4 x s32>)
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
+    ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
+    ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32)
+    ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32)
+    ; CHECK-NEXT: %t:_(<4 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16)
     ; CHECK-NEXT: %z:_(<4 x s32>) = G_ZEXT %t(<4 x s16>)
     ; CHECK-NEXT: $q0 = COPY %z(<4 x s32>)
     %0:_(s32) = COPY $w0
@@ -213,8 +224,11 @@ body:             |
     ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
     ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $w2
     ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $w3
-    ; CHECK-NEXT: %bv0:_(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
-    ; CHECK-NEXT: %t:_(<4 x s16>) = nsw G_TRUNC %bv0(<4 x s32>)
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
+    ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
+    ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32)
+    ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32)
+    ; CHECK-NEXT: %t:_(<4 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16)
     ; CHECK-NEXT: %z:_(<4 x s32>) = G_ZEXT %t(<4 x s16>)
     ; CHECK-NEXT: $q0 = COPY %z(<4 x s32>)
     %0:_(s32) = COPY $w0
@@ -238,8 +252,13 @@ body:             |
     ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
     ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $w2
     ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $w3
-    ; CHECK-NEXT: %bv0:_(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
-    ; CHECK-NEXT: $q0 = COPY %bv0(<4 x s32>)
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
+    ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
+    ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32)
+    ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32)
+    ; CHECK-NEXT: %t:_(<4 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16)
+    ; CHECK-NEXT: %z:_(<4 x s32>) = G_ZEXT %t(<4 x s16>)
+    ; CHECK-NEXT: $q0 = COPY %z(<4 x s32>)
     %0:_(s32) = COPY $w0
     %1:_(s32) = COPY $w1
     %2:_(s32) = COPY $w2
@@ -259,8 +278,10 @@ body:             |
     ; CHECK-NEXT: {{  $}}
     ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
     ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1
-    ; CHECK-NEXT: %bv0:_(<2 x s64>) = G_BUILD_VECTOR [[COPY]](s64), [[COPY1]](s64)
-    ; CHECK-NEXT: %z:_(<2 x s32>) = nuw G_TRUNC %bv0(<2 x s64>)
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s64)
+    ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s64)
+    ; CHECK-NEXT: %t:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
+    ; CHECK-NEXT: %z:_(<2 x s32>) = G_ZEXT %t(<2 x s16>)
     ; CHECK-NEXT: $d0 = COPY %z(<2 x s32>)
     %0:_(s64) = COPY $x0
     %1:_(s64) = COPY $x1
diff --git a/llvm/test/CodeGen/AArch64/arm64-subvector-extend.ll b/llvm/test/CodeGen/AArch64/arm64-subvector-extend.ll
index abf2e1272d6450..1f5654d59926dc 100644
--- a/llvm/test/CodeGen/AArch64/arm64-subvector-extend.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-subvector-extend.ll
@@ -466,62 +466,92 @@ define <32 x i8> @sext_v32i1(<32 x i1> %arg) {
 ;
 ; CHECK-GI-LABEL: sext_v32i1:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    fmov s17, w0
-; CHECK-GI-NEXT:    fmov s19, w4
-; CHECK-GI-NEXT:    ldr s0, [sp]
-; CHECK-GI-NEXT:    ldr s21, [sp, #8]
-; CHECK-GI-NEXT:    ldr s1, [sp, #32]
-; CHECK-GI-NEXT:    ldr s22, [sp, #40]
-; CHECK-GI-NEXT:    ldr s2, [sp, #64]
-; CHECK-GI-NEXT:    ldr s23, [sp, #72]
-; CHECK-GI-NEXT:    ldr s3, [sp, #96]
-; CHECK-GI-NEXT:    ldr s24, [sp, #104]
-; CHECK-GI-NEXT:    mov.s v17[1], w1
-; CHECK-GI-NEXT:    mov.s v19[1], w5
-; CHECK-GI-NEXT:    ldr s5, [sp, #128]
-; CHECK-GI-NEXT:    ldr s20, [sp, #136]
-; CHECK-GI-NEXT:    mov.s v0[1], v21[0]
-; CHECK-GI-NEXT:    ldr s7, [sp, #160]
-; CHECK-GI-NEXT:    ldr s25, [sp, #168]
-; CHECK-GI-NEXT:    mov.s v1[1], v22[0]
-; CHECK-GI-NEXT:    mov.s v2[1], v23[0]
-; CHECK-GI-NEXT:    mov.s v3[1], v24[0]
-; CHECK-GI-NEXT:    mov.s v5[1], v20[0]
-; CHECK-GI-NEXT:    mov.s v7[1], v25[0]
-; CHECK-GI-NEXT:    ldr s16, [sp, #16]
-; CHECK-GI-NEXT:    ldr s18, [sp, #48]
-; CHECK-GI-NEXT:    ldr s20, [sp, #80]
-; CHECK-GI-NEXT:    ldr s21, [sp, #112]
-; CHECK-GI-NEXT:    ldr s22, [sp, #144]
-; CHECK-GI-NEXT:    ldr s23, [sp, #176]
-; CHECK-GI-NEXT:    mov.s v17[2], w2
-; CHECK-GI-NEXT:    mov.s v19[2], w6
-; CHECK-GI-NEXT:    mov.s v0[2], v16[0]
-; CHECK-GI-NEXT:    mov.s v1[2], v18[0]
-; CHECK-GI-NEXT:    mov.s v2[2], v20[0]
-; CHECK-GI-NEXT:    mov.s v3[2], v21[0]
-; CHECK-GI-NEXT:    mov.s v5[2], v22[0]
-; CHECK-GI-NEXT:    mov.s v7[2], v23[0]
-; CHECK-GI-NEXT:    ldr s4, [sp, #24]
-; CHECK-GI-NEXT:    ldr s6, [sp, #56]
-; CHECK-GI-NEXT:    ldr s16, [sp, #88]
-; CHECK-GI-NEXT:    ldr s18, [sp, #120]
-; CHECK-GI-NEXT:    ldr s20, [sp, #152]
-; CHECK-GI-NEXT:    ldr s21, [sp, #184]
-; CHECK-GI-NEXT:    mov.s v17[3], w3
-; CHECK-GI-NEXT:    mov.s v19[3], w7
-; CHECK-GI-NEXT:    mov.s v0[3], v4[0]
-; CHECK-GI-NEXT:    mov.s v1[3], v6[0]
-; CHECK-GI-NEXT:    mov.s v2[3], v16[0]
-; CHECK-GI-NEXT:    mov.s v3[3], v18[0]
-; CHECK-GI-NEXT:    mov.s v5[3], v20[0]
-; CHECK-GI-NEXT:    mov.s v7[3], v21[0]
-; CHECK-GI-NEXT:    uzp1.8h v4, v17, v19
-; CHECK-GI-NEXT:    uzp1.8h v0, v0, v1
-; CHECK-GI-NEXT:    uzp1.8h v1, v2, v3
-; CHECK-GI-NEXT:    uzp1.8h v2, v5, v7
-; CHECK-GI-NEXT:    uzp1.16b v0, v4, v0
-; CHECK-GI-NEXT:    uzp1.16b v1, v1, v2
+; CHECK-GI-NEXT:    ldr w9, [sp, #64]
+; CHECK-GI-NEXT:    ldr w8, [sp, #72]
+; CHECK-GI-NEXT:    fmov s0, w0
+; CHECK-GI-NEXT:    fmov s2, w1
+; CHECK-GI-NEXT:    fmov s1, w9
+; CHECK-GI-NEXT:    fmov s3, w8
+; CHECK-GI-NEXT:    ldr w8, [sp, #80]
+; CHECK-GI-NEXT:    ldr w9, [sp, #128]
+; CHECK-GI-NEXT:    mov.b v0[1], v2[0]
+; CHECK-GI-NEXT:    fmov s2, w2
+; CHECK-GI-NEXT:    mov.b v1[1], v3[0]
+; CHECK-GI-NEXT:    fmov s3, w8
+; CHECK-GI-NEXT:    ldr w8, [sp, #88]
+; CHECK-GI-NEXT:    mov.b v0[2], v2[0]
+; CHECK-GI-NEXT:    fmov s2, w3
+; CHECK-GI-NEXT:    mov.b v1[2], v3[0]
+; CHECK-GI-NEXT:    fmov s3, w8
+; CHECK-GI-NEXT:    ldr w8, [sp, #96]
+; CHECK-GI-NEXT:    mov.b v0[3], v2[0]
+; CHECK-GI-NEXT:    fmov s2, w4
+; CHECK-GI-NEXT:    mov.b v1[3], v3[0]
+; CHECK-GI-NEXT:    fmov s3, w8
+; CHECK-GI-NEXT:    ldr w8, [sp, #104]
+; CHECK-GI-NEXT:    mov.b v0[4], v2[0]
+; CHECK-GI-NEXT:    fmov s2, w5
+; CHECK-GI-NEXT:    mov.b v1[4], v3[0]
+; CHECK-GI-NEXT:    fmov s3, w8
+; CHECK-GI-NEXT:    ldr w8, [sp, #112]
+; CHECK-GI-NEXT:    mov.b v0[5], v2[0]
+; CHECK-GI-NEXT:    fmov s2, w6
+; CHECK-GI-NEXT:    mov.b v1[5], v3[0]
+; CHECK-GI-NEXT:    fmov s3, w8
+; CHECK-GI-NEXT:    ldr w8, [sp, #120]
+; CHECK-GI-NEXT:    mov.b v0[6], v2[0]
+; CHECK-GI-NEXT:    fmov s2, w7
+; CHECK-GI-NEXT:    mov.b v1[6], v3[0]
+; CHECK-GI-NEXT:    fmov s3, w8
+; CHECK-GI-NEXT:    ldr w8, [sp]
+; CHECK-GI-NEXT:    mov.b v0[7], v2[0]
+; CHECK-GI-NEXT:    fmov s2, w8
+; CHECK-GI-NEXT:    ldr w8, [sp, #8]
+; CHECK-GI-NEXT:    mov.b v1[7], v3[0]
+; CHECK-GI-NEXT:    fmov s3, w9
+; CHECK-GI-NEXT:    ldr w9, [sp, #136]
+; CHECK-GI-NEXT:    mov.b v0[8], v2[0]
+; CHECK-GI-NEXT:    fmov s2, w8
+; CHECK-GI-NEXT:    ldr w8, [sp, #16]
+; CHECK-GI-NEXT:    mov.b v1[8], v3[0]
+; CHECK-GI-NEXT:    fmov s3, w9
+; CHECK-GI-NEXT:    ldr w9, [sp, #144]
+; CHECK-GI-NEXT:    mov.b v0[9], v2[0]
+; CHECK-GI-NEXT:    fmov s2, w8
+; CHECK-GI-NEXT:    ldr w8, [sp, #24]
+; CHECK-GI-NEXT:    mov.b v1[9], v3[0]
+; CHECK-GI-NEXT:    fmov s3, w9
+; CHECK-GI-NEXT:    ldr w9, [sp, #152]
+; CHECK-GI-NEXT:    mov.b v0[10], v2[0]
+; CHECK-GI-NEXT:    fmov s2, w8
+; CHECK-GI-NEXT:    ldr w8, [sp, #32]
+; CHECK-GI-NEXT:    mov.b v1[10], v3[0]
+; CHECK-GI-NEXT:    fmov s3, w9
+; CHECK-GI-NEXT:    ldr w9, [sp, #160]
+; CHECK-GI-NEXT:    mov.b v0[11], v2[0]
+; CHECK-GI-NEXT:    fmov s2, w8
+; CHECK-GI-NEXT:    ldr w8, [sp, #40]
+; CHECK-GI-NEXT:    mov.b v1[11], v3[0]
+; CHECK-GI-NEXT:   ...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/104634


More information about the llvm-commits mailing list