[llvm] [GlobalIsel] Push cast through build vector (PR #104634)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Aug 16 12:07:20 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-amdgpu
@llvm/pr-subscribers-llvm-globalisel
Author: Thorsten Schütt (tschuett)
<details>
<summary>Changes</summary>
Credits: https://github.com/llvm/llvm-project/pull/100563
---
Patch is 116.23 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/104634.diff
16 Files Affected:
- (modified) llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h (+3)
- (modified) llvm/include/llvm/Target/GlobalISel/Combine.td (+16-1)
- (modified) llvm/lib/CodeGen/GlobalISel/CombinerHelperCasts.cpp (+39)
- (modified) llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp (+1)
- (modified) llvm/test/CodeGen/AArch64/GlobalISel/combine-cast.mir (+92)
- (modified) llvm/test/CodeGen/AArch64/GlobalISel/combine-extract-vec-elt.mir (+2-2)
- (modified) llvm/test/CodeGen/AArch64/GlobalISel/combine-with-flags.mir (+33-12)
- (modified) llvm/test/CodeGen/AArch64/arm64-subvector-extend.ll (+272-184)
- (modified) llvm/test/CodeGen/AArch64/arm64-vadd.ll (+31-15)
- (modified) llvm/test/CodeGen/AArch64/neon-extadd.ll (+226-150)
- (modified) llvm/test/CodeGen/AArch64/sext.ll (+207-147)
- (modified) llvm/test/CodeGen/AArch64/vecreduce-add.ll (+396-268)
- (modified) llvm/test/CodeGen/AArch64/xtn.ll (+17-29)
- (modified) llvm/test/CodeGen/AArch64/zext.ll (+146-117)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/combine-shl-from-extend-narrow.postlegal.mir (+6-6)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/combine-shl-from-extend-narrow.prelegal.mir (+4-6)
``````````diff
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
index 32effc536eb35d..9b62d6067be39c 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -900,6 +900,9 @@ class CombinerHelper {
bool matchExtOfExt(const MachineInstr &FirstMI, const MachineInstr &SecondMI,
BuildFnTy &MatchInfo);
+ bool matchCastOfBuildVector(const MachineInstr &CastMI,
+ const MachineInstr &BVMI, BuildFnTy &MatchInfo);
+
private:
/// Checks for legality of an indexed variant of \p LdSt.
bool isIndexedLoadStoreLegal(GLoadStore &LdSt) const;
diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
index 814c5e789cb374..c95f542757c66b 100644
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -1857,6 +1857,18 @@ def anyext_of_anyext : ext_of_ext_opcodes<G_ANYEXT, G_ANYEXT>;
def anyext_of_zext : ext_of_ext_opcodes<G_ANYEXT, G_ZEXT>;
def anyext_of_sext : ext_of_ext_opcodes<G_ANYEXT, G_SEXT>;
+// Push cast through build vector.
+class buildvector_of_opcode<Instruction castOpcode> : GICombineRule <
+ (defs root:$root, build_fn_matchinfo:$matchinfo),
+ (match (G_BUILD_VECTOR $bv, GIVariadic<>:$unused):$Build,
+ (castOpcode $root, $bv):$Cast,
+ [{ return Helper.matchCastOfBuildVector(*${Cast}, *${Build}, ${matchinfo}); }]),
+ (apply [{ Helper.applyBuildFn(*${Cast}, ${matchinfo}); }])>;
+
+def buildvector_of_zext : buildvector_of_opcode<G_ZEXT>;
+def buildvector_of_anyext : buildvector_of_opcode<G_ANYEXT>;
+def buildvector_of_truncate : buildvector_of_opcode<G_TRUNC>;
+
def cast_combines: GICombineGroup<[
truncate_of_zext,
truncate_of_sext,
@@ -1870,7 +1882,10 @@ def cast_combines: GICombineGroup<[
sext_of_anyext,
anyext_of_anyext,
anyext_of_zext,
- anyext_of_sext
+ anyext_of_sext,
+ buildvector_of_zext,
+ buildvector_of_anyext,
+ buildvector_of_truncate
]>;
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelperCasts.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelperCasts.cpp
index 494d8da84445d1..e3208c16cfd586 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelperCasts.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelperCasts.cpp
@@ -273,3 +273,42 @@ bool CombinerHelper::matchExtOfExt(const MachineInstr &FirstMI,
return false;
}
+
+bool CombinerHelper::matchCastOfBuildVector(const MachineInstr &CastMI,
+ const MachineInstr &BVMI,
+ BuildFnTy &MatchInfo) {
+ const GExtOrTruncOp *Cast = cast<GExtOrTruncOp>(&CastMI);
+ const GBuildVector *BV = cast<GBuildVector>(&BVMI);
+
+ if (!MRI.hasOneNonDBGUse(BV->getReg(0)))
+ return false;
+
+ Register Dst = Cast->getReg(0);
+ // The type of the new build vector.
+ LLT DstTy = MRI.getType(Dst);
+ // The scalar or element type of the new build vector.
+ LLT ElemTy = DstTy.getScalarType();
+ // The scalar or element type of the old build vector.
+ LLT InputElemTy = MRI.getType(BV->getReg(0)).getScalarType();
+
+ // Check legality of new build vector, the scalar casts, and profitability of
+ // the many casts.
+ if (!isLegalOrBeforeLegalizer(
+ {TargetOpcode::G_BUILD_VECTOR, {DstTy, ElemTy}}) ||
+ !isLegalOrBeforeLegalizer({Cast->getOpcode(), {ElemTy, InputElemTy}}) ||
+ !isCastFree(Cast->getOpcode(), ElemTy, InputElemTy))
+ return false;
+
+ MatchInfo = [=](MachineIRBuilder &B) {
+ SmallVector<Register> Casts;
+ unsigned Elements = BV->getNumSources();
+ for (unsigned I = 0; I < Elements; ++I)
+ Casts.push_back(
+ B.buildInstr(Cast->getOpcode(), {ElemTy}, {BV->getSourceReg(I)})
+ .getReg(0));
+
+ B.buildBuildVector(Dst, Casts);
+ };
+
+ return true;
+}
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index d3c5742cee3eb4..33a1fa1ad04fdf 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -953,6 +953,7 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
.clampNumElements(0, v2s64, v2s64)
.minScalarOrElt(0, s8)
.widenVectorEltsToVectorMinSize(0, 64)
+ .widenScalarOrEltToNextPow2(0)
.minScalarSameAs(1, 0);
getActionDefinitionsBuilder(G_BUILD_VECTOR_TRUNC).lower();
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-cast.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-cast.mir
index 0f436127ea2eb6..9eef79a9c4bbee 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/combine-cast.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-cast.mir
@@ -129,3 +129,95 @@ body: |
%res:_(<2 x s64>) = G_SELECT %cond(<2 x s32>), %bv, %bv2
%small:_(<2 x s32>) = G_TRUNC %res(<2 x s64>)
$x0 = COPY %small(<2 x s32>)
+...
+---
+name: test_combine_trunc_build_vector
+legalized: true
+body: |
+ bb.1:
+ ; CHECK-PRE-LABEL: name: test_combine_trunc_build_vector
+ ; CHECK-PRE: %arg1:_(s64) = COPY $x0
+ ; CHECK-PRE-NEXT: %arg2:_(s64) = COPY $x0
+ ; CHECK-PRE-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC %arg1(s64)
+ ; CHECK-PRE-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC %arg2(s64)
+ ; CHECK-PRE-NEXT: %small:_(<2 x s32>) = G_BUILD_VECTOR [[TRUNC]](s32), [[TRUNC1]](s32)
+ ; CHECK-PRE-NEXT: $x0 = COPY %small(<2 x s32>)
+ ;
+ ; CHECK-POST-LABEL: name: test_combine_trunc_build_vector
+ ; CHECK-POST: %arg1:_(s64) = COPY $x0
+ ; CHECK-POST-NEXT: %arg2:_(s64) = COPY $x0
+ ; CHECK-POST-NEXT: %bv:_(<2 x s64>) = G_BUILD_VECTOR %arg1(s64), %arg2(s64)
+ ; CHECK-POST-NEXT: %small:_(<2 x s32>) = G_TRUNC %bv(<2 x s64>)
+ ; CHECK-POST-NEXT: $x0 = COPY %small(<2 x s32>)
+ %arg1:_(s64) = COPY $x0
+ %arg2:_(s64) = COPY $x0
+ %bv:_(<2 x s64>) = G_BUILD_VECTOR %arg1(s64), %arg2(s64)
+ %small:_(<2 x s32>) = G_TRUNC %bv(<2 x s64>)
+ $x0 = COPY %small(<2 x s32>)
+...
+---
+name: test_combine_zext_build_vector
+legalized: true
+body: |
+ bb.1:
+ ; CHECK-PRE-LABEL: name: test_combine_zext_build_vector
+ ; CHECK-PRE: %arg1:_(s32) = COPY $w0
+ ; CHECK-PRE-NEXT: %arg2:_(s32) = COPY $w0
+ ; CHECK-PRE-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT %arg1(s32)
+ ; CHECK-PRE-NEXT: [[ZEXT1:%[0-9]+]]:_(s64) = G_ZEXT %arg2(s32)
+ ; CHECK-PRE-NEXT: %large:_(<2 x s64>) = G_BUILD_VECTOR [[ZEXT]](s64), [[ZEXT1]](s64)
+ ; CHECK-PRE-NEXT: $q0 = COPY %large(<2 x s64>)
+ ;
+ ; CHECK-POST-LABEL: name: test_combine_zext_build_vector
+ ; CHECK-POST: %arg1:_(s32) = COPY $w0
+ ; CHECK-POST-NEXT: %arg2:_(s32) = COPY $w0
+ ; CHECK-POST-NEXT: %bv:_(<2 x s32>) = G_BUILD_VECTOR %arg1(s32), %arg2(s32)
+ ; CHECK-POST-NEXT: %large:_(<2 x s64>) = G_ZEXT %bv(<2 x s32>)
+ ; CHECK-POST-NEXT: $q0 = COPY %large(<2 x s64>)
+ %arg1:_(s32) = COPY $w0
+ %arg2:_(s32) = COPY $w0
+ %bv:_(<2 x s32>) = G_BUILD_VECTOR %arg1(s32), %arg2(s32)
+ %large:_(<2 x s64>) = G_ZEXT %bv(<2 x s32>)
+ $q0 = COPY %large(<2 x s64>)
+...
+---
+name: test_combine_anyext_build_vector
+legalized: true
+body: |
+ bb.1:
+ ; CHECK-PRE-LABEL: name: test_combine_anyext_build_vector
+ ; CHECK-PRE: %arg1:_(s32) = COPY $w0
+ ; CHECK-PRE-NEXT: %arg2:_(s32) = COPY $w0
+ ; CHECK-PRE-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT %arg1(s32)
+ ; CHECK-PRE-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT %arg2(s32)
+ ; CHECK-PRE-NEXT: %large:_(<2 x s64>) = G_BUILD_VECTOR [[ANYEXT]](s64), [[ANYEXT1]](s64)
+ ; CHECK-PRE-NEXT: $q0 = COPY %large(<2 x s64>)
+ ;
+ ; CHECK-POST-LABEL: name: test_combine_anyext_build_vector
+ ; CHECK-POST: %arg1:_(s32) = COPY $w0
+ ; CHECK-POST-NEXT: %arg2:_(s32) = COPY $w0
+ ; CHECK-POST-NEXT: %bv:_(<2 x s32>) = G_BUILD_VECTOR %arg1(s32), %arg2(s32)
+ ; CHECK-POST-NEXT: %large:_(<2 x s64>) = G_ANYEXT %bv(<2 x s32>)
+ ; CHECK-POST-NEXT: $q0 = COPY %large(<2 x s64>)
+ %arg1:_(s32) = COPY $w0
+ %arg2:_(s32) = COPY $w0
+ %bv:_(<2 x s32>) = G_BUILD_VECTOR %arg1(s32), %arg2(s32)
+ %large:_(<2 x s64>) = G_ANYEXT %bv(<2 x s32>)
+ $q0 = COPY %large(<2 x s64>)
+...
+---
+name: test_combine_sext_build_vector
+legalized: true
+body: |
+ bb.1:
+ ; CHECK-LABEL: name: test_combine_sext_build_vector
+ ; CHECK: %arg1:_(s32) = COPY $w0
+ ; CHECK-NEXT: %arg2:_(s32) = COPY $w0
+ ; CHECK-NEXT: %bv:_(<2 x s32>) = G_BUILD_VECTOR %arg1(s32), %arg2(s32)
+ ; CHECK-NEXT: %large:_(<2 x s64>) = G_SEXT %bv(<2 x s32>)
+ ; CHECK-NEXT: $q0 = COPY %large(<2 x s64>)
+ %arg1:_(s32) = COPY $w0
+ %arg2:_(s32) = COPY $w0
+ %bv:_(<2 x s32>) = G_BUILD_VECTOR %arg1(s32), %arg2(s32)
+ %large:_(<2 x s64>) = G_SEXT %bv(<2 x s32>)
+ $q0 = COPY %large(<2 x s64>)
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-extract-vec-elt.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-extract-vec-elt.mir
index 70241e71aa593f..c98dcf6ccb7966 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/combine-extract-vec-elt.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-extract-vec-elt.mir
@@ -49,8 +49,8 @@ body: |
; CHECK: liveins: $x0, $x1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: %arg1:_(s64) = COPY $x0
- ; CHECK-NEXT: %extract:_(s32) = G_TRUNC %arg1(s64)
- ; CHECK-NEXT: %zext:_(s64) = G_ZEXT %extract(s32)
+ ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC %arg1(s64)
+ ; CHECK-NEXT: %zext:_(s64) = G_ZEXT [[TRUNC]](s32)
; CHECK-NEXT: $x0 = COPY %zext(s64)
; CHECK-NEXT: RET_ReallyLR implicit $x0
%arg1:_(s64) = COPY $x0
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-with-flags.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-with-flags.mir
index 6eece5c56258dc..8cb44605246ffa 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/combine-with-flags.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-with-flags.mir
@@ -60,8 +60,11 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
- ; CHECK-NEXT: %bv0:_(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY]](s32), [[COPY1]](s32)
- ; CHECK-NEXT: $q0 = COPY %bv0(<4 x s32>)
+ ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
+ ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
+ ; CHECK-NEXT: %trunc:_(<4 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC]](s16), [[TRUNC1]](s16)
+ ; CHECK-NEXT: %zext:_(<4 x s32>) = G_ZEXT %trunc(<4 x s16>)
+ ; CHECK-NEXT: $q0 = COPY %zext(<4 x s32>)
; CHECK-NEXT: RET_ReallyLR implicit $w0
%0:_(s32) = COPY $w0
%1:_(s32) = COPY $w1
@@ -165,8 +168,13 @@ body: |
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $w2
; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $w3
- ; CHECK-NEXT: %bv0:_(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
- ; CHECK-NEXT: $q0 = COPY %bv0(<4 x s32>)
+ ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
+ ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
+ ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32)
+ ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32)
+ ; CHECK-NEXT: %t:_(<4 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16)
+ ; CHECK-NEXT: %s:_(<4 x s32>) = G_SEXT %t(<4 x s16>)
+ ; CHECK-NEXT: $q0 = COPY %s(<4 x s32>)
%0:_(s32) = COPY $w0
%1:_(s32) = COPY $w1
%2:_(s32) = COPY $w2
@@ -188,8 +196,11 @@ body: |
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $w2
; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $w3
- ; CHECK-NEXT: %bv0:_(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
- ; CHECK-NEXT: %t:_(<4 x s16>) = G_TRUNC %bv0(<4 x s32>)
+ ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
+ ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
+ ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32)
+ ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32)
+ ; CHECK-NEXT: %t:_(<4 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16)
; CHECK-NEXT: %z:_(<4 x s32>) = G_ZEXT %t(<4 x s16>)
; CHECK-NEXT: $q0 = COPY %z(<4 x s32>)
%0:_(s32) = COPY $w0
@@ -213,8 +224,11 @@ body: |
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $w2
; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $w3
- ; CHECK-NEXT: %bv0:_(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
- ; CHECK-NEXT: %t:_(<4 x s16>) = nsw G_TRUNC %bv0(<4 x s32>)
+ ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
+ ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
+ ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32)
+ ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32)
+ ; CHECK-NEXT: %t:_(<4 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16)
; CHECK-NEXT: %z:_(<4 x s32>) = G_ZEXT %t(<4 x s16>)
; CHECK-NEXT: $q0 = COPY %z(<4 x s32>)
%0:_(s32) = COPY $w0
@@ -238,8 +252,13 @@ body: |
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $w2
; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $w3
- ; CHECK-NEXT: %bv0:_(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
- ; CHECK-NEXT: $q0 = COPY %bv0(<4 x s32>)
+ ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
+ ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
+ ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32)
+ ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32)
+ ; CHECK-NEXT: %t:_(<4 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16)
+ ; CHECK-NEXT: %z:_(<4 x s32>) = G_ZEXT %t(<4 x s16>)
+ ; CHECK-NEXT: $q0 = COPY %z(<4 x s32>)
%0:_(s32) = COPY $w0
%1:_(s32) = COPY $w1
%2:_(s32) = COPY $w2
@@ -259,8 +278,10 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1
- ; CHECK-NEXT: %bv0:_(<2 x s64>) = G_BUILD_VECTOR [[COPY]](s64), [[COPY1]](s64)
- ; CHECK-NEXT: %z:_(<2 x s32>) = nuw G_TRUNC %bv0(<2 x s64>)
+ ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s64)
+ ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s64)
+ ; CHECK-NEXT: %t:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
+ ; CHECK-NEXT: %z:_(<2 x s32>) = G_ZEXT %t(<2 x s16>)
; CHECK-NEXT: $d0 = COPY %z(<2 x s32>)
%0:_(s64) = COPY $x0
%1:_(s64) = COPY $x1
diff --git a/llvm/test/CodeGen/AArch64/arm64-subvector-extend.ll b/llvm/test/CodeGen/AArch64/arm64-subvector-extend.ll
index abf2e1272d6450..1f5654d59926dc 100644
--- a/llvm/test/CodeGen/AArch64/arm64-subvector-extend.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-subvector-extend.ll
@@ -466,62 +466,92 @@ define <32 x i8> @sext_v32i1(<32 x i1> %arg) {
;
; CHECK-GI-LABEL: sext_v32i1:
; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: fmov s17, w0
-; CHECK-GI-NEXT: fmov s19, w4
-; CHECK-GI-NEXT: ldr s0, [sp]
-; CHECK-GI-NEXT: ldr s21, [sp, #8]
-; CHECK-GI-NEXT: ldr s1, [sp, #32]
-; CHECK-GI-NEXT: ldr s22, [sp, #40]
-; CHECK-GI-NEXT: ldr s2, [sp, #64]
-; CHECK-GI-NEXT: ldr s23, [sp, #72]
-; CHECK-GI-NEXT: ldr s3, [sp, #96]
-; CHECK-GI-NEXT: ldr s24, [sp, #104]
-; CHECK-GI-NEXT: mov.s v17[1], w1
-; CHECK-GI-NEXT: mov.s v19[1], w5
-; CHECK-GI-NEXT: ldr s5, [sp, #128]
-; CHECK-GI-NEXT: ldr s20, [sp, #136]
-; CHECK-GI-NEXT: mov.s v0[1], v21[0]
-; CHECK-GI-NEXT: ldr s7, [sp, #160]
-; CHECK-GI-NEXT: ldr s25, [sp, #168]
-; CHECK-GI-NEXT: mov.s v1[1], v22[0]
-; CHECK-GI-NEXT: mov.s v2[1], v23[0]
-; CHECK-GI-NEXT: mov.s v3[1], v24[0]
-; CHECK-GI-NEXT: mov.s v5[1], v20[0]
-; CHECK-GI-NEXT: mov.s v7[1], v25[0]
-; CHECK-GI-NEXT: ldr s16, [sp, #16]
-; CHECK-GI-NEXT: ldr s18, [sp, #48]
-; CHECK-GI-NEXT: ldr s20, [sp, #80]
-; CHECK-GI-NEXT: ldr s21, [sp, #112]
-; CHECK-GI-NEXT: ldr s22, [sp, #144]
-; CHECK-GI-NEXT: ldr s23, [sp, #176]
-; CHECK-GI-NEXT: mov.s v17[2], w2
-; CHECK-GI-NEXT: mov.s v19[2], w6
-; CHECK-GI-NEXT: mov.s v0[2], v16[0]
-; CHECK-GI-NEXT: mov.s v1[2], v18[0]
-; CHECK-GI-NEXT: mov.s v2[2], v20[0]
-; CHECK-GI-NEXT: mov.s v3[2], v21[0]
-; CHECK-GI-NEXT: mov.s v5[2], v22[0]
-; CHECK-GI-NEXT: mov.s v7[2], v23[0]
-; CHECK-GI-NEXT: ldr s4, [sp, #24]
-; CHECK-GI-NEXT: ldr s6, [sp, #56]
-; CHECK-GI-NEXT: ldr s16, [sp, #88]
-; CHECK-GI-NEXT: ldr s18, [sp, #120]
-; CHECK-GI-NEXT: ldr s20, [sp, #152]
-; CHECK-GI-NEXT: ldr s21, [sp, #184]
-; CHECK-GI-NEXT: mov.s v17[3], w3
-; CHECK-GI-NEXT: mov.s v19[3], w7
-; CHECK-GI-NEXT: mov.s v0[3], v4[0]
-; CHECK-GI-NEXT: mov.s v1[3], v6[0]
-; CHECK-GI-NEXT: mov.s v2[3], v16[0]
-; CHECK-GI-NEXT: mov.s v3[3], v18[0]
-; CHECK-GI-NEXT: mov.s v5[3], v20[0]
-; CHECK-GI-NEXT: mov.s v7[3], v21[0]
-; CHECK-GI-NEXT: uzp1.8h v4, v17, v19
-; CHECK-GI-NEXT: uzp1.8h v0, v0, v1
-; CHECK-GI-NEXT: uzp1.8h v1, v2, v3
-; CHECK-GI-NEXT: uzp1.8h v2, v5, v7
-; CHECK-GI-NEXT: uzp1.16b v0, v4, v0
-; CHECK-GI-NEXT: uzp1.16b v1, v1, v2
+; CHECK-GI-NEXT: ldr w9, [sp, #64]
+; CHECK-GI-NEXT: ldr w8, [sp, #72]
+; CHECK-GI-NEXT: fmov s0, w0
+; CHECK-GI-NEXT: fmov s2, w1
+; CHECK-GI-NEXT: fmov s1, w9
+; CHECK-GI-NEXT: fmov s3, w8
+; CHECK-GI-NEXT: ldr w8, [sp, #80]
+; CHECK-GI-NEXT: ldr w9, [sp, #128]
+; CHECK-GI-NEXT: mov.b v0[1], v2[0]
+; CHECK-GI-NEXT: fmov s2, w2
+; CHECK-GI-NEXT: mov.b v1[1], v3[0]
+; CHECK-GI-NEXT: fmov s3, w8
+; CHECK-GI-NEXT: ldr w8, [sp, #88]
+; CHECK-GI-NEXT: mov.b v0[2], v2[0]
+; CHECK-GI-NEXT: fmov s2, w3
+; CHECK-GI-NEXT: mov.b v1[2], v3[0]
+; CHECK-GI-NEXT: fmov s3, w8
+; CHECK-GI-NEXT: ldr w8, [sp, #96]
+; CHECK-GI-NEXT: mov.b v0[3], v2[0]
+; CHECK-GI-NEXT: fmov s2, w4
+; CHECK-GI-NEXT: mov.b v1[3], v3[0]
+; CHECK-GI-NEXT: fmov s3, w8
+; CHECK-GI-NEXT: ldr w8, [sp, #104]
+; CHECK-GI-NEXT: mov.b v0[4], v2[0]
+; CHECK-GI-NEXT: fmov s2, w5
+; CHECK-GI-NEXT: mov.b v1[4], v3[0]
+; CHECK-GI-NEXT: fmov s3, w8
+; CHECK-GI-NEXT: ldr w8, [sp, #112]
+; CHECK-GI-NEXT: mov.b v0[5], v2[0]
+; CHECK-GI-NEXT: fmov s2, w6
+; CHECK-GI-NEXT: mov.b v1[5], v3[0]
+; CHECK-GI-NEXT: fmov s3, w8
+; CHECK-GI-NEXT: ldr w8, [sp, #120]
+; CHECK-GI-NEXT: mov.b v0[6], v2[0]
+; CHECK-GI-NEXT: fmov s2, w7
+; CHECK-GI-NEXT: mov.b v1[6], v3[0]
+; CHECK-GI-NEXT: fmov s3, w8
+; CHECK-GI-NEXT: ldr w8, [sp]
+; CHECK-GI-NEXT: mov.b v0[7], v2[0]
+; CHECK-GI-NEXT: fmov s2, w8
+; CHECK-GI-NEXT: ldr w8, [sp, #8]
+; CHECK-GI-NEXT: mov.b v1[7], v3[0]
+; CHECK-GI-NEXT: fmov s3, w9
+; CHECK-GI-NEXT: ldr w9, [sp, #136]
+; CHECK-GI-NEXT: mov.b v0[8], v2[0]
+; CHECK-GI-NEXT: fmov s2, w8
+; CHECK-GI-NEXT: ldr w8, [sp, #16]
+; CHECK-GI-NEXT: mov.b v1[8], v3[0]
+; CHECK-GI-NEXT: fmov s3, w9
+; CHECK-GI-NEXT: ldr w9, [sp, #144]
+; CHECK-GI-NEXT: mov.b v0[9], v2[0]
+; CHECK-GI-NEXT: fmov s2, w8
+; CHECK-GI-NEXT: ldr w8, [sp, #24]
+; CHECK-GI-NEXT: mov.b v1[9], v3[0]
+; CHECK-GI-NEXT: fmov s3, w9
+; CHECK-GI-NEXT: ldr w9, [sp, #152]
+; CHECK-GI-NEXT: mov.b v0[10], v2[0]
+; CHECK-GI-NEXT: fmov s2, w8
+; CHECK-GI-NEXT: ldr w8, [sp, #32]
+; CHECK-GI-NEXT: mov.b v1[10], v3[0]
+; CHECK-GI-NEXT: fmov s3, w9
+; CHECK-GI-NEXT: ldr w9, [sp, #160]
+; CHECK-GI-NEXT: mov.b v0[11], v2[0]
+; CHECK-GI-NEXT: fmov s2, w8
+; CHECK-GI-NEXT: ldr w8, [sp, #40]
+; CHECK-GI-NEXT: mov.b v1[11], v3[0]
+; CHECK-GI-NEXT: ...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/104634
More information about the llvm-commits
mailing list