[llvm] [GlobalISel] Combine unmerge(unmerge()) if the result is legal. (PR #109606)
via llvm-commits
llvm-commits at lists.llvm.org
Sun Sep 22 21:35:19 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-aarch64
Author: David Green (davemgreen)
<details>
<summary>Changes</summary>
This attempts to fold:
```
%1:_(<2 x s32>), %2:_(<2 x s32>) = G_UNMERGE_VALUES %0:_(<4 x s32>)
%3:_(s32), %4:_(s32) = G_UNMERGE_VALUES %1
```
Into a single UNMERGE:
```
%3:_(s32), %4:_(s32), %5:_(s32), %6:_(s32) = G_UNMERGE_VALUES %0
```
This transform already exists; this patch alters it to occur when the resulting UNMERGE is considered legal.
At the moment it does not try to transform cases where the result would extract a subelement from a vector, as the code is not set up to handle it:
```
%1:_(s32), %2:_(s32) = G_UNMERGE_VALUES %0:_(<2 x s32>)
%3:_(s16), %4:_(s16) = G_UNMERGE_VALUES %1
```
This helps us reduce the amount of legalization artefacts, especially from widened vectors padded with undef.
---
Patch is 118.40 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/109606.diff
21 Files Affected:
- (modified) llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h (+4)
- (modified) llvm/test/CodeGen/AArch64/GlobalISel/legalize-cmp.mir (+4-5)
- (modified) llvm/test/CodeGen/AArch64/GlobalISel/legalize-select.mir (+11-16)
- (modified) llvm/test/CodeGen/AArch64/bswap.ll (-2)
- (modified) llvm/test/CodeGen/AArch64/fixed-vector-deinterleave.ll (-5)
- (modified) llvm/test/CodeGen/AArch64/fpext.ll (+13-22)
- (modified) llvm/test/CodeGen/AArch64/fptoi.ll (+44-88)
- (modified) llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll (+22-25)
- (modified) llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll (+22-25)
- (modified) llvm/test/CodeGen/AArch64/fptrunc.ll (-3)
- (modified) llvm/test/CodeGen/AArch64/itofp.ll (+24-95)
- (modified) llvm/test/CodeGen/AArch64/neon-perm.ll (+1-9)
- (modified) llvm/test/CodeGen/AArch64/sadd_sat_vec.ll (+35-24)
- (modified) llvm/test/CodeGen/AArch64/shift.ll (+3-30)
- (modified) llvm/test/CodeGen/AArch64/shufflevector.ll (+48-25)
- (modified) llvm/test/CodeGen/AArch64/ssub_sat_vec.ll (+35-24)
- (modified) llvm/test/CodeGen/AArch64/uadd_sat_vec.ll (+36-25)
- (modified) llvm/test/CodeGen/AArch64/usub_sat_vec.ll (+34-23)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-unmerge-values.mir (+18-21)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store-global.mir (+80-88)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-trunc.mir (+5-7)
``````````diff
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h b/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h
index bc83f19dc581fa..0dc827b9ad43eb 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h
@@ -1090,6 +1090,10 @@ class LegalizationArtifactCombiner {
LegalizeActionStep ActionStep = LI.getAction(
{TargetOpcode::G_UNMERGE_VALUES, {OpTy, SrcUnmergeSrcTy}});
switch (ActionStep.Action) {
+ case LegalizeActions::Legal:
+ if (!OpTy.isVector() || !LI.isLegal({TargetOpcode::G_UNMERGE_VALUES,
+ {DestTy, SrcUnmergeSrcTy}}))
+ return false;
case LegalizeActions::Lower:
case LegalizeActions::Unsupported:
break;
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-cmp.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-cmp.mir
index 89750c90fc1cbe..bd80a892e239e4 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-cmp.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-cmp.mir
@@ -412,12 +412,11 @@ body: |
; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s8>) = G_BUILD_VECTOR %const(s8), %const(s8), %const(s8), %const(s8), %const(s8), %const(s8), %const(s8), %const(s8)
; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<8 x s8>) = G_BUILD_VECTOR %const(s8), %const(s8), %const(s8), %const(s8), %const(s8), %const(s8), %const(s8), %const(s8)
; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(<8 x s8>) = G_ICMP intpred(eq), [[BUILD_VECTOR]](<8 x s8>), [[BUILD_VECTOR1]]
- ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<4 x s8>), [[UV1:%[0-9]+]]:_(<4 x s8>) = G_UNMERGE_VALUES [[ICMP]](<8 x s8>)
- ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8), [[UV4:%[0-9]+]]:_(s8), [[UV5:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[UV]](<4 x s8>)
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8), [[UV4:%[0-9]+]]:_(s8), [[UV5:%[0-9]+]]:_(s8), [[UV6:%[0-9]+]]:_(s8), [[UV7:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[ICMP]](<8 x s8>)
; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
- ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UV2]](s8)
- ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV3]](s8)
- ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[UV4]](s8)
+ ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UV]](s8)
+ ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV1]](s8)
+ ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[UV2]](s8)
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32), [[DEF]](s32)
; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR2]](<4 x s32>), [[C]](s64)
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-select.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-select.mir
index 92f8e524dbb312..52a28ad37e362e 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-select.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-select.mir
@@ -307,29 +307,24 @@ body: |
; CHECK-NEXT: [[TRUNC8:%[0-9]+]]:_(s8) = G_TRUNC [[UV7]](s16)
; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<8 x s8>) = G_BUILD_VECTOR [[TRUNC5]](s8), [[TRUNC6]](s8), [[TRUNC7]](s8), [[TRUNC8]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8)
; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<8 x s8>) = G_SHUFFLE_VECTOR [[BUILD_VECTOR]](<8 x s8>), [[BUILD_VECTOR1]], shufflemask(0, 0, 0, 0, undef, undef, undef, undef)
- ; CHECK-NEXT: [[UV8:%[0-9]+]]:_(<4 x s8>), [[UV9:%[0-9]+]]:_(<4 x s8>) = G_UNMERGE_VALUES [[SHUF]](<8 x s8>)
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s8) = G_CONSTANT i8 1
- ; CHECK-NEXT: [[UV10:%[0-9]+]]:_(s8), [[UV11:%[0-9]+]]:_(s8), [[UV12:%[0-9]+]]:_(s8), [[UV13:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[UV8]](<4 x s8>)
- ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<8 x s8>) = G_BUILD_VECTOR [[UV10]](s8), [[UV11]](s8), [[UV12]](s8), [[UV13]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8)
- ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(<8 x s16>) = G_ANYEXT [[BUILD_VECTOR2]](<8 x s8>)
- ; CHECK-NEXT: [[UV14:%[0-9]+]]:_(<4 x s16>), [[UV15:%[0-9]+]]:_(<4 x s16>) = G_UNMERGE_VALUES [[ANYEXT]](<8 x s16>)
- ; CHECK-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<8 x s8>) = G_BUILD_VECTOR [[C2]](s8), [[C2]](s8), [[C2]](s8), [[C2]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8)
- ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(<8 x s16>) = G_ANYEXT [[BUILD_VECTOR3]](<8 x s8>)
- ; CHECK-NEXT: [[UV16:%[0-9]+]]:_(<4 x s16>), [[UV17:%[0-9]+]]:_(<4 x s16>) = G_UNMERGE_VALUES [[ANYEXT1]](<8 x s16>)
- ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(<4 x s16>) = G_XOR [[UV14]], [[UV16]]
+ ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(<8 x s16>) = G_ANYEXT [[SHUF]](<8 x s8>)
+ ; CHECK-NEXT: [[UV8:%[0-9]+]]:_(<4 x s16>), [[UV9:%[0-9]+]]:_(<4 x s16>) = G_UNMERGE_VALUES [[ANYEXT]](<8 x s16>)
+ ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<8 x s8>) = G_BUILD_VECTOR [[C2]](s8), [[C2]](s8), [[C2]](s8), [[C2]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8)
+ ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(<8 x s16>) = G_ANYEXT [[BUILD_VECTOR2]](<8 x s8>)
+ ; CHECK-NEXT: [[UV10:%[0-9]+]]:_(<4 x s16>), [[UV11:%[0-9]+]]:_(<4 x s16>) = G_UNMERGE_VALUES [[ANYEXT1]](<8 x s16>)
+ ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(<4 x s16>) = G_XOR [[UV8]], [[UV10]]
; CHECK-NEXT: [[TRUNC9:%[0-9]+]]:_(<4 x s16>) = G_TRUNC [[ICMP]](<4 x s32>)
- ; CHECK-NEXT: [[UV18:%[0-9]+]]:_(s8), [[UV19:%[0-9]+]]:_(s8), [[UV20:%[0-9]+]]:_(s8), [[UV21:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[UV8]](<4 x s8>)
- ; CHECK-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<8 x s8>) = G_BUILD_VECTOR [[UV18]](s8), [[UV19]](s8), [[UV20]](s8), [[UV21]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8)
- ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(<8 x s16>) = G_ANYEXT [[BUILD_VECTOR4]](<8 x s8>)
- ; CHECK-NEXT: [[UV22:%[0-9]+]]:_(<4 x s16>), [[UV23:%[0-9]+]]:_(<4 x s16>) = G_UNMERGE_VALUES [[ANYEXT2]](<8 x s16>)
- ; CHECK-NEXT: [[AND:%[0-9]+]]:_(<4 x s16>) = G_AND [[TRUNC9]], [[UV22]]
+ ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(<8 x s16>) = G_ANYEXT [[SHUF]](<8 x s8>)
+ ; CHECK-NEXT: [[UV12:%[0-9]+]]:_(<4 x s16>), [[UV13:%[0-9]+]]:_(<4 x s16>) = G_UNMERGE_VALUES [[ANYEXT2]](<8 x s16>)
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:_(<4 x s16>) = G_AND [[TRUNC9]], [[UV12]]
; CHECK-NEXT: [[TRUNC10:%[0-9]+]]:_(<4 x s16>) = G_TRUNC [[ICMP1]](<4 x s32>)
; CHECK-NEXT: [[AND1:%[0-9]+]]:_(<4 x s16>) = G_AND [[TRUNC10]], [[XOR]]
; CHECK-NEXT: [[OR:%[0-9]+]]:_(<4 x s16>) = G_OR [[AND]], [[AND1]]
; CHECK-NEXT: [[ANYEXT3:%[0-9]+]]:_(<4 x s32>) = G_ANYEXT [[OR]](<4 x s16>)
; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; CHECK-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[C3]](s32), [[C3]](s32), [[C3]](s32), [[C3]](s32)
- ; CHECK-NEXT: %zext_select:_(<4 x s32>) = G_AND [[ANYEXT3]], [[BUILD_VECTOR5]]
+ ; CHECK-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[C3]](s32), [[C3]](s32), [[C3]](s32), [[C3]](s32)
+ ; CHECK-NEXT: %zext_select:_(<4 x s32>) = G_AND [[ANYEXT3]], [[BUILD_VECTOR3]]
; CHECK-NEXT: $q0 = COPY %zext_select(<4 x s32>)
; CHECK-NEXT: RET_ReallyLR implicit $q0
%w0:_(s32) = COPY $w0
diff --git a/llvm/test/CodeGen/AArch64/bswap.ll b/llvm/test/CodeGen/AArch64/bswap.ll
index e90014be21deb3..a541f6b999111d 100644
--- a/llvm/test/CodeGen/AArch64/bswap.ll
+++ b/llvm/test/CodeGen/AArch64/bswap.ll
@@ -181,8 +181,6 @@ define <2 x i16> @bswap_v2i16(<2 x i16> %a){
; CHECK-GI-NEXT: mov w8, v0.s[1]
; CHECK-GI-NEXT: mov v0.h[1], w8
; CHECK-GI-NEXT: rev16 v0.8b, v0.8b
-; CHECK-GI-NEXT: mov h1, v0.h[1]
-; CHECK-GI-NEXT: mov v0.h[1], v1.h[0]
; CHECK-GI-NEXT: ushll v0.4s, v0.4h, #0
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-GI-NEXT: ret
diff --git a/llvm/test/CodeGen/AArch64/fixed-vector-deinterleave.ll b/llvm/test/CodeGen/AArch64/fixed-vector-deinterleave.ll
index 5bd680ed489389..bbfec8c7c33617 100644
--- a/llvm/test/CodeGen/AArch64/fixed-vector-deinterleave.ll
+++ b/llvm/test/CodeGen/AArch64/fixed-vector-deinterleave.ll
@@ -18,11 +18,6 @@ define {<2 x half>, <2 x half>} @vector_deinterleave_v2f16_v4f16(<4 x half> %vec
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: uzp1 v2.4h, v0.4h, v0.4h
; CHECK-GI-NEXT: uzp2 v1.4h, v0.4h, v0.4h
-; CHECK-GI-NEXT: mov h0, v2.h[1]
-; CHECK-GI-NEXT: mov h3, v1.h[1]
-; CHECK-GI-NEXT: mov v2.h[1], v0.h[0]
-; CHECK-GI-NEXT: mov v1.h[1], v3.h[0]
-; CHECK-GI-NEXT: // kill: def $d1 killed $d1 killed $q1
; CHECK-GI-NEXT: fmov d0, d2
; CHECK-GI-NEXT: ret
%retval = call {<2 x half>, <2 x half>} @llvm.vector.deinterleave2.v4f16(<4 x half> %vec)
diff --git a/llvm/test/CodeGen/AArch64/fpext.ll b/llvm/test/CodeGen/AArch64/fpext.ll
index d942839c577d2b..df90f9d5f09109 100644
--- a/llvm/test/CodeGen/AArch64/fpext.ll
+++ b/llvm/test/CodeGen/AArch64/fpext.ll
@@ -376,15 +376,15 @@ define <4 x double> @fpext_v4f16_v4f64(<4 x half> %a) {
; CHECK-GI-LABEL: fpext_v4f16_v4f64:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT: mov s1, v0.s[1]
-; CHECK-GI-NEXT: mov h2, v0.h[1]
+; CHECK-GI-NEXT: mov h1, v0.h[1]
+; CHECK-GI-NEXT: mov h2, v0.h[2]
+; CHECK-GI-NEXT: mov h3, v0.h[3]
; CHECK-GI-NEXT: fcvt d0, h0
-; CHECK-GI-NEXT: mov h3, v1.h[1]
-; CHECK-GI-NEXT: fcvt d2, h2
-; CHECK-GI-NEXT: fcvt d1, h1
-; CHECK-GI-NEXT: fcvt d3, h3
-; CHECK-GI-NEXT: mov v0.d[1], v2.d[0]
-; CHECK-GI-NEXT: mov v1.d[1], v3.d[0]
+; CHECK-GI-NEXT: fcvt d4, h1
+; CHECK-GI-NEXT: fcvt d1, h2
+; CHECK-GI-NEXT: fcvt d2, h3
+; CHECK-GI-NEXT: mov v0.d[1], v4.d[0]
+; CHECK-GI-NEXT: mov v1.d[1], v2.d[0]
; CHECK-GI-NEXT: ret
entry:
%c = fpext <4 x half> %a to <4 x double>
@@ -392,20 +392,11 @@ entry:
}
define <2 x float> @fpext_v2f16_v2f32(<2 x half> %a) {
-; CHECK-SD-LABEL: fpext_v2f16_v2f32:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: fcvtl v0.4s, v0.4h
-; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: fpext_v2f16_v2f32:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT: mov h1, v0.h[1]
-; CHECK-GI-NEXT: mov v0.h[1], v1.h[0]
-; CHECK-GI-NEXT: fcvtl v0.4s, v0.4h
-; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: fpext_v2f16_v2f32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fcvtl v0.4s, v0.4h
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT: ret
entry:
%c = fpext <2 x half> %a to <2 x float>
ret <2 x float> %c
diff --git a/llvm/test/CodeGen/AArch64/fptoi.ll b/llvm/test/CodeGen/AArch64/fptoi.ll
index 20b5567e973d09..3fd3baa6a27090 100644
--- a/llvm/test/CodeGen/AArch64/fptoi.ll
+++ b/llvm/test/CodeGen/AArch64/fptoi.ll
@@ -3981,9 +3981,6 @@ define <2 x i64> @fptos_v2f16_v2i64(<2 x half> %a) {
;
; CHECK-GI-NOFP16-LABEL: fptos_v2f16_v2i64:
; CHECK-GI-NOFP16: // %bb.0: // %entry
-; CHECK-GI-NOFP16-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NOFP16-NEXT: mov h1, v0.h[1]
-; CHECK-GI-NOFP16-NEXT: mov v0.h[1], v1.h[0]
; CHECK-GI-NOFP16-NEXT: fcvtl v0.4s, v0.4h
; CHECK-GI-NOFP16-NEXT: fcvtl v0.2d, v0.2s
; CHECK-GI-NOFP16-NEXT: fcvtzs v0.2d, v0.2d
@@ -4028,9 +4025,6 @@ define <2 x i64> @fptou_v2f16_v2i64(<2 x half> %a) {
;
; CHECK-GI-NOFP16-LABEL: fptou_v2f16_v2i64:
; CHECK-GI-NOFP16: // %bb.0: // %entry
-; CHECK-GI-NOFP16-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NOFP16-NEXT: mov h1, v0.h[1]
-; CHECK-GI-NOFP16-NEXT: mov v0.h[1], v1.h[0]
; CHECK-GI-NOFP16-NEXT: fcvtl v0.4s, v0.4h
; CHECK-GI-NOFP16-NEXT: fcvtl v0.2d, v0.2s
; CHECK-GI-NOFP16-NEXT: fcvtzu v0.2d, v0.2d
@@ -4227,17 +4221,17 @@ define <4 x i64> @fptos_v4f16_v4i64(<4 x half> %a) {
; CHECK-GI-FP16-LABEL: fptos_v4f16_v4i64:
; CHECK-GI-FP16: // %bb.0: // %entry
; CHECK-GI-FP16-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-FP16-NEXT: mov s1, v0.s[1]
-; CHECK-GI-FP16-NEXT: mov h2, v0.h[1]
+; CHECK-GI-FP16-NEXT: mov h1, v0.h[1]
+; CHECK-GI-FP16-NEXT: mov h2, v0.h[2]
+; CHECK-GI-FP16-NEXT: mov h3, v0.h[3]
; CHECK-GI-FP16-NEXT: fcvt d0, h0
-; CHECK-GI-FP16-NEXT: mov h3, v1.h[1]
-; CHECK-GI-FP16-NEXT: fcvt d2, h2
; CHECK-GI-FP16-NEXT: fcvt d1, h1
+; CHECK-GI-FP16-NEXT: fcvt d2, h2
; CHECK-GI-FP16-NEXT: fcvt d3, h3
-; CHECK-GI-FP16-NEXT: mov v0.d[1], v2.d[0]
-; CHECK-GI-FP16-NEXT: mov v1.d[1], v3.d[0]
+; CHECK-GI-FP16-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-GI-FP16-NEXT: mov v2.d[1], v3.d[0]
; CHECK-GI-FP16-NEXT: fcvtzs v0.2d, v0.2d
-; CHECK-GI-FP16-NEXT: fcvtzs v1.2d, v1.2d
+; CHECK-GI-FP16-NEXT: fcvtzs v1.2d, v2.2d
; CHECK-GI-FP16-NEXT: ret
entry:
%c = fptosi <4 x half> %a to <4 x i64>
@@ -4293,17 +4287,17 @@ define <4 x i64> @fptou_v4f16_v4i64(<4 x half> %a) {
; CHECK-GI-FP16-LABEL: fptou_v4f16_v4i64:
; CHECK-GI-FP16: // %bb.0: // %entry
; CHECK-GI-FP16-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-FP16-NEXT: mov s1, v0.s[1]
-; CHECK-GI-FP16-NEXT: mov h2, v0.h[1]
+; CHECK-GI-FP16-NEXT: mov h1, v0.h[1]
+; CHECK-GI-FP16-NEXT: mov h2, v0.h[2]
+; CHECK-GI-FP16-NEXT: mov h3, v0.h[3]
; CHECK-GI-FP16-NEXT: fcvt d0, h0
-; CHECK-GI-FP16-NEXT: mov h3, v1.h[1]
-; CHECK-GI-FP16-NEXT: fcvt d2, h2
; CHECK-GI-FP16-NEXT: fcvt d1, h1
+; CHECK-GI-FP16-NEXT: fcvt d2, h2
; CHECK-GI-FP16-NEXT: fcvt d3, h3
-; CHECK-GI-FP16-NEXT: mov v0.d[1], v2.d[0]
-; CHECK-GI-FP16-NEXT: mov v1.d[1], v3.d[0]
+; CHECK-GI-FP16-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-GI-FP16-NEXT: mov v2.d[1], v3.d[0]
; CHECK-GI-FP16-NEXT: fcvtzu v0.2d, v0.2d
-; CHECK-GI-FP16-NEXT: fcvtzu v1.2d, v1.2d
+; CHECK-GI-FP16-NEXT: fcvtzu v1.2d, v2.2d
; CHECK-GI-FP16-NEXT: ret
entry:
%c = fptoui <4 x half> %a to <4 x i64>
@@ -4389,29 +4383,29 @@ define <8 x i64> @fptos_v8f16_v8i64(<8 x half> %a) {
;
; CHECK-GI-FP16-LABEL: fptos_v8f16_v8i64:
; CHECK-GI-FP16: // %bb.0: // %entry
-; CHECK-GI-FP16-NEXT: mov s1, v0.s[1]
-; CHECK-GI-FP16-NEXT: mov s2, v0.s[2]
-; CHECK-GI-FP16-NEXT: mov s3, v0.s[3]
-; CHECK-GI-FP16-NEXT: mov h4, v0.h[1]
+; CHECK-GI-FP16-NEXT: mov h1, v0.h[1]
+; CHECK-GI-FP16-NEXT: mov h2, v0.h[2]
+; CHECK-GI-FP16-NEXT: mov h3, v0.h[3]
+; CHECK-GI-FP16-NEXT: mov h4, v0.h[4]
+; CHECK-GI-FP16-NEXT: mov h5, v0.h[5]
+; CHECK-GI-FP16-NEXT: mov h6, v0.h[6]
+; CHECK-GI-FP16-NEXT: mov h7, v0.h[7]
; CHECK-GI-FP16-NEXT: fcvt d0, h0
-; CHECK-GI-FP16-NEXT: mov h5, v1.h[1]
-; CHECK-GI-FP16-NEXT: mov h6, v2.h[1]
-; CHECK-GI-FP16-NEXT: mov h7, v3.h[1]
-; CHECK-GI-FP16-NEXT: fcvt d4, h4
; CHECK-GI-FP16-NEXT: fcvt d1, h1
; CHECK-GI-FP16-NEXT: fcvt d2, h2
; CHECK-GI-FP16-NEXT: fcvt d3, h3
+; CHECK-GI-FP16-NEXT: fcvt d4, h4
; CHECK-GI-FP16-NEXT: fcvt d5, h5
; CHECK-GI-FP16-NEXT: fcvt d6, h6
; CHECK-GI-FP16-NEXT: fcvt d7, h7
-; CHECK-GI-FP16-NEXT: mov v0.d[1], v4.d[0]
-; CHECK-GI-FP16-NEXT: mov v1.d[1], v5.d[0]
-; CHECK-GI-FP16-NEXT: mov v2.d[1], v6.d[0]
-; CHECK-GI-FP16-NEXT: mov v3.d[1], v7.d[0]
+; CHECK-GI-FP16-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-GI-FP16-NEXT: mov v2.d[1], v3.d[0]
+; CHECK-GI-FP16-NEXT: mov v4.d[1], v5.d[0]
+; CHECK-GI-FP16-NEXT: mov v6.d[1], v7.d[0]
; CHECK-GI-FP16-NEXT: fcvtzs v0.2d, v0.2d
-; CHECK-GI-FP16-NEXT: fcvtzs v1.2d, v1.2d
-; CHECK-GI-FP16-NEXT: fcvtzs v2.2d, v2.2d
-; CHECK-GI-FP16-NEXT: fcvtzs v3.2d, v3.2d
+; CHECK-GI-FP16-NEXT: fcvtzs v1.2d, v2.2d
+; CHECK-GI-FP16-NEXT: fcvtzs v2.2d, v4.2d
+; CHECK-GI-FP16-NEXT: fcvtzs v3.2d, v6.2d
; CHECK-GI-FP16-NEXT: ret
entry:
%c = fptosi <8 x half> %a to <8 x i64>
@@ -4497,29 +4491,29 @@ define <8 x i64> @fptou_v8f16_v8i64(<8 x half> %a) {
;
; CHECK-GI-FP16-LABEL: fptou_v8f16_v8i64:
; CHECK-GI-FP16: // %bb.0: // %entry
-; CHECK-GI-FP16-NEXT: mov s1, v0.s[1]
-; CHECK-GI-FP16-NEXT: mov s2, v0.s[2]
-; CHECK-GI-FP16-NEXT: mov s3, v0.s[3]
-; CHECK-GI-FP16-NEXT: mov h4, v0.h[1]
+; CHECK-GI-FP16-NEXT: mov h1, v0.h[1]
+; CHECK-GI-FP16-NEXT: mov h2, v0.h[2]
+; CHECK-GI-FP16-NEXT: mov h3, v0.h[3]
+; CHECK-GI-FP16-NEXT: mov h4, v0.h[4]
+; CHECK-GI-FP16-NEXT: mov h5, v0.h[5]
+; CHECK-GI-FP16-NEXT: mov h6, v0.h[6]
+; CHECK-GI-FP16-NEXT: mov h7, v0.h[7]
; CHECK-GI-FP16-NEXT: fcvt d0, h0
-; CHECK-GI-FP16-NEXT: mov h5, v1.h[1]
-; CHECK-GI-FP16-NEXT: mov h6, v2.h[1]
-; CHECK-GI-FP16-NEXT: mov h7, v3.h[1]
-; CHECK-GI-FP16-NEXT: fcvt d4, h4
; CHECK-GI-FP16-NEXT: fcvt d1, h1
; CHECK-GI-FP16-NEXT: fcvt d2, h2
; CHECK-GI-FP16-NEXT: fcvt d3, h3
+; CHECK-GI-FP16-NEXT: fcvt d4, h4
; CHECK-GI-FP16-NEXT: fcvt d5, h5
; CHECK-GI-FP16-NEXT: fcvt d6, h6
; CHECK-GI-FP16-NEXT: fcvt d7, h7
-; CHECK-GI-FP16-NEXT: mov v0.d[1], v4.d[0]
-; CHECK-GI-FP16-NEXT: mov v1.d[1], v5.d[0]
-; CHECK-GI-FP16-NEXT: mov v2.d[1], v6.d[0]
-; CHECK-GI-FP16-NEXT: mov v3.d[1], v7.d[0]
+; CHECK-GI-FP16-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-GI-FP16-NEXT: mov v2.d[1], v3.d[0]
+; CHECK-GI-FP16-NEXT: mov v4.d[1], v5.d[0]
+; CHECK-GI-FP16-NEXT: mov v6.d[1], v7.d[0]
; CHECK-GI-FP16-NEXT: fcvtzu v0.2d, v0.2d
-; CHECK-GI-FP16-NEXT: fcvtzu v1.2d, v1.2d
-; CHECK-GI-FP16-NEXT: fcvtzu v2.2d, v2.2d
-; CHECK-GI-FP16-NEXT: fcvtzu v3.2d, v3.2d
+; CHECK-GI-FP16-NEXT: fcvtzu v1.2d, v2.2d
+; CHECK-GI-FP16-NEXT: fcvtzu v2.2d, v4.2d
+; CHECK-GI-FP16-NEXT: fcvtzu v3.2d, v6.2d
; CHECK-GI-FP16-NEXT: ret
entry:
%c = fptoui <8 x half> %a to <8 x i64>
@@ -5728,9 +5722,6 @@ define <2 x i32> @fptos_v2f16_v2i32(<2 x half> %a) {
;
; CHECK-GI-LABEL: fptos_v2f16_v2i32:
; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT: mov h1, v0.h[1]
-; CHECK-GI-NEXT: mov v0.h[1], v1.h[0]
; CHECK-GI-NEXT: fcvtl v0.4s, v0.4h
; CHECK-GI-NEXT: fcvtzs v0.2s, v0.2s
; CHECK-GI-NEXT: ret
@@ -5749,9 +5740,6 @@ define <2 x i32> @fptou_v2f16_v2i32(<2 x half> %a) {
;
; CHECK-GI-LABEL: fptou_v2f16_v2i32:
; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT: mov h1, v0.h[1]
-; CHECK-GI-NEXT: mov v0.h[1], v1.h[0]
; CHECK-GI-NEXT: fcvtl v0.4s, v0.4h
; CHECK-GI-NEXT: fcvtzu v0.2s, v0.2s
; CHECK-GI-NEXT: ret
@@ -6004,21 +5992,13 @@ define <2 x i16> @fptos_v2f16_v2i16(<2 x half> %a) {
;
; CHECK-GI-NOFP16-LABEL: fptos_v2f16_v2i16:
; CHECK-GI-NOFP16: // %bb.0: // %entry
-; CHECK-GI-NOFP16-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NOFP16-NEXT: mov h1, v0.h[1]
-; CHECK-GI-NOFP16-NEXT: mov v0.h[1], v1.h[0]
; CHECK-GI-NOFP16-NEXT: fcvtl v0.4s, v0.4h
; CHECK-GI-NOFP16-NEXT: fcvtzs v0.2s, v0.2s
; CHECK-GI-NOFP16-NEXT: ret
;
; CHECK-GI-FP16-LABEL: fptos_v2f16_v2i16:
; CHECK-GI-FP16: // %bb.0: // %entry
-; CHECK-GI-FP16-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-FP16-NEXT: mov h1, v0.h[1]
-; CHECK-GI-FP16-NEXT: mov v0.h[1], v1.h[0]
; CHECK-GI-FP16-NEXT: fcvtzs v0.4h, v0.4h
-; CHECK-GI-FP16-NEXT: mov h1, v0.h[1]
-; CHECK-GI-FP16-NEXT: mov v0.h[1], v1.h[0]
; CHECK-GI-FP16-NEXT: ushll v0.4s, v0.4h, #0
; CHECK-GI-FP16-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-GI-FP16-NEXT: ret
@@ -6037,21 +6017,13 @@ define <2 x i16> @fptou_v2f16_v2i16(<2 x half> %a) {
;
; CHECK-GI-NOFP16-LABEL: fptou_v2f16_v2i16:
; CHECK-GI-NOFP16: // %bb.0: // %entry
-; CHECK-GI-NOFP16-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NOFP16-NEXT: mov h1, v0.h[1]
-; CHECK-GI-NOFP16-NEXT: mov v0.h[1], v1.h[0]
; CHECK-GI-NOFP16-NEXT: fcvtl v0.4s, v0.4h
; CHECK-GI-NOFP16-NEXT: fcvtzu v0.2s, v0.2s
; CHECK-GI-NOFP16-NEXT: ret
;
; CHECK-GI-FP16-LABEL: fptou_v2f16_v2i16:
; CHECK-GI-FP16: // %bb.0: // %entry
-; CHECK-GI-FP16-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-FP16-NEXT: mov h1, v0.h[1]
-; CHECK-GI-FP16-NEXT: mov v0.h[1], v1.h[0]
; CHECK-GI-FP16-NEXT: ...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/109606
More information about the llvm-commits
mailing list