[llvm] [DAG] Add users of operand of simplified extract_vector_elt to worklist (PR #100074)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Jul 23 00:51:41 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-selectiondag
Author: David Green (davemgreen)
<details>
<summary>Changes</summary>
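When an extract_vector_elt of an insert_vector_elt at the same index is simplified to the inserted element, this patch also adds the users of that insert_vector_elt operand to the DAG combiner worklist. This helps to ensure we revisit the last remaining extract_vector_elt uses of the node, so that it can be optimized away in cases such as extract(insert(scalartovec(x), 1), 0).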
---
Patch is 170.61 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/100074.diff
17 Files Affected:
- (modified) llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (+1)
- (modified) llvm/test/CodeGen/AArch64/arm64-subvector-extend.ll (+1-3)
- (modified) llvm/test/CodeGen/AArch64/arm64-vabs.ll (+17-20)
- (modified) llvm/test/CodeGen/AArch64/cmp-select-sign.ll (+12-15)
- (modified) llvm/test/CodeGen/AArch64/fptoi.ll (+202-340)
- (modified) llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll (+222-252)
- (modified) llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll (+166-196)
- (modified) llvm/test/CodeGen/AArch64/nontemporal-load.ll (+25-40)
- (modified) llvm/test/CodeGen/AArch64/sadd_sat_vec.ll (+6-9)
- (modified) llvm/test/CodeGen/AArch64/ssub_sat_vec.ll (+6-9)
- (modified) llvm/test/CodeGen/AArch64/uadd_sat_vec.ll (+4-7)
- (modified) llvm/test/CodeGen/AArch64/usub_sat_vec.ll (+4-7)
- (modified) llvm/test/CodeGen/Thumb2/mve-fptosi-sat-vector.ll (+271-289)
- (modified) llvm/test/CodeGen/Thumb2/mve-fptoui-sat-vector.ll (+224-237)
- (modified) llvm/test/CodeGen/Thumb2/mve-minmaxi.ll (+4-12)
- (modified) llvm/test/CodeGen/Thumb2/mve-vst3.ll (+9-11)
- (modified) llvm/test/CodeGen/Thumb2/mve-vst4.ll (+32-40)
``````````diff
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index aa9032ea2574c..cd0440077f526 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -22533,6 +22533,7 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
if (VecOp.getOpcode() == ISD::INSERT_VECTOR_ELT &&
Index == VecOp.getOperand(2)) {
SDValue Elt = VecOp.getOperand(1);
+ AddUsersToWorklist(VecOp.getNode());
return VecVT.isInteger() ? DAG.getAnyExtOrTrunc(Elt, DL, ScalarVT) : Elt;
}
diff --git a/llvm/test/CodeGen/AArch64/arm64-subvector-extend.ll b/llvm/test/CodeGen/AArch64/arm64-subvector-extend.ll
index 00cc6b21ccaf8..abf2e1272d645 100644
--- a/llvm/test/CodeGen/AArch64/arm64-subvector-extend.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-subvector-extend.ll
@@ -951,10 +951,8 @@ define <1 x i128> @sext_v1x64(<1 x i64> %arg) {
; CHECK-SD-LABEL: sext_v1x64:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-SD-NEXT: fmov x8, d0
-; CHECK-SD-NEXT: asr x1, x8, #63
-; CHECK-SD-NEXT: mov.d v0[1], x1
; CHECK-SD-NEXT: fmov x0, d0
+; CHECK-SD-NEXT: asr x1, x0, #63
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: sext_v1x64:
diff --git a/llvm/test/CodeGen/AArch64/arm64-vabs.ll b/llvm/test/CodeGen/AArch64/arm64-vabs.ll
index 178c229d04e47..62a79e3547b29 100644
--- a/llvm/test/CodeGen/AArch64/arm64-vabs.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-vabs.ll
@@ -1802,28 +1802,25 @@ define <2 x i128> @uabd_i64(<2 x i64> %a, <2 x i64> %b) {
; CHECK-NEXT: mov.d x8, v0[1]
; CHECK-NEXT: mov.d x9, v1[1]
; CHECK-NEXT: fmov x10, d0
-; CHECK-NEXT: fmov x11, d1
-; CHECK-NEXT: asr x12, x10, #63
-; CHECK-NEXT: asr x13, x11, #63
-; CHECK-NEXT: subs x10, x10, x11
+; CHECK-NEXT: fmov x12, d1
+; CHECK-NEXT: asr x14, x10, #63
; CHECK-NEXT: asr x11, x8, #63
-; CHECK-NEXT: asr x14, x9, #63
-; CHECK-NEXT: sbc x12, x12, x13
+; CHECK-NEXT: asr x13, x9, #63
+; CHECK-NEXT: asr x15, x12, #63
; CHECK-NEXT: subs x8, x8, x9
-; CHECK-NEXT: sbc x9, x11, x14
-; CHECK-NEXT: asr x13, x12, #63
-; CHECK-NEXT: asr x11, x9, #63
-; CHECK-NEXT: eor x10, x10, x13
-; CHECK-NEXT: eor x8, x8, x11
-; CHECK-NEXT: eor x9, x9, x11
-; CHECK-NEXT: subs x2, x8, x11
-; CHECK-NEXT: eor x8, x12, x13
-; CHECK-NEXT: sbc x3, x9, x11
-; CHECK-NEXT: subs x9, x10, x13
-; CHECK-NEXT: fmov d0, x9
-; CHECK-NEXT: sbc x1, x8, x13
-; CHECK-NEXT: mov.d v0[1], x1
-; CHECK-NEXT: fmov x0, d0
+; CHECK-NEXT: sbc x9, x11, x13
+; CHECK-NEXT: subs x10, x10, x12
+; CHECK-NEXT: sbc x11, x14, x15
+; CHECK-NEXT: asr x13, x9, #63
+; CHECK-NEXT: asr x12, x11, #63
+; CHECK-NEXT: eor x8, x8, x13
+; CHECK-NEXT: eor x9, x9, x13
+; CHECK-NEXT: eor x10, x10, x12
+; CHECK-NEXT: eor x11, x11, x12
+; CHECK-NEXT: subs x0, x10, x12
+; CHECK-NEXT: sbc x1, x11, x12
+; CHECK-NEXT: subs x2, x8, x13
+; CHECK-NEXT: sbc x3, x9, x13
; CHECK-NEXT: ret
%aext = sext <2 x i64> %a to <2 x i128>
%bext = sext <2 x i64> %b to <2 x i128>
diff --git a/llvm/test/CodeGen/AArch64/cmp-select-sign.ll b/llvm/test/CodeGen/AArch64/cmp-select-sign.ll
index 22440b79bdcd4..b4f179e992a0d 100644
--- a/llvm/test/CodeGen/AArch64/cmp-select-sign.ll
+++ b/llvm/test/CodeGen/AArch64/cmp-select-sign.ll
@@ -241,21 +241,18 @@ define <4 x i32> @not_sign_4xi32_3(<4 x i32> %a) {
define <4 x i65> @sign_4xi65(<4 x i65> %a) {
; CHECK-LABEL: sign_4xi65:
; CHECK: // %bb.0:
-; CHECK-NEXT: sbfx x8, x1, #0, #1
-; CHECK-NEXT: sbfx x9, x5, #0, #1
-; CHECK-NEXT: sbfx x10, x3, #0, #1
-; CHECK-NEXT: lsr x1, x8, #63
-; CHECK-NEXT: orr x8, x8, #0x1
-; CHECK-NEXT: lsr x3, x10, #63
-; CHECK-NEXT: fmov d0, x8
-; CHECK-NEXT: sbfx x8, x7, #0, #1
-; CHECK-NEXT: lsr x5, x9, #63
-; CHECK-NEXT: orr x2, x10, #0x1
-; CHECK-NEXT: orr x4, x9, #0x1
-; CHECK-NEXT: lsr x7, x8, #63
-; CHECK-NEXT: orr x6, x8, #0x1
-; CHECK-NEXT: mov v0.d[1], x1
-; CHECK-NEXT: fmov x0, d0
+; CHECK-NEXT: sbfx x8, x5, #0, #1
+; CHECK-NEXT: sbfx x9, x3, #0, #1
+; CHECK-NEXT: sbfx x10, x1, #0, #1
+; CHECK-NEXT: sbfx x11, x7, #0, #1
+; CHECK-NEXT: lsr x1, x10, #63
+; CHECK-NEXT: lsr x3, x9, #63
+; CHECK-NEXT: lsr x5, x8, #63
+; CHECK-NEXT: lsr x7, x11, #63
+; CHECK-NEXT: orr x0, x10, #0x1
+; CHECK-NEXT: orr x2, x9, #0x1
+; CHECK-NEXT: orr x4, x8, #0x1
+; CHECK-NEXT: orr x6, x11, #0x1
; CHECK-NEXT: ret
%c = icmp sgt <4 x i65> %a, <i65 -1, i65 -1, i65 -1, i65 -1>
%res = select <4 x i1> %c, <4 x i65> <i65 1, i65 1, i65 1, i65 1>, <4 x i65 > <i65 -1, i65 -1, i65 -1, i65 -1>
diff --git a/llvm/test/CodeGen/AArch64/fptoi.ll b/llvm/test/CodeGen/AArch64/fptoi.ll
index 4723ac01d6021..0c880592d955b 100644
--- a/llvm/test/CodeGen/AArch64/fptoi.ll
+++ b/llvm/test/CodeGen/AArch64/fptoi.ll
@@ -2287,20 +2287,19 @@ define <2 x i128> @fptos_v2f64_v2i128(<2 x double> %a) {
; CHECK-SD-NEXT: .cfi_offset w20, -16
; CHECK-SD-NEXT: .cfi_offset w30, -32
; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill
-; CHECK-SD-NEXT: mov d0, v0.d[1]
+; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-SD-NEXT: bl __fixdfti
; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload
; CHECK-SD-NEXT: mov x19, x0
; CHECK-SD-NEXT: mov x20, x1
-; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-SD-NEXT: mov d0, v0.d[1]
; CHECK-SD-NEXT: bl __fixdfti
-; CHECK-SD-NEXT: fmov d0, x0
-; CHECK-SD-NEXT: mov x2, x19
-; CHECK-SD-NEXT: mov x3, x20
+; CHECK-SD-NEXT: mov x2, x0
+; CHECK-SD-NEXT: mov x3, x1
+; CHECK-SD-NEXT: mov x0, x19
+; CHECK-SD-NEXT: mov x1, x20
; CHECK-SD-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload
; CHECK-SD-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload
-; CHECK-SD-NEXT: mov v0.d[1], x1
-; CHECK-SD-NEXT: fmov x0, d0
; CHECK-SD-NEXT: add sp, sp, #48
; CHECK-SD-NEXT: ret
;
@@ -2345,20 +2344,19 @@ define <2 x i128> @fptou_v2f64_v2i128(<2 x double> %a) {
; CHECK-SD-NEXT: .cfi_offset w20, -16
; CHECK-SD-NEXT: .cfi_offset w30, -32
; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill
-; CHECK-SD-NEXT: mov d0, v0.d[1]
+; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-SD-NEXT: bl __fixunsdfti
; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload
; CHECK-SD-NEXT: mov x19, x0
; CHECK-SD-NEXT: mov x20, x1
-; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-SD-NEXT: mov d0, v0.d[1]
; CHECK-SD-NEXT: bl __fixunsdfti
-; CHECK-SD-NEXT: fmov d0, x0
-; CHECK-SD-NEXT: mov x2, x19
-; CHECK-SD-NEXT: mov x3, x20
+; CHECK-SD-NEXT: mov x2, x0
+; CHECK-SD-NEXT: mov x3, x1
+; CHECK-SD-NEXT: mov x0, x19
+; CHECK-SD-NEXT: mov x1, x20
; CHECK-SD-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload
; CHECK-SD-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload
-; CHECK-SD-NEXT: mov v0.d[1], x1
-; CHECK-SD-NEXT: fmov x0, d0
; CHECK-SD-NEXT: add sp, sp, #48
; CHECK-SD-NEXT: ret
;
@@ -2407,28 +2405,26 @@ define <3 x i128> @fptos_v3f64_v3i128(<3 x double> %a) {
; CHECK-SD-NEXT: .cfi_offset w30, -48
; CHECK-SD-NEXT: .cfi_offset b8, -56
; CHECK-SD-NEXT: .cfi_offset b9, -64
-; CHECK-SD-NEXT: fmov d9, d0
-; CHECK-SD-NEXT: fmov d0, d1
; CHECK-SD-NEXT: fmov d8, d2
+; CHECK-SD-NEXT: fmov d9, d1
; CHECK-SD-NEXT: bl __fixdfti
-; CHECK-SD-NEXT: fmov d0, d8
+; CHECK-SD-NEXT: fmov d0, d9
; CHECK-SD-NEXT: mov x19, x0
; CHECK-SD-NEXT: mov x20, x1
; CHECK-SD-NEXT: bl __fixdfti
-; CHECK-SD-NEXT: fmov d0, d9
+; CHECK-SD-NEXT: fmov d0, d8
; CHECK-SD-NEXT: mov x21, x0
; CHECK-SD-NEXT: mov x22, x1
; CHECK-SD-NEXT: bl __fixdfti
-; CHECK-SD-NEXT: fmov d0, x0
-; CHECK-SD-NEXT: mov x2, x19
-; CHECK-SD-NEXT: mov x3, x20
-; CHECK-SD-NEXT: mov x4, x21
-; CHECK-SD-NEXT: mov x5, x22
-; CHECK-SD-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload
+; CHECK-SD-NEXT: mov x4, x0
+; CHECK-SD-NEXT: mov x5, x1
+; CHECK-SD-NEXT: mov x0, x19
+; CHECK-SD-NEXT: mov x1, x20
+; CHECK-SD-NEXT: mov x2, x21
+; CHECK-SD-NEXT: mov x3, x22
; CHECK-SD-NEXT: ldp x20, x19, [sp, #48] // 16-byte Folded Reload
-; CHECK-SD-NEXT: mov v0.d[1], x1
+; CHECK-SD-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload
; CHECK-SD-NEXT: ldp x22, x21, [sp, #32] // 16-byte Folded Reload
-; CHECK-SD-NEXT: fmov x0, d0
; CHECK-SD-NEXT: ldp d9, d8, [sp], #64 // 16-byte Folded Reload
; CHECK-SD-NEXT: ret
;
@@ -2488,28 +2484,26 @@ define <3 x i128> @fptou_v3f64_v3i128(<3 x double> %a) {
; CHECK-SD-NEXT: .cfi_offset w30, -48
; CHECK-SD-NEXT: .cfi_offset b8, -56
; CHECK-SD-NEXT: .cfi_offset b9, -64
-; CHECK-SD-NEXT: fmov d9, d0
-; CHECK-SD-NEXT: fmov d0, d1
; CHECK-SD-NEXT: fmov d8, d2
+; CHECK-SD-NEXT: fmov d9, d1
; CHECK-SD-NEXT: bl __fixunsdfti
-; CHECK-SD-NEXT: fmov d0, d8
+; CHECK-SD-NEXT: fmov d0, d9
; CHECK-SD-NEXT: mov x19, x0
; CHECK-SD-NEXT: mov x20, x1
; CHECK-SD-NEXT: bl __fixunsdfti
-; CHECK-SD-NEXT: fmov d0, d9
+; CHECK-SD-NEXT: fmov d0, d8
; CHECK-SD-NEXT: mov x21, x0
; CHECK-SD-NEXT: mov x22, x1
; CHECK-SD-NEXT: bl __fixunsdfti
-; CHECK-SD-NEXT: fmov d0, x0
-; CHECK-SD-NEXT: mov x2, x19
-; CHECK-SD-NEXT: mov x3, x20
-; CHECK-SD-NEXT: mov x4, x21
-; CHECK-SD-NEXT: mov x5, x22
-; CHECK-SD-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload
+; CHECK-SD-NEXT: mov x4, x0
+; CHECK-SD-NEXT: mov x5, x1
+; CHECK-SD-NEXT: mov x0, x19
+; CHECK-SD-NEXT: mov x1, x20
+; CHECK-SD-NEXT: mov x2, x21
+; CHECK-SD-NEXT: mov x3, x22
; CHECK-SD-NEXT: ldp x20, x19, [sp, #48] // 16-byte Folded Reload
-; CHECK-SD-NEXT: mov v0.d[1], x1
+; CHECK-SD-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload
; CHECK-SD-NEXT: ldp x22, x21, [sp, #32] // 16-byte Folded Reload
-; CHECK-SD-NEXT: fmov x0, d0
; CHECK-SD-NEXT: ldp d9, d8, [sp], #64 // 16-byte Folded Reload
; CHECK-SD-NEXT: ret
;
@@ -3694,20 +3688,19 @@ define <2 x i128> @fptos_v2f32_v2i128(<2 x float> %a) {
; CHECK-SD-NEXT: .cfi_offset w30, -32
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill
-; CHECK-SD-NEXT: mov s0, v0.s[1]
+; CHECK-SD-NEXT: // kill: def $s0 killed $s0 killed $q0
; CHECK-SD-NEXT: bl __fixsfti
; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload
; CHECK-SD-NEXT: mov x19, x0
; CHECK-SD-NEXT: mov x20, x1
-; CHECK-SD-NEXT: // kill: def $s0 killed $s0 killed $q0
+; CHECK-SD-NEXT: mov s0, v0.s[1]
; CHECK-SD-NEXT: bl __fixsfti
-; CHECK-SD-NEXT: fmov d0, x0
-; CHECK-SD-NEXT: mov x2, x19
-; CHECK-SD-NEXT: mov x3, x20
+; CHECK-SD-NEXT: mov x2, x0
+; CHECK-SD-NEXT: mov x3, x1
+; CHECK-SD-NEXT: mov x0, x19
+; CHECK-SD-NEXT: mov x1, x20
; CHECK-SD-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload
; CHECK-SD-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload
-; CHECK-SD-NEXT: mov v0.d[1], x1
-; CHECK-SD-NEXT: fmov x0, d0
; CHECK-SD-NEXT: add sp, sp, #48
; CHECK-SD-NEXT: ret
;
@@ -3754,20 +3747,19 @@ define <2 x i128> @fptou_v2f32_v2i128(<2 x float> %a) {
; CHECK-SD-NEXT: .cfi_offset w30, -32
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill
-; CHECK-SD-NEXT: mov s0, v0.s[1]
+; CHECK-SD-NEXT: // kill: def $s0 killed $s0 killed $q0
; CHECK-SD-NEXT: bl __fixunssfti
; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload
; CHECK-SD-NEXT: mov x19, x0
; CHECK-SD-NEXT: mov x20, x1
-; CHECK-SD-NEXT: // kill: def $s0 killed $s0 killed $q0
+; CHECK-SD-NEXT: mov s0, v0.s[1]
; CHECK-SD-NEXT: bl __fixunssfti
-; CHECK-SD-NEXT: fmov d0, x0
-; CHECK-SD-NEXT: mov x2, x19
-; CHECK-SD-NEXT: mov x3, x20
+; CHECK-SD-NEXT: mov x2, x0
+; CHECK-SD-NEXT: mov x3, x1
+; CHECK-SD-NEXT: mov x0, x19
+; CHECK-SD-NEXT: mov x1, x20
; CHECK-SD-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload
; CHECK-SD-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload
-; CHECK-SD-NEXT: mov v0.d[1], x1
-; CHECK-SD-NEXT: fmov x0, d0
; CHECK-SD-NEXT: add sp, sp, #48
; CHECK-SD-NEXT: ret
;
@@ -3822,23 +3814,22 @@ define <3 x i128> @fptos_v3f32_v3i128(<3 x float> %a) {
; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload
; CHECK-SD-NEXT: mov x19, x0
; CHECK-SD-NEXT: mov x20, x1
-; CHECK-SD-NEXT: mov s0, v0.s[1]
+; CHECK-SD-NEXT: // kill: def $s0 killed $s0 killed $q0
; CHECK-SD-NEXT: bl __fixsfti
; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload
; CHECK-SD-NEXT: mov x21, x0
; CHECK-SD-NEXT: mov x22, x1
-; CHECK-SD-NEXT: // kill: def $s0 killed $s0 killed $q0
+; CHECK-SD-NEXT: mov s0, v0.s[1]
; CHECK-SD-NEXT: bl __fixsfti
-; CHECK-SD-NEXT: fmov d0, x0
-; CHECK-SD-NEXT: mov x2, x21
-; CHECK-SD-NEXT: mov x3, x22
+; CHECK-SD-NEXT: mov x2, x0
+; CHECK-SD-NEXT: mov x3, x1
+; CHECK-SD-NEXT: mov x0, x21
+; CHECK-SD-NEXT: mov x1, x22
; CHECK-SD-NEXT: mov x4, x19
; CHECK-SD-NEXT: mov x5, x20
-; CHECK-SD-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload
; CHECK-SD-NEXT: ldp x20, x19, [sp, #48] // 16-byte Folded Reload
-; CHECK-SD-NEXT: mov v0.d[1], x1
+; CHECK-SD-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload
; CHECK-SD-NEXT: ldp x22, x21, [sp, #32] // 16-byte Folded Reload
-; CHECK-SD-NEXT: fmov x0, d0
; CHECK-SD-NEXT: add sp, sp, #64
; CHECK-SD-NEXT: ret
;
@@ -3904,23 +3895,22 @@ define <3 x i128> @fptou_v3f32_v3i128(<3 x float> %a) {
; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload
; CHECK-SD-NEXT: mov x19, x0
; CHECK-SD-NEXT: mov x20, x1
-; CHECK-SD-NEXT: mov s0, v0.s[1]
+; CHECK-SD-NEXT: // kill: def $s0 killed $s0 killed $q0
; CHECK-SD-NEXT: bl __fixunssfti
; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload
; CHECK-SD-NEXT: mov x21, x0
; CHECK-SD-NEXT: mov x22, x1
-; CHECK-SD-NEXT: // kill: def $s0 killed $s0 killed $q0
+; CHECK-SD-NEXT: mov s0, v0.s[1]
; CHECK-SD-NEXT: bl __fixunssfti
-; CHECK-SD-NEXT: fmov d0, x0
-; CHECK-SD-NEXT: mov x2, x21
-; CHECK-SD-NEXT: mov x3, x22
+; CHECK-SD-NEXT: mov x2, x0
+; CHECK-SD-NEXT: mov x3, x1
+; CHECK-SD-NEXT: mov x0, x21
+; CHECK-SD-NEXT: mov x1, x22
; CHECK-SD-NEXT: mov x4, x19
; CHECK-SD-NEXT: mov x5, x20
-; CHECK-SD-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload
; CHECK-SD-NEXT: ldp x20, x19, [sp, #48] // 16-byte Folded Reload
-; CHECK-SD-NEXT: mov v0.d[1], x1
+; CHECK-SD-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload
; CHECK-SD-NEXT: ldp x22, x21, [sp, #32] // 16-byte Folded Reload
-; CHECK-SD-NEXT: fmov x0, d0
; CHECK-SD-NEXT: add sp, sp, #64
; CHECK-SD-NEXT: ret
;
@@ -7034,20 +7024,19 @@ define <2 x i128> @fptos_v2f16_v2i128(<2 x half> %a) {
; CHECK-SD-NEXT: .cfi_offset w30, -32
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill
-; CHECK-SD-NEXT: mov h0, v0.h[1]
+; CHECK-SD-NEXT: // kill: def $h0 killed $h0 killed $q0
; CHECK-SD-NEXT: bl __fixhfti
; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload
; CHECK-SD-NEXT: mov x19, x0
; CHECK-SD-NEXT: mov x20, x1
-; CHECK-SD-NEXT: // kill: def $h0 killed $h0 killed $q0
+; CHECK-SD-NEXT: mov h0, v0.h[1]
; CHECK-SD-NEXT: bl __fixhfti
-; CHECK-SD-NEXT: fmov d0, x0
-; CHECK-SD-NEXT: mov x2, x19
-; CHECK-SD-NEXT: mov x3, x20
+; CHECK-SD-NEXT: mov x2, x0
+; CHECK-SD-NEXT: mov x3, x1
+; CHECK-SD-NEXT: mov x0, x19
+; CHECK-SD-NEXT: mov x1, x20
; CHECK-SD-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload
; CHECK-SD-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload
-; CHECK-SD-NEXT: mov v0.d[1], x1
-; CHECK-SD-NEXT: fmov x0, d0
; CHECK-SD-NEXT: add sp, sp, #48
; CHECK-SD-NEXT: ret
;
@@ -7089,20 +7078,19 @@ define <2 x i128> @fptou_v2f16_v2i128(<2 x half> %a) {
; CHECK-SD-NEXT: .cfi_offset w30, -32
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill
-; CHECK-SD-NEXT: mov h0, v0.h[1]
+; CHECK-SD-NEXT: // kill: def $h0 killed $h0 killed $q0
; CHECK-SD-NEXT: bl __fixunshfti
; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload
; CHECK-SD-NEXT: mov x19, x0
; CHECK-SD-NEXT: mov x20, x1
-; CHECK-SD-NEXT: // kill: def $h0 killed $h0 killed $q0
+; CHECK-SD-NEXT: mov h0, v0.h[1]
; CHECK-SD-NEXT: bl __fixunshfti
-; CHECK-SD-NEXT: fmov d0, x0
-; CHECK-SD-NEXT: mov x2, x19
-; CHECK-SD-NEXT: mov x3, x20
+; CHECK-SD-NEXT: mov x2, x0
+; CHECK-SD-NEXT: mov x3, x1
+; CHECK-SD-NEXT: mov x0, x19
+; CHECK-SD-NEXT: mov x1, x20
; CHECK-SD-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload
; CHECK-SD-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload
-; CHECK-SD-NEXT: mov v0.d[1], x1
-; CHECK-SD-NEXT: fmov x0, d0
; CHECK-SD-NEXT: add sp, sp, #48
; CHECK-SD-NEXT: ret
;
@@ -7147,28 +7135,27 @@ define <3 x i128> @fptos_v3f16_v3i128(<3 x half> %a) {
; CHECK-SD-NEXT: .cfi_offset w30, -48
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill
-; CHECK-SD-NEXT: mov h0, v0.h[1]
+; CHECK-SD-NEXT: // kill: def $h0 killed $h0 killed $q0
; CHECK-SD-NEXT: bl __fixhfti
; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload
; CHECK-SD-NEXT: mov x19, x0
; CHECK-SD-NEXT: mov x20, x1
-; CHECK-SD-NEXT: mov h0, v0.h[2]
+; CHECK-SD-NEXT: mov h0, v0.h[1]
; CHECK-SD-NEXT: bl __fixhfti
; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload
; CHECK-SD-NEXT: mov x21, x0
; CHECK-SD-NEXT: mov x22, x1
-; CHECK-SD-NEXT: // kill: def $h0 killed $h0 killed $q0
+; CHECK-SD-NEXT: mov h0, v0.h[2]
; CHECK-SD-NEXT: bl __fixhfti
-; CHECK-SD-NEXT: fmov d0, x0
-; CHECK-SD-NEXT: mov x2, x19
-; CHECK-SD-NEXT: mov x3, x20
-; CHECK-SD-NEXT: mov x4, x21
-; CHECK-SD-NEXT: mov x5, x22
-; CHECK-SD-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload
+; CHECK-SD-NEXT: mov x4, x0
+; CHECK-SD-NEXT: mov x5, x1
+; CHECK-SD-NEXT: mov x0, x19
+; CHECK-SD-NEXT: mov x1, x20
+; CHECK-SD-NEXT: mov x2, x21
+; CHECK-SD-NEXT: mov x3, x22
; CHECK-SD-NEXT: ldp x20, x19, [sp, #48] // 16-byte Folded Reload
-; CHECK-SD-NEXT: mov v0.d[1], x1
+; CHECK-SD-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload
; CHECK-SD-NEXT: ldp x22, x21, [sp, #32] // 16-byte Folded Reload
-; CHECK-SD-NEXT: fmov x0, d0
; CHECK-SD-NEXT: add sp, sp, #64
; CHECK-SD-NEXT: ret
;
@@ -7220,28 +7207,27 @@ define <3 x i128> @fptou_v3f16_v3i128(<3 x half> %a) {
; CHECK-SD-NEXT: .cfi_offset w30, -48
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill
-; CHECK-SD-NEXT: mov h0, v0.h[1]
+; CHECK-SD-NEXT: // kill: def $h0 killed $h0 killed $q0
; CHECK-SD-NEXT: bl __fixunshfti
; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload
; CHECK-SD-NEXT: mov x19, x0
; CHECK-SD-NEXT: mov x20, x1
-; CHECK-SD-NEXT: mov h0, v0.h[2]
+; CHECK-SD-NEXT: mov h0, v0.h[1]
; CHECK-SD-NEXT: bl __fixunshfti
; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload
; CHECK-SD-NEXT: mov x21, x0
; CHECK-SD-NEXT: mov x22, x1
-; CHECK-SD-NEXT: // kill: def $h0 killed $h0 killed $q0
+; CHECK-SD-NEXT: mov h0, v0.h[2]
; CHECK-SD-NEXT: bl __fixunshfti
-; CHECK-SD-NEXT: fmov d0, x0
-; CHECK-SD-NEXT: mov x2, x19
-; CHECK-SD-NEXT: mov x3, x20
-; CHECK-SD-NEXT: mov x4, x21
-; CHECK-SD-NEXT: mov x5, x22
-; CHECK-SD-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload
+; CHECK-SD-NEXT: mov x4, x0
+; CHECK-SD-NEXT: mov x5, x1
+; CHECK-SD-NEXT: mov x0, x19
+; CHECK-SD-NEXT: mov x1, x20
+; CHECK-SD-NEXT: mov x2, x21
+; CHECK-SD-NEXT: mov x3, x22
; CHECK-SD-NEXT: ldp x20, x19, [sp, #48] // 16-byte Folded Reload
-; CHECK-SD-NEXT: mov v0.d[1], x1
+; CHECK-SD-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload
; CHECK-SD-NEXT: ldp x22, x21, [sp, #32] // 16-byte Folded Reload
-; CHECK-SD-NEXT: fmov x0, d0
; CHECK-SD-NEXT: add sp...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/100074
More information about the llvm-commits mailing list