[llvm] [ARM] Switch to soft promoting half types. (PR #80440)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Feb 2 07:05:27 PST 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-arm
Author: Harald van Dijk (hvdijk)
<details>
<summary>Changes</summary>
The traditional promotion is known to generate wrong code.
Fixes #73805.
---
Patch is 116.07 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/80440.diff
16 Files Affected:
- (modified) llvm/lib/Target/ARM/ARMISelLowering.cpp (+2-2)
- (modified) llvm/lib/Target/ARM/ARMISelLowering.h (+8)
- (modified) llvm/test/CodeGen/ARM/aes-erratum-fix.ll (+490-608)
- (modified) llvm/test/CodeGen/ARM/arm-half-promote.ll (+95-60)
- (modified) llvm/test/CodeGen/ARM/fp16-args.ll (-40)
- (modified) llvm/test/CodeGen/ARM/fp16-instructions.ll (+87-79)
- (modified) llvm/test/CodeGen/ARM/fp16-promote.ll (+45-101)
- (modified) llvm/test/CodeGen/ARM/llvm.exp10.ll (+39-24)
- (modified) llvm/test/CodeGen/ARM/llvm.frexp.ll (+32-27)
- (modified) llvm/test/CodeGen/ARM/vecreduce-fadd-legalization-soft-float.ll (+40-24)
- (modified) llvm/test/CodeGen/ARM/vecreduce-fadd-legalization-strict.ll (-3)
- (modified) llvm/test/CodeGen/ARM/vecreduce-fmax-legalization-soft-float.ll (+26-29)
- (modified) llvm/test/CodeGen/ARM/vecreduce-fmin-legalization-soft-float.ll (+26-29)
- (modified) llvm/test/CodeGen/ARM/vecreduce-fmul-legalization-soft-float.ll (+20-12)
- (modified) llvm/test/CodeGen/ARM/vecreduce-fmul-legalization-strict.ll (-3)
- (modified) llvm/test/CodeGen/Thumb2/mve-vabd.ll (+23-7)
``````````diff
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index bf8c877a547cd..b5c4a8a322ea7 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -9055,7 +9055,7 @@ SDValue ARMTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
return LowerINSERT_VECTOR_ELT_i1(Op, DAG, Subtarget);
if (getTypeAction(*DAG.getContext(), EltVT) ==
- TargetLowering::TypePromoteFloat) {
+ TargetLowering::TypeSoftPromoteHalf) {
// INSERT_VECTOR_ELT doesn't want f16 operands promoting to f32,
// but the type system will try to do that if we don't intervene.
// Reinterpret any such vector-element insertion as one with the
@@ -9065,7 +9065,7 @@ SDValue ARMTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
EVT IEltVT = MVT::getIntegerVT(EltVT.getScalarSizeInBits());
assert(getTypeAction(*DAG.getContext(), IEltVT) !=
- TargetLowering::TypePromoteFloat);
+ TargetLowering::TypeSoftPromoteHalf);
SDValue VecIn = Op.getOperand(0);
EVT VecVT = VecIn.getValueType();
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.h b/llvm/lib/Target/ARM/ARMISelLowering.h
index d61a68780e3e1..93db1ebd056bc 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.h
+++ b/llvm/lib/Target/ARM/ARMISelLowering.h
@@ -762,6 +762,14 @@ class VectorType;
ComplexDeinterleavingRotation Rotation, Value *InputA, Value *InputB,
Value *Accumulator = nullptr) const override;
+ bool softPromoteHalfType() const override {
+ return true;
+ }
+
+ bool useFPRegsForHalfType() const override {
+ return true;
+ }
+
protected:
std::pair<const TargetRegisterClass *, uint8_t>
findRepresentativeClass(const TargetRegisterInfo *TRI,
diff --git a/llvm/test/CodeGen/ARM/aes-erratum-fix.ll b/llvm/test/CodeGen/ARM/aes-erratum-fix.ll
index f9b62df37ff32..9c2da345956d1 100644
--- a/llvm/test/CodeGen/ARM/aes-erratum-fix.ll
+++ b/llvm/test/CodeGen/ARM/aes-erratum-fix.ll
@@ -1355,102 +1355,89 @@ define arm_aapcs_vfpcc void @aese_setf16_cond_via_ptr(i1 zeroext %0, half* %1, <
; CHECK-FIX-NOSCHED: @ %bb.0:
; CHECK-FIX-NOSCHED-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
; CHECK-FIX-NOSCHED-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
-; CHECK-FIX-NOSCHED-NEXT: .pad #24
-; CHECK-FIX-NOSCHED-NEXT: sub sp, sp, #24
+; CHECK-FIX-NOSCHED-NEXT: .pad #12
+; CHECK-FIX-NOSCHED-NEXT: sub sp, sp, #12
; CHECK-FIX-NOSCHED-NEXT: cmp r0, #0
; CHECK-FIX-NOSCHED-NEXT: beq .LBB36_3
; CHECK-FIX-NOSCHED-NEXT: @ %bb.1:
; CHECK-FIX-NOSCHED-NEXT: vld1.64 {d16, d17}, [r2]
-; CHECK-FIX-NOSCHED-NEXT: vmov.32 r3, d16[1]
-; CHECK-FIX-NOSCHED-NEXT: vmov r7, r6, d17
-; CHECK-FIX-NOSCHED-NEXT: vld1.16 {d16[0]}, [r1:16]
-; CHECK-FIX-NOSCHED-NEXT: vmov.32 r5, d16[0]
-; CHECK-FIX-NOSCHED-NEXT: uxth r4, r3
-; CHECK-FIX-NOSCHED-NEXT: lsr r3, r3, #16
-; CHECK-FIX-NOSCHED-NEXT: str r4, [sp, #8] @ 4-byte Spill
-; CHECK-FIX-NOSCHED-NEXT: uxth r4, r7
-; CHECK-FIX-NOSCHED-NEXT: str r4, [sp, #12] @ 4-byte Spill
-; CHECK-FIX-NOSCHED-NEXT: uxth r4, r6
-; CHECK-FIX-NOSCHED-NEXT: lsr r6, r6, #16
+; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r3, d17[3]
+; CHECK-FIX-NOSCHED-NEXT: ldrh r7, [r1]
+; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r9, d17[0]
+; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r10, d16[3]
+; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r11, d16[2]
+; CHECK-FIX-NOSCHED-NEXT: str r3, [sp, #8] @ 4-byte Spill
+; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r3, d17[2]
; CHECK-FIX-NOSCHED-NEXT: str r3, [sp, #4] @ 4-byte Spill
-; CHECK-FIX-NOSCHED-NEXT: str r4, [sp, #20] @ 4-byte Spill
-; CHECK-FIX-NOSCHED-NEXT: lsr r7, r7, #16
-; CHECK-FIX-NOSCHED-NEXT: lsr r3, r5, #16
-; CHECK-FIX-NOSCHED-NEXT: str r6, [sp, #16] @ 4-byte Spill
+; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r3, d17[1]
; CHECK-FIX-NOSCHED-NEXT: str r3, [sp] @ 4-byte Spill
-; CHECK-FIX-NOSCHED-NEXT: uxth r10, r5
+; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r3, d16[1]
; CHECK-FIX-NOSCHED-NEXT: cmp r0, #0
; CHECK-FIX-NOSCHED-NEXT: bne .LBB36_4
; CHECK-FIX-NOSCHED-NEXT: .LBB36_2:
-; CHECK-FIX-NOSCHED-NEXT: vmov r4, r6, d1
-; CHECK-FIX-NOSCHED-NEXT: vmov r0, r3, d0
-; CHECK-FIX-NOSCHED-NEXT: lsr r5, r4, #16
-; CHECK-FIX-NOSCHED-NEXT: lsr r1, r6, #16
-; CHECK-FIX-NOSCHED-NEXT: uxth r11, r6
-; CHECK-FIX-NOSCHED-NEXT: lsr lr, r0, #16
-; CHECK-FIX-NOSCHED-NEXT: lsr r12, r3, #16
-; CHECK-FIX-NOSCHED-NEXT: uxth r9, r4
-; CHECK-FIX-NOSCHED-NEXT: uxth r6, r3
+; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r0, d1[3]
+; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r4, d1[2]
+; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r5, d1[1]
+; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r6, d1[0]
+; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r12, d0[3]
+; CHECK-FIX-NOSCHED-NEXT: vmov.u16 lr, d0[2]
+; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r8, d0[1]
+; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r1, d0[0]
; CHECK-FIX-NOSCHED-NEXT: b .LBB36_5
; CHECK-FIX-NOSCHED-NEXT: .LBB36_3:
-; CHECK-FIX-NOSCHED-NEXT: ldrh r3, [r2, #14]
-; CHECK-FIX-NOSCHED-NEXT: str r3, [sp, #16] @ 4-byte Spill
-; CHECK-FIX-NOSCHED-NEXT: ldrh r3, [r2, #12]
-; CHECK-FIX-NOSCHED-NEXT: str r3, [sp, #20] @ 4-byte Spill
-; CHECK-FIX-NOSCHED-NEXT: ldrh r3, [r2, #8]
-; CHECK-FIX-NOSCHED-NEXT: str r3, [sp, #12] @ 4-byte Spill
-; CHECK-FIX-NOSCHED-NEXT: ldrh r3, [r2, #6]
-; CHECK-FIX-NOSCHED-NEXT: ldrh r7, [r2, #10]
-; CHECK-FIX-NOSCHED-NEXT: ldrh r10, [r2]
-; CHECK-FIX-NOSCHED-NEXT: str r3, [sp, #4] @ 4-byte Spill
-; CHECK-FIX-NOSCHED-NEXT: ldrh r3, [r2, #4]
+; CHECK-FIX-NOSCHED-NEXT: add r3, r2, #8
+; CHECK-FIX-NOSCHED-NEXT: vld1.32 {d16[0]}, [r2:32]
+; CHECK-FIX-NOSCHED-NEXT: vld1.32 {d17[0]}, [r3:32]
+; CHECK-FIX-NOSCHED-NEXT: add r3, r2, #4
+; CHECK-FIX-NOSCHED-NEXT: vld1.32 {d16[1]}, [r3:32]
+; CHECK-FIX-NOSCHED-NEXT: add r3, r2, #12
+; CHECK-FIX-NOSCHED-NEXT: vld1.32 {d17[1]}, [r3:32]
+; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r3, d17[3]
+; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r9, d17[0]
+; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r10, d16[3]
+; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r11, d16[2]
+; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r7, d16[0]
; CHECK-FIX-NOSCHED-NEXT: str r3, [sp, #8] @ 4-byte Spill
-; CHECK-FIX-NOSCHED-NEXT: ldrh r3, [r2, #2]
+; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r3, d17[2]
+; CHECK-FIX-NOSCHED-NEXT: str r3, [sp, #4] @ 4-byte Spill
+; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r3, d17[1]
; CHECK-FIX-NOSCHED-NEXT: str r3, [sp] @ 4-byte Spill
+; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r3, d16[1]
; CHECK-FIX-NOSCHED-NEXT: cmp r0, #0
; CHECK-FIX-NOSCHED-NEXT: beq .LBB36_2
; CHECK-FIX-NOSCHED-NEXT: .LBB36_4:
-; CHECK-FIX-NOSCHED-NEXT: vmov r5, r3, d1
-; CHECK-FIX-NOSCHED-NEXT: mov r4, r7
-; CHECK-FIX-NOSCHED-NEXT: vmov.32 r7, d0[1]
-; CHECK-FIX-NOSCHED-NEXT: vld1.16 {d0[0]}, [r1:16]
-; CHECK-FIX-NOSCHED-NEXT: vmov.32 r0, d0[0]
-; CHECK-FIX-NOSCHED-NEXT: uxth r9, r5
-; CHECK-FIX-NOSCHED-NEXT: uxth r11, r3
-; CHECK-FIX-NOSCHED-NEXT: uxth r6, r7
-; CHECK-FIX-NOSCHED-NEXT: lsr r12, r7, #16
-; CHECK-FIX-NOSCHED-NEXT: lsr r1, r3, #16
-; CHECK-FIX-NOSCHED-NEXT: lsr r5, r5, #16
-; CHECK-FIX-NOSCHED-NEXT: mov r7, r4
-; CHECK-FIX-NOSCHED-NEXT: lsr lr, r0, #16
+; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r0, d1[3]
+; CHECK-FIX-NOSCHED-NEXT: ldrh r1, [r1]
+; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r4, d1[2]
+; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r5, d1[1]
+; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r6, d1[0]
+; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r12, d0[3]
+; CHECK-FIX-NOSCHED-NEXT: vmov.u16 lr, d0[2]
+; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r8, d0[1]
; CHECK-FIX-NOSCHED-NEXT: .LBB36_5:
-; CHECK-FIX-NOSCHED-NEXT: uxth r8, r0
-; CHECK-FIX-NOSCHED-NEXT: ldr r0, [sp] @ 4-byte Reload
-; CHECK-FIX-NOSCHED-NEXT: ldr r3, [sp, #4] @ 4-byte Reload
-; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r10, r0, lsl #16
-; CHECK-FIX-NOSCHED-NEXT: vmov.32 d16[0], r0
-; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r8, lr, lsl #16
-; CHECK-FIX-NOSCHED-NEXT: vmov.32 d18[0], r0
-; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r6, r12, lsl #16
-; CHECK-FIX-NOSCHED-NEXT: vmov.32 d18[1], r0
-; CHECK-FIX-NOSCHED-NEXT: ldr r0, [sp, #8] @ 4-byte Reload
-; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r0, r3, lsl #16
-; CHECK-FIX-NOSCHED-NEXT: vmov.32 d16[1], r0
-; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r9, r5, lsl #16
-; CHECK-FIX-NOSCHED-NEXT: vmov.32 d19[0], r0
-; CHECK-FIX-NOSCHED-NEXT: ldr r0, [sp, #12] @ 4-byte Reload
-; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r0, r7, lsl #16
-; CHECK-FIX-NOSCHED-NEXT: vmov.32 d17[0], r0
-; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r11, r1, lsl #16
-; CHECK-FIX-NOSCHED-NEXT: ldr r1, [sp, #16] @ 4-byte Reload
+; CHECK-FIX-NOSCHED-NEXT: pkhbt r1, r1, r8, lsl #16
+; CHECK-FIX-NOSCHED-NEXT: pkhbt r3, r7, r3, lsl #16
+; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r4, r0, lsl #16
+; CHECK-FIX-NOSCHED-NEXT: vmov.32 d18[0], r1
+; CHECK-FIX-NOSCHED-NEXT: pkhbt r1, lr, r12, lsl #16
+; CHECK-FIX-NOSCHED-NEXT: vmov.32 d16[0], r3
+; CHECK-FIX-NOSCHED-NEXT: vmov.32 d18[1], r1
+; CHECK-FIX-NOSCHED-NEXT: pkhbt r1, r11, r10, lsl #16
+; CHECK-FIX-NOSCHED-NEXT: vmov.32 d16[1], r1
+; CHECK-FIX-NOSCHED-NEXT: pkhbt r1, r6, r5, lsl #16
+; CHECK-FIX-NOSCHED-NEXT: vmov.32 d19[0], r1
+; CHECK-FIX-NOSCHED-NEXT: ldr r1, [sp] @ 4-byte Reload
+; CHECK-FIX-NOSCHED-NEXT: pkhbt r1, r9, r1, lsl #16
; CHECK-FIX-NOSCHED-NEXT: vmov.32 d19[1], r0
-; CHECK-FIX-NOSCHED-NEXT: ldr r0, [sp, #20] @ 4-byte Reload
-; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r0, r1, lsl #16
+; CHECK-FIX-NOSCHED-NEXT: ldr r0, [sp, #8] @ 4-byte Reload
+; CHECK-FIX-NOSCHED-NEXT: vmov.32 d17[0], r1
+; CHECK-FIX-NOSCHED-NEXT: ldr r1, [sp, #4] @ 4-byte Reload
+; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r1, r0, lsl #16
; CHECK-FIX-NOSCHED-NEXT: vmov.32 d17[1], r0
; CHECK-FIX-NOSCHED-NEXT: aese.8 q8, q9
; CHECK-FIX-NOSCHED-NEXT: aesmc.8 q8, q8
; CHECK-FIX-NOSCHED-NEXT: vst1.64 {d16, d17}, [r2]
-; CHECK-FIX-NOSCHED-NEXT: add sp, sp, #24
+; CHECK-FIX-NOSCHED-NEXT: add sp, sp, #12
; CHECK-FIX-NOSCHED-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
;
; CHECK-CORTEX-FIX-LABEL: aese_setf16_cond_via_ptr:
@@ -1460,94 +1447,79 @@ define arm_aapcs_vfpcc void @aese_setf16_cond_via_ptr(i1 zeroext %0, half* %1, <
; CHECK-CORTEX-FIX-NEXT: .pad #24
; CHECK-CORTEX-FIX-NEXT: sub sp, sp, #24
; CHECK-CORTEX-FIX-NEXT: cmp r0, #0
-; CHECK-CORTEX-FIX-NEXT: beq .LBB36_3
+; CHECK-CORTEX-FIX-NEXT: beq .LBB36_2
; CHECK-CORTEX-FIX-NEXT: @ %bb.1:
; CHECK-CORTEX-FIX-NEXT: vld1.64 {d16, d17}, [r2]
-; CHECK-CORTEX-FIX-NEXT: vorr q9, q8, q8
-; CHECK-CORTEX-FIX-NEXT: vld1.16 {d18[0]}, [r1:16]
-; CHECK-CORTEX-FIX-NEXT: vmov.32 r3, d18[0]
-; CHECK-CORTEX-FIX-NEXT: uxth r7, r3
-; CHECK-CORTEX-FIX-NEXT: lsr r3, r3, #16
-; CHECK-CORTEX-FIX-NEXT: str r3, [sp, #16] @ 4-byte Spill
-; CHECK-CORTEX-FIX-NEXT: vmov.32 r3, d16[1]
-; CHECK-CORTEX-FIX-NEXT: str r7, [sp, #20] @ 4-byte Spill
-; CHECK-CORTEX-FIX-NEXT: uxth r7, r3
-; CHECK-CORTEX-FIX-NEXT: lsr r3, r3, #16
+; CHECK-CORTEX-FIX-NEXT: vmov.u16 r3, d16[1]
+; CHECK-CORTEX-FIX-NEXT: vmov.u16 r6, d17[0]
+; CHECK-CORTEX-FIX-NEXT: vmov.u16 r7, d17[2]
+; CHECK-CORTEX-FIX-NEXT: str r3, [sp, #20] @ 4-byte Spill
+; CHECK-CORTEX-FIX-NEXT: vmov.u16 r3, d16[2]
+; CHECK-CORTEX-FIX-NEXT: str r3, [sp, #8] @ 4-byte Spill
+; CHECK-CORTEX-FIX-NEXT: vmov.u16 r3, d16[3]
; CHECK-CORTEX-FIX-NEXT: str r3, [sp, #4] @ 4-byte Spill
-; CHECK-CORTEX-FIX-NEXT: vmov r3, r6, d17
-; CHECK-CORTEX-FIX-NEXT: str r7, [sp, #8] @ 4-byte Spill
-; CHECK-CORTEX-FIX-NEXT: uxth r7, r3
-; CHECK-CORTEX-FIX-NEXT: lsr r3, r3, #16
-; CHECK-CORTEX-FIX-NEXT: uxth r11, r6
-; CHECK-CORTEX-FIX-NEXT: lsr r4, r6, #16
-; CHECK-CORTEX-FIX-NEXT: str r7, [sp, #12] @ 4-byte Spill
-; CHECK-CORTEX-FIX-NEXT: cmp r0, #0
-; CHECK-CORTEX-FIX-NEXT: bne .LBB36_4
+; CHECK-CORTEX-FIX-NEXT: vmov.u16 r3, d17[1]
+; CHECK-CORTEX-FIX-NEXT: str r3, [sp, #12] @ 4-byte Spill
+; CHECK-CORTEX-FIX-NEXT: ldrh r3, [r1]
+; CHECK-CORTEX-FIX-NEXT: str r3, [sp, #16] @ 4-byte Spill
+; CHECK-CORTEX-FIX-NEXT: mov r3, r6
+; CHECK-CORTEX-FIX-NEXT: b .LBB36_3
; CHECK-CORTEX-FIX-NEXT: .LBB36_2:
-; CHECK-CORTEX-FIX-NEXT: vmov r1, r7, d0
-; CHECK-CORTEX-FIX-NEXT: uxth r0, r1
-; CHECK-CORTEX-FIX-NEXT: uxth r6, r7
-; CHECK-CORTEX-FIX-NEXT: lsr r12, r7, #16
-; CHECK-CORTEX-FIX-NEXT: lsr r9, r1, #16
-; CHECK-CORTEX-FIX-NEXT: str r0, [sp] @ 4-byte Spill
-; CHECK-CORTEX-FIX-NEXT: mov r0, r3
-; CHECK-CORTEX-FIX-NEXT: vmov r7, r3, d1
-; CHECK-CORTEX-FIX-NEXT: uxth r10, r7
-; CHECK-CORTEX-FIX-NEXT: lsr r5, r7, #16
-; CHECK-CORTEX-FIX-NEXT: uxth lr, r3
-; CHECK-CORTEX-FIX-NEXT: lsr r8, r3, #16
-; CHECK-CORTEX-FIX-NEXT: mov r3, r0
-; CHECK-CORTEX-FIX-NEXT: b .LBB36_5
-; CHECK-CORTEX-FIX-NEXT: .LBB36_3:
-; CHECK-CORTEX-FIX-NEXT: ldrh r3, [r2]
-; CHECK-CORTEX-FIX-NEXT: ldrh r11, [r2, #12]
-; CHECK-CORTEX-FIX-NEXT: ldrh r4, [r2, #14]
-; CHECK-CORTEX-FIX-NEXT: str r3, [sp, #20] @ 4-byte Spill
-; CHECK-CORTEX-FIX-NEXT: ldrh r3, [r2, #2]
+; CHECK-CORTEX-FIX-NEXT: add r3, r2, #8
+; CHECK-CORTEX-FIX-NEXT: vld1.32 {d16[0]}, [r2:32]
+; CHECK-CORTEX-FIX-NEXT: add r7, r2, #4
+; CHECK-CORTEX-FIX-NEXT: vld1.32 {d17[0]}, [r3:32]
+; CHECK-CORTEX-FIX-NEXT: add r3, r2, #12
+; CHECK-CORTEX-FIX-NEXT: vld1.32 {d16[1]}, [r7:32]
+; CHECK-CORTEX-FIX-NEXT: vld1.32 {d17[1]}, [r3:32]
+; CHECK-CORTEX-FIX-NEXT: vmov.u16 r3, d16[0]
; CHECK-CORTEX-FIX-NEXT: str r3, [sp, #16] @ 4-byte Spill
-; CHECK-CORTEX-FIX-NEXT: ldrh r3, [r2, #4]
+; CHECK-CORTEX-FIX-NEXT: vmov.u16 r3, d16[1]
+; CHECK-CORTEX-FIX-NEXT: vmov.u16 r7, d17[1]
+; CHECK-CORTEX-FIX-NEXT: str r3, [sp, #20] @ 4-byte Spill
+; CHECK-CORTEX-FIX-NEXT: vmov.u16 r3, d16[2]
+; CHECK-CORTEX-FIX-NEXT: str r7, [sp, #12] @ 4-byte Spill
+; CHECK-CORTEX-FIX-NEXT: vmov.u16 r7, d17[2]
; CHECK-CORTEX-FIX-NEXT: str r3, [sp, #8] @ 4-byte Spill
-; CHECK-CORTEX-FIX-NEXT: ldrh r3, [r2, #6]
+; CHECK-CORTEX-FIX-NEXT: vmov.u16 r3, d16[3]
; CHECK-CORTEX-FIX-NEXT: str r3, [sp, #4] @ 4-byte Spill
-; CHECK-CORTEX-FIX-NEXT: ldrh r3, [r2, #8]
-; CHECK-CORTEX-FIX-NEXT: str r3, [sp, #12] @ 4-byte Spill
-; CHECK-CORTEX-FIX-NEXT: ldrh r3, [r2, #10]
+; CHECK-CORTEX-FIX-NEXT: vmov.u16 r3, d17[0]
+; CHECK-CORTEX-FIX-NEXT: .LBB36_3:
+; CHECK-CORTEX-FIX-NEXT: vmov.u16 r4, d17[3]
; CHECK-CORTEX-FIX-NEXT: cmp r0, #0
-; CHECK-CORTEX-FIX-NEXT: beq .LBB36_2
-; CHECK-CORTEX-FIX-NEXT: .LBB36_4:
-; CHECK-CORTEX-FIX-NEXT: vorr q8, q0, q0
-; CHECK-CORTEX-FIX-NEXT: vmov.32 r5, d0[1]
-; CHECK-CORTEX-FIX-NEXT: vld1.16 {d16[0]}, [r1:16]
-; CHECK-CORTEX-FIX-NEXT: uxth r6, r5
-; CHECK-CORTEX-FIX-NEXT: lsr r12, r5, #16
-; CHECK-CORTEX-FIX-NEXT: vmov r5, r7, d1
-; CHECK-CORTEX-FIX-NEXT: vmov.32 r1, d16[0]
-; CHECK-CORTEX-FIX-NEXT: uxth r10, r5
-; CHECK-CORTEX-FIX-NEXT: lsr r5, r5, #16
-; CHECK-CORTEX-FIX-NEXT: uxth lr, r7
-; CHECK-CORTEX-FIX-NEXT: lsr r8, r7, #16
-; CHECK-CORTEX-FIX-NEXT: uxth r0, r1
-; CHECK-CORTEX-FIX-NEXT: lsr r9, r1, #16
-; CHECK-CORTEX-FIX-NEXT: str r0, [sp] @ 4-byte Spill
+; CHECK-CORTEX-FIX-NEXT: beq .LBB36_5
+; CHECK-CORTEX-FIX-NEXT: @ %bb.4:
+; CHECK-CORTEX-FIX-NEXT: ldrh r0, [r1]
+; CHECK-CORTEX-FIX-NEXT: b .LBB36_6
; CHECK-CORTEX-FIX-NEXT: .LBB36_5:
+; CHECK-CORTEX-FIX-NEXT: vmov.u16 r0, d0[0]
+; CHECK-CORTEX-FIX-NEXT: .LBB36_6:
+; CHECK-CORTEX-FIX-NEXT: str r0, [sp] @ 4-byte Spill
; CHECK-CORTEX-FIX-NEXT: ldr r0, [sp, #8] @ 4-byte Reload
; CHECK-CORTEX-FIX-NEXT: ldr r1, [sp, #4] @ 4-byte Reload
-; CHECK-CORTEX-FIX-NEXT: pkhbt r11, r11, r4, lsl #16
-; CHECK-CORTEX-FIX-NEXT: ldr r4, [sp, #16] @ 4-byte Reload
-; CHECK-CORTEX-FIX-NEXT: pkhbt r6, r6, r12, lsl #16
-; CHECK-CORTEX-FIX-NEXT: pkhbt r5, r10, r5, lsl #16
+; CHECK-CORTEX-FIX-NEXT: pkhbt r9, r7, r4, lsl #16
+; CHECK-CORTEX-FIX-NEXT: ldr r4, [sp, #20] @ 4-byte Reload
+; CHECK-CORTEX-FIX-NEXT: vmov.u16 r10, d0[1]
+; CHECK-CORTEX-FIX-NEXT: vmov.u16 r6, d0[2]
+; CHECK-CORTEX-FIX-NEXT: vmov.u16 r12, d0[3]
+; CHECK-CORTEX-FIX-NEXT: vmov.u16 r11, d1[0]
+; CHECK-CORTEX-FIX-NEXT: vmov.u16 r5, d1[1]
+; CHECK-CORTEX-FIX-NEXT: vmov.u16 lr, d1[2]
+; CHECK-CORTEX-FIX-NEXT: vmov.u16 r8, d1[3]
; CHECK-CORTEX-FIX-NEXT: pkhbt r7, r0, r1, lsl #16
; CHECK-CORTEX-FIX-NEXT: ldr r1, [sp, #12] @ 4-byte Reload
; CHECK-CORTEX-FIX-NEXT: pkhbt r0, lr, r8, lsl #16
-; CHECK-CORTEX-FIX-NEXT: pkhbt r1, r1, r3, lsl #16
-; CHECK-CORTEX-FIX-NEXT: ldr r3, [sp, #20] @ 4-byte Reload
+; CHECK-CORTEX-FIX-NEXT: pkhbt r6, r6, r12, lsl #16
+; CHECK-CORTEX-FIX-NEXT: pkhbt r5, r11, r5, lsl #16
+; CHECK-CORTEX-FIX-NEXT: pkhbt r1, r3, r1, lsl #16
+; CHECK-CORTEX-FIX-NEXT: ldr r3, [sp, #16] @ 4-byte Reload
; CHECK-CORTEX-FIX-NEXT: pkhbt r4, r3, r4, lsl #16
; CHECK-CORTEX-FIX-NEXT: ldr r3, [sp] @ 4-byte Reload
; CHECK-CORTEX-FIX-NEXT: vmov.32 d18[0], r4
; CHECK-CORTEX-FIX-NEXT: vmov.32 d19[0], r1
; CHECK-CORTEX-FIX-NEXT: vmov.32 d18[1], r7
-; CHECK-CORTEX-FIX-NEXT: vmov.32 d19[1], r11
-; CHECK-CORTEX-FIX-NEXT: pkhbt r3, r3, r9, lsl #16
+; CHECK-CORTEX-FIX-NEXT: vmov.32 d19[1], r9
+; CHECK-CORTEX-FIX-NEXT: pkhbt r3, r3, r10, lsl #16
; CHECK-CORTEX-FIX-NEXT: vmov.32 d16[0], r3
; CHECK-CORTEX-FIX-NEXT: vmov.32 d17[0], r5
; CHECK-CORTEX-FIX-NEXT: vmov.32 d16[1], r6
@@ -1604,210 +1576,179 @@ define arm_aapcs_vfpcc void @aese_setf16_cond_via_val(i1 zeroext %0, half %1, <1
; CHECK-FIX-NOSCHED: @ %bb.0:
; CHECK-FIX-NOSCHED-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
; CHECK-FIX-NOSCHED-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
-; CHECK-FIX-NOSCHED-NEXT: .pad #24
-; CHECK-FIX-NOSCHED-NEXT: sub sp, sp, #24
-; CHECK-FIX-NOSCHED-NEXT: vmov r12, s0
+; CHECK-FIX-NOSCHED-NEXT: .pad #12
+; CHECK-FIX-NOSCHED-NEXT: sub sp, sp, #12
; CHECK-FIX-NOSCHED-NEXT: cmp r0, #0
; CHECK-FIX-NOSCHED-NEXT: beq .LBB37_2
; CHECK-FIX-NOSCHED-NEXT: @ %bb.1:
; CHECK-FIX-NOSCHED-NEXT: vld1.64 {d16, d17}, [r1]
-; CHECK-FIX-NOSCHED-NEXT: vmov.32 r3, d16[1]
-; CHECK-FIX-NOSCHED-NEXT: vmov r7, r6, d17
-; CHECK-FIX-NOSCHED-NEXT: vmov.16 d16[0], r12
-; CHECK-FIX-NOSCHED-NEXT: vmov.32 r5, d16[0]
-; CHECK-FIX-NOSCHED-NEXT: uxth r4, r7
-; CHECK-FIX-NOSCHED-NEXT: uxth r2, r3
-; CHECK-FIX-NOSCHED-NEXT: lsr r3, r3, #16
-; CHECK-FIX-NOSCHED-NEXT: str r4, [sp, #12] @ 4-byte Spill
-; CHECK-FIX-NOSCHED-NEXT: uxth r4, r6
-; CHECK-FIX-NOSCHED-NEXT: lsr r6, r6, #16
-; CHECK-FIX-NOSCHED-NEXT: str r3, [sp, #8] @ 4-byte Spill
-; CHECK-FIX-NOSCHED-NEXT: lsr r7, r7, #16
-; CHECK-FIX-NOSCHED-NEXT: lsr r3, r5, #16
-; CHECK-FIX-NOSCHED-NEXT: str r4, [sp, #20] @ 4-byte Spill
-; CHECK-FIX-NOSCHED-NEXT: str r3, [sp, #4] @ 4-byte Spill
-; CHECK-FIX-NOSCHED-NEXT: uxth r3, r5
-; CHECK-FIX-NOSCHED-NEXT: str r6, [sp, #16] @ 4-byte Spill
+; CHECK-FIX-NOSCHED-NEXT: vmov.f32 s2, s0
+; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r2, d17[1]
+; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r7, d17[3]
+; CHECK-FIX-NOSCHED-NEXT: vmov.u16 lr, d17[2]
+; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r11, d16[2]
+; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r8, d16[1]
+; CHECK-FIX-NOSCHED-NEXT: str r2, [sp, #8] @ 4-byte Spill
+; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r2, d17[0]
+; CHECK-FIX-NOSCHED-NEXT: str r2, [sp, #4] @ 4-byte Spill
+; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r2, d16[3]
+; CHECK-FIX-NOSCHED-NEXT: str r2, [sp] @ 4-byte Spill
; CHECK-FIX-NOSCHED-NEXT: b .LBB37_3
; CHECK-FIX-NOSCHED-NEXT: .LBB37_2:
-; CHECK-FIX-NOSCHED-NEXT: ldrh r3, [r1, #14]
-; CHECK-FIX-NOSCHED-NEXT: str r3, [sp, #16] @ 4-byte Spill
-; CHECK-FIX-NOSCHED-NEXT: ldrh r3, [r1, #12]
-; CHECK-FIX-NOSCHED-NEXT: str r3, [sp, #20] @ 4-byte Spill
-; CHECK-FIX-NOSCHED-NEXT: ldrh r3, [r1, #8]
-; CHECK-FIX-NOSCHED-NEXT: str r3, [sp, #12] @ 4-byte Spill
-; CHECK-FIX-NOSCHED-NEXT: ldrh r2, [r1, #6]
-; CHECK-FIX-NOSCHED-NEXT: ldrh r3, [r1, #2]
+; CHECK-FIX-NOSCHED-NEXT: add r2, r1, #8
+; CHECK-FIX-NOSCHED-NEXT: vld1.32 {d16[0]}, [r1:32]
+; CHECK-FIX-NOSCHED-NEXT: vld1.32 {d17[0]}, [r2:32]
+; CHECK-FI...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/80440
More information about the llvm-commits mailing list