[llvm] d7853ba - [ARM] Generate VDUP(Const) from constant buildvectors
David Green via llvm-commits
llvm-commits at lists.llvm.org
Tue Jun 8 12:52:18 PDT 2021
Author: David Green
Date: 2021-06-08T20:51:33+01:00
New Revision: d7853bae941006cece63013f09d524e72bbbec45
URL: https://github.com/llvm/llvm-project/commit/d7853bae941006cece63013f09d524e72bbbec45
DIFF: https://github.com/llvm/llvm-project/commit/d7853bae941006cece63013f09d524e72bbbec45.diff
LOG: [ARM] Generate VDUP(Const) from constant buildvectors
If we cannot otherwise use a VMOVimm/VMOVFPimm/VMVNimm, fall back to
producing a VDUP(const) as opposed to a constant pool load. This will at
least be smaller codesize and can allow the VDUP to be folded into other
instructions.
Differential Revision: https://reviews.llvm.org/D103808
Added:
Modified:
llvm/lib/Target/ARM/ARMISelLowering.cpp
llvm/test/CodeGen/Thumb2/LowOverheadLoops/predicated-liveout-unknown-lanes.ll
llvm/test/CodeGen/Thumb2/mve-fp16convertloops.ll
llvm/test/CodeGen/Thumb2/mve-gather-scatter-tailpred.ll
llvm/test/CodeGen/Thumb2/mve-pred-vctpvpsel.ll
llvm/test/CodeGen/Thumb2/mve-shifts.ll
llvm/test/CodeGen/Thumb2/mve-vmovimm.ll
llvm/test/CodeGen/Thumb2/mve-vmvnimm.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index e6e495d670c1..c2376e941f9a 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -7635,6 +7635,18 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
return DAG.getNode(ARMISD::VMOVFPIMM, dl, VT, Val);
}
}
+
+ // If we are under MVE, generate a VDUP(constant), bitcast to the original
+ // type.
+ if (ST->hasMVEIntegerOps() &&
+ (SplatBitSize == 8 || SplatBitSize == 16 || SplatBitSize == 32)) {
+ EVT DupVT = SplatBitSize == 32 ? MVT::v4i32
+ : SplatBitSize == 16 ? MVT::v8i16
+ : MVT::v16i8;
+ SDValue Const = DAG.getConstant(SplatBits.getZExtValue(), dl, MVT::i32);
+ SDValue VDup = DAG.getNode(ARMISD::VDUP, dl, DupVT, Const);
+ return DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, VT, VDup);
+ }
}
}
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/predicated-liveout-unknown-lanes.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/predicated-liveout-unknown-lanes.ll
index 01da3ce20b9a..c5d63a2f081a 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/predicated-liveout-unknown-lanes.ll
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/predicated-liveout-unknown-lanes.ll
@@ -7,8 +7,9 @@ define arm_aapcs_vfpcc <4 x float> @arm_max_no_idx_f32_mve(float* %pSrc, i32 %bl
; CHECK-NEXT: .save {r7, lr}
; CHECK-NEXT: push {r7, lr}
; CHECK-NEXT: subs r2, r1, #4
-; CHECK-NEXT: adr r3, .LCPI0_0
-; CHECK-NEXT: vldrw.u32 q0, [r3]
+; CHECK-NEXT: movw r3, #0
+; CHECK-NEXT: movt r3, #65408
+; CHECK-NEXT: vdup.32 q0, r3
; CHECK-NEXT: dlstp.32 lr, r1
; CHECK-NEXT: .LBB0_1: @ %do.body
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
@@ -17,13 +18,6 @@ define arm_aapcs_vfpcc <4 x float> @arm_max_no_idx_f32_mve(float* %pSrc, i32 %bl
; CHECK-NEXT: letp lr, .LBB0_1
; CHECK-NEXT: @ %bb.2: @ %do.end
; CHECK-NEXT: pop {r7, pc}
-; CHECK-NEXT: .p2align 4
-; CHECK-NEXT: @ %bb.3:
-; CHECK-NEXT: .LCPI0_0:
-; CHECK-NEXT: .long 0xff800000 @ float -Inf
-; CHECK-NEXT: .long 0xff800000 @ float -Inf
-; CHECK-NEXT: .long 0xff800000 @ float -Inf
-; CHECK-NEXT: .long 0xff800000 @ float -Inf
entry:
br label %do.body
diff --git a/llvm/test/CodeGen/Thumb2/mve-fp16convertloops.ll b/llvm/test/CodeGen/Thumb2/mve-fp16convertloops.ll
index c6e4dd5867b2..a02f6b8ebb82 100644
--- a/llvm/test/CodeGen/Thumb2/mve-fp16convertloops.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-fp16convertloops.ll
@@ -7,24 +7,17 @@ define void @to_4(float* nocapture readonly %x, half* noalias nocapture %y) {
; CHECK-NEXT: .save {r7, lr}
; CHECK-NEXT: push {r7, lr}
; CHECK-NEXT: mov.w lr, #256
-; CHECK-NEXT: adr r2, .LCPI0_0
-; CHECK-NEXT: vldrw.u32 q0, [r2]
+; CHECK-NEXT: movw r2, #26214
+; CHECK-NEXT: movt r2, #16390
; CHECK-NEXT: .LBB0_1: @ %vector.body
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: vldrw.u32 q1, [r0], #16
-; CHECK-NEXT: vmul.f32 q1, q1, q0
-; CHECK-NEXT: vcvtb.f16.f32 q1, q1
-; CHECK-NEXT: vstrh.32 q1, [r1], #8
+; CHECK-NEXT: vldrw.u32 q0, [r0], #16
+; CHECK-NEXT: vmul.f32 q0, q0, r2
+; CHECK-NEXT: vcvtb.f16.f32 q0, q0
+; CHECK-NEXT: vstrh.32 q0, [r1], #8
; CHECK-NEXT: le lr, .LBB0_1
; CHECK-NEXT: @ %bb.2: @ %for.cond.cleanup
; CHECK-NEXT: pop {r7, pc}
-; CHECK-NEXT: .p2align 4
-; CHECK-NEXT: @ %bb.3:
-; CHECK-NEXT: .LCPI0_0:
-; CHECK-NEXT: .long 0x40066666 @ float 2.0999999
-; CHECK-NEXT: .long 0x40066666 @ float 2.0999999
-; CHECK-NEXT: .long 0x40066666 @ float 2.0999999
-; CHECK-NEXT: .long 0x40066666 @ float 2.0999999
entry:
br label %vector.body
@@ -52,28 +45,21 @@ define void @to_8(float* nocapture readonly %x, half* noalias nocapture %y) {
; CHECK-NEXT: .save {r7, lr}
; CHECK-NEXT: push {r7, lr}
; CHECK-NEXT: mov.w lr, #128
-; CHECK-NEXT: adr r2, .LCPI1_0
-; CHECK-NEXT: vldrw.u32 q0, [r2]
+; CHECK-NEXT: movw r2, #26214
+; CHECK-NEXT: movt r2, #16390
; CHECK-NEXT: .LBB1_1: @ %vector.body
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: vldrw.u32 q1, [r0, #16]
-; CHECK-NEXT: vmul.f32 q1, q1, q0
-; CHECK-NEXT: vcvtb.f16.f32 q1, q1
-; CHECK-NEXT: vstrh.32 q1, [r1, #8]
-; CHECK-NEXT: vldrw.u32 q1, [r0], #32
-; CHECK-NEXT: vmul.f32 q1, q1, q0
-; CHECK-NEXT: vcvtb.f16.f32 q1, q1
-; CHECK-NEXT: vstrh.32 q1, [r1], #16
+; CHECK-NEXT: vldrw.u32 q0, [r0, #16]
+; CHECK-NEXT: vmul.f32 q0, q0, r2
+; CHECK-NEXT: vcvtb.f16.f32 q0, q0
+; CHECK-NEXT: vstrh.32 q0, [r1, #8]
+; CHECK-NEXT: vldrw.u32 q0, [r0], #32
+; CHECK-NEXT: vmul.f32 q0, q0, r2
+; CHECK-NEXT: vcvtb.f16.f32 q0, q0
+; CHECK-NEXT: vstrh.32 q0, [r1], #16
; CHECK-NEXT: le lr, .LBB1_1
; CHECK-NEXT: @ %bb.2: @ %for.cond.cleanup
; CHECK-NEXT: pop {r7, pc}
-; CHECK-NEXT: .p2align 4
-; CHECK-NEXT: @ %bb.3:
-; CHECK-NEXT: .LCPI1_0:
-; CHECK-NEXT: .long 0x40066666 @ float 2.0999999
-; CHECK-NEXT: .long 0x40066666 @ float 2.0999999
-; CHECK-NEXT: .long 0x40066666 @ float 2.0999999
-; CHECK-NEXT: .long 0x40066666 @ float 2.0999999
entry:
br label %vector.body
@@ -101,36 +87,29 @@ define void @to_16(float* nocapture readonly %x, half* noalias nocapture %y) {
; CHECK-NEXT: .save {r7, lr}
; CHECK-NEXT: push {r7, lr}
; CHECK-NEXT: mov.w lr, #64
-; CHECK-NEXT: adr r2, .LCPI2_0
-; CHECK-NEXT: vldrw.u32 q0, [r2]
+; CHECK-NEXT: movw r2, #26214
+; CHECK-NEXT: movt r2, #16390
; CHECK-NEXT: .LBB2_1: @ %vector.body
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: vldrw.u32 q1, [r0, #48]
-; CHECK-NEXT: vmul.f32 q1, q1, q0
-; CHECK-NEXT: vcvtb.f16.f32 q1, q1
-; CHECK-NEXT: vstrh.32 q1, [r1, #24]
-; CHECK-NEXT: vldrw.u32 q1, [r0, #32]
-; CHECK-NEXT: vmul.f32 q1, q1, q0
-; CHECK-NEXT: vcvtb.f16.f32 q1, q1
-; CHECK-NEXT: vstrh.32 q1, [r1, #16]
-; CHECK-NEXT: vldrw.u32 q1, [r0, #16]
-; CHECK-NEXT: vmul.f32 q1, q1, q0
-; CHECK-NEXT: vcvtb.f16.f32 q1, q1
-; CHECK-NEXT: vstrh.32 q1, [r1, #8]
-; CHECK-NEXT: vldrw.u32 q1, [r0], #64
-; CHECK-NEXT: vmul.f32 q1, q1, q0
-; CHECK-NEXT: vcvtb.f16.f32 q1, q1
-; CHECK-NEXT: vstrh.32 q1, [r1], #32
+; CHECK-NEXT: vldrw.u32 q0, [r0, #48]
+; CHECK-NEXT: vmul.f32 q0, q0, r2
+; CHECK-NEXT: vcvtb.f16.f32 q0, q0
+; CHECK-NEXT: vstrh.32 q0, [r1, #24]
+; CHECK-NEXT: vldrw.u32 q0, [r0, #32]
+; CHECK-NEXT: vmul.f32 q0, q0, r2
+; CHECK-NEXT: vcvtb.f16.f32 q0, q0
+; CHECK-NEXT: vstrh.32 q0, [r1, #16]
+; CHECK-NEXT: vldrw.u32 q0, [r0, #16]
+; CHECK-NEXT: vmul.f32 q0, q0, r2
+; CHECK-NEXT: vcvtb.f16.f32 q0, q0
+; CHECK-NEXT: vstrh.32 q0, [r1, #8]
+; CHECK-NEXT: vldrw.u32 q0, [r0], #64
+; CHECK-NEXT: vmul.f32 q0, q0, r2
+; CHECK-NEXT: vcvtb.f16.f32 q0, q0
+; CHECK-NEXT: vstrh.32 q0, [r1], #32
; CHECK-NEXT: le lr, .LBB2_1
; CHECK-NEXT: @ %bb.2: @ %for.cond.cleanup
; CHECK-NEXT: pop {r7, pc}
-; CHECK-NEXT: .p2align 4
-; CHECK-NEXT: @ %bb.3:
-; CHECK-NEXT: .LCPI2_0:
-; CHECK-NEXT: .long 0x40066666 @ float 2.0999999
-; CHECK-NEXT: .long 0x40066666 @ float 2.0999999
-; CHECK-NEXT: .long 0x40066666 @ float 2.0999999
-; CHECK-NEXT: .long 0x40066666 @ float 2.0999999
entry:
br label %vector.body
@@ -158,24 +137,17 @@ define void @from_4(half* nocapture readonly %x, float* noalias nocapture %y) {
; CHECK-NEXT: .save {r7, lr}
; CHECK-NEXT: push {r7, lr}
; CHECK-NEXT: mov.w lr, #256
-; CHECK-NEXT: adr r2, .LCPI3_0
-; CHECK-NEXT: vldrw.u32 q0, [r2]
+; CHECK-NEXT: movw r2, #26214
+; CHECK-NEXT: movt r2, #16390
; CHECK-NEXT: .LBB3_1: @ %vector.body
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: vldrh.u32 q1, [r0], #8
-; CHECK-NEXT: vcvtb.f32.f16 q1, q1
-; CHECK-NEXT: vmul.f32 q1, q1, q0
-; CHECK-NEXT: vstrb.8 q1, [r1], #16
+; CHECK-NEXT: vldrh.u32 q0, [r0], #8
+; CHECK-NEXT: vcvtb.f32.f16 q0, q0
+; CHECK-NEXT: vmul.f32 q0, q0, r2
+; CHECK-NEXT: vstrb.8 q0, [r1], #16
; CHECK-NEXT: le lr, .LBB3_1
; CHECK-NEXT: @ %bb.2: @ %for.cond.cleanup
; CHECK-NEXT: pop {r7, pc}
-; CHECK-NEXT: .p2align 4
-; CHECK-NEXT: @ %bb.3:
-; CHECK-NEXT: .LCPI3_0:
-; CHECK-NEXT: .long 0x40066666 @ float 2.0999999
-; CHECK-NEXT: .long 0x40066666 @ float 2.0999999
-; CHECK-NEXT: .long 0x40066666 @ float 2.0999999
-; CHECK-NEXT: .long 0x40066666 @ float 2.0999999
entry:
br label %vector.body
@@ -203,28 +175,21 @@ define void @from_8(half* nocapture readonly %x, float* noalias nocapture %y) {
; CHECK-NEXT: .save {r7, lr}
; CHECK-NEXT: push {r7, lr}
; CHECK-NEXT: mov.w lr, #128
-; CHECK-NEXT: adr r2, .LCPI4_0
-; CHECK-NEXT: vldrw.u32 q0, [r2]
+; CHECK-NEXT: movw r2, #26214
+; CHECK-NEXT: movt r2, #16390
; CHECK-NEXT: .LBB4_1: @ %vector.body
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: vldrh.u32 q1, [r0, #8]
+; CHECK-NEXT: vldrh.u32 q0, [r0], #16
+; CHECK-NEXT: vldrh.u32 q1, [r0, #-8]
+; CHECK-NEXT: vcvtb.f32.f16 q0, q0
+; CHECK-NEXT: vmul.f32 q0, q0, r2
; CHECK-NEXT: vcvtb.f32.f16 q1, q1
-; CHECK-NEXT: vmul.f32 q1, q1, q0
+; CHECK-NEXT: vmul.f32 q1, q1, r2
; CHECK-NEXT: vstrw.32 q1, [r1, #16]
-; CHECK-NEXT: vldrh.u32 q1, [r0], #16
-; CHECK-NEXT: vcvtb.f32.f16 q1, q1
-; CHECK-NEXT: vmul.f32 q1, q1, q0
-; CHECK-NEXT: vstrw.32 q1, [r1], #32
+; CHECK-NEXT: vstrw.32 q0, [r1], #32
; CHECK-NEXT: le lr, .LBB4_1
; CHECK-NEXT: @ %bb.2: @ %for.cond.cleanup
; CHECK-NEXT: pop {r7, pc}
-; CHECK-NEXT: .p2align 4
-; CHECK-NEXT: @ %bb.3:
-; CHECK-NEXT: .LCPI4_0:
-; CHECK-NEXT: .long 0x40066666 @ float 2.0999999
-; CHECK-NEXT: .long 0x40066666 @ float 2.0999999
-; CHECK-NEXT: .long 0x40066666 @ float 2.0999999
-; CHECK-NEXT: .long 0x40066666 @ float 2.0999999
entry:
br label %vector.body
@@ -252,36 +217,29 @@ define void @from_16(half* nocapture readonly %x, float* noalias nocapture %y) {
; CHECK-NEXT: .save {r7, lr}
; CHECK-NEXT: push {r7, lr}
; CHECK-NEXT: mov.w lr, #64
-; CHECK-NEXT: adr r2, .LCPI5_0
-; CHECK-NEXT: vldrw.u32 q0, [r2]
+; CHECK-NEXT: movw r2, #26214
+; CHECK-NEXT: movt r2, #16390
; CHECK-NEXT: .LBB5_1: @ %vector.body
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: vldrh.u32 q1, [r0, #24]
-; CHECK-NEXT: vcvtb.f32.f16 q1, q1
-; CHECK-NEXT: vmul.f32 q1, q1, q0
-; CHECK-NEXT: vstrw.32 q1, [r1, #48]
-; CHECK-NEXT: vldrh.u32 q1, [r0, #16]
-; CHECK-NEXT: vcvtb.f32.f16 q1, q1
-; CHECK-NEXT: vmul.f32 q1, q1, q0
-; CHECK-NEXT: vstrw.32 q1, [r1, #32]
-; CHECK-NEXT: vldrh.u32 q1, [r0, #8]
+; CHECK-NEXT: vldrh.u32 q0, [r0], #32
+; CHECK-NEXT: vldrh.u32 q1, [r0, #-24]
+; CHECK-NEXT: vldrh.u32 q2, [r0, #-16]
+; CHECK-NEXT: vldrh.u32 q3, [r0, #-8]
+; CHECK-NEXT: vcvtb.f32.f16 q0, q0
; CHECK-NEXT: vcvtb.f32.f16 q1, q1
-; CHECK-NEXT: vmul.f32 q1, q1, q0
+; CHECK-NEXT: vcvtb.f32.f16 q2, q2
+; CHECK-NEXT: vcvtb.f32.f16 q3, q3
+; CHECK-NEXT: vmul.f32 q2, q2, r2
+; CHECK-NEXT: vmul.f32 q3, q3, r2
+; CHECK-NEXT: vmul.f32 q1, q1, r2
+; CHECK-NEXT: vmul.f32 q0, q0, r2
+; CHECK-NEXT: vstrw.32 q3, [r1, #48]
+; CHECK-NEXT: vstrw.32 q2, [r1, #32]
; CHECK-NEXT: vstrw.32 q1, [r1, #16]
-; CHECK-NEXT: vldrh.u32 q1, [r0], #32
-; CHECK-NEXT: vcvtb.f32.f16 q1, q1
-; CHECK-NEXT: vmul.f32 q1, q1, q0
-; CHECK-NEXT: vstrw.32 q1, [r1], #64
+; CHECK-NEXT: vstrw.32 q0, [r1], #64
; CHECK-NEXT: le lr, .LBB5_1
; CHECK-NEXT: @ %bb.2: @ %for.cond.cleanup
; CHECK-NEXT: pop {r7, pc}
-; CHECK-NEXT: .p2align 4
-; CHECK-NEXT: @ %bb.3:
-; CHECK-NEXT: .LCPI5_0:
-; CHECK-NEXT: .long 0x40066666 @ float 2.0999999
-; CHECK-NEXT: .long 0x40066666 @ float 2.0999999
-; CHECK-NEXT: .long 0x40066666 @ float 2.0999999
-; CHECK-NEXT: .long 0x40066666 @ float 2.0999999
entry:
br label %vector.body
@@ -309,25 +267,18 @@ define void @both_4(half* nocapture readonly %x, half* noalias nocapture %y) {
; CHECK-NEXT: .save {r7, lr}
; CHECK-NEXT: push {r7, lr}
; CHECK-NEXT: mov.w lr, #256
-; CHECK-NEXT: adr r2, .LCPI6_0
-; CHECK-NEXT: vldrw.u32 q0, [r2]
+; CHECK-NEXT: movw r2, #26214
+; CHECK-NEXT: movt r2, #16390
; CHECK-NEXT: .LBB6_1: @ %vector.body
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: vldrh.u32 q1, [r0], #8
-; CHECK-NEXT: vcvtb.f32.f16 q1, q1
-; CHECK-NEXT: vmul.f32 q1, q1, q0
-; CHECK-NEXT: vcvtb.f16.f32 q1, q1
-; CHECK-NEXT: vstrh.32 q1, [r1], #8
+; CHECK-NEXT: vldrh.u32 q0, [r0], #8
+; CHECK-NEXT: vcvtb.f32.f16 q0, q0
+; CHECK-NEXT: vmul.f32 q0, q0, r2
+; CHECK-NEXT: vcvtb.f16.f32 q0, q0
+; CHECK-NEXT: vstrh.32 q0, [r1], #8
; CHECK-NEXT: le lr, .LBB6_1
; CHECK-NEXT: @ %bb.2: @ %for.cond.cleanup
; CHECK-NEXT: pop {r7, pc}
-; CHECK-NEXT: .p2align 4
-; CHECK-NEXT: @ %bb.3:
-; CHECK-NEXT: .LCPI6_0:
-; CHECK-NEXT: .long 0x40066666 @ float 2.0999999
-; CHECK-NEXT: .long 0x40066666 @ float 2.0999999
-; CHECK-NEXT: .long 0x40066666 @ float 2.0999999
-; CHECK-NEXT: .long 0x40066666 @ float 2.0999999
entry:
br label %vector.body
@@ -356,28 +307,21 @@ define void @both_8(half* nocapture readonly %x, half* noalias nocapture %y) {
; CHECK-NEXT: .save {r7, lr}
; CHECK-NEXT: push {r7, lr}
; CHECK-NEXT: mov.w lr, #128
-; CHECK-NEXT: adr r2, .LCPI7_0
-; CHECK-NEXT: vldrw.u32 q0, [r2]
+; CHECK-NEXT: movw r2, #26214
+; CHECK-NEXT: movt r2, #16390
; CHECK-NEXT: .LBB7_1: @ %vector.body
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: vldrh.u16 q1, [r0], #16
-; CHECK-NEXT: vcvtb.f32.f16 q2, q1
-; CHECK-NEXT: vcvtt.f32.f16 q1, q1
-; CHECK-NEXT: vmul.f32 q2, q2, q0
-; CHECK-NEXT: vmul.f32 q1, q1, q0
-; CHECK-NEXT: vcvtb.f16.f32 q2, q2
-; CHECK-NEXT: vcvtt.f16.f32 q2, q1
-; CHECK-NEXT: vstrb.8 q2, [r1], #16
+; CHECK-NEXT: vldrh.u16 q0, [r0], #16
+; CHECK-NEXT: vcvtb.f32.f16 q1, q0
+; CHECK-NEXT: vcvtt.f32.f16 q0, q0
+; CHECK-NEXT: vmul.f32 q1, q1, r2
+; CHECK-NEXT: vmul.f32 q0, q0, r2
+; CHECK-NEXT: vcvtb.f16.f32 q1, q1
+; CHECK-NEXT: vcvtt.f16.f32 q1, q0
+; CHECK-NEXT: vstrb.8 q1, [r1], #16
; CHECK-NEXT: le lr, .LBB7_1
; CHECK-NEXT: @ %bb.2: @ %for.cond.cleanup
; CHECK-NEXT: pop {r7, pc}
-; CHECK-NEXT: .p2align 4
-; CHECK-NEXT: @ %bb.3:
-; CHECK-NEXT: .LCPI7_0:
-; CHECK-NEXT: .long 0x40066666 @ float 2.0999999
-; CHECK-NEXT: .long 0x40066666 @ float 2.0999999
-; CHECK-NEXT: .long 0x40066666 @ float 2.0999999
-; CHECK-NEXT: .long 0x40066666 @ float 2.0999999
entry:
br label %vector.body
@@ -406,36 +350,29 @@ define void @both_16(half* nocapture readonly %x, half* noalias nocapture %y) {
; CHECK-NEXT: .save {r7, lr}
; CHECK-NEXT: push {r7, lr}
; CHECK-NEXT: mov.w lr, #64
-; CHECK-NEXT: adr r2, .LCPI8_0
-; CHECK-NEXT: vldrw.u32 q0, [r2]
+; CHECK-NEXT: movw r2, #26214
+; CHECK-NEXT: movt r2, #16390
; CHECK-NEXT: .LBB8_1: @ %vector.body
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: vldrh.u16 q1, [r0, #16]
-; CHECK-NEXT: vcvtb.f32.f16 q2, q1
-; CHECK-NEXT: vcvtt.f32.f16 q1, q1
-; CHECK-NEXT: vmul.f32 q2, q2, q0
-; CHECK-NEXT: vmul.f32 q1, q1, q0
-; CHECK-NEXT: vcvtb.f16.f32 q2, q2
-; CHECK-NEXT: vcvtt.f16.f32 q2, q1
-; CHECK-NEXT: vldrh.u16 q1, [r0], #32
-; CHECK-NEXT: vstrh.16 q2, [r1, #16]
-; CHECK-NEXT: vcvtb.f32.f16 q2, q1
-; CHECK-NEXT: vcvtt.f32.f16 q1, q1
-; CHECK-NEXT: vmul.f32 q2, q2, q0
-; CHECK-NEXT: vmul.f32 q1, q1, q0
-; CHECK-NEXT: vcvtb.f16.f32 q2, q2
-; CHECK-NEXT: vcvtt.f16.f32 q2, q1
-; CHECK-NEXT: vstrh.16 q2, [r1], #32
+; CHECK-NEXT: vldrh.u16 q0, [r0, #16]
+; CHECK-NEXT: vcvtb.f32.f16 q1, q0
+; CHECK-NEXT: vcvtt.f32.f16 q0, q0
+; CHECK-NEXT: vmul.f32 q1, q1, r2
+; CHECK-NEXT: vmul.f32 q0, q0, r2
+; CHECK-NEXT: vcvtb.f16.f32 q1, q1
+; CHECK-NEXT: vcvtt.f16.f32 q1, q0
+; CHECK-NEXT: vldrh.u16 q0, [r0], #32
+; CHECK-NEXT: vstrh.16 q1, [r1, #16]
+; CHECK-NEXT: vcvtb.f32.f16 q1, q0
+; CHECK-NEXT: vcvtt.f32.f16 q0, q0
+; CHECK-NEXT: vmul.f32 q1, q1, r2
+; CHECK-NEXT: vmul.f32 q0, q0, r2
+; CHECK-NEXT: vcvtb.f16.f32 q1, q1
+; CHECK-NEXT: vcvtt.f16.f32 q1, q0
+; CHECK-NEXT: vstrh.16 q1, [r1], #32
; CHECK-NEXT: le lr, .LBB8_1
; CHECK-NEXT: @ %bb.2: @ %for.cond.cleanup
; CHECK-NEXT: pop {r7, pc}
-; CHECK-NEXT: .p2align 4
-; CHECK-NEXT: @ %bb.3:
-; CHECK-NEXT: .LCPI8_0:
-; CHECK-NEXT: .long 0x40066666 @ float 2.0999999
-; CHECK-NEXT: .long 0x40066666 @ float 2.0999999
-; CHECK-NEXT: .long 0x40066666 @ float 2.0999999
-; CHECK-NEXT: .long 0x40066666 @ float 2.0999999
entry:
br label %vector.body
@@ -464,28 +401,21 @@ define void @both_8_I(half* nocapture readonly %x, half* noalias nocapture %y) {
; CHECK-NEXT: .save {r7, lr}
; CHECK-NEXT: push {r7, lr}
; CHECK-NEXT: mov.w lr, #128
-; CHECK-NEXT: adr r2, .LCPI9_0
-; CHECK-NEXT: vldrw.u32 q0, [r2]
+; CHECK-NEXT: movw r2, #26214
+; CHECK-NEXT: movt r2, #16390
; CHECK-NEXT: .LBB9_1: @ %vector.body
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: vldrh.u16 q1, [r0], #16
-; CHECK-NEXT: vcvtb.f32.f16 q2, q1
-; CHECK-NEXT: vcvtt.f32.f16 q1, q1
-; CHECK-NEXT: vmul.f32 q2, q2, q0
-; CHECK-NEXT: vmul.f32 q1, q1, q0
-; CHECK-NEXT: vcvtb.f16.f32 q2, q2
-; CHECK-NEXT: vcvtt.f16.f32 q2, q1
-; CHECK-NEXT: vstrb.8 q2, [r1], #16
+; CHECK-NEXT: vldrh.u16 q0, [r0], #16
+; CHECK-NEXT: vcvtb.f32.f16 q1, q0
+; CHECK-NEXT: vcvtt.f32.f16 q0, q0
+; CHECK-NEXT: vmul.f32 q1, q1, r2
+; CHECK-NEXT: vmul.f32 q0, q0, r2
+; CHECK-NEXT: vcvtb.f16.f32 q1, q1
+; CHECK-NEXT: vcvtt.f16.f32 q1, q0
+; CHECK-NEXT: vstrb.8 q1, [r1], #16
; CHECK-NEXT: le lr, .LBB9_1
; CHECK-NEXT: @ %bb.2: @ %for.cond.cleanup
; CHECK-NEXT: pop {r7, pc}
-; CHECK-NEXT: .p2align 4
-; CHECK-NEXT: @ %bb.3:
-; CHECK-NEXT: .LCPI9_0:
-; CHECK-NEXT: .long 0x40066666 @ float 2.0999999
-; CHECK-NEXT: .long 0x40066666 @ float 2.0999999
-; CHECK-NEXT: .long 0x40066666 @ float 2.0999999
-; CHECK-NEXT: .long 0x40066666 @ float 2.0999999
entry:
br label %vector.body
@@ -519,36 +449,29 @@ define void @both_16_I(half* nocapture readonly %x, half* noalias nocapture %y)
; CHECK-NEXT: .save {r7, lr}
; CHECK-NEXT: push {r7, lr}
; CHECK-NEXT: mov.w lr, #128
-; CHECK-NEXT: adr r2, .LCPI10_0
-; CHECK-NEXT: vldrw.u32 q0, [r2]
+; CHECK-NEXT: movw r2, #26214
+; CHECK-NEXT: movt r2, #16390
; CHECK-NEXT: .LBB10_1: @ %vector.body
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: vldrh.u16 q1, [r0]
-; CHECK-NEXT: vcvtb.f32.f16 q2, q1
-; CHECK-NEXT: vcvtt.f32.f16 q1, q1
-; CHECK-NEXT: vmul.f32 q2, q2, q0
-; CHECK-NEXT: vmul.f32 q1, q1, q0
-; CHECK-NEXT: vcvtb.f16.f32 q2, q2
-; CHECK-NEXT: vcvtt.f16.f32 q2, q1
-; CHECK-NEXT: vldrh.u16 q1, [r0, #16]!
-; CHECK-NEXT: vstrh.16 q2, [r1]
-; CHECK-NEXT: vcvtb.f32.f16 q2, q1
-; CHECK-NEXT: vcvtt.f32.f16 q1, q1
-; CHECK-NEXT: vmul.f32 q2, q2, q0
-; CHECK-NEXT: vmul.f32 q1, q1, q0
-; CHECK-NEXT: vcvtb.f16.f32 q2, q2
-; CHECK-NEXT: vcvtt.f16.f32 q2, q1
-; CHECK-NEXT: vstrb.8 q2, [r1, #16]!
+; CHECK-NEXT: vldrh.u16 q0, [r0]
+; CHECK-NEXT: vcvtb.f32.f16 q1, q0
+; CHECK-NEXT: vcvtt.f32.f16 q0, q0
+; CHECK-NEXT: vmul.f32 q1, q1, r2
+; CHECK-NEXT: vmul.f32 q0, q0, r2
+; CHECK-NEXT: vcvtb.f16.f32 q1, q1
+; CHECK-NEXT: vcvtt.f16.f32 q1, q0
+; CHECK-NEXT: vldrh.u16 q0, [r0, #16]!
+; CHECK-NEXT: vstrh.16 q1, [r1]
+; CHECK-NEXT: vcvtb.f32.f16 q1, q0
+; CHECK-NEXT: vcvtt.f32.f16 q0, q0
+; CHECK-NEXT: vmul.f32 q1, q1, r2
+; CHECK-NEXT: vmul.f32 q0, q0, r2
+; CHECK-NEXT: vcvtb.f16.f32 q1, q1
+; CHECK-NEXT: vcvtt.f16.f32 q1, q0
+; CHECK-NEXT: vstrb.8 q1, [r1, #16]!
; CHECK-NEXT: le lr, .LBB10_1
; CHECK-NEXT: @ %bb.2: @ %for.cond.cleanup
; CHECK-NEXT: pop {r7, pc}
-; CHECK-NEXT: .p2align 4
-; CHECK-NEXT: @ %bb.3:
-; CHECK-NEXT: .LCPI10_0:
-; CHECK-NEXT: .long 0x40066666 @ float 2.0999999
-; CHECK-NEXT: .long 0x40066666 @ float 2.0999999
-; CHECK-NEXT: .long 0x40066666 @ float 2.0999999
-; CHECK-NEXT: .long 0x40066666 @ float 2.0999999
entry:
br label %vector.body
diff --git a/llvm/test/CodeGen/Thumb2/mve-gather-scatter-tailpred.ll b/llvm/test/CodeGen/Thumb2/mve-gather-scatter-tailpred.ll
index cba123c9e2f2..e885a7af5d86 100644
--- a/llvm/test/CodeGen/Thumb2/mve-gather-scatter-tailpred.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-gather-scatter-tailpred.ll
@@ -221,161 +221,89 @@ end: ; preds = %middle.block
define void @justoffsets(i8* noalias nocapture readonly %r, i8* noalias nocapture %w, i32 %N) {
; CHECK-LABEL: justoffsets:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: .save {r4, r5, r6, r7, r8, lr}
-; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, lr}
+; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, lr}
+; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, lr}
+; CHECK-NEXT: .pad #4
+; CHECK-NEXT: sub sp, #4
; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15}
-; CHECK-NEXT: .pad #216
-; CHECK-NEXT: sub sp, #216
+; CHECK-NEXT: .pad #32
+; CHECK-NEXT: sub sp, #32
; CHECK-NEXT: cmp r2, #0
-; CHECK-NEXT: beq.w .LBB3_3
+; CHECK-NEXT: beq .LBB3_3
; CHECK-NEXT: @ %bb.1: @ %vector.ph
+; CHECK-NEXT: adr r5, .LCPI3_2
+; CHECK-NEXT: vldrw.u32 q1, [r5]
+; CHECK-NEXT: adr r4, .LCPI3_1
+; CHECK-NEXT: movw r5, #50417
+; CHECK-NEXT: adr r3, .LCPI3_0
+; CHECK-NEXT: movw r7, #32769
+; CHECK-NEXT: vldrw.u32 q2, [r4]
+; CHECK-NEXT: movw r4, #47888
+; CHECK-NEXT: vldrw.u32 q3, [r3]
+; CHECK-NEXT: vstrw.32 q1, [sp, #16] @ 16-byte Spill
+; CHECK-NEXT: vmov.i32 q1, #0x7fff
; CHECK-NEXT: vmov.i32 q0, #0x8000
-; CHECK-NEXT: adr r7, .LCPI3_5
-; CHECK-NEXT: vstrw.32 q0, [sp, #160] @ 16-byte Spill
-; CHECK-NEXT: vldrw.u32 q0, [r7]
-; CHECK-NEXT: adr r6, .LCPI3_4
-; CHECK-NEXT: adr r5, .LCPI3_3
-; CHECK-NEXT: vstrw.32 q0, [sp, #176] @ 16-byte Spill
-; CHECK-NEXT: vldrw.u32 q0, [r6]
-; CHECK-NEXT: adr.w r8, .LCPI3_2
-; CHECK-NEXT: vstrw.32 q0, [sp, #144] @ 16-byte Spill
-; CHECK-NEXT: vldrw.u32 q0, [r5]
-; CHECK-NEXT: adr.w lr, .LCPI3_1
-; CHECK-NEXT: vstrw.32 q0, [sp, #128] @ 16-byte Spill
-; CHECK-NEXT: vldrw.u32 q0, [r8]
-; CHECK-NEXT: vstrw.32 q0, [sp, #112] @ 16-byte Spill
-; CHECK-NEXT: vldrw.u32 q0, [lr]
-; CHECK-NEXT: adr.w r12, .LCPI3_0
-; CHECK-NEXT: vstrw.32 q0, [sp, #96] @ 16-byte Spill
-; CHECK-NEXT: vldrw.u32 q0, [r12]
-; CHECK-NEXT: adr r7, .LCPI3_7
-; CHECK-NEXT: adr r5, .LCPI3_10
-; CHECK-NEXT: vstrw.32 q0, [sp, #80] @ 16-byte Spill
-; CHECK-NEXT: vmov.i32 q0, #0x7fff
-; CHECK-NEXT: vstrw.32 q0, [sp, #64] @ 16-byte Spill
-; CHECK-NEXT: vldrw.u32 q0, [r7]
-; CHECK-NEXT: adr r6, .LCPI3_9
-; CHECK-NEXT: adr r4, .LCPI3_6
-; CHECK-NEXT: vstrw.32 q0, [sp, #48] @ 16-byte Spill
-; CHECK-NEXT: vldrw.u32 q0, [r5]
-; CHECK-NEXT: adr r7, .LCPI3_8
-; CHECK-NEXT: vldrw.u32 q1, [r4]
-; CHECK-NEXT: vstrw.32 q0, [sp, #32] @ 16-byte Spill
-; CHECK-NEXT: vldrw.u32 q0, [r6]
-; CHECK-NEXT: vstrw.32 q1, [sp, #192] @ 16-byte Spill
-; CHECK-NEXT: vstrw.32 q0, [sp, #16] @ 16-byte Spill
-; CHECK-NEXT: vldrw.u32 q0, [r7]
-; CHECK-NEXT: vstrw.32 q0, [sp] @ 16-byte Spill
+; CHECK-NEXT: movw r12, #7471
+; CHECK-NEXT: movw r9, #19595
+; CHECK-NEXT: movw r8, #38470
+; CHECK-NEXT: movt r4, #65535
+; CHECK-NEXT: movt r5, #65535
+; CHECK-NEXT: movw r6, #19485
+; CHECK-NEXT: movt r7, #65535
+; CHECK-NEXT: movw r3, #13282
+; CHECK-NEXT: vstrw.32 q1, [sp] @ 16-byte Spill
; CHECK-NEXT: dlstp.32 lr, r2
; CHECK-NEXT: .LBB3_2: @ %vector.body
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: vldrw.u32 q0, [sp, #192] @ 16-byte Reload
-; CHECK-NEXT: vldrb.u32 q4, [r0, q0]
-; CHECK-NEXT: vldrw.u32 q0, [sp, #176] @ 16-byte Reload
-; CHECK-NEXT: vldrb.u32 q7, [r0, q0]
-; CHECK-NEXT: vldrw.u32 q0, [sp, #144] @ 16-byte Reload
-; CHECK-NEXT: vldrw.u32 q5, [sp, #112] @ 16-byte Reload
-; CHECK-NEXT: vmul.i32 q6, q7, q0
-; CHECK-NEXT: vldrw.u32 q0, [sp, #128] @ 16-byte Reload
-; CHECK-NEXT: vldrb.u32 q1, [r0, q5]
-; CHECK-NEXT: vldrw.u32 q2, [sp, #80] @ 16-byte Reload
-; CHECK-NEXT: vmul.i32 q3, q4, q0
-; CHECK-NEXT: vldrw.u32 q0, [sp, #96] @ 16-byte Reload
-; CHECK-NEXT: vadd.i32 q3, q3, q6
+; CHECK-NEXT: vldrw.u32 q1, [sp, #16] @ 16-byte Reload
+; CHECK-NEXT: vldrb.u32 q7, [r0, q1]
+; CHECK-NEXT: vldrb.u32 q5, [r0, q2]
+; CHECK-NEXT: vmul.i32 q4, q5, r8
+; CHECK-NEXT: vmla.u32 q4, q7, r9
+; CHECK-NEXT: vldrb.u32 q6, [r0, q3]
+; CHECK-NEXT: vmla.u32 q4, q6, r12
; CHECK-NEXT: adds r0, #12
-; CHECK-NEXT: vmul.i32 q6, q1, q0
-; CHECK-NEXT: vldrw.u32 q0, [sp, #160] @ 16-byte Reload
-; CHECK-NEXT: vadd.i32 q3, q3, q6
-; CHECK-NEXT: vadd.i32 q3, q3, q0
-; CHECK-NEXT: vshr.u32 q6, q3, #16
-; CHECK-NEXT: vmul.i32 q3, q7, q2
-; CHECK-NEXT: vldrw.u32 q2, [sp, #64] @ 16-byte Reload
-; CHECK-NEXT: vmul.i32 q2, q4, q2
-; CHECK-NEXT: vadd.i32 q2, q2, q3
-; CHECK-NEXT: vldrw.u32 q3, [sp, #48] @ 16-byte Reload
-; CHECK-NEXT: vmul.i32 q3, q1, q3
-; CHECK-NEXT: vadd.i32 q2, q2, q3
-; CHECK-NEXT: vldrw.u32 q3, [sp, #32] @ 16-byte Reload
-; CHECK-NEXT: vadd.i32 q2, q2, q0
-; CHECK-NEXT: vmul.i32 q3, q7, q3
-; CHECK-NEXT: vldrw.u32 q7, [sp, #16] @ 16-byte Reload
-; CHECK-NEXT: vshr.u32 q2, q2, #16
-; CHECK-NEXT: vmul.i32 q4, q4, q7
-; CHECK-NEXT: vadd.i32 q3, q4, q3
-; CHECK-NEXT: vldrw.u32 q4, [sp] @ 16-byte Reload
-; CHECK-NEXT: vmul.i32 q1, q1, q4
-; CHECK-NEXT: vadd.i32 q1, q3, q1
+; CHECK-NEXT: vadd.i32 q4, q4, q0
+; CHECK-NEXT: vshr.u32 q4, q4, #16
+; CHECK-NEXT: vstrb.32 q4, [r1, q1]
+; CHECK-NEXT: vldrw.u32 q1, [sp] @ 16-byte Reload
+; CHECK-NEXT: vmul.i32 q4, q7, q1
+; CHECK-NEXT: vmul.i32 q1, q5, r7
+; CHECK-NEXT: vmla.u32 q1, q7, r3
+; CHECK-NEXT: vmla.u32 q4, q5, r5
+; CHECK-NEXT: vmla.u32 q1, q6, r6
+; CHECK-NEXT: vmla.u32 q4, q6, r4
; CHECK-NEXT: vadd.i32 q1, q1, q0
-; CHECK-NEXT: vldrw.u32 q0, [sp, #192] @ 16-byte Reload
+; CHECK-NEXT: vadd.i32 q4, q4, q0
; CHECK-NEXT: vshr.u32 q1, q1, #16
-; CHECK-NEXT: vstrb.32 q1, [r1, q0]
-; CHECK-NEXT: vldrw.u32 q0, [sp, #176] @ 16-byte Reload
-; CHECK-NEXT: vstrb.32 q2, [r1, q0]
-; CHECK-NEXT: vstrb.32 q6, [r1, q5]
+; CHECK-NEXT: vshr.u32 q4, q4, #16
+; CHECK-NEXT: vstrb.32 q4, [r1, q2]
+; CHECK-NEXT: vstrb.32 q1, [r1, q3]
; CHECK-NEXT: adds r1, #12
; CHECK-NEXT: letp lr, .LBB3_2
; CHECK-NEXT: .LBB3_3: @ %for.cond.cleanup
-; CHECK-NEXT: add sp, #216
+; CHECK-NEXT: add sp, #32
; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15}
-; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, pc}
+; CHECK-NEXT: add sp, #4
+; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, pc}
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: @ %bb.4:
; CHECK-NEXT: .LCPI3_0:
-; CHECK-NEXT: .long 4294952177 @ 0xffffc4f1
-; CHECK-NEXT: .long 4294952177 @ 0xffffc4f1
-; CHECK-NEXT: .long 4294952177 @ 0xffffc4f1
-; CHECK-NEXT: .long 4294952177 @ 0xffffc4f1
-; CHECK-NEXT: .LCPI3_1:
-; CHECK-NEXT: .long 19485 @ 0x4c1d
-; CHECK-NEXT: .long 19485 @ 0x4c1d
-; CHECK-NEXT: .long 19485 @ 0x4c1d
-; CHECK-NEXT: .long 19485 @ 0x4c1d
-; CHECK-NEXT: .LCPI3_2:
; CHECK-NEXT: .long 2 @ 0x2
; CHECK-NEXT: .long 5 @ 0x5
; CHECK-NEXT: .long 8 @ 0x8
; CHECK-NEXT: .long 11 @ 0xb
-; CHECK-NEXT: .LCPI3_3:
-; CHECK-NEXT: .long 13282 @ 0x33e2
-; CHECK-NEXT: .long 13282 @ 0x33e2
-; CHECK-NEXT: .long 13282 @ 0x33e2
-; CHECK-NEXT: .long 13282 @ 0x33e2
-; CHECK-NEXT: .LCPI3_4:
-; CHECK-NEXT: .long 4294934529 @ 0xffff8001
-; CHECK-NEXT: .long 4294934529 @ 0xffff8001
-; CHECK-NEXT: .long 4294934529 @ 0xffff8001
-; CHECK-NEXT: .long 4294934529 @ 0xffff8001
-; CHECK-NEXT: .LCPI3_5:
+; CHECK-NEXT: .LCPI3_1:
; CHECK-NEXT: .long 1 @ 0x1
; CHECK-NEXT: .long 4 @ 0x4
; CHECK-NEXT: .long 7 @ 0x7
; CHECK-NEXT: .long 10 @ 0xa
-; CHECK-NEXT: .LCPI3_6:
+; CHECK-NEXT: .LCPI3_2:
; CHECK-NEXT: .long 0 @ 0x0
; CHECK-NEXT: .long 3 @ 0x3
; CHECK-NEXT: .long 6 @ 0x6
; CHECK-NEXT: .long 9 @ 0x9
-; CHECK-NEXT: .LCPI3_7:
-; CHECK-NEXT: .long 4294949648 @ 0xffffbb10
-; CHECK-NEXT: .long 4294949648 @ 0xffffbb10
-; CHECK-NEXT: .long 4294949648 @ 0xffffbb10
-; CHECK-NEXT: .long 4294949648 @ 0xffffbb10
-; CHECK-NEXT: .LCPI3_8:
-; CHECK-NEXT: .long 7471 @ 0x1d2f
-; CHECK-NEXT: .long 7471 @ 0x1d2f
-; CHECK-NEXT: .long 7471 @ 0x1d2f
-; CHECK-NEXT: .long 7471 @ 0x1d2f
-; CHECK-NEXT: .LCPI3_9:
-; CHECK-NEXT: .long 19595 @ 0x4c8b
-; CHECK-NEXT: .long 19595 @ 0x4c8b
-; CHECK-NEXT: .long 19595 @ 0x4c8b
-; CHECK-NEXT: .long 19595 @ 0x4c8b
-; CHECK-NEXT: .LCPI3_10:
-; CHECK-NEXT: .long 38470 @ 0x9646
-; CHECK-NEXT: .long 38470 @ 0x9646
-; CHECK-NEXT: .long 38470 @ 0x9646
-; CHECK-NEXT: .long 38470 @ 0x9646
entry:
%cmp47.not = icmp eq i32 %N, 0
br i1 %cmp47.not, label %for.cond.cleanup, label %vector.ph
diff --git a/llvm/test/CodeGen/Thumb2/mve-pred-vctpvpsel.ll b/llvm/test/CodeGen/Thumb2/mve-pred-vctpvpsel.ll
index 56f4acd78b4b..406d2d15a6b3 100644
--- a/llvm/test/CodeGen/Thumb2/mve-pred-vctpvpsel.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-pred-vctpvpsel.ll
@@ -4,14 +4,15 @@
define void @arm_min_helium_f32(float* %pSrc, i32 %blockSize, float* nocapture %pResult, i32* nocapture %pIndex) {
; CHECK-LABEL: arm_min_helium_f32:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: .save {r4, r6, r7, lr}
-; CHECK-NEXT: push {r4, r6, r7, lr}
+; CHECK-NEXT: .save {r4, lr}
+; CHECK-NEXT: push {r4, lr}
; CHECK-NEXT: .vsave {d8, d9}
; CHECK-NEXT: vpush {d8, d9}
-; CHECK-NEXT: movs r6, #0
-; CHECK-NEXT: vidup.u32 q2, r6, #1
-; CHECK-NEXT: adr r4, .LCPI0_0
-; CHECK-NEXT: vldrw.u32 q1, [r4]
+; CHECK-NEXT: movs r4, #0
+; CHECK-NEXT: vidup.u32 q2, r4, #1
+; CHECK-NEXT: movw r4, #54437
+; CHECK-NEXT: movt r4, #21352
+; CHECK-NEXT: vdup.32 q1, r4
; CHECK-NEXT: vmov.i32 q0, #0x0
; CHECK-NEXT: vmov.i32 q3, #0x4
; CHECK-NEXT: dlstp.32 lr, r1
@@ -24,7 +25,7 @@ define void @arm_min_helium_f32(float* %pSrc, i32 %blockSize, float* nocapture %
; CHECK-NEXT: vadd.i32 q2, q2, q3
; CHECK-NEXT: letp lr, .LBB0_1
; CHECK-NEXT: @ %bb.2: @ %do.end
-; CHECK-NEXT: vldr s8, .LCPI0_1
+; CHECK-NEXT: vldr s8, .LCPI0_0
; CHECK-NEXT: vdup.32 q3, r1
; CHECK-NEXT: vmov r0, s8
; CHECK-NEXT: vminnmv.f32 r0, q1
@@ -35,16 +36,11 @@ define void @arm_min_helium_f32(float* %pSrc, i32 %blockSize, float* nocapture %
; CHECK-NEXT: str r1, [r3]
; CHECK-NEXT: vstr s8, [r2]
; CHECK-NEXT: vpop {d8, d9}
-; CHECK-NEXT: pop {r4, r6, r7, pc}
-; CHECK-NEXT: .p2align 4
+; CHECK-NEXT: pop {r4, pc}
+; CHECK-NEXT: .p2align 2
; CHECK-NEXT: @ %bb.3:
; CHECK-NEXT: .LCPI0_0:
; CHECK-NEXT: .long 0x5368d4a5 @ float 9.99999995E+11
-; CHECK-NEXT: .long 0x5368d4a5 @ float 9.99999995E+11
-; CHECK-NEXT: .long 0x5368d4a5 @ float 9.99999995E+11
-; CHECK-NEXT: .long 0x5368d4a5 @ float 9.99999995E+11
-; CHECK-NEXT: .LCPI0_1:
-; CHECK-NEXT: .long 0x5368d4a5 @ float 9.99999995E+11
entry:
%0 = tail call { <4 x i32>, i32 } @llvm.arm.mve.vidup.v4i32(i32 0, i32 1)
%1 = extractvalue { <4 x i32>, i32 } %0, 0
diff --git a/llvm/test/CodeGen/Thumb2/mve-shifts.ll b/llvm/test/CodeGen/Thumb2/mve-shifts.ll
index 19d51938a54e..dbd03ff78447 100644
--- a/llvm/test/CodeGen/Thumb2/mve-shifts.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-shifts.ll
@@ -470,29 +470,11 @@ entry:
define arm_aapcs_vfpcc <16 x i8> @shl_qiv_int8_t(<16 x i8> %src1) {
; CHECK-LABEL: shl_qiv_int8_t:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: adr r0, .LCPI36_0
-; CHECK-NEXT: vldrw.u32 q1, [r0]
+; CHECK-NEXT: movw r0, #513
+; CHECK-NEXT: movt r0, #1027
+; CHECK-NEXT: vdup.32 q1, r0
; CHECK-NEXT: vshl.u8 q0, q0, q1
; CHECK-NEXT: bx lr
-; CHECK-NEXT: .p2align 4
-; CHECK-NEXT: @ %bb.1:
-; CHECK-NEXT: .LCPI36_0:
-; CHECK-NEXT: .byte 1 @ 0x1
-; CHECK-NEXT: .byte 2 @ 0x2
-; CHECK-NEXT: .byte 3 @ 0x3
-; CHECK-NEXT: .byte 4 @ 0x4
-; CHECK-NEXT: .byte 1 @ 0x1
-; CHECK-NEXT: .byte 2 @ 0x2
-; CHECK-NEXT: .byte 3 @ 0x3
-; CHECK-NEXT: .byte 4 @ 0x4
-; CHECK-NEXT: .byte 1 @ 0x1
-; CHECK-NEXT: .byte 2 @ 0x2
-; CHECK-NEXT: .byte 3 @ 0x3
-; CHECK-NEXT: .byte 4 @ 0x4
-; CHECK-NEXT: .byte 1 @ 0x1
-; CHECK-NEXT: .byte 2 @ 0x2
-; CHECK-NEXT: .byte 3 @ 0x3
-; CHECK-NEXT: .byte 4 @ 0x4
entry:
%0 = shl <16 x i8> %src1, <i8 1, i8 2, i8 3, i8 4, i8 1, i8 2, i8 3, i8 4, i8 1, i8 2, i8 3, i8 4, i8 1, i8 2, i8 3, i8 4>
ret <16 x i8> %0
diff --git a/llvm/test/CodeGen/Thumb2/mve-vmovimm.ll b/llvm/test/CodeGen/Thumb2/mve-vmovimm.ll
index 4226dab4adb6..ce4756b17b45 100644
--- a/llvm/test/CodeGen/Thumb2/mve-vmovimm.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-vmovimm.ll
@@ -21,33 +21,23 @@ entry:
ret <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
}
+; This has 0x01020304 or 0x04030201 vdup.32'd to q reg depending on endianness.
+; The big endian is different as there is an implicit vrev64.8 out of the
+; function, which gets constant folded away.
define arm_aapcs_vfpcc <16 x i8> @mov_int8_1234() {
; CHECKLE-LABEL: mov_int8_1234:
; CHECKLE: @ %bb.0: @ %entry
-; CHECKLE-NEXT: adr r0, .LCPI2_0
-; CHECKLE-NEXT: vldrw.u32 q0, [r0]
+; CHECKLE-NEXT: movw r0, #513
+; CHECKLE-NEXT: movt r0, #1027
+; CHECKLE-NEXT: vdup.32 q0, r0
; CHECKLE-NEXT: bx lr
-; CHECKLE-NEXT: .p2align 4
-; CHECKLE-NEXT: @ %bb.1:
-; CHECKLE-NEXT: .LCPI2_0:
-; CHECKLE-NEXT: .long 67305985 @ double 2.4380727978175888E-289
-; CHECKLE-NEXT: .long 67305985
-; CHECKLE-NEXT: .long 67305985 @ double 2.4380727978175888E-289
-; CHECKLE-NEXT: .long 67305985
;
; CHECKBE-LABEL: mov_int8_1234:
; CHECKBE: @ %bb.0: @ %entry
-; CHECKBE-NEXT: adr r0, .LCPI2_0
-; CHECKBE-NEXT: vldrb.u8 q1, [r0]
-; CHECKBE-NEXT: vrev64.8 q0, q1
+; CHECKBE-NEXT: movw r0, #772
+; CHECKBE-NEXT: movt r0, #258
+; CHECKBE-NEXT: vdup.32 q0, r0
; CHECKBE-NEXT: bx lr
-; CHECKBE-NEXT: .p2align 4
-; CHECKBE-NEXT: @ %bb.1:
-; CHECKBE-NEXT: .LCPI2_0:
-; CHECKBE-NEXT: .long 16909060 @ double 8.2078802900595913E-304
-; CHECKBE-NEXT: .long 16909060
-; CHECKBE-NEXT: .long 16909060 @ double 8.2078802900595913E-304
-; CHECKBE-NEXT: .long 16909060
entry:
ret <16 x i8> <i8 1, i8 2, i8 3, i8 4, i8 1, i8 2, i8 3, i8 4, i8 1, i8 2, i8 3, i8 4, i8 1, i8 2, i8 3, i8 4>
}
@@ -89,32 +79,11 @@ entry:
}
define arm_aapcs_vfpcc <8 x i16> @mov_int16_258() {
-; CHECKLE-LABEL: mov_int16_258:
-; CHECKLE: @ %bb.0: @ %entry
-; CHECKLE-NEXT: adr r0, .LCPI7_0
-; CHECKLE-NEXT: vldrw.u32 q0, [r0]
-; CHECKLE-NEXT: bx lr
-; CHECKLE-NEXT: .p2align 4
-; CHECKLE-NEXT: @ %bb.1:
-; CHECKLE-NEXT: .LCPI7_0:
-; CHECKLE-NEXT: .long 16908546 @ double 8.204306265173532E-304
-; CHECKLE-NEXT: .long 16908546
-; CHECKLE-NEXT: .long 16908546 @ double 8.204306265173532E-304
-; CHECKLE-NEXT: .long 16908546
-;
-; CHECKBE-LABEL: mov_int16_258:
-; CHECKBE: @ %bb.0: @ %entry
-; CHECKBE-NEXT: adr r0, .LCPI7_0
-; CHECKBE-NEXT: vldrb.u8 q1, [r0]
-; CHECKBE-NEXT: vrev64.8 q0, q1
-; CHECKBE-NEXT: bx lr
-; CHECKBE-NEXT: .p2align 4
-; CHECKBE-NEXT: @ %bb.1:
-; CHECKBE-NEXT: .LCPI7_0:
-; CHECKBE-NEXT: .long 16908546 @ double 8.204306265173532E-304
-; CHECKBE-NEXT: .long 16908546
-; CHECKBE-NEXT: .long 16908546 @ double 8.204306265173532E-304
-; CHECKBE-NEXT: .long 16908546
+; CHECK-LABEL: mov_int16_258:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: mov.w r0, #258
+; CHECK-NEXT: vdup.16 q0, r0
+; CHECK-NEXT: bx lr
entry:
ret <8 x i16> <i16 258, i16 258, i16 258, i16 258, i16 258, i16 258, i16 258, i16 258>
}
@@ -156,32 +125,12 @@ entry:
}
define arm_aapcs_vfpcc <4 x i32> @mov_int32_16777217() {
-; CHECKLE-LABEL: mov_int32_16777217:
-; CHECKLE: @ %bb.0: @ %entry
-; CHECKLE-NEXT: adr r0, .LCPI12_0
-; CHECKLE-NEXT: vldrw.u32 q0, [r0]
-; CHECKLE-NEXT: bx lr
-; CHECKLE-NEXT: .p2align 4
-; CHECKLE-NEXT: @ %bb.1:
-; CHECKLE-NEXT: .LCPI12_0:
-; CHECKLE-NEXT: .long 16777217 @ double 7.2911290000737531E-304
-; CHECKLE-NEXT: .long 16777217
-; CHECKLE-NEXT: .long 16777217 @ double 7.2911290000737531E-304
-; CHECKLE-NEXT: .long 16777217
-;
-; CHECKBE-LABEL: mov_int32_16777217:
-; CHECKBE: @ %bb.0: @ %entry
-; CHECKBE-NEXT: adr r0, .LCPI12_0
-; CHECKBE-NEXT: vldrb.u8 q1, [r0]
-; CHECKBE-NEXT: vrev64.8 q0, q1
-; CHECKBE-NEXT: bx lr
-; CHECKBE-NEXT: .p2align 4
-; CHECKBE-NEXT: @ %bb.1:
-; CHECKBE-NEXT: .LCPI12_0:
-; CHECKBE-NEXT: .long 16777217 @ double 7.2911290000737531E-304
-; CHECKBE-NEXT: .long 16777217
-; CHECKBE-NEXT: .long 16777217 @ double 7.2911290000737531E-304
-; CHECKBE-NEXT: .long 16777217
+; CHECK-LABEL: mov_int32_16777217:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: movs r0, #1
+; CHECK-NEXT: movt r0, #256
+; CHECK-NEXT: vdup.32 q0, r0
+; CHECK-NEXT: bx lr
entry:
ret <4 x i32> <i32 16777217, i32 16777217, i32 16777217, i32 16777217>
}
@@ -223,32 +172,12 @@ entry:
}
define arm_aapcs_vfpcc <4 x i32> @mov_int32_4278190335() {
-; CHECKLE-LABEL: mov_int32_4278190335:
-; CHECKLE: @ %bb.0: @ %entry
-; CHECKLE-NEXT: adr r0, .LCPI17_0
-; CHECKLE-NEXT: vldrw.u32 q0, [r0]
-; CHECKLE-NEXT: bx lr
-; CHECKLE-NEXT: .p2align 4
-; CHECKLE-NEXT: @ %bb.1:
-; CHECKLE-NEXT: .LCPI17_0:
-; CHECKLE-NEXT: .long 4278190335 @ double -5.4874634341155774E+303
-; CHECKLE-NEXT: .long 4278190335
-; CHECKLE-NEXT: .long 4278190335 @ double -5.4874634341155774E+303
-; CHECKLE-NEXT: .long 4278190335
-;
-; CHECKBE-LABEL: mov_int32_4278190335:
-; CHECKBE: @ %bb.0: @ %entry
-; CHECKBE-NEXT: adr r0, .LCPI17_0
-; CHECKBE-NEXT: vldrb.u8 q1, [r0]
-; CHECKBE-NEXT: vrev64.8 q0, q1
-; CHECKBE-NEXT: bx lr
-; CHECKBE-NEXT: .p2align 4
-; CHECKBE-NEXT: @ %bb.1:
-; CHECKBE-NEXT: .LCPI17_0:
-; CHECKBE-NEXT: .long 4278190335 @ double -5.4874634341155774E+303
-; CHECKBE-NEXT: .long 4278190335
-; CHECKBE-NEXT: .long 4278190335 @ double -5.4874634341155774E+303
-; CHECKBE-NEXT: .long 4278190335
+; CHECK-LABEL: mov_int32_4278190335:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: movs r0, #255
+; CHECK-NEXT: movt r0, #65280
+; CHECK-NEXT: vdup.32 q0, r0
+; CHECK-NEXT: bx lr
entry:
ret <4 x i32> <i32 4278190335, i32 4278190335, i32 4278190335, i32 4278190335>
}
@@ -263,32 +192,11 @@ entry:
}
define arm_aapcs_vfpcc <4 x i32> @mov_int32_16908546() {
-; CHECKLE-LABEL: mov_int32_16908546:
-; CHECKLE: @ %bb.0: @ %entry
-; CHECKLE-NEXT: adr r0, .LCPI19_0
-; CHECKLE-NEXT: vldrw.u32 q0, [r0]
-; CHECKLE-NEXT: bx lr
-; CHECKLE-NEXT: .p2align 4
-; CHECKLE-NEXT: @ %bb.1:
-; CHECKLE-NEXT: .LCPI19_0:
-; CHECKLE-NEXT: .long 16908546 @ double 8.204306265173532E-304
-; CHECKLE-NEXT: .long 16908546
-; CHECKLE-NEXT: .long 16908546 @ double 8.204306265173532E-304
-; CHECKLE-NEXT: .long 16908546
-;
-; CHECKBE-LABEL: mov_int32_16908546:
-; CHECKBE: @ %bb.0: @ %entry
-; CHECKBE-NEXT: adr r0, .LCPI19_0
-; CHECKBE-NEXT: vldrb.u8 q1, [r0]
-; CHECKBE-NEXT: vrev64.8 q0, q1
-; CHECKBE-NEXT: bx lr
-; CHECKBE-NEXT: .p2align 4
-; CHECKBE-NEXT: @ %bb.1:
-; CHECKBE-NEXT: .LCPI19_0:
-; CHECKBE-NEXT: .long 16908546 @ double 8.204306265173532E-304
-; CHECKBE-NEXT: .long 16908546
-; CHECKBE-NEXT: .long 16908546 @ double 8.204306265173532E-304
-; CHECKBE-NEXT: .long 16908546
+; CHECK-LABEL: mov_int32_16908546:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: mov.w r0, #258
+; CHECK-NEXT: vdup.16 q0, r0
+; CHECK-NEXT: bx lr
entry:
ret <4 x i32> <i32 16908546, i32 16908546, i32 16908546, i32 16908546>
}
@@ -425,63 +333,22 @@ entry:
}
define arm_aapcs_vfpcc <4 x float> @mov_float_1() {
-; CHECKLE-LABEL: mov_float_1:
-; CHECKLE: @ %bb.0: @ %entry
-; CHECKLE-NEXT: adr r0, .LCPI28_0
-; CHECKLE-NEXT: vldrw.u32 q0, [r0]
-; CHECKLE-NEXT: bx lr
-; CHECKLE-NEXT: .p2align 4
-; CHECKLE-NEXT: @ %bb.1:
-; CHECKLE-NEXT: .LCPI28_0:
-; CHECKLE-NEXT: .long 1065353216 @ double 0.007812501848093234
-; CHECKLE-NEXT: .long 1065353216
-; CHECKLE-NEXT: .long 1065353216 @ double 0.007812501848093234
-; CHECKLE-NEXT: .long 1065353216
-;
-; CHECKBE-LABEL: mov_float_1:
-; CHECKBE: @ %bb.0: @ %entry
-; CHECKBE-NEXT: adr r0, .LCPI28_0
-; CHECKBE-NEXT: vldrb.u8 q1, [r0]
-; CHECKBE-NEXT: vrev64.8 q0, q1
-; CHECKBE-NEXT: bx lr
-; CHECKBE-NEXT: .p2align 4
-; CHECKBE-NEXT: @ %bb.1:
-; CHECKBE-NEXT: .LCPI28_0:
-; CHECKBE-NEXT: .long 1065353216 @ double 0.007812501848093234
-; CHECKBE-NEXT: .long 1065353216
-; CHECKBE-NEXT: .long 1065353216 @ double 0.007812501848093234
-; CHECKBE-NEXT: .long 1065353216
+; CHECK-LABEL: mov_float_1:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: mov.w r0, #1065353216
+; CHECK-NEXT: vdup.32 q0, r0
+; CHECK-NEXT: bx lr
entry:
ret <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
}
define arm_aapcs_vfpcc <4 x float> @mov_float_m3() {
-; CHECKLE-LABEL: mov_float_m3:
-; CHECKLE: @ %bb.0: @ %entry
-; CHECKLE-NEXT: adr r0, .LCPI29_0
-; CHECKLE-NEXT: vldrw.u32 q0, [r0]
-; CHECKLE-NEXT: bx lr
-; CHECKLE-NEXT: .p2align 4
-; CHECKLE-NEXT: @ %bb.1:
-; CHECKLE-NEXT: .LCPI29_0:
-; CHECKLE-NEXT: .long 3225419776 @ double -32.000022917985916
-; CHECKLE-NEXT: .long 3225419776
-; CHECKLE-NEXT: .long 3225419776 @ double -32.000022917985916
-; CHECKLE-NEXT: .long 3225419776
-;
-; CHECKBE-LABEL: mov_float_m3:
-; CHECKBE: @ %bb.0: @ %entry
-; CHECKBE-NEXT: adr r0, .LCPI29_0
-; CHECKBE-NEXT: vldrb.u8 q1, [r0]
-; CHECKBE-NEXT: vrev64.8 q0, q1
-; CHECKBE-NEXT: bx lr
-; CHECKBE-NEXT: .p2align 4
-; CHECKBE-NEXT: @ %bb.1:
-; CHECKBE-NEXT: .LCPI29_0:
-; CHECKBE-NEXT: .long 3225419776 @ double -32.000022917985916
-; CHECKBE-NEXT: .long 3225419776
-; CHECKBE-NEXT: .long 3225419776 @ double -32.000022917985916
-; CHECKBE-NEXT: .long 3225419776
+; CHECK-LABEL: mov_float_m3:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: movs r0, #0
+; CHECK-NEXT: movt r0, #49216
+; CHECK-NEXT: vdup.32 q0, r0
+; CHECK-NEXT: bx lr
entry:
ret <4 x float> <float -3.000000e+00, float -3.000000e+00, float -3.000000e+00, float -3.000000e+00>
}
diff --git a/llvm/test/CodeGen/Thumb2/mve-vmvnimm.ll b/llvm/test/CodeGen/Thumb2/mve-vmvnimm.ll
index a54d005444f7..898864342fd1 100644
--- a/llvm/test/CodeGen/Thumb2/mve-vmvnimm.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-vmvnimm.ll
@@ -68,16 +68,10 @@ entry:
define arm_aapcs_vfpcc <4 x i32> @mov_int32_4278386688() {
; CHECK-LABEL: mov_int32_4278386688:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: adr r0, .LCPI7_0
-; CHECK-NEXT: vldrw.u32 q0, [r0]
+; CHECK-NEXT: movs r0, #0
+; CHECK-NEXT: movt r0, #65283
+; CHECK-NEXT: vdup.32 q0, r0
; CHECK-NEXT: bx lr
-; CHECK-NEXT: .p2align 4
-; CHECK-NEXT: @ %bb.1:
-; CHECK-NEXT: .LCPI7_0:
-; CHECK-NEXT: .long 4278386688 @ double -6.5147775434702224E+303
-; CHECK-NEXT: .long 4278386688
-; CHECK-NEXT: .long 4278386688 @ double -6.5147775434702224E+303
-; CHECK-NEXT: .long 4278386688
entry:
ret <4 x i32> <i32 4278386688, i32 4278386688, i32 4278386688, i32 4278386688>
}
More information about the llvm-commits mailing list