[llvm] [CodeGen] Combine two loops in SlotIndexes.cpp file (PR #127631)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Feb 28 06:25:37 PST 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-regalloc
Author: None (Rifet-c)
<details>
<summary>Changes</summary>
Merged two loops that were iterating over the same machine basic block into one. Also made some minor readability improvements (renaming variables, adding comments, and absorbing an if condition into a variable).
---
Patch is 60.50 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/127631.diff
3 Files Affected:
- (modified) llvm/lib/CodeGen/SlotIndexes.cpp (+20-12)
- (modified) llvm/test/CodeGen/Thumb2/mve-shuffle.ll (+133-67)
- (modified) llvm/test/CodeGen/Thumb2/mve-vld3.ll (+812-407)
``````````diff
diff --git a/llvm/lib/CodeGen/SlotIndexes.cpp b/llvm/lib/CodeGen/SlotIndexes.cpp
index 1b92a5aa59d18..63e8627ad2a66 100644
--- a/llvm/lib/CodeGen/SlotIndexes.cpp
+++ b/llvm/lib/CodeGen/SlotIndexes.cpp
@@ -212,6 +212,7 @@ void SlotIndexes::repairIndexesInRange(MachineBasicBlock *MBB,
IndexList::iterator ListI = endIdx.listEntry()->getIterator();
MachineBasicBlock::iterator MBBI = End;
bool pastStart = false;
+ bool oldIndexesRemoved = false;
while (ListI != ListB || MBBI != Begin || (includeStart && !pastStart)) {
assert(ListI->getIndex() >= startIdx.getIndex() &&
(includeStart || !pastStart) &&
@@ -220,6 +221,8 @@ void SlotIndexes::repairIndexesInRange(MachineBasicBlock *MBB,
MachineInstr *SlotMI = ListI->getInstr();
MachineInstr *MI = (MBBI != MBB->end() && !pastStart) ? &*MBBI : nullptr;
bool MBBIAtBegin = MBBI == Begin && (!includeStart || pastStart);
+ bool MIIndexNotFound = MI && mi2iMap.find(MI) == mi2iMap.end();
+ bool SlotMIRemoved = false;
if (SlotMI == MI && !MBBIAtBegin) {
--ListI;
@@ -227,25 +230,30 @@ void SlotIndexes::repairIndexesInRange(MachineBasicBlock *MBB,
--MBBI;
else
pastStart = true;
- } else if (MI && !mi2iMap.contains(MI)) {
+ } else if (MIIndexNotFound || oldIndexesRemoved) {
if (MBBI != Begin)
--MBBI;
else
pastStart = true;
} else {
- --ListI;
- if (SlotMI)
+ // We ran through all the indexes on the interval
+ // -> The only thing left is to go through all the
+ // remaining MBB instructions and update their indexes
+ if (ListI == ListB)
+ oldIndexesRemoved = true;
+ else
+ --ListI;
+ if (SlotMI) {
removeMachineInstrFromMaps(*SlotMI);
+ SlotMIRemoved = true;
+ }
}
- }
- // In theory this could be combined with the previous loop, but it is tricky
- // to update the IndexList while we are iterating it.
- for (MachineBasicBlock::iterator I = End; I != Begin;) {
- --I;
- MachineInstr &MI = *I;
- if (!MI.isDebugOrPseudoInstr() && !mi2iMap.contains(&MI))
- insertMachineInstrInMaps(MI);
+ MachineInstr *instrToInsert = SlotMIRemoved ? SlotMI : MI;
+
+ // Insert isntruction back into the maps after passing it/removing the index
+ if ((MIIndexNotFound || SlotMIRemoved) && instrToInsert->getParent() != nullptr && !instrToInsert->isDebugOrPseudoInstr())
+ insertMachineInstrInMaps(*instrToInsert);
}
}
@@ -287,4 +295,4 @@ LLVM_DUMP_METHOD void SlotIndex::dump() const {
print(dbgs());
dbgs() << "\n";
}
-#endif
+#endif
\ No newline at end of file
diff --git a/llvm/test/CodeGen/Thumb2/mve-shuffle.ll b/llvm/test/CodeGen/Thumb2/mve-shuffle.ll
index feda774ab0e65..1c3482111c273 100644
--- a/llvm/test/CodeGen/Thumb2/mve-shuffle.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-shuffle.ll
@@ -223,18 +223,31 @@ entry:
}
define arm_aapcs_vfpcc <8 x i16> @shuffle3_i16(<8 x i16> %src) {
-; CHECK-LABEL: shuffle3_i16:
-; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vmovx.f16 s5, s3
-; CHECK-NEXT: vmovx.f16 s6, s1
-; CHECK-NEXT: vmovx.f16 s4, s0
-; CHECK-NEXT: vins.f16 s1, s0
-; CHECK-NEXT: vins.f16 s6, s4
-; CHECK-NEXT: vins.f16 s5, s3
-; CHECK-NEXT: vmov.f32 s4, s2
-; CHECK-NEXT: vmov.f32 s7, s1
-; CHECK-NEXT: vmov q0, q1
-; CHECK-NEXT: bx lr
+; CHECK-LV-LABEL: shuffle3_i16:
+; CHECK-LV: @ %bb.0: @ %entry
+; CHECK-LV-NEXT: vmovx.f16 s5, s3
+; CHECK-LV-NEXT: vmovx.f16 s6, s1
+; CHECK-LV-NEXT: vmovx.f16 s4, s0
+; CHECK-LV-NEXT: vins.f16 s1, s0
+; CHECK-LV-NEXT: vins.f16 s6, s4
+; CHECK-LV-NEXT: vins.f16 s5, s3
+; CHECK-LV-NEXT: vmov.f32 s4, s2
+; CHECK-LV-NEXT: vmov.f32 s7, s1
+; CHECK-LV-NEXT: vmov q0, q1
+; CHECK-LV-NEXT: bx lr
+
+; CHECK-LIS-LABEL: shuffle3_i16:
+; CHECK-LIS: @ %bb.0: @ %entry
+; CHECK-LIS-NEXT: vmov q1, q0
+; CHECK-LIS-NEXT: vmovx.f16 s2, s5
+; CHECK-LIS-NEXT: vmovx.f16 s0, s4
+; CHECK-LIS-NEXT: vins.f16 s5, s4
+; CHECK-LIS-NEXT: vins.f16 s2, s0
+; CHECK-LIS-NEXT: vmov.f32 s0, s6
+; CHECK-LIS-NEXT: vmovx.f16 s1, s7
+; CHECK-LIS-NEXT: vmov.f32 s3, s5
+; CHECK-LIS-NEXT: vins.f16 s1, s7
+; CHECK-LIS-NEXT: bx lr
entry:
%out = shufflevector <8 x i16> %src, <8 x i16> undef, <8 x i32> <i32 4, i32 5, i32 7, i32 6, i32 3, i32 1, i32 2, i32 0>
ret <8 x i16> %out
@@ -1145,18 +1158,31 @@ entry:
}
define arm_aapcs_vfpcc <8 x half> @shuffle3_f16(<8 x half> %src) {
-; CHECK-LABEL: shuffle3_f16:
-; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vmovx.f16 s5, s3
-; CHECK-NEXT: vmovx.f16 s6, s1
-; CHECK-NEXT: vmovx.f16 s4, s0
-; CHECK-NEXT: vins.f16 s1, s0
-; CHECK-NEXT: vins.f16 s6, s4
-; CHECK-NEXT: vins.f16 s5, s3
-; CHECK-NEXT: vmov.f32 s4, s2
-; CHECK-NEXT: vmov.f32 s7, s1
-; CHECK-NEXT: vmov q0, q1
-; CHECK-NEXT: bx lr
+; CHECK-LV-LABEL: shuffle3_f16:
+; CHECK-LV: @ %bb.0: @ %entry
+; CHECK-LV-NEXT: vmovx.f16 s5, s3
+; CHECK-LV-NEXT: vmovx.f16 s6, s1
+; CHECK-LV-NEXT: vmovx.f16 s4, s0
+; CHECK-LV-NEXT: vins.f16 s1, s0
+; CHECK-LV-NEXT: vins.f16 s6, s4
+; CHECK-LV-NEXT: vins.f16 s5, s3
+; CHECK-LV-NEXT: vmov.f32 s4, s2
+; CHECK-LV-NEXT: vmov.f32 s7, s1
+; CHECK-LV-NEXT: vmov q0, q1
+; CHECK-LV-NEXT: bx lr
+
+; CHECK-LIS-LABEL: shuffle3_f16:
+; CHECK-LIS: @ %bb.0: @ %entry
+; CHECK-LIS-NEXT: vmov q1, q0
+; CHECK-LIS-NEXT: vmovx.f16 s2, s5
+; CHECK-LIS-NEXT: vmovx.f16 s0, s4
+; CHECK-LIS-NEXT: vins.f16 s5, s4
+; CHECK-LIS-NEXT: vins.f16 s2, s0
+; CHECK-LIS-NEXT: vmov.f32 s0, s6
+; CHECK-LIS-NEXT: vmovx.f16 s1, s7
+; CHECK-LIS-NEXT: vmov.f32 s3, s5
+; CHECK-LIS-NEXT: vins.f16 s1, s7
+; CHECK-LIS-NEXT: bx lr
entry:
%out = shufflevector <8 x half> %src, <8 x half> undef, <8 x i32> <i32 4, i32 5, i32 7, i32 6, i32 3, i32 1, i32 2, i32 0>
ret <8 x half> %out
@@ -1467,27 +1493,47 @@ entry:
ret <2 x double> %out
}
define arm_aapcs_vfpcc <8 x double> @shuffle9_f64(<4 x double> %src1, <4 x double> %src2) {
-; CHECK-LABEL: shuffle9_f64:
-; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: .vsave {d8, d9, d10, d11}
-; CHECK-NEXT: vpush {d8, d9, d10, d11}
-; CHECK-NEXT: vmov q5, q2
-; CHECK-NEXT: vmov.f32 s16, s0
-; CHECK-NEXT: vmov.f32 s18, s20
-; CHECK-NEXT: vmov.f32 s20, s2
-; CHECK-NEXT: vmov.f32 s10, s12
-; CHECK-NEXT: vmov.f32 s19, s21
-; CHECK-NEXT: vmov.f32 s8, s4
-; CHECK-NEXT: vmov.f32 s17, s1
-; CHECK-NEXT: vmov.f32 s21, s3
-; CHECK-NEXT: vmov q0, q4
-; CHECK-NEXT: vmov.f32 s12, s6
-; CHECK-NEXT: vmov.f32 s11, s13
-; CHECK-NEXT: vmov.f32 s9, s5
-; CHECK-NEXT: vmov.f32 s13, s7
-; CHECK-NEXT: vmov q1, q5
-; CHECK-NEXT: vpop {d8, d9, d10, d11}
-; CHECK-NEXT: bx lr
+; CHECK-LV-LABEL: shuffle9_f64:
+; CHECK-LV: @ %bb.0: @ %entry
+; CHECK-LV-NEXT: .vsave {d8, d9, d10, d11}
+; CHECK-LV-NEXT: vpush {d8, d9, d10, d11}
+; CHECK-LV-NEXT: vmov q5, q2
+; CHECK-LV-NEXT: vmov.f32 s16, s0
+; CHECK-LV-NEXT: vmov.f32 s18, s20
+; CHECK-LV-NEXT: vmov.f32 s20, s2
+; CHECK-LV-NEXT: vmov.f32 s10, s12
+; CHECK-LV-NEXT: vmov.f32 s19, s21
+; CHECK-LV-NEXT: vmov.f32 s8, s4
+; CHECK-LV-NEXT: vmov.f32 s17, s1
+; CHECK-LV-NEXT: vmov.f32 s21, s3
+; CHECK-LV-NEXT: vmov q0, q4
+; CHECK-LV-NEXT: vmov.f32 s12, s6
+; CHECK-LV-NEXT: vmov.f32 s11, s13
+; CHECK-LV-NEXT: vmov.f32 s9, s5
+; CHECK-LV-NEXT: vmov.f32 s13, s7
+; CHECK-LV-NEXT: vmov q1, q5
+; CHECK-LV-NEXT: vpop {d8, d9, d10, d11}
+; CHECK-LV-NEXT: bx lr
+
+; CHECK-LIS-LABEL: shuffle9_f64:
+; CHECK-LIS: @ %bb.0: @ %entry
+; CHECK-LIS-NEXT: .vsave {d8, d9, d10, d11}
+; CHECK-LIS-NEXT: vpush {d8, d9, d10, d11}
+; CHECK-LIS-NEXT: vmov q5, q2
+; CHECK-LIS-NEXT: vmov q4, q0
+; CHECK-LIS-NEXT: vmov.f32 s2, s20
+; CHECK-LIS-NEXT: vmov.f32 s20, s18
+; CHECK-LIS-NEXT: vmov.f32 s10, s12
+; CHECK-LIS-NEXT: vmov.f32 s3, s21
+; CHECK-LIS-NEXT: vmov.f32 s8, s4
+; CHECK-LIS-NEXT: vmov.f32 s21, s19
+; CHECK-LIS-NEXT: vmov.f32 s12, s6
+; CHECK-LIS-NEXT: vmov.f32 s11, s13
+; CHECK-LIS-NEXT: vmov.f32 s9, s5
+; CHECK-LIS-NEXT: vmov.f32 s13, s7
+; CHECK-LIS-NEXT: vmov q1, q5
+; CHECK-LIS-NEXT: vpop {d8, d9, d10, d11}
+; CHECK-LIS-NEXT: bx lr
entry:
%out = shufflevector <4 x double> %src1, <4 x double> %src2, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
ret <8 x double> %out
@@ -1560,27 +1606,47 @@ entry:
ret <2 x i64> %out
}
define arm_aapcs_vfpcc <8 x i64> @shuffle9_i64(<4 x i64> %src1, <4 x i64> %src2) {
-; CHECK-LABEL: shuffle9_i64:
-; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: .vsave {d8, d9, d10, d11}
-; CHECK-NEXT: vpush {d8, d9, d10, d11}
-; CHECK-NEXT: vmov q5, q2
-; CHECK-NEXT: vmov.f32 s16, s0
-; CHECK-NEXT: vmov.f32 s18, s20
-; CHECK-NEXT: vmov.f32 s20, s2
-; CHECK-NEXT: vmov.f32 s10, s12
-; CHECK-NEXT: vmov.f32 s19, s21
-; CHECK-NEXT: vmov.f32 s8, s4
-; CHECK-NEXT: vmov.f32 s17, s1
-; CHECK-NEXT: vmov.f32 s21, s3
-; CHECK-NEXT: vmov q0, q4
-; CHECK-NEXT: vmov.f32 s12, s6
-; CHECK-NEXT: vmov.f32 s11, s13
-; CHECK-NEXT: vmov.f32 s9, s5
-; CHECK-NEXT: vmov.f32 s13, s7
-; CHECK-NEXT: vmov q1, q5
-; CHECK-NEXT: vpop {d8, d9, d10, d11}
-; CHECK-NEXT: bx lr
+; CHECK-LV-LABEL: shuffle9_i64:
+; CHECK-LV: @ %bb.0: @ %entry
+; CHECK-LV-NEXT: .vsave {d8, d9, d10, d11}
+; CHECK-LV-NEXT: vpush {d8, d9, d10, d11}
+; CHECK-LV-NEXT: vmov q5, q2
+; CHECK-LV-NEXT: vmov.f32 s16, s0
+; CHECK-LV-NEXT: vmov.f32 s18, s20
+; CHECK-LV-NEXT: vmov.f32 s20, s2
+; CHECK-LV-NEXT: vmov.f32 s10, s12
+; CHECK-LV-NEXT: vmov.f32 s19, s21
+; CHECK-LV-NEXT: vmov.f32 s8, s4
+; CHECK-LV-NEXT: vmov.f32 s17, s1
+; CHECK-LV-NEXT: vmov.f32 s21, s3
+; CHECK-LV-NEXT: vmov q0, q4
+; CHECK-LV-NEXT: vmov.f32 s12, s6
+; CHECK-LV-NEXT: vmov.f32 s11, s13
+; CHECK-LV-NEXT: vmov.f32 s9, s5
+; CHECK-LV-NEXT: vmov.f32 s13, s7
+; CHECK-LV-NEXT: vmov q1, q5
+; CHECK-LV-NEXT: vpop {d8, d9, d10, d11}
+; CHECK-LV-NEXT: bx lr
+
+; CHECK-LIS-LABEL: shuffle9_i64:
+; CHECK-LIS: @ %bb.0: @ %entry
+; CHECK-LIS-NEXT: .vsave {d8, d9, d10, d11}
+; CHECK-LIS-NEXT: vpush {d8, d9, d10, d11}
+; CHECK-LIS-NEXT: vmov q5, q2
+; CHECK-LIS-NEXT: vmov q4, q0
+; CHECK-LIS-NEXT: vmov.f32 s2, s20
+; CHECK-LIS-NEXT: vmov.f32 s20, s18
+; CHECK-LIS-NEXT: vmov.f32 s10, s12
+; CHECK-LIS-NEXT: vmov.f32 s3, s21
+; CHECK-LIS-NEXT: vmov.f32 s8, s4
+; CHECK-LIS-NEXT: vmov.f32 s21, s19
+; CHECK-LIS-NEXT: vmov.f32 s12, s6
+; CHECK-LIS-NEXT: vmov.f32 s11, s13
+; CHECK-LIS-NEXT: vmov.f32 s9, s5
+; CHECK-LIS-NEXT: vmov.f32 s13, s7
+; CHECK-LIS-NEXT: vmov q1, q5
+; CHECK-LIS-NEXT: vpop {d8, d9, d10, d11}
+; CHECK-LIS-NEXT: bx lr
entry:
%out = shufflevector <4 x i64> %src1, <4 x i64> %src2, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
ret <8 x i64> %out
@@ -1822,4 +1888,4 @@ entry:
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; CHECK-LIS: {{.*}}
-; CHECK-LV: {{.*}}
+; CHECK-LV: {{.*}}
\ No newline at end of file
diff --git a/llvm/test/CodeGen/Thumb2/mve-vld3.ll b/llvm/test/CodeGen/Thumb2/mve-vld3.ll
index 0c58abaa1c86e..b6c8056891f83 100644
--- a/llvm/test/CodeGen/Thumb2/mve-vld3.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-vld3.ll
@@ -33,29 +33,53 @@ entry:
}
define void @vld3_v4i32(ptr %src, ptr %dst) {
-; CHECK-LABEL: vld3_v4i32:
-; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: .vsave {d8, d9}
-; CHECK-NEXT: vpush {d8, d9}
-; CHECK-NEXT: vldrw.u32 q0, [r0, #16]
-; CHECK-NEXT: vldrw.u32 q1, [r0]
-; CHECK-NEXT: vldrw.u32 q4, [r0, #32]
-; CHECK-NEXT: vmov.f32 s10, s2
-; CHECK-NEXT: vmov.f32 s13, s0
-; CHECK-NEXT: vmov.f32 s14, s3
-; CHECK-NEXT: vmov.f32 s8, s4
-; CHECK-NEXT: vmov.f32 s9, s7
-; CHECK-NEXT: vmov.f32 s12, s5
-; CHECK-NEXT: vmov.f32 s15, s18
-; CHECK-NEXT: vmov.f32 s11, s17
-; CHECK-NEXT: vadd.i32 q2, q2, q3
-; CHECK-NEXT: vmov.f32 s0, s6
-; CHECK-NEXT: vmov.f32 s2, s16
-; CHECK-NEXT: vmov.f32 s3, s19
-; CHECK-NEXT: vadd.i32 q0, q2, q0
-; CHECK-NEXT: vstrw.32 q0, [r1]
-; CHECK-NEXT: vpop {d8, d9}
-; CHECK-NEXT: bx lr
+; CHECK-LV-LABEL: vld3_v4i32:
+; CHECK-LV: @ %bb.0: @ %entry
+; CHECK-LV-NEXT: .vsave {d8, d9}
+; CHECK-LV-NEXT: vpush {d8, d9}
+; CHECK-LV-NEXT: vldrw.u32 q0, [r0, #16]
+; CHECK-LV-NEXT: vldrw.u32 q1, [r0]
+; CHECK-LV-NEXT: vldrw.u32 q4, [r0, #32]
+; CHECK-LV-NEXT: vmov.f32 s10, s2
+; CHECK-LV-NEXT: vmov.f32 s13, s0
+; CHECK-LV-NEXT: vmov.f32 s14, s3
+; CHECK-LV-NEXT: vmov.f32 s8, s4
+; CHECK-LV-NEXT: vmov.f32 s9, s7
+; CHECK-LV-NEXT: vmov.f32 s12, s5
+; CHECK-LV-NEXT: vmov.f32 s15, s18
+; CHECK-LV-NEXT: vmov.f32 s11, s17
+; CHECK-LV-NEXT: vadd.i32 q2, q2, q3
+; CHECK-LV-NEXT: vmov.f32 s0, s6
+; CHECK-LV-NEXT: vmov.f32 s2, s16
+; CHECK-LV-NEXT: vmov.f32 s3, s19
+; CHECK-LV-NEXT: vadd.i32 q0, q2, q0
+; CHECK-LV-NEXT: vstrw.32 q0, [r1]
+; CHECK-LV-NEXT: vpop {d8, d9}
+; CHECK-LV-NEXT: bx lr
+
+; CHECK-LIS-LABEL: vld3_v4i32:
+; CHECK-LIS: @ %bb.0: @ %entry
+; CHECK-LIS-NEXT: .vsave {d8, d9}
+; CHECK-LIS-NEXT: vpush {d8, d9}
+; CHECK-LIS-NEXT: vldrw.u32 q0, [r0, #16]
+; CHECK-LIS-NEXT: vldrw.u32 q1, [r0]
+; CHECK-LIS-NEXT: vldrw.u32 q3, [r0, #32]
+; CHECK-LIS-NEXT: vmov.f32 s10, s2
+; CHECK-LIS-NEXT: vmov.f32 s17, s0
+; CHECK-LIS-NEXT: vmov.f32 s18, s3
+; CHECK-LIS-NEXT: vmov.f32 s8, s4
+; CHECK-LIS-NEXT: vmov.f32 s9, s7
+; CHECK-LIS-NEXT: vmov.f32 s16, s5
+; CHECK-LIS-NEXT: vmov.f32 s19, s14
+; CHECK-LIS-NEXT: vmov.f32 s11, s13
+; CHECK-LIS-NEXT: vadd.i32 q2, q2, q4
+; CHECK-LIS-NEXT: vmov.f32 s0, s6
+; CHECK-LIS-NEXT: vmov.f32 s2, s12
+; CHECK-LIS-NEXT: vmov.f32 s3, s15
+; CHECK-LIS-NEXT: vadd.i32 q0, q2, q0
+; CHECK-LIS-NEXT: vstrw.32 q0, [r1]
+; CHECK-LIS-NEXT: vpop {d8, d9}
+; CHECK-LIS-NEXT: bx lr
entry:
%l1 = load <12 x i32>, ptr %src, align 4
%s1 = shufflevector <12 x i32> %l1, <12 x i32> undef, <4 x i32> <i32 0, i32 3, i32 6, i32 9>
@@ -68,46 +92,87 @@ entry:
}
define void @vld3_v8i32(ptr %src, ptr %dst) {
-; CHECK-LABEL: vld3_v8i32:
-; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: .vsave {d8, d9, d10, d11}
-; CHECK-NEXT: vpush {d8, d9, d10, d11}
-; CHECK-NEXT: vldrw.u32 q0, [r0, #64]
-; CHECK-NEXT: vldrw.u32 q1, [r0, #48]
-; CHECK-NEXT: vldrw.u32 q4, [r0, #80]
-; CHECK-NEXT: vmov.f32 s10, s2
-; CHECK-NEXT: vmov.f32 s13, s0
-; CHECK-NEXT: vmov.f32 s14, s3
-; CHECK-NEXT: vmov.f32 s8, s4
-; CHECK-NEXT: vmov.f32 s9, s7
-; CHECK-NEXT: vmov.f32 s12, s5
-; CHECK-NEXT: vmov.f32 s15, s18
-; CHECK-NEXT: vmov.f32 s11, s17
-; CHECK-NEXT: vadd.i32 q2, q2, q3
-; CHECK-NEXT: vmov.f32 s0, s6
-; CHECK-NEXT: vmov.f32 s2, s16
-; CHECK-NEXT: vldrw.u32 q1, [r0, #16]
-; CHECK-NEXT: vmov.f32 s3, s19
-; CHECK-NEXT: vldrw.u32 q3, [r0, #32]
-; CHECK-NEXT: vadd.i32 q0, q2, q0
-; CHECK-NEXT: vldrw.u32 q2, [r0]
-; CHECK-NEXT: vmov.f32 s17, s4
-; CHECK-NEXT: vstrw.32 q0, [r1, #16]
-; CHECK-NEXT: vmov.f32 s18, s7
-; CHECK-NEXT: vmov.f32 s22, s6
-; CHECK-NEXT: vmov.f32 s16, s9
-; CHECK-NEXT: vmov.f32 s19, s14
-; CHECK-NEXT: vmov.f32 s20, s8
-; CHECK-NEXT: vmov.f32 s21, s11
-; CHECK-NEXT: vmov.f32 s23, s13
-; CHECK-NEXT: vadd.i32 q4, q5, q4
-; CHECK-NEXT: vmov.f32 s4, s10
-; CHECK-NEXT: vmov.f32 s6, s12
-; CHECK-NEXT: vmov.f32 s7, s15
-; CHECK-NEXT: vadd.i32 q1, q4, q1
-; CHECK-NEXT: vstrw.32 q1, [r1]
-; CHECK-NEXT: vpop {d8, d9, d10, d11}
-; CHECK-NEXT: bx lr
+; CHECK-LV-LABEL: vld3_v8i32:
+; CHECK-LV: @ %bb.0: @ %entry
+; CHECK-LV-NEXT: .vsave {d8, d9, d10, d11}
+; CHECK-LV-NEXT: vpush {d8, d9, d10, d11}
+; CHECK-LV-NEXT: vldrw.u32 q0, [r0, #64]
+; CHECK-LV-NEXT: vldrw.u32 q1, [r0, #48]
+; CHECK-LV-NEXT: vldrw.u32 q4, [r0, #80]
+; CHECK-LV-NEXT: vmov.f32 s10, s2
+; CHECK-LV-NEXT: vmov.f32 s13, s0
+; CHECK-LV-NEXT: vmov.f32 s14, s3
+; CHECK-LV-NEXT: vmov.f32 s8, s4
+; CHECK-LV-NEXT: vmov.f32 s9, s7
+; CHECK-LV-NEXT: vmov.f32 s12, s5
+; CHECK-LV-NEXT: vmov.f32 s15, s18
+; CHECK-LV-NEXT: vmov.f32 s11, s17
+; CHECK-LV-NEXT: vadd.i32 q2, q2, q3
+; CHECK-LV-NEXT: vmov.f32 s0, s6
+; CHECK-LV-NEXT: vmov.f32 s2, s16
+; CHECK-LV-NEXT: vldrw.u32 q1, [r0, #16]
+; CHECK-LV-NEXT: vmov.f32 s3, s19
+; CHECK-LV-NEXT: vldrw.u32 q3, [r0, #32]
+; CHECK-LV-NEXT: vadd.i32 q0, q2, q0
+; CHECK-LV-NEXT: vldrw.u32 q2, [r0]
+; CHECK-LV-NEXT: vmov.f32 s17, s4
+; CHECK-LV-NEXT: vstrw.32 q0, [r1, #16]
+; CHECK-LV-NEXT: vmov.f32 s18, s7
+; CHECK-LV-NEXT: vmov.f32 s22, s6
+; CHECK-LV-NEXT: vmov.f32 s16, s9
+; CHECK-LV-NEXT: vmov.f32 s19, s14
+; CHECK-LV-NEXT: vmov.f32 s20, s8
+; CHECK-LV-NEXT: vmov.f32 s21, s11
+; CHECK-LV-NEXT: vmov.f32 s23, s13
+; CHECK-LV-NEXT: vadd.i32 q4, q5, q4
+; CHECK-LV-NEXT: vmov.f32 s4, s10
+; CHECK-LV-NEXT: vmov.f32 s6, s12
+; CHECK-LV-NEXT: vmov.f32 s7, s15
+; CHECK-LV-NEXT: vadd.i32 q1, q4, q1
+; CHECK-LV-NEXT: vstrw.32 q1, [r1]
+; CHECK-LV-NEXT: vpop {d8, d9, d10, d11}
+; CHECK-LV-NEXT: bx lr
+
+; CHECK-LIS-LABEL: vld3_v8i32:
+; CHECK-LIS: @ %bb.0: @ %entry
+; CHECK-LIS-NEXT: .vsave {d8, d9, d10, d11}
+; CHECK-LIS-NEXT: vpush {d8, d9, d10, d11}
+; CHECK-LIS-NEXT: vldrw.u32 q0, [r0, #64]
+; CHECK-LIS-NEXT: vldrw.u32 q1, [r0, #48]
+; CHECK-LIS-NEXT: vldrw.u32 q3, [r0, #80]
+; CHECK-LIS-NEXT: vmov.f32 s10, s2
+; CHECK-LIS-NEXT: vmov.f32 s17, s0
+; CHECK-LIS-NEXT: vmov.f32 s18, s3
+; CHECK-LIS-NEXT: vmov.f32 s8, s4
+; CHECK-LIS-NEXT: vmov.f32 s9, s7
+; CHECK-LIS-NEXT: vmov.f32 s16, s5
+; CHECK-LIS-NEXT: vmov.f32 s19, s14
+; CHECK-LIS-NEXT: vmov.f32 s11, s13
+; CHECK-LIS-NEXT: vmov.f32 s0, s6
+; CHECK-LIS-NEXT: vadd.i32 q2, q2, q4
+; CHECK-LIS-NEXT: vmov.f32 s2, s12
+; CHECK-LIS-NEXT: vldrw.u32 q1, [r0, #16]
+; CHECK-LIS-NEXT: vmov.f32 s3, s15
+; CHECK-LIS-NEXT: vldrw.u32 q3, [r0, #32]
+; CHECK-LIS-NEXT: vadd.i32 q0, q2, q0
+; CHECK-LIS-NEXT: vldrw.u32 q2, [r0]
+; CHECK-LIS-NEXT: vmov.f32 s17, s4
+; CHECK-LIS-NEXT: vstrw.32 q0, [r1, #16]
+; CHECK-LIS-NEXT: vmov.f32 s18, s7
+; CHECK-LIS-NEXT: vmov.f32 s22, s6
+; CHECK-LIS-NEXT: vmov.f32 s16, s9
+; CHECK-LIS-NEXT: vmov.f32 s19, s14
+; CHECK-LIS-NEXT: vmov.f32 s20, s8
+; CHECK-LIS-NEXT: vmov.f32 s21, s11
+; CHECK-LIS-NEXT: vmov.f32 s23, s13
+; CHECK-LIS-NEXT: vadd.i32 q4, q5, q4
+; CHECK-LIS-NEXT: vmov.f32 s4, s10
+; CHECK-LIS-NEXT: vmov.f32 s6, s12
+; CHECK-LIS-NEXT: vmov.f32 s7, s15
+; CHECK-LIS-NEXT: vadd.i32 q1, q4, q1
+; CHECK-LIS-NEXT: vstrw.32 q1, [r1]
+; CHECK-LIS-NEXT: vpop {d8, d9, d10, d11}
+; CHECK-LIS-NEXT: bx lr
entry:
%l1 = load <24 x i32>, ptr %src, align 4
%s1 = shufflevector <24 x i32> %l1, <24 x i32> undef, <8 x i32> <i32 0, i32 3, i32 6, i32 9, i32 12, i32 15, i32 18, i32 21>
@@ -120,80 +185,155 @@ entry:
}
define void @vld3_v16i32(ptr %src, ptr %dst) {
-; CHECK-LABEL: vld3_v16i32:
-; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
-; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15}
-; CHECK-NEXT: vldrw.u32 q0, [r0, #64]
-; CHECK-NEXT: vldrw.u32 q1, [r0, #48]
-; CHECK-NEXT: vldrw.u32 q4, [r0, #80]
-; CHECK-NEXT: vldrw.u32 q6, [r0, #176]
-; CHECK-NEXT: vmov.f32 s10, s2
-; CHECK-NEXT: vmov.f32 s13, s0
-; CHECK-NEXT: vmov.f32 s14, s3
-; CHECK-NEXT: vmov.f32 s8, s4
-; CHECK-NEXT: vmov.f32 s9, s7
-; CHECK-NEXT: vmov.f32 s12, s5
-; CHECK-NEXT: vmov.f32 s15, s18
-; CHECK-NEXT: vmov.f32 s11, s17
-; CHECK-NEXT: vadd.i32 q2, q2, q3
-; CHECK-NEXT: vmov.f32 s0, s6
-; CHECK-NEXT: vmov.f32 s2, s16
-; CHECK-NEXT: vldrw.u32 q1, [r0, #16]
-; CHECK-NEXT: vmov.f32 s3, s19
-; CHECK-NEXT: vldrw.u32 q3, [r0, #32]
-; CHECK-NEXT: vadd.i32 q0, q2, q0
-; CHECK-NEXT: vldrw.u32 q2, [r0]
-; CHECK-NEXT: vmov.f32 s17, s4
-; CHECK-NEXT: vmov.f32 s18, s7
-; CHECK-NEXT: vmov.f32 s22, s6
-; CHECK-NEXT: vmov.f32 s16, s9
-; CHECK-NEXT: vmov.f32 s19, s14
-; CHECK-NEXT: vmov.f32 s20, s8
-; CHECK-NEXT: ...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/127631
More information about the llvm-commits mailing list