[llvm] [RISCV] Lower the alignment requirement for a GPR pair spill for Zdinx on RV32. (PR #85871)

via llvm-commits llvm-commits at lists.llvm.org
Tue Mar 19 15:02:02 PDT 2024


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-backend-risc-v

Author: Craig Topper (topperc)

<details>
<summary>Changes</summary>

I believe we can use XLen alignment as long as eliminateFrameIndex
limits the maximum folded offset to 2043. This way, when we split
the load/store into two instructions, we'll be able to add 4 to the
second instruction's offset without overflowing simm12.
    
The test is long to make sure we generate enough spills to have a
large offset. I'm open to suggestions on ways to shorten it.

Stacked on a minor refactor https://github.com/llvm/llvm-project/pull/85847

---

Patch is 106.62 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/85871.diff


3 Files Affected:

- (modified) llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp (+19-13) 
- (modified) llvm/lib/Target/RISCV/RISCVRegisterInfo.td (+1-1) 
- (added) llvm/test/CodeGen/RISCV/zdinx-large-spill.ll (+2873) 


``````````diff
diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
index a68674b221d38e..881aab955f7d0b 100644
--- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
@@ -431,29 +431,35 @@ bool RISCVRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
   }
 
   if (!IsRVVSpill) {
-    if (MI.getOpcode() == RISCV::ADDI && !isInt<12>(Offset.getFixed())) {
+    int64_t Val = Offset.getFixed();
+    int64_t Lo12 = SignExtend64<12>(Val);
+    unsigned Opc = MI.getOpcode();
+    if (Opc == RISCV::ADDI && !isInt<12>(Val)) {
       // We chose to emit the canonical immediate sequence rather than folding
       // the offset into the using add under the theory that doing so doesn't
       // save dynamic instruction count and some target may fuse the canonical
       // 32 bit immediate sequence.  We still need to clear the portion of the
       // offset encoded in the immediate.
       MI.getOperand(FIOperandNum + 1).ChangeToImmediate(0);
+    } else if ((Opc == RISCV::PREFETCH_I || Opc == RISCV::PREFETCH_R ||
+                Opc == RISCV::PREFETCH_W) &&
+               (Lo12 & 0b11111) != 0) {
+      // Prefetch instructions require the offset to be 32 byte aligned.
+      MI.getOperand(FIOperandNum + 1).ChangeToImmediate(0);
+    } else if ((Opc == RISCV::PseudoRV32ZdinxLD ||
+                 Opc == RISCV::PseudoRV32ZdinxSD) &&
+               Lo12 >= 2044) {
+      // This instruction will be split into 2 instructions. The second
+      // instruction will add 4 to the immediate. If that would overflow 12
+      // bits, we can't fold the offset.
+      MI.getOperand(FIOperandNum + 1).ChangeToImmediate(0);
     } else {
       // We can encode an add with 12 bit signed immediate in the immediate
       // operand of our user instruction.  As a result, the remaining
       // offset can by construction, at worst, a LUI and a ADD.
-      int64_t Val = Offset.getFixed();
-      int64_t Lo12 = SignExtend64<12>(Val);
-      if ((MI.getOpcode() == RISCV::PREFETCH_I ||
-           MI.getOpcode() == RISCV::PREFETCH_R ||
-           MI.getOpcode() == RISCV::PREFETCH_W) &&
-          (Lo12 & 0b11111) != 0)
-        MI.getOperand(FIOperandNum + 1).ChangeToImmediate(0);
-      else {
-        MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Lo12);
-        Offset = StackOffset::get((uint64_t)Val - (uint64_t)Lo12,
-                                  Offset.getScalable());
-      }
+      MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Lo12);
+      Offset = StackOffset::get((uint64_t)Val - (uint64_t)Lo12,
+                                Offset.getScalable());
     }
   }
 
diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.td b/llvm/lib/Target/RISCV/RISCVRegisterInfo.td
index 225b57554c1dc0..9da1f73681c68c 100644
--- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.td
+++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.td
@@ -573,7 +573,7 @@ let RegAltNameIndices = [ABIRegAltName] in {
 }
 
 let RegInfos = RegInfoByHwMode<[RV32, RV64],
-                               [RegInfo<64, 64, 64>, RegInfo<128, 128, 128>]>,
+                               [RegInfo<64, 64, 32>, RegInfo<128, 128, 64>]>,
     DecoderMethod = "DecodeGPRPairRegisterClass" in
 def GPRPair : RegisterClass<"RISCV", [XLenPairFVT], 64, (add
     X10_X11, X12_X13, X14_X15, X16_X17,
diff --git a/llvm/test/CodeGen/RISCV/zdinx-large-spill.ll b/llvm/test/CodeGen/RISCV/zdinx-large-spill.ll
new file mode 100644
index 00000000000000..d9856478b19053
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/zdinx-large-spill.ll
@@ -0,0 +1,2873 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc < %s -mtriple=riscv32 -mattr=+zdinx | FileCheck %s
+
+; Generate over 2048 bytes of spills by loading a bunch of values and then forcing
+; all GPRs to be spilled via inline assembly that clobbers all registers. We
+; want to make sure eliminateFrameIndex doesn't fold sp+2044 as an offset in a
+; GPR pair spill instruction. When we split the pair spill, we would be unable
+; to add 4 to the immediate without overflowing simm12.
+
+; 2040(sp) should be the largest offset we have.
+
+define void @foo(ptr nocapture noundef %0) nounwind {
+; CHECK-LABEL: foo:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi sp, sp, -2032
+; CHECK-NEXT:    sw ra, 2028(sp) # 4-byte Folded Spill
+; CHECK-NEXT:    sw s0, 2024(sp) # 4-byte Folded Spill
+; CHECK-NEXT:    sw s1, 2020(sp) # 4-byte Folded Spill
+; CHECK-NEXT:    sw s2, 2016(sp) # 4-byte Folded Spill
+; CHECK-NEXT:    sw s3, 2012(sp) # 4-byte Folded Spill
+; CHECK-NEXT:    sw s4, 2008(sp) # 4-byte Folded Spill
+; CHECK-NEXT:    sw s5, 2004(sp) # 4-byte Folded Spill
+; CHECK-NEXT:    sw s6, 2000(sp) # 4-byte Folded Spill
+; CHECK-NEXT:    sw s7, 1996(sp) # 4-byte Folded Spill
+; CHECK-NEXT:    sw s8, 1992(sp) # 4-byte Folded Spill
+; CHECK-NEXT:    sw s9, 1988(sp) # 4-byte Folded Spill
+; CHECK-NEXT:    sw s10, 1984(sp) # 4-byte Folded Spill
+; CHECK-NEXT:    sw s11, 1980(sp) # 4-byte Folded Spill
+; CHECK-NEXT:    addi sp, sp, -80
+; CHECK-NEXT:    sw a0, 8(sp) # 4-byte Folded Spill
+; CHECK-NEXT:    lw a2, 0(a0)
+; CHECK-NEXT:    lw a3, 4(a0)
+; CHECK-NEXT:    lui a1, 1
+; CHECK-NEXT:    add a1, sp, a1
+; CHECK-NEXT:    sw a2, -2044(a1)
+; CHECK-NEXT:    sw a3, -2040(a1)
+; CHECK-NEXT:    lw a2, 8(a0)
+; CHECK-NEXT:    lw a3, 12(a0)
+; CHECK-NEXT:    addi a1, sp, 2044
+; CHECK-NEXT:    sw a2, 0(a1)
+; CHECK-NEXT:    sw a3, 4(a1)
+; CHECK-NEXT:    lw a2, 16(a0)
+; CHECK-NEXT:    lw a3, 20(a0)
+; CHECK-NEXT:    sw a2, 2036(sp)
+; CHECK-NEXT:    sw a3, 2040(sp)
+; CHECK-NEXT:    lw a2, 24(a0)
+; CHECK-NEXT:    lw a3, 28(a0)
+; CHECK-NEXT:    sw a2, 2028(sp)
+; CHECK-NEXT:    sw a3, 2032(sp)
+; CHECK-NEXT:    lw a2, 32(a0)
+; CHECK-NEXT:    lw a3, 36(a0)
+; CHECK-NEXT:    sw a2, 2020(sp)
+; CHECK-NEXT:    sw a3, 2024(sp)
+; CHECK-NEXT:    lw a2, 40(a0)
+; CHECK-NEXT:    lw a3, 44(a0)
+; CHECK-NEXT:    sw a2, 2012(sp)
+; CHECK-NEXT:    sw a3, 2016(sp)
+; CHECK-NEXT:    lw a2, 48(a0)
+; CHECK-NEXT:    lw a3, 52(a0)
+; CHECK-NEXT:    sw a2, 2004(sp)
+; CHECK-NEXT:    sw a3, 2008(sp)
+; CHECK-NEXT:    lw a2, 56(a0)
+; CHECK-NEXT:    lw a3, 60(a0)
+; CHECK-NEXT:    sw a2, 1996(sp)
+; CHECK-NEXT:    sw a3, 2000(sp)
+; CHECK-NEXT:    lw a2, 64(a0)
+; CHECK-NEXT:    lw a3, 68(a0)
+; CHECK-NEXT:    sw a2, 1988(sp)
+; CHECK-NEXT:    sw a3, 1992(sp)
+; CHECK-NEXT:    lw a2, 72(a0)
+; CHECK-NEXT:    lw a3, 76(a0)
+; CHECK-NEXT:    sw a2, 1980(sp)
+; CHECK-NEXT:    sw a3, 1984(sp)
+; CHECK-NEXT:    lw a2, 80(a0)
+; CHECK-NEXT:    lw a3, 84(a0)
+; CHECK-NEXT:    sw a2, 1972(sp)
+; CHECK-NEXT:    sw a3, 1976(sp)
+; CHECK-NEXT:    lw a2, 88(a0)
+; CHECK-NEXT:    lw a3, 92(a0)
+; CHECK-NEXT:    sw a2, 1964(sp)
+; CHECK-NEXT:    sw a3, 1968(sp)
+; CHECK-NEXT:    lw a2, 96(a0)
+; CHECK-NEXT:    lw a3, 100(a0)
+; CHECK-NEXT:    sw a2, 1956(sp)
+; CHECK-NEXT:    sw a3, 1960(sp)
+; CHECK-NEXT:    lw a2, 104(a0)
+; CHECK-NEXT:    lw a3, 108(a0)
+; CHECK-NEXT:    sw a2, 1948(sp)
+; CHECK-NEXT:    sw a3, 1952(sp)
+; CHECK-NEXT:    lw a2, 112(a0)
+; CHECK-NEXT:    lw a3, 116(a0)
+; CHECK-NEXT:    sw a2, 1940(sp)
+; CHECK-NEXT:    sw a3, 1944(sp)
+; CHECK-NEXT:    lw a2, 120(a0)
+; CHECK-NEXT:    lw a3, 124(a0)
+; CHECK-NEXT:    sw a2, 1932(sp)
+; CHECK-NEXT:    sw a3, 1936(sp)
+; CHECK-NEXT:    lw a2, 128(a0)
+; CHECK-NEXT:    lw a3, 132(a0)
+; CHECK-NEXT:    sw a2, 1924(sp)
+; CHECK-NEXT:    sw a3, 1928(sp)
+; CHECK-NEXT:    lw a2, 136(a0)
+; CHECK-NEXT:    lw a3, 140(a0)
+; CHECK-NEXT:    sw a2, 1916(sp)
+; CHECK-NEXT:    sw a3, 1920(sp)
+; CHECK-NEXT:    lw a2, 144(a0)
+; CHECK-NEXT:    lw a3, 148(a0)
+; CHECK-NEXT:    sw a2, 1908(sp)
+; CHECK-NEXT:    sw a3, 1912(sp)
+; CHECK-NEXT:    lw a2, 152(a0)
+; CHECK-NEXT:    lw a3, 156(a0)
+; CHECK-NEXT:    sw a2, 1900(sp)
+; CHECK-NEXT:    sw a3, 1904(sp)
+; CHECK-NEXT:    lw a2, 160(a0)
+; CHECK-NEXT:    lw a3, 164(a0)
+; CHECK-NEXT:    sw a2, 1892(sp)
+; CHECK-NEXT:    sw a3, 1896(sp)
+; CHECK-NEXT:    lw a2, 168(a0)
+; CHECK-NEXT:    lw a3, 172(a0)
+; CHECK-NEXT:    sw a2, 1884(sp)
+; CHECK-NEXT:    sw a3, 1888(sp)
+; CHECK-NEXT:    lw a2, 176(a0)
+; CHECK-NEXT:    lw a3, 180(a0)
+; CHECK-NEXT:    sw a2, 1876(sp)
+; CHECK-NEXT:    sw a3, 1880(sp)
+; CHECK-NEXT:    lw a2, 184(a0)
+; CHECK-NEXT:    lw a3, 188(a0)
+; CHECK-NEXT:    sw a2, 1868(sp)
+; CHECK-NEXT:    sw a3, 1872(sp)
+; CHECK-NEXT:    lw a2, 192(a0)
+; CHECK-NEXT:    lw a3, 196(a0)
+; CHECK-NEXT:    sw a2, 1860(sp)
+; CHECK-NEXT:    sw a3, 1864(sp)
+; CHECK-NEXT:    lw a2, 200(a0)
+; CHECK-NEXT:    lw a3, 204(a0)
+; CHECK-NEXT:    sw a2, 1852(sp)
+; CHECK-NEXT:    sw a3, 1856(sp)
+; CHECK-NEXT:    lw a2, 208(a0)
+; CHECK-NEXT:    lw a3, 212(a0)
+; CHECK-NEXT:    sw a2, 1844(sp)
+; CHECK-NEXT:    sw a3, 1848(sp)
+; CHECK-NEXT:    lw a2, 216(a0)
+; CHECK-NEXT:    lw a3, 220(a0)
+; CHECK-NEXT:    sw a2, 1836(sp)
+; CHECK-NEXT:    sw a3, 1840(sp)
+; CHECK-NEXT:    lw a2, 224(a0)
+; CHECK-NEXT:    lw a3, 228(a0)
+; CHECK-NEXT:    sw a2, 1828(sp)
+; CHECK-NEXT:    sw a3, 1832(sp)
+; CHECK-NEXT:    lw a2, 232(a0)
+; CHECK-NEXT:    lw a3, 236(a0)
+; CHECK-NEXT:    sw a2, 1820(sp)
+; CHECK-NEXT:    sw a3, 1824(sp)
+; CHECK-NEXT:    lw a2, 240(a0)
+; CHECK-NEXT:    lw a3, 244(a0)
+; CHECK-NEXT:    sw a2, 1812(sp)
+; CHECK-NEXT:    sw a3, 1816(sp)
+; CHECK-NEXT:    lw a2, 248(a0)
+; CHECK-NEXT:    lw a3, 252(a0)
+; CHECK-NEXT:    sw a2, 1804(sp)
+; CHECK-NEXT:    sw a3, 1808(sp)
+; CHECK-NEXT:    lw a2, 256(a0)
+; CHECK-NEXT:    lw a3, 260(a0)
+; CHECK-NEXT:    sw a2, 1796(sp)
+; CHECK-NEXT:    sw a3, 1800(sp)
+; CHECK-NEXT:    lw a2, 264(a0)
+; CHECK-NEXT:    lw a3, 268(a0)
+; CHECK-NEXT:    sw a2, 1788(sp)
+; CHECK-NEXT:    sw a3, 1792(sp)
+; CHECK-NEXT:    lw a2, 272(a0)
+; CHECK-NEXT:    lw a3, 276(a0)
+; CHECK-NEXT:    sw a2, 1780(sp)
+; CHECK-NEXT:    sw a3, 1784(sp)
+; CHECK-NEXT:    lw a2, 280(a0)
+; CHECK-NEXT:    lw a3, 284(a0)
+; CHECK-NEXT:    sw a2, 1772(sp)
+; CHECK-NEXT:    sw a3, 1776(sp)
+; CHECK-NEXT:    lw a2, 288(a0)
+; CHECK-NEXT:    lw a3, 292(a0)
+; CHECK-NEXT:    sw a2, 1764(sp)
+; CHECK-NEXT:    sw a3, 1768(sp)
+; CHECK-NEXT:    lw a2, 296(a0)
+; CHECK-NEXT:    lw a3, 300(a0)
+; CHECK-NEXT:    sw a2, 1756(sp)
+; CHECK-NEXT:    sw a3, 1760(sp)
+; CHECK-NEXT:    lw a2, 304(a0)
+; CHECK-NEXT:    lw a3, 308(a0)
+; CHECK-NEXT:    sw a2, 1748(sp)
+; CHECK-NEXT:    sw a3, 1752(sp)
+; CHECK-NEXT:    lw a2, 312(a0)
+; CHECK-NEXT:    lw a3, 316(a0)
+; CHECK-NEXT:    sw a2, 1740(sp)
+; CHECK-NEXT:    sw a3, 1744(sp)
+; CHECK-NEXT:    lw a2, 320(a0)
+; CHECK-NEXT:    lw a3, 324(a0)
+; CHECK-NEXT:    sw a2, 1732(sp)
+; CHECK-NEXT:    sw a3, 1736(sp)
+; CHECK-NEXT:    lw a2, 328(a0)
+; CHECK-NEXT:    lw a3, 332(a0)
+; CHECK-NEXT:    sw a2, 1724(sp)
+; CHECK-NEXT:    sw a3, 1728(sp)
+; CHECK-NEXT:    lw a2, 336(a0)
+; CHECK-NEXT:    lw a3, 340(a0)
+; CHECK-NEXT:    sw a2, 1716(sp)
+; CHECK-NEXT:    sw a3, 1720(sp)
+; CHECK-NEXT:    lw a2, 344(a0)
+; CHECK-NEXT:    lw a3, 348(a0)
+; CHECK-NEXT:    sw a2, 1708(sp)
+; CHECK-NEXT:    sw a3, 1712(sp)
+; CHECK-NEXT:    lw a2, 352(a0)
+; CHECK-NEXT:    lw a3, 356(a0)
+; CHECK-NEXT:    sw a2, 1700(sp)
+; CHECK-NEXT:    sw a3, 1704(sp)
+; CHECK-NEXT:    lw a2, 360(a0)
+; CHECK-NEXT:    lw a3, 364(a0)
+; CHECK-NEXT:    sw a2, 1692(sp)
+; CHECK-NEXT:    sw a3, 1696(sp)
+; CHECK-NEXT:    lw a2, 368(a0)
+; CHECK-NEXT:    lw a3, 372(a0)
+; CHECK-NEXT:    sw a2, 1684(sp)
+; CHECK-NEXT:    sw a3, 1688(sp)
+; CHECK-NEXT:    lw a2, 376(a0)
+; CHECK-NEXT:    lw a3, 380(a0)
+; CHECK-NEXT:    sw a2, 1676(sp)
+; CHECK-NEXT:    sw a3, 1680(sp)
+; CHECK-NEXT:    lw a2, 384(a0)
+; CHECK-NEXT:    lw a3, 388(a0)
+; CHECK-NEXT:    sw a2, 1668(sp)
+; CHECK-NEXT:    sw a3, 1672(sp)
+; CHECK-NEXT:    lw a2, 392(a0)
+; CHECK-NEXT:    lw a3, 396(a0)
+; CHECK-NEXT:    sw a2, 1660(sp)
+; CHECK-NEXT:    sw a3, 1664(sp)
+; CHECK-NEXT:    lw a2, 400(a0)
+; CHECK-NEXT:    lw a3, 404(a0)
+; CHECK-NEXT:    sw a2, 1652(sp)
+; CHECK-NEXT:    sw a3, 1656(sp)
+; CHECK-NEXT:    lw a2, 408(a0)
+; CHECK-NEXT:    lw a3, 412(a0)
+; CHECK-NEXT:    sw a2, 1644(sp)
+; CHECK-NEXT:    sw a3, 1648(sp)
+; CHECK-NEXT:    lw a2, 416(a0)
+; CHECK-NEXT:    lw a3, 420(a0)
+; CHECK-NEXT:    sw a2, 1636(sp)
+; CHECK-NEXT:    sw a3, 1640(sp)
+; CHECK-NEXT:    lw a2, 424(a0)
+; CHECK-NEXT:    lw a3, 428(a0)
+; CHECK-NEXT:    sw a2, 1628(sp)
+; CHECK-NEXT:    sw a3, 1632(sp)
+; CHECK-NEXT:    lw a2, 432(a0)
+; CHECK-NEXT:    lw a3, 436(a0)
+; CHECK-NEXT:    sw a2, 1620(sp)
+; CHECK-NEXT:    sw a3, 1624(sp)
+; CHECK-NEXT:    lw a2, 440(a0)
+; CHECK-NEXT:    lw a3, 444(a0)
+; CHECK-NEXT:    sw a2, 1612(sp)
+; CHECK-NEXT:    sw a3, 1616(sp)
+; CHECK-NEXT:    lw a2, 448(a0)
+; CHECK-NEXT:    lw a3, 452(a0)
+; CHECK-NEXT:    sw a2, 1604(sp)
+; CHECK-NEXT:    sw a3, 1608(sp)
+; CHECK-NEXT:    lw a2, 456(a0)
+; CHECK-NEXT:    lw a3, 460(a0)
+; CHECK-NEXT:    sw a2, 1596(sp)
+; CHECK-NEXT:    sw a3, 1600(sp)
+; CHECK-NEXT:    lw a2, 464(a0)
+; CHECK-NEXT:    lw a3, 468(a0)
+; CHECK-NEXT:    sw a2, 1588(sp)
+; CHECK-NEXT:    sw a3, 1592(sp)
+; CHECK-NEXT:    lw a2, 472(a0)
+; CHECK-NEXT:    lw a3, 476(a0)
+; CHECK-NEXT:    sw a2, 1580(sp)
+; CHECK-NEXT:    sw a3, 1584(sp)
+; CHECK-NEXT:    lw a2, 480(a0)
+; CHECK-NEXT:    lw a3, 484(a0)
+; CHECK-NEXT:    sw a2, 1572(sp)
+; CHECK-NEXT:    sw a3, 1576(sp)
+; CHECK-NEXT:    lw a2, 488(a0)
+; CHECK-NEXT:    lw a3, 492(a0)
+; CHECK-NEXT:    sw a2, 1564(sp)
+; CHECK-NEXT:    sw a3, 1568(sp)
+; CHECK-NEXT:    lw a2, 496(a0)
+; CHECK-NEXT:    lw a3, 500(a0)
+; CHECK-NEXT:    sw a2, 1556(sp)
+; CHECK-NEXT:    sw a3, 1560(sp)
+; CHECK-NEXT:    lw a2, 504(a0)
+; CHECK-NEXT:    lw a3, 508(a0)
+; CHECK-NEXT:    sw a2, 1548(sp)
+; CHECK-NEXT:    sw a3, 1552(sp)
+; CHECK-NEXT:    lw a2, 512(a0)
+; CHECK-NEXT:    lw a3, 516(a0)
+; CHECK-NEXT:    sw a2, 1540(sp)
+; CHECK-NEXT:    sw a3, 1544(sp)
+; CHECK-NEXT:    lw a2, 520(a0)
+; CHECK-NEXT:    lw a3, 524(a0)
+; CHECK-NEXT:    sw a2, 1532(sp)
+; CHECK-NEXT:    sw a3, 1536(sp)
+; CHECK-NEXT:    lw a2, 528(a0)
+; CHECK-NEXT:    lw a3, 532(a0)
+; CHECK-NEXT:    sw a2, 1524(sp)
+; CHECK-NEXT:    sw a3, 1528(sp)
+; CHECK-NEXT:    lw a2, 536(a0)
+; CHECK-NEXT:    lw a3, 540(a0)
+; CHECK-NEXT:    sw a2, 1516(sp)
+; CHECK-NEXT:    sw a3, 1520(sp)
+; CHECK-NEXT:    lw a2, 544(a0)
+; CHECK-NEXT:    lw a3, 548(a0)
+; CHECK-NEXT:    sw a2, 1508(sp)
+; CHECK-NEXT:    sw a3, 1512(sp)
+; CHECK-NEXT:    lw a2, 552(a0)
+; CHECK-NEXT:    lw a3, 556(a0)
+; CHECK-NEXT:    sw a2, 1500(sp)
+; CHECK-NEXT:    sw a3, 1504(sp)
+; CHECK-NEXT:    lw a2, 560(a0)
+; CHECK-NEXT:    lw a3, 564(a0)
+; CHECK-NEXT:    sw a2, 1492(sp)
+; CHECK-NEXT:    sw a3, 1496(sp)
+; CHECK-NEXT:    lw a2, 568(a0)
+; CHECK-NEXT:    lw a3, 572(a0)
+; CHECK-NEXT:    sw a2, 1484(sp)
+; CHECK-NEXT:    sw a3, 1488(sp)
+; CHECK-NEXT:    lw a2, 576(a0)
+; CHECK-NEXT:    lw a3, 580(a0)
+; CHECK-NEXT:    sw a2, 1476(sp)
+; CHECK-NEXT:    sw a3, 1480(sp)
+; CHECK-NEXT:    lw a2, 584(a0)
+; CHECK-NEXT:    lw a3, 588(a0)
+; CHECK-NEXT:    sw a2, 1468(sp)
+; CHECK-NEXT:    sw a3, 1472(sp)
+; CHECK-NEXT:    lw a2, 592(a0)
+; CHECK-NEXT:    lw a3, 596(a0)
+; CHECK-NEXT:    sw a2, 1460(sp)
+; CHECK-NEXT:    sw a3, 1464(sp)
+; CHECK-NEXT:    lw a2, 600(a0)
+; CHECK-NEXT:    lw a3, 604(a0)
+; CHECK-NEXT:    sw a2, 1452(sp)
+; CHECK-NEXT:    sw a3, 1456(sp)
+; CHECK-NEXT:    lw a2, 608(a0)
+; CHECK-NEXT:    lw a3, 612(a0)
+; CHECK-NEXT:    sw a2, 1444(sp)
+; CHECK-NEXT:    sw a3, 1448(sp)
+; CHECK-NEXT:    lw a2, 616(a0)
+; CHECK-NEXT:    lw a3, 620(a0)
+; CHECK-NEXT:    sw a2, 1436(sp)
+; CHECK-NEXT:    sw a3, 1440(sp)
+; CHECK-NEXT:    lw a2, 624(a0)
+; CHECK-NEXT:    lw a3, 628(a0)
+; CHECK-NEXT:    sw a2, 1428(sp)
+; CHECK-NEXT:    sw a3, 1432(sp)
+; CHECK-NEXT:    lw a2, 632(a0)
+; CHECK-NEXT:    lw a3, 636(a0)
+; CHECK-NEXT:    sw a2, 1420(sp)
+; CHECK-NEXT:    sw a3, 1424(sp)
+; CHECK-NEXT:    lw a2, 640(a0)
+; CHECK-NEXT:    lw a3, 644(a0)
+; CHECK-NEXT:    sw a2, 1412(sp)
+; CHECK-NEXT:    sw a3, 1416(sp)
+; CHECK-NEXT:    lw a2, 648(a0)
+; CHECK-NEXT:    lw a3, 652(a0)
+; CHECK-NEXT:    sw a2, 1404(sp)
+; CHECK-NEXT:    sw a3, 1408(sp)
+; CHECK-NEXT:    lw a2, 656(a0)
+; CHECK-NEXT:    lw a3, 660(a0)
+; CHECK-NEXT:    sw a2, 1396(sp)
+; CHECK-NEXT:    sw a3, 1400(sp)
+; CHECK-NEXT:    lw a2, 664(a0)
+; CHECK-NEXT:    lw a3, 668(a0)
+; CHECK-NEXT:    sw a2, 1388(sp)
+; CHECK-NEXT:    sw a3, 1392(sp)
+; CHECK-NEXT:    lw a2, 672(a0)
+; CHECK-NEXT:    lw a3, 676(a0)
+; CHECK-NEXT:    sw a2, 1380(sp)
+; CHECK-NEXT:    sw a3, 1384(sp)
+; CHECK-NEXT:    lw a2, 680(a0)
+; CHECK-NEXT:    lw a3, 684(a0)
+; CHECK-NEXT:    sw a2, 1372(sp)
+; CHECK-NEXT:    sw a3, 1376(sp)
+; CHECK-NEXT:    lw a2, 688(a0)
+; CHECK-NEXT:    lw a3, 692(a0)
+; CHECK-NEXT:    sw a2, 1364(sp)
+; CHECK-NEXT:    sw a3, 1368(sp)
+; CHECK-NEXT:    lw a2, 696(a0)
+; CHECK-NEXT:    lw a3, 700(a0)
+; CHECK-NEXT:    sw a2, 1356(sp)
+; CHECK-NEXT:    sw a3, 1360(sp)
+; CHECK-NEXT:    lw a2, 704(a0)
+; CHECK-NEXT:    lw a3, 708(a0)
+; CHECK-NEXT:    sw a2, 1348(sp)
+; CHECK-NEXT:    sw a3, 1352(sp)
+; CHECK-NEXT:    lw a2, 712(a0)
+; CHECK-NEXT:    lw a3, 716(a0)
+; CHECK-NEXT:    sw a2, 1340(sp)
+; CHECK-NEXT:    sw a3, 1344(sp)
+; CHECK-NEXT:    lw a2, 720(a0)
+; CHECK-NEXT:    lw a3, 724(a0)
+; CHECK-NEXT:    sw a2, 1332(sp)
+; CHECK-NEXT:    sw a3, 1336(sp)
+; CHECK-NEXT:    lw a2, 728(a0)
+; CHECK-NEXT:    lw a3, 732(a0)
+; CHECK-NEXT:    sw a2, 1324(sp)
+; CHECK-NEXT:    sw a3, 1328(sp)
+; CHECK-NEXT:    lw a2, 736(a0)
+; CHECK-NEXT:    lw a3, 740(a0)
+; CHECK-NEXT:    sw a2, 1316(sp)
+; CHECK-NEXT:    sw a3, 1320(sp)
+; CHECK-NEXT:    lw a2, 744(a0)
+; CHECK-NEXT:    lw a3, 748(a0)
+; CHECK-NEXT:    sw a2, 1308(sp)
+; CHECK-NEXT:    sw a3, 1312(sp)
+; CHECK-NEXT:    lw a2, 752(a0)
+; CHECK-NEXT:    lw a3, 756(a0)
+; CHECK-NEXT:    sw a2, 1300(sp)
+; CHECK-NEXT:    sw a3, 1304(sp)
+; CHECK-NEXT:    lw a2, 760(a0)
+; CHECK-NEXT:    lw a3, 764(a0)
+; CHECK-NEXT:    sw a2, 1292(sp)
+; CHECK-NEXT:    sw a3, 1296(sp)
+; CHECK-NEXT:    lw a2, 768(a0)
+; CHECK-NEXT:    lw a3, 772(a0)
+; CHECK-NEXT:    sw a2, 1284(sp)
+; CHECK-NEXT:    sw a3, 1288(sp)
+; CHECK-NEXT:    lw a2, 776(a0)
+; CHECK-NEXT:    lw a3, 780(a0)
+; CHECK-NEXT:    sw a2, 1276(sp)
+; CHECK-NEXT:    sw a3, 1280(sp)
+; CHECK-NEXT:    lw a2, 784(a0)
+; CHECK-NEXT:    lw a3, 788(a0)
+; CHECK-NEXT:    sw a2, 1268(sp)
+; CHECK-NEXT:    sw a3, 1272(sp)
+; CHECK-NEXT:    lw a2, 792(a0)
+; CHECK-NEXT:    lw a3, 796(a0)
+; CHECK-NEXT:    sw a2, 1260(sp)
+; CHECK-NEXT:    sw a3, 1264(sp)
+; CHECK-NEXT:    lw a2, 800(a0)
+; CHECK-NEXT:    lw a3, 804(a0)
+; CHECK-NEXT:    sw a2, 1252(sp)
+; CHECK-NEXT:    sw a3, 1256(sp)
+; CHECK-NEXT:    lw a2, 808(a0)
+; CHECK-NEXT:    lw a3, 812(a0)
+; CHECK-NEXT:    sw a2, 1244(sp)
+; CHECK-NEXT:    sw a3, 1248(sp)
+; CHECK-NEXT:    lw a2, 816(a0)
+; CHECK-NEXT:    lw a3, 820(a0)
+; CHECK-NEXT:    sw a2, 1236(sp)
+; CHECK-NEXT:    sw a3, 1240(sp)
+; CHECK-NEXT:    lw a2, 824(a0)
+; CHECK-NEXT:    lw a3, 828(a0)
+; CHECK-NEXT:    sw a2, 1228(sp)
+; CHECK-NEXT:    sw a3, 1232(sp)
+; CHECK-NEXT:    lw a2, 832(a0)
+; CHECK-NEXT:    lw a3, 836(a0)
+; CHECK-NEXT:    sw a2, 1220(sp)
+; CHECK-NEXT:    sw a3, 1224(sp)
+; CHECK-NEXT:    lw a2, 840(a0)
+; CHECK-NEXT:    lw a3, 844(a0)
+; CHECK-NEXT:    sw a2, 1212(sp)
+; CHECK-NEXT:    sw a3, 1216(sp)
+; CHECK-NEXT:    lw a2, 848(a0)
+; CHECK-NEXT:    lw a3, 852(a0)
+; CHECK-NEXT:    sw a2, 1204(sp)
+; CHECK-NEXT:    sw a3, 1208(sp)
+; CHECK-NEXT:    lw a2, 856(a0)
+; CHECK-NEXT:    lw a3, 860(a0)
+; CHECK-NEXT:    sw a2, 1196(sp)
+; CHECK-NEXT:    sw a3, 1200(sp)
+; CHECK-NEXT:    lw a2, 864(a0)
+; CHECK-NEXT:    lw a3, 868(a0)
+; CHECK-NEXT:    sw a2, 1188(sp)
+; CHECK-NEXT:    sw a3, 1192(sp)
+; CHECK-NEXT:    lw a2, 872(a0)
+; CHECK-NEXT:    lw a3, 876(a0)
+; CHECK-NEXT:    sw a2, 1180(sp)
+; CHECK-NEXT:    sw a3, 1184(sp)
+; CHECK-NEXT:    lw a2, 880(a0)
+; CHECK-NEXT:    lw a3, 884(a0)
+; CHECK-NEXT:    sw a2, 1172(sp)
+; CHECK-NEXT:    sw a3, 1176(sp)
+; CHECK-NEXT:    lw a2, 888(a0)
+; CHECK-NEXT:    lw a3, 892(a0)
+; CHECK-NEXT:    sw a2, 1164(sp)
+; CHECK-NEXT:    sw a3, 1168(sp)
+; CHECK-NEXT:    lw a2, 896(a0)
+; CHECK-NEXT:    lw a3, 900(a0)
+; CHECK-NEXT:    sw a2, 1156(sp)
+; CHECK-NEXT:    sw a3, 1160(sp)
+; CHECK-NEXT:    lw a2, 904(a0)
+; CHECK-NEXT:    lw a3, 908(a0)
+; CHECK-NEXT:    sw a2, 1148(sp)
...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/85871


More information about the llvm-commits mailing list