[llvm] [RegAlloc] Constrain rematted regclass to use (PR #164386)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Oct 21 03:25:13 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-regalloc
Author: Luke Lau (lukel97)
<details>
<summary>Changes</summary>
When rematting we create a new virtual register with the original def's register class. However the use may have a different register class if the interval is split, which means we end up with an invalid register class.
This fixes #<!-- -->164181 by constraining the newly created register to the use's register class.
The test case is reduced as far as it goes. Because this test requires us to reach a certain amount of register pressure in certain conditions I'm not sure if there's an easy way to handwrite this scenario.
---
Patch is 32.44 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/164386.diff
2 Files Affected:
- (modified) llvm/lib/CodeGen/InlineSpiller.cpp (+3)
- (added) llvm/test/CodeGen/AArch64/pr164181.ll (+676)
``````````diff
diff --git a/llvm/lib/CodeGen/InlineSpiller.cpp b/llvm/lib/CodeGen/InlineSpiller.cpp
index d6e85059b5db8..c3e0964594bd5 100644
--- a/llvm/lib/CodeGen/InlineSpiller.cpp
+++ b/llvm/lib/CodeGen/InlineSpiller.cpp
@@ -721,6 +721,9 @@ bool InlineSpiller::reMaterializeFor(LiveInterval &VirtReg, MachineInstr &MI) {
// Allocate a new register for the remat.
Register NewVReg = Edit->createFrom(Original);
+ // Constrain it to the register class of MI.
+ MRI.constrainRegClass(NewVReg, MRI.getRegClass(VirtReg.reg()));
+
// Finally we can rematerialize OrigMI before MI.
SlotIndex DefIdx =
Edit->rematerializeAt(*MI.getParent(), MI, NewVReg, RM, TRI);
diff --git a/llvm/test/CodeGen/AArch64/pr164181.ll b/llvm/test/CodeGen/AArch64/pr164181.ll
new file mode 100644
index 0000000000000..a90f156cc91d7
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/pr164181.ll
@@ -0,0 +1,676 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc -verify-machineinstrs < %s | FileCheck %s
+
+; This test recreates a regalloc crash reported in
+; https://github.com/llvm/llvm-project/issues/164181
+; When rematting an instruction we need to make sure to constrain the newly
+; allocated register to both the rematted def's reg class and the use's reg
+; class.
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128-Fn32"
+target triple = "aarch64-unknown-linux-gnu"
+
+ at var_32 = external global i16
+ at var_35 = external global i64
+ at var_39 = external global i64
+ at var_46 = external global i64
+ at var_50 = external global i32
+
+define void @f(i1 %var_0, i16 %var_1, i64 %var_2, i8 %var_3, i16 %var_4, i1 %var_5, i32 %var_6, i32 %var_7, i8 %var_10, i64 %var_11, i8 %var_14, i32 %var_15, i64 %var_16, ptr %arr_3, ptr %arr_4, ptr %arr_6, ptr %arr_7, ptr %arr_12, ptr %arr_13, ptr %arr_19, i64 %mul, i64 %conv35, i64 %idxprom138.us16, i8 %0, i8 %1, ptr %invariant.gep875.us) #0 {
+; CHECK-LABEL: f:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: sub sp, sp, #240
+; CHECK-NEXT: str x30, [sp, #144] // 8-byte Folded Spill
+; CHECK-NEXT: stp x28, x27, [sp, #160] // 16-byte Folded Spill
+; CHECK-NEXT: stp x26, x25, [sp, #176] // 16-byte Folded Spill
+; CHECK-NEXT: stp x24, x23, [sp, #192] // 16-byte Folded Spill
+; CHECK-NEXT: stp x22, x21, [sp, #208] // 16-byte Folded Spill
+; CHECK-NEXT: stp x20, x19, [sp, #224] // 16-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 240
+; CHECK-NEXT: .cfi_offset w19, -8
+; CHECK-NEXT: .cfi_offset w20, -16
+; CHECK-NEXT: .cfi_offset w21, -24
+; CHECK-NEXT: .cfi_offset w22, -32
+; CHECK-NEXT: .cfi_offset w23, -40
+; CHECK-NEXT: .cfi_offset w24, -48
+; CHECK-NEXT: .cfi_offset w25, -56
+; CHECK-NEXT: .cfi_offset w26, -64
+; CHECK-NEXT: .cfi_offset w27, -72
+; CHECK-NEXT: .cfi_offset w28, -80
+; CHECK-NEXT: .cfi_offset w30, -96
+; CHECK-NEXT: str w6, [sp, #20] // 4-byte Folded Spill
+; CHECK-NEXT: str w4, [sp, #72] // 4-byte Folded Spill
+; CHECK-NEXT: str w3, [sp, #112] // 4-byte Folded Spill
+; CHECK-NEXT: str w5, [sp, #36] // 4-byte Folded Spill
+; CHECK-NEXT: tbz w5, #0, .LBB0_43
+; CHECK-NEXT: // %bb.1: // %for.body41.lr.ph
+; CHECK-NEXT: ldr x4, [sp, #312]
+; CHECK-NEXT: ldr x14, [sp, #280]
+; CHECK-NEXT: tbz w0, #0, .LBB0_42
+; CHECK-NEXT: // %bb.2: // %for.body41.us.preheader
+; CHECK-NEXT: ldrb w8, [sp, #368]
+; CHECK-NEXT: ldrb w12, [sp, #256]
+; CHECK-NEXT: ldr w26, [sp, #264]
+; CHECK-NEXT: adrp x20, :got:var_50
+; CHECK-NEXT: mov x28, #-1 // =0xffffffffffffffff
+; CHECK-NEXT: mov w21, #36006 // =0x8ca6
+; CHECK-NEXT: ldr x11, [sp, #376]
+; CHECK-NEXT: ldrb w13, [sp, #360]
+; CHECK-NEXT: ldp x17, x16, [sp, #296]
+; CHECK-NEXT: mov w22, #1 // =0x1
+; CHECK-NEXT: add x27, x14, #120
+; CHECK-NEXT: ldr x18, [sp, #288]
+; CHECK-NEXT: ldr x7, [sp, #272]
+; CHECK-NEXT: ldr x5, [sp, #248]
+; CHECK-NEXT: mov x10, xzr
+; CHECK-NEXT: mov w23, wzr
+; CHECK-NEXT: mov w30, wzr
+; CHECK-NEXT: ldrb w19, [sp, #240]
+; CHECK-NEXT: mov w25, wzr
+; CHECK-NEXT: mov x24, xzr
+; CHECK-NEXT: str w8, [sp, #108] // 4-byte Folded Spill
+; CHECK-NEXT: mov x3, x26
+; CHECK-NEXT: ldp x9, x8, [sp, #344]
+; CHECK-NEXT: str w12, [sp, #92] // 4-byte Folded Spill
+; CHECK-NEXT: mov w12, #1 // =0x1
+; CHECK-NEXT: bic w12, w12, w0
+; CHECK-NEXT: str w12, [sp, #76] // 4-byte Folded Spill
+; CHECK-NEXT: mov w12, #48 // =0x30
+; CHECK-NEXT: str x9, [sp, #136] // 8-byte Folded Spill
+; CHECK-NEXT: ldp x9, x15, [sp, #328]
+; CHECK-NEXT: madd x8, x8, x12, x9
+; CHECK-NEXT: str x8, [sp, #64] // 8-byte Folded Spill
+; CHECK-NEXT: add x8, x26, w26, uxtw #1
+; CHECK-NEXT: ldr x20, [x20, :got_lo12:var_50]
+; CHECK-NEXT: str x26, [sp, #96] // 8-byte Folded Spill
+; CHECK-NEXT: str x14, [sp, #152] // 8-byte Folded Spill
+; CHECK-NEXT: lsl x6, x8, #3
+; CHECK-NEXT: add x8, x14, #120
+; CHECK-NEXT: str x4, [sp, #24] // 8-byte Folded Spill
+; CHECK-NEXT: str w19, [sp, #16] // 4-byte Folded Spill
+; CHECK-NEXT: str x8, [sp, #80] // 8-byte Folded Spill
+; CHECK-NEXT: b .LBB0_4
+; CHECK-NEXT: .p2align 5, , 16
+; CHECK-NEXT: .LBB0_3: // in Loop: Header=BB0_4 Depth=1
+; CHECK-NEXT: ldr w19, [sp, #16] // 4-byte Folded Reload
+; CHECK-NEXT: ldr x24, [sp, #40] // 8-byte Folded Reload
+; CHECK-NEXT: ldr x14, [sp, #152] // 8-byte Folded Reload
+; CHECK-NEXT: mov w23, #1 // =0x1
+; CHECK-NEXT: mov w30, #1 // =0x1
+; CHECK-NEXT: mov w25, w19
+; CHECK-NEXT: .LBB0_4: // %for.body41.us
+; CHECK-NEXT: // =>This Loop Header: Depth=1
+; CHECK-NEXT: // Child Loop BB0_6 Depth 2
+; CHECK-NEXT: // Child Loop BB0_8 Depth 3
+; CHECK-NEXT: // Child Loop BB0_10 Depth 4
+; CHECK-NEXT: // Child Loop BB0_11 Depth 5
+; CHECK-NEXT: // Child Loop BB0_28 Depth 5
+; CHECK-NEXT: // Child Loop BB0_39 Depth 5
+; CHECK-NEXT: ldr w8, [sp, #20] // 4-byte Folded Reload
+; CHECK-NEXT: mov x12, x24
+; CHECK-NEXT: str x24, [sp, #48] // 8-byte Folded Spill
+; CHECK-NEXT: str w8, [x14]
+; CHECK-NEXT: mov w8, #1 // =0x1
+; CHECK-NEXT: strb w19, [x14]
+; CHECK-NEXT: b .LBB0_6
+; CHECK-NEXT: .p2align 5, , 16
+; CHECK-NEXT: .LBB0_5: // %for.cond.cleanup93.us
+; CHECK-NEXT: // in Loop: Header=BB0_6 Depth=2
+; CHECK-NEXT: ldr w9, [sp, #36] // 4-byte Folded Reload
+; CHECK-NEXT: ldr x4, [sp, #24] // 8-byte Folded Reload
+; CHECK-NEXT: ldp x24, x12, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEXT: mov x22, xzr
+; CHECK-NEXT: mov w25, wzr
+; CHECK-NEXT: mov w8, wzr
+; CHECK-NEXT: tbz w9, #0, .LBB0_3
+; CHECK-NEXT: .LBB0_6: // %for.body67.us
+; CHECK-NEXT: // Parent Loop BB0_4 Depth=1
+; CHECK-NEXT: // => This Loop Header: Depth=2
+; CHECK-NEXT: // Child Loop BB0_8 Depth 3
+; CHECK-NEXT: // Child Loop BB0_10 Depth 4
+; CHECK-NEXT: // Child Loop BB0_11 Depth 5
+; CHECK-NEXT: // Child Loop BB0_28 Depth 5
+; CHECK-NEXT: // Child Loop BB0_39 Depth 5
+; CHECK-NEXT: str x12, [sp, #40] // 8-byte Folded Spill
+; CHECK-NEXT: cmn x24, #30
+; CHECK-NEXT: mov x12, #-30 // =0xffffffffffffffe2
+; CHECK-NEXT: add x19, x4, w8, sxtw #2
+; CHECK-NEXT: mov x9, xzr
+; CHECK-NEXT: csel x12, x24, x12, lo
+; CHECK-NEXT: mov w4, w30
+; CHECK-NEXT: str x12, [sp, #56] // 8-byte Folded Spill
+; CHECK-NEXT: b .LBB0_8
+; CHECK-NEXT: .p2align 5, , 16
+; CHECK-NEXT: .LBB0_7: // %for.cond.cleanup98.us
+; CHECK-NEXT: // in Loop: Header=BB0_8 Depth=3
+; CHECK-NEXT: ldr w4, [sp, #72] // 4-byte Folded Reload
+; CHECK-NEXT: ldr w23, [sp, #128] // 4-byte Folded Reload
+; CHECK-NEXT: mov w9, #1 // =0x1
+; CHECK-NEXT: mov x22, xzr
+; CHECK-NEXT: tbnz w0, #0, .LBB0_5
+; CHECK-NEXT: .LBB0_8: // %for.cond95.preheader.us
+; CHECK-NEXT: // Parent Loop BB0_4 Depth=1
+; CHECK-NEXT: // Parent Loop BB0_6 Depth=2
+; CHECK-NEXT: // => This Loop Header: Depth=3
+; CHECK-NEXT: // Child Loop BB0_10 Depth 4
+; CHECK-NEXT: // Child Loop BB0_11 Depth 5
+; CHECK-NEXT: // Child Loop BB0_28 Depth 5
+; CHECK-NEXT: // Child Loop BB0_39 Depth 5
+; CHECK-NEXT: ldr x8, [sp, #64] // 8-byte Folded Reload
+; CHECK-NEXT: mov w14, #1152 // =0x480
+; CHECK-NEXT: mov w24, #1 // =0x1
+; CHECK-NEXT: mov w12, wzr
+; CHECK-NEXT: str wzr, [sp, #132] // 4-byte Folded Spill
+; CHECK-NEXT: mov w30, w4
+; CHECK-NEXT: madd x8, x9, x14, x8
+; CHECK-NEXT: mov w14, #1 // =0x1
+; CHECK-NEXT: str x8, [sp, #120] // 8-byte Folded Spill
+; CHECK-NEXT: add x8, x9, x9, lsl #1
+; CHECK-NEXT: lsl x26, x8, #4
+; CHECK-NEXT: sxtb w8, w23
+; CHECK-NEXT: mov w23, w25
+; CHECK-NEXT: str w8, [sp, #116] // 4-byte Folded Spill
+; CHECK-NEXT: b .LBB0_10
+; CHECK-NEXT: .p2align 5, , 16
+; CHECK-NEXT: .LBB0_9: // %for.cond510.preheader.us
+; CHECK-NEXT: // in Loop: Header=BB0_10 Depth=4
+; CHECK-NEXT: ldr w23, [sp, #92] // 4-byte Folded Reload
+; CHECK-NEXT: mov x22, x8
+; CHECK-NEXT: ldr x3, [sp, #96] // 8-byte Folded Reload
+; CHECK-NEXT: ldr x27, [sp, #80] // 8-byte Folded Reload
+; CHECK-NEXT: mov x28, #-1 // =0xffffffffffffffff
+; CHECK-NEXT: mov x14, xzr
+; CHECK-NEXT: ldr w8, [sp, #76] // 4-byte Folded Reload
+; CHECK-NEXT: tbz w8, #31, .LBB0_7
+; CHECK-NEXT: .LBB0_10: // %for.body99.us
+; CHECK-NEXT: // Parent Loop BB0_4 Depth=1
+; CHECK-NEXT: // Parent Loop BB0_6 Depth=2
+; CHECK-NEXT: // Parent Loop BB0_8 Depth=3
+; CHECK-NEXT: // => This Loop Header: Depth=4
+; CHECK-NEXT: // Child Loop BB0_11 Depth 5
+; CHECK-NEXT: // Child Loop BB0_28 Depth 5
+; CHECK-NEXT: // Child Loop BB0_39 Depth 5
+; CHECK-NEXT: ldr w8, [sp, #116] // 4-byte Folded Reload
+; CHECK-NEXT: and w8, w8, w8, asr #31
+; CHECK-NEXT: str w8, [sp, #128] // 4-byte Folded Spill
+; CHECK-NEXT: .p2align 5, , 16
+; CHECK-NEXT: .LBB0_11: // %for.body113.us
+; CHECK-NEXT: // Parent Loop BB0_4 Depth=1
+; CHECK-NEXT: // Parent Loop BB0_6 Depth=2
+; CHECK-NEXT: // Parent Loop BB0_8 Depth=3
+; CHECK-NEXT: // Parent Loop BB0_10 Depth=4
+; CHECK-NEXT: // => This Inner Loop Header: Depth=5
+; CHECK-NEXT: tbnz w0, #0, .LBB0_11
+; CHECK-NEXT: // %bb.12: // %for.cond131.preheader.us
+; CHECK-NEXT: // in Loop: Header=BB0_10 Depth=4
+; CHECK-NEXT: ldr w8, [sp, #112] // 4-byte Folded Reload
+; CHECK-NEXT: mov w4, #1 // =0x1
+; CHECK-NEXT: strb w8, [x18]
+; CHECK-NEXT: ldr x8, [sp, #120] // 8-byte Folded Reload
+; CHECK-NEXT: ldrh w8, [x8]
+; CHECK-NEXT: cbnz w4, .LBB0_14
+; CHECK-NEXT: // %bb.13: // %cond.true146.us
+; CHECK-NEXT: // in Loop: Header=BB0_10 Depth=4
+; CHECK-NEXT: ldrsb w4, [x27, x3]
+; CHECK-NEXT: b .LBB0_15
+; CHECK-NEXT: .p2align 5, , 16
+; CHECK-NEXT: .LBB0_14: // in Loop: Header=BB0_10 Depth=4
+; CHECK-NEXT: mov w4, wzr
+; CHECK-NEXT: .LBB0_15: // %cond.end154.us
+; CHECK-NEXT: // in Loop: Header=BB0_10 Depth=4
+; CHECK-NEXT: mov w25, #18984 // =0x4a28
+; CHECK-NEXT: mul w8, w8, w25
+; CHECK-NEXT: and w8, w8, #0xfff8
+; CHECK-NEXT: lsl w8, w8, w4
+; CHECK-NEXT: cbz w8, .LBB0_17
+; CHECK-NEXT: // %bb.16: // %if.then.us
+; CHECK-NEXT: // in Loop: Header=BB0_10 Depth=4
+; CHECK-NEXT: str wzr, [sp, #132] // 4-byte Folded Spill
+; CHECK-NEXT: str wzr, [x18]
+; CHECK-NEXT: .LBB0_17: // %if.end.us
+; CHECK-NEXT: // in Loop: Header=BB0_10 Depth=4
+; CHECK-NEXT: ldr w8, [sp, #108] // 4-byte Folded Reload
+; CHECK-NEXT: mov w4, #18984 // =0x4a28
+; CHECK-NEXT: mov w25, w23
+; CHECK-NEXT: strb w8, [x18]
+; CHECK-NEXT: ldrsb w8, [x27, x3]
+; CHECK-NEXT: lsl w8, w4, w8
+; CHECK-NEXT: mov x4, #-18403 // =0xffffffffffffb81d
+; CHECK-NEXT: movk x4, #58909, lsl #16
+; CHECK-NEXT: cbz w8, .LBB0_19
+; CHECK-NEXT: // %bb.18: // %if.then.us.2
+; CHECK-NEXT: // in Loop: Header=BB0_10 Depth=4
+; CHECK-NEXT: str wzr, [sp, #132] // 4-byte Folded Spill
+; CHECK-NEXT: strb wzr, [x18]
+; CHECK-NEXT: .LBB0_19: // %if.then.us.5
+; CHECK-NEXT: // in Loop: Header=BB0_10 Depth=4
+; CHECK-NEXT: ldr w23, [sp, #132] // 4-byte Folded Reload
+; CHECK-NEXT: mov w8, #29625 // =0x73b9
+; CHECK-NEXT: movk w8, #21515, lsl #16
+; CHECK-NEXT: cmp w23, w8
+; CHECK-NEXT: csel w23, w23, w8, lt
+; CHECK-NEXT: str w23, [sp, #132] // 4-byte Folded Spill
+; CHECK-NEXT: tbz w0, #0, .LBB0_21
+; CHECK-NEXT: // %bb.20: // in Loop: Header=BB0_10 Depth=4
+; CHECK-NEXT: mov w8, wzr
+; CHECK-NEXT: b .LBB0_22
+; CHECK-NEXT: .p2align 5, , 16
+; CHECK-NEXT: .LBB0_21: // %cond.true146.us.7
+; CHECK-NEXT: // in Loop: Header=BB0_10 Depth=4
+; CHECK-NEXT: ldrsb w8, [x27, x3]
+; CHECK-NEXT: .LBB0_22: // %cond.end154.us.7
+; CHECK-NEXT: // in Loop: Header=BB0_10 Depth=4
+; CHECK-NEXT: mov w23, #18984 // =0x4a28
+; CHECK-NEXT: mov w3, #149 // =0x95
+; CHECK-NEXT: lsl w8, w23, w8
+; CHECK-NEXT: cbz w8, .LBB0_24
+; CHECK-NEXT: // %bb.23: // %if.then.us.7
+; CHECK-NEXT: // in Loop: Header=BB0_10 Depth=4
+; CHECK-NEXT: ldr x8, [sp, #152] // 8-byte Folded Reload
+; CHECK-NEXT: str wzr, [sp, #132] // 4-byte Folded Spill
+; CHECK-NEXT: str wzr, [x8]
+; CHECK-NEXT: .LBB0_24: // %if.end.us.7
+; CHECK-NEXT: // in Loop: Header=BB0_10 Depth=4
+; CHECK-NEXT: mov x23, xzr
+; CHECK-NEXT: b .LBB0_28
+; CHECK-NEXT: .p2align 5, , 16
+; CHECK-NEXT: .LBB0_25: // %cond.true331.us
+; CHECK-NEXT: // in Loop: Header=BB0_28 Depth=5
+; CHECK-NEXT: ldrsb w4, [x10]
+; CHECK-NEXT: .LBB0_26: // %cond.end345.us
+; CHECK-NEXT: // in Loop: Header=BB0_28 Depth=5
+; CHECK-NEXT: strh w4, [x18]
+; CHECK-NEXT: mul x4, x22, x28
+; CHECK-NEXT: adrp x22, :got:var_46
+; CHECK-NEXT: mov x8, xzr
+; CHECK-NEXT: ldr x22, [x22, :got_lo12:var_46]
+; CHECK-NEXT: str x4, [x22]
+; CHECK-NEXT: mov x4, #-18403 // =0xffffffffffffb81d
+; CHECK-NEXT: movk x4, #58909, lsl #16
+; CHECK-NEXT: .LBB0_27: // %for.inc371.us
+; CHECK-NEXT: // in Loop: Header=BB0_28 Depth=5
+; CHECK-NEXT: mov w22, #-18978 // =0xffffb5de
+; CHECK-NEXT: orr x23, x23, #0x1
+; CHECK-NEXT: mov x24, xzr
+; CHECK-NEXT: mul w12, w12, w22
+; CHECK-NEXT: mov x22, x5
+; CHECK-NEXT: tbz w0, #0, .LBB0_36
+; CHECK-NEXT: .LBB0_28: // %for.body194.us
+; CHECK-NEXT: // Parent Loop BB0_4 Depth=1
+; CHECK-NEXT: // Parent Loop BB0_6 Depth=2
+; CHECK-NEXT: // Parent Loop BB0_8 Depth=3
+; CHECK-NEXT: // Parent Loop BB0_10 Depth=4
+; CHECK-NEXT: // => This Inner Loop Header: Depth=5
+; CHECK-NEXT: cbnz wzr, .LBB0_30
+; CHECK-NEXT: // %bb.29: // %if.then222.us
+; CHECK-NEXT: // in Loop: Header=BB0_28 Depth=5
+; CHECK-NEXT: adrp x27, :got:var_32
+; CHECK-NEXT: ldur w8, [x19, #-12]
+; CHECK-NEXT: ldr x27, [x27, :got_lo12:var_32]
+; CHECK-NEXT: strh w8, [x27]
+; CHECK-NEXT: sxtb w8, w25
+; CHECK-NEXT: bic w25, w8, w8, asr #31
+; CHECK-NEXT: b .LBB0_31
+; CHECK-NEXT: .p2align 5, , 16
+; CHECK-NEXT: .LBB0_30: // in Loop: Header=BB0_28 Depth=5
+; CHECK-NEXT: mov w25, wzr
+; CHECK-NEXT: .LBB0_31: // %if.end239.us
+; CHECK-NEXT: // in Loop: Header=BB0_28 Depth=5
+; CHECK-NEXT: strb w3, [x16]
+; CHECK-NEXT: tst w13, #0xff
+; CHECK-NEXT: b.eq .LBB0_33
+; CHECK-NEXT: // %bb.32: // %if.then254.us
+; CHECK-NEXT: // in Loop: Header=BB0_28 Depth=5
+; CHECK-NEXT: ldrh w8, [x26, x14, lsl #1]
+; CHECK-NEXT: adrp x27, :got:var_35
+; CHECK-NEXT: ldr x27, [x27, :got_lo12:var_35]
+; CHECK-NEXT: cmp w8, #0
+; CHECK-NEXT: csel x8, xzr, x7, eq
+; CHECK-NEXT: str x8, [x27]
+; CHECK-NEXT: strh w1, [x17]
+; CHECK-NEXT: .LBB0_33: // %if.end282.us
+; CHECK-NEXT: // in Loop: Header=BB0_28 Depth=5
+; CHECK-NEXT: orr x27, x24, x4
+; CHECK-NEXT: adrp x8, :got:var_39
+; CHECK-NEXT: str x27, [x18]
+; CHECK-NEXT: ldr x8, [x8, :got_lo12:var_39]
+; CHECK-NEXT: str x10, [x8]
+; CHECK-NEXT: ldrb w8, [x6, x9]
+; CHECK-NEXT: str x8, [x18]
+; CHECK-NEXT: mov w8, #1 // =0x1
+; CHECK-NEXT: cbnz x2, .LBB0_27
+; CHECK-NEXT: // %bb.34: // %if.then327.us
+; CHECK-NEXT: // in Loop: Header=BB0_28 Depth=5
+; CHECK-NEXT: cbz w8, .LBB0_25
+; CHECK-NEXT: // %bb.35: // in Loop: Header=BB0_28 Depth=5
+; CHECK-NEXT: mov w4, wzr
+; CHECK-NEXT: b .LBB0_26
+; CHECK-NEXT: .p2align 5, , 16
+; CHECK-NEXT: .LBB0_36: // %for.cond376.preheader.us
+; CHECK-NEXT: // in Loop: Header=BB0_10 Depth=4
+; CHECK-NEXT: mov w3, #1152 // =0x480
+; CHECK-NEXT: mov x22, xzr
+; CHECK-NEXT: mov w4, wzr
+; CHECK-NEXT: mov x24, x27
+; CHECK-NEXT: lsl x23, x14, #1
+; CHECK-NEXT: mov x27, #-1 // =0xffffffffffffffff
+; CHECK-NEXT: madd x14, x14, x3, x11
+; CHECK-NEXT: mov w28, w30
+; CHECK-NEXT: mov w3, #-7680 // =0xffffe200
+; CHECK-NEXT: b .LBB0_39
+; CHECK-NEXT: .p2align 5, , 16
+; CHECK-NEXT: .LBB0_37: // %if.then466.us
+; CHECK-NEXT: // in Loop: Header=BB0_39 Depth=5
+; CHECK-NEXT: ldr x28, [sp, #152] // 8-byte Folded Reload
+; CHECK-NEXT: ldr x3, [sp, #136] // 8-byte Folded Reload
+; CHECK-NEXT: sxtb w4, w4
+; CHECK-NEXT: bic w4, w4, w4, asr #31
+; CHECK-NEXT: str x3, [x28]
+; CHECK-NEXT: mov w3, #-7680 // =0xffffe200
+; CHECK-NEXT: .LBB0_38: // %for.inc505.us
+; CHECK-NEXT: // in Loop: Header=BB0_39 Depth=5
+; CHECK-NEXT: add x22, x22, #1
+; CHECK-NEXT: add x27, x27, #1
+; CHECK-NEXT: mov w28, wzr
+; CHECK-NEXT: cmp x27, #0
+; CHECK-NEXT: b.hs .LBB0_9
+; CHECK-NEXT: .LBB0_39: // %for.body380.us
+; CHECK-NEXT: // Parent Loop BB0_4 Depth=1
+; CHECK-NEXT: // Parent Loop BB0_6 Depth=2
+; CHECK-NEXT: // Parent Loop BB0_8 Depth=3
+; CHECK-NEXT: // Parent Loop BB0_10 Depth=4
+; CHECK-NEXT: // => This Inner Loop Header: Depth=5
+; CHECK-NEXT: mov w30, w28
+; CHECK-NEXT: ldrh w28, [x23]
+; CHECK-NEXT: tst w0, #0x1
+; CHECK-NEXT: strh w28, [x11]
+; CHECK-NEXT: csel w28, w21, w3, ne
+; CHECK-NEXT: str w28, [x20]
+; CHECK-NEXT: cbz x15, .LBB0_38
+; CHECK-NEXT: // %bb.40: // %if.then436.us
+; CHECK-NEXT: // in Loop: Header=BB0_39 Depth=5
+; CHECK-NEXT: ldrh w28, [x14]
+; CHECK-NEXT: cbnz w28, .LBB0_37
+; CHECK-NEXT: // %bb.41: // in Loop: Header=BB0_39 Depth=5
+; CHECK-NEXT: mov w4, wzr
+; CHECK-NEXT: b .LBB0_38
+; CHECK-NEXT: .LBB0_42: // %for.body41
+; CHECK-NEXT: strb wzr, [x4]
+; CHECK-NEXT: strb wzr, [x14]
+; CHECK-NEXT: .LBB0_43: // %for.cond563.preheader
+; CHECK-NEXT: ldp x20, x19, [sp, #224] // 16-byte Folded Reload
+; CHECK-NEXT: ldp x22, x21, [sp, #208] // 16-byte Folded Reload
+; CHECK-NEXT: ldp x24, x23, [sp, #192] // 16-byte Folded Reload
+; CHECK-NEXT: ldp x26, x25, [sp, #176] // 16-byte Folded Reload
+; CHECK-NEXT: ldp x28, x27, [sp, #160] // 16-byte Folded Reload
+; CHECK-NEXT: ldr x30, [sp, #144] // 8-byte Folded Reload
+; CHECK-NEXT: add sp, sp, #240
+; CHECK-NEXT: ret
+entry:
+ br i1 %var_5, label %for.body41.lr.ph, label %for.cond563.preheader
+
+for.body41.lr.ph: ; preds = %entry
+ %arrayidx147 = getelementptr i8, ptr %arr_3, i64 120
+ %tobool326.not = icmp eq i64 %var_2, 0
+ %not353 = xor i64 0, -1
+ %add538 = select i1 %var_0, i16 0, i16 1
+ br i1 %var_0, label %for.body41.us, label %for.body41
+
+for.body41.us: ; preds = %for.cond.cleanup93.us, %for.body41.lr.ph
+ %var_24.promoted9271009.us = phi i64 [ 0, %for.body41.lr.ph ], [ %6, %for.cond.cleanup93.us ]
+ %var_37.promoted9301008.us = phi i64 [ 1, %for.body41.lr.ph ], [ 0, %for.cond.cleanup93.us ]
+ %2 = phi i8 [ 0, %for.body41.lr.ph ], [ 1, %for.cond.cleanup93.us ]
+ %add4139751001.us = phi i16 [ 0, %for.body41.lr.ph ], [ 1, %for.cond.cleanup93.us ]
+ %3 = phi i8 [ 0, %for.body41.lr.ph ], [ %var_10, %for.cond.cleanup93.us ]
+ store i32 %var_6, ptr %arr_3, align 4
+ store i8 %var_10, ptr %arr_3, align 1
+ br label %for.body67.us
+
+for.body67.us: ; preds = %for.cond.cleanup93.us, %for.body41.us
+ %4 = phi i8 [ %3, %for.body41.us ], [ 0, %for.cond.cleanup93.us ]
+ %add413977.us = phi i16 [ %add4139751001.us, %for.body41.us ], [ %add413.us17, %for.cond.cleanup93.us ]
+ %5 = phi i8 [ %2, %for.body41.us ], [ %.sroa.speculated829.us, %for.cond.cleanup93.us ]
+ %conv64922.us = phi i32 [ 1, %for.body41.us ], [ 0, %for.cond.clea...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/164386
More information about the llvm-commits
mailing list