[llvm] [RegAlloc] Constrain rematted regclass to use (PR #164386)

via llvm-commits llvm-commits at lists.llvm.org
Tue Oct 21 03:25:13 PDT 2025


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-llvm-regalloc

Author: Luke Lau (lukel97)

<details>
<summary>Changes</summary>

When rematting, we create a new virtual register with the original def's register class. However, the use may have a different register class if the interval is split, which means we end up with an invalid register class.

This fixes #164181 by constraining the newly created register to the use's register class.

The test case is reduced as far as it goes. Because this test requires us to reach a certain amount of register pressure in certain conditions, I'm not sure if there's an easy way to handwrite this scenario.


---

Patch is 32.44 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/164386.diff


2 Files Affected:

- (modified) llvm/lib/CodeGen/InlineSpiller.cpp (+3) 
- (added) llvm/test/CodeGen/AArch64/pr164181.ll (+676) 


``````````diff
diff --git a/llvm/lib/CodeGen/InlineSpiller.cpp b/llvm/lib/CodeGen/InlineSpiller.cpp
index d6e85059b5db8..c3e0964594bd5 100644
--- a/llvm/lib/CodeGen/InlineSpiller.cpp
+++ b/llvm/lib/CodeGen/InlineSpiller.cpp
@@ -721,6 +721,9 @@ bool InlineSpiller::reMaterializeFor(LiveInterval &VirtReg, MachineInstr &MI) {
   // Allocate a new register for the remat.
   Register NewVReg = Edit->createFrom(Original);
 
+  // Constrain it to the register class of MI.
+  MRI.constrainRegClass(NewVReg, MRI.getRegClass(VirtReg.reg()));
+
   // Finally we can rematerialize OrigMI before MI.
   SlotIndex DefIdx =
       Edit->rematerializeAt(*MI.getParent(), MI, NewVReg, RM, TRI);
diff --git a/llvm/test/CodeGen/AArch64/pr164181.ll b/llvm/test/CodeGen/AArch64/pr164181.ll
new file mode 100644
index 0000000000000..a90f156cc91d7
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/pr164181.ll
@@ -0,0 +1,676 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc -verify-machineinstrs < %s | FileCheck %s
+
+; This test recreates a regalloc crash reported in
+; https://github.com/llvm/llvm-project/issues/164181
+; When rematting an instruction we need to make sure to constrain the newly
+; allocated register to both the rematted def's reg class and the use's reg
+; class.
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128-Fn32"
+target triple = "aarch64-unknown-linux-gnu"
+
+@var_32 = external global i16
+@var_35 = external global i64
+@var_39 = external global i64
+@var_46 = external global i64
+@var_50 = external global i32
+
+define void @f(i1 %var_0, i16 %var_1, i64 %var_2, i8 %var_3, i16 %var_4, i1 %var_5, i32 %var_6, i32 %var_7, i8 %var_10, i64 %var_11, i8 %var_14, i32 %var_15, i64 %var_16, ptr %arr_3, ptr %arr_4, ptr %arr_6, ptr %arr_7, ptr %arr_12, ptr %arr_13, ptr %arr_19, i64 %mul, i64 %conv35, i64 %idxprom138.us16, i8 %0, i8 %1, ptr %invariant.gep875.us) #0 {
+; CHECK-LABEL: f:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    sub sp, sp, #240
+; CHECK-NEXT:    str x30, [sp, #144] // 8-byte Folded Spill
+; CHECK-NEXT:    stp x28, x27, [sp, #160] // 16-byte Folded Spill
+; CHECK-NEXT:    stp x26, x25, [sp, #176] // 16-byte Folded Spill
+; CHECK-NEXT:    stp x24, x23, [sp, #192] // 16-byte Folded Spill
+; CHECK-NEXT:    stp x22, x21, [sp, #208] // 16-byte Folded Spill
+; CHECK-NEXT:    stp x20, x19, [sp, #224] // 16-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 240
+; CHECK-NEXT:    .cfi_offset w19, -8
+; CHECK-NEXT:    .cfi_offset w20, -16
+; CHECK-NEXT:    .cfi_offset w21, -24
+; CHECK-NEXT:    .cfi_offset w22, -32
+; CHECK-NEXT:    .cfi_offset w23, -40
+; CHECK-NEXT:    .cfi_offset w24, -48
+; CHECK-NEXT:    .cfi_offset w25, -56
+; CHECK-NEXT:    .cfi_offset w26, -64
+; CHECK-NEXT:    .cfi_offset w27, -72
+; CHECK-NEXT:    .cfi_offset w28, -80
+; CHECK-NEXT:    .cfi_offset w30, -96
+; CHECK-NEXT:    str w6, [sp, #20] // 4-byte Folded Spill
+; CHECK-NEXT:    str w4, [sp, #72] // 4-byte Folded Spill
+; CHECK-NEXT:    str w3, [sp, #112] // 4-byte Folded Spill
+; CHECK-NEXT:    str w5, [sp, #36] // 4-byte Folded Spill
+; CHECK-NEXT:    tbz w5, #0, .LBB0_43
+; CHECK-NEXT:  // %bb.1: // %for.body41.lr.ph
+; CHECK-NEXT:    ldr x4, [sp, #312]
+; CHECK-NEXT:    ldr x14, [sp, #280]
+; CHECK-NEXT:    tbz w0, #0, .LBB0_42
+; CHECK-NEXT:  // %bb.2: // %for.body41.us.preheader
+; CHECK-NEXT:    ldrb w8, [sp, #368]
+; CHECK-NEXT:    ldrb w12, [sp, #256]
+; CHECK-NEXT:    ldr w26, [sp, #264]
+; CHECK-NEXT:    adrp x20, :got:var_50
+; CHECK-NEXT:    mov x28, #-1 // =0xffffffffffffffff
+; CHECK-NEXT:    mov w21, #36006 // =0x8ca6
+; CHECK-NEXT:    ldr x11, [sp, #376]
+; CHECK-NEXT:    ldrb w13, [sp, #360]
+; CHECK-NEXT:    ldp x17, x16, [sp, #296]
+; CHECK-NEXT:    mov w22, #1 // =0x1
+; CHECK-NEXT:    add x27, x14, #120
+; CHECK-NEXT:    ldr x18, [sp, #288]
+; CHECK-NEXT:    ldr x7, [sp, #272]
+; CHECK-NEXT:    ldr x5, [sp, #248]
+; CHECK-NEXT:    mov x10, xzr
+; CHECK-NEXT:    mov w23, wzr
+; CHECK-NEXT:    mov w30, wzr
+; CHECK-NEXT:    ldrb w19, [sp, #240]
+; CHECK-NEXT:    mov w25, wzr
+; CHECK-NEXT:    mov x24, xzr
+; CHECK-NEXT:    str w8, [sp, #108] // 4-byte Folded Spill
+; CHECK-NEXT:    mov x3, x26
+; CHECK-NEXT:    ldp x9, x8, [sp, #344]
+; CHECK-NEXT:    str w12, [sp, #92] // 4-byte Folded Spill
+; CHECK-NEXT:    mov w12, #1 // =0x1
+; CHECK-NEXT:    bic w12, w12, w0
+; CHECK-NEXT:    str w12, [sp, #76] // 4-byte Folded Spill
+; CHECK-NEXT:    mov w12, #48 // =0x30
+; CHECK-NEXT:    str x9, [sp, #136] // 8-byte Folded Spill
+; CHECK-NEXT:    ldp x9, x15, [sp, #328]
+; CHECK-NEXT:    madd x8, x8, x12, x9
+; CHECK-NEXT:    str x8, [sp, #64] // 8-byte Folded Spill
+; CHECK-NEXT:    add x8, x26, w26, uxtw #1
+; CHECK-NEXT:    ldr x20, [x20, :got_lo12:var_50]
+; CHECK-NEXT:    str x26, [sp, #96] // 8-byte Folded Spill
+; CHECK-NEXT:    str x14, [sp, #152] // 8-byte Folded Spill
+; CHECK-NEXT:    lsl x6, x8, #3
+; CHECK-NEXT:    add x8, x14, #120
+; CHECK-NEXT:    str x4, [sp, #24] // 8-byte Folded Spill
+; CHECK-NEXT:    str w19, [sp, #16] // 4-byte Folded Spill
+; CHECK-NEXT:    str x8, [sp, #80] // 8-byte Folded Spill
+; CHECK-NEXT:    b .LBB0_4
+; CHECK-NEXT:    .p2align 5, , 16
+; CHECK-NEXT:  .LBB0_3: // in Loop: Header=BB0_4 Depth=1
+; CHECK-NEXT:    ldr w19, [sp, #16] // 4-byte Folded Reload
+; CHECK-NEXT:    ldr x24, [sp, #40] // 8-byte Folded Reload
+; CHECK-NEXT:    ldr x14, [sp, #152] // 8-byte Folded Reload
+; CHECK-NEXT:    mov w23, #1 // =0x1
+; CHECK-NEXT:    mov w30, #1 // =0x1
+; CHECK-NEXT:    mov w25, w19
+; CHECK-NEXT:  .LBB0_4: // %for.body41.us
+; CHECK-NEXT:    // =>This Loop Header: Depth=1
+; CHECK-NEXT:    // Child Loop BB0_6 Depth 2
+; CHECK-NEXT:    // Child Loop BB0_8 Depth 3
+; CHECK-NEXT:    // Child Loop BB0_10 Depth 4
+; CHECK-NEXT:    // Child Loop BB0_11 Depth 5
+; CHECK-NEXT:    // Child Loop BB0_28 Depth 5
+; CHECK-NEXT:    // Child Loop BB0_39 Depth 5
+; CHECK-NEXT:    ldr w8, [sp, #20] // 4-byte Folded Reload
+; CHECK-NEXT:    mov x12, x24
+; CHECK-NEXT:    str x24, [sp, #48] // 8-byte Folded Spill
+; CHECK-NEXT:    str w8, [x14]
+; CHECK-NEXT:    mov w8, #1 // =0x1
+; CHECK-NEXT:    strb w19, [x14]
+; CHECK-NEXT:    b .LBB0_6
+; CHECK-NEXT:    .p2align 5, , 16
+; CHECK-NEXT:  .LBB0_5: // %for.cond.cleanup93.us
+; CHECK-NEXT:    // in Loop: Header=BB0_6 Depth=2
+; CHECK-NEXT:    ldr w9, [sp, #36] // 4-byte Folded Reload
+; CHECK-NEXT:    ldr x4, [sp, #24] // 8-byte Folded Reload
+; CHECK-NEXT:    ldp x24, x12, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEXT:    mov x22, xzr
+; CHECK-NEXT:    mov w25, wzr
+; CHECK-NEXT:    mov w8, wzr
+; CHECK-NEXT:    tbz w9, #0, .LBB0_3
+; CHECK-NEXT:  .LBB0_6: // %for.body67.us
+; CHECK-NEXT:    // Parent Loop BB0_4 Depth=1
+; CHECK-NEXT:    // => This Loop Header: Depth=2
+; CHECK-NEXT:    // Child Loop BB0_8 Depth 3
+; CHECK-NEXT:    // Child Loop BB0_10 Depth 4
+; CHECK-NEXT:    // Child Loop BB0_11 Depth 5
+; CHECK-NEXT:    // Child Loop BB0_28 Depth 5
+; CHECK-NEXT:    // Child Loop BB0_39 Depth 5
+; CHECK-NEXT:    str x12, [sp, #40] // 8-byte Folded Spill
+; CHECK-NEXT:    cmn x24, #30
+; CHECK-NEXT:    mov x12, #-30 // =0xffffffffffffffe2
+; CHECK-NEXT:    add x19, x4, w8, sxtw #2
+; CHECK-NEXT:    mov x9, xzr
+; CHECK-NEXT:    csel x12, x24, x12, lo
+; CHECK-NEXT:    mov w4, w30
+; CHECK-NEXT:    str x12, [sp, #56] // 8-byte Folded Spill
+; CHECK-NEXT:    b .LBB0_8
+; CHECK-NEXT:    .p2align 5, , 16
+; CHECK-NEXT:  .LBB0_7: // %for.cond.cleanup98.us
+; CHECK-NEXT:    // in Loop: Header=BB0_8 Depth=3
+; CHECK-NEXT:    ldr w4, [sp, #72] // 4-byte Folded Reload
+; CHECK-NEXT:    ldr w23, [sp, #128] // 4-byte Folded Reload
+; CHECK-NEXT:    mov w9, #1 // =0x1
+; CHECK-NEXT:    mov x22, xzr
+; CHECK-NEXT:    tbnz w0, #0, .LBB0_5
+; CHECK-NEXT:  .LBB0_8: // %for.cond95.preheader.us
+; CHECK-NEXT:    // Parent Loop BB0_4 Depth=1
+; CHECK-NEXT:    // Parent Loop BB0_6 Depth=2
+; CHECK-NEXT:    // => This Loop Header: Depth=3
+; CHECK-NEXT:    // Child Loop BB0_10 Depth 4
+; CHECK-NEXT:    // Child Loop BB0_11 Depth 5
+; CHECK-NEXT:    // Child Loop BB0_28 Depth 5
+; CHECK-NEXT:    // Child Loop BB0_39 Depth 5
+; CHECK-NEXT:    ldr x8, [sp, #64] // 8-byte Folded Reload
+; CHECK-NEXT:    mov w14, #1152 // =0x480
+; CHECK-NEXT:    mov w24, #1 // =0x1
+; CHECK-NEXT:    mov w12, wzr
+; CHECK-NEXT:    str wzr, [sp, #132] // 4-byte Folded Spill
+; CHECK-NEXT:    mov w30, w4
+; CHECK-NEXT:    madd x8, x9, x14, x8
+; CHECK-NEXT:    mov w14, #1 // =0x1
+; CHECK-NEXT:    str x8, [sp, #120] // 8-byte Folded Spill
+; CHECK-NEXT:    add x8, x9, x9, lsl #1
+; CHECK-NEXT:    lsl x26, x8, #4
+; CHECK-NEXT:    sxtb w8, w23
+; CHECK-NEXT:    mov w23, w25
+; CHECK-NEXT:    str w8, [sp, #116] // 4-byte Folded Spill
+; CHECK-NEXT:    b .LBB0_10
+; CHECK-NEXT:    .p2align 5, , 16
+; CHECK-NEXT:  .LBB0_9: // %for.cond510.preheader.us
+; CHECK-NEXT:    // in Loop: Header=BB0_10 Depth=4
+; CHECK-NEXT:    ldr w23, [sp, #92] // 4-byte Folded Reload
+; CHECK-NEXT:    mov x22, x8
+; CHECK-NEXT:    ldr x3, [sp, #96] // 8-byte Folded Reload
+; CHECK-NEXT:    ldr x27, [sp, #80] // 8-byte Folded Reload
+; CHECK-NEXT:    mov x28, #-1 // =0xffffffffffffffff
+; CHECK-NEXT:    mov x14, xzr
+; CHECK-NEXT:    ldr w8, [sp, #76] // 4-byte Folded Reload
+; CHECK-NEXT:    tbz w8, #31, .LBB0_7
+; CHECK-NEXT:  .LBB0_10: // %for.body99.us
+; CHECK-NEXT:    // Parent Loop BB0_4 Depth=1
+; CHECK-NEXT:    // Parent Loop BB0_6 Depth=2
+; CHECK-NEXT:    // Parent Loop BB0_8 Depth=3
+; CHECK-NEXT:    // => This Loop Header: Depth=4
+; CHECK-NEXT:    // Child Loop BB0_11 Depth 5
+; CHECK-NEXT:    // Child Loop BB0_28 Depth 5
+; CHECK-NEXT:    // Child Loop BB0_39 Depth 5
+; CHECK-NEXT:    ldr w8, [sp, #116] // 4-byte Folded Reload
+; CHECK-NEXT:    and w8, w8, w8, asr #31
+; CHECK-NEXT:    str w8, [sp, #128] // 4-byte Folded Spill
+; CHECK-NEXT:    .p2align 5, , 16
+; CHECK-NEXT:  .LBB0_11: // %for.body113.us
+; CHECK-NEXT:    // Parent Loop BB0_4 Depth=1
+; CHECK-NEXT:    // Parent Loop BB0_6 Depth=2
+; CHECK-NEXT:    // Parent Loop BB0_8 Depth=3
+; CHECK-NEXT:    // Parent Loop BB0_10 Depth=4
+; CHECK-NEXT:    // => This Inner Loop Header: Depth=5
+; CHECK-NEXT:    tbnz w0, #0, .LBB0_11
+; CHECK-NEXT:  // %bb.12: // %for.cond131.preheader.us
+; CHECK-NEXT:    // in Loop: Header=BB0_10 Depth=4
+; CHECK-NEXT:    ldr w8, [sp, #112] // 4-byte Folded Reload
+; CHECK-NEXT:    mov w4, #1 // =0x1
+; CHECK-NEXT:    strb w8, [x18]
+; CHECK-NEXT:    ldr x8, [sp, #120] // 8-byte Folded Reload
+; CHECK-NEXT:    ldrh w8, [x8]
+; CHECK-NEXT:    cbnz w4, .LBB0_14
+; CHECK-NEXT:  // %bb.13: // %cond.true146.us
+; CHECK-NEXT:    // in Loop: Header=BB0_10 Depth=4
+; CHECK-NEXT:    ldrsb w4, [x27, x3]
+; CHECK-NEXT:    b .LBB0_15
+; CHECK-NEXT:    .p2align 5, , 16
+; CHECK-NEXT:  .LBB0_14: // in Loop: Header=BB0_10 Depth=4
+; CHECK-NEXT:    mov w4, wzr
+; CHECK-NEXT:  .LBB0_15: // %cond.end154.us
+; CHECK-NEXT:    // in Loop: Header=BB0_10 Depth=4
+; CHECK-NEXT:    mov w25, #18984 // =0x4a28
+; CHECK-NEXT:    mul w8, w8, w25
+; CHECK-NEXT:    and w8, w8, #0xfff8
+; CHECK-NEXT:    lsl w8, w8, w4
+; CHECK-NEXT:    cbz w8, .LBB0_17
+; CHECK-NEXT:  // %bb.16: // %if.then.us
+; CHECK-NEXT:    // in Loop: Header=BB0_10 Depth=4
+; CHECK-NEXT:    str wzr, [sp, #132] // 4-byte Folded Spill
+; CHECK-NEXT:    str wzr, [x18]
+; CHECK-NEXT:  .LBB0_17: // %if.end.us
+; CHECK-NEXT:    // in Loop: Header=BB0_10 Depth=4
+; CHECK-NEXT:    ldr w8, [sp, #108] // 4-byte Folded Reload
+; CHECK-NEXT:    mov w4, #18984 // =0x4a28
+; CHECK-NEXT:    mov w25, w23
+; CHECK-NEXT:    strb w8, [x18]
+; CHECK-NEXT:    ldrsb w8, [x27, x3]
+; CHECK-NEXT:    lsl w8, w4, w8
+; CHECK-NEXT:    mov x4, #-18403 // =0xffffffffffffb81d
+; CHECK-NEXT:    movk x4, #58909, lsl #16
+; CHECK-NEXT:    cbz w8, .LBB0_19
+; CHECK-NEXT:  // %bb.18: // %if.then.us.2
+; CHECK-NEXT:    // in Loop: Header=BB0_10 Depth=4
+; CHECK-NEXT:    str wzr, [sp, #132] // 4-byte Folded Spill
+; CHECK-NEXT:    strb wzr, [x18]
+; CHECK-NEXT:  .LBB0_19: // %if.then.us.5
+; CHECK-NEXT:    // in Loop: Header=BB0_10 Depth=4
+; CHECK-NEXT:    ldr w23, [sp, #132] // 4-byte Folded Reload
+; CHECK-NEXT:    mov w8, #29625 // =0x73b9
+; CHECK-NEXT:    movk w8, #21515, lsl #16
+; CHECK-NEXT:    cmp w23, w8
+; CHECK-NEXT:    csel w23, w23, w8, lt
+; CHECK-NEXT:    str w23, [sp, #132] // 4-byte Folded Spill
+; CHECK-NEXT:    tbz w0, #0, .LBB0_21
+; CHECK-NEXT:  // %bb.20: // in Loop: Header=BB0_10 Depth=4
+; CHECK-NEXT:    mov w8, wzr
+; CHECK-NEXT:    b .LBB0_22
+; CHECK-NEXT:    .p2align 5, , 16
+; CHECK-NEXT:  .LBB0_21: // %cond.true146.us.7
+; CHECK-NEXT:    // in Loop: Header=BB0_10 Depth=4
+; CHECK-NEXT:    ldrsb w8, [x27, x3]
+; CHECK-NEXT:  .LBB0_22: // %cond.end154.us.7
+; CHECK-NEXT:    // in Loop: Header=BB0_10 Depth=4
+; CHECK-NEXT:    mov w23, #18984 // =0x4a28
+; CHECK-NEXT:    mov w3, #149 // =0x95
+; CHECK-NEXT:    lsl w8, w23, w8
+; CHECK-NEXT:    cbz w8, .LBB0_24
+; CHECK-NEXT:  // %bb.23: // %if.then.us.7
+; CHECK-NEXT:    // in Loop: Header=BB0_10 Depth=4
+; CHECK-NEXT:    ldr x8, [sp, #152] // 8-byte Folded Reload
+; CHECK-NEXT:    str wzr, [sp, #132] // 4-byte Folded Spill
+; CHECK-NEXT:    str wzr, [x8]
+; CHECK-NEXT:  .LBB0_24: // %if.end.us.7
+; CHECK-NEXT:    // in Loop: Header=BB0_10 Depth=4
+; CHECK-NEXT:    mov x23, xzr
+; CHECK-NEXT:    b .LBB0_28
+; CHECK-NEXT:    .p2align 5, , 16
+; CHECK-NEXT:  .LBB0_25: // %cond.true331.us
+; CHECK-NEXT:    // in Loop: Header=BB0_28 Depth=5
+; CHECK-NEXT:    ldrsb w4, [x10]
+; CHECK-NEXT:  .LBB0_26: // %cond.end345.us
+; CHECK-NEXT:    // in Loop: Header=BB0_28 Depth=5
+; CHECK-NEXT:    strh w4, [x18]
+; CHECK-NEXT:    mul x4, x22, x28
+; CHECK-NEXT:    adrp x22, :got:var_46
+; CHECK-NEXT:    mov x8, xzr
+; CHECK-NEXT:    ldr x22, [x22, :got_lo12:var_46]
+; CHECK-NEXT:    str x4, [x22]
+; CHECK-NEXT:    mov x4, #-18403 // =0xffffffffffffb81d
+; CHECK-NEXT:    movk x4, #58909, lsl #16
+; CHECK-NEXT:  .LBB0_27: // %for.inc371.us
+; CHECK-NEXT:    // in Loop: Header=BB0_28 Depth=5
+; CHECK-NEXT:    mov w22, #-18978 // =0xffffb5de
+; CHECK-NEXT:    orr x23, x23, #0x1
+; CHECK-NEXT:    mov x24, xzr
+; CHECK-NEXT:    mul w12, w12, w22
+; CHECK-NEXT:    mov x22, x5
+; CHECK-NEXT:    tbz w0, #0, .LBB0_36
+; CHECK-NEXT:  .LBB0_28: // %for.body194.us
+; CHECK-NEXT:    // Parent Loop BB0_4 Depth=1
+; CHECK-NEXT:    // Parent Loop BB0_6 Depth=2
+; CHECK-NEXT:    // Parent Loop BB0_8 Depth=3
+; CHECK-NEXT:    // Parent Loop BB0_10 Depth=4
+; CHECK-NEXT:    // => This Inner Loop Header: Depth=5
+; CHECK-NEXT:    cbnz wzr, .LBB0_30
+; CHECK-NEXT:  // %bb.29: // %if.then222.us
+; CHECK-NEXT:    // in Loop: Header=BB0_28 Depth=5
+; CHECK-NEXT:    adrp x27, :got:var_32
+; CHECK-NEXT:    ldur w8, [x19, #-12]
+; CHECK-NEXT:    ldr x27, [x27, :got_lo12:var_32]
+; CHECK-NEXT:    strh w8, [x27]
+; CHECK-NEXT:    sxtb w8, w25
+; CHECK-NEXT:    bic w25, w8, w8, asr #31
+; CHECK-NEXT:    b .LBB0_31
+; CHECK-NEXT:    .p2align 5, , 16
+; CHECK-NEXT:  .LBB0_30: // in Loop: Header=BB0_28 Depth=5
+; CHECK-NEXT:    mov w25, wzr
+; CHECK-NEXT:  .LBB0_31: // %if.end239.us
+; CHECK-NEXT:    // in Loop: Header=BB0_28 Depth=5
+; CHECK-NEXT:    strb w3, [x16]
+; CHECK-NEXT:    tst w13, #0xff
+; CHECK-NEXT:    b.eq .LBB0_33
+; CHECK-NEXT:  // %bb.32: // %if.then254.us
+; CHECK-NEXT:    // in Loop: Header=BB0_28 Depth=5
+; CHECK-NEXT:    ldrh w8, [x26, x14, lsl #1]
+; CHECK-NEXT:    adrp x27, :got:var_35
+; CHECK-NEXT:    ldr x27, [x27, :got_lo12:var_35]
+; CHECK-NEXT:    cmp w8, #0
+; CHECK-NEXT:    csel x8, xzr, x7, eq
+; CHECK-NEXT:    str x8, [x27]
+; CHECK-NEXT:    strh w1, [x17]
+; CHECK-NEXT:  .LBB0_33: // %if.end282.us
+; CHECK-NEXT:    // in Loop: Header=BB0_28 Depth=5
+; CHECK-NEXT:    orr x27, x24, x4
+; CHECK-NEXT:    adrp x8, :got:var_39
+; CHECK-NEXT:    str x27, [x18]
+; CHECK-NEXT:    ldr x8, [x8, :got_lo12:var_39]
+; CHECK-NEXT:    str x10, [x8]
+; CHECK-NEXT:    ldrb w8, [x6, x9]
+; CHECK-NEXT:    str x8, [x18]
+; CHECK-NEXT:    mov w8, #1 // =0x1
+; CHECK-NEXT:    cbnz x2, .LBB0_27
+; CHECK-NEXT:  // %bb.34: // %if.then327.us
+; CHECK-NEXT:    // in Loop: Header=BB0_28 Depth=5
+; CHECK-NEXT:    cbz w8, .LBB0_25
+; CHECK-NEXT:  // %bb.35: // in Loop: Header=BB0_28 Depth=5
+; CHECK-NEXT:    mov w4, wzr
+; CHECK-NEXT:    b .LBB0_26
+; CHECK-NEXT:    .p2align 5, , 16
+; CHECK-NEXT:  .LBB0_36: // %for.cond376.preheader.us
+; CHECK-NEXT:    // in Loop: Header=BB0_10 Depth=4
+; CHECK-NEXT:    mov w3, #1152 // =0x480
+; CHECK-NEXT:    mov x22, xzr
+; CHECK-NEXT:    mov w4, wzr
+; CHECK-NEXT:    mov x24, x27
+; CHECK-NEXT:    lsl x23, x14, #1
+; CHECK-NEXT:    mov x27, #-1 // =0xffffffffffffffff
+; CHECK-NEXT:    madd x14, x14, x3, x11
+; CHECK-NEXT:    mov w28, w30
+; CHECK-NEXT:    mov w3, #-7680 // =0xffffe200
+; CHECK-NEXT:    b .LBB0_39
+; CHECK-NEXT:    .p2align 5, , 16
+; CHECK-NEXT:  .LBB0_37: // %if.then466.us
+; CHECK-NEXT:    // in Loop: Header=BB0_39 Depth=5
+; CHECK-NEXT:    ldr x28, [sp, #152] // 8-byte Folded Reload
+; CHECK-NEXT:    ldr x3, [sp, #136] // 8-byte Folded Reload
+; CHECK-NEXT:    sxtb w4, w4
+; CHECK-NEXT:    bic w4, w4, w4, asr #31
+; CHECK-NEXT:    str x3, [x28]
+; CHECK-NEXT:    mov w3, #-7680 // =0xffffe200
+; CHECK-NEXT:  .LBB0_38: // %for.inc505.us
+; CHECK-NEXT:    // in Loop: Header=BB0_39 Depth=5
+; CHECK-NEXT:    add x22, x22, #1
+; CHECK-NEXT:    add x27, x27, #1
+; CHECK-NEXT:    mov w28, wzr
+; CHECK-NEXT:    cmp x27, #0
+; CHECK-NEXT:    b.hs .LBB0_9
+; CHECK-NEXT:  .LBB0_39: // %for.body380.us
+; CHECK-NEXT:    // Parent Loop BB0_4 Depth=1
+; CHECK-NEXT:    // Parent Loop BB0_6 Depth=2
+; CHECK-NEXT:    // Parent Loop BB0_8 Depth=3
+; CHECK-NEXT:    // Parent Loop BB0_10 Depth=4
+; CHECK-NEXT:    // => This Inner Loop Header: Depth=5
+; CHECK-NEXT:    mov w30, w28
+; CHECK-NEXT:    ldrh w28, [x23]
+; CHECK-NEXT:    tst w0, #0x1
+; CHECK-NEXT:    strh w28, [x11]
+; CHECK-NEXT:    csel w28, w21, w3, ne
+; CHECK-NEXT:    str w28, [x20]
+; CHECK-NEXT:    cbz x15, .LBB0_38
+; CHECK-NEXT:  // %bb.40: // %if.then436.us
+; CHECK-NEXT:    // in Loop: Header=BB0_39 Depth=5
+; CHECK-NEXT:    ldrh w28, [x14]
+; CHECK-NEXT:    cbnz w28, .LBB0_37
+; CHECK-NEXT:  // %bb.41: // in Loop: Header=BB0_39 Depth=5
+; CHECK-NEXT:    mov w4, wzr
+; CHECK-NEXT:    b .LBB0_38
+; CHECK-NEXT:  .LBB0_42: // %for.body41
+; CHECK-NEXT:    strb wzr, [x4]
+; CHECK-NEXT:    strb wzr, [x14]
+; CHECK-NEXT:  .LBB0_43: // %for.cond563.preheader
+; CHECK-NEXT:    ldp x20, x19, [sp, #224] // 16-byte Folded Reload
+; CHECK-NEXT:    ldp x22, x21, [sp, #208] // 16-byte Folded Reload
+; CHECK-NEXT:    ldp x24, x23, [sp, #192] // 16-byte Folded Reload
+; CHECK-NEXT:    ldp x26, x25, [sp, #176] // 16-byte Folded Reload
+; CHECK-NEXT:    ldp x28, x27, [sp, #160] // 16-byte Folded Reload
+; CHECK-NEXT:    ldr x30, [sp, #144] // 8-byte Folded Reload
+; CHECK-NEXT:    add sp, sp, #240
+; CHECK-NEXT:    ret
+entry:
+  br i1 %var_5, label %for.body41.lr.ph, label %for.cond563.preheader
+
+for.body41.lr.ph:                                 ; preds = %entry
+  %arrayidx147 = getelementptr i8, ptr %arr_3, i64 120
+  %tobool326.not = icmp eq i64 %var_2, 0
+  %not353 = xor i64 0, -1
+  %add538 = select i1 %var_0, i16 0, i16 1
+  br i1 %var_0, label %for.body41.us, label %for.body41
+
+for.body41.us:                                    ; preds = %for.cond.cleanup93.us, %for.body41.lr.ph
+  %var_24.promoted9271009.us = phi i64 [ 0, %for.body41.lr.ph ], [ %6, %for.cond.cleanup93.us ]
+  %var_37.promoted9301008.us = phi i64 [ 1, %for.body41.lr.ph ], [ 0, %for.cond.cleanup93.us ]
+  %2 = phi i8 [ 0, %for.body41.lr.ph ], [ 1, %for.cond.cleanup93.us ]
+  %add4139751001.us = phi i16 [ 0, %for.body41.lr.ph ], [ 1, %for.cond.cleanup93.us ]
+  %3 = phi i8 [ 0, %for.body41.lr.ph ], [ %var_10, %for.cond.cleanup93.us ]
+  store i32 %var_6, ptr %arr_3, align 4
+  store i8 %var_10, ptr %arr_3, align 1
+  br label %for.body67.us
+
+for.body67.us:                                    ; preds = %for.cond.cleanup93.us, %for.body41.us
+  %4 = phi i8 [ %3, %for.body41.us ], [ 0, %for.cond.cleanup93.us ]
+  %add413977.us = phi i16 [ %add4139751001.us, %for.body41.us ], [ %add413.us17, %for.cond.cleanup93.us ]
+  %5 = phi i8 [ %2, %for.body41.us ], [ %.sroa.speculated829.us, %for.cond.cleanup93.us ]
+  %conv64922.us = phi i32 [ 1, %for.body41.us ], [ 0, %for.cond.clea...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/164386


More information about the llvm-commits mailing list