[llvm] [RegAlloc] Constrain rematted regclass to use (PR #164386)
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Tue Oct 21 03:27:02 PDT 2025
================
@@ -0,0 +1,676 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc -verify-machineinstrs < %s | FileCheck %s
+
+; This test recreates a regalloc crash reported in
+; https://github.com/llvm/llvm-project/issues/164181
+; When rematting an instruction we need to make sure to constrain the newly
+; allocated register to both the rematted def's reg class and the use's reg
+; class.
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128-Fn32"
+target triple = "aarch64-unknown-linux-gnu"
+
+ at var_32 = external global i16
+ at var_35 = external global i64
+ at var_39 = external global i64
+ at var_46 = external global i64
+ at var_50 = external global i32
+
+define void @f(i1 %var_0, i16 %var_1, i64 %var_2, i8 %var_3, i16 %var_4, i1 %var_5, i32 %var_6, i32 %var_7, i8 %var_10, i64 %var_11, i8 %var_14, i32 %var_15, i64 %var_16, ptr %arr_3, ptr %arr_4, ptr %arr_6, ptr %arr_7, ptr %arr_12, ptr %arr_13, ptr %arr_19, i64 %mul, i64 %conv35, i64 %idxprom138.us16, i8 %0, i8 %1, ptr %invariant.gep875.us) #0 {
+; CHECK-LABEL: f:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: sub sp, sp, #240
+; CHECK-NEXT: str x30, [sp, #144] // 8-byte Folded Spill
+; CHECK-NEXT: stp x28, x27, [sp, #160] // 16-byte Folded Spill
+; CHECK-NEXT: stp x26, x25, [sp, #176] // 16-byte Folded Spill
+; CHECK-NEXT: stp x24, x23, [sp, #192] // 16-byte Folded Spill
+; CHECK-NEXT: stp x22, x21, [sp, #208] // 16-byte Folded Spill
+; CHECK-NEXT: stp x20, x19, [sp, #224] // 16-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 240
+; CHECK-NEXT: .cfi_offset w19, -8
+; CHECK-NEXT: .cfi_offset w20, -16
+; CHECK-NEXT: .cfi_offset w21, -24
+; CHECK-NEXT: .cfi_offset w22, -32
+; CHECK-NEXT: .cfi_offset w23, -40
+; CHECK-NEXT: .cfi_offset w24, -48
+; CHECK-NEXT: .cfi_offset w25, -56
+; CHECK-NEXT: .cfi_offset w26, -64
+; CHECK-NEXT: .cfi_offset w27, -72
+; CHECK-NEXT: .cfi_offset w28, -80
+; CHECK-NEXT: .cfi_offset w30, -96
+; CHECK-NEXT: str w6, [sp, #20] // 4-byte Folded Spill
+; CHECK-NEXT: str w4, [sp, #72] // 4-byte Folded Spill
+; CHECK-NEXT: str w3, [sp, #112] // 4-byte Folded Spill
+; CHECK-NEXT: str w5, [sp, #36] // 4-byte Folded Spill
+; CHECK-NEXT: tbz w5, #0, .LBB0_43
+; CHECK-NEXT: // %bb.1: // %for.body41.lr.ph
+; CHECK-NEXT: ldr x4, [sp, #312]
+; CHECK-NEXT: ldr x14, [sp, #280]
+; CHECK-NEXT: tbz w0, #0, .LBB0_42
+; CHECK-NEXT: // %bb.2: // %for.body41.us.preheader
+; CHECK-NEXT: ldrb w8, [sp, #368]
+; CHECK-NEXT: ldrb w12, [sp, #256]
+; CHECK-NEXT: ldr w26, [sp, #264]
+; CHECK-NEXT: adrp x20, :got:var_50
+; CHECK-NEXT: mov x28, #-1 // =0xffffffffffffffff
+; CHECK-NEXT: mov w21, #36006 // =0x8ca6
+; CHECK-NEXT: ldr x11, [sp, #376]
+; CHECK-NEXT: ldrb w13, [sp, #360]
+; CHECK-NEXT: ldp x17, x16, [sp, #296]
+; CHECK-NEXT: mov w22, #1 // =0x1
+; CHECK-NEXT: add x27, x14, #120
+; CHECK-NEXT: ldr x18, [sp, #288]
+; CHECK-NEXT: ldr x7, [sp, #272]
+; CHECK-NEXT: ldr x5, [sp, #248]
+; CHECK-NEXT: mov x10, xzr
+; CHECK-NEXT: mov w23, wzr
+; CHECK-NEXT: mov w30, wzr
+; CHECK-NEXT: ldrb w19, [sp, #240]
+; CHECK-NEXT: mov w25, wzr
+; CHECK-NEXT: mov x24, xzr
+; CHECK-NEXT: str w8, [sp, #108] // 4-byte Folded Spill
+; CHECK-NEXT: mov x3, x26
+; CHECK-NEXT: ldp x9, x8, [sp, #344]
+; CHECK-NEXT: str w12, [sp, #92] // 4-byte Folded Spill
+; CHECK-NEXT: mov w12, #1 // =0x1
+; CHECK-NEXT: bic w12, w12, w0
+; CHECK-NEXT: str w12, [sp, #76] // 4-byte Folded Spill
+; CHECK-NEXT: mov w12, #48 // =0x30
+; CHECK-NEXT: str x9, [sp, #136] // 8-byte Folded Spill
+; CHECK-NEXT: ldp x9, x15, [sp, #328]
+; CHECK-NEXT: madd x8, x8, x12, x9
+; CHECK-NEXT: str x8, [sp, #64] // 8-byte Folded Spill
+; CHECK-NEXT: add x8, x26, w26, uxtw #1
+; CHECK-NEXT: ldr x20, [x20, :got_lo12:var_50]
+; CHECK-NEXT: str x26, [sp, #96] // 8-byte Folded Spill
+; CHECK-NEXT: str x14, [sp, #152] // 8-byte Folded Spill
+; CHECK-NEXT: lsl x6, x8, #3
+; CHECK-NEXT: add x8, x14, #120
+; CHECK-NEXT: str x4, [sp, #24] // 8-byte Folded Spill
+; CHECK-NEXT: str w19, [sp, #16] // 4-byte Folded Spill
+; CHECK-NEXT: str x8, [sp, #80] // 8-byte Folded Spill
+; CHECK-NEXT: b .LBB0_4
+; CHECK-NEXT: .p2align 5, , 16
+; CHECK-NEXT: .LBB0_3: // in Loop: Header=BB0_4 Depth=1
+; CHECK-NEXT: ldr w19, [sp, #16] // 4-byte Folded Reload
+; CHECK-NEXT: ldr x24, [sp, #40] // 8-byte Folded Reload
+; CHECK-NEXT: ldr x14, [sp, #152] // 8-byte Folded Reload
+; CHECK-NEXT: mov w23, #1 // =0x1
+; CHECK-NEXT: mov w30, #1 // =0x1
+; CHECK-NEXT: mov w25, w19
+; CHECK-NEXT: .LBB0_4: // %for.body41.us
+; CHECK-NEXT: // =>This Loop Header: Depth=1
+; CHECK-NEXT: // Child Loop BB0_6 Depth 2
+; CHECK-NEXT: // Child Loop BB0_8 Depth 3
+; CHECK-NEXT: // Child Loop BB0_10 Depth 4
+; CHECK-NEXT: // Child Loop BB0_11 Depth 5
+; CHECK-NEXT: // Child Loop BB0_28 Depth 5
+; CHECK-NEXT: // Child Loop BB0_39 Depth 5
+; CHECK-NEXT: ldr w8, [sp, #20] // 4-byte Folded Reload
+; CHECK-NEXT: mov x12, x24
+; CHECK-NEXT: str x24, [sp, #48] // 8-byte Folded Spill
+; CHECK-NEXT: str w8, [x14]
+; CHECK-NEXT: mov w8, #1 // =0x1
+; CHECK-NEXT: strb w19, [x14]
+; CHECK-NEXT: b .LBB0_6
+; CHECK-NEXT: .p2align 5, , 16
+; CHECK-NEXT: .LBB0_5: // %for.cond.cleanup93.us
+; CHECK-NEXT: // in Loop: Header=BB0_6 Depth=2
+; CHECK-NEXT: ldr w9, [sp, #36] // 4-byte Folded Reload
+; CHECK-NEXT: ldr x4, [sp, #24] // 8-byte Folded Reload
+; CHECK-NEXT: ldp x24, x12, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEXT: mov x22, xzr
+; CHECK-NEXT: mov w25, wzr
+; CHECK-NEXT: mov w8, wzr
+; CHECK-NEXT: tbz w9, #0, .LBB0_3
+; CHECK-NEXT: .LBB0_6: // %for.body67.us
+; CHECK-NEXT: // Parent Loop BB0_4 Depth=1
+; CHECK-NEXT: // => This Loop Header: Depth=2
+; CHECK-NEXT: // Child Loop BB0_8 Depth 3
+; CHECK-NEXT: // Child Loop BB0_10 Depth 4
+; CHECK-NEXT: // Child Loop BB0_11 Depth 5
+; CHECK-NEXT: // Child Loop BB0_28 Depth 5
+; CHECK-NEXT: // Child Loop BB0_39 Depth 5
+; CHECK-NEXT: str x12, [sp, #40] // 8-byte Folded Spill
+; CHECK-NEXT: cmn x24, #30
+; CHECK-NEXT: mov x12, #-30 // =0xffffffffffffffe2
+; CHECK-NEXT: add x19, x4, w8, sxtw #2
+; CHECK-NEXT: mov x9, xzr
+; CHECK-NEXT: csel x12, x24, x12, lo
+; CHECK-NEXT: mov w4, w30
+; CHECK-NEXT: str x12, [sp, #56] // 8-byte Folded Spill
+; CHECK-NEXT: b .LBB0_8
+; CHECK-NEXT: .p2align 5, , 16
+; CHECK-NEXT: .LBB0_7: // %for.cond.cleanup98.us
+; CHECK-NEXT: // in Loop: Header=BB0_8 Depth=3
+; CHECK-NEXT: ldr w4, [sp, #72] // 4-byte Folded Reload
+; CHECK-NEXT: ldr w23, [sp, #128] // 4-byte Folded Reload
+; CHECK-NEXT: mov w9, #1 // =0x1
+; CHECK-NEXT: mov x22, xzr
+; CHECK-NEXT: tbnz w0, #0, .LBB0_5
+; CHECK-NEXT: .LBB0_8: // %for.cond95.preheader.us
+; CHECK-NEXT: // Parent Loop BB0_4 Depth=1
+; CHECK-NEXT: // Parent Loop BB0_6 Depth=2
+; CHECK-NEXT: // => This Loop Header: Depth=3
+; CHECK-NEXT: // Child Loop BB0_10 Depth 4
+; CHECK-NEXT: // Child Loop BB0_11 Depth 5
+; CHECK-NEXT: // Child Loop BB0_28 Depth 5
+; CHECK-NEXT: // Child Loop BB0_39 Depth 5
+; CHECK-NEXT: ldr x8, [sp, #64] // 8-byte Folded Reload
+; CHECK-NEXT: mov w14, #1152 // =0x480
+; CHECK-NEXT: mov w24, #1 // =0x1
+; CHECK-NEXT: mov w12, wzr
+; CHECK-NEXT: str wzr, [sp, #132] // 4-byte Folded Spill
+; CHECK-NEXT: mov w30, w4
+; CHECK-NEXT: madd x8, x9, x14, x8
+; CHECK-NEXT: mov w14, #1 // =0x1
+; CHECK-NEXT: str x8, [sp, #120] // 8-byte Folded Spill
+; CHECK-NEXT: add x8, x9, x9, lsl #1
+; CHECK-NEXT: lsl x26, x8, #4
+; CHECK-NEXT: sxtb w8, w23
+; CHECK-NEXT: mov w23, w25
+; CHECK-NEXT: str w8, [sp, #116] // 4-byte Folded Spill
+; CHECK-NEXT: b .LBB0_10
+; CHECK-NEXT: .p2align 5, , 16
+; CHECK-NEXT: .LBB0_9: // %for.cond510.preheader.us
+; CHECK-NEXT: // in Loop: Header=BB0_10 Depth=4
+; CHECK-NEXT: ldr w23, [sp, #92] // 4-byte Folded Reload
+; CHECK-NEXT: mov x22, x8
+; CHECK-NEXT: ldr x3, [sp, #96] // 8-byte Folded Reload
+; CHECK-NEXT: ldr x27, [sp, #80] // 8-byte Folded Reload
+; CHECK-NEXT: mov x28, #-1 // =0xffffffffffffffff
+; CHECK-NEXT: mov x14, xzr
+; CHECK-NEXT: ldr w8, [sp, #76] // 4-byte Folded Reload
+; CHECK-NEXT: tbz w8, #31, .LBB0_7
+; CHECK-NEXT: .LBB0_10: // %for.body99.us
+; CHECK-NEXT: // Parent Loop BB0_4 Depth=1
+; CHECK-NEXT: // Parent Loop BB0_6 Depth=2
+; CHECK-NEXT: // Parent Loop BB0_8 Depth=3
+; CHECK-NEXT: // => This Loop Header: Depth=4
+; CHECK-NEXT: // Child Loop BB0_11 Depth 5
+; CHECK-NEXT: // Child Loop BB0_28 Depth 5
+; CHECK-NEXT: // Child Loop BB0_39 Depth 5
+; CHECK-NEXT: ldr w8, [sp, #116] // 4-byte Folded Reload
+; CHECK-NEXT: and w8, w8, w8, asr #31
+; CHECK-NEXT: str w8, [sp, #128] // 4-byte Folded Spill
+; CHECK-NEXT: .p2align 5, , 16
+; CHECK-NEXT: .LBB0_11: // %for.body113.us
+; CHECK-NEXT: // Parent Loop BB0_4 Depth=1
+; CHECK-NEXT: // Parent Loop BB0_6 Depth=2
+; CHECK-NEXT: // Parent Loop BB0_8 Depth=3
+; CHECK-NEXT: // Parent Loop BB0_10 Depth=4
+; CHECK-NEXT: // => This Inner Loop Header: Depth=5
+; CHECK-NEXT: tbnz w0, #0, .LBB0_11
+; CHECK-NEXT: // %bb.12: // %for.cond131.preheader.us
+; CHECK-NEXT: // in Loop: Header=BB0_10 Depth=4
+; CHECK-NEXT: ldr w8, [sp, #112] // 4-byte Folded Reload
+; CHECK-NEXT: mov w4, #1 // =0x1
+; CHECK-NEXT: strb w8, [x18]
+; CHECK-NEXT: ldr x8, [sp, #120] // 8-byte Folded Reload
+; CHECK-NEXT: ldrh w8, [x8]
+; CHECK-NEXT: cbnz w4, .LBB0_14
+; CHECK-NEXT: // %bb.13: // %cond.true146.us
+; CHECK-NEXT: // in Loop: Header=BB0_10 Depth=4
+; CHECK-NEXT: ldrsb w4, [x27, x3]
+; CHECK-NEXT: b .LBB0_15
+; CHECK-NEXT: .p2align 5, , 16
+; CHECK-NEXT: .LBB0_14: // in Loop: Header=BB0_10 Depth=4
+; CHECK-NEXT: mov w4, wzr
+; CHECK-NEXT: .LBB0_15: // %cond.end154.us
+; CHECK-NEXT: // in Loop: Header=BB0_10 Depth=4
+; CHECK-NEXT: mov w25, #18984 // =0x4a28
+; CHECK-NEXT: mul w8, w8, w25
+; CHECK-NEXT: and w8, w8, #0xfff8
+; CHECK-NEXT: lsl w8, w8, w4
+; CHECK-NEXT: cbz w8, .LBB0_17
+; CHECK-NEXT: // %bb.16: // %if.then.us
+; CHECK-NEXT: // in Loop: Header=BB0_10 Depth=4
+; CHECK-NEXT: str wzr, [sp, #132] // 4-byte Folded Spill
+; CHECK-NEXT: str wzr, [x18]
+; CHECK-NEXT: .LBB0_17: // %if.end.us
+; CHECK-NEXT: // in Loop: Header=BB0_10 Depth=4
+; CHECK-NEXT: ldr w8, [sp, #108] // 4-byte Folded Reload
+; CHECK-NEXT: mov w4, #18984 // =0x4a28
+; CHECK-NEXT: mov w25, w23
+; CHECK-NEXT: strb w8, [x18]
+; CHECK-NEXT: ldrsb w8, [x27, x3]
+; CHECK-NEXT: lsl w8, w4, w8
+; CHECK-NEXT: mov x4, #-18403 // =0xffffffffffffb81d
+; CHECK-NEXT: movk x4, #58909, lsl #16
+; CHECK-NEXT: cbz w8, .LBB0_19
+; CHECK-NEXT: // %bb.18: // %if.then.us.2
+; CHECK-NEXT: // in Loop: Header=BB0_10 Depth=4
+; CHECK-NEXT: str wzr, [sp, #132] // 4-byte Folded Spill
+; CHECK-NEXT: strb wzr, [x18]
+; CHECK-NEXT: .LBB0_19: // %if.then.us.5
+; CHECK-NEXT: // in Loop: Header=BB0_10 Depth=4
+; CHECK-NEXT: ldr w23, [sp, #132] // 4-byte Folded Reload
+; CHECK-NEXT: mov w8, #29625 // =0x73b9
+; CHECK-NEXT: movk w8, #21515, lsl #16
+; CHECK-NEXT: cmp w23, w8
+; CHECK-NEXT: csel w23, w23, w8, lt
+; CHECK-NEXT: str w23, [sp, #132] // 4-byte Folded Spill
+; CHECK-NEXT: tbz w0, #0, .LBB0_21
+; CHECK-NEXT: // %bb.20: // in Loop: Header=BB0_10 Depth=4
+; CHECK-NEXT: mov w8, wzr
+; CHECK-NEXT: b .LBB0_22
+; CHECK-NEXT: .p2align 5, , 16
+; CHECK-NEXT: .LBB0_21: // %cond.true146.us.7
+; CHECK-NEXT: // in Loop: Header=BB0_10 Depth=4
+; CHECK-NEXT: ldrsb w8, [x27, x3]
+; CHECK-NEXT: .LBB0_22: // %cond.end154.us.7
+; CHECK-NEXT: // in Loop: Header=BB0_10 Depth=4
+; CHECK-NEXT: mov w23, #18984 // =0x4a28
+; CHECK-NEXT: mov w3, #149 // =0x95
+; CHECK-NEXT: lsl w8, w23, w8
+; CHECK-NEXT: cbz w8, .LBB0_24
+; CHECK-NEXT: // %bb.23: // %if.then.us.7
+; CHECK-NEXT: // in Loop: Header=BB0_10 Depth=4
+; CHECK-NEXT: ldr x8, [sp, #152] // 8-byte Folded Reload
+; CHECK-NEXT: str wzr, [sp, #132] // 4-byte Folded Spill
+; CHECK-NEXT: str wzr, [x8]
+; CHECK-NEXT: .LBB0_24: // %if.end.us.7
+; CHECK-NEXT: // in Loop: Header=BB0_10 Depth=4
+; CHECK-NEXT: mov x23, xzr
+; CHECK-NEXT: b .LBB0_28
+; CHECK-NEXT: .p2align 5, , 16
+; CHECK-NEXT: .LBB0_25: // %cond.true331.us
+; CHECK-NEXT: // in Loop: Header=BB0_28 Depth=5
+; CHECK-NEXT: ldrsb w4, [x10]
+; CHECK-NEXT: .LBB0_26: // %cond.end345.us
+; CHECK-NEXT: // in Loop: Header=BB0_28 Depth=5
+; CHECK-NEXT: strh w4, [x18]
+; CHECK-NEXT: mul x4, x22, x28
+; CHECK-NEXT: adrp x22, :got:var_46
+; CHECK-NEXT: mov x8, xzr
+; CHECK-NEXT: ldr x22, [x22, :got_lo12:var_46]
+; CHECK-NEXT: str x4, [x22]
+; CHECK-NEXT: mov x4, #-18403 // =0xffffffffffffb81d
+; CHECK-NEXT: movk x4, #58909, lsl #16
+; CHECK-NEXT: .LBB0_27: // %for.inc371.us
+; CHECK-NEXT: // in Loop: Header=BB0_28 Depth=5
+; CHECK-NEXT: mov w22, #-18978 // =0xffffb5de
+; CHECK-NEXT: orr x23, x23, #0x1
+; CHECK-NEXT: mov x24, xzr
+; CHECK-NEXT: mul w12, w12, w22
+; CHECK-NEXT: mov x22, x5
+; CHECK-NEXT: tbz w0, #0, .LBB0_36
+; CHECK-NEXT: .LBB0_28: // %for.body194.us
+; CHECK-NEXT: // Parent Loop BB0_4 Depth=1
+; CHECK-NEXT: // Parent Loop BB0_6 Depth=2
+; CHECK-NEXT: // Parent Loop BB0_8 Depth=3
+; CHECK-NEXT: // Parent Loop BB0_10 Depth=4
+; CHECK-NEXT: // => This Inner Loop Header: Depth=5
+; CHECK-NEXT: cbnz wzr, .LBB0_30
+; CHECK-NEXT: // %bb.29: // %if.then222.us
+; CHECK-NEXT: // in Loop: Header=BB0_28 Depth=5
+; CHECK-NEXT: adrp x27, :got:var_32
+; CHECK-NEXT: ldur w8, [x19, #-12]
+; CHECK-NEXT: ldr x27, [x27, :got_lo12:var_32]
+; CHECK-NEXT: strh w8, [x27]
+; CHECK-NEXT: sxtb w8, w25
+; CHECK-NEXT: bic w25, w8, w8, asr #31
+; CHECK-NEXT: b .LBB0_31
+; CHECK-NEXT: .p2align 5, , 16
+; CHECK-NEXT: .LBB0_30: // in Loop: Header=BB0_28 Depth=5
+; CHECK-NEXT: mov w25, wzr
+; CHECK-NEXT: .LBB0_31: // %if.end239.us
+; CHECK-NEXT: // in Loop: Header=BB0_28 Depth=5
+; CHECK-NEXT: strb w3, [x16]
+; CHECK-NEXT: tst w13, #0xff
+; CHECK-NEXT: b.eq .LBB0_33
+; CHECK-NEXT: // %bb.32: // %if.then254.us
+; CHECK-NEXT: // in Loop: Header=BB0_28 Depth=5
+; CHECK-NEXT: ldrh w8, [x26, x14, lsl #1]
+; CHECK-NEXT: adrp x27, :got:var_35
+; CHECK-NEXT: ldr x27, [x27, :got_lo12:var_35]
+; CHECK-NEXT: cmp w8, #0
+; CHECK-NEXT: csel x8, xzr, x7, eq
+; CHECK-NEXT: str x8, [x27]
+; CHECK-NEXT: strh w1, [x17]
+; CHECK-NEXT: .LBB0_33: // %if.end282.us
+; CHECK-NEXT: // in Loop: Header=BB0_28 Depth=5
+; CHECK-NEXT: orr x27, x24, x4
+; CHECK-NEXT: adrp x8, :got:var_39
+; CHECK-NEXT: str x27, [x18]
+; CHECK-NEXT: ldr x8, [x8, :got_lo12:var_39]
+; CHECK-NEXT: str x10, [x8]
+; CHECK-NEXT: ldrb w8, [x6, x9]
+; CHECK-NEXT: str x8, [x18]
+; CHECK-NEXT: mov w8, #1 // =0x1
+; CHECK-NEXT: cbnz x2, .LBB0_27
+; CHECK-NEXT: // %bb.34: // %if.then327.us
+; CHECK-NEXT: // in Loop: Header=BB0_28 Depth=5
+; CHECK-NEXT: cbz w8, .LBB0_25
+; CHECK-NEXT: // %bb.35: // in Loop: Header=BB0_28 Depth=5
+; CHECK-NEXT: mov w4, wzr
+; CHECK-NEXT: b .LBB0_26
+; CHECK-NEXT: .p2align 5, , 16
+; CHECK-NEXT: .LBB0_36: // %for.cond376.preheader.us
+; CHECK-NEXT: // in Loop: Header=BB0_10 Depth=4
+; CHECK-NEXT: mov w3, #1152 // =0x480
+; CHECK-NEXT: mov x22, xzr
+; CHECK-NEXT: mov w4, wzr
+; CHECK-NEXT: mov x24, x27
+; CHECK-NEXT: lsl x23, x14, #1
+; CHECK-NEXT: mov x27, #-1 // =0xffffffffffffffff
+; CHECK-NEXT: madd x14, x14, x3, x11
+; CHECK-NEXT: mov w28, w30
+; CHECK-NEXT: mov w3, #-7680 // =0xffffe200
+; CHECK-NEXT: b .LBB0_39
+; CHECK-NEXT: .p2align 5, , 16
+; CHECK-NEXT: .LBB0_37: // %if.then466.us
+; CHECK-NEXT: // in Loop: Header=BB0_39 Depth=5
+; CHECK-NEXT: ldr x28, [sp, #152] // 8-byte Folded Reload
+; CHECK-NEXT: ldr x3, [sp, #136] // 8-byte Folded Reload
+; CHECK-NEXT: sxtb w4, w4
+; CHECK-NEXT: bic w4, w4, w4, asr #31
+; CHECK-NEXT: str x3, [x28]
+; CHECK-NEXT: mov w3, #-7680 // =0xffffe200
+; CHECK-NEXT: .LBB0_38: // %for.inc505.us
+; CHECK-NEXT: // in Loop: Header=BB0_39 Depth=5
+; CHECK-NEXT: add x22, x22, #1
+; CHECK-NEXT: add x27, x27, #1
+; CHECK-NEXT: mov w28, wzr
+; CHECK-NEXT: cmp x27, #0
+; CHECK-NEXT: b.hs .LBB0_9
+; CHECK-NEXT: .LBB0_39: // %for.body380.us
+; CHECK-NEXT: // Parent Loop BB0_4 Depth=1
+; CHECK-NEXT: // Parent Loop BB0_6 Depth=2
+; CHECK-NEXT: // Parent Loop BB0_8 Depth=3
+; CHECK-NEXT: // Parent Loop BB0_10 Depth=4
+; CHECK-NEXT: // => This Inner Loop Header: Depth=5
+; CHECK-NEXT: mov w30, w28
+; CHECK-NEXT: ldrh w28, [x23]
+; CHECK-NEXT: tst w0, #0x1
+; CHECK-NEXT: strh w28, [x11]
+; CHECK-NEXT: csel w28, w21, w3, ne
+; CHECK-NEXT: str w28, [x20]
+; CHECK-NEXT: cbz x15, .LBB0_38
+; CHECK-NEXT: // %bb.40: // %if.then436.us
+; CHECK-NEXT: // in Loop: Header=BB0_39 Depth=5
+; CHECK-NEXT: ldrh w28, [x14]
+; CHECK-NEXT: cbnz w28, .LBB0_37
+; CHECK-NEXT: // %bb.41: // in Loop: Header=BB0_39 Depth=5
+; CHECK-NEXT: mov w4, wzr
+; CHECK-NEXT: b .LBB0_38
+; CHECK-NEXT: .LBB0_42: // %for.body41
+; CHECK-NEXT: strb wzr, [x4]
+; CHECK-NEXT: strb wzr, [x14]
+; CHECK-NEXT: .LBB0_43: // %for.cond563.preheader
+; CHECK-NEXT: ldp x20, x19, [sp, #224] // 16-byte Folded Reload
+; CHECK-NEXT: ldp x22, x21, [sp, #208] // 16-byte Folded Reload
+; CHECK-NEXT: ldp x24, x23, [sp, #192] // 16-byte Folded Reload
+; CHECK-NEXT: ldp x26, x25, [sp, #176] // 16-byte Folded Reload
+; CHECK-NEXT: ldp x28, x27, [sp, #160] // 16-byte Folded Reload
+; CHECK-NEXT: ldr x30, [sp, #144] // 8-byte Folded Reload
+; CHECK-NEXT: add sp, sp, #240
+; CHECK-NEXT: ret
+entry:
+ br i1 %var_5, label %for.body41.lr.ph, label %for.cond563.preheader
+
+for.body41.lr.ph: ; preds = %entry
+ %arrayidx147 = getelementptr i8, ptr %arr_3, i64 120
+ %tobool326.not = icmp eq i64 %var_2, 0
+ %not353 = xor i64 0, -1
+ %add538 = select i1 %var_0, i16 0, i16 1
+ br i1 %var_0, label %for.body41.us, label %for.body41
+
+for.body41.us: ; preds = %for.cond.cleanup93.us, %for.body41.lr.ph
+ %var_24.promoted9271009.us = phi i64 [ 0, %for.body41.lr.ph ], [ %6, %for.cond.cleanup93.us ]
+ %var_37.promoted9301008.us = phi i64 [ 1, %for.body41.lr.ph ], [ 0, %for.cond.cleanup93.us ]
+ %2 = phi i8 [ 0, %for.body41.lr.ph ], [ 1, %for.cond.cleanup93.us ]
+ %add4139751001.us = phi i16 [ 0, %for.body41.lr.ph ], [ 1, %for.cond.cleanup93.us ]
+ %3 = phi i8 [ 0, %for.body41.lr.ph ], [ %var_10, %for.cond.cleanup93.us ]
+ store i32 %var_6, ptr %arr_3, align 4
+ store i8 %var_10, ptr %arr_3, align 1
+ br label %for.body67.us
+
+for.body67.us: ; preds = %for.cond.cleanup93.us, %for.body41.us
+ %4 = phi i8 [ %3, %for.body41.us ], [ 0, %for.cond.cleanup93.us ]
+ %add413977.us = phi i16 [ %add4139751001.us, %for.body41.us ], [ %add413.us17, %for.cond.cleanup93.us ]
+ %5 = phi i8 [ %2, %for.body41.us ], [ %.sroa.speculated829.us, %for.cond.cleanup93.us ]
+ %conv64922.us = phi i32 [ 1, %for.body41.us ], [ 0, %for.cond.cleanup93.us ]
+ %6 = phi i64 [ %var_24.promoted9271009.us, %for.body41.us ], [ %.sroa.speculated832.us, %for.cond.cleanup93.us ]
+ %mul354903918.us = phi i64 [ %var_37.promoted9301008.us, %for.body41.us ], [ 0, %for.cond.cleanup93.us ]
+ %i_2.0921.us = zext i32 %var_15 to i64
+ %.sroa.speculated832.us = tail call i64 @llvm.umin.i64(i64 %var_24.promoted9271009.us, i64 -30)
+ %sext1023 = shl i64 %i_2.0921.us, 1
+ %idxprom138.us162 = ashr i64 %sext1023, 1
+ %gep889.us = getelementptr [24 x i16], ptr %arr_19, i64 %idxprom138.us16
+ %arrayidx149.us = getelementptr i8, ptr %arrayidx147, i64 %idxprom138.us162
+ %arrayidx319.us = getelementptr [24 x i8], ptr null, i64 %idxprom138.us162
+ %7 = sext i32 %conv64922.us to i64
+ %8 = getelementptr i32, ptr %arr_12, i64 %7
+ %arrayidx226.us = getelementptr i8, ptr %8, i64 -12
+ br label %for.cond95.preheader.us
+
+for.cond.cleanup93.us: ; preds = %for.cond.cleanup98.us
+ br i1 %var_5, label %for.body67.us, label %for.body41.us
+
+for.cond.cleanup98.us: ; preds = %for.cond510.preheader.us
+ br i1 %var_0, label %for.cond.cleanup93.us, label %for.cond95.preheader.us
+
+for.body99.us: ; preds = %for.cond95.preheader.us, %for.cond510.preheader.us
+ %mul287985.us = phi i16 [ 0, %for.cond95.preheader.us ], [ %mul287.us, %for.cond510.preheader.us ]
+ %9 = phi i8 [ %29, %for.cond95.preheader.us ], [ %var_14, %for.cond510.preheader.us ]
+ %add413979.us = phi i16 [ %add413978.us, %for.cond95.preheader.us ], [ %add413.us17, %for.cond510.preheader.us ]
+ %10 = phi i32 [ 0, %for.cond95.preheader.us ], [ %26, %for.cond510.preheader.us ]
+ %mul354905.us = phi i64 [ %mul354904.us, %for.cond95.preheader.us ], [ %mul354907.us, %for.cond510.preheader.us ]
+ %sub283896.us = phi i64 [ 1, %for.cond95.preheader.us ], [ %sub283.us, %for.cond510.preheader.us ]
+ %conv96880.us = phi i64 [ 1, %for.cond95.preheader.us ], [ 0, %for.cond510.preheader.us ]
+ %.sroa.speculated829.us = tail call i8 @llvm.smin.i8(i8 %30, i8 0)
+ br label %for.body113.us
+
+for.body380.us: ; preds = %for.cond376.preheader.us, %for.inc505.us
+ %indvars.iv1018 = phi i64 [ 0, %for.cond376.preheader.us ], [ %indvars.iv.next1019, %for.inc505.us ]
+ %11 = phi i8 [ 0, %for.cond376.preheader.us ], [ %13, %for.inc505.us ]
+ %add413980.us = phi i16 [ %add413979.us, %for.cond376.preheader.us ], [ 0, %for.inc505.us ]
+ %12 = load i16, ptr %arrayidx384.us, align 2
+ store i16 %12, ptr %invariant.gep875.us, align 2
+ %add413.us17 = or i16 %add413980.us, 0
+ %arrayidx416.us = getelementptr i16, ptr %arr_13, i64 %indvars.iv1018
+ %conv419.us = select i1 %var_0, i32 36006, i32 -7680
+ store i32 %conv419.us, ptr @var_50, align 4
+ %tobool435.not.us = icmp eq i64 %mul, 0
+ br i1 %tobool435.not.us, label %for.inc505.us, label %if.then436.us
+
+if.then436.us: ; preds = %for.body380.us
+ %.sroa.speculated817.us = tail call i8 @llvm.smax.i8(i8 %11, i8 0)
+ %cond464.in.us = load i16, ptr %gep876.us, align 2
+ %tobool465.not.us = icmp eq i16 %cond464.in.us, 0
+ br i1 %tobool465.not.us, label %for.inc505.us, label %if.then466.us
+
+if.then466.us: ; preds = %if.then436.us
+ store i64 %conv35, ptr %arr_3, align 8
+ br label %for.inc505.us
+
+for.inc505.us: ; preds = %if.then466.us, %if.then436.us, %for.body380.us
+ %13 = phi i8 [ %11, %for.body380.us ], [ %.sroa.speculated817.us, %if.then466.us ], [ 0, %if.then436.us ]
+ %indvars.iv.next1019 = add i64 %indvars.iv1018, 1
+ %cmp378.us = icmp ult i64 %indvars.iv1018, 0
+ br i1 %cmp378.us, label %for.body380.us, label %for.cond510.preheader.us
+
+for.body194.us: ; preds = %if.end.us.7, %for.inc371.us
+ %indvars.iv = phi i64 [ 0, %if.end.us.7 ], [ %indvars.iv.next, %for.inc371.us ]
+ %mul287986.us = phi i16 [ %mul287985.us, %if.end.us.7 ], [ %mul287.us, %for.inc371.us ]
+ %14 = phi i8 [ %9, %if.end.us.7 ], [ %16, %for.inc371.us ]
+ %mul354906.us = phi i64 [ %mul354905.us, %if.end.us.7 ], [ %var_11, %for.inc371.us ]
+ %sub283897.us = phi i64 [ %sub283896.us, %if.end.us.7 ], [ 0, %for.inc371.us ]
+ %tobool221.not.us = icmp eq i32 1, 0
+ br i1 %tobool221.not.us, label %if.end239.us, label %if.then222.us
+
+if.then222.us: ; preds = %for.body194.us
+ %15 = load i32, ptr %arrayidx226.us, align 4
+ %conv227.us = trunc i32 %15 to i16
+ store i16 %conv227.us, ptr @var_32, align 2
+ %.sroa.speculated820.us = tail call i8 @llvm.smax.i8(i8 %14, i8 0)
+ br label %if.end239.us
+
+if.end239.us: ; preds = %if.then222.us, %for.body194.us
+ %16 = phi i8 [ %.sroa.speculated820.us, %if.then222.us ], [ 0, %for.body194.us ]
+ store i8 -107, ptr %arr_7, align 1
+ %tobool253.not.us = icmp eq i8 %0, 0
+ br i1 %tobool253.not.us, label %if.end282.us, label %if.then254.us
+
+if.then254.us: ; preds = %if.end239.us
+ %17 = load i16, ptr %arrayidx259.us, align 2
+ %tobool261.not.us = icmp eq i16 %17, 0
+ %conv268.us = select i1 %tobool261.not.us, i64 0, i64 %var_16
+ store i64 %conv268.us, ptr @var_35, align 8
+ %gep867.us = getelementptr [24 x [24 x i64]], ptr null, i64 %indvars.iv
+ store i16 %var_1, ptr %arr_6, align 2
+ br label %if.end282.us
+
+if.end282.us: ; preds = %if.then254.us, %if.end239.us
+ %sub283.us = or i64 %sub283897.us, -434259939
+ store i64 %sub283.us, ptr %arr_4, align 8
+ %mul287.us = mul i16 %mul287986.us, -18978
+ store i64 0, ptr @var_39, align 8
+ %18 = load i8, ptr %arrayidx321.us, align 1
+ %conv322.us = zext i8 %18 to i64
+ store i64 %conv322.us, ptr %arr_4, align 8
+ br i1 %tobool326.not, label %if.then327.us, label %for.inc371.us
+
+if.then327.us: ; preds = %if.end282.us
+ %tobool330.not.us = icmp eq i32 0, 0
+ br i1 %tobool330.not.us, label %cond.end345.us, label %cond.true331.us
+
+cond.true331.us: ; preds = %if.then327.us
+ %19 = load i8, ptr null, align 1
+ %20 = sext i8 %19 to i16
+ br label %cond.end345.us
+
+cond.end345.us: ; preds = %cond.true331.us, %if.then327.us
+ %cond346.us = phi i16 [ %20, %cond.true331.us ], [ 0, %if.then327.us ]
+ store i16 %cond346.us, ptr %arr_4, align 2
+ %mul354.us = mul i64 %mul354906.us, %not353
+ store i64 %mul354.us, ptr @var_46, align 8
+ br label %for.inc371.us
+
+for.inc371.us: ; preds = %cond.end345.us, %if.end282.us
+ %mul354907.us = phi i64 [ 1, %if.end282.us ], [ 0, %cond.end345.us ]
+ %indvars.iv.next = or i64 %indvars.iv, 1
+ br i1 %var_0, label %for.body194.us, label %for.cond376.preheader.us
+
+cond.true146.us: ; preds = %for.cond131.preheader.us
+ %21 = load i8, ptr %arrayidx149.us, align 1
+ %conv150.us = sext i8 %21 to i32
+ br label %cond.end154.us
+
+cond.end154.us: ; preds = %for.cond131.preheader.us, %cond.true146.us
+ %cond155.us = phi i32 [ %conv150.us, %cond.true146.us ], [ 0, %for.cond131.preheader.us ]
+ %shl.us = shl i32 %div.us, %cond155.us
+ %tobool157.not.us = icmp eq i32 %shl.us, 0
+ br i1 %tobool157.not.us, label %if.end.us, label %if.then.us
+
+if.then.us: ; preds = %cond.end154.us
+ store i32 0, ptr %arr_4, align 4
+ br label %if.end.us
+
+if.end.us: ; preds = %if.then.us, %cond.end154.us
+ %22 = phi i32 [ 0, %if.then.us ], [ %10, %cond.end154.us ]
+ store i8 %1, ptr %arr_4, align 1
+ call void @llvm.assume(i1 true)
+ %23 = load i8, ptr %arrayidx149.us, align 1
+ %conv150.us.2 = sext i8 %23 to i32
+ %shl.us.2 = shl i32 18984, %conv150.us.2
+ %tobool157.not.us.2 = icmp eq i32 %shl.us.2, 0
+ br i1 %tobool157.not.us.2, label %if.then.us.5, label %if.then.us.2
+
+if.then.us.2: ; preds = %if.end.us
+ %.sroa.speculated826.us.2 = tail call i32 @llvm.smin.i32(i32 %10, i32 0)
+ store i8 0, ptr %arr_4, align 1
+ br label %if.then.us.5
+
+if.then.us.5: ; preds = %if.then.us.2, %if.end.us
+ %24 = phi i32 [ 0, %if.then.us.2 ], [ %22, %if.end.us ]
+ %.sroa.speculated826.us.5 = tail call i32 @llvm.smin.i32(i32 %24, i32 1410036665)
+ br i1 %var_0, label %cond.end154.us.7, label %cond.true146.us.7
+
+cond.true146.us.7: ; preds = %if.then.us.5
+ %25 = load i8, ptr %arrayidx149.us, align 1
+ %conv150.us.7 = sext i8 %25 to i32
+ br label %cond.end154.us.7
+
+cond.end154.us.7: ; preds = %cond.true146.us.7, %if.then.us.5
+ %cond155.us.7 = phi i32 [ %conv150.us.7, %cond.true146.us.7 ], [ 0, %if.then.us.5 ]
+ %shl.us.7 = shl i32 18984, %cond155.us.7
+ %tobool157.not.us.7 = icmp eq i32 %shl.us.7, 0
+ br i1 %tobool157.not.us.7, label %if.end.us.7, label %if.then.us.7
+
+if.then.us.7: ; preds = %cond.end154.us.7
+ store i32 0, ptr %arr_3, align 4
+ br label %if.end.us.7
+
+if.end.us.7: ; preds = %if.then.us.7, %cond.end154.us.7
+ %26 = phi i32 [ 0, %if.then.us.7 ], [ %.sroa.speculated826.us.5, %cond.end154.us.7 ]
+ %arrayidx259.us = getelementptr i16, ptr %arrayidx257.us, i64 %conv96880.us
+ br label %for.body194.us
+
+for.body113.us: ; preds = %for.body113.us, %for.body99.us
+ br i1 %var_0, label %for.body113.us, label %for.cond131.preheader.us
+
+for.cond510.preheader.us: ; preds = %for.inc505.us
+ %cmp97.us = icmp slt i16 %add538, 0
+ br i1 %cmp97.us, label %for.body99.us, label %for.cond.cleanup98.us
+
+for.cond376.preheader.us: ; preds = %for.inc371.us
+ %arrayidx384.us = getelementptr i16, ptr null, i64 %conv96880.us
+ %gep876.us = getelementptr [24 x [24 x i16]], ptr %invariant.gep875.us, i64 %conv96880.us
+ br label %for.body380.us
+
+for.cond131.preheader.us: ; preds = %for.body113.us
+ store i8 %var_3, ptr %arr_4, align 1
+ %27 = load i16, ptr %gep884.us, align 2
+ %28 = mul i16 18984, %27
+ %div.us = zext i16 %28 to i32
+ %tobool145.not.us = icmp eq i8 0, 0
+ br i1 %tobool145.not.us, label %cond.end154.us, label %cond.true146.us
+
+for.cond95.preheader.us: ; preds = %for.cond.cleanup98.us, %for.body67.us
+ %indvars.iv1021 = phi i64 [ 1, %for.cond.cleanup98.us ], [ 0, %for.body67.us ]
+ %29 = phi i8 [ %16, %for.cond.cleanup98.us ], [ %4, %for.body67.us ]
+ %add413978.us = phi i16 [ %var_4, %for.cond.cleanup98.us ], [ %add413977.us, %for.body67.us ]
+ %30 = phi i8 [ %.sroa.speculated829.us, %for.cond.cleanup98.us ], [ %5, %for.body67.us ]
+ %mul354904.us = phi i64 [ 0, %for.cond.cleanup98.us ], [ %mul354903918.us, %for.body67.us ]
+ %gep884.us = getelementptr [24 x [24 x i16]], ptr %gep889.us, i64 %indvars.iv1021
+ %arrayidx321.us = getelementptr i8, ptr %arrayidx319.us, i64 %indvars.iv1021
+ %arrayidx257.us = getelementptr [24 x i16], ptr null, i64 %indvars.iv1021
+ br label %for.body99.us
+
+for.cond563.preheader: ; preds = %for.body41, %entry
+ ret void
+
+for.body41: ; preds = %for.body41.lr.ph
+ store i8 0, ptr %arr_12, align 1
+ store i8 0, ptr %arr_3, align 1
+ br label %for.cond563.preheader
+
+; uselistorder directives
+ uselistorder i16 %add413.us17, { 1, 0 }
+ uselistorder i16 %mul287.us, { 1, 0 }
----------------
arsenm wrote:
```suggestion
```
https://github.com/llvm/llvm-project/pull/164386
More information about the llvm-commits
mailing list