[llvm] [RISCV] Use proper LLA operand for constant from load (PR #142292)

Mon Jun 2 15:27:21 PDT 2025

cnettel wrote:

It took some work. This isn't quite minimal, but you need to fool LLVM a bit to store constants without inlining all the math, even at -O0.

The premise is that ori can be replaced by addi if the compiler can prove that the low bits are zero. This is done late during instruction selection.

The following IR:
```
target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n32:64-S128"
target triple = "riscv64-unknown-unknown"

; Function Attrs: mustprogress noinline nounwind optnone
define dso_local noundef i64 @_Z4sum2j(i32 noundef signext %c, i32 noundef signext %d) #0 {
entry:
  %or1 = or i64 -9191740941672644608, 4096
  %or2 = or i64 -9191740941672644608, 8192
  %or3 = or i64 -9191740941672644608, 16384
  %conv = zext i32 %c to i64
  %donv = zext i32 %d to i64
  %3 = mul i64 %or1, %conv
  %4 = mul i64 %or2, %donv
  %5 = mul i64 %or3, %conv
  %6 = add i64 %3, %4
  %7 = add i64 %6, %5
  %8 = or i64 %5, 127
  %9 = mul i64 %3, %8
  %add = add i64 -9191740941672644608, %9
  %add2 = add i64 %add, %conv
  ret i64 %add2
}
```

generates the following assembler if run through `llc -mcpu=mips-p8700 -O0 -relocation-model=pic` in the patched version:
```
        .attribute      4, 16
        .attribute      5, "rv64i2p1_m2p0_a2p1_f2p2_d2p2_c2p0_zicsr2p0_zifencei2p0_zmmul1p0_zaamo1p0_zalrsc1p0_zba1p0_zbb1p0"
        .file   "testbug.ll"
        .section        .rodata.cst8,"aM",@progbits,8
        .p2align        3, 0x0                          # -- Begin function _Z4sum2j
.LCPI0_0:
        .quad   -9191740941672628224            # 0x8070605040304000
.LCPI0_1:
        .quad   -9191740941672640512            # 0x8070605040301000
        .text
        .globl  _Z4sum2j
        .p2align        1
        .type   _Z4sum2j,@function
_Z4sum2j:                               # @_Z4sum2j
.L_Z4sum2j$local:
        .type   .L_Z4sum2j$local,@function
        .cfi_startproc
# %bb.0:                                # %entry
                                        # kill: def $x11 killed $x10
        zext.w  a1, a0
.Lpcrel_hi0:
        auipc   a2, %pcrel_hi(.LCPI0_0)
        addi    a2, a2, %pcrel_lo(.Lpcrel_hi0)
        ld      a2, 0(a2)
        mul     a2, a1, a2
        addi    a2, a2, 127
        mul     a1, a1, a2
.Lpcrel_hi1:
        auipc   a2, %pcrel_hi(.LCPI0_1)
        addi    a2, a2, %pcrel_lo(.Lpcrel_hi1)
        ld      a2, 0(a2)
        mul     a1, a1, a2
        add.uw  a0, a0, a1
        lui     a1, 1015920
        addiw   a1, a1, 1541
        slli    a1, a1, 16
        addi    a1, a1, 1027
        slli    a1, a1, 20
        add     a0, a0, a1
        ret
.Lfunc_end0:
        .size   _Z4sum2j, .Lfunc_end0-_Z4sum2j
        .size   .L_Z4sum2j$local, .Lfunc_end0-_Z4sum2j
        .cfi_endproc
                                        # -- End function
        .section        ".note.GNU-stack","",@progbits
```

This is correct. The same command, on the current main branch, generates the ori instruction instead, showing that or_is_add in RISCVInstrInfo isn't able to see the known bits:
```
        .attribute      4, 16
        .attribute      5, "rv64i2p1_m2p0_a2p1_f2p2_d2p2_c2p0_zicsr2p0_zifencei2p0_zmmul1p0_zaamo1p0_zalrsc1p0_zba1p0_zbb1p0"
        .file   "testbug.ll"
        .section        .rodata.cst8,"aM",@progbits,8
        .p2align        3, 0x0                          # -- Begin function _Z4sum2j
.LCPI0_0:
        .quad   -9191740941672628224            # 0x8070605040304000
.LCPI0_1:
        .quad   -9191740941672640512            # 0x8070605040301000
        .text
        .globl  _Z4sum2j
        .p2align        1
        .type   _Z4sum2j,@function
_Z4sum2j:                               # @_Z4sum2j
.L_Z4sum2j$local:
        .type   .L_Z4sum2j$local,@function
        .cfi_startproc
# %bb.0:                                # %entry
                                        # kill: def $x11 killed $x10
        zext.w  a1, a0
.Lpcrel_hi0:
        auipc   a2, %pcrel_hi(.LCPI0_0)
        addi    a2, a2, %pcrel_lo(.Lpcrel_hi0)
        ld      a2, 0(a2)
        mul     a2, a1, a2
        ori     a2, a2, 127
        mul     a1, a1, a2
.Lpcrel_hi1:
        auipc   a2, %pcrel_hi(.LCPI0_1)
        addi    a2, a2, %pcrel_lo(.Lpcrel_hi1)
        ld      a2, 0(a2)
        mul     a1, a1, a2
        add.uw  a0, a0, a1
        lui     a1, 1015920
        addiw   a1, a1, 1541
        slli    a1, a1, 16
        addi    a1, a1, 1027
        slli    a1, a1, 20
        add     a0, a0, a1
        ret
.Lfunc_end0:
        .size   _Z4sum2j, .Lfunc_end0-_Z4sum2j
        .size   .L_Z4sum2j$local, .Lfunc_end0-_Z4sum2j
        .cfi_endproc
                                        # -- End function
        .section        ".note.GNU-stack","",@progbits
```

https://github.com/llvm/llvm-project/pull/142292