[llvm] [RISCV] Exploit sh3add/sh2add for stack offsets by shifted 12-bit constants (PR #87950)

Mon Apr 8 10:39:21 PDT 2024

================
@@ -143,119 +143,221 @@ define void @frame_4096b() {
 
 ;; 2^12-16+2032
 define void @frame_4kb() {
-; RV32-LABEL: frame_4kb:
-; RV32:       # %bb.0:
-; RV32-NEXT:    addi sp, sp, -2032
-; RV32-NEXT:    .cfi_def_cfa_offset 2032
-; RV32-NEXT:    sw ra, 2028(sp) # 4-byte Folded Spill
-; RV32-NEXT:    .cfi_offset ra, -4
-; RV32-NEXT:    lui a0, 1
-; RV32-NEXT:    sub sp, sp, a0
-; RV32-NEXT:    .cfi_def_cfa_offset 6128
-; RV32-NEXT:    addi a0, sp, 12
-; RV32-NEXT:    call callee
-; RV32-NEXT:    lui a0, 1
-; RV32-NEXT:    add sp, sp, a0
-; RV32-NEXT:    lw ra, 2028(sp) # 4-byte Folded Reload
-; RV32-NEXT:    addi sp, sp, 2032
-; RV32-NEXT:    ret
+; RV32I-LABEL: frame_4kb:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    addi sp, sp, -2032
+; RV32I-NEXT:    .cfi_def_cfa_offset 2032
+; RV32I-NEXT:    sw ra, 2028(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    .cfi_offset ra, -4
+; RV32I-NEXT:    lui a0, 1
+; RV32I-NEXT:    sub sp, sp, a0
+; RV32I-NEXT:    .cfi_def_cfa_offset 6128
+; RV32I-NEXT:    addi a0, sp, 12
+; RV32I-NEXT:    call callee
+; RV32I-NEXT:    lui a0, 1
+; RV32I-NEXT:    add sp, sp, a0
+; RV32I-NEXT:    lw ra, 2028(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    addi sp, sp, 2032
+; RV32I-NEXT:    ret
 ;
-; RV64-LABEL: frame_4kb:
-; RV64:       # %bb.0:
-; RV64-NEXT:    addi sp, sp, -2032
-; RV64-NEXT:    .cfi_def_cfa_offset 2032
-; RV64-NEXT:    sd ra, 2024(sp) # 8-byte Folded Spill
-; RV64-NEXT:    .cfi_offset ra, -8
-; RV64-NEXT:    lui a0, 1
-; RV64-NEXT:    sub sp, sp, a0
-; RV64-NEXT:    .cfi_def_cfa_offset 6128
-; RV64-NEXT:    addi a0, sp, 8
-; RV64-NEXT:    call callee
-; RV64-NEXT:    lui a0, 1
-; RV64-NEXT:    add sp, sp, a0
-; RV64-NEXT:    ld ra, 2024(sp) # 8-byte Folded Reload
-; RV64-NEXT:    addi sp, sp, 2032
-; RV64-NEXT:    ret
+; RV32ZBA-LABEL: frame_4kb:
+; RV32ZBA:       # %bb.0:
+; RV32ZBA-NEXT:    addi sp, sp, -2032
+; RV32ZBA-NEXT:    .cfi_def_cfa_offset 2032
+; RV32ZBA-NEXT:    sw ra, 2028(sp) # 4-byte Folded Spill
+; RV32ZBA-NEXT:    .cfi_offset ra, -4
+; RV32ZBA-NEXT:    li a0, -512
----------------
topperc wrote:

> You're correct here.  I'd really hoped I wasn't going to have tor reason about the instructions used for the constant materialization here, but it doesn't look like I get this lucky.
> 
> 
> 
> I'll note as an aside that the existing dual ADDI trick is also "less compressible" than the corresponding C.LUI/C.ADDI.  Not sure if we care, or not.

I think the sub itself isn't compressible because x1 isn't compressible for sub.

For your case is it sufficient to check that the low 12 bits are non-zero before trying shXadd?

https://github.com/llvm/llvm-project/pull/87950