[llvm] [RISCV] Enable rematerialization for scalar loads (PR #166774)
Philip Reames via llvm-commits
llvm-commits at lists.llvm.org
Mon Nov 17 09:37:02 PST 2025
================
@@ -200,3 +200,169 @@ for.end: ; preds = %for.inc, %entry
}
declare i32 @foo(i32, i32, i32, i32, i32, i32)
+
+define void @remat_load(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i32 %6, i32 %7, double %8, double %9, double %10, double %11, double %12, double %13, double %14, double %15, i8 %stackarg0, i16 %stackarg1, i32 %stackarg2, i64 %stackarg3, half %stackarg4, bfloat %stackarg5, float %stackarg6, double %stackarg7, ptr %p) nounwind {
+; CHECK-LABEL: remat_load:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -208
+; CHECK-NEXT: sd ra, 200(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s0, 192(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s1, 184(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s2, 176(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s3, 168(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s4, 160(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s5, 152(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s6, 144(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s7, 136(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s8, 128(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s9, 120(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s10, 112(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s11, 104(sp) # 8-byte Folded Spill
+; CHECK-NEXT: fsd fs0, 96(sp) # 8-byte Folded Spill
+; CHECK-NEXT: fsd fs1, 88(sp) # 8-byte Folded Spill
+; CHECK-NEXT: fsd fs2, 80(sp) # 8-byte Folded Spill
+; CHECK-NEXT: fsd fs3, 72(sp) # 8-byte Folded Spill
+; CHECK-NEXT: fsd fs4, 64(sp) # 8-byte Folded Spill
+; CHECK-NEXT: fsd fs5, 56(sp) # 8-byte Folded Spill
+; CHECK-NEXT: fsd fs6, 48(sp) # 8-byte Folded Spill
+; CHECK-NEXT: fsd fs7, 40(sp) # 8-byte Folded Spill
+; CHECK-NEXT: fsd fs8, 32(sp) # 8-byte Folded Spill
+; CHECK-NEXT: fsd fs9, 24(sp) # 8-byte Folded Spill
+; CHECK-NEXT: fsd fs10, 16(sp) # 8-byte Folded Spill
+; CHECK-NEXT: fsd fs11, 8(sp) # 8-byte Folded Spill
+; CHECK-NEXT: fld fa5, 264(sp)
+; CHECK-NEXT: flw fa4, 256(sp)
+; CHECK-NEXT: flh fa3, 248(sp)
+; CHECK-NEXT: flh fa2, 240(sp)
+; CHECK-NEXT: ld a0, 272(sp)
+; CHECK-NEXT: lbu a4, 208(sp)
+; CHECK-NEXT: lh a3, 216(sp)
+; CHECK-NEXT: lw a2, 224(sp)
+; CHECK-NEXT: ld a1, 232(sp)
+; CHECK-NEXT: sb a4, 0(a0)
+; CHECK-NEXT: sh a3, 0(a0)
+; CHECK-NEXT: sw a2, 0(a0)
+; CHECK-NEXT: sd a1, 0(a0)
+; CHECK-NEXT: fsh fa2, 0(a0)
+; CHECK-NEXT: fsh fa3, 0(a0)
+; CHECK-NEXT: fsw fa4, 0(a0)
+; CHECK-NEXT: fsd fa5, 0(a0)
+; CHECK-NEXT: #APP
+; CHECK-NEXT: #NO_APP
+; CHECK-NEXT: ld a0, 272(sp)
+; CHECK-NEXT: lbu a1, 208(sp)
+; CHECK-NEXT: sb a1, 0(a0)
+; CHECK-NEXT: lh a1, 216(sp)
+; CHECK-NEXT: sh a1, 0(a0)
+; CHECK-NEXT: lw a1, 224(sp)
+; CHECK-NEXT: sw a1, 0(a0)
+; CHECK-NEXT: ld a1, 232(sp)
+; CHECK-NEXT: sd a1, 0(a0)
+; CHECK-NEXT: flh fa5, 240(sp)
+; CHECK-NEXT: fsh fa5, 0(a0)
+; CHECK-NEXT: flh fa5, 248(sp)
+; CHECK-NEXT: fsh fa5, 0(a0)
+; CHECK-NEXT: flw fa5, 256(sp)
+; CHECK-NEXT: fsw fa5, 0(a0)
+; CHECK-NEXT: fld fa5, 264(sp)
+; CHECK-NEXT: fsd fa5, 0(a0)
+; CHECK-NEXT: ld ra, 200(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s0, 192(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s1, 184(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s2, 176(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s3, 168(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s4, 160(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s5, 152(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s6, 144(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s7, 136(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s8, 128(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s9, 120(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s10, 112(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s11, 104(sp) # 8-byte Folded Reload
+; CHECK-NEXT: fld fs0, 96(sp) # 8-byte Folded Reload
+; CHECK-NEXT: fld fs1, 88(sp) # 8-byte Folded Reload
+; CHECK-NEXT: fld fs2, 80(sp) # 8-byte Folded Reload
+; CHECK-NEXT: fld fs3, 72(sp) # 8-byte Folded Reload
+; CHECK-NEXT: fld fs4, 64(sp) # 8-byte Folded Reload
+; CHECK-NEXT: fld fs5, 56(sp) # 8-byte Folded Reload
+; CHECK-NEXT: fld fs6, 48(sp) # 8-byte Folded Reload
+; CHECK-NEXT: fld fs7, 40(sp) # 8-byte Folded Reload
+; CHECK-NEXT: fld fs8, 32(sp) # 8-byte Folded Reload
+; CHECK-NEXT: fld fs9, 24(sp) # 8-byte Folded Reload
+; CHECK-NEXT: fld fs10, 16(sp) # 8-byte Folded Reload
+; CHECK-NEXT: fld fs11, 8(sp) # 8-byte Folded Reload
+; CHECK-NEXT: addi sp, sp, 208
+; CHECK-NEXT: ret
+entry:
+ ; Add a use of the stack arguments here so that we will have to load them from
+ ; the stack before the inline asm
----------------
preames wrote:
Please extend the comment above with the reason: "because otherwise we'd be exercising the machine scheduler, not rematerialization".
https://github.com/llvm/llvm-project/pull/166774
More information about the llvm-commits mailing list