[llvm] aa34a6a - [RISCV] Add register allocation hints for lui/auipc+addi fusion. (#123860)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Jan 27 11:16:26 PST 2025
Author: Craig Topper
Date: 2025-01-27T11:16:22-08:00
New Revision: aa34a6ab299027ac31929173287e42db0dbdb06b
URL: https://github.com/llvm/llvm-project/commit/aa34a6ab299027ac31929173287e42db0dbdb06b
DIFF: https://github.com/llvm/llvm-project/commit/aa34a6ab299027ac31929173287e42db0dbdb06b.diff
LOG: [RISCV] Add register allocation hints for lui/auipc+addi fusion. (#123860)
Spotted the auipc case while looking at code for P550. I'm not sure this
is the right long-term fix. We're still missing rematerialization
opportunities for these pairs, so a pseudo might be better. That would
interfere with folding auipc+add into load/store addressing, though.
Fixes #76779.
Added:
Modified:
llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
llvm/test/CodeGen/RISCV/machinelicm-address-pseudos.ll
llvm/test/CodeGen/RISCV/macro-fusion-lui-addi.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
index b9c70fe60fb506..b0a52698c1e9f1 100644
--- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
@@ -926,6 +926,26 @@ bool RISCVRegisterInfo::getRegAllocationHints(
tryAddHint(MO, MI.getOperand(0), NeedGPRC);
}
}
+
+ // Add a hint if it would allow auipc/lui+addi(w) fusion.
+ if ((MI.getOpcode() == RISCV::ADDIW || MI.getOpcode() == RISCV::ADDI) &&
+ MI.getOperand(1).isReg()) {
+ const MachineBasicBlock &MBB = *MI.getParent();
+ MachineBasicBlock::const_iterator I = MI.getIterator();
+ // Is the previous instruction a LUI or AUIPC that can be fused?
+ if (I != MBB.begin()) {
+ I = skipDebugInstructionsBackward(std::prev(I), MBB.begin());
+ if (((I->getOpcode() == RISCV::LUI && Subtarget.hasLUIADDIFusion()) ||
+ (I->getOpcode() == RISCV::AUIPC &&
+ Subtarget.hasAUIPCADDIFusion())) &&
+ I->getOperand(0).getReg() == MI.getOperand(1).getReg()) {
+ if (OpIdx == 0)
+ tryAddHint(MO, MI.getOperand(1), /*NeedGPRC=*/false);
+ else
+ tryAddHint(MO, MI.getOperand(0), /*NeedGPRC=*/false);
+ }
+ }
+ }
}
for (MCPhysReg OrderReg : Order)
diff --git a/llvm/test/CodeGen/RISCV/machinelicm-address-pseudos.ll b/llvm/test/CodeGen/RISCV/machinelicm-address-pseudos.ll
index af8105644b57db..8deb17582cb116 100644
--- a/llvm/test/CodeGen/RISCV/machinelicm-address-pseudos.ll
+++ b/llvm/test/CodeGen/RISCV/machinelicm-address-pseudos.ll
@@ -1,8 +1,12 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -relocation-model=pic -verify-machineinstrs < %s \
-; RUN: | FileCheck -check-prefixes=RV32I %s
+; RUN: | FileCheck -check-prefixes=RV32I,RV32NOFUSION %s
; RUN: llc -mtriple=riscv64 -relocation-model=pic -verify-machineinstrs < %s \
-; RUN: | FileCheck -check-prefixes=RV64I %s
+; RUN: | FileCheck -check-prefixes=RV64I,RV64NOFUSION %s
+; RUN: llc -mtriple=riscv32 -relocation-model=pic -verify-machineinstrs < %s \
+; RUN: -mattr=+auipc-addi-fusion | FileCheck -check-prefixes=RV32I,RV32FUSION %s
+; RUN: llc -mtriple=riscv64 -relocation-model=pic -verify-machineinstrs < %s \
+; RUN: -mattr=+auipc-addi-fusion | FileCheck -check-prefixes=RV64I,RV64FUSION %s
; Verifies that MachineLICM can hoist address generation pseudos out of loops.
@@ -141,59 +145,113 @@ ret:
@gd = external thread_local global i32
define void @test_la_tls_gd(i32 signext %n) nounwind {
-; RV32I-LABEL: test_la_tls_gd:
-; RV32I: # %bb.0: # %entry
-; RV32I-NEXT: addi sp, sp, -16
-; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s2, 0(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv s0, a0
-; RV32I-NEXT: li s2, 0
-; RV32I-NEXT: .Lpcrel_hi3:
-; RV32I-NEXT: auipc a0, %tls_gd_pcrel_hi(gd)
-; RV32I-NEXT: addi s1, a0, %pcrel_lo(.Lpcrel_hi3)
-; RV32I-NEXT: .LBB3_1: # %loop
-; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
-; RV32I-NEXT: mv a0, s1
-; RV32I-NEXT: call __tls_get_addr
-; RV32I-NEXT: lw zero, 0(a0)
-; RV32I-NEXT: addi s2, s2, 1
-; RV32I-NEXT: blt s2, s0, .LBB3_1
-; RV32I-NEXT: # %bb.2: # %ret
-; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s2, 0(sp) # 4-byte Folded Reload
-; RV32I-NEXT: addi sp, sp, 16
-; RV32I-NEXT: ret
+; RV32NOFUSION-LABEL: test_la_tls_gd:
+; RV32NOFUSION: # %bb.0: # %entry
+; RV32NOFUSION-NEXT: addi sp, sp, -16
+; RV32NOFUSION-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32NOFUSION-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32NOFUSION-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
+; RV32NOFUSION-NEXT: sw s2, 0(sp) # 4-byte Folded Spill
+; RV32NOFUSION-NEXT: mv s0, a0
+; RV32NOFUSION-NEXT: li s2, 0
+; RV32NOFUSION-NEXT: .Lpcrel_hi3:
+; RV32NOFUSION-NEXT: auipc a0, %tls_gd_pcrel_hi(gd)
+; RV32NOFUSION-NEXT: addi s1, a0, %pcrel_lo(.Lpcrel_hi3)
+; RV32NOFUSION-NEXT: .LBB3_1: # %loop
+; RV32NOFUSION-NEXT: # =>This Inner Loop Header: Depth=1
+; RV32NOFUSION-NEXT: mv a0, s1
+; RV32NOFUSION-NEXT: call __tls_get_addr
+; RV32NOFUSION-NEXT: lw zero, 0(a0)
+; RV32NOFUSION-NEXT: addi s2, s2, 1
+; RV32NOFUSION-NEXT: blt s2, s0, .LBB3_1
+; RV32NOFUSION-NEXT: # %bb.2: # %ret
+; RV32NOFUSION-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32NOFUSION-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
+; RV32NOFUSION-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
+; RV32NOFUSION-NEXT: lw s2, 0(sp) # 4-byte Folded Reload
+; RV32NOFUSION-NEXT: addi sp, sp, 16
+; RV32NOFUSION-NEXT: ret
;
-; RV64I-LABEL: test_la_tls_gd:
-; RV64I: # %bb.0: # %entry
-; RV64I-NEXT: addi sp, sp, -32
-; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s2, 0(sp) # 8-byte Folded Spill
-; RV64I-NEXT: mv s0, a0
-; RV64I-NEXT: li s2, 0
-; RV64I-NEXT: .Lpcrel_hi3:
-; RV64I-NEXT: auipc a0, %tls_gd_pcrel_hi(gd)
-; RV64I-NEXT: addi s1, a0, %pcrel_lo(.Lpcrel_hi3)
-; RV64I-NEXT: .LBB3_1: # %loop
-; RV64I-NEXT: # =>This Inner Loop Header: Depth=1
-; RV64I-NEXT: mv a0, s1
-; RV64I-NEXT: call __tls_get_addr
-; RV64I-NEXT: lw zero, 0(a0)
-; RV64I-NEXT: addiw s2, s2, 1
-; RV64I-NEXT: blt s2, s0, .LBB3_1
-; RV64I-NEXT: # %bb.2: # %ret
-; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s2, 0(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 32
-; RV64I-NEXT: ret
+; RV64NOFUSION-LABEL: test_la_tls_gd:
+; RV64NOFUSION: # %bb.0: # %entry
+; RV64NOFUSION-NEXT: addi sp, sp, -32
+; RV64NOFUSION-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64NOFUSION-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; RV64NOFUSION-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; RV64NOFUSION-NEXT: sd s2, 0(sp) # 8-byte Folded Spill
+; RV64NOFUSION-NEXT: mv s0, a0
+; RV64NOFUSION-NEXT: li s2, 0
+; RV64NOFUSION-NEXT: .Lpcrel_hi3:
+; RV64NOFUSION-NEXT: auipc a0, %tls_gd_pcrel_hi(gd)
+; RV64NOFUSION-NEXT: addi s1, a0, %pcrel_lo(.Lpcrel_hi3)
+; RV64NOFUSION-NEXT: .LBB3_1: # %loop
+; RV64NOFUSION-NEXT: # =>This Inner Loop Header: Depth=1
+; RV64NOFUSION-NEXT: mv a0, s1
+; RV64NOFUSION-NEXT: call __tls_get_addr
+; RV64NOFUSION-NEXT: lw zero, 0(a0)
+; RV64NOFUSION-NEXT: addiw s2, s2, 1
+; RV64NOFUSION-NEXT: blt s2, s0, .LBB3_1
+; RV64NOFUSION-NEXT: # %bb.2: # %ret
+; RV64NOFUSION-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64NOFUSION-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; RV64NOFUSION-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; RV64NOFUSION-NEXT: ld s2, 0(sp) # 8-byte Folded Reload
+; RV64NOFUSION-NEXT: addi sp, sp, 32
+; RV64NOFUSION-NEXT: ret
+;
+; RV32FUSION-LABEL: test_la_tls_gd:
+; RV32FUSION: # %bb.0: # %entry
+; RV32FUSION-NEXT: addi sp, sp, -16
+; RV32FUSION-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32FUSION-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32FUSION-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
+; RV32FUSION-NEXT: sw s2, 0(sp) # 4-byte Folded Spill
+; RV32FUSION-NEXT: mv s0, a0
+; RV32FUSION-NEXT: li s2, 0
+; RV32FUSION-NEXT: .Lpcrel_hi3:
+; RV32FUSION-NEXT: auipc s1, %tls_gd_pcrel_hi(gd)
+; RV32FUSION-NEXT: addi s1, s1, %pcrel_lo(.Lpcrel_hi3)
+; RV32FUSION-NEXT: .LBB3_1: # %loop
+; RV32FUSION-NEXT: # =>This Inner Loop Header: Depth=1
+; RV32FUSION-NEXT: mv a0, s1
+; RV32FUSION-NEXT: call __tls_get_addr
+; RV32FUSION-NEXT: lw zero, 0(a0)
+; RV32FUSION-NEXT: addi s2, s2, 1
+; RV32FUSION-NEXT: blt s2, s0, .LBB3_1
+; RV32FUSION-NEXT: # %bb.2: # %ret
+; RV32FUSION-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32FUSION-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
+; RV32FUSION-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
+; RV32FUSION-NEXT: lw s2, 0(sp) # 4-byte Folded Reload
+; RV32FUSION-NEXT: addi sp, sp, 16
+; RV32FUSION-NEXT: ret
+;
+; RV64FUSION-LABEL: test_la_tls_gd:
+; RV64FUSION: # %bb.0: # %entry
+; RV64FUSION-NEXT: addi sp, sp, -32
+; RV64FUSION-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64FUSION-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; RV64FUSION-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; RV64FUSION-NEXT: sd s2, 0(sp) # 8-byte Folded Spill
+; RV64FUSION-NEXT: mv s0, a0
+; RV64FUSION-NEXT: li s2, 0
+; RV64FUSION-NEXT: .Lpcrel_hi3:
+; RV64FUSION-NEXT: auipc s1, %tls_gd_pcrel_hi(gd)
+; RV64FUSION-NEXT: addi s1, s1, %pcrel_lo(.Lpcrel_hi3)
+; RV64FUSION-NEXT: .LBB3_1: # %loop
+; RV64FUSION-NEXT: # =>This Inner Loop Header: Depth=1
+; RV64FUSION-NEXT: mv a0, s1
+; RV64FUSION-NEXT: call __tls_get_addr
+; RV64FUSION-NEXT: lw zero, 0(a0)
+; RV64FUSION-NEXT: addiw s2, s2, 1
+; RV64FUSION-NEXT: blt s2, s0, .LBB3_1
+; RV64FUSION-NEXT: # %bb.2: # %ret
+; RV64FUSION-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64FUSION-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; RV64FUSION-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; RV64FUSION-NEXT: ld s2, 0(sp) # 8-byte Folded Reload
+; RV64FUSION-NEXT: addi sp, sp, 32
+; RV64FUSION-NEXT: ret
entry:
br label %loop
diff --git a/llvm/test/CodeGen/RISCV/macro-fusion-lui-addi.ll b/llvm/test/CodeGen/RISCV/macro-fusion-lui-addi.ll
index 3f758e25c42170..d1b10af16063a9 100644
--- a/llvm/test/CodeGen/RISCV/macro-fusion-lui-addi.ll
+++ b/llvm/test/CodeGen/RISCV/macro-fusion-lui-addi.ll
@@ -12,6 +12,8 @@
;RUN: llc < %s -mtriple=riscv64 -mattr=+f,+lui-addi-fusion,+use-postra-scheduler -mcpu=sifive-u74 \
;RUN: -misched-postra-direction=bidirectional -target-abi=lp64f \
;RUN: | FileCheck %s --check-prefixes=FUSION-POSTRA,FUSION-POSTRA-BIDIRECTIONAL
+;RUN: llc < %s -mtriple=riscv64 -mattr=+f,+lui-addi-fusion -target-abi=lp64f \
+;RUN: | FileCheck %s --check-prefix=FUSION-GENERIC
@.str = private constant [4 x i8] c"%f\0A\00", align 1
@@ -50,6 +52,13 @@ define void @foo(i32 signext %0, i32 signext %1) {
; FUSION-POSTRA-BIDIRECTIONAL-NEXT: addi a0, a0, %lo(.L.str)
; FUSION-POSTRA-BIDIRECTIONAL-NEXT: fcvt.s.w fa0, a1
; FUSION-POSTRA-BIDIRECTIONAL-NEXT: tail bar
+;
+; FUSION-GENERIC-LABEL: foo:
+; FUSION-GENERIC: # %bb.0:
+; FUSION-GENERIC-NEXT: fcvt.s.w fa0, a1
+; FUSION-GENERIC-NEXT: lui a0, %hi(.L.str)
+; FUSION-GENERIC-NEXT: addi a0, a0, %lo(.L.str)
+; FUSION-GENERIC-NEXT: tail bar
%3 = sitofp i32 %1 to float
tail call void @bar(ptr @.str, float %3)
ret void
@@ -76,5 +85,44 @@ define i32 @test_matint() {
; FUSION-POSTRA-NEXT: lui a0, 1
; FUSION-POSTRA-NEXT: addiw a0, a0, -2048
; FUSION-POSTRA-NEXT: ret
+;
+; FUSION-GENERIC-LABEL: test_matint:
+; FUSION-GENERIC: # %bb.0:
+; FUSION-GENERIC-NEXT: lui a0, 1
+; FUSION-GENERIC-NEXT: addiw a0, a0, -2048
+; FUSION-GENERIC-NEXT: ret
ret i32 2048
}
+
+define void @test_regalloc_hint(i32 noundef signext %0, i32 noundef signext %1) {
+; NOFUSION-LABEL: test_regalloc_hint:
+; NOFUSION: # %bb.0:
+; NOFUSION-NEXT: mv a0, a1
+; NOFUSION-NEXT: lui a1, 3014
+; NOFUSION-NEXT: addiw a1, a1, 334
+; NOFUSION-NEXT: tail bar
+;
+; FUSION-LABEL: test_regalloc_hint:
+; FUSION: # %bb.0:
+; FUSION-NEXT: mv a0, a1
+; FUSION-NEXT: lui a1, 3014
+; FUSION-NEXT: addiw a1, a1, 334
+; FUSION-NEXT: tail bar
+;
+; FUSION-POSTRA-LABEL: test_regalloc_hint:
+; FUSION-POSTRA: # %bb.0:
+; FUSION-POSTRA-NEXT: mv a0, a1
+; FUSION-POSTRA-NEXT: lui a1, 3014
+; FUSION-POSTRA-NEXT: addiw a1, a1, 334
+; FUSION-POSTRA-NEXT: tail bar
+;
+; FUSION-GENERIC-LABEL: test_regalloc_hint:
+; FUSION-GENERIC: # %bb.0:
+; FUSION-GENERIC-NEXT: lui a2, 3014
+; FUSION-GENERIC-NEXT: addiw a2, a2, 334
+; FUSION-GENERIC-NEXT: mv a0, a1
+; FUSION-GENERIC-NEXT: mv a1, a2
+; FUSION-GENERIC-NEXT: tail bar
+ tail call void @bar(i32 noundef signext %1, i32 noundef signext 12345678)
+ ret void
+}
More information about the llvm-commits
mailing list