[llvm] aa34a6a - [RISCV] Add register allocation hints for lui/auipc+addi fusion. (#123860)

via llvm-commits llvm-commits at lists.llvm.org
Mon Jan 27 11:16:26 PST 2025


Author: Craig Topper
Date: 2025-01-27T11:16:22-08:00
New Revision: aa34a6ab299027ac31929173287e42db0dbdb06b

URL: https://github.com/llvm/llvm-project/commit/aa34a6ab299027ac31929173287e42db0dbdb06b
DIFF: https://github.com/llvm/llvm-project/commit/aa34a6ab299027ac31929173287e42db0dbdb06b.diff

LOG: [RISCV] Add register allocation hints for lui/auipc+addi fusion. (#123860)

Spotted the auipc case while looking at code for P550. I'm not sure this
is the right long-term fix. We're still missing rematerialization
opportunities for these pairs, so a pseudo might be better. That would
interfere with folding auipc+add into load/store addressing, though.

Fixes #76779.

Added: 
    

Modified: 
    llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
    llvm/test/CodeGen/RISCV/machinelicm-address-pseudos.ll
    llvm/test/CodeGen/RISCV/macro-fusion-lui-addi.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
index b9c70fe60fb506..b0a52698c1e9f1 100644
--- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
@@ -926,6 +926,26 @@ bool RISCVRegisterInfo::getRegAllocationHints(
         tryAddHint(MO, MI.getOperand(0), NeedGPRC);
       }
     }
+
+    // Add a hint if it would allow auipc/lui+addi(w) fusion.
+    if ((MI.getOpcode() == RISCV::ADDIW || MI.getOpcode() == RISCV::ADDI) &&
+        MI.getOperand(1).isReg()) {
+      const MachineBasicBlock &MBB = *MI.getParent();
+      MachineBasicBlock::const_iterator I = MI.getIterator();
+      // Is the previous instruction a LUI or AUIPC that can be fused?
+      if (I != MBB.begin()) {
+        I = skipDebugInstructionsBackward(std::prev(I), MBB.begin());
+        if (((I->getOpcode() == RISCV::LUI && Subtarget.hasLUIADDIFusion()) ||
+             (I->getOpcode() == RISCV::AUIPC &&
+              Subtarget.hasAUIPCADDIFusion())) &&
+            I->getOperand(0).getReg() == MI.getOperand(1).getReg()) {
+          if (OpIdx == 0)
+            tryAddHint(MO, MI.getOperand(1), /*NeedGPRC=*/false);
+          else
+            tryAddHint(MO, MI.getOperand(0), /*NeedGPRC=*/false);
+        }
+      }
+    }
   }
 
   for (MCPhysReg OrderReg : Order)

diff  --git a/llvm/test/CodeGen/RISCV/machinelicm-address-pseudos.ll b/llvm/test/CodeGen/RISCV/machinelicm-address-pseudos.ll
index af8105644b57db..8deb17582cb116 100644
--- a/llvm/test/CodeGen/RISCV/machinelicm-address-pseudos.ll
+++ b/llvm/test/CodeGen/RISCV/machinelicm-address-pseudos.ll
@@ -1,8 +1,12 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=riscv32 -relocation-model=pic -verify-machineinstrs < %s \
-; RUN:   | FileCheck -check-prefixes=RV32I %s
+; RUN:   | FileCheck -check-prefixes=RV32I,RV32NOFUSION %s
 ; RUN: llc -mtriple=riscv64 -relocation-model=pic -verify-machineinstrs < %s \
-; RUN:   | FileCheck -check-prefixes=RV64I %s
+; RUN:   | FileCheck -check-prefixes=RV64I,RV64NOFUSION %s
+; RUN: llc -mtriple=riscv32 -relocation-model=pic -verify-machineinstrs < %s \
+; RUN:   -mattr=+auipc-addi-fusion | FileCheck -check-prefixes=RV32I,RV32FUSION %s
+; RUN: llc -mtriple=riscv64 -relocation-model=pic -verify-machineinstrs < %s \
+; RUN:   -mattr=+auipc-addi-fusion | FileCheck -check-prefixes=RV64I,RV64FUSION %s
 
 ; Verifies that MachineLICM can hoist address generation pseudos out of loops.
 
@@ -141,59 +145,113 @@ ret:
 @gd = external thread_local global i32
 
 define void @test_la_tls_gd(i32 signext %n) nounwind {
-; RV32I-LABEL: test_la_tls_gd:
-; RV32I:       # %bb.0: # %entry
-; RV32I-NEXT:    addi sp, sp, -16
-; RV32I-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    sw s1, 4(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    sw s2, 0(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    mv s0, a0
-; RV32I-NEXT:    li s2, 0
-; RV32I-NEXT:  .Lpcrel_hi3:
-; RV32I-NEXT:    auipc a0, %tls_gd_pcrel_hi(gd)
-; RV32I-NEXT:    addi s1, a0, %pcrel_lo(.Lpcrel_hi3)
-; RV32I-NEXT:  .LBB3_1: # %loop
-; RV32I-NEXT:    # =>This Inner Loop Header: Depth=1
-; RV32I-NEXT:    mv a0, s1
-; RV32I-NEXT:    call __tls_get_addr
-; RV32I-NEXT:    lw zero, 0(a0)
-; RV32I-NEXT:    addi s2, s2, 1
-; RV32I-NEXT:    blt s2, s0, .LBB3_1
-; RV32I-NEXT:  # %bb.2: # %ret
-; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s2, 0(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    addi sp, sp, 16
-; RV32I-NEXT:    ret
+; RV32NOFUSION-LABEL: test_la_tls_gd:
+; RV32NOFUSION:       # %bb.0: # %entry
+; RV32NOFUSION-NEXT:    addi sp, sp, -16
+; RV32NOFUSION-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32NOFUSION-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
+; RV32NOFUSION-NEXT:    sw s1, 4(sp) # 4-byte Folded Spill
+; RV32NOFUSION-NEXT:    sw s2, 0(sp) # 4-byte Folded Spill
+; RV32NOFUSION-NEXT:    mv s0, a0
+; RV32NOFUSION-NEXT:    li s2, 0
+; RV32NOFUSION-NEXT:  .Lpcrel_hi3:
+; RV32NOFUSION-NEXT:    auipc a0, %tls_gd_pcrel_hi(gd)
+; RV32NOFUSION-NEXT:    addi s1, a0, %pcrel_lo(.Lpcrel_hi3)
+; RV32NOFUSION-NEXT:  .LBB3_1: # %loop
+; RV32NOFUSION-NEXT:    # =>This Inner Loop Header: Depth=1
+; RV32NOFUSION-NEXT:    mv a0, s1
+; RV32NOFUSION-NEXT:    call __tls_get_addr
+; RV32NOFUSION-NEXT:    lw zero, 0(a0)
+; RV32NOFUSION-NEXT:    addi s2, s2, 1
+; RV32NOFUSION-NEXT:    blt s2, s0, .LBB3_1
+; RV32NOFUSION-NEXT:  # %bb.2: # %ret
+; RV32NOFUSION-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32NOFUSION-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32NOFUSION-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
+; RV32NOFUSION-NEXT:    lw s2, 0(sp) # 4-byte Folded Reload
+; RV32NOFUSION-NEXT:    addi sp, sp, 16
+; RV32NOFUSION-NEXT:    ret
 ;
-; RV64I-LABEL: test_la_tls_gd:
-; RV64I:       # %bb.0: # %entry
-; RV64I-NEXT:    addi sp, sp, -32
-; RV64I-NEXT:    sd ra, 24(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    sd s0, 16(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    sd s1, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    sd s2, 0(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    mv s0, a0
-; RV64I-NEXT:    li s2, 0
-; RV64I-NEXT:  .Lpcrel_hi3:
-; RV64I-NEXT:    auipc a0, %tls_gd_pcrel_hi(gd)
-; RV64I-NEXT:    addi s1, a0, %pcrel_lo(.Lpcrel_hi3)
-; RV64I-NEXT:  .LBB3_1: # %loop
-; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
-; RV64I-NEXT:    mv a0, s1
-; RV64I-NEXT:    call __tls_get_addr
-; RV64I-NEXT:    lw zero, 0(a0)
-; RV64I-NEXT:    addiw s2, s2, 1
-; RV64I-NEXT:    blt s2, s0, .LBB3_1
-; RV64I-NEXT:  # %bb.2: # %ret
-; RV64I-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s2, 0(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    addi sp, sp, 32
-; RV64I-NEXT:    ret
+; RV64NOFUSION-LABEL: test_la_tls_gd:
+; RV64NOFUSION:       # %bb.0: # %entry
+; RV64NOFUSION-NEXT:    addi sp, sp, -32
+; RV64NOFUSION-NEXT:    sd ra, 24(sp) # 8-byte Folded Spill
+; RV64NOFUSION-NEXT:    sd s0, 16(sp) # 8-byte Folded Spill
+; RV64NOFUSION-NEXT:    sd s1, 8(sp) # 8-byte Folded Spill
+; RV64NOFUSION-NEXT:    sd s2, 0(sp) # 8-byte Folded Spill
+; RV64NOFUSION-NEXT:    mv s0, a0
+; RV64NOFUSION-NEXT:    li s2, 0
+; RV64NOFUSION-NEXT:  .Lpcrel_hi3:
+; RV64NOFUSION-NEXT:    auipc a0, %tls_gd_pcrel_hi(gd)
+; RV64NOFUSION-NEXT:    addi s1, a0, %pcrel_lo(.Lpcrel_hi3)
+; RV64NOFUSION-NEXT:  .LBB3_1: # %loop
+; RV64NOFUSION-NEXT:    # =>This Inner Loop Header: Depth=1
+; RV64NOFUSION-NEXT:    mv a0, s1
+; RV64NOFUSION-NEXT:    call __tls_get_addr
+; RV64NOFUSION-NEXT:    lw zero, 0(a0)
+; RV64NOFUSION-NEXT:    addiw s2, s2, 1
+; RV64NOFUSION-NEXT:    blt s2, s0, .LBB3_1
+; RV64NOFUSION-NEXT:  # %bb.2: # %ret
+; RV64NOFUSION-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
+; RV64NOFUSION-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
+; RV64NOFUSION-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
+; RV64NOFUSION-NEXT:    ld s2, 0(sp) # 8-byte Folded Reload
+; RV64NOFUSION-NEXT:    addi sp, sp, 32
+; RV64NOFUSION-NEXT:    ret
+;
+; RV32FUSION-LABEL: test_la_tls_gd:
+; RV32FUSION:       # %bb.0: # %entry
+; RV32FUSION-NEXT:    addi sp, sp, -16
+; RV32FUSION-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32FUSION-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
+; RV32FUSION-NEXT:    sw s1, 4(sp) # 4-byte Folded Spill
+; RV32FUSION-NEXT:    sw s2, 0(sp) # 4-byte Folded Spill
+; RV32FUSION-NEXT:    mv s0, a0
+; RV32FUSION-NEXT:    li s2, 0
+; RV32FUSION-NEXT:  .Lpcrel_hi3:
+; RV32FUSION-NEXT:    auipc s1, %tls_gd_pcrel_hi(gd)
+; RV32FUSION-NEXT:    addi s1, s1, %pcrel_lo(.Lpcrel_hi3)
+; RV32FUSION-NEXT:  .LBB3_1: # %loop
+; RV32FUSION-NEXT:    # =>This Inner Loop Header: Depth=1
+; RV32FUSION-NEXT:    mv a0, s1
+; RV32FUSION-NEXT:    call __tls_get_addr
+; RV32FUSION-NEXT:    lw zero, 0(a0)
+; RV32FUSION-NEXT:    addi s2, s2, 1
+; RV32FUSION-NEXT:    blt s2, s0, .LBB3_1
+; RV32FUSION-NEXT:  # %bb.2: # %ret
+; RV32FUSION-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32FUSION-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32FUSION-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
+; RV32FUSION-NEXT:    lw s2, 0(sp) # 4-byte Folded Reload
+; RV32FUSION-NEXT:    addi sp, sp, 16
+; RV32FUSION-NEXT:    ret
+;
+; RV64FUSION-LABEL: test_la_tls_gd:
+; RV64FUSION:       # %bb.0: # %entry
+; RV64FUSION-NEXT:    addi sp, sp, -32
+; RV64FUSION-NEXT:    sd ra, 24(sp) # 8-byte Folded Spill
+; RV64FUSION-NEXT:    sd s0, 16(sp) # 8-byte Folded Spill
+; RV64FUSION-NEXT:    sd s1, 8(sp) # 8-byte Folded Spill
+; RV64FUSION-NEXT:    sd s2, 0(sp) # 8-byte Folded Spill
+; RV64FUSION-NEXT:    mv s0, a0
+; RV64FUSION-NEXT:    li s2, 0
+; RV64FUSION-NEXT:  .Lpcrel_hi3:
+; RV64FUSION-NEXT:    auipc s1, %tls_gd_pcrel_hi(gd)
+; RV64FUSION-NEXT:    addi s1, s1, %pcrel_lo(.Lpcrel_hi3)
+; RV64FUSION-NEXT:  .LBB3_1: # %loop
+; RV64FUSION-NEXT:    # =>This Inner Loop Header: Depth=1
+; RV64FUSION-NEXT:    mv a0, s1
+; RV64FUSION-NEXT:    call __tls_get_addr
+; RV64FUSION-NEXT:    lw zero, 0(a0)
+; RV64FUSION-NEXT:    addiw s2, s2, 1
+; RV64FUSION-NEXT:    blt s2, s0, .LBB3_1
+; RV64FUSION-NEXT:  # %bb.2: # %ret
+; RV64FUSION-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
+; RV64FUSION-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
+; RV64FUSION-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
+; RV64FUSION-NEXT:    ld s2, 0(sp) # 8-byte Folded Reload
+; RV64FUSION-NEXT:    addi sp, sp, 32
+; RV64FUSION-NEXT:    ret
 entry:
   br label %loop
 

diff  --git a/llvm/test/CodeGen/RISCV/macro-fusion-lui-addi.ll b/llvm/test/CodeGen/RISCV/macro-fusion-lui-addi.ll
index 3f758e25c42170..d1b10af16063a9 100644
--- a/llvm/test/CodeGen/RISCV/macro-fusion-lui-addi.ll
+++ b/llvm/test/CodeGen/RISCV/macro-fusion-lui-addi.ll
@@ -12,6 +12,8 @@
 ;RUN: llc < %s -mtriple=riscv64 -mattr=+f,+lui-addi-fusion,+use-postra-scheduler -mcpu=sifive-u74 \
 ;RUN:   -misched-postra-direction=bidirectional -target-abi=lp64f \
 ;RUN:   | FileCheck %s --check-prefixes=FUSION-POSTRA,FUSION-POSTRA-BIDIRECTIONAL
+;RUN: llc < %s -mtriple=riscv64 -mattr=+f,+lui-addi-fusion -target-abi=lp64f \
+;RUN:   | FileCheck %s --check-prefix=FUSION-GENERIC
 
 @.str = private constant [4 x i8] c"%f\0A\00", align 1
 
@@ -50,6 +52,13 @@ define void @foo(i32 signext %0, i32 signext %1) {
 ; FUSION-POSTRA-BIDIRECTIONAL-NEXT:    addi a0, a0, %lo(.L.str)
 ; FUSION-POSTRA-BIDIRECTIONAL-NEXT:    fcvt.s.w fa0, a1
 ; FUSION-POSTRA-BIDIRECTIONAL-NEXT:    tail bar
+;
+; FUSION-GENERIC-LABEL: foo:
+; FUSION-GENERIC:       # %bb.0:
+; FUSION-GENERIC-NEXT:    fcvt.s.w fa0, a1
+; FUSION-GENERIC-NEXT:    lui a0, %hi(.L.str)
+; FUSION-GENERIC-NEXT:    addi a0, a0, %lo(.L.str)
+; FUSION-GENERIC-NEXT:    tail bar
   %3 = sitofp i32 %1 to float
   tail call void @bar(ptr @.str, float %3)
   ret void
@@ -76,5 +85,44 @@ define i32 @test_matint() {
 ; FUSION-POSTRA-NEXT:    lui a0, 1
 ; FUSION-POSTRA-NEXT:    addiw a0, a0, -2048
 ; FUSION-POSTRA-NEXT:    ret
+;
+; FUSION-GENERIC-LABEL: test_matint:
+; FUSION-GENERIC:       # %bb.0:
+; FUSION-GENERIC-NEXT:    lui a0, 1
+; FUSION-GENERIC-NEXT:    addiw a0, a0, -2048
+; FUSION-GENERIC-NEXT:    ret
   ret i32 2048
 }
+
+define void @test_regalloc_hint(i32 noundef signext %0, i32 noundef signext %1) {
+; NOFUSION-LABEL: test_regalloc_hint:
+; NOFUSION:       # %bb.0:
+; NOFUSION-NEXT:    mv a0, a1
+; NOFUSION-NEXT:    lui a1, 3014
+; NOFUSION-NEXT:    addiw a1, a1, 334
+; NOFUSION-NEXT:    tail bar
+;
+; FUSION-LABEL: test_regalloc_hint:
+; FUSION:       # %bb.0:
+; FUSION-NEXT:    mv a0, a1
+; FUSION-NEXT:    lui a1, 3014
+; FUSION-NEXT:    addiw a1, a1, 334
+; FUSION-NEXT:    tail bar
+;
+; FUSION-POSTRA-LABEL: test_regalloc_hint:
+; FUSION-POSTRA:       # %bb.0:
+; FUSION-POSTRA-NEXT:    mv a0, a1
+; FUSION-POSTRA-NEXT:    lui a1, 3014
+; FUSION-POSTRA-NEXT:    addiw a1, a1, 334
+; FUSION-POSTRA-NEXT:    tail bar
+;
+; FUSION-GENERIC-LABEL: test_regalloc_hint:
+; FUSION-GENERIC:       # %bb.0:
+; FUSION-GENERIC-NEXT:    lui a2, 3014
+; FUSION-GENERIC-NEXT:    addiw a2, a2, 334
+; FUSION-GENERIC-NEXT:    mv a0, a1
+; FUSION-GENERIC-NEXT:    mv a1, a2
+; FUSION-GENERIC-NEXT:    tail bar
+  tail call void @bar(i32 noundef signext %1, i32 noundef signext 12345678)
+  ret void
+}


        


More information about the llvm-commits mailing list