[llvm] 18fda86 - [RISCV] Optimize scalable frame offset calculation when VLEN is precisely known

Philip Reames via llvm-commits llvm-commits at lists.llvm.org
Fri Nov 18 09:57:07 PST 2022


Author: Philip Reames
Date: 2022-11-18T09:56:55-08:00
New Revision: 18fda867f4777adbcb3ebfeaaeadeb46fc8c3157

URL: https://github.com/llvm/llvm-project/commit/18fda867f4777adbcb3ebfeaaeadeb46fc8c3157
DIFF: https://github.com/llvm/llvm-project/commit/18fda867f4777adbcb3ebfeaaeadeb46fc8c3157.diff

LOG: [RISCV] Optimize scalable frame offset calculation when VLEN is precisely known

When we have a precisely known VLEN, we can replace runtime usage of VLENB with compile-time constants. This converts offsets involving both fixed and scalable components into purely fixed offsets. The result is that we avoid the CSR read of vlenb, and can often fold the multiply as well.
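
As a minimal standalone sketch of the arithmetic (a hypothetical helper, not code from this patch; it assumes VLEN is pinned to 128 bits, e.g. via -riscv-v-vector-bits-max=128):

  // Hypothetical illustration of the folding this patch performs.
  // The scalable component of a StackOffset counts vscale units; 8 such
  // units correspond to one LMUL=1 vector register of VLEN/8 (VLENB) bytes.
  #include <cassert>
  #include <cstdint>
  #include <cstdio>

  int64_t foldScalableOffset(int64_t Fixed, int64_t Scalable, int64_t VLen) {
    assert(Scalable % 8 == 0 && "not a whole number of vector registers");
    int64_t NumOfVReg = Scalable / 8; // whole vector registers
    int64_t VLenB = VLen / 8;         // bytes per vector register
    return Fixed + NumOfVReg * VLenB; // now a purely fixed byte offset
  }

  int main() {
    // 16 fixed bytes plus two vector registers (scalable value 16):
    // with VLEN=128, VLENB=16, the offset folds to 16 + 2*16 = 48.
    printf("%lld\n", (long long)foldScalableOffset(16, 16, 128));
    return 0;
  }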

Differential Revision: https://reviews.llvm.org/D137591

Added: 
    

Modified: 
    llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
    llvm/test/CodeGen/RISCV/rvv/rv64-spill-vector-csr.ll

Removed: 
    


################################################################################
diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
index 48e2b2aa2051b..38a6c7f8ed1ae 100644
--- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
@@ -163,7 +163,8 @@ bool RISCVRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
   MachineInstr &MI = *II;
   MachineFunction &MF = *MI.getParent()->getParent();
   MachineRegisterInfo &MRI = MF.getRegInfo();
-  const RISCVInstrInfo *TII = MF.getSubtarget<RISCVSubtarget>().getInstrInfo();
+  const RISCVSubtarget &ST = MF.getSubtarget<RISCVSubtarget>();
+  const RISCVInstrInfo *TII = ST.getInstrInfo();
   DebugLoc DL = MI.getDebugLoc();
 
   int FrameIndex = MI.getOperand(FIOperandNum).getIndex();
@@ -174,6 +175,19 @@ bool RISCVRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
   if (!IsRVVSpill)
     Offset += StackOffset::getFixed(MI.getOperand(FIOperandNum + 1).getImm());
 
+  if (Offset.getScalable() &&
+      ST.getRealMinVLen() == ST.getRealMaxVLen()) {
+    // For an exact VLEN value, scalable offsets become constant and thus
+    // can be converted entirely into fixed offsets.
+    int64_t FixedValue = Offset.getFixed();
+    int64_t ScalableValue = Offset.getScalable();
+    assert(ScalableValue % 8 == 0 &&
+           "Scalable offset is not a multiple of a single vector size.");
+    int64_t NumOfVReg = ScalableValue / 8;
+    int64_t VLENB = ST.getRealMinVLen() / 8;
+    Offset = StackOffset::getFixed(FixedValue + NumOfVReg * VLENB);
+  }
+
   if (!isInt<32>(Offset.getFixed())) {
     report_fatal_error(
         "Frame offsets outside of the signed 32-bit range not supported");

diff --git a/llvm/test/CodeGen/RISCV/rvv/rv64-spill-vector-csr.ll b/llvm/test/CodeGen/RISCV/rvv/rv64-spill-vector-csr.ll
index 123f75df3cfa2..d640e22df37aa 100644
--- a/llvm/test/CodeGen/RISCV/rvv/rv64-spill-vector-csr.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/rv64-spill-vector-csr.ll
@@ -3,6 +3,9 @@
 ; RUN:    | FileCheck --check-prefix=SPILL-O0 %s
 ; RUN: llc -mtriple=riscv64 -mattr=+v,+d -mattr=+d -O2 < %s \
 ; RUN:    | FileCheck --check-prefix=SPILL-O2 %s
+; RUN: llc -mtriple=riscv64 -mattr=+v,+d -mattr=+d -riscv-v-vector-bits-max=128 -O2 < %s \
+; RUN:    | FileCheck --check-prefix=SPILL-O2-VLEN128 %s
+
 
 @.str = private unnamed_addr constant [6 x i8] c"hello\00", align 1
 
@@ -78,6 +81,38 @@ define <vscale x 1 x double> @foo(<vscale x 1 x double> %a, <vscale x 1 x double
 ; SPILL-O2-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
 ; SPILL-O2-NEXT:    addi sp, sp, 32
 ; SPILL-O2-NEXT:    ret
+;
+; SPILL-O2-VLEN128-LABEL: foo:
+; SPILL-O2-VLEN128:       # %bb.0:
+; SPILL-O2-VLEN128-NEXT:    addi sp, sp, -32
+; SPILL-O2-VLEN128-NEXT:    sd ra, 24(sp) # 8-byte Folded Spill
+; SPILL-O2-VLEN128-NEXT:    sd s0, 16(sp) # 8-byte Folded Spill
+; SPILL-O2-VLEN128-NEXT:    csrr a1, vlenb
+; SPILL-O2-VLEN128-NEXT:    slli a1, a1, 1
+; SPILL-O2-VLEN128-NEXT:    sub sp, sp, a1
+; SPILL-O2-VLEN128-NEXT:    mv s0, a0
+; SPILL-O2-VLEN128-NEXT:    addi a1, sp, 16
+; SPILL-O2-VLEN128-NEXT:    vs1r.v v8, (a1) # Unknown-size Folded Spill
+; SPILL-O2-VLEN128-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
+; SPILL-O2-VLEN128-NEXT:    vfadd.vv v9, v8, v9
+; SPILL-O2-VLEN128-NEXT:    addi a0, sp, 32
+; SPILL-O2-VLEN128-NEXT:    vs1r.v v9, (a0) # Unknown-size Folded Spill
+; SPILL-O2-VLEN128-NEXT:    lui a0, %hi(.L.str)
+; SPILL-O2-VLEN128-NEXT:    addi a0, a0, %lo(.L.str)
+; SPILL-O2-VLEN128-NEXT:    call puts@plt
+; SPILL-O2-VLEN128-NEXT:    vsetvli zero, s0, e64, m1, ta, ma
+; SPILL-O2-VLEN128-NEXT:    addi a0, sp, 32
+; SPILL-O2-VLEN128-NEXT:    vl1r.v v8, (a0) # Unknown-size Folded Reload
+; SPILL-O2-VLEN128-NEXT:    addi a0, sp, 16
+; SPILL-O2-VLEN128-NEXT:    vl1r.v v9, (a0) # Unknown-size Folded Reload
+; SPILL-O2-VLEN128-NEXT:    vfadd.vv v8, v9, v8
+; SPILL-O2-VLEN128-NEXT:    csrr a0, vlenb
+; SPILL-O2-VLEN128-NEXT:    slli a0, a0, 1
+; SPILL-O2-VLEN128-NEXT:    add sp, sp, a0
+; SPILL-O2-VLEN128-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
+; SPILL-O2-VLEN128-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
+; SPILL-O2-VLEN128-NEXT:    addi sp, sp, 32
+; SPILL-O2-VLEN128-NEXT:    ret
 {
    %x = call <vscale x 1 x double> @llvm.riscv.vfadd.nxv1f64.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x double> %a, <vscale x 1 x double> %b, i64 %gvl)
    %call = call signext i32 @puts(i8* getelementptr inbounds ([6 x i8], [6 x i8]* @.str, i64 0, i64 0))
