[PATCH] D137591: [RISCV] Optimize scalable frame offset calculation when VLEN is precisely known

Mon Nov 7 14:37:24 PST 2022

reames created this revision.
reames added reviewers: craig.topper, frasercrmck, asb, kito-cheng.
Herald added subscribers: sunshaoce, VincentWu, StephenFan, vkmr, evandro, luismarques, apazos, sameer.abuasal, s.egerton, Jim, benna, psnobl, jocewei, PkmX, the_o, brucehoult, MartinMosbeck, rogfer01, edward-jones, zzheng, jrtc27, shiva0217, niosHD, sabuasal, bollu, simoncook, johnrusso, rbar, hiraditya, arichardson, mcrosier, qcolombet.
Herald added a project: All.
reames requested review of this revision.
Herald added subscribers: alextsao1999, pcwang-thead, eopXD, MaskRay.
Herald added a project: LLVM.

When VLEN is precisely known (i.e. the minimum and maximum VLEN are equal), we can replace runtime uses of VLENB with compile-time constants.  This converts offsets involving both fixed and scalable components into purely fixed offsets.  As a result, we avoid the CSR read of vlenb and can often fold the multiply as well.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D137591

Files:
  llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
  llvm/test/CodeGen/RISCV/rvv/rv64-spill-vector-csr.ll


Index: llvm/test/CodeGen/RISCV/rvv/rv64-spill-vector-csr.ll
===================================================================

--- llvm/test/CodeGen/RISCV/rvv/rv64-spill-vector-csr.ll
+++ llvm/test/CodeGen/RISCV/rvv/rv64-spill-vector-csr.ll
@@ -3,6 +3,9 @@
 ; RUN:    | FileCheck --check-prefix=SPILL-O0 %s
 ; RUN: llc -mtriple=riscv64 -mattr=+v,+d -mattr=+d -O2 < %s \
 ; RUN:    | FileCheck --check-prefix=SPILL-O2 %s
+; RUN: llc -mtriple=riscv64 -mattr=+v,+d -mattr=+d -riscv-v-vector-bits-min=256 -riscv-v-vector-bits-max=256 -O2 < %s \
+; RUN:    | FileCheck --check-prefix=SPILL-O2-VLEN128 %s
+
 
 @.str = private unnamed_addr constant [6 x i8] c"hello\00", align 1
 
@@ -78,6 +81,38 @@
 ; SPILL-O2-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
 ; SPILL-O2-NEXT:    addi sp, sp, 32
 ; SPILL-O2-NEXT:    ret
+;
+; SPILL-O2-VLEN128-LABEL: foo:
+; SPILL-O2-VLEN128:       # %bb.0:
+; SPILL-O2-VLEN128-NEXT:    addi sp, sp, -32
+; SPILL-O2-VLEN128-NEXT:    sd ra, 24(sp) # 8-byte Folded Spill
+; SPILL-O2-VLEN128-NEXT:    sd s0, 16(sp) # 8-byte Folded Spill
+; SPILL-O2-VLEN128-NEXT:    csrr a1, vlenb
+; SPILL-O2-VLEN128-NEXT:    slli a1, a1, 1
+; SPILL-O2-VLEN128-NEXT:    sub sp, sp, a1
+; SPILL-O2-VLEN128-NEXT:    mv s0, a0
+; SPILL-O2-VLEN128-NEXT:    addi a1, sp, 16
+; SPILL-O2-VLEN128-NEXT:    vs1r.v v8, (a1) # Unknown-size Folded Spill
+; SPILL-O2-VLEN128-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
+; SPILL-O2-VLEN128-NEXT:    vfadd.vv v9, v8, v9
+; SPILL-O2-VLEN128-NEXT:    addi a0, sp, 272
+; SPILL-O2-VLEN128-NEXT:    vs1r.v v9, (a0) # Unknown-size Folded Spill
+; SPILL-O2-VLEN128-NEXT:    lui a0, %hi(.L.str)
+; SPILL-O2-VLEN128-NEXT:    addi a0, a0, %lo(.L.str)
+; SPILL-O2-VLEN128-NEXT:    call puts@plt
+; SPILL-O2-VLEN128-NEXT:    vsetvli zero, s0, e64, m1, ta, ma
+; SPILL-O2-VLEN128-NEXT:    addi a0, sp, 272
+; SPILL-O2-VLEN128-NEXT:    vl1r.v v8, (a0) # Unknown-size Folded Reload
+; SPILL-O2-VLEN128-NEXT:    addi a0, sp, 16
+; SPILL-O2-VLEN128-NEXT:    vl1r.v v9, (a0) # Unknown-size Folded Reload
+; SPILL-O2-VLEN128-NEXT:    vfadd.vv v8, v9, v8
+; SPILL-O2-VLEN128-NEXT:    csrr a0, vlenb
+; SPILL-O2-VLEN128-NEXT:    slli a0, a0, 1
+; SPILL-O2-VLEN128-NEXT:    add sp, sp, a0
+; SPILL-O2-VLEN128-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
+; SPILL-O2-VLEN128-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
+; SPILL-O2-VLEN128-NEXT:    addi sp, sp, 32
+; SPILL-O2-VLEN128-NEXT:    ret
 {
    %x = call <vscale x 1 x double> @llvm.riscv.vfadd.nxv1f64.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x double> %a, <vscale x 1 x double> %b, i64 %gvl)
    %call = call signext i32 @puts(i8* getelementptr inbounds ([6 x i8], [6 x i8]* @.str, i64 0, i64 0))
Index: llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
===================================================================
--- llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
+++ llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
@@ -163,7 +163,8 @@
   MachineInstr &MI = *II;
   MachineFunction &MF = *MI.getParent()->getParent();
   MachineRegisterInfo &MRI = MF.getRegInfo();
-  const RISCVInstrInfo *TII = MF.getSubtarget<RISCVSubtarget>().getInstrInfo();
+  const RISCVSubtarget &ST = MF.getSubtarget<RISCVSubtarget>();
+  const RISCVInstrInfo *TII = ST.getInstrInfo();
   DebugLoc DL = MI.getDebugLoc();
 
   int FrameIndex = MI.getOperand(FIOperandNum).getIndex();
@@ -174,6 +175,16 @@
   if (!IsRVVSpill)
     Offset += StackOffset::getFixed(MI.getOperand(FIOperandNum + 1).getImm());
 
+  if (Offset.getScalable() &&
+      ST.getRealMinVLen() == ST.getRealMaxVLen()) {
+    // For an exact VLEN value, scalable offsets become constant and thus
+    // can be converted entirely into fixed offsets.
+    int64_t FixedValue = Offset.getFixed();
+    int64_t ScalableValue = Offset.getScalable();
+    int64_t VLENB = ST.getRealMinVLen() / 8;
+    Offset = StackOffset::getFixed(FixedValue + ScalableValue * VLENB);
+  }
+
   if (!isInt<32>(Offset.getFixed())) {
     report_fatal_error(
         "Frame offsets outside of the signed 32-bit range not supported");


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D137591.473797.patch
Type: text/x-patch
Size: 4045 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20221107/ef5d4cd5/attachment.bin>