[llvm] 3f02d26 - [RISCV] Further fixes for RVV stack offset computation

Fraser Cormack via llvm-commits llvm-commits at lists.llvm.org
Wed Apr 21 02:58:23 PDT 2021


Author: Fraser Cormack
Date: 2021-04-21T10:51:07+01:00
New Revision: 3f02d269433e1c9b12dee9bf81ea3ec98c212548

URL: https://github.com/llvm/llvm-project/commit/3f02d269433e1c9b12dee9bf81ea3ec98c212548
DIFF: https://github.com/llvm/llvm-project/commit/3f02d269433e1c9b12dee9bf81ea3ec98c212548.diff

LOG: [RISCV] Further fixes for RVV stack offset computation

This patch fixes a case missed by D100574, in which RVV scalable stack
offset computations may require three live registers: the case where
the offset has a scalable component and its fixed component does not
fit in a signed 12-bit immediate.

Instead of adding an additional emergency spill slot, this patch further
optimizes the scalable stack offset computation sequences to reduce
register usage.

By emitting the sequence to compute the scalable component before the
fixed component, we can free up one scratch register to be reallocated
by the sequence for the fixed component. Doing this saves one register
and thus one additional emergency spill slot.

Compare:

    $x5 = LUI 1
    $x1 = ADDIW killed $x5, -1896
    $x1 = ADD $x2, killed $x1
    $x5 = PseudoReadVLENB
    $x6 = ADDI $x0, 50
    $x5 = MUL killed $x5, killed $x6
    $x1 = ADD killed $x1, killed $x5

versus:

    $x5 = PseudoReadVLENB
    $x1 = ADDI $x0, 50
    $x5 = MUL killed $x5, killed $x1
    $x1 = LUI 1
    $x1 = ADDIW killed $x1, -1896
    $x1 = ADD $x2, killed $x1
    $x1 = ADD killed $x1, killed $x5

Reviewed By: HsiangKai

Differential Revision: https://reviews.llvm.org/D100847

Added: 
    

Modified: 
    llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
    llvm/test/CodeGen/RISCV/rvv/addi-scalable-offset.mir
    llvm/test/CodeGen/RISCV/rvv/emergency-slot.mir

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
index cb501ed34940..8edfa9ff09f7 100644
--- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
@@ -213,6 +213,24 @@ void RISCVRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
   MachineBasicBlock &MBB = *MI.getParent();
   bool FrameRegIsKill = false;
 
+  // If required, pre-compute the scalable factor amount which will be used in
+  // later offset computation. Since this sequence requires up to two scratch
+  // registers -- after which one is made free -- this grants us better
+  // scavenging of scratch registers as only up to two are live at one time,
+  // rather than three.
+  Register ScalableFactorRegister;
+  unsigned ScalableAdjOpc = RISCV::ADD;
+  if (Offset.getScalable()) {
+    int64_t ScalableValue = Offset.getScalable();
+    if (ScalableValue < 0) {
+      ScalableValue = -ScalableValue;
+      ScalableAdjOpc = RISCV::SUB;
+    }
+    // 1. Get vlenb && multiply vlen with the number of vector registers.
+    ScalableFactorRegister =
+        TII->getVLENFactoredAmount(MF, MBB, II, ScalableValue);
+  }
+
   if (!isInt<12>(Offset.getFixed())) {
     // The offset won't fit in an immediate, so use a scratch register instead
     // Modify Offset and FrameReg appropriately
@@ -251,29 +269,22 @@ void RISCVRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
     }
   } else {
     // Offset = (fixed offset, scalable offset)
-    unsigned Opc = RISCV::ADD;
-    int64_t ScalableValue = Offset.getScalable();
-    if (ScalableValue < 0) {
-      ScalableValue = -ScalableValue;
-      Opc = RISCV::SUB;
-    }
-
-    // 1. Get vlenb && multiply vlen with number of vector register.
-    Register FactorRegister =
-        TII->getVLENFactoredAmount(MF, MBB, II, ScalableValue);
+    // Step 1, the scalable offset, has already been computed.
+    assert(ScalableFactorRegister &&
+           "Expected pre-computation of scalable factor in earlier step");
 
     // 2. Calculate address: FrameReg + result of multiply
     if (MI.getOpcode() == RISCV::ADDI && !Offset.getFixed()) {
-      BuildMI(MBB, II, DL, TII->get(Opc), MI.getOperand(0).getReg())
+      BuildMI(MBB, II, DL, TII->get(ScalableAdjOpc), MI.getOperand(0).getReg())
           .addReg(FrameReg, getKillRegState(FrameRegIsKill))
-          .addReg(FactorRegister, RegState::Kill);
+          .addReg(ScalableFactorRegister, RegState::Kill);
       MI.eraseFromParent();
       return;
     }
     Register VL = MRI.createVirtualRegister(&RISCV::GPRRegClass);
-    BuildMI(MBB, II, DL, TII->get(Opc), VL)
+    BuildMI(MBB, II, DL, TII->get(ScalableAdjOpc), VL)
         .addReg(FrameReg, getKillRegState(FrameRegIsKill))
-        .addReg(FactorRegister, RegState::Kill);
+        .addReg(ScalableFactorRegister, RegState::Kill);
 
     if (isRVV && Offset.getFixed()) {
       // Scalable load/store has no immediate argument. We need to add the

diff  --git a/llvm/test/CodeGen/RISCV/rvv/addi-scalable-offset.mir b/llvm/test/CodeGen/RISCV/rvv/addi-scalable-offset.mir
index efb66565539b..fcba9fe1181e 100644
--- a/llvm/test/CodeGen/RISCV/rvv/addi-scalable-offset.mir
+++ b/llvm/test/CodeGen/RISCV/rvv/addi-scalable-offset.mir
@@ -40,10 +40,10 @@ body: |
     ; CHECK: $x2 = SUB $x2, killed $x12
     ; CHECK: dead renamable $x11 = PseudoVSETVLI killed renamable $x11, 88, implicit-def $vl, implicit-def $vtype
     ; CHECK: renamable $v25 = PseudoVLE64_V_M1 killed renamable $x10, $noreg, 64, implicit $vl, implicit $vtype :: (load unknown-size from %ir.pa, align 8)
+    ; CHECK: $x11 = PseudoReadVLENB
     ; CHECK: $x10 = LUI 1048575
     ; CHECK: $x10 = ADDIW killed $x10, 1824
     ; CHECK: $x10 = ADD $x8, killed $x10
-    ; CHECK: $x11 = PseudoReadVLENB
     ; CHECK: $x10 = SUB killed $x10, killed $x11
     ; CHECK: VS1R_V killed renamable $v25, killed renamable $x10
     ; CHECK: $x10 = PseudoReadVLENB

diff  --git a/llvm/test/CodeGen/RISCV/rvv/emergency-slot.mir b/llvm/test/CodeGen/RISCV/rvv/emergency-slot.mir
index ca1dbc2a03f1..44fc7c371934 100644
--- a/llvm/test/CodeGen/RISCV/rvv/emergency-slot.mir
+++ b/llvm/test/CodeGen/RISCV/rvv/emergency-slot.mir
@@ -90,12 +90,12 @@ body:             |
   ; CHECK:   $x2 = ANDI $x2, -128
   ; CHECK:   dead renamable $x15 = PseudoVSETIVLI 1, 72, implicit-def $vl, implicit-def $vtype
   ; CHECK:   renamable $v25 = PseudoVMV_V_X_M1 killed renamable $x12, $noreg, 16, implicit $vl, implicit $vtype
+  ; CHECK:   $x11 = PseudoReadVLENB
+  ; CHECK:   $x10 = ADDI $x0, 50
+  ; CHECK:   $x11 = MUL killed $x11, killed $x10
   ; CHECK:   $x10 = LUI 1
   ; CHECK:   $x10 = ADDIW killed $x10, -1896
   ; CHECK:   $x10 = ADD $x2, killed $x10
-  ; CHECK:   $x11 = PseudoReadVLENB
-  ; CHECK:   $x12 = ADDI $x0, 50
-  ; CHECK:   $x11 = MUL killed $x11, killed $x12
   ; CHECK:   $x10 = ADD killed $x10, killed $x11
   ; CHECK:   PseudoVSPILL_M1 killed renamable $v25, killed $x10 :: (store unknown-size into %stack.1, align 8)
   ; CHECK:   renamable $x1 = ADDI $x0, 255
@@ -133,25 +133,25 @@ body:             |
   ; CHECK:   renamable $x9 = SRLI killed renamable $x9, 62
   ; CHECK:   renamable $x9 = ADD renamable $x13, killed renamable $x9
   ; CHECK:   renamable $x9 = ANDI killed renamable $x9, -4
-  ; CHECK:   renamable $x16 = SUB killed renamable $x13, killed renamable $x9
+  ; CHECK:   renamable $x16 = SUB killed renamable $x13, renamable $x9
   ; CHECK:   dead renamable $x13 = PseudoVSETIVLI 1, 64, implicit-def $vl, implicit-def $vtype
   ; CHECK:   renamable $x13 = nsw ADDI renamable $x16, -2
-  ; CHECK:   $x5 = LUI 1
-  ; CHECK:   $x9 = ADDIW killed $x5, -1896
-  ; CHECK:   $x9 = ADD $x2, killed $x9
-  ; CHECK:   $x1 = PseudoReadVLENB
-  ; CHECK:   $x5 = ADDI $x0, 50
-  ; CHECK:   $x1 = MUL killed $x1, killed $x5
+  ; CHECK:   $x5 = PseudoReadVLENB
+  ; CHECK:   $x1 = ADDI $x0, 50
+  ; CHECK:   $x5 = MUL killed $x5, killed $x1
+  ; CHECK:   $x1 = LUI 1
+  ; CHECK:   $x1 = ADDIW killed $x1, -1896
+  ; CHECK:   $x1 = ADD $x2, killed $x1
+  ; CHECK:   $x1 = ADD killed $x1, killed $x5
   ; CHECK:   $x5 = LD $x2, 0 :: (load 8 from %stack.17)
-  ; CHECK:   $x9 = ADD killed $x9, killed $x1
+  ; CHECK:   renamable $v0 = PseudoVRELOAD_M1 killed $x1 :: (load unknown-size from %stack.1, align 8)
   ; CHECK:   $x1 = LD $x2, 8 :: (load 8 from %stack.16)
-  ; CHECK:   renamable $v0 = PseudoVRELOAD_M1 killed $x9 :: (load unknown-size from %stack.1, align 8)
   ; CHECK:   renamable $v0 = PseudoVSLIDEDOWN_VX_M1 undef renamable $v0, killed renamable $v0, killed renamable $x13, $noreg, 8, implicit $vl, implicit $vtype
   ; CHECK:   renamable $x13 = PseudoVMV_X_S_M1 killed renamable $v0, 8, implicit $vl, implicit $vtype
   ; CHECK:   BLT killed renamable $x16, renamable $x27, %bb.2
   ; CHECK: bb.1:
   ; CHECK:   successors: %bb.2(0x80000000)
-  ; CHECK:   liveins: $x1, $x5, $x6, $x7, $x10, $x11, $x12, $x13, $x14, $x15, $x17, $x18, $x19, $x20, $x21, $x22, $x23, $x24, $x25, $x26, $x27, $x28, $x29, $x30, $x31
+  ; CHECK:   liveins: $x1, $x5, $x6, $x7, $x9, $x10, $x11, $x12, $x13, $x14, $x15, $x17, $x18, $x19, $x20, $x21, $x22, $x23, $x24, $x25, $x26, $x27, $x28, $x29, $x30, $x31
   ; CHECK:   renamable $x9 = COPY killed renamable $x13
   ; CHECK:   PseudoBR %bb.2
   ; CHECK: bb.2:
@@ -212,7 +212,7 @@ body:             |
     renamable $x9 = SRLI killed renamable $x9, 62
     renamable $x9 = ADD renamable $x13, killed renamable $x9
     renamable $x9 = ANDI killed renamable $x9, -4
-    renamable $x16 = SUB killed renamable $x13, killed renamable $x9
+    renamable $x16 = SUB killed renamable $x13, renamable $x9
     dead renamable $x13 = PseudoVSETIVLI 1, 64, implicit-def $vl, implicit-def $vtype
     renamable $x13 = nsw ADDI renamable $x16, -2
     renamable $v0 = PseudoVRELOAD_M1 %stack.1 :: (load unknown-size from %stack.1, align 8)
@@ -222,7 +222,7 @@ body:             |
 
   bb.1:
     successors: %bb.2
-    liveins: $x1, $x5, $x6, $x7, $x10, $x11, $x12, $x13, $x14, $x15, $x17, $x18, $x19, $x20, $x21, $x22, $x23, $x24, $x25, $x26, $x27, $x28, $x29, $x30, $x31
+    liveins: $x1, $x5, $x6, $x7, $x9, $x10, $x11, $x12, $x13, $x14, $x15, $x17, $x18, $x19, $x20, $x21, $x22, $x23, $x24, $x25, $x26, $x27, $x28, $x29, $x30, $x31
 
     renamable $x9 = COPY killed renamable $x13
     PseudoBR %bb.2


        


More information about the llvm-commits mailing list