[llvm] 46e8970 - [RISCV] Prevent use of t0(aka x5) as rs1 for jalr instructions.

Tue Jul 13 09:53:19 PDT 2021

Author: Craig Topper
Date: 2021-07-13T09:46:21-07:00
New Revision: 46e89708170c40e8cf0305b6de048ca879f43aab

URL: https://github.com/llvm/llvm-project/commit/46e89708170c40e8cf0305b6de048ca879f43aab
DIFF: https://github.com/llvm/llvm-project/commit/46e89708170c40e8cf0305b6de048ca879f43aab.diff

LOG: [RISCV] Prevent use of t0(aka x5) as rs1 for jalr instructions.

Some microarchitectures treat rs1=x1/x5 on jalr as a hint to pop
the return-address stack. We should avoid using x5 on jalr
instructions since we aren't using x5 as an alternate link register.

Differential Revision: https://reviews.llvm.org/D105875

Added: 
    

Modified: 
    llvm/lib/Target/RISCV/RISCVInstrInfo.td
    llvm/lib/Target/RISCV/RISCVRegisterInfo.td
    llvm/test/CodeGen/RISCV/calls.ll
    llvm/test/CodeGen/RISCV/tail-calls.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.td b/llvm/lib/Target/RISCV/RISCVInstrInfo.td
index 1c1277426385..65767ae1e682 100644

--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.td
@@ -1002,12 +1002,12 @@ def PseudoBR : Pseudo<(outs), (ins simm21_lsb0_jal:$imm20), [(br bb:$imm20)]>,
                PseudoInstExpansion<(JAL X0, simm21_lsb0_jal:$imm20)>;
 
 let isBarrier = 1, isBranch = 1, isIndirectBranch = 1, isTerminator = 1 in
-def PseudoBRIND : Pseudo<(outs), (ins GPR:$rs1, simm12:$imm12), []>,
+def PseudoBRIND : Pseudo<(outs), (ins GPRJALR:$rs1, simm12:$imm12), []>,
                   PseudoInstExpansion<(JALR X0, GPR:$rs1, simm12:$imm12)>;
 
-def : Pat<(brind GPR:$rs1), (PseudoBRIND GPR:$rs1, 0)>;
-def : Pat<(brind (add GPR:$rs1, simm12:$imm12)),
-          (PseudoBRIND GPR:$rs1, simm12:$imm12)>;
+def : Pat<(brind GPRJALR:$rs1), (PseudoBRIND GPRJALR:$rs1, 0)>;
+def : Pat<(brind (add GPRJALR:$rs1, simm12:$imm12)),
+          (PseudoBRIND GPRJALR:$rs1, simm12:$imm12)>;
 
 // PseudoCALLReg is a generic pseudo instruction for calls which will eventually
 // expand to auipc and jalr while encoding, with any given register used as the
@@ -1039,8 +1039,8 @@ def : Pat<(riscv_sret_flag), (SRET X0, X0)>;
 def : Pat<(riscv_mret_flag), (MRET X0, X0)>;
 
 let isCall = 1, Defs = [X1] in
-def PseudoCALLIndirect : Pseudo<(outs), (ins GPR:$rs1),
-                                [(riscv_call GPR:$rs1)]>,
+def PseudoCALLIndirect : Pseudo<(outs), (ins GPRJALR:$rs1),
+                                [(riscv_call GPRJALR:$rs1)]>,
                          PseudoInstExpansion<(JALR X1, GPR:$rs1, 0)>;
 
 let isBarrier = 1, isReturn = 1, isTerminator = 1 in

diff  --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.td b/llvm/lib/Target/RISCV/RISCVRegisterInfo.td
index 45264f20b5f9..f9e3c514532e 100644
--- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.td
+++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.td
@@ -162,6 +162,20 @@ def GPRNoX0X2 : RegisterClass<"RISCV", [XLenVT], 32, (add
   let RegInfos = XLenRI;
 }
 
+// Don't use X1 or X5 for JALR since that is a hint to pop the return address
+// stack on some microarchitectures. Also remove the reserved registers X0, X2,
+// X3, and X4 as it reduces the number of register classes that get synthesized
+// by tablegen.
+def GPRJALR : RegisterClass<"RISCV", [XLenVT], 32, (add
+    (sequence "X%u", 10, 17),
+    (sequence "X%u", 6, 7),
+    (sequence "X%u", 28, 31),
+    (sequence "X%u", 8, 9),
+    (sequence "X%u", 18, 27)
+  )> {
+  let RegInfos = XLenRI;
+}
+
 def GPRC : RegisterClass<"RISCV", [XLenVT], 32, (add
     (sequence "X%u", 10, 15),
     (sequence "X%u", 8, 9)
@@ -171,9 +185,10 @@ def GPRC : RegisterClass<"RISCV", [XLenVT], 32, (add
 
 // For indirect tail calls, we can't use callee-saved registers, as they are
 // restored to the saved value before the tail call, which would clobber a call
-// address.
+// address. We shouldn't use x5 since that is a hint for to pop the return
+// address stack on some microarchitectures.
 def GPRTC : RegisterClass<"RISCV", [XLenVT], 32, (add
-    (sequence "X%u", 5, 7),
+    (sequence "X%u", 6, 7),
     (sequence "X%u", 10, 17),
     (sequence "X%u", 28, 31)
   )> {

diff  --git a/llvm/test/CodeGen/RISCV/calls.ll b/llvm/test/CodeGen/RISCV/calls.ll
index 91644e6d7e27..a54d08dbbde6 100644
--- a/llvm/test/CodeGen/RISCV/calls.ll
+++ b/llvm/test/CodeGen/RISCV/calls.ll
@@ -114,6 +114,46 @@ define i32 @test_call_indirect(i32 (i32)* %a, i32 %b) nounwind {
   ret i32 %1
 }
 
+; Make sure we don't use t0 as the source for jalr as that is a hint to pop the
+; return address stack on some microarchitectures.
+define i32 @test_call_indirect_no_t0(i32 (i32, i32, i32, i32, i32, i32, i32)* %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %g, i32 %h) nounwind {
+; RV32I-LABEL: test_call_indirect_no_t0:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    addi sp, sp, -16
+; RV32I-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    mv t1, a0
+; RV32I-NEXT:    mv a0, a1
+; RV32I-NEXT:    mv a1, a2
+; RV32I-NEXT:    mv a2, a3
+; RV32I-NEXT:    mv a3, a4
+; RV32I-NEXT:    mv a4, a5
+; RV32I-NEXT:    mv a5, a6
+; RV32I-NEXT:    mv a6, a7
+; RV32I-NEXT:    jalr t1
+; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    addi sp, sp, 16
+; RV32I-NEXT:    ret
+;
+; RV32I-PIC-LABEL: test_call_indirect_no_t0:
+; RV32I-PIC:       # %bb.0:
+; RV32I-PIC-NEXT:    addi sp, sp, -16
+; RV32I-PIC-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-PIC-NEXT:    mv t1, a0
+; RV32I-PIC-NEXT:    mv a0, a1
+; RV32I-PIC-NEXT:    mv a1, a2
+; RV32I-PIC-NEXT:    mv a2, a3
+; RV32I-PIC-NEXT:    mv a3, a4
+; RV32I-PIC-NEXT:    mv a4, a5
+; RV32I-PIC-NEXT:    mv a5, a6
+; RV32I-PIC-NEXT:    mv a6, a7
+; RV32I-PIC-NEXT:    jalr t1
+; RV32I-PIC-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-PIC-NEXT:    addi sp, sp, 16
+; RV32I-PIC-NEXT:    ret
+  %1 = call i32 %a(i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %g, i32 %h)
+  ret i32 %1
+}
+
 ; Ensure that calls to fastcc functions aren't rejected. Such calls may be
 ; introduced when compiling with optimisation.
 

diff  --git a/llvm/test/CodeGen/RISCV/tail-calls.ll b/llvm/test/CodeGen/RISCV/tail-calls.ll
index 7df14fc2ca10..c358b6172d46 100644
--- a/llvm/test/CodeGen/RISCV/tail-calls.ll
+++ b/llvm/test/CodeGen/RISCV/tail-calls.ll
@@ -58,6 +58,24 @@ entry:
   ret void
 }
 
+; Make sure we don't use t0 as the source for jr as that is a hint to pop the
+; return address stack on some microarchitectures.
+define i32 @caller_indirect_no_t0(i32 (i32, i32, i32, i32, i32, i32, i32)* %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i32 %6, i32 %7) {
+; CHECK-LABEL: caller_indirect_no_t0:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    mv t1, a0
+; CHECK-NEXT:    mv a0, a1
+; CHECK-NEXT:    mv a1, a2
+; CHECK-NEXT:    mv a2, a3
+; CHECK-NEXT:    mv a3, a4
+; CHECK-NEXT:    mv a4, a5
+; CHECK-NEXT:    mv a5, a6
+; CHECK-NEXT:    mv a6, a7
+; CHECK-NEXT:    jr t1
+  %9 = tail call i32 %0(i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i32 %6, i32 %7)
+  ret i32 %9
+}
+
 ; Do not tail call optimize functions with varargs passed by stack.
 declare i32 @callee_varargs(i32, ...)
 define void @caller_varargs(i32 %a, i32 %b) nounwind {