[llvm] 1bb7766 - [LoongArch] Optimize stack realignment using BSTRINS instruction
Author: wanglei
Date: 2023-08-23T09:21:42+08:00
New Revision: 1bb7766489803bb5cc4752ecade1164b31b758b5
URL: https://github.com/llvm/llvm-project/commit/1bb7766489803bb5cc4752ecade1164b31b758b5
DIFF: https://github.com/llvm/llvm-project/commit/1bb7766489803bb5cc4752ecade1164b31b758b5.diff
LOG: [LoongArch] Optimize stack realignment using BSTRINS instruction
Prior to this change, stack realignment was performed with a
two-instruction SRLI/SLLI sequence: shift `$sp` right by
Log2(MaxAlign), then left by the same amount, clearing the low bits.
With this patch, stack realignment is done by a single `BSTRINS`
instruction that inserts `$zero` into those low bits directly (see the
sketch below).
Reviewed By: SixWeining, xen0n
Differential Revision: https://reviews.llvm.org/D158384
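For reference, the two sequences compute the same aligned value:
clearing the low Log2(MaxAlign) bits by a right/left shift pair is
equivalent to inserting zero into the bit range [Log2(MaxAlign)-1, 0].
Below is a minimal standalone C++ sketch of that equivalence; the
`bstrins` helper is an illustrative model of the instruction's
semantics, not an LLVM API:

  #include <cassert>
  #include <cstdint>

  // Model of `bstrins.d rd, rj, msbd, lsbd`: replace bits [msbd:lsbd]
  // of rd with the low (msbd - lsbd + 1) bits of rj.
  static uint64_t bstrins(uint64_t rd, uint64_t rj, unsigned msbd,
                          unsigned lsbd) {
    unsigned Width = msbd - lsbd + 1;
    uint64_t Mask = (Width == 64 ? ~0ULL : ((1ULL << Width) - 1)) << lsbd;
    return (rd & ~Mask) | ((rj << lsbd) & Mask);
  }

  int main() {
    uint64_t SP = 0x7ffff1234;  // an unaligned stack pointer value
    unsigned ShiftAmount = 6;   // Log2(MaxAlign) for 64-byte alignment

    // Old sequence: srli.d $a1, $sp, 6 ; slli.d $sp, $a1, 6
    uint64_t Old = (SP >> ShiftAmount) << ShiftAmount;
    // New sequence: bstrins.d $sp, $zero, 5, 0
    uint64_t New = bstrins(SP, /*rj=*/0, ShiftAmount - 1, 0);

    assert(Old == New && "both clear the low ShiftAmount bits");
    return 0;
  }

On LA64 with MaxAlign = 64 this is exactly the `bstrins.d $sp, $zero,
5, 0` that the updated test checks below expect, and it saves one
instruction and one scratch register in the prologue.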
Added:
Modified:
llvm/lib/Target/LoongArch/LoongArchFrameLowering.cpp
llvm/test/CodeGen/LoongArch/stack-realignment-with-variable-sized-objects.ll
llvm/test/CodeGen/LoongArch/stack-realignment.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/LoongArch/LoongArchFrameLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchFrameLowering.cpp
index 0d78e39b38289c..dc2d61a6e4740e 100644
--- a/llvm/lib/Target/LoongArch/LoongArchFrameLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchFrameLowering.cpp
@@ -291,18 +291,15 @@ void LoongArchFrameLowering::emitPrologue(MachineFunction &MF,
if (hasFP(MF)) {
// Realign stack.
if (RI->hasStackRealignment(MF)) {
- unsigned ShiftAmount = Log2(MFI.getMaxAlign());
- Register VR =
- MF.getRegInfo().createVirtualRegister(&LoongArch::GPRRegClass);
+ unsigned Align = Log2(MFI.getMaxAlign());
+ assert(Align > 0 && "The stack realignment size is invalid!");
BuildMI(MBB, MBBI, DL,
- TII->get(IsLA64 ? LoongArch::SRLI_D : LoongArch::SRLI_W), VR)
+ TII->get(IsLA64 ? LoongArch::BSTRINS_D : LoongArch::BSTRINS_W),
+ SPReg)
.addReg(SPReg)
- .addImm(ShiftAmount)
- .setMIFlag(MachineInstr::FrameSetup);
- BuildMI(MBB, MBBI, DL,
- TII->get(IsLA64 ? LoongArch::SLLI_D : LoongArch::SLLI_W), SPReg)
- .addReg(VR)
- .addImm(ShiftAmount)
+ .addReg(LoongArch::R0)
+ .addImm(Align - 1)
+ .addImm(0)
.setMIFlag(MachineInstr::FrameSetup);
// FP will be used to restore the frame in the epilogue, so we need
// another base register BP to record SP after re-alignment. SP will
diff --git a/llvm/test/CodeGen/LoongArch/stack-realignment-with-variable-sized-objects.ll b/llvm/test/CodeGen/LoongArch/stack-realignment-with-variable-sized-objects.ll
index e149f3748d4a60..30a352215606ff 100644
--- a/llvm/test/CodeGen/LoongArch/stack-realignment-with-variable-sized-objects.ll
+++ b/llvm/test/CodeGen/LoongArch/stack-realignment-with-variable-sized-objects.ll
@@ -19,8 +19,7 @@ define void @caller(i32 %n) {
; LA32-NEXT: .cfi_offset 31, -12
; LA32-NEXT: addi.w $fp, $sp, 64
; LA32-NEXT: .cfi_def_cfa 22, 0
-; LA32-NEXT: srli.w $a1, $sp, 6
-; LA32-NEXT: slli.w $sp, $a1, 6
+; LA32-NEXT: bstrins.w $sp, $zero, 5, 0
; LA32-NEXT: move $s8, $sp
; LA32-NEXT: addi.w $a0, $a0, 15
; LA32-NEXT: addi.w $a1, $zero, -16
@@ -48,8 +47,7 @@ define void @caller(i32 %n) {
; LA64-NEXT: .cfi_offset 31, -24
; LA64-NEXT: addi.d $fp, $sp, 64
; LA64-NEXT: .cfi_def_cfa 22, 0
-; LA64-NEXT: srli.d $a1, $sp, 6
-; LA64-NEXT: slli.d $sp, $a1, 6
+; LA64-NEXT: bstrins.d $sp, $zero, 5, 0
; LA64-NEXT: move $s8, $sp
; LA64-NEXT: bstrpick.d $a0, $a0, 31, 0
; LA64-NEXT: addi.d $a0, $a0, 15
diff --git a/llvm/test/CodeGen/LoongArch/stack-realignment.ll b/llvm/test/CodeGen/LoongArch/stack-realignment.ll
index a9f3fc4e2a0e50..ac1397a9370de2 100644
--- a/llvm/test/CodeGen/LoongArch/stack-realignment.ll
+++ b/llvm/test/CodeGen/LoongArch/stack-realignment.ll
@@ -17,8 +17,7 @@ define void @caller32() {
; LA32-NEXT: .cfi_offset 22, -8
; LA32-NEXT: addi.w $fp, $sp, 32
; LA32-NEXT: .cfi_def_cfa 22, 0
-; LA32-NEXT: srli.w $a0, $sp, 5
-; LA32-NEXT: slli.w $sp, $a0, 5
+; LA32-NEXT: bstrins.w $sp, $zero, 4, 0
; LA32-NEXT: addi.w $a0, $sp, 0
; LA32-NEXT: bl %plt(callee)
; LA32-NEXT: addi.w $sp, $fp, -32
@@ -37,8 +36,7 @@ define void @caller32() {
; LA64-NEXT: .cfi_offset 22, -16
; LA64-NEXT: addi.d $fp, $sp, 32
; LA64-NEXT: .cfi_def_cfa 22, 0
-; LA64-NEXT: srli.d $a0, $sp, 5
-; LA64-NEXT: slli.d $sp, $a0, 5
+; LA64-NEXT: bstrins.d $sp, $zero, 4, 0
; LA64-NEXT: addi.d $a0, $sp, 0
; LA64-NEXT: bl %plt(callee)
; LA64-NEXT: addi.d $sp, $fp, -32
@@ -91,8 +89,7 @@ define void @caller64() {
; LA32-NEXT: .cfi_offset 22, -8
; LA32-NEXT: addi.w $fp, $sp, 64
; LA32-NEXT: .cfi_def_cfa 22, 0
-; LA32-NEXT: srli.w $a0, $sp, 6
-; LA32-NEXT: slli.w $sp, $a0, 6
+; LA32-NEXT: bstrins.w $sp, $zero, 5, 0
; LA32-NEXT: addi.w $a0, $sp, 0
; LA32-NEXT: bl %plt(callee)
; LA32-NEXT: addi.w $sp, $fp, -64
@@ -111,8 +108,7 @@ define void @caller64() {
; LA64-NEXT: .cfi_offset 22, -16
; LA64-NEXT: addi.d $fp, $sp, 64
; LA64-NEXT: .cfi_def_cfa 22, 0
-; LA64-NEXT: srli.d $a0, $sp, 6
-; LA64-NEXT: slli.d $sp, $a0, 6
+; LA64-NEXT: bstrins.d $sp, $zero, 5, 0
; LA64-NEXT: addi.d $a0, $sp, 0
; LA64-NEXT: bl %plt(callee)
; LA64-NEXT: addi.d $sp, $fp, -64
@@ -165,8 +161,7 @@ define void @caller128() {
; LA32-NEXT: .cfi_offset 22, -8
; LA32-NEXT: addi.w $fp, $sp, 128
; LA32-NEXT: .cfi_def_cfa 22, 0
-; LA32-NEXT: srli.w $a0, $sp, 7
-; LA32-NEXT: slli.w $sp, $a0, 7
+; LA32-NEXT: bstrins.w $sp, $zero, 6, 0
; LA32-NEXT: addi.w $a0, $sp, 0
; LA32-NEXT: bl %plt(callee)
; LA32-NEXT: addi.w $sp, $fp, -128
@@ -185,8 +180,7 @@ define void @caller128() {
; LA64-NEXT: .cfi_offset 22, -16
; LA64-NEXT: addi.d $fp, $sp, 128
; LA64-NEXT: .cfi_def_cfa 22, 0
-; LA64-NEXT: srli.d $a0, $sp, 7
-; LA64-NEXT: slli.d $sp, $a0, 7
+; LA64-NEXT: bstrins.d $sp, $zero, 6, 0
; LA64-NEXT: addi.d $a0, $sp, 0
; LA64-NEXT: bl %plt(callee)
; LA64-NEXT: addi.d $sp, $fp, -128
@@ -239,8 +233,7 @@ define void @caller256() {
; LA32-NEXT: .cfi_offset 22, -8
; LA32-NEXT: addi.w $fp, $sp, 256
; LA32-NEXT: .cfi_def_cfa 22, 0
-; LA32-NEXT: srli.w $a0, $sp, 8
-; LA32-NEXT: slli.w $sp, $a0, 8
+; LA32-NEXT: bstrins.w $sp, $zero, 7, 0
; LA32-NEXT: addi.w $a0, $sp, 0
; LA32-NEXT: bl %plt(callee)
; LA32-NEXT: addi.w $sp, $fp, -256
@@ -259,8 +252,7 @@ define void @caller256() {
; LA64-NEXT: .cfi_offset 22, -16
; LA64-NEXT: addi.d $fp, $sp, 256
; LA64-NEXT: .cfi_def_cfa 22, 0
-; LA64-NEXT: srli.d $a0, $sp, 8
-; LA64-NEXT: slli.d $sp, $a0, 8
+; LA64-NEXT: bstrins.d $sp, $zero, 7, 0
; LA64-NEXT: addi.d $a0, $sp, 0
; LA64-NEXT: bl %plt(callee)
; LA64-NEXT: addi.d $sp, $fp, -256
@@ -313,8 +305,7 @@ define void @caller512() {
; LA32-NEXT: .cfi_offset 22, -8
; LA32-NEXT: addi.w $fp, $sp, 1024
; LA32-NEXT: .cfi_def_cfa 22, 0
-; LA32-NEXT: srli.w $a0, $sp, 9
-; LA32-NEXT: slli.w $sp, $a0, 9
+; LA32-NEXT: bstrins.w $sp, $zero, 8, 0
; LA32-NEXT: addi.w $a0, $sp, 512
; LA32-NEXT: bl %plt(callee)
; LA32-NEXT: addi.w $sp, $fp, -1024
@@ -333,8 +324,7 @@ define void @caller512() {
; LA64-NEXT: .cfi_offset 22, -16
; LA64-NEXT: addi.d $fp, $sp, 1024
; LA64-NEXT: .cfi_def_cfa 22, 0
-; LA64-NEXT: srli.d $a0, $sp, 9
-; LA64-NEXT: slli.d $sp, $a0, 9
+; LA64-NEXT: bstrins.d $sp, $zero, 8, 0
; LA64-NEXT: addi.d $a0, $sp, 512
; LA64-NEXT: bl %plt(callee)
; LA64-NEXT: addi.d $sp, $fp, -1024
@@ -388,8 +378,7 @@ define void @caller1024() {
; LA32-NEXT: addi.w $fp, $sp, 2032
; LA32-NEXT: .cfi_def_cfa 22, 0
; LA32-NEXT: addi.w $sp, $sp, -16
-; LA32-NEXT: srli.w $a0, $sp, 10
-; LA32-NEXT: slli.w $sp, $a0, 10
+; LA32-NEXT: bstrins.w $sp, $zero, 9, 0
; LA32-NEXT: addi.w $a0, $sp, 1024
; LA32-NEXT: bl %plt(callee)
; LA32-NEXT: addi.w $sp, $fp, -2048
@@ -410,8 +399,7 @@ define void @caller1024() {
; LA64-NEXT: addi.d $fp, $sp, 2032
; LA64-NEXT: .cfi_def_cfa 22, 0
; LA64-NEXT: addi.d $sp, $sp, -16
-; LA64-NEXT: srli.d $a0, $sp, 10
-; LA64-NEXT: slli.d $sp, $a0, 10
+; LA64-NEXT: bstrins.d $sp, $zero, 9, 0
; LA64-NEXT: addi.d $a0, $sp, 1024
; LA64-NEXT: bl %plt(callee)
; LA64-NEXT: addi.d $sp, $fp, -2048
@@ -467,8 +455,7 @@ define void @caller2048() {
; LA32-NEXT: .cfi_def_cfa 22, 0
; LA32-NEXT: addi.w $sp, $sp, -2048
; LA32-NEXT: addi.w $sp, $sp, -16
-; LA32-NEXT: srli.w $a0, $sp, 11
-; LA32-NEXT: slli.w $sp, $a0, 11
+; LA32-NEXT: bstrins.w $sp, $zero, 10, 0
; LA32-NEXT: ori $a0, $zero, 2048
; LA32-NEXT: add.w $a0, $sp, $a0
; LA32-NEXT: bl %plt(callee)
@@ -493,8 +480,7 @@ define void @caller2048() {
; LA64-NEXT: .cfi_def_cfa 22, 0
; LA64-NEXT: addi.d $sp, $sp, -2048
; LA64-NEXT: addi.d $sp, $sp, -16
-; LA64-NEXT: srli.d $a0, $sp, 11
-; LA64-NEXT: slli.d $sp, $a0, 11
+; LA64-NEXT: bstrins.d $sp, $zero, 10, 0
; LA64-NEXT: ori $a0, $zero, 2048
; LA64-NEXT: add.d $a0, $sp, $a0
; LA64-NEXT: bl %plt(callee)
@@ -554,8 +540,7 @@ define void @caller4096() {
; LA32-NEXT: lu12i.w $a0, 1
; LA32-NEXT: ori $a0, $a0, 2064
; LA32-NEXT: sub.w $sp, $sp, $a0
-; LA32-NEXT: srli.w $a0, $sp, 12
-; LA32-NEXT: slli.w $sp, $a0, 12
+; LA32-NEXT: bstrins.w $sp, $zero, 11, 0
; LA32-NEXT: lu12i.w $a0, 1
; LA32-NEXT: add.w $a0, $sp, $a0
; LA32-NEXT: bl %plt(callee)
@@ -582,8 +567,7 @@ define void @caller4096() {
; LA64-NEXT: lu12i.w $a0, 1
; LA64-NEXT: ori $a0, $a0, 2064
; LA64-NEXT: sub.d $sp, $sp, $a0
-; LA64-NEXT: srli.d $a0, $sp, 12
-; LA64-NEXT: slli.d $sp, $a0, 12
+; LA64-NEXT: bstrins.d $sp, $zero, 11, 0
; LA64-NEXT: lu12i.w $a0, 1
; LA64-NEXT: add.d $a0, $sp, $a0
; LA64-NEXT: bl %plt(callee)