[llvm] [RISCV] Use QC_E_ADDI while eliminating the frameindex (PR #139515)
Sudharsan Veeravalli via llvm-commits
llvm-commits at lists.llvm.org
Thu May 15 07:50:21 PDT 2025
https://github.com/svs-quic updated https://github.com/llvm/llvm-project/pull/139515
>From d58f0b46e6b14dbd9ef5e254f1dc7fa52d07fffe Mon Sep 17 00:00:00 2001
From: Sudharsan Veeravalli <quic_svs at quicinc.com>
Date: Mon, 12 May 2025 14:30:44 +0530
Subject: [PATCH 1/3] [RISCV] Use QC_E_ADDI while eliminating the frameindex
The QC_E_ADDI instruction from the Xqcilia extension takes a signed 26-bit
immediate and can be used instead of splitting the offset across two ADDIs
while eliminating the frame index.
---
llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp | 11 +++
llvm/test/CodeGen/RISCV/stack-offset.ll | 99 +++++++++++++++++++++
2 files changed, 110 insertions(+)
diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
index c6f6c9007b2b1..8c754222a3b0c 100644
--- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
@@ -288,6 +288,17 @@ void RISCVRegisterInfo::adjustReg(MachineBasicBlock &MBB,
return;
}
+ // Use the QC_E_ADDI instruction from the Xqcilia extension that can take a
+ // signed 26-bit immediate. Avoid anything which can be done with a single lui
+ // as it might be compressible.
+ if (ST.hasVendorXqcilia() && isInt<26>(Val) && (Val & 0xFFF) != 0) {
+ BuildMI(MBB, II, DL, TII->get(RISCV::QC_E_ADDI), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrcReg))
+ .addImm(Val)
+ .setMIFlag(Flag);
+ return;
+ }
+
// Try to split the offset across two ADDIs. We need to keep the intermediate
// result aligned after each ADDI. We need to determine the maximum value we
// can put in each ADDI. In the negative direction, we can use -2048 which is
diff --git a/llvm/test/CodeGen/RISCV/stack-offset.ll b/llvm/test/CodeGen/RISCV/stack-offset.ll
index 402d3546eae29..cdc117675af97 100644
--- a/llvm/test/CodeGen/RISCV/stack-offset.ll
+++ b/llvm/test/CodeGen/RISCV/stack-offset.ll
@@ -1,6 +1,8 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \
; RUN: | FileCheck %s -check-prefixes=RV32,RV32I
+; RUN: llc -mtriple=riscv32 -verify-machineinstrs -mattr=+experimental-xqcilia < %s \
+; RUN: | FileCheck %s -check-prefixes=RV32XQCILIA
; RUN: llc -mtriple=riscv32 -verify-machineinstrs -mattr=+zba < %s \
; RUN: | FileCheck %s -check-prefixes=RV32,RV32ZBA
; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \
@@ -39,6 +41,27 @@ define void @test() {
; RV32I-NEXT: .cfi_def_cfa_offset 0
; RV32I-NEXT: ret
;
+; RV32XQCILIA-LABEL: test:
+; RV32XQCILIA: # %bb.0:
+; RV32XQCILIA-NEXT: addi sp, sp, -2032
+; RV32XQCILIA-NEXT: .cfi_def_cfa_offset 2032
+; RV32XQCILIA-NEXT: sw ra, 2028(sp) # 4-byte Folded Spill
+; RV32XQCILIA-NEXT: .cfi_offset ra, -4
+; RV32XQCILIA-NEXT: qc.e.addi sp, sp, -3168
+; RV32XQCILIA-NEXT: .cfi_def_cfa_offset 5200
+; RV32XQCILIA-NEXT: addi a0, sp, 12
+; RV32XQCILIA-NEXT: qc.e.addi a1, sp, 2060
+; RV32XQCILIA-NEXT: qc.e.addi a2, sp, 4108
+; RV32XQCILIA-NEXT: qc.e.addi a3, sp, 5132
+; RV32XQCILIA-NEXT: call inspect
+; RV32XQCILIA-NEXT: qc.e.addi sp, sp, 3168
+; RV32XQCILIA-NEXT: .cfi_def_cfa_offset 2032
+; RV32XQCILIA-NEXT: lw ra, 2028(sp) # 4-byte Folded Reload
+; RV32XQCILIA-NEXT: .cfi_restore ra
+; RV32XQCILIA-NEXT: addi sp, sp, 2032
+; RV32XQCILIA-NEXT: .cfi_def_cfa_offset 0
+; RV32XQCILIA-NEXT: ret
+;
; RV32ZBA-LABEL: test:
; RV32ZBA: # %bb.0:
; RV32ZBA-NEXT: addi sp, sp, -2032
@@ -150,6 +173,25 @@ define void @align_8() {
; RV32I-NEXT: .cfi_def_cfa_offset 0
; RV32I-NEXT: ret
;
+; RV32XQCILIA-LABEL: align_8:
+; RV32XQCILIA: # %bb.0:
+; RV32XQCILIA-NEXT: addi sp, sp, -256
+; RV32XQCILIA-NEXT: .cfi_def_cfa_offset 256
+; RV32XQCILIA-NEXT: sw ra, 252(sp) # 4-byte Folded Spill
+; RV32XQCILIA-NEXT: .cfi_offset ra, -4
+; RV32XQCILIA-NEXT: qc.e.addi sp, sp, -3856
+; RV32XQCILIA-NEXT: .cfi_def_cfa_offset 4112
+; RV32XQCILIA-NEXT: addi a0, sp, 7
+; RV32XQCILIA-NEXT: qc.e.addi a1, sp, 4104
+; RV32XQCILIA-NEXT: call inspect
+; RV32XQCILIA-NEXT: qc.e.addi sp, sp, 3856
+; RV32XQCILIA-NEXT: .cfi_def_cfa_offset 256
+; RV32XQCILIA-NEXT: lw ra, 252(sp) # 4-byte Folded Reload
+; RV32XQCILIA-NEXT: .cfi_restore ra
+; RV32XQCILIA-NEXT: addi sp, sp, 256
+; RV32XQCILIA-NEXT: .cfi_def_cfa_offset 0
+; RV32XQCILIA-NEXT: ret
+;
; RV32ZBA-LABEL: align_8:
; RV32ZBA: # %bb.0:
; RV32ZBA-NEXT: addi sp, sp, -2032
@@ -246,6 +288,25 @@ define void @align_4() {
; RV32I-NEXT: .cfi_def_cfa_offset 0
; RV32I-NEXT: ret
;
+; RV32XQCILIA-LABEL: align_4:
+; RV32XQCILIA: # %bb.0:
+; RV32XQCILIA-NEXT: addi sp, sp, -256
+; RV32XQCILIA-NEXT: .cfi_def_cfa_offset 256
+; RV32XQCILIA-NEXT: sw ra, 252(sp) # 4-byte Folded Spill
+; RV32XQCILIA-NEXT: .cfi_offset ra, -4
+; RV32XQCILIA-NEXT: qc.e.addi sp, sp, -3856
+; RV32XQCILIA-NEXT: .cfi_def_cfa_offset 4112
+; RV32XQCILIA-NEXT: addi a0, sp, 7
+; RV32XQCILIA-NEXT: qc.e.addi a1, sp, 4104
+; RV32XQCILIA-NEXT: call inspect
+; RV32XQCILIA-NEXT: qc.e.addi sp, sp, 3856
+; RV32XQCILIA-NEXT: .cfi_def_cfa_offset 256
+; RV32XQCILIA-NEXT: lw ra, 252(sp) # 4-byte Folded Reload
+; RV32XQCILIA-NEXT: .cfi_restore ra
+; RV32XQCILIA-NEXT: addi sp, sp, 256
+; RV32XQCILIA-NEXT: .cfi_def_cfa_offset 0
+; RV32XQCILIA-NEXT: ret
+;
; RV32ZBA-LABEL: align_4:
; RV32ZBA: # %bb.0:
; RV32ZBA-NEXT: addi sp, sp, -2032
@@ -342,6 +403,25 @@ define void @align_2() {
; RV32-NEXT: .cfi_def_cfa_offset 0
; RV32-NEXT: ret
;
+; RV32XQCILIA-LABEL: align_2:
+; RV32XQCILIA: # %bb.0:
+; RV32XQCILIA-NEXT: addi sp, sp, -256
+; RV32XQCILIA-NEXT: .cfi_def_cfa_offset 256
+; RV32XQCILIA-NEXT: sw ra, 252(sp) # 4-byte Folded Spill
+; RV32XQCILIA-NEXT: .cfi_offset ra, -4
+; RV32XQCILIA-NEXT: qc.e.addi sp, sp, -3856
+; RV32XQCILIA-NEXT: .cfi_def_cfa_offset 4112
+; RV32XQCILIA-NEXT: addi a0, sp, 9
+; RV32XQCILIA-NEXT: qc.e.addi a1, sp, 4106
+; RV32XQCILIA-NEXT: call inspect
+; RV32XQCILIA-NEXT: qc.e.addi sp, sp, 3856
+; RV32XQCILIA-NEXT: .cfi_def_cfa_offset 256
+; RV32XQCILIA-NEXT: lw ra, 252(sp) # 4-byte Folded Reload
+; RV32XQCILIA-NEXT: .cfi_restore ra
+; RV32XQCILIA-NEXT: addi sp, sp, 256
+; RV32XQCILIA-NEXT: .cfi_def_cfa_offset 0
+; RV32XQCILIA-NEXT: ret
+;
; RV64-LABEL: align_2:
; RV64: # %bb.0:
; RV64-NEXT: addi sp, sp, -2032
@@ -395,6 +475,25 @@ define void @align_1() {
; RV32-NEXT: .cfi_def_cfa_offset 0
; RV32-NEXT: ret
;
+; RV32XQCILIA-LABEL: align_1:
+; RV32XQCILIA: # %bb.0:
+; RV32XQCILIA-NEXT: addi sp, sp, -256
+; RV32XQCILIA-NEXT: .cfi_def_cfa_offset 256
+; RV32XQCILIA-NEXT: sw ra, 252(sp) # 4-byte Folded Spill
+; RV32XQCILIA-NEXT: .cfi_offset ra, -4
+; RV32XQCILIA-NEXT: qc.e.addi sp, sp, -3856
+; RV32XQCILIA-NEXT: .cfi_def_cfa_offset 4112
+; RV32XQCILIA-NEXT: addi a0, sp, 10
+; RV32XQCILIA-NEXT: qc.e.addi a1, sp, 4107
+; RV32XQCILIA-NEXT: call inspect
+; RV32XQCILIA-NEXT: qc.e.addi sp, sp, 3856
+; RV32XQCILIA-NEXT: .cfi_def_cfa_offset 256
+; RV32XQCILIA-NEXT: lw ra, 252(sp) # 4-byte Folded Reload
+; RV32XQCILIA-NEXT: .cfi_restore ra
+; RV32XQCILIA-NEXT: addi sp, sp, 256
+; RV32XQCILIA-NEXT: .cfi_def_cfa_offset 0
+; RV32XQCILIA-NEXT: ret
+;
; RV64-LABEL: align_1:
; RV64: # %bb.0:
; RV64-NEXT: addi sp, sp, -2032
>From b49f577de3e51ac5ebc4e2198bc751e608b3a06b Mon Sep 17 00:00:00 2001
From: Sudharsan Veeravalli <quic_svs at quicinc.com>
Date: Tue, 13 May 2025 06:57:12 +0530
Subject: [PATCH 2/3] Update RISCVRegisterInfo.cpp
Improve comment
---
llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
index 8c754222a3b0c..4d92fd63f1ced 100644
--- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
@@ -290,7 +290,8 @@ void RISCVRegisterInfo::adjustReg(MachineBasicBlock &MBB,
// Use the QC_E_ADDI instruction from the Xqcilia extension that can take a
// signed 26-bit immediate. Avoid anything which can be done with a single lui
- // as it might be compressible.
+ // (4 bytes) since it will be better than using QC_E_ADDI (6 bytes). It could
+ // also be compressible in certain cases.
if (ST.hasVendorXqcilia() && isInt<26>(Val) && (Val & 0xFFF) != 0) {
BuildMI(MBB, II, DL, TII->get(RISCV::QC_E_ADDI), DestReg)
.addReg(SrcReg, getKillRegState(KillSrcReg))
>From 3804d044ebfbd24bec6ce1480e2c0af1dc955643 Mon Sep 17 00:00:00 2001
From: Sudharsan Veeravalli <quic_svs at quicinc.com>
Date: Thu, 15 May 2025 20:18:59 +0530
Subject: [PATCH 3/3] Handle lui compress
---
llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp | 30 +++++--
llvm/test/CodeGen/RISCV/stack-offset.ll | 97 +++++++++++++++++++++
2 files changed, 118 insertions(+), 9 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
index 4d92fd63f1ced..d08d092297135 100644
--- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
@@ -289,15 +289,27 @@ void RISCVRegisterInfo::adjustReg(MachineBasicBlock &MBB,
}
// Use the QC_E_ADDI instruction from the Xqcilia extension that can take a
- // signed 26-bit immediate. Avoid anything which can be done with a single lui
- // (4 bytes) since it will be better than using QC_E_ADDI (6 bytes). It could
- // also be compressible in certain cases.
- if (ST.hasVendorXqcilia() && isInt<26>(Val) && (Val & 0xFFF) != 0) {
- BuildMI(MBB, II, DL, TII->get(RISCV::QC_E_ADDI), DestReg)
- .addReg(SrcReg, getKillRegState(KillSrcReg))
- .addImm(Val)
- .setMIFlag(Flag);
- return;
+ // signed 26-bit immediate.
+ if (ST.hasVendorXqcilia() && isInt<26>(Val)) {
+ // The one case where using this instruction is sub-optimal is if Val can be
+ // materialized with a single compressible LUI and the following add/sub is
+ // also compressible. Avoid doing this if that is the case.
+ int Hi20 = (Val & 0xFFFFF000) >> 12;
+ bool IsCompressLUI =
+ ((Val & 0xFFF) == 0) && (Hi20 != 0) &&
+ (isUInt<5>(Hi20) || (Hi20 >= 0xfffe0 && Hi20 <= 0xfffff));
+ bool IsCompressAddSub =
+ (SrcReg == DestReg) &&
+ ((Val > 0 && RISCV::GPRNoX0RegClass.contains(SrcReg)) ||
+ (Val < 0 && RISCV::GPRCRegClass.contains(SrcReg)));
+
+ if (!(IsCompressLUI && IsCompressAddSub)) {
+ BuildMI(MBB, II, DL, TII->get(RISCV::QC_E_ADDI), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrcReg))
+ .addImm(Val)
+ .setMIFlag(Flag);
+ return;
+ }
}
// Try to split the offset across two ADDIs. We need to keep the intermediate
diff --git a/llvm/test/CodeGen/RISCV/stack-offset.ll b/llvm/test/CodeGen/RISCV/stack-offset.ll
index cdc117675af97..3dc4fcfe26a82 100644
--- a/llvm/test/CodeGen/RISCV/stack-offset.ll
+++ b/llvm/test/CodeGen/RISCV/stack-offset.ll
@@ -521,3 +521,100 @@ define void @align_1() {
call void (...) @inspect(ptr %p1, ptr %p2)
ret void
}
+
+define void @align_1_lui() {
+; RV32-LABEL: align_1_lui:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -2032
+; RV32-NEXT: .cfi_def_cfa_offset 2032
+; RV32-NEXT: sw ra, 2028(sp) # 4-byte Folded Spill
+; RV32-NEXT: .cfi_offset ra, -4
+; RV32-NEXT: lui a0, 1
+; RV32-NEXT: sub sp, sp, a0
+; RV32-NEXT: .cfi_def_cfa_offset 6128
+; RV32-NEXT: addi a0, sp, 8
+; RV32-NEXT: lui a1, 1
+; RV32-NEXT: addi a1, a1, 2027
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: call inspect
+; RV32-NEXT: lui a0, 1
+; RV32-NEXT: add sp, sp, a0
+; RV32-NEXT: .cfi_def_cfa_offset 2032
+; RV32-NEXT: lw ra, 2028(sp) # 4-byte Folded Reload
+; RV32-NEXT: .cfi_restore ra
+; RV32-NEXT: addi sp, sp, 2032
+; RV32-NEXT: .cfi_def_cfa_offset 0
+; RV32-NEXT: ret
+;
+; RV32XQCILIA-LABEL: align_1_lui:
+; RV32XQCILIA: # %bb.0:
+; RV32XQCILIA-NEXT: addi sp, sp, -2032
+; RV32XQCILIA-NEXT: .cfi_def_cfa_offset 2032
+; RV32XQCILIA-NEXT: sw ra, 2028(sp) # 4-byte Folded Spill
+; RV32XQCILIA-NEXT: .cfi_offset ra, -4
+; RV32XQCILIA-NEXT: qc.e.addi sp, sp, -4096
+; RV32XQCILIA-NEXT: .cfi_def_cfa_offset 6128
+; RV32XQCILIA-NEXT: addi a0, sp, 8
+; RV32XQCILIA-NEXT: qc.e.addi a1, sp, 6123
+; RV32XQCILIA-NEXT: call inspect
+; RV32XQCILIA-NEXT: lui a0, 1
+; RV32XQCILIA-NEXT: add sp, sp, a0
+; RV32XQCILIA-NEXT: .cfi_def_cfa_offset 2032
+; RV32XQCILIA-NEXT: lw ra, 2028(sp) # 4-byte Folded Reload
+; RV32XQCILIA-NEXT: .cfi_restore ra
+; RV32XQCILIA-NEXT: addi sp, sp, 2032
+; RV32XQCILIA-NEXT: .cfi_def_cfa_offset 0
+; RV32XQCILIA-NEXT: ret
+;
+; RV64I-LABEL: align_1_lui:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -2032
+; RV64I-NEXT: .cfi_def_cfa_offset 2032
+; RV64I-NEXT: sd ra, 2024(sp) # 8-byte Folded Spill
+; RV64I-NEXT: .cfi_offset ra, -8
+; RV64I-NEXT: lui a0, 1
+; RV64I-NEXT: addiw a0, a0, 16
+; RV64I-NEXT: sub sp, sp, a0
+; RV64I-NEXT: .cfi_def_cfa_offset 6144
+; RV64I-NEXT: addi a0, sp, 20
+; RV64I-NEXT: lui a1, 1
+; RV64I-NEXT: addiw a1, a1, 2039
+; RV64I-NEXT: add a1, sp, a1
+; RV64I-NEXT: call inspect
+; RV64I-NEXT: lui a0, 1
+; RV64I-NEXT: addiw a0, a0, 16
+; RV64I-NEXT: add sp, sp, a0
+; RV64I-NEXT: .cfi_def_cfa_offset 2032
+; RV64I-NEXT: ld ra, 2024(sp) # 8-byte Folded Reload
+; RV64I-NEXT: .cfi_restore ra
+; RV64I-NEXT: addi sp, sp, 2032
+; RV64I-NEXT: .cfi_def_cfa_offset 0
+; RV64I-NEXT: ret
+;
+; RV64ZBA-LABEL: align_1_lui:
+; RV64ZBA: # %bb.0:
+; RV64ZBA-NEXT: addi sp, sp, -2032
+; RV64ZBA-NEXT: .cfi_def_cfa_offset 2032
+; RV64ZBA-NEXT: sd ra, 2024(sp) # 8-byte Folded Spill
+; RV64ZBA-NEXT: .cfi_offset ra, -8
+; RV64ZBA-NEXT: li a0, -514
+; RV64ZBA-NEXT: sh3add sp, a0, sp
+; RV64ZBA-NEXT: .cfi_def_cfa_offset 6144
+; RV64ZBA-NEXT: addi a0, sp, 20
+; RV64ZBA-NEXT: lui a1, 1
+; RV64ZBA-NEXT: addiw a1, a1, 2039
+; RV64ZBA-NEXT: add a1, sp, a1
+; RV64ZBA-NEXT: call inspect
+; RV64ZBA-NEXT: li a0, 514
+; RV64ZBA-NEXT: sh3add sp, a0, sp
+; RV64ZBA-NEXT: .cfi_def_cfa_offset 2032
+; RV64ZBA-NEXT: ld ra, 2024(sp) # 8-byte Folded Reload
+; RV64ZBA-NEXT: .cfi_restore ra
+; RV64ZBA-NEXT: addi sp, sp, 2032
+; RV64ZBA-NEXT: .cfi_def_cfa_offset 0
+; RV64ZBA-NEXT: ret
+ %p2 = alloca i8, align 1
+ %p1 = alloca [6115 x i8], align 1
+ call void (...) @inspect(ptr %p1, ptr %p2)
+ ret void
+}
More information about the llvm-commits
mailing list