[llvm] de0bcd0 - [RISCV] Use QC_E_ADDI while eliminating the frameindex (#139515)
Sudharsan Veeravalli via llvm-commits
llvm-commits at lists.llvm.org
Thu May 15 17:17:07 PDT 2025
Author: Sudharsan Veeravalli
Date: 2025-05-16T05:47:04+05:30
New Revision: de0bcd0564e4e67ae2afe1bad41c7bc505362e19
URL: https://github.com/llvm/llvm-project/commit/de0bcd0564e4e67ae2afe1bad41c7bc505362e19
DIFF: https://github.com/llvm/llvm-project/commit/de0bcd0564e4e67ae2afe1bad41c7bc505362e19.diff
LOG: [RISCV] Use QC_E_ADDI while eliminating the frameindex (#139515)
The QC_E_ADDI instruction from the Xqcilia extension takes a signed
26-bit immediate and can be used instead of splitting the offset across
two ADDIs while eliminating the frame index.
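
For context (my own illustration, not part of the commit): a plain ADDI only
encodes a signed 12-bit immediate, so an out-of-range stack offset previously
had to be split across two ADDIs or materialized into a scratch register.
QC_E_ADDI widens that to a signed 26-bit immediate, covering offsets like the
-3168 adjustment in the updated test in a single instruction. A minimal
standalone C++ sketch of the ranges involved (fitsSignedBits is a stand-in for
llvm::isInt<N>, and the fallback description is simplified):

#include <cstdint>
#include <cstdio>

// Stand-in for llvm::isInt<N>: does Val fit in an N-bit signed immediate?
template <unsigned N> bool fitsSignedBits(int64_t Val) {
  return Val >= -(INT64_C(1) << (N - 1)) && Val < (INT64_C(1) << (N - 1));
}

// How a stack-pointer adjustment of Offset could be encoded.
const char *adjustmentKind(int64_t Offset) {
  if (fitsSignedBits<12>(Offset))
    return "single ADDI";
  if (fitsSignedBits<26>(Offset))
    return "single QC_E_ADDI (Xqcilia)";
  return "split / materialize into a scratch register";
}

int main() {
  const int64_t Offsets[] = {2047, -3168, -33554432, 40000000};
  for (int64_t Off : Offsets)
    std::printf("%lld -> %s\n", (long long)Off, adjustmentKind(Off));
  return 0;
}
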
Added:
Modified:
llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
llvm/test/CodeGen/RISCV/stack-offset.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
index 65a6f54d6c8aa..112142e1ef2f2 100644
--- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
@@ -291,6 +291,30 @@ void RISCVRegisterInfo::adjustReg(MachineBasicBlock &MBB,
return;
}
+ // Use the QC_E_ADDI instruction from the Xqcilia extension that can take a
+ // signed 26-bit immediate.
+ if (ST.hasVendorXqcilia() && isInt<26>(Val)) {
+ // The one case where using this instruction is sub-optimal is if Val can be
+ // materialized with a single compressible LUI and the following add/sub is
+ // also compressible. Avoid using QC_E_ADDI in that case.
+ int Hi20 = (Val & 0xFFFFF000) >> 12;
+ bool IsCompressLUI =
+ ((Val & 0xFFF) == 0) && (Hi20 != 0) &&
+ (isUInt<5>(Hi20) || (Hi20 >= 0xfffe0 && Hi20 <= 0xfffff));
+ bool IsCompressAddSub =
+ (SrcReg == DestReg) &&
+ ((Val > 0 && RISCV::GPRNoX0RegClass.contains(SrcReg)) ||
+ (Val < 0 && RISCV::GPRCRegClass.contains(SrcReg)));
+
+ if (!(IsCompressLUI && IsCompressAddSub)) {
+ BuildMI(MBB, II, DL, TII->get(RISCV::QC_E_ADDI), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrcReg))
+ .addImm(Val)
+ .setMIFlag(Flag);
+ return;
+ }
+ }
+
// Try to split the offset across two ADDIs. We need to keep the intermediate
// result aligned after each ADDI. We need to determine the maximum value we
// can put in each ADDI. In the negative direction, we can use -2048 which is
diff --git a/llvm/test/CodeGen/RISCV/stack-offset.ll b/llvm/test/CodeGen/RISCV/stack-offset.ll
index 402d3546eae29..3dc4fcfe26a82 100644
--- a/llvm/test/CodeGen/RISCV/stack-offset.ll
+++ b/llvm/test/CodeGen/RISCV/stack-offset.ll
@@ -1,6 +1,8 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \
; RUN: | FileCheck %s -check-prefixes=RV32,RV32I
+; RUN: llc -mtriple=riscv32 -verify-machineinstrs -mattr=+experimental-xqcilia < %s \
+; RUN: | FileCheck %s -check-prefixes=RV32XQCILIA
; RUN: llc -mtriple=riscv32 -verify-machineinstrs -mattr=+zba < %s \
; RUN: | FileCheck %s -check-prefixes=RV32,RV32ZBA
; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \
@@ -39,6 +41,27 @@ define void @test() {
; RV32I-NEXT: .cfi_def_cfa_offset 0
; RV32I-NEXT: ret
;
+; RV32XQCILIA-LABEL: test:
+; RV32XQCILIA: # %bb.0:
+; RV32XQCILIA-NEXT: addi sp, sp, -2032
+; RV32XQCILIA-NEXT: .cfi_def_cfa_offset 2032
+; RV32XQCILIA-NEXT: sw ra, 2028(sp) # 4-byte Folded Spill
+; RV32XQCILIA-NEXT: .cfi_offset ra, -4
+; RV32XQCILIA-NEXT: qc.e.addi sp, sp, -3168
+; RV32XQCILIA-NEXT: .cfi_def_cfa_offset 5200
+; RV32XQCILIA-NEXT: addi a0, sp, 12
+; RV32XQCILIA-NEXT: qc.e.addi a1, sp, 2060
+; RV32XQCILIA-NEXT: qc.e.addi a2, sp, 4108
+; RV32XQCILIA-NEXT: qc.e.addi a3, sp, 5132
+; RV32XQCILIA-NEXT: call inspect
+; RV32XQCILIA-NEXT: qc.e.addi sp, sp, 3168
+; RV32XQCILIA-NEXT: .cfi_def_cfa_offset 2032
+; RV32XQCILIA-NEXT: lw ra, 2028(sp) # 4-byte Folded Reload
+; RV32XQCILIA-NEXT: .cfi_restore ra
+; RV32XQCILIA-NEXT: addi sp, sp, 2032
+; RV32XQCILIA-NEXT: .cfi_def_cfa_offset 0
+; RV32XQCILIA-NEXT: ret
+;
; RV32ZBA-LABEL: test:
; RV32ZBA: # %bb.0:
; RV32ZBA-NEXT: addi sp, sp, -2032
@@ -150,6 +173,25 @@ define void @align_8() {
; RV32I-NEXT: .cfi_def_cfa_offset 0
; RV32I-NEXT: ret
;
+; RV32XQCILIA-LABEL: align_8:
+; RV32XQCILIA: # %bb.0:
+; RV32XQCILIA-NEXT: addi sp, sp, -256
+; RV32XQCILIA-NEXT: .cfi_def_cfa_offset 256
+; RV32XQCILIA-NEXT: sw ra, 252(sp) # 4-byte Folded Spill
+; RV32XQCILIA-NEXT: .cfi_offset ra, -4
+; RV32XQCILIA-NEXT: qc.e.addi sp, sp, -3856
+; RV32XQCILIA-NEXT: .cfi_def_cfa_offset 4112
+; RV32XQCILIA-NEXT: addi a0, sp, 7
+; RV32XQCILIA-NEXT: qc.e.addi a1, sp, 4104
+; RV32XQCILIA-NEXT: call inspect
+; RV32XQCILIA-NEXT: qc.e.addi sp, sp, 3856
+; RV32XQCILIA-NEXT: .cfi_def_cfa_offset 256
+; RV32XQCILIA-NEXT: lw ra, 252(sp) # 4-byte Folded Reload
+; RV32XQCILIA-NEXT: .cfi_restore ra
+; RV32XQCILIA-NEXT: addi sp, sp, 256
+; RV32XQCILIA-NEXT: .cfi_def_cfa_offset 0
+; RV32XQCILIA-NEXT: ret
+;
; RV32ZBA-LABEL: align_8:
; RV32ZBA: # %bb.0:
; RV32ZBA-NEXT: addi sp, sp, -2032
@@ -246,6 +288,25 @@ define void @align_4() {
; RV32I-NEXT: .cfi_def_cfa_offset 0
; RV32I-NEXT: ret
;
+; RV32XQCILIA-LABEL: align_4:
+; RV32XQCILIA: # %bb.0:
+; RV32XQCILIA-NEXT: addi sp, sp, -256
+; RV32XQCILIA-NEXT: .cfi_def_cfa_offset 256
+; RV32XQCILIA-NEXT: sw ra, 252(sp) # 4-byte Folded Spill
+; RV32XQCILIA-NEXT: .cfi_offset ra, -4
+; RV32XQCILIA-NEXT: qc.e.addi sp, sp, -3856
+; RV32XQCILIA-NEXT: .cfi_def_cfa_offset 4112
+; RV32XQCILIA-NEXT: addi a0, sp, 7
+; RV32XQCILIA-NEXT: qc.e.addi a1, sp, 4104
+; RV32XQCILIA-NEXT: call inspect
+; RV32XQCILIA-NEXT: qc.e.addi sp, sp, 3856
+; RV32XQCILIA-NEXT: .cfi_def_cfa_offset 256
+; RV32XQCILIA-NEXT: lw ra, 252(sp) # 4-byte Folded Reload
+; RV32XQCILIA-NEXT: .cfi_restore ra
+; RV32XQCILIA-NEXT: addi sp, sp, 256
+; RV32XQCILIA-NEXT: .cfi_def_cfa_offset 0
+; RV32XQCILIA-NEXT: ret
+;
; RV32ZBA-LABEL: align_4:
; RV32ZBA: # %bb.0:
; RV32ZBA-NEXT: addi sp, sp, -2032
@@ -342,6 +403,25 @@ define void @align_2() {
; RV32-NEXT: .cfi_def_cfa_offset 0
; RV32-NEXT: ret
;
+; RV32XQCILIA-LABEL: align_2:
+; RV32XQCILIA: # %bb.0:
+; RV32XQCILIA-NEXT: addi sp, sp, -256
+; RV32XQCILIA-NEXT: .cfi_def_cfa_offset 256
+; RV32XQCILIA-NEXT: sw ra, 252(sp) # 4-byte Folded Spill
+; RV32XQCILIA-NEXT: .cfi_offset ra, -4
+; RV32XQCILIA-NEXT: qc.e.addi sp, sp, -3856
+; RV32XQCILIA-NEXT: .cfi_def_cfa_offset 4112
+; RV32XQCILIA-NEXT: addi a0, sp, 9
+; RV32XQCILIA-NEXT: qc.e.addi a1, sp, 4106
+; RV32XQCILIA-NEXT: call inspect
+; RV32XQCILIA-NEXT: qc.e.addi sp, sp, 3856
+; RV32XQCILIA-NEXT: .cfi_def_cfa_offset 256
+; RV32XQCILIA-NEXT: lw ra, 252(sp) # 4-byte Folded Reload
+; RV32XQCILIA-NEXT: .cfi_restore ra
+; RV32XQCILIA-NEXT: addi sp, sp, 256
+; RV32XQCILIA-NEXT: .cfi_def_cfa_offset 0
+; RV32XQCILIA-NEXT: ret
+;
; RV64-LABEL: align_2:
; RV64: # %bb.0:
; RV64-NEXT: addi sp, sp, -2032
@@ -395,6 +475,25 @@ define void @align_1() {
; RV32-NEXT: .cfi_def_cfa_offset 0
; RV32-NEXT: ret
;
+; RV32XQCILIA-LABEL: align_1:
+; RV32XQCILIA: # %bb.0:
+; RV32XQCILIA-NEXT: addi sp, sp, -256
+; RV32XQCILIA-NEXT: .cfi_def_cfa_offset 256
+; RV32XQCILIA-NEXT: sw ra, 252(sp) # 4-byte Folded Spill
+; RV32XQCILIA-NEXT: .cfi_offset ra, -4
+; RV32XQCILIA-NEXT: qc.e.addi sp, sp, -3856
+; RV32XQCILIA-NEXT: .cfi_def_cfa_offset 4112
+; RV32XQCILIA-NEXT: addi a0, sp, 10
+; RV32XQCILIA-NEXT: qc.e.addi a1, sp, 4107
+; RV32XQCILIA-NEXT: call inspect
+; RV32XQCILIA-NEXT: qc.e.addi sp, sp, 3856
+; RV32XQCILIA-NEXT: .cfi_def_cfa_offset 256
+; RV32XQCILIA-NEXT: lw ra, 252(sp) # 4-byte Folded Reload
+; RV32XQCILIA-NEXT: .cfi_restore ra
+; RV32XQCILIA-NEXT: addi sp, sp, 256
+; RV32XQCILIA-NEXT: .cfi_def_cfa_offset 0
+; RV32XQCILIA-NEXT: ret
+;
; RV64-LABEL: align_1:
; RV64: # %bb.0:
; RV64-NEXT: addi sp, sp, -2032
@@ -422,3 +521,100 @@ define void @align_1() {
call void (...) @inspect(ptr %p1, ptr %p2)
ret void
}
+
+define void @align_1_lui() {
+; RV32-LABEL: align_1_lui:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -2032
+; RV32-NEXT: .cfi_def_cfa_offset 2032
+; RV32-NEXT: sw ra, 2028(sp) # 4-byte Folded Spill
+; RV32-NEXT: .cfi_offset ra, -4
+; RV32-NEXT: lui a0, 1
+; RV32-NEXT: sub sp, sp, a0
+; RV32-NEXT: .cfi_def_cfa_offset 6128
+; RV32-NEXT: addi a0, sp, 8
+; RV32-NEXT: lui a1, 1
+; RV32-NEXT: addi a1, a1, 2027
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: call inspect
+; RV32-NEXT: lui a0, 1
+; RV32-NEXT: add sp, sp, a0
+; RV32-NEXT: .cfi_def_cfa_offset 2032
+; RV32-NEXT: lw ra, 2028(sp) # 4-byte Folded Reload
+; RV32-NEXT: .cfi_restore ra
+; RV32-NEXT: addi sp, sp, 2032
+; RV32-NEXT: .cfi_def_cfa_offset 0
+; RV32-NEXT: ret
+;
+; RV32XQCILIA-LABEL: align_1_lui:
+; RV32XQCILIA: # %bb.0:
+; RV32XQCILIA-NEXT: addi sp, sp, -2032
+; RV32XQCILIA-NEXT: .cfi_def_cfa_offset 2032
+; RV32XQCILIA-NEXT: sw ra, 2028(sp) # 4-byte Folded Spill
+; RV32XQCILIA-NEXT: .cfi_offset ra, -4
+; RV32XQCILIA-NEXT: qc.e.addi sp, sp, -4096
+; RV32XQCILIA-NEXT: .cfi_def_cfa_offset 6128
+; RV32XQCILIA-NEXT: addi a0, sp, 8
+; RV32XQCILIA-NEXT: qc.e.addi a1, sp, 6123
+; RV32XQCILIA-NEXT: call inspect
+; RV32XQCILIA-NEXT: lui a0, 1
+; RV32XQCILIA-NEXT: add sp, sp, a0
+; RV32XQCILIA-NEXT: .cfi_def_cfa_offset 2032
+; RV32XQCILIA-NEXT: lw ra, 2028(sp) # 4-byte Folded Reload
+; RV32XQCILIA-NEXT: .cfi_restore ra
+; RV32XQCILIA-NEXT: addi sp, sp, 2032
+; RV32XQCILIA-NEXT: .cfi_def_cfa_offset 0
+; RV32XQCILIA-NEXT: ret
+;
+; RV64I-LABEL: align_1_lui:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -2032
+; RV64I-NEXT: .cfi_def_cfa_offset 2032
+; RV64I-NEXT: sd ra, 2024(sp) # 8-byte Folded Spill
+; RV64I-NEXT: .cfi_offset ra, -8
+; RV64I-NEXT: lui a0, 1
+; RV64I-NEXT: addiw a0, a0, 16
+; RV64I-NEXT: sub sp, sp, a0
+; RV64I-NEXT: .cfi_def_cfa_offset 6144
+; RV64I-NEXT: addi a0, sp, 20
+; RV64I-NEXT: lui a1, 1
+; RV64I-NEXT: addiw a1, a1, 2039
+; RV64I-NEXT: add a1, sp, a1
+; RV64I-NEXT: call inspect
+; RV64I-NEXT: lui a0, 1
+; RV64I-NEXT: addiw a0, a0, 16
+; RV64I-NEXT: add sp, sp, a0
+; RV64I-NEXT: .cfi_def_cfa_offset 2032
+; RV64I-NEXT: ld ra, 2024(sp) # 8-byte Folded Reload
+; RV64I-NEXT: .cfi_restore ra
+; RV64I-NEXT: addi sp, sp, 2032
+; RV64I-NEXT: .cfi_def_cfa_offset 0
+; RV64I-NEXT: ret
+;
+; RV64ZBA-LABEL: align_1_lui:
+; RV64ZBA: # %bb.0:
+; RV64ZBA-NEXT: addi sp, sp, -2032
+; RV64ZBA-NEXT: .cfi_def_cfa_offset 2032
+; RV64ZBA-NEXT: sd ra, 2024(sp) # 8-byte Folded Spill
+; RV64ZBA-NEXT: .cfi_offset ra, -8
+; RV64ZBA-NEXT: li a0, -514
+; RV64ZBA-NEXT: sh3add sp, a0, sp
+; RV64ZBA-NEXT: .cfi_def_cfa_offset 6144
+; RV64ZBA-NEXT: addi a0, sp, 20
+; RV64ZBA-NEXT: lui a1, 1
+; RV64ZBA-NEXT: addiw a1, a1, 2039
+; RV64ZBA-NEXT: add a1, sp, a1
+; RV64ZBA-NEXT: call inspect
+; RV64ZBA-NEXT: li a0, 514
+; RV64ZBA-NEXT: sh3add sp, a0, sp
+; RV64ZBA-NEXT: .cfi_def_cfa_offset 2032
+; RV64ZBA-NEXT: ld ra, 2024(sp) # 8-byte Folded Reload
+; RV64ZBA-NEXT: .cfi_restore ra
+; RV64ZBA-NEXT: addi sp, sp, 2032
+; RV64ZBA-NEXT: .cfi_def_cfa_offset 0
+; RV64ZBA-NEXT: ret
+ %p2 = alloca i8, align 1
+ %p1 = alloca [6115 x i8], align 1
+ call void (...) @inspect(ptr %p1, ptr %p2)
+ ret void
+}
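
One note on the new guard (my reading of the code above, not text from the
commit): QC_E_ADDI is a 48-bit (6-byte) encoding, so when the offset can be
formed with a compressed C.LUI plus a compressed C.ADD/C.SUB (4 bytes total),
the existing LUI path is kept. That appears to be why align_1_lui still
restores the stack with lui a0, 1 / add sp, sp, a0 (a positive adjustment into
sp can use C.ADD) but allocates it with qc.e.addi sp, sp, -4096 (the negative
direction would need C.SUB, whose operands must be in x8-x15, and sp is not).
A standalone C++ sketch of that check, with register-class membership reduced
to booleans for illustration:

#include <cstdint>
#include <cstdio>

// Roughly mirrors IsCompressLUI: Val is a multiple of 4096 whose upper-20
// value fits C.LUI's immediate range (1..31, or the sign-extended
// 0xfffe0..0xfffff block).
bool isCompressibleLUI(int64_t Val) {
  int64_t Hi20 = (Val & 0xFFFFF000) >> 12;
  if ((Val & 0xFFF) != 0 || Hi20 == 0)
    return false;
  return (Hi20 >= 1 && Hi20 <= 31) || (Hi20 >= 0xfffe0 && Hi20 <= 0xfffff);
}

// Roughly mirrors IsCompressAddSub: C.ADD accepts any non-x0 register, while
// C.SUB needs both operands in x8-x15.
bool isCompressibleAddSub(int64_t Val, bool SrcIsDest, bool SrcIsNonX0,
                          bool SrcIsGPRC) {
  return SrcIsDest && ((Val > 0 && SrcIsNonX0) || (Val < 0 && SrcIsGPRC));
}

// QC_E_ADDI is emitted unless the 4-byte compressed LUI + ADD/SUB pair is
// available.
bool useQcEAddi(int64_t Val, bool SrcIsDest, bool SrcIsNonX0, bool SrcIsGPRC) {
  return !(isCompressibleLUI(Val) &&
           isCompressibleAddSub(Val, SrcIsDest, SrcIsNonX0, SrcIsGPRC));
}

int main() {
  // sp adjustments from align_1_lui: sp is non-x0 but not in x8-x15.
  std::printf("-4096 -> %s\n",
              useQcEAddi(-4096, true, true, false) ? "qc.e.addi" : "lui + sub");
  std::printf("+4096 -> %s\n",
              useQcEAddi(4096, true, true, false) ? "qc.e.addi" : "lui + add");
  return 0;
}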