[llvm] [RISCV] Merge Base Offset for SFB Pseudos (PR #187620)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Mar 19 18:20:26 PDT 2026
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-risc-v
Author: Sam Elliott (lenary)
<details>
<summary>Changes</summary>
This implements the Merge Base Offset pass for the SFB Load Pseudos.
These Pseudos are expanded after Merge Base Offset, so the pass needs to
handle them.
I also had to extend MergeBaseOffset to ensure that ImmOp can be a
Constant Pool Index, which seemed to be supported in some checks but
not in others.
---
Patch is 22.43 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/187620.diff
2 Files Affected:
- (modified) llvm/lib/Target/RISCV/RISCVMergeBaseOffset.cpp (+99-4)
- (added) llvm/test/CodeGen/RISCV/sfb-merge-base-offset.ll (+493)
``````````diff
diff --git a/llvm/lib/Target/RISCV/RISCVMergeBaseOffset.cpp b/llvm/lib/Target/RISCV/RISCVMergeBaseOffset.cpp
index d7ad4c14468ee..657dbc72a4e1b 100644
--- a/llvm/lib/Target/RISCV/RISCVMergeBaseOffset.cpp
+++ b/llvm/lib/Target/RISCV/RISCVMergeBaseOffset.cpp
@@ -338,7 +338,7 @@ bool RISCVMergeBaseOffsetOpt::detectAndFoldOffset(MachineInstr &Hi,
MachineInstr &Tail = *MRI->use_instr_begin(DestReg);
switch (Tail.getOpcode()) {
default:
- LLVM_DEBUG(dbgs() << "Don't know how to get offset from this instr:"
+ LLVM_DEBUG(dbgs() << "Don't know how to get offset from this instr: "
<< Tail);
break;
case RISCV::ADDI:
@@ -451,6 +451,34 @@ bool RISCVMergeBaseOffsetOpt::foldIntoMemoryOps(MachineInstr &Hi,
CommonOffset = Offset;
break;
}
+ case RISCV::PseudoCCLD:
+ case RISCV::PseudoCCLW:
+ case RISCV::PseudoCCLWU:
+ case RISCV::PseudoCCLH:
+ case RISCV::PseudoCCLHU:
+ case RISCV::PseudoCCLB:
+ case RISCV::PseudoCCLBU: {
+ // The SFB Pseudos are like their non-SFB counterparts but have more
+ // operands.
+ if (UseMI.getOperand(2).isFI())
+ return false;
+ // Register defined by Lo should not be the value register.
+ if (DestReg == UseMI.getOperand(0).getReg() ||
+ DestReg == UseMI.getOperand(1).getReg() ||
+ DestReg == UseMI.getOperand(5).getReg())
+ return false;
+ if (UseMI.getOperand(6).isReg() &&
+ DestReg == UseMI.getOperand(6).getReg())
+ return false;
+ assert(DestReg == UseMI.getOperand(2).getReg() &&
+ "Expected base address use");
+ // All load/store instructions must use the same offset.
+ int64_t Offset = UseMI.getOperand(3).getImm();
+ if (CommonOffset && Offset != CommonOffset)
+ return false;
+ CommonOffset = Offset;
+ break;
+ }
case RISCV::INLINEASM:
case RISCV::INLINEASM_BR: {
SmallVector<unsigned> InlineAsmMemoryOpIndexes;
@@ -539,6 +567,10 @@ bool RISCVMergeBaseOffsetOpt::foldIntoMemoryOps(MachineInstr &Hi,
for (unsigned I : InlineAsmMemoryOpIndexes) {
MachineOperand &MO = UseMI.getOperand(I + 1);
switch (ImmOp.getType()) {
+ case MachineOperand::MO_ConstantPoolIndex:
+ MO.ChangeToCPI(ImmOp.getIndex(), ImmOp.getOffset(),
+ ImmOp.getTargetFlags());
+ break;
case MachineOperand::MO_GlobalAddress:
MO.ChangeToGA(ImmOp.getGlobal(), ImmOp.getOffset(),
ImmOp.getTargetFlags());
@@ -557,11 +589,74 @@ bool RISCVMergeBaseOffsetOpt::foldIntoMemoryOps(MachineInstr &Hi,
}
}
} else {
+ unsigned ImmIdx;
+ switch (UseMI.getOpcode()) {
+ case RISCV::INLINEASM:
+ case RISCV::INLINEASM_BR:
+ llvm_unreachable("Should have been dealt with before this else");
+ case RISCV::PseudoCCLD:
+ case RISCV::PseudoCCLW:
+ case RISCV::PseudoCCLWU:
+ case RISCV::PseudoCCLH:
+ case RISCV::PseudoCCLHU:
+ case RISCV::PseudoCCLB:
+ case RISCV::PseudoCCLBU:
+ ImmIdx = 3;
+ break;
+ case RISCV::LB:
+ case RISCV::LH:
+ case RISCV::LH_INX:
+ case RISCV::LW:
+ case RISCV::LW_INX:
+ case RISCV::LBU:
+ case RISCV::LHU:
+ case RISCV::LWU:
+ case RISCV::LD:
+ case RISCV::LD_RV32:
+ case RISCV::FLH:
+ case RISCV::FLW:
+ case RISCV::FLD:
+ case RISCV::SB:
+ case RISCV::SH:
+ case RISCV::SH_INX:
+ case RISCV::SW:
+ case RISCV::SW_INX:
+ case RISCV::SD:
+ case RISCV::SD_RV32:
+ case RISCV::FSH:
+ case RISCV::FSW:
+ case RISCV::FSD:
+ ImmIdx = 2;
+ break;
+ default:
+ llvm_unreachable("Unknown Instruction");
+ }
+
+ MachineOperand &MO = UseMI.getOperand(ImmIdx);
if (Hi.getOpcode() == RISCV::QC_E_LI) {
- UseMI.getOperand(2).ChangeToImmediate(0);
+ MO.ChangeToImmediate(0);
} else {
- UseMI.removeOperand(2);
- UseMI.addOperand(ImmOp);
+ switch (ImmOp.getType()) {
+ case MachineOperand::MO_ConstantPoolIndex:
+ MO.ChangeToCPI(ImmOp.getIndex(), ImmOp.getOffset(),
+ ImmOp.getTargetFlags());
+ break;
+ case MachineOperand::MO_GlobalAddress:
+ MO.ChangeToGA(ImmOp.getGlobal(), ImmOp.getOffset(),
+ ImmOp.getTargetFlags());
+ break;
+ case MachineOperand::MO_MCSymbol:
+ MO.ChangeToMCSymbol(ImmOp.getMCSymbol(), ImmOp.getTargetFlags());
+ MO.setOffset(ImmOp.getOffset());
+ break;
+ case MachineOperand::MO_BlockAddress:
+ MO.ChangeToBA(ImmOp.getBlockAddress(), ImmOp.getOffset(),
+ ImmOp.getTargetFlags());
+ break;
+ default:
+ report_fatal_error("unsupported machine operand type");
+ break;
+ }
}
}
}
diff --git a/llvm/test/CodeGen/RISCV/sfb-merge-base-offset.ll b/llvm/test/CodeGen/RISCV/sfb-merge-base-offset.ll
new file mode 100644
index 0000000000000..2cd2d29e5e164
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/sfb-merge-base-offset.ll
@@ -0,0 +1,493 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+
+; RUN: llc -mtriple=riscv32 -verify-machineinstrs -mattr=+short-forward-branch-iload,+short-forward-branch-imm < %s \
+; RUN: | FileCheck -check-prefix=RV32I-WITH-SFB %s
+; RUN: llc -mtriple=riscv32 -verify-machineinstrs -mattr=+xqci,+short-forward-branch-iload,+short-forward-branch-imm < %s \
+; RUN: | FileCheck -check-prefix=XQCI-WITH-SFB %s
+
+; RUN: llc -mtriple=riscv64 -verify-machineinstrs -mattr=+short-forward-branch-iload,+short-forward-branch-imm < %s \
+; RUN: | FileCheck -check-prefix=RV64I-WITH-SFB %s
+
+
+@ga = global [16 x i8] zeroinitializer, align 8
+
+;; This tests that we can fold offsets into the SFB Load Pseudos.
+
+define i64 @load_cond_ga_dword(i64 %a, i32 %b, i32 %c) {
+; RV32I-WITH-SFB-LABEL: load_cond_ga_dword:
+; RV32I-WITH-SFB: # %bb.0:
+; RV32I-WITH-SFB-NEXT: lui a4, %hi(ga)
+; RV32I-WITH-SFB-NEXT: addi a4, a4, %lo(ga)
+; RV32I-WITH-SFB-NEXT: bne a2, a3, .LBB0_2
+; RV32I-WITH-SFB-NEXT: # %bb.1:
+; RV32I-WITH-SFB-NEXT: lw a0, 8(a4)
+; RV32I-WITH-SFB-NEXT: .LBB0_2:
+; RV32I-WITH-SFB-NEXT: bne a2, a3, .LBB0_4
+; RV32I-WITH-SFB-NEXT: # %bb.3:
+; RV32I-WITH-SFB-NEXT: lw a1, 12(a4)
+; RV32I-WITH-SFB-NEXT: .LBB0_4:
+; RV32I-WITH-SFB-NEXT: ret
+;
+; XQCI-WITH-SFB-LABEL: load_cond_ga_dword:
+; XQCI-WITH-SFB: # %bb.0:
+; XQCI-WITH-SFB-NEXT: qc.e.li a4, ga
+; XQCI-WITH-SFB-NEXT: bne a2, a3, .LBB0_2
+; XQCI-WITH-SFB-NEXT: # %bb.1:
+; XQCI-WITH-SFB-NEXT: lw a0, 8(a4)
+; XQCI-WITH-SFB-NEXT: .LBB0_2:
+; XQCI-WITH-SFB-NEXT: bne a2, a3, .LBB0_4
+; XQCI-WITH-SFB-NEXT: # %bb.3:
+; XQCI-WITH-SFB-NEXT: lw a1, 12(a4)
+; XQCI-WITH-SFB-NEXT: .LBB0_4:
+; XQCI-WITH-SFB-NEXT: ret
+;
+; RV64I-WITH-SFB-LABEL: load_cond_ga_dword:
+; RV64I-WITH-SFB: # %bb.0:
+; RV64I-WITH-SFB-NEXT: sext.w a2, a2
+; RV64I-WITH-SFB-NEXT: sext.w a1, a1
+; RV64I-WITH-SFB-NEXT: lui a3, %hi(ga+8)
+; RV64I-WITH-SFB-NEXT: bne a1, a2, .LBB0_2
+; RV64I-WITH-SFB-NEXT: # %bb.1:
+; RV64I-WITH-SFB-NEXT: ld a0, %lo(ga+8)(a3)
+; RV64I-WITH-SFB-NEXT: .LBB0_2:
+; RV64I-WITH-SFB-NEXT: ret
+ %addr = getelementptr i8, ptr @ga, i32 8
+ %val = load i64, ptr %addr, align 8
+ %cmp = icmp eq i32 %b, %c
+ %cmv = select i1 %cmp, i64 %val, i64 %a
+ ret i64 %cmv
+}
+
+define i64 @load_cond_ga_word_sext(i64 %a, i32 %b, i32 %c) {
+; RV32I-WITH-SFB-LABEL: load_cond_ga_word_sext:
+; RV32I-WITH-SFB: # %bb.0:
+; RV32I-WITH-SFB-NEXT: lui a4, %hi(ga+12)
+; RV32I-WITH-SFB-NEXT: lw a4, %lo(ga+12)(a4)
+; RV32I-WITH-SFB-NEXT: bne a2, a3, .LBB1_2
+; RV32I-WITH-SFB-NEXT: # %bb.1:
+; RV32I-WITH-SFB-NEXT: mv a0, a4
+; RV32I-WITH-SFB-NEXT: .LBB1_2:
+; RV32I-WITH-SFB-NEXT: bne a2, a3, .LBB1_4
+; RV32I-WITH-SFB-NEXT: # %bb.3:
+; RV32I-WITH-SFB-NEXT: srai a1, a4, 31
+; RV32I-WITH-SFB-NEXT: .LBB1_4:
+; RV32I-WITH-SFB-NEXT: ret
+;
+; XQCI-WITH-SFB-LABEL: load_cond_ga_word_sext:
+; XQCI-WITH-SFB: # %bb.0:
+; XQCI-WITH-SFB-NEXT: qc.e.li a4, ga+12
+; XQCI-WITH-SFB-NEXT: lw a4, 0(a4)
+; XQCI-WITH-SFB-NEXT: qc.mveq a0, a2, a3, a4
+; XQCI-WITH-SFB-NEXT: bne a2, a3, .LBB1_2
+; XQCI-WITH-SFB-NEXT: # %bb.1:
+; XQCI-WITH-SFB-NEXT: srai a1, a4, 31
+; XQCI-WITH-SFB-NEXT: .LBB1_2:
+; XQCI-WITH-SFB-NEXT: ret
+;
+; RV64I-WITH-SFB-LABEL: load_cond_ga_word_sext:
+; RV64I-WITH-SFB: # %bb.0:
+; RV64I-WITH-SFB-NEXT: sext.w a2, a2
+; RV64I-WITH-SFB-NEXT: sext.w a1, a1
+; RV64I-WITH-SFB-NEXT: lui a3, %hi(ga+12)
+; RV64I-WITH-SFB-NEXT: bne a1, a2, .LBB1_2
+; RV64I-WITH-SFB-NEXT: # %bb.1:
+; RV64I-WITH-SFB-NEXT: lw a0, %lo(ga+12)(a3)
+; RV64I-WITH-SFB-NEXT: .LBB1_2:
+; RV64I-WITH-SFB-NEXT: ret
+ %addr = getelementptr i8, ptr @ga, i32 12
+ %val = load i32, ptr %addr, align 4
+ %ext = sext i32 %val to i64
+ %cmp = icmp eq i32 %b, %c
+ %cmv = select i1 %cmp, i64 %ext, i64 %a
+ ret i64 %cmv
+}
+
+define i64 @load_cond_ga_word_zext(i64 %a, i32 %b, i32 %c) {
+; RV32I-WITH-SFB-LABEL: load_cond_ga_word_zext:
+; RV32I-WITH-SFB: # %bb.0:
+; RV32I-WITH-SFB-NEXT: lui a4, %hi(ga+12)
+; RV32I-WITH-SFB-NEXT: bne a2, a3, .LBB2_2
+; RV32I-WITH-SFB-NEXT: # %bb.1:
+; RV32I-WITH-SFB-NEXT: lw a0, %lo(ga+12)(a4)
+; RV32I-WITH-SFB-NEXT: .LBB2_2:
+; RV32I-WITH-SFB-NEXT: bne a2, a3, .LBB2_4
+; RV32I-WITH-SFB-NEXT: # %bb.3:
+; RV32I-WITH-SFB-NEXT: li a1, 0
+; RV32I-WITH-SFB-NEXT: .LBB2_4:
+; RV32I-WITH-SFB-NEXT: ret
+;
+; XQCI-WITH-SFB-LABEL: load_cond_ga_word_zext:
+; XQCI-WITH-SFB: # %bb.0:
+; XQCI-WITH-SFB-NEXT: qc.e.li a4, ga+12
+; XQCI-WITH-SFB-NEXT: bne a2, a3, .LBB2_2
+; XQCI-WITH-SFB-NEXT: # %bb.1:
+; XQCI-WITH-SFB-NEXT: lw a0, 0(a4)
+; XQCI-WITH-SFB-NEXT: .LBB2_2:
+; XQCI-WITH-SFB-NEXT: bne a2, a3, .LBB2_4
+; XQCI-WITH-SFB-NEXT: # %bb.3:
+; XQCI-WITH-SFB-NEXT: li a1, 0
+; XQCI-WITH-SFB-NEXT: .LBB2_4:
+; XQCI-WITH-SFB-NEXT: ret
+;
+; RV64I-WITH-SFB-LABEL: load_cond_ga_word_zext:
+; RV64I-WITH-SFB: # %bb.0:
+; RV64I-WITH-SFB-NEXT: sext.w a2, a2
+; RV64I-WITH-SFB-NEXT: sext.w a1, a1
+; RV64I-WITH-SFB-NEXT: lui a3, %hi(ga+12)
+; RV64I-WITH-SFB-NEXT: bne a1, a2, .LBB2_2
+; RV64I-WITH-SFB-NEXT: # %bb.1:
+; RV64I-WITH-SFB-NEXT: lwu a0, %lo(ga+12)(a3)
+; RV64I-WITH-SFB-NEXT: .LBB2_2:
+; RV64I-WITH-SFB-NEXT: ret
+ %addr = getelementptr i8, ptr @ga, i32 12
+ %val = load i32, ptr %addr, align 4
+ %ext = zext i32 %val to i64
+ %cmp = icmp eq i32 %b, %c
+ %cmv = select i1 %cmp, i64 %ext, i64 %a
+ ret i64 %cmv
+}
+
+define i32 @load_cond_ga_word(i32 %a, i32 %b, i32 %c) {
+; RV32I-WITH-SFB-LABEL: load_cond_ga_word:
+; RV32I-WITH-SFB: # %bb.0:
+; RV32I-WITH-SFB-NEXT: lui a3, %hi(ga)
+; RV32I-WITH-SFB-NEXT: bne a1, a2, .LBB3_2
+; RV32I-WITH-SFB-NEXT: # %bb.1:
+; RV32I-WITH-SFB-NEXT: lw a0, %lo(ga+4)(a3)
+; RV32I-WITH-SFB-NEXT: .LBB3_2:
+; RV32I-WITH-SFB-NEXT: ret
+;
+; XQCI-WITH-SFB-LABEL: load_cond_ga_word:
+; XQCI-WITH-SFB: # %bb.0:
+; XQCI-WITH-SFB-NEXT: qc.e.li a3, ga+4
+; XQCI-WITH-SFB-NEXT: bne a1, a2, .LBB3_2
+; XQCI-WITH-SFB-NEXT: # %bb.1:
+; XQCI-WITH-SFB-NEXT: lw a0, 0(a3)
+; XQCI-WITH-SFB-NEXT: .LBB3_2:
+; XQCI-WITH-SFB-NEXT: ret
+;
+; RV64I-WITH-SFB-LABEL: load_cond_ga_word:
+; RV64I-WITH-SFB: # %bb.0:
+; RV64I-WITH-SFB-NEXT: sext.w a2, a2
+; RV64I-WITH-SFB-NEXT: sext.w a1, a1
+; RV64I-WITH-SFB-NEXT: lui a3, %hi(ga)
+; RV64I-WITH-SFB-NEXT: bne a1, a2, .LBB3_2
+; RV64I-WITH-SFB-NEXT: # %bb.1:
+; RV64I-WITH-SFB-NEXT: lw a0, %lo(ga+4)(a3)
+; RV64I-WITH-SFB-NEXT: .LBB3_2:
+; RV64I-WITH-SFB-NEXT: ret
+ %addr = getelementptr i8, ptr @ga, i32 4
+ %val = load i32, ptr %addr, align 4
+ %cmp = icmp eq i32 %b, %c
+ %cmv = select i1 %cmp, i32 %val, i32 %a
+ ret i32 %cmv
+}
+
+define i32 @load_cond_ga_half_sext(i32 %a, i32 %b, i32 %c) {
+; RV32I-WITH-SFB-LABEL: load_cond_ga_half_sext:
+; RV32I-WITH-SFB: # %bb.0:
+; RV32I-WITH-SFB-NEXT: lui a3, %hi(ga)
+; RV32I-WITH-SFB-NEXT: bne a1, a2, .LBB4_2
+; RV32I-WITH-SFB-NEXT: # %bb.1:
+; RV32I-WITH-SFB-NEXT: lh a0, %lo(ga+6)(a3)
+; RV32I-WITH-SFB-NEXT: .LBB4_2:
+; RV32I-WITH-SFB-NEXT: ret
+;
+; XQCI-WITH-SFB-LABEL: load_cond_ga_half_sext:
+; XQCI-WITH-SFB: # %bb.0:
+; XQCI-WITH-SFB-NEXT: qc.e.li a3, ga+6
+; XQCI-WITH-SFB-NEXT: bne a1, a2, .LBB4_2
+; XQCI-WITH-SFB-NEXT: # %bb.1:
+; XQCI-WITH-SFB-NEXT: lh a0, 0(a3)
+; XQCI-WITH-SFB-NEXT: .LBB4_2:
+; XQCI-WITH-SFB-NEXT: ret
+;
+; RV64I-WITH-SFB-LABEL: load_cond_ga_half_sext:
+; RV64I-WITH-SFB: # %bb.0:
+; RV64I-WITH-SFB-NEXT: sext.w a2, a2
+; RV64I-WITH-SFB-NEXT: sext.w a1, a1
+; RV64I-WITH-SFB-NEXT: lui a3, %hi(ga)
+; RV64I-WITH-SFB-NEXT: bne a1, a2, .LBB4_2
+; RV64I-WITH-SFB-NEXT: # %bb.1:
+; RV64I-WITH-SFB-NEXT: lh a0, %lo(ga+6)(a3)
+; RV64I-WITH-SFB-NEXT: .LBB4_2:
+; RV64I-WITH-SFB-NEXT: ret
+ %addr = getelementptr i8, ptr @ga, i32 6
+ %val = load i16, ptr %addr, align 2
+ %ext = sext i16 %val to i32
+ %cmp = icmp eq i32 %b, %c
+ %cmv = select i1 %cmp, i32 %ext, i32 %a
+ ret i32 %cmv
+}
+
+define i32 @load_cond_ga_half_zext(i32 %a, i32 %b, i32 %c) {
+; RV32I-WITH-SFB-LABEL: load_cond_ga_half_zext:
+; RV32I-WITH-SFB: # %bb.0:
+; RV32I-WITH-SFB-NEXT: lui a3, %hi(ga)
+; RV32I-WITH-SFB-NEXT: bne a1, a2, .LBB5_2
+; RV32I-WITH-SFB-NEXT: # %bb.1:
+; RV32I-WITH-SFB-NEXT: lhu a0, %lo(ga+6)(a3)
+; RV32I-WITH-SFB-NEXT: .LBB5_2:
+; RV32I-WITH-SFB-NEXT: ret
+;
+; XQCI-WITH-SFB-LABEL: load_cond_ga_half_zext:
+; XQCI-WITH-SFB: # %bb.0:
+; XQCI-WITH-SFB-NEXT: qc.e.li a3, ga+6
+; XQCI-WITH-SFB-NEXT: bne a1, a2, .LBB5_2
+; XQCI-WITH-SFB-NEXT: # %bb.1:
+; XQCI-WITH-SFB-NEXT: lhu a0, 0(a3)
+; XQCI-WITH-SFB-NEXT: .LBB5_2:
+; XQCI-WITH-SFB-NEXT: ret
+;
+; RV64I-WITH-SFB-LABEL: load_cond_ga_half_zext:
+; RV64I-WITH-SFB: # %bb.0:
+; RV64I-WITH-SFB-NEXT: sext.w a2, a2
+; RV64I-WITH-SFB-NEXT: sext.w a1, a1
+; RV64I-WITH-SFB-NEXT: lui a3, %hi(ga)
+; RV64I-WITH-SFB-NEXT: bne a1, a2, .LBB5_2
+; RV64I-WITH-SFB-NEXT: # %bb.1:
+; RV64I-WITH-SFB-NEXT: lhu a0, %lo(ga+6)(a3)
+; RV64I-WITH-SFB-NEXT: .LBB5_2:
+; RV64I-WITH-SFB-NEXT: ret
+ %addr = getelementptr i8, ptr @ga, i32 6
+ %val = load i16, ptr %addr, align 2
+ %ext = zext i16 %val to i32
+ %cmp = icmp eq i32 %b, %c
+ %cmv = select i1 %cmp, i32 %ext, i32 %a
+ ret i32 %cmv
+}
+
+define i32 @load_cond_ga_byte_sext(i32 %a, i32 %b, i32 %c) {
+; RV32I-WITH-SFB-LABEL: load_cond_ga_byte_sext:
+; RV32I-WITH-SFB: # %bb.0:
+; RV32I-WITH-SFB-NEXT: lui a3, %hi(ga)
+; RV32I-WITH-SFB-NEXT: bne a1, a2, .LBB6_2
+; RV32I-WITH-SFB-NEXT: # %bb.1:
+; RV32I-WITH-SFB-NEXT: lb a0, %lo(ga+7)(a3)
+; RV32I-WITH-SFB-NEXT: .LBB6_2:
+; RV32I-WITH-SFB-NEXT: ret
+;
+; XQCI-WITH-SFB-LABEL: load_cond_ga_byte_sext:
+; XQCI-WITH-SFB: # %bb.0:
+; XQCI-WITH-SFB-NEXT: qc.e.li a3, ga+7
+; XQCI-WITH-SFB-NEXT: bne a1, a2, .LBB6_2
+; XQCI-WITH-SFB-NEXT: # %bb.1:
+; XQCI-WITH-SFB-NEXT: lb a0, 0(a3)
+; XQCI-WITH-SFB-NEXT: .LBB6_2:
+; XQCI-WITH-SFB-NEXT: ret
+;
+; RV64I-WITH-SFB-LABEL: load_cond_ga_byte_sext:
+; RV64I-WITH-SFB: # %bb.0:
+; RV64I-WITH-SFB-NEXT: sext.w a2, a2
+; RV64I-WITH-SFB-NEXT: sext.w a1, a1
+; RV64I-WITH-SFB-NEXT: lui a3, %hi(ga)
+; RV64I-WITH-SFB-NEXT: bne a1, a2, .LBB6_2
+; RV64I-WITH-SFB-NEXT: # %bb.1:
+; RV64I-WITH-SFB-NEXT: lb a0, %lo(ga+7)(a3)
+; RV64I-WITH-SFB-NEXT: .LBB6_2:
+; RV64I-WITH-SFB-NEXT: ret
+ %addr = getelementptr i8, ptr @ga, i32 7
+ %val = load i8, ptr %addr, align 1
+ %ext = sext i8 %val to i32
+ %cmp = icmp eq i32 %b, %c
+ %cmv = select i1 %cmp, i32 %ext, i32 %a
+ ret i32 %cmv
+}
+
+define i32 @load_cond_ga_byte_zext(i32 %a, i32 %b, i32 %c) {
+; RV32I-WITH-SFB-LABEL: load_cond_ga_byte_zext:
+; RV32I-WITH-SFB: # %bb.0:
+; RV32I-WITH-SFB-NEXT: lui a3, %hi(ga)
+; RV32I-WITH-SFB-NEXT: bne a1, a2, .LBB7_2
+; RV32I-WITH-SFB-NEXT: # %bb.1:
+; RV32I-WITH-SFB-NEXT: lbu a0, %lo(ga+7)(a3)
+; RV32I-WITH-SFB-NEXT: .LBB7_2:
+; RV32I-WITH-SFB-NEXT: ret
+;
+; XQCI-WITH-SFB-LABEL: load_cond_ga_byte_zext:
+; XQCI-WITH-SFB: # %bb.0:
+; XQCI-WITH-SFB-NEXT: qc.e.li a3, ga+7
+; XQCI-WITH-SFB-NEXT: bne a1, a2, .LBB7_2
+; XQCI-WITH-SFB-NEXT: # %bb.1:
+; XQCI-WITH-SFB-NEXT: lbu a0, 0(a3)
+; XQCI-WITH-SFB-NEXT: .LBB7_2:
+; XQCI-WITH-SFB-NEXT: ret
+;
+; RV64I-WITH-SFB-LABEL: load_cond_ga_byte_zext:
+; RV64I-WITH-SFB: # %bb.0:
+; RV64I-WITH-SFB-NEXT: sext.w a2, a2
+; RV64I-WITH-SFB-NEXT: sext.w a1, a1
+; RV64I-WITH-SFB-NEXT: lui a3, %hi(ga)
+; RV64I-WITH-SFB-NEXT: bne a1, a2, .LBB7_2
+; RV64I-WITH-SFB-NEXT: # %bb.1:
+; RV64I-WITH-SFB-NEXT: lbu a0, %lo(ga+7)(a3)
+; RV64I-WITH-SFB-NEXT: .LBB7_2:
+; RV64I-WITH-SFB-NEXT: ret
+ %addr = getelementptr i8, ptr @ga, i32 7
+ %val = load i8, ptr %addr, align 1
+ %ext = zext i8 %val to i32
+ %cmp = icmp eq i32 %b, %c
+ %cmv = select i1 %cmp, i32 %ext, i32 %a
+ ret i32 %cmv
+}
+
+;; Check we handle cases where the branch is with an immediate
+
+define i32 @load_cond_ga_word_imm(i32 %a, i32 %b) {
+; RV32I-WITH-SFB-LABEL: load_cond_ga_word_imm:
+; RV32I-WITH-SFB: # %bb.0:
+; RV32I-WITH-SFB-NEXT: lui a2, %hi(ga)
+; RV32I-WITH-SFB-NEXT: li a3, 14
+; RV32I-WITH-SFB-NEXT: bne a1, a3, .LBB8_2
+; RV32I-WITH-SFB-NEXT: # %bb.1:
+; RV32I-WITH-SFB-NEXT: lw a0, %lo(ga+4)(a2)
+; RV32I-WITH-SFB-NEXT: .LBB8_2:
+; RV32I-WITH-SFB-NEXT: ret
+;
+; XQCI-WITH-SFB-LABEL: load_cond_ga_word_imm:
+; XQCI-WITH-SFB: # %bb.0:
+; XQCI-WITH-SFB-NEXT: qc.e.li a2, ga+4
+; XQCI-WITH-SFB-NEXT: qc.bnei a1, 14, .LBB8_2
+; XQCI-WITH-SFB-NEXT: # %bb.1:
+; XQCI-WITH-SFB-NEXT: lw a0, 0(a2)
+; XQCI-WITH-SFB-NEXT: .LBB8_2:
+; XQCI-WITH-SFB-NEXT: ret
+;
+; RV64I-WITH-SFB-LABEL: load_cond_ga_word_imm:
+; RV64I-WITH-SFB: # %bb.0:
+; RV64I-WITH-SFB-NEXT: sext.w a1, a1
+; RV64I-WITH-SFB-NEXT: lui a2, %hi(ga)
+; RV64I-WITH-SFB-NEXT: li a3, 14
+; RV64I-WITH-SFB-NEXT: bne a1, a3, .LBB8_2
+; RV64I-WITH-SFB-NEXT: # %bb.1:
+; RV64I-WITH-SFB-NEXT: lw a0, %lo(ga+4)(a2)
+; RV64I-WITH-SFB-NEXT: .LBB8_2:
+; RV64I-WITH-SFB-NEXT: ret
+ %addr = getelementptr i8, ptr @ga, i32 4
+ %val = load i32, ptr %addr, align 4
+ %cmp = icmp eq i32 %b, 14
+ %cmv = select i1 %cmp, i32 %val, i32 %a
+ ret i32 %cmv
+}
+
+;; Negative Tests
+
+
+define i32 @load_cond_ga_word_addr_cond(i32 %a, ptr %b) {
+; RV32I-WITH-SFB-LABEL: load_cond_ga_word_addr_cond:
+; RV32I-WITH-SFB: # %bb.0:
+; RV32I-WITH-SFB-NEXT: lui a2, %hi(ga)
+; RV32I-WITH-SFB-NEXT: addi a2, a2, %lo(ga)
+; RV32I-WITH-SFB-NEXT: addi a3, a2, 4
+; RV32I-WITH-SFB-NEXT: beq a3, a1, .LBB9_2
+; RV32I-WITH-SFB-NEXT: # %bb.1:
+; RV32I-WITH-SFB-NEXT: lw a0, 4(a2)
+; RV32I-WITH-SFB-NEXT: .LBB9_2:
+; RV32I-WITH-SFB-NEXT: ret
+;
+; XQCI-WITH-SFB-LABEL: load_cond_ga_word_addr_cond:
+; XQCI-WITH-SFB: # %bb.0:
+; XQCI-WITH-SFB-NEXT: qc.e.li a2, ga
+; XQCI-WITH-SFB-NEXT: addi a3, a2, 4
+; XQCI-WITH-SFB-NEXT: beq a3, a1, .LBB9_2
+; XQCI-WITH-SFB-NEXT: # %bb.1:
+; XQCI-WITH-SFB-NEXT: lw a0, 4(a2)
+; XQCI-WITH-SFB-NEXT: .LBB9_2:
+; XQCI-WITH-SFB-NEXT: ret
+;
+; RV64I-WITH-SFB-LABEL: load_cond_ga_word_addr_cond:
+; RV64I-WITH-SFB: # %bb.0:
+; RV64I-WITH-SFB-NEXT: lui a2, %hi(ga)
+; RV64I-WITH-SFB-NEXT: addi a2, a2, %lo(ga)
+; RV64I-WITH-SFB-NEXT: addi a3, a2, 4
+; RV64I-WITH-SFB-NEXT: beq a3, a1, .LBB9_2
+; RV64I-WITH-SFB-NEXT: # %bb.1:
+; RV64I-WITH-SFB-NEXT: lw a0, 4(a2)
+; RV64I-WITH-SFB-NEXT: .LBB9_2:
+; RV64I-WITH-SFB-NEXT: ret
+ %addr = getelementptr i8, ptr @ga, i32 4
+ %val = load i32, ptr %addr, align 4
+ %cmp = icmp ne ptr %addr, %b
+ %cmv = select i1 %cmp, i32 %val, i32 %a
+ ret i32 %cmv
+}
+
+define i32 @load_cond_ga_word_addr_cond_imm(i32 %a) {
+; RV32I-WITH-SFB-LABEL: load_cond_ga_word_addr_cond_imm:
+; RV32I-WITH-SFB: # %bb.0:
+; RV32I-WITH-SFB-NEXT: lui a1, %hi(ga)
+; RV32I-WITH-SFB-NEXT: addi a1, a1, %lo(ga)
+; RV32I-WITH-SFB-NEXT: addi a2, a1, 4
+; RV32I-WITH-SFB-NEXT: beqz a2, .LBB10_2
+; RV32I-WITH-SFB-NEXT: # %bb.1:
+; RV32I-WITH-SFB-NEXT: lw a0, 4(a1)
+; RV32I-WITH-SFB...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/187620
More information about the llvm-commits
mailing list