[llvm] [RISCV] Add short forward branch support for `lb`, `lbu`, `lh`, `lhu`, `lw`, `lwu` and `ld` (PR #170829)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Dec 5 02:15:53 PST 2025
llvmbot wrote:
@llvm/pr-subscribers-backend-risc-v
Author: quic_hchandel (hchandel)
Changes:
Patch is 341.30 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/170829.diff
11 Files Affected:
- (modified) llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp (+14)
- (modified) llvm/lib/Target/RISCV/RISCVFeatures.td (+8)
- (modified) llvm/lib/Target/RISCV/RISCVInstrInfo.cpp (+74)
- (modified) llvm/lib/Target/RISCV/RISCVInstrInfo.h (+5)
- (modified) llvm/lib/Target/RISCV/RISCVInstrInfoSFB.td (+21)
- (modified) llvm/test/CodeGen/RISCV/features-info.ll (+1)
- (added) llvm/test/CodeGen/RISCV/short-forward-branch-opt-load-atomic-acquire.ll (+2205)
- (added) llvm/test/CodeGen/RISCV/short-forward-branch-opt-load-atomic-monotonic.ll (+2091)
- (added) llvm/test/CodeGen/RISCV/short-forward-branch-opt-load-atomic-seq_cst.ll (+2319)
- (added) llvm/test/CodeGen/RISCV/short-forward-branch-opt-load-volatile.ll (+1022)
- (added) llvm/test/CodeGen/RISCV/short-forward-branch-opt-load.ll (+1761)
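To make the patch's intent concrete before the diff: the new pseudos let a load be folded into a short-forward-branch conditional move, so the memory access happens only on the path that uses its result. Below is a minimal LLVM IR reduction of the targeted pattern, adapted from the new tests; the function name is illustrative, not from the patch.

```llvm
; A select whose chosen arm is a plain load. Without this feature the load
; executes unconditionally ahead of the short forward branch; with
; +short-forward-branch-iload, foldMemoryOperandImpl rewrites the
; PseudoCCMOVGPR + load pair into a conditional-load pseudo (PseudoCCLW
; here), which later expands to a branch over a single lw.
define i32 @select_load(ptr %base, i1 %cond, i32 %fallback) {
entry:
  %addr = getelementptr i32, ptr %base, i32 4
  %val = load i32, ptr %addr, align 4
  %res = select i1 %cond, i32 %val, i32 %fallback
  ret i32 %res
}
```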
``````````diff
diff --git a/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp b/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp
index 60e0afdd99912..55efead1ad887 100644
--- a/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp
+++ b/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp
@@ -133,6 +133,13 @@ bool RISCVExpandPseudo::expandMI(MachineBasicBlock &MBB,
case RISCV::PseudoCCMINU:
case RISCV::PseudoCCMUL:
case RISCV::PseudoCCLUI:
+ case RISCV::PseudoCCLB:
+ case RISCV::PseudoCCLH:
+ case RISCV::PseudoCCLW:
+ case RISCV::PseudoCCLHU:
+ case RISCV::PseudoCCLBU:
+ case RISCV::PseudoCCLWU:
+ case RISCV::PseudoCCLD:
case RISCV::PseudoCCQC_LI:
case RISCV::PseudoCCQC_E_LI:
case RISCV::PseudoCCADDW:
@@ -243,6 +250,13 @@ bool RISCVExpandPseudo::expandCCOp(MachineBasicBlock &MBB,
case RISCV::PseudoCCMINU: NewOpc = RISCV::MINU; break;
case RISCV::PseudoCCMUL: NewOpc = RISCV::MUL; break;
case RISCV::PseudoCCLUI: NewOpc = RISCV::LUI; break;
+ case RISCV::PseudoCCLB: NewOpc = RISCV::LB; break;
+ case RISCV::PseudoCCLH: NewOpc = RISCV::LH; break;
+ case RISCV::PseudoCCLW: NewOpc = RISCV::LW; break;
+ case RISCV::PseudoCCLHU: NewOpc = RISCV::LHU; break;
+ case RISCV::PseudoCCLBU: NewOpc = RISCV::LBU; break;
+ case RISCV::PseudoCCLWU: NewOpc = RISCV::LWU; break;
+ case RISCV::PseudoCCLD: NewOpc = RISCV::LD; break;
case RISCV::PseudoCCQC_LI: NewOpc = RISCV::QC_LI; break;
case RISCV::PseudoCCQC_E_LI: NewOpc = RISCV::QC_E_LI; break;
case RISCV::PseudoCCADDI: NewOpc = RISCV::ADDI; break;
diff --git a/llvm/lib/Target/RISCV/RISCVFeatures.td b/llvm/lib/Target/RISCV/RISCVFeatures.td
index 0c75312847c87..dd7f36136ea4e 100644
--- a/llvm/lib/Target/RISCV/RISCVFeatures.td
+++ b/llvm/lib/Target/RISCV/RISCVFeatures.td
@@ -1898,6 +1898,7 @@ def TuneNoDefaultUnroll
// - IALU: RVI Integer instructions, plus ANDN/ORN/XNOR (Zbb/Zbkb)
// - IMinMax: Zbb MIN(U)/MAX(U)
// - IMul: MUL
+// - ILoad: LB(U)/LH(U)/LW(U)/LD
//
// We make the simplifying assumption that any microarches that implement
// any "short forward branches" can do the IALU fusions, and can opt into
@@ -1928,6 +1929,13 @@ def TuneShortForwardBranchIMul
def HasShortForwardBranchIMul : Predicate<"Subtarget->hasShortForwardBranchIMul()">;
+
+def TuneShortForwardBranchILoad
+ : SubtargetFeature<"short-forward-branch-iload", "HasShortForwardBranchILoad",
+ "true", "Enable short forward branch optimization for load instructions",
+ [TuneShortForwardBranchIALU]>;
+def HasShortForwardBranchILoad : Predicate<"Subtarget->hasShortForwardBranchILoad()">;
+
// Some subtargets require a S2V transfer buffer to move scalars into vectors.
// FIXME: Forming .vx/.vf/.wx/.wf can reduce register pressure.
def TuneNoSinkSplatOperands
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
index 2bd63e75d060b..1940d36af4dcd 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
@@ -897,6 +897,80 @@ MachineInstr *RISCVInstrInfo::foldMemoryOperandImpl(
.addImm(0);
}
+static unsigned getLoadPredicatedOpcode(unsigned Opcode) {
+ switch (Opcode) {
+ case RISCV::LB:
+ return RISCV::PseudoCCLB;
+ case RISCV::LBU:
+ return RISCV::PseudoCCLBU;
+ case RISCV::LH:
+ return RISCV::PseudoCCLH;
+ case RISCV::LHU:
+ return RISCV::PseudoCCLHU;
+ case RISCV::LW:
+ return RISCV::PseudoCCLW;
+ case RISCV::LWU:
+ return RISCV::PseudoCCLWU;
+ case RISCV::LD:
+ return RISCV::PseudoCCLD;
+ default:
+ return 0;
+ }
+}
+
+MachineInstr *RISCVInstrInfo::foldMemoryOperandImpl(
+ MachineFunction &MF, MachineInstr &MI, ArrayRef<unsigned> Ops,
+ MachineBasicBlock::iterator InsertPt, MachineInstr &LoadMI,
+ LiveIntervals *LIS) const {
+ // For now, only handle RISCV::PseudoCCMOVGPR.
+ if (MI.getOpcode() != RISCV::PseudoCCMOVGPR)
+ return nullptr;
+
+  if (!STI.hasShortForwardBranchILoad() ||
+      !getLoadPredicatedOpcode(LoadMI.getOpcode()))
+    return nullptr;
+
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+  bool Invert = MRI.getVRegDef(MI.getOperand(4).getReg()) == &LoadMI;
+ MachineOperand FalseReg = MI.getOperand(Invert ? 5 : 4);
+ Register DestReg = MI.getOperand(0).getReg();
+ const TargetRegisterClass *PreviousClass = MRI.getRegClass(FalseReg.getReg());
+ if (!MRI.constrainRegClass(DestReg, PreviousClass))
+ return nullptr;
+
+ unsigned PredOpc = getLoadPredicatedOpcode(LoadMI.getOpcode());
+ assert(PredOpc != 0 && "Unexpected opcode!");
+
+  // Create a predicated version of LoadMI.
+ MachineInstrBuilder NewMI = BuildMI(*MI.getParent(), InsertPt,
+ MI.getDebugLoc(), get(PredOpc), DestReg);
+
+ // Copy the condition portion.
+ NewMI.add(MI.getOperand(1));
+ NewMI.add(MI.getOperand(2));
+
+ // Add condition code, inverting if necessary.
+ auto CC = static_cast<RISCVCC::CondCode>(MI.getOperand(3).getImm());
+ if (Invert)
+ CC = RISCVCC::getInverseBranchCondition(CC);
+ NewMI.addImm(CC);
+
+ // Copy the false register.
+ NewMI.add(FalseReg);
+
+  // Copy the remaining LoadMI operands (skipping its def).
+ const MCInstrDesc &DefDesc = LoadMI.getDesc();
+ for (unsigned i = 1, e = DefDesc.getNumOperands(); i != e; ++i)
+ NewMI.add(LoadMI.getOperand(i));
+
+ NewMI.cloneMemRefs(LoadMI);
+ return NewMI;
+}
+
void RISCVInstrInfo::movImm(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
const DebugLoc &DL, Register DstReg, uint64_t Val,
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.h b/llvm/lib/Target/RISCV/RISCVInstrInfo.h
index 0ffe015b9fac8..908da393535fe 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.h
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.h
@@ -133,6 +133,11 @@ class RISCVInstrInfo : public RISCVGenInstrInfo {
LiveIntervals *LIS = nullptr,
VirtRegMap *VRM = nullptr) const override;
+ MachineInstr *foldMemoryOperandImpl(
+ MachineFunction &MF, MachineInstr &MI, ArrayRef<unsigned> Ops,
+ MachineBasicBlock::iterator InsertPt, MachineInstr &LoadMI,
+ LiveIntervals *LIS = nullptr) const override;
+
// Materializes the given integer Val into DstReg.
void movImm(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
const DebugLoc &DL, Register DstReg, uint64_t Val,
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoSFB.td b/llvm/lib/Target/RISCV/RISCVInstrInfoSFB.td
index 6563cc27ecb76..e83246a82b28e 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoSFB.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoSFB.td
@@ -80,6 +80,17 @@ class SFBLUI
let Constraints = "$dst = $falsev";
}
+class SFBLoad
+ : Pseudo<(outs GPR:$dst),
+ (ins GPR:$lhs, GPR:$rhs, cond_code:$cc, GPR:$falsev, GPR:$rs1,
+ simm12_lo:$imm), []> {
+ let hasSideEffects = 0;
+ let mayLoad = 1;
+ let mayStore = 0;
+ let Size = 8;
+ let Constraints = "$dst = $falsev";
+}
+
class SFBShift_ri
: Pseudo<(outs GPR:$dst),
(ins GPR:$lhs, GPR:$rhs, cond_code:$cc, GPR:$falsev, GPR:$rs1,
@@ -166,3 +177,13 @@ def PseudoCCMINU : SFBALU_rr;
let Predicates = [HasShortForwardBranchIMul] in
def PseudoCCMUL : SFBALU_rr;
+
+let Predicates = [HasShortForwardBranchILoad] in {
+def PseudoCCLB : SFBLoad;
+def PseudoCCLH : SFBLoad;
+def PseudoCCLW : SFBLoad;
+def PseudoCCLHU : SFBLoad;
+def PseudoCCLBU : SFBLoad;
+def PseudoCCLWU : SFBLoad;
+def PseudoCCLD : SFBLoad;
+}
diff --git a/llvm/test/CodeGen/RISCV/features-info.ll b/llvm/test/CodeGen/RISCV/features-info.ll
index b3fa871c859a0..9b3a1d4b0f337 100644
--- a/llvm/test/CodeGen/RISCV/features-info.ll
+++ b/llvm/test/CodeGen/RISCV/features-info.ll
@@ -138,6 +138,7 @@
; CHECK-NEXT: shifted-zextw-fusion - Enable SLLI+SRLI to be fused when computing (shifted) word zero extension.
; CHECK-NEXT: shlcofideleg - 'Shlcofideleg' (Delegating LCOFI Interrupts to VS-mode).
; CHECK-NEXT: short-forward-branch-ialu - Enable short forward branch optimization for RVI base instructions.
+; CHECK-NEXT: short-forward-branch-iload - Enable short forward branch optimization for load instructions.
; CHECK-NEXT: short-forward-branch-iminmax - Enable short forward branch optimization for MIN,MAX instructions in Zbb.
; CHECK-NEXT: short-forward-branch-imul - Enable short forward branch optimization for MUL instruction.
; CHECK-NEXT: shtvala - 'Shtvala' (htval provides all needed values).
diff --git a/llvm/test/CodeGen/RISCV/short-forward-branch-opt-load-atomic-acquire.ll b/llvm/test/CodeGen/RISCV/short-forward-branch-opt-load-atomic-acquire.ll
new file mode 100644
index 0000000000000..1ba01ac5225d3
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/short-forward-branch-opt-load-atomic-acquire.ll
@@ -0,0 +1,2205 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc < %s -verify-machineinstrs -mtriple=riscv32 -mattr=+a | FileCheck %s --check-prefixes=RV32I
+; RUN: llc < %s -verify-machineinstrs -mtriple=riscv64 -mattr=+a | FileCheck %s --check-prefixes=RV64I
+; RUN: llc < %s -verify-machineinstrs -mtriple=riscv32 -mattr=+a,+short-forward-branch-ialu | \
+; RUN: FileCheck %s --check-prefixes=RV32I-SFB
+; RUN: llc < %s -verify-machineinstrs -mtriple=riscv64 -mattr=+a,+short-forward-branch-ialu | \
+; RUN: FileCheck %s --check-prefixes=RV64I-SFB
+; RUN: llc < %s -verify-machineinstrs -mtriple=riscv32 -mattr=+a,+short-forward-branch-iload | \
+; RUN: FileCheck %s --check-prefixes=RV32I-SFBILOAD
+; RUN: llc < %s -verify-machineinstrs -mtriple=riscv64 -mattr=+a,+short-forward-branch-iload | \
+; RUN: FileCheck %s --check-prefixes=RV64I-SFBILOAD
+
+define i32 @test_i8_s_3(ptr %base, i1 %x, i32 %b) nounwind {
+; RV32I-LABEL: test_i8_s_3:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: lb a0, 4(a0)
+; RV32I-NEXT: andi a1, a1, 1
+; RV32I-NEXT: fence r, rw
+; RV32I-NEXT: bnez a1, .LBB0_2
+; RV32I-NEXT: # %bb.1: # %entry
+; RV32I-NEXT: mv a0, a2
+; RV32I-NEXT: .LBB0_2: # %entry
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: test_i8_s_3:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: lb a0, 4(a0)
+; RV64I-NEXT: andi a1, a1, 1
+; RV64I-NEXT: fence r, rw
+; RV64I-NEXT: bnez a1, .LBB0_2
+; RV64I-NEXT: # %bb.1: # %entry
+; RV64I-NEXT: mv a0, a2
+; RV64I-NEXT: .LBB0_2: # %entry
+; RV64I-NEXT: ret
+;
+; RV32I-SFB-LABEL: test_i8_s_3:
+; RV32I-SFB: # %bb.0: # %entry
+; RV32I-SFB-NEXT: lb a0, 4(a0)
+; RV32I-SFB-NEXT: andi a1, a1, 1
+; RV32I-SFB-NEXT: bnez a1, .LBB0_2
+; RV32I-SFB-NEXT: # %bb.1: # %entry
+; RV32I-SFB-NEXT: mv a0, a2
+; RV32I-SFB-NEXT: .LBB0_2: # %entry
+; RV32I-SFB-NEXT: fence r, rw
+; RV32I-SFB-NEXT: ret
+;
+; RV64I-SFB-LABEL: test_i8_s_3:
+; RV64I-SFB: # %bb.0: # %entry
+; RV64I-SFB-NEXT: lb a0, 4(a0)
+; RV64I-SFB-NEXT: andi a1, a1, 1
+; RV64I-SFB-NEXT: bnez a1, .LBB0_2
+; RV64I-SFB-NEXT: # %bb.1: # %entry
+; RV64I-SFB-NEXT: mv a0, a2
+; RV64I-SFB-NEXT: .LBB0_2: # %entry
+; RV64I-SFB-NEXT: fence r, rw
+; RV64I-SFB-NEXT: ret
+;
+; RV32I-SFBILOAD-LABEL: test_i8_s_3:
+; RV32I-SFBILOAD: # %bb.0: # %entry
+; RV32I-SFBILOAD-NEXT: lb a0, 4(a0)
+; RV32I-SFBILOAD-NEXT: andi a1, a1, 1
+; RV32I-SFBILOAD-NEXT: bnez a1, .LBB0_2
+; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV32I-SFBILOAD-NEXT: mv a0, a2
+; RV32I-SFBILOAD-NEXT: .LBB0_2: # %entry
+; RV32I-SFBILOAD-NEXT: fence r, rw
+; RV32I-SFBILOAD-NEXT: ret
+;
+; RV64I-SFBILOAD-LABEL: test_i8_s_3:
+; RV64I-SFBILOAD: # %bb.0: # %entry
+; RV64I-SFBILOAD-NEXT: lb a0, 4(a0)
+; RV64I-SFBILOAD-NEXT: andi a1, a1, 1
+; RV64I-SFBILOAD-NEXT: bnez a1, .LBB0_2
+; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV64I-SFBILOAD-NEXT: mv a0, a2
+; RV64I-SFBILOAD-NEXT: .LBB0_2: # %entry
+; RV64I-SFBILOAD-NEXT: fence r, rw
+; RV64I-SFBILOAD-NEXT: ret
+entry:
+ %addr = getelementptr i8, ptr %base, i32 4 ; compute base + 4
+ %val = load atomic i8, ptr %addr acquire, align 1 ; load 8-bit value
+ %ext = sext i8 %val to i32 ; sign-extend to 32 bits
+ %res = select i1 %x, i32 %ext, i32 %b
+ ret i32 %res
+}
+
+define i32 @test_i8_z_3(ptr %base, i1 %x, i32 %b) nounwind {
+; RV32I-LABEL: test_i8_z_3:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: lbu a0, 4(a0)
+; RV32I-NEXT: andi a1, a1, 1
+; RV32I-NEXT: fence r, rw
+; RV32I-NEXT: bnez a1, .LBB1_2
+; RV32I-NEXT: # %bb.1: # %entry
+; RV32I-NEXT: mv a0, a2
+; RV32I-NEXT: .LBB1_2: # %entry
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: test_i8_z_3:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: lbu a0, 4(a0)
+; RV64I-NEXT: andi a1, a1, 1
+; RV64I-NEXT: fence r, rw
+; RV64I-NEXT: bnez a1, .LBB1_2
+; RV64I-NEXT: # %bb.1: # %entry
+; RV64I-NEXT: mv a0, a2
+; RV64I-NEXT: .LBB1_2: # %entry
+; RV64I-NEXT: ret
+;
+; RV32I-SFB-LABEL: test_i8_z_3:
+; RV32I-SFB: # %bb.0: # %entry
+; RV32I-SFB-NEXT: lbu a0, 4(a0)
+; RV32I-SFB-NEXT: andi a1, a1, 1
+; RV32I-SFB-NEXT: bnez a1, .LBB1_2
+; RV32I-SFB-NEXT: # %bb.1: # %entry
+; RV32I-SFB-NEXT: mv a0, a2
+; RV32I-SFB-NEXT: .LBB1_2: # %entry
+; RV32I-SFB-NEXT: fence r, rw
+; RV32I-SFB-NEXT: ret
+;
+; RV64I-SFB-LABEL: test_i8_z_3:
+; RV64I-SFB: # %bb.0: # %entry
+; RV64I-SFB-NEXT: lbu a0, 4(a0)
+; RV64I-SFB-NEXT: andi a1, a1, 1
+; RV64I-SFB-NEXT: bnez a1, .LBB1_2
+; RV64I-SFB-NEXT: # %bb.1: # %entry
+; RV64I-SFB-NEXT: mv a0, a2
+; RV64I-SFB-NEXT: .LBB1_2: # %entry
+; RV64I-SFB-NEXT: fence r, rw
+; RV64I-SFB-NEXT: ret
+;
+; RV32I-SFBILOAD-LABEL: test_i8_z_3:
+; RV32I-SFBILOAD: # %bb.0: # %entry
+; RV32I-SFBILOAD-NEXT: lbu a0, 4(a0)
+; RV32I-SFBILOAD-NEXT: andi a1, a1, 1
+; RV32I-SFBILOAD-NEXT: bnez a1, .LBB1_2
+; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV32I-SFBILOAD-NEXT: mv a0, a2
+; RV32I-SFBILOAD-NEXT: .LBB1_2: # %entry
+; RV32I-SFBILOAD-NEXT: fence r, rw
+; RV32I-SFBILOAD-NEXT: ret
+;
+; RV64I-SFBILOAD-LABEL: test_i8_z_3:
+; RV64I-SFBILOAD: # %bb.0: # %entry
+; RV64I-SFBILOAD-NEXT: lbu a0, 4(a0)
+; RV64I-SFBILOAD-NEXT: andi a1, a1, 1
+; RV64I-SFBILOAD-NEXT: bnez a1, .LBB1_2
+; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV64I-SFBILOAD-NEXT: mv a0, a2
+; RV64I-SFBILOAD-NEXT: .LBB1_2: # %entry
+; RV64I-SFBILOAD-NEXT: fence r, rw
+; RV64I-SFBILOAD-NEXT: ret
+entry:
+ %addr = getelementptr i8, ptr %base, i32 4 ; compute base + 4
+ %val = load atomic i8, ptr %addr acquire, align 1 ; load 8-bit value
+ %ext = zext i8 %val to i32 ; zero-extend to 32 bits
+ %res = select i1 %x, i32 %ext, i32 %b
+ ret i32 %res
+}
+
+define i32 @test_i16_s_3(ptr %base, i1 %x, i32 %b) nounwind {
+; RV32I-LABEL: test_i16_s_3:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: lh a0, 8(a0)
+; RV32I-NEXT: andi a1, a1, 1
+; RV32I-NEXT: fence r, rw
+; RV32I-NEXT: bnez a1, .LBB2_2
+; RV32I-NEXT: # %bb.1: # %entry
+; RV32I-NEXT: mv a0, a2
+; RV32I-NEXT: .LBB2_2: # %entry
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: test_i16_s_3:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: lh a0, 8(a0)
+; RV64I-NEXT: andi a1, a1, 1
+; RV64I-NEXT: fence r, rw
+; RV64I-NEXT: bnez a1, .LBB2_2
+; RV64I-NEXT: # %bb.1: # %entry
+; RV64I-NEXT: mv a0, a2
+; RV64I-NEXT: .LBB2_2: # %entry
+; RV64I-NEXT: ret
+;
+; RV32I-SFB-LABEL: test_i16_s_3:
+; RV32I-SFB: # %bb.0: # %entry
+; RV32I-SFB-NEXT: lh a0, 8(a0)
+; RV32I-SFB-NEXT: andi a1, a1, 1
+; RV32I-SFB-NEXT: bnez a1, .LBB2_2
+; RV32I-SFB-NEXT: # %bb.1: # %entry
+; RV32I-SFB-NEXT: mv a0, a2
+; RV32I-SFB-NEXT: .LBB2_2: # %entry
+; RV32I-SFB-NEXT: fence r, rw
+; RV32I-SFB-NEXT: ret
+;
+; RV64I-SFB-LABEL: test_i16_s_3:
+; RV64I-SFB: # %bb.0: # %entry
+; RV64I-SFB-NEXT: lh a0, 8(a0)
+; RV64I-SFB-NEXT: andi a1, a1, 1
+; RV64I-SFB-NEXT: bnez a1, .LBB2_2
+; RV64I-SFB-NEXT: # %bb.1: # %entry
+; RV64I-SFB-NEXT: mv a0, a2
+; RV64I-SFB-NEXT: .LBB2_2: # %entry
+; RV64I-SFB-NEXT: fence r, rw
+; RV64I-SFB-NEXT: ret
+;
+; RV32I-SFBILOAD-LABEL: test_i16_s_3:
+; RV32I-SFBILOAD: # %bb.0: # %entry
+; RV32I-SFBILOAD-NEXT: lh a0, 8(a0)
+; RV32I-SFBILOAD-NEXT: andi a1, a1, 1
+; RV32I-SFBILOAD-NEXT: bnez a1, .LBB2_2
+; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV32I-SFBILOAD-NEXT: mv a0, a2
+; RV32I-SFBILOAD-NEXT: .LBB2_2: # %entry
+; RV32I-SFBILOAD-NEXT: fence r, rw
+; RV32I-SFBILOAD-NEXT: ret
+;
+; RV64I-SFBILOAD-LABEL: test_i16_s_3:
+; RV64I-SFBILOAD: # %bb.0: # %entry
+; RV64I-SFBILOAD-NEXT: lh a0, 8(a0)
+; RV64I-SFBILOAD-NEXT: andi a1, a1, 1
+; RV64I-SFBILOAD-NEXT: bnez a1, .LBB2_2
+; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV64I-SFBILOAD-NEXT: mv a0, a2
+; RV64I-SFBILOAD-NEXT: .LBB2_2: # %entry
+; RV64I-SFBILOAD-NEXT: fence r, rw
+; RV64I-SFBILOAD-NEXT: ret
+entry:
+  %addr = getelementptr i16, ptr %base, i32 4 ; element 4 (byte offset 8)
+ %val = load atomic i16, ptr %addr acquire, align 2 ; load 16-bit value
+ %ext = sext i16 %val to i32 ; sign-extend to 32 bits
+ %res = select i1 %x, i32 %ext, i32 %b
+ ret i32 %res
+}
+
+define i32 @test_i16_z_3(ptr %base, i1 %x, i32 %b) nounwind {
+; RV32I-LABEL: test_i16_z_3:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: lhu a0, 8(a0)
+; RV32I-NEXT: andi a1, a1, 1
+; RV32I-NEXT: fence r, rw
+; RV32I-NEXT: bnez a1, .LBB3_2
+; RV32I-NEXT: # %bb.1: # %entry
+; RV32I-NEXT: mv a0, a2
+; RV32I-NEXT: .LBB3_2: # %entry
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: test_i16_z_3:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: lhu a0, 8(a0)
+; RV64I-NEXT: andi a1, a1, 1
+; RV64I-NEXT: fence r, rw
+; RV64I-NEXT: bnez a1, .LBB3_2
+; RV64I-NEXT: # %bb.1: # %entry
+; RV64I-NEXT: mv a0, a2
+; RV64I-NEXT: .LBB3_2: # %entry
+; RV64I-NEXT: ret
+;
+; RV32I-SFB-LABEL: test_i16_z_3:
+; RV32I-SFB: # %bb.0: # %entry
+; RV32I-SFB-NEXT: lhu a0, 8(a0)
+; RV32I-SFB-NEXT: andi a1, a1, 1
+; RV32I-SFB-NEXT: bnez a1, .LBB3_2
+; RV32I-SFB-NEXT: # %bb.1: # %entry
+; RV32I-SFB-NEXT: mv a0, a2
+; RV32I-SFB-NEXT: .LBB3_2: # %entry
+; RV32I-SFB-NEXT: fence r, rw
+; RV32I-SFB-NEXT: ret
+;
+; RV64I-SFB-LABEL: test_i16_z_3:
+; RV64I-SFB: # %bb.0: # %entry
+; RV64I-SFB-NEXT: lhu a0, 8(a0)
+; RV64I-SFB-NEXT: andi a1, a1, 1
+; RV64I-SFB-NEXT: bnez a1, .LBB3_2
+; RV64I-SFB-NEXT: # %bb.1: # %entry
+; RV64I-SFB-NEXT: mv a0, a2
+; RV64I-SFB-NEXT: .LBB3_2: # %entry
+; RV64I-SFB-NEXT: fence r, rw
+; RV64I-SFB-NEXT: ret
+;
+; RV32I-SFBILOAD-LABEL: test_i16_z_3:
+; RV32I-SFBILOAD: # %bb.0: # %entry
+; RV32I-SFBILOAD-NEXT: lhu a0, 8(a0)
+; RV32I-SFBILOAD-NEXT: andi a1, a1, 1
+; RV32I-SFBILOAD-NEXT: bnez a1, .LBB3_2
+; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV32I-SFBILOAD-NEXT: mv a0, a2
+; RV32I-SFBILOAD-NEXT: .LBB3_2: # %entry
+; RV32I-SFBILOAD-NEXT: fence r, rw
+; RV32I-SFBILOAD-NEXT: ret
+;
+; RV64I-SFBILOAD-LABEL: test_i16_z_3:
+; RV64I-SFBILOAD: # %bb.0: # %entry
+; RV64I-SFBILOAD-NEXT: lhu a0, 8(a0)
+; RV64I-SFBILOAD-NEXT: andi a1, a1, 1
+; RV64I-SFBILOAD-NEXT: bnez a1, .LBB3_2
+; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV64I-SFBILOAD-NEXT: mv a0, a2
+; RV64I-SFBILOAD-NEXT: .LBB3_2: # %entry
+; RV64I-SFBILOAD-NEXT: fence r, rw
+; RV64I-SFBILOAD-NEXT: ret
+entry:
+  %addr = getelementptr i16, ptr %base, i32 4 ; element 4 (byte offset 8)
+ %val = load atomic i16, ptr %addr acquire, align 2 ; load 16-bit value
+ %ext = zext i16 %val to i32 ; zero-extend to 32 bits
+ %res = select i1 %x, i32 %ext, i32 %b
+ ret i32 %res
+}
+
+define i32 @test_i32_3(ptr %base, i1 %x, i32 %b) nounwind {
+; RV32I-LABEL: test_i32_3:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: lw a0, 16(a0)
+; RV32I-NEXT: andi a1, a1, 1
+; RV32I-NEXT: fence r, rw
+; RV32I-NEXT: bnez a1, .LBB4_2
+; RV32I-NEXT: # %bb.1: # %entry
+; RV32I-NEXT: mv a0, a2
+; RV32I-NEXT: .LBB4_2: # %entry
+; RV32I-NE...
[truncated]
``````````
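One subtlety in the fold above: foldMemoryOperandImpl also accepts the mirrored select, where the load feeds the opposite arm. Rather than giving up, it swaps the arms and inverts the condition code via RISCVCC::getInverseBranchCondition, so the same conditional-load pseudo still applies. A hypothetical companion reduction (again, names are illustrative):

```llvm
; Same shape as before, but the loaded value sits on the other arm of the
; select; the fold flips the branch condition instead of bailing out.
define i32 @select_load_swapped(ptr %base, i1 %cond, i32 %fallback) {
entry:
  %addr = getelementptr i32, ptr %base, i32 4
  %val = load i32, ptr %addr, align 4
  %res = select i1 %cond, i32 %fallback, i32 %val
  ret i32 %res
}
```

Both shapes can be exercised with an invocation mirroring the new RUN lines, e.g. `llc -mtriple=riscv64 -mattr=+short-forward-branch-iload`.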
https://github.com/llvm/llvm-project/pull/170829